From da16e3c82b018422a1a3eb64a47e4ce8d9bb8b10 Mon Sep 17 00:00:00 2001 From: Moses Narrow Date: Sat, 19 Oct 2024 11:58:52 -0500 Subject: [PATCH] update all go deps except github.com/dgraph-io/badger/v3 v3.2103.2 and github.com/dgraph-io/ristretto v0.1.0 --- go.mod | 151 +- go.sum | 474 +- .../Microsoft/go-winio/.golangci.yml | 10 +- .../github.com/Microsoft/go-winio/backup.go | 33 +- vendor/github.com/Microsoft/go-winio/file.go | 85 +- .../github.com/Microsoft/go-winio/fileinfo.go | 22 +- .../github.com/Microsoft/go-winio/hvsock.go | 47 +- .../Microsoft/go-winio/internal/fs/fs.go | 72 +- .../go-winio/internal/fs/zsyscall_windows.go | 9 +- .../go-winio/internal/socket/socket.go | 8 +- .../internal/socket/zsyscall_windows.go | 9 +- .../go-winio/internal/stringbuffer/wstring.go | 2 +- vendor/github.com/Microsoft/go-winio/pipe.go | 125 +- .../Microsoft/go-winio/privilege.go | 9 +- vendor/github.com/Microsoft/go-winio/sd.go | 37 +- vendor/github.com/Microsoft/go-winio/tools.go | 5 - .../Microsoft/go-winio/zsyscall_windows.go | 205 +- .../VictoriaMetrics/metrics/README.md | 5 +- .../VictoriaMetrics/metrics/counter.go | 9 + .../VictoriaMetrics/metrics/floatcounter.go | 4 + .../VictoriaMetrics/metrics/gauge.go | 67 +- .../VictoriaMetrics/metrics/go_metrics.go | 186 +- .../VictoriaMetrics/metrics/histogram.go | 42 +- .../VictoriaMetrics/metrics/metrics.go | 142 +- .../metrics/process_metrics_linux.go | 49 +- .../metrics/process_metrics_windows.go | 25 +- .../VictoriaMetrics/metrics/push.go | 441 +- .../github.com/VictoriaMetrics/metrics/set.go | 47 +- .../VictoriaMetrics/metrics/summary.go | 8 + vendor/github.com/bitfield/script/README.md | 50 +- vendor/github.com/bitfield/script/script.go | 112 +- vendor/github.com/bytedance/sonic/.gitignore | 5 +- vendor/github.com/bytedance/sonic/.gitmodules | 7 +- vendor/github.com/bytedance/sonic/Makefile | 111 - vendor/github.com/bytedance/sonic/README.md | 112 +- .../bytedance/sonic/README_ZH_CN.md | 91 +- vendor/github.com/bytedance/sonic/api.go | 76 +- .../sonic/ast/{api_amd64.go => api.go} | 52 +- .../bytedance/sonic/ast/api_compat.go | 82 +- .../github.com/bytedance/sonic/ast/buffer.go | 223 +- .../github.com/bytedance/sonic/ast/decode.go | 44 +- .../github.com/bytedance/sonic/ast/encode.go | 87 +- .../github.com/bytedance/sonic/ast/error.go | 6 +- .../bytedance/sonic/ast/iterator.go | 35 +- vendor/github.com/bytedance/sonic/ast/node.go | 677 +- .../github.com/bytedance/sonic/ast/parser.go | 151 +- .../github.com/bytedance/sonic/ast/search.go | 127 + .../github.com/bytedance/sonic/ast/stubs.go | 142 + .../bytedance/sonic/ast/stubs_go115.go | 55 - .../bytedance/sonic/ast/stubs_go120.go | 55 - .../github.com/bytedance/sonic/ast/visitor.go | 47 +- vendor/github.com/bytedance/sonic/compat.go | 4 +- .../bytedance/sonic/decoder/decoder_compat.go | 10 +- .../{decoder_amd64.go => decoder_native.go} | 37 +- .../bytedance/sonic/encoder/encoder_compat.go | 56 +- .../{encoder_amd64.go => encoder_native.go} | 14 +- vendor/github.com/bytedance/sonic/go.work | 5 +- vendor/github.com/bytedance/sonic/go.work.sum | 1 + .../sonic/internal/base64/b64_amd64.go | 46 + .../sonic/internal/base64/b64_compat.go | 44 + .../bytedance/sonic/internal/cpu/features.go | 4 +- .../internal/decoder/{ => api}/decoder.go | 136 +- .../internal/decoder/api/decoder_amd64.go | 38 + .../internal/decoder/api/decoder_arm64.go | 38 + .../internal/decoder/{ => api}/stream.go | 198 +- .../internal/decoder/asm_stubs_amd64_go116.go | 130 - .../decoder/assembler_stkabi_amd64.go | 1949 - .../sonic/internal/decoder/consts/option.go | 36 + .../internal/decoder/{ => errors}/errors.go | 16 +- .../internal/decoder/generic_stkabi_amd64.go | 733 - .../sonic/internal/decoder/{ => jitdec}/asm.s | 0 .../{ => jitdec}/asm_stubs_amd64_go117.go | 2 +- .../{ => jitdec}/asm_stubs_amd64_go121.go | 5 +- .../{ => jitdec}/assembler_regabi_amd64.go | 111 +- .../internal/decoder/{ => jitdec}/compiler.go | 144 +- .../internal/decoder/{ => jitdec}/debug.go | 2 +- .../sonic/internal/decoder/jitdec/decoder.go | 140 + .../{ => jitdec}/generic_regabi_amd64.go | 8 +- .../{ => jitdec}/generic_regabi_amd64_test.s | 2 +- .../internal/decoder/{ => jitdec}/pools.go | 4 +- .../decoder/{ => jitdec}/primitives.go | 11 +- .../decoder/{ => jitdec}/stubs_go116.go | 13 +- .../decoder/{ => jitdec}/stubs_go120.go | 11 +- .../internal/decoder/{ => jitdec}/types.go | 2 +- .../internal/decoder/{ => jitdec}/utils.go | 2 +- .../internal/decoder/optdec/compile_struct.go | 174 + .../sonic/internal/decoder/optdec/compiler.go | 449 + .../sonic/internal/decoder/optdec/const.go | 60 + .../sonic/internal/decoder/optdec/context.go | 3 + .../sonic/internal/decoder/optdec/decoder.go | 160 + .../sonic/internal/decoder/optdec/errors.go | 73 + .../sonic/internal/decoder/optdec/functor.go | 281 + .../sonic/internal/decoder/optdec/helper.go | 101 + .../internal/decoder/optdec/interface.go | 169 + .../sonic/internal/decoder/optdec/map.go | 430 + .../sonic/internal/decoder/optdec/native.go | 269 + .../sonic/internal/decoder/optdec/node.go | 1279 + .../sonic/internal/decoder/optdec/slice.go | 224 + .../internal/decoder/optdec/stringopts.go | 360 + .../sonic/internal/decoder/optdec/structs.go | 61 + .../sonic/internal/decoder/optdec/types.go | 60 + .../internal/encoder/{ => alg}/mapiter.go | 89 +- .../sonic/internal/encoder/alg/opts.go | 31 + .../sonic/internal/encoder/alg/primitives.go | 95 + .../sonic/internal/encoder/{ => alg}/sort.go | 2 +- .../sonic/internal/encoder/alg/spec.go | 198 + .../sonic/internal/encoder/alg/spec_compat.go | 148 + .../internal/encoder/asm_stubs_amd64_go116.go | 51 - .../encoder/assembler_regabi_amd64.go | 1177 - .../encoder/assembler_stkabi_amd64.go | 1175 - .../sonic/internal/encoder/compiler.go | 1517 +- .../sonic/internal/encoder/debug_go117.go | 205 - .../sonic/internal/encoder/encode_norace.go | 24 + .../sonic/internal/encoder/encode_race.go | 54 + .../sonic/internal/encoder/encoder.go | 191 +- .../bytedance/sonic/internal/encoder/ir/op.go | 473 + .../bytedance/sonic/internal/encoder/pools.go | 193 - .../sonic/internal/encoder/pools_amd64.go | 97 + .../sonic/internal/encoder/pools_compt.go | 24 + .../sonic/internal/encoder/primitives.go | 168 - .../sonic/internal/encoder/stream.go | 35 +- .../sonic/internal/encoder/stubs_go116.go | 65 - .../sonic/internal/encoder/stubs_go117.go | 66 - .../sonic/internal/encoder/stubs_go120.go | 66 - .../sonic/internal/encoder/stubs_go121.go | 66 - .../sonic/internal/encoder/vars/cache.go | 48 + .../sonic/internal/encoder/vars/const.go | 42 + .../internal/encoder/{ => vars}/errors.go | 20 +- .../sonic/internal/encoder/vars/stack.go | 146 + .../internal/encoder/{ => vars}/types.go | 20 +- .../sonic/internal/encoder/vm/stbus.go | 45 + .../bytedance/sonic/internal/encoder/vm/vm.go | 374 + .../{ => x86}/asm_stubs_amd64_go117.go | 20 +- .../{ => x86}/asm_stubs_amd64_go121.go | 22 +- .../encoder/x86/assembler_regabi_amd64.go | 1194 + .../internal/encoder/{ => x86}/debug_go116.go | 4 +- .../sonic/internal/encoder/x86/debug_go117.go | 201 + .../sonic/internal/encoder/x86/stbus.go | 54 + .../bytedance/sonic/internal/envs/decode.go | 24 + .../sonic/internal/jit/assembler_amd64.go | 12 - .../bytedance/sonic/internal/jit/runtime.go | 7 +- .../sonic/internal/native/avx/native_amd64.go | 191 - .../internal/native/avx/native_subr_amd64.go | 603 - .../internal/native/avx/native_text_amd64.go | 14075 ---- .../sonic/internal/native/avx2/f32toa.go | 35 + .../sonic/internal/native/avx2/f32toa_subr.go | 46 + .../internal/native/avx2/f32toa_text_amd64.go | 1063 + .../sonic/internal/native/avx2/f64toa.go | 35 + .../sonic/internal/native/avx2/f64toa_subr.go | 46 + .../internal/native/avx2/f64toa_text_amd64.go | 2497 + .../sonic/internal/native/avx2/get_by_path.go | 35 + .../internal/native/avx2/get_by_path_subr.go | 46 + .../native/avx2/get_by_path_text_amd64.go | 6241 ++ .../sonic/internal/native/avx2/html_escape.go | 34 + .../internal/native/avx2/html_escape_subr.go | 45 + .../native/avx2/html_escape_text_amd64.go | 833 + .../sonic/internal/native/avx2/i64toa.go | 35 + .../sonic/internal/native/avx2/i64toa_subr.go | 47 + .../internal/native/avx2/i64toa_text_amd64.go | 631 + .../internal/native/avx2/lookup_small_key.go | 37 + .../native/avx2/lookup_small_key_subr.go | 45 + .../avx2/lookup_small_key_text_amd64.go | 216 + .../sonic/internal/native/avx2/lspace.go | 35 + .../sonic/internal/native/avx2/lspace_subr.go | 38 + .../internal/native/avx2/lspace_text_amd64.go | 113 + .../internal/native/avx2/native_amd64.go | 191 - .../internal/native/avx2/native_export.go | 51 + .../internal/native/avx2/native_subr_amd64.go | 610 - .../internal/native/avx2/native_text_amd64.go | 14800 ---- .../native/avx2/parse_with_padding.go | 36 + .../native/avx2/parse_with_padding_subr.go | 46 + .../avx2/parse_with_padding_text_amd64.go | 15284 ++++ .../sonic/internal/native/avx2/quote.go | 33 + .../sonic/internal/native/avx2/quote_subr.go | 46 + .../internal/native/avx2/quote_text_amd64.go | 1349 + .../sonic/internal/native/avx2/skip_array.go | 35 + .../internal/native/avx2/skip_array_subr.go | 46 + .../native/avx2/skip_array_text_amd64.go | 4348 + .../sonic/internal/native/avx2/skip_number.go | 34 + .../internal/native/avx2/skip_number_subr.go | 46 + .../native/avx2/skip_number_text_amd64.go | 494 + .../sonic/internal/native/avx2/skip_object.go | 35 + .../internal/native/avx2/skip_object_subr.go | 46 + .../native/avx2/skip_object_text_amd64.go | 4348 + .../sonic/internal/native/avx2/skip_one.go | 35 + .../internal/native/avx2/skip_one_fast.go | 35 + .../native/avx2/skip_one_fast_subr.go | 46 + .../native/avx2/skip_one_fast_text_amd64.go | 957 + .../internal/native/avx2/skip_one_subr.go | 46 + .../native/avx2/skip_one_text_amd64.go | 3719 + .../sonic/internal/native/avx2/u64toa.go | 34 + .../sonic/internal/native/avx2/u64toa_subr.go | 39 + .../internal/native/avx2/u64toa_text_amd64.go | 364 + .../sonic/internal/native/avx2/unquote.go | 34 + .../internal/native/avx2/unquote_subr.go | 46 + .../native/avx2/unquote_text_amd64.go | 594 + .../internal/native/avx2/validate_one.go | 35 + .../internal/native/avx2/validate_one_subr.go | 46 + .../native/avx2/validate_one_text_amd64.go | 4348 + .../internal/native/avx2/validate_utf8.go | 37 + .../native/avx2/validate_utf8_fast.go | 34 + .../native/avx2/validate_utf8_fast_subr.go | 42 + .../avx2/validate_utf8_fast_text_amd64.go | 752 + .../native/avx2/validate_utf8_subr.go | 44 + .../native/avx2/validate_utf8_text_amd64.go | 193 + .../sonic/internal/native/avx2/value.go | 33 + .../sonic/internal/native/avx2/value_subr.go | 46 + .../internal/native/avx2/value_text_amd64.go | 5660 ++ .../sonic/internal/native/avx2/vnumber.go | 33 + .../internal/native/avx2/vnumber_subr.go | 46 + .../native/avx2/vnumber_text_amd64.go | 4283 + .../sonic/internal/native/avx2/vsigned.go | 33 + .../internal/native/avx2/vsigned_subr.go | 50 + .../native/avx2/vsigned_text_amd64.go | 120 + .../sonic/internal/native/avx2/vstring.go | 33 + .../internal/native/avx2/vstring_subr.go | 46 + .../native/avx2/vstring_text_amd64.go | 505 + .../sonic/internal/native/avx2/vunsigned.go | 33 + .../internal/native/avx2/vunsigned_subr.go | 57 + .../native/avx2/vunsigned_text_amd64.go | 127 + .../sonic/internal/native/dispatch_amd64.go | 171 +- .../sonic/internal/native/dispatch_arm64.go | 169 + .../sonic/internal/native/f32toa.tmpl | 35 + .../sonic/internal/native/f64toa.tmpl | 35 + ...at_amd64_test.tmpl => fastfloat_test.tmpl} | 3 + ...tint_amd64_test.tmpl => fastint_test.tmpl} | 3 + .../sonic/internal/native/get_by_path.tmpl | 35 + .../sonic/internal/native/html_escape.tmpl | 34 + .../sonic/internal/native/i64toa.tmpl | 35 + .../internal/native/lookup_small_key.tmpl | 37 + .../sonic/internal/native/lspace.tmpl | 35 + .../sonic/internal/native/native_amd64.tmpl | 189 - .../sonic/internal/native/native_export.tmpl | 51 + ...ative_amd64_test.tmpl => native_test.tmpl} | 13 +- .../internal/native/neon/f32toa_arm64.go | 29 + .../sonic/internal/native/neon/f32toa_arm64.s | 1019 + .../internal/native/neon/f32toa_subr_arm64.go | 25 + .../internal/native/neon/f64toa_arm64.go | 29 + .../sonic/internal/native/neon/f64toa_arm64.s | 2492 + .../internal/native/neon/f64toa_subr_arm64.go | 25 + .../internal/native/neon/get_by_path_arm64.go | 33 + .../internal/native/neon/get_by_path_arm64.s | 5627 ++ .../native/neon/get_by_path_subr_arm64.go | 25 + .../neon/html_escape_arm64.go} | 35 +- .../internal/native/neon/html_escape_arm64.s | 1406 + .../native/neon/html_escape_subr_arm64.go | 25 + .../internal/native/neon/i64toa_arm64.go | 29 + .../sonic/internal/native/neon/i64toa_arm64.s | 985 + .../internal/native/neon/i64toa_subr_arm64.go | 25 + .../native/neon/lookup_small_key_arm64.go | 31 + .../native/neon/lookup_small_key_arm64.s | 354 + .../neon/lookup_small_key_subr_arm64.go | 25 + .../internal/native/neon/lspace_arm64.go | 35 + .../sonic/internal/native/neon/lspace_arm64.s | 68 + .../internal/native/neon/lspace_subr_arm64.go | 25 + .../native/neon/native_export_arm64.go | 51 + .../native/neon/parse_with_padding_arm64.go | 30 + .../native/neon/parse_with_padding_arm64.s | 14122 ++++ .../neon/parse_with_padding_subr_arm64.go | 25 + .../sonic/internal/native/neon/quote_arm64.go | 35 + .../sonic/internal/native/neon/quote_arm64.s | 2563 + .../internal/native/neon/quote_subr_arm64.go | 25 + .../internal/native/neon/skip_array_arm64.go | 35 + .../internal/native/neon/skip_array_arm64.s | 3481 + .../native/neon/skip_array_subr_arm64.go | 25 + .../internal/native/neon/skip_number_arm64.go | 29 + .../internal/native/neon/skip_number_arm64.s | 373 + .../native/neon/skip_number_subr_arm64.go | 25 + .../internal/native/neon/skip_object_arm64.go | 35 + .../internal/native/neon/skip_object_arm64.s | 3481 + .../native/neon/skip_object_subr_arm64.go | 25 + .../internal/native/neon/skip_one_arm64.go | 35 + .../internal/native/neon/skip_one_arm64.s | 3294 + .../native/neon/skip_one_fast_arm64.go | 29 + .../native/neon/skip_one_fast_arm64.s | 998 + .../native/neon/skip_one_fast_subr_arm64.go | 25 + .../native/neon/skip_one_subr_arm64.go | 25 + .../internal/native/neon/u64toa_arm64.go | 29 + .../sonic/internal/native/neon/u64toa_arm64.s | 598 + .../internal/native/neon/u64toa_subr_arm64.go | 25 + .../internal/native/neon/unquote_arm64.go | 35 + .../internal/native/neon/unquote_arm64.s | 615 + .../native/neon/unquote_subr_arm64.go | 25 + .../native/neon/validate_one_arm64.go | 35 + .../internal/native/neon/validate_one_arm64.s | 3480 + .../native/neon/validate_one_subr_arm64.go | 25 + .../native/neon/validate_utf8_arm64.go | 36 + .../native/neon/validate_utf8_arm64.s | 222 + .../native/neon/validate_utf8_fast_arm64.go | 29 + .../native/neon/validate_utf8_fast_arm64.s | 182 + .../neon/validate_utf8_fast_subr_arm64.go | 25 + .../native/neon/validate_utf8_subr_arm64.go | 25 + .../sonic/internal/native/neon/value_arm64.go | 34 + .../sonic/internal/native/neon/value_arm64.s | 5968 ++ .../internal/native/neon/value_subr_arm64.go | 25 + .../internal/native/neon/vnumber_arm64.go | 34 + .../internal/native/neon/vnumber_arm64.s | 4690 ++ .../native/neon/vnumber_subr_arm64.go | 25 + .../internal/native/neon/vsigned_arm64.go | 32 + .../internal/native/neon/vsigned_arm64.s | 156 + .../native/neon/vsigned_subr_arm64.go | 25 + .../internal/native/neon/vstring_arm64.go | 32 + .../internal/native/neon/vstring_arm64.s | 628 + .../native/neon/vstring_subr_arm64.go | 25 + .../internal/native/neon/vunsigned_arm64.go | 32 + .../internal/native/neon/vunsigned_arm64.s | 150 + .../native/neon/vunsigned_subr_arm64.go | 25 + .../internal/native/parse_with_padding.tmpl | 36 + .../sonic/internal/native/quote.tmpl | 33 + .../internal/native/recover_amd64_test.tmpl | 697 - .../sonic/internal/native/recover_test.tmpl | 722 + .../sonic/internal/native/skip_array.tmpl | 35 + .../sonic/internal/native/skip_number.tmpl | 34 + .../sonic/internal/native/skip_object.tmpl | 35 + .../sonic/internal/native/skip_one.tmpl | 35 + .../sonic/internal/native/skip_one_fast.tmpl | 35 + .../sonic/internal/native/sse/f32toa.go | 35 + .../sonic/internal/native/sse/f32toa_subr.go | 46 + .../internal/native/sse/f32toa_text_amd64.go | 1052 + .../sonic/internal/native/sse/f64toa.go | 35 + .../sonic/internal/native/sse/f64toa_subr.go | 46 + .../internal/native/sse/f64toa_text_amd64.go | 2486 + .../sonic/internal/native/sse/get_by_path.go | 35 + .../internal/native/sse/get_by_path_subr.go | 46 + .../native/sse/get_by_path_text_amd64.go | 6154 ++ .../sonic/internal/native/sse/html_escape.go | 34 + .../internal/native/sse/html_escape_subr.go | 45 + .../native/sse/html_escape_text_amd64.go | 635 + .../sonic/internal/native/sse/i64toa.go | 35 + .../sonic/internal/native/sse/i64toa_subr.go | 47 + .../internal/native/sse/i64toa_text_amd64.go | 658 + .../internal/native/sse/lookup_small_key.go | 37 + .../native/sse/lookup_small_key_subr.go | 45 + .../native/sse/lookup_small_key_text_amd64.go | 239 + .../sonic/internal/native/sse/lspace.go | 35 + .../sonic/internal/native/sse/lspace_subr.go | 33 + .../internal/native/sse/lspace_text_amd64.go | 37 + .../sonic/internal/native/sse/native_amd64.go | 191 - .../internal/native/sse/native_export.go | 51 + .../internal/native/sse/native_subr_amd64.go | 604 - .../internal/native/sse/native_text_amd64.go | 14266 ---- .../internal/native/sse/parse_with_padding.go | 36 + .../native/sse/parse_with_padding_subr.go | 46 + .../sse/parse_with_padding_text_amd64.go | 15013 ++++ .../sonic/internal/native/sse/quote.go | 33 + .../sonic/internal/native/sse/quote_subr.go | 46 + .../internal/native/sse/quote_text_amd64.go | 1100 + .../sonic/internal/native/sse/skip_array.go | 35 + .../internal/native/sse/skip_array_subr.go | 46 + .../native/sse/skip_array_text_amd64.go | 4017 + .../sonic/internal/native/sse/skip_number.go | 34 + .../internal/native/sse/skip_number_subr.go | 46 + .../native/sse/skip_number_text_amd64.go | 351 + .../sonic/internal/native/sse/skip_object.go | 35 + .../internal/native/sse/skip_object_subr.go | 46 + .../native/sse/skip_object_text_amd64.go | 4017 + .../sonic/internal/native/sse/skip_one.go | 35 + .../internal/native/sse/skip_one_fast.go | 35 + .../internal/native/sse/skip_one_fast_subr.go | 46 + .../native/sse/skip_one_fast_text_amd64.go | 1037 + .../internal/native/sse/skip_one_subr.go | 46 + .../native/sse/skip_one_text_amd64.go | 3844 + .../sonic/internal/native/sse/u64toa.go | 34 + .../sonic/internal/native/sse/u64toa_subr.go | 39 + .../internal/native/sse/u64toa_text_amd64.go | 384 + .../sonic/internal/native/sse/unquote.go | 34 + .../sonic/internal/native/sse/unquote_subr.go | 46 + .../internal/native/sse/unquote_text_amd64.go | 538 + .../sonic/internal/native/sse/validate_one.go | 35 + .../internal/native/sse/validate_one_subr.go | 46 + .../native/sse/validate_one_text_amd64.go | 4017 + .../internal/native/sse/validate_utf8.go | 37 + .../internal/native/sse/validate_utf8_fast.go | 34 + .../native/sse/validate_utf8_fast_subr.go | 41 + .../sse/validate_utf8_fast_text_amd64.go | 158 + .../internal/native/sse/validate_utf8_subr.go | 44 + .../native/sse/validate_utf8_text_amd64.go | 193 + .../sonic/internal/native/sse/value.go | 33 + .../sonic/internal/native/sse/value_subr.go | 46 + .../internal/native/sse/value_text_amd64.go | 5439 ++ .../sonic/internal/native/sse/vnumber.go | 33 + .../sonic/internal/native/sse/vnumber_subr.go | 46 + .../internal/native/sse/vnumber_text_amd64.go | 4161 + .../sonic/internal/native/sse/vsigned.go | 33 + .../sonic/internal/native/sse/vsigned_subr.go | 50 + .../internal/native/sse/vsigned_text_amd64.go | 120 + .../sonic/internal/native/sse/vstring.go | 33 + .../sonic/internal/native/sse/vstring_subr.go | 46 + .../internal/native/sse/vstring_text_amd64.go | 653 + .../sonic/internal/native/sse/vunsigned.go | 33 + .../internal/native/sse/vunsigned_subr.go | 57 + .../native/sse/vunsigned_text_amd64.go | 127 + .../internal/native/traceback_test.mock_tmpl | 379 + .../sonic/internal/native/types/types.go | 9 +- .../sonic/internal/native/u64toa.tmpl | 34 + .../sonic/internal/native/unquote.tmpl | 34 + .../sonic/internal/native/validate_one.tmpl | 35 + .../sonic/internal/native/validate_utf8.tmpl | 37 + .../internal/native/validate_utf8_fast.tmpl | 34 + .../sonic/internal/native/value.tmpl | 33 + .../sonic/internal/native/vnumber.tmpl | 33 + .../sonic/internal/native/vsigned.tmpl | 33 + .../sonic/internal/native/vstring.tmpl | 33 + .../sonic/internal/native/vunsigned.tmpl | 33 + .../internal/{encoder => optcaching}/asm.s | 0 .../sonic/internal/optcaching/fcache.go | 362 + .../bytedance/sonic/internal/rt/asm_amd64.s | 40 - .../bytedance/sonic/internal/rt/assertI2I.go | 42 + .../sonic/internal/rt/base64_amd64.go | 20 + .../sonic/internal/rt/base64_compat.go | 20 + .../bytedance/sonic/internal/rt/fastconv.go | 175 + .../bytedance/sonic/internal/rt/fastmem.go | 39 +- .../bytedance/sonic/internal/rt/fastvalue.go | 244 +- .../bytedance/sonic/internal/rt/gcwb.go | 55 +- .../sonic/internal/rt/gcwb_legacy.go | 29 + .../bytedance/sonic/internal/rt/growslice.go | 36 + .../sonic/internal/rt/growslice_legacy.go | 27 + .../bytedance/sonic/internal/rt/int48.go | 6 +- .../bytedance/sonic/internal/rt/pool.go | 31 + .../bytedance/sonic/internal/rt/stackmap.go | 2 +- .../bytedance/sonic/internal/rt/stubs.go | 165 + .../bytedance/sonic/internal/rt/table.go | 118 + .../bytedance/sonic/internal/rt/types.go | 45 + .../sonic/loader}/LICENSE | 0 .../bytedance/sonic/loader/funcdata_compat.go | 6 +- .../bytedance/sonic/loader/funcdata_go116.go | 6 +- .../bytedance/sonic/loader/funcdata_go118.go | 2 +- .../bytedance/sonic/loader/funcdata_go120.go | 2 +- .../bytedance/sonic/loader/funcdata_go121.go | 6 +- .../bytedance/sonic/loader/funcdata_go123.go | 118 + .../bytedance/sonic/loader/funcdata_latest.go | 6 +- .../sonic/{ => loader}/internal/abi/abi.go | 2 +- .../{ => loader}/internal/abi/abi_amd64.go | 2 +- .../internal/abi/abi_legacy_amd64.go | 2 +- .../internal/abi/abi_regabi_amd64.go | 2 +- .../sonic/{ => loader}/internal/abi/stubs.go | 2 +- .../sonic/loader/internal/rt/fastmem.go | 62 + .../sonic/loader/internal/rt/fastvalue.go | 183 + .../sonic/loader/internal/rt/stackmap.go | 181 + .../bytedance/sonic/loader/loader_latest.go | 5 +- .../bytedance/sonic/loader/mmap_unix.go | 4 +- .../bytedance/sonic/loader/stubs.go | 34 +- .../bytedance/sonic/loader/wrapper.go | 4 +- .../bytedance/sonic/option/option.go | 12 +- vendor/github.com/bytedance/sonic/sonic.go | 55 +- .../bytedance/sonic/unquote/unquote.go | 31 +- .../github.com/bytedance/sonic/utf8/utf8.go | 12 +- vendor/github.com/cespare/xxhash/v2/README.md | 33 +- .../github.com/cespare/xxhash/v2/testall.sh | 10 + vendor/github.com/cespare/xxhash/v2/xxhash.go | 70 +- .../cespare/xxhash/v2/xxhash_amd64.s | 336 +- .../cespare/xxhash/v2/xxhash_arm64.s | 183 + .../v2/{xxhash_amd64.go => xxhash_asm.go} | 4 +- .../cespare/xxhash/v2/xxhash_other.go | 24 +- .../cespare/xxhash/v2/xxhash_safe.go | 3 +- .../cespare/xxhash/v2/xxhash_unsafe.go | 5 +- .../github.com/chenzhuoyu/base64x/.gitignore | 43 - .../github.com/chenzhuoyu/base64x/.gitmodules | 3 - vendor/github.com/chenzhuoyu/base64x/cpuid.go | 17 - .../github.com/chenzhuoyu/iasm/expr/pools.go | 26 - .../github.com/chenzhuoyu/iasm/expr/term.go | 7 - .../github.com/chenzhuoyu/iasm/x86_64/asm.s | 0 .../chenzhuoyu/iasm/x86_64/operands.go | 510 - .../chenzhuoyu/iasm/x86_64/pools.go | 117 - .../chenzhuoyu/iasm/x86_64/program.go | 542 - .../github.com/cloudwego/base64x/.gitignore | 30 + .../cloudwego/base64x/.golangci.yaml | 37 + .../cloudwego/base64x/.licenserc.yaml | 14 + .../cloudwego/base64x/CODE_OF_CONDUCT.md | 128 + .../cloudwego/base64x/CONTRIBUTING.md | 55 + .../iasm => cloudwego/base64x}/LICENSE | 0 .../cloudwego/base64x/LICENSE-APACHE} | 24 - .../base64x/Makefile | 0 .../base64x/README.md | 0 .../github.com/cloudwego/base64x/_typos.toml | 4 + .../base64x/base64x.go | 16 + .../cloudwego/base64x/check_branch_name.sh | 10 + vendor/github.com/cloudwego/base64x/cpuid.go | 33 + .../base64x/faststr.go | 16 + .../base64x/native_amd64.go | 16 + .../base64x/native_subr_amd64.go | 0 .../base64x/native_text_amd64.go | 0 .../github.com/cloudwego/iasm/LICENSE-APACHE | 177 + .../iasm/expr/ast.go | 16 + .../iasm/expr/errors.go | 16 + .../iasm/expr/ops.go | 16 + .../iasm/expr/parser.go | 16 + .../iasm/expr/pools.go} | 47 +- vendor/github.com/cloudwego/iasm/expr/term.go | 23 + .../iasm/expr/utils.go | 16 + .../iasm/x86_64/arch.go | 16 + vendor/github.com/cloudwego/iasm/x86_64/asm.s | 16 + .../iasm/x86_64/assembler.go | 18 +- .../iasm/x86_64/assembler_alias.go | 16 + .../iasm/x86_64/eface.go | 16 + .../iasm/x86_64/encodings.go | 16 + .../iasm/x86_64/instructions.go | 16 + .../iasm/x86_64/instructions_table.go | 16 + .../cloudwego/iasm/x86_64/operands.go | 665 + .../github.com/cloudwego/iasm/x86_64/pools.go | 54 + .../cloudwego/iasm/x86_64/program.go | 584 + .../iasm/x86_64/registers.go | 16 + .../iasm/x86_64/utils.go | 16 + vendor/github.com/creack/pty/.editorconfig | 54 + vendor/github.com/creack/pty/.golangci.yml | 324 + .../github.com/creack/pty/Dockerfile.golang | 2 +- vendor/github.com/creack/pty/Dockerfile.riscv | 23 - vendor/github.com/creack/pty/README.md | 2 +- vendor/github.com/creack/pty/ioctl.go | 31 +- vendor/github.com/creack/pty/ioctl_inner.go | 20 + vendor/github.com/creack/pty/ioctl_legacy.go | 10 + vendor/github.com/creack/pty/ioctl_solaris.go | 2 +- .../creack/pty/ioctl_unsupported.go | 2 +- vendor/github.com/creack/pty/pty_darwin.go | 6 +- vendor/github.com/creack/pty/pty_dragonfly.go | 10 +- vendor/github.com/creack/pty/pty_freebsd.go | 8 +- vendor/github.com/creack/pty/pty_linux.go | 6 +- vendor/github.com/creack/pty/pty_netbsd.go | 4 +- vendor/github.com/creack/pty/pty_openbsd.go | 17 +- vendor/github.com/creack/pty/pty_solaris.go | 41 +- .../creack/pty/test_crosscompile.sh | 8 +- vendor/github.com/creack/pty/winsize.go | 5 +- vendor/github.com/creack/pty/winsize_unix.go | 12 +- .../creack/pty/ztypes_freebsd_riscv64.go | 13 + .../creack/pty/ztypes_openbsd_32bit_int.go | 3 +- vendor/github.com/creack/pty/ztypes_ppc.go | 9 + vendor/github.com/creack/pty/ztypes_sparcx.go | 12 + .../github.com/dustin/go-humanize/.travis.yml | 16 +- .../dustin/go-humanize/README.markdown | 2 +- .../github.com/dustin/go-humanize/bigbytes.go | 20 +- .../github.com/dustin/go-humanize/commaf.go | 1 + vendor/github.com/dustin/go-humanize/ftoa.go | 3 + .../github.com/dustin/go-humanize/number.go | 2 +- vendor/github.com/dustin/go-humanize/si.go | 4 + vendor/github.com/fatih/color/color.go | 61 +- .../flopp/go-coordsparser/.travis.yml | 3 - .../flopp/go-coordsparser/README.md | 1 - .../flopp/go-coordsparser/renovate.json | 6 + .../github.com/flopp/go-staticmaps/.gitignore | 1 + .../github.com/flopp/go-staticmaps/Makefile | 19 + .../github.com/flopp/go-staticmaps/color.go | 54 +- .../github.com/flopp/go-staticmaps/context.go | 38 +- .../github.com/flopp/go-staticmaps/marker.go | 6 +- .../flopp/go-staticmaps/renovate.json | 6 + .../flopp/go-staticmaps/tile_fetcher.go | 5 +- .../flopp/go-staticmaps/tile_provider.go | 53 +- .../gabriel-vasile/mimetype/LICENSE | 2 +- .../gabriel-vasile/mimetype/README.md | 8 +- .../mimetype/internal/json/json.go | 27 +- .../mimetype/internal/magic/archive.go | 104 +- .../mimetype/internal/magic/binary.go | 40 +- .../mimetype/internal/magic/ftyp.go | 43 +- .../mimetype/internal/magic/magic.go | 16 +- .../mimetype/internal/magic/ms_office.go | 45 +- .../mimetype/internal/magic/text.go | 46 +- .../mimetype/internal/magic/text_csv.go | 60 +- .../mimetype/internal/magic/zip.go | 90 +- .../gabriel-vasile/mimetype/mimetype.gif | Bin 1343793 -> 0 bytes .../gabriel-vasile/mimetype/mimetype.go | 11 +- .../mimetype/supported_mimes.md | 29 +- .../gabriel-vasile/mimetype/tree.go | 46 +- vendor/github.com/ghodss/yaml/.gitignore | 20 - vendor/github.com/ghodss/yaml/.travis.yml | 7 - vendor/github.com/ghodss/yaml/README.md | 121 - vendor/github.com/ghodss/yaml/fields.go | 501 - vendor/github.com/ghodss/yaml/yaml.go | 277 - vendor/github.com/gin-gonic/gin/.gitignore | 4 + vendor/github.com/gin-gonic/gin/.golangci.yml | 1 - .../github.com/gin-gonic/gin/.goreleaser.yaml | 29 +- vendor/github.com/gin-gonic/gin/Makefile | 1 + vendor/github.com/gin-gonic/gin/auth.go | 25 + .../gin-gonic/gin/binding/binding.go | 27 +- .../gin/binding/binding_nomsgpack.go | 3 +- .../gin/binding/default_validator.go | 5 +- .../gin-gonic/gin/binding/form_mapping.go | 28 + vendor/github.com/gin-gonic/gin/codecov.yml | 13 + vendor/github.com/gin-gonic/gin/context.go | 52 +- vendor/github.com/gin-gonic/gin/debug.go | 20 +- vendor/github.com/gin-gonic/gin/deprecated.go | 2 + vendor/github.com/gin-gonic/gin/gin.go | 37 +- vendor/github.com/gin-gonic/gin/logger.go | 53 +- .../github.com/gin-gonic/gin/render/render.go | 32 +- .../github.com/gin-gonic/gin/render/yaml.go | 2 +- vendor/github.com/gin-gonic/gin/tree.go | 43 +- vendor/github.com/gin-gonic/gin/version.go | 2 +- vendor/github.com/go-chi/chi/v5/CHANGELOG.md | 5 + .../github.com/go-chi/chi/v5/CONTRIBUTING.md | 12 +- vendor/github.com/go-chi/chi/v5/README.md | 2 + vendor/github.com/go-chi/chi/v5/context.go | 3 +- .../go-chi/chi/v5/middleware/maybe.go | 2 +- .../go-chi/chi/v5/middleware/realip.go | 2 +- .../go-chi/chi/v5/middleware/wrap_writer.go | 35 +- vendor/github.com/go-chi/chi/v5/mux.go | 16 + vendor/github.com/go-chi/chi/v5/path_value.go | 20 + .../go-chi/chi/v5/path_value_fallback.go | 19 + vendor/github.com/go-chi/httprate/README.md | 177 +- vendor/github.com/go-chi/httprate/httprate.go | 39 +- .../github.com/go-chi/httprate/limit_key.go | 15 + vendor/github.com/go-chi/httprate/limiter.go | 253 +- .../go-chi/httprate/local_counter.go | 78 + .../go-chi/httprate/local_counter_go1.20.go | 21 + .../go-chi/httprate/local_counter_go1.21.go | 25 + .../go-echarts/v2/actions/global.go | 29 - .../go-echarts/go-echarts/v2/charts/base.go | 100 +- .../go-echarts/go-echarts/v2/charts/funnel.go | 7 - .../go-echarts/go-echarts/v2/charts/gauge.go | 7 - .../go-echarts/go-echarts/v2/charts/geo.go | 7 - .../go-echarts/go-echarts/v2/charts/graph.go | 7 - .../go-echarts/go-echarts/v2/charts/liquid.go | 7 - .../go-echarts/go-echarts/v2/charts/map.go | 7 - .../go-echarts/v2/charts/parallel.go | 7 - .../go-echarts/go-echarts/v2/charts/pie.go | 7 - .../go-echarts/go-echarts/v2/charts/radar.go | 9 +- .../go-echarts/v2/charts/rectangle.go | 13 +- .../go-echarts/go-echarts/v2/charts/sankey.go | 7 - .../go-echarts/go-echarts/v2/charts/series.go | 252 +- .../go-echarts/v2/charts/sunburst.go | 7 - .../go-echarts/v2/charts/themeriver.go | 7 - .../go-echarts/go-echarts/v2/charts/tree.go | 7 - .../go-echarts/v2/charts/treemap.go | 7 - .../go-echarts/v2/charts/wordcloud.go | 9 +- .../go-echarts/go-echarts/v2/event/event.go | 35 + .../go-echarts/v2/opts/angle_axis.go | 8 + .../go-echarts/v2/opts/axis_pointer.go | 34 + .../go-echarts/go-echarts/v2/opts/brush.go | 20 + .../go-echarts/go-echarts/v2/opts/calendar.go | 173 + .../go-echarts/go-echarts/v2/opts/charts.go | 307 +- .../go-echarts/v2/opts/data_zoom.go | 71 + .../go-echarts/go-echarts/v2/opts/dataset.go | 9 + .../go-echarts/go-echarts/v2/opts/geo.go | 17 + .../go-echarts/go-echarts/v2/opts/gl_3d.go | 175 + .../go-echarts/go-echarts/v2/opts/global.go | 1661 +- .../go-echarts/go-echarts/v2/opts/grid.go | 32 + .../go-echarts/go-echarts/v2/opts/js.go | 52 + .../go-echarts/go-echarts/v2/opts/legend.go | 140 + .../go-echarts/go-echarts/v2/opts/parallel.go | 30 + .../go-echarts/v2/opts/parallel_axis.go | 35 + .../go-echarts/go-echarts/v2/opts/polar.go | 75 + .../go-echarts/v2/opts/primitivie.go | 19 + .../go-echarts/go-echarts/v2/opts/radar.go | 40 + .../go-echarts/v2/opts/radius_axis.go | 27 + .../go-echarts/go-echarts/v2/opts/series.go | 105 +- .../go-echarts/v2/opts/series_bar.go | 73 + .../v2/opts/series_effect_scatter.go | 47 + .../go-echarts/v2/opts/series_line.go | 84 + .../go-echarts/v2/opts/series_pie.go | 65 + .../go-echarts/v2/opts/series_radar.go | 34 + .../go-echarts/v2/opts/series_scatter.go | 62 + .../go-echarts/v2/opts/single_axis.go | 51 + .../go-echarts/v2/opts/text_style.go | 35 + .../go-echarts/go-echarts/v2/opts/title.go | 90 + .../go-echarts/go-echarts/v2/opts/toolbox.go | 205 + .../go-echarts/go-echarts/v2/opts/tooltip.go | 94 + .../go-echarts/v2/opts/visual_map.go | 104 + .../go-echarts/go-echarts/v2/opts/x_axis.go | 278 + .../go-echarts/go-echarts/v2/opts/y_axis.go | 107 + .../go-echarts/go-echarts/v2/render/chart.go | 79 + .../go-echarts/go-echarts/v2/render/engine.go | 86 +- .../go-echarts/go-echarts/v2/render/page.go | 42 + .../go-echarts/go-echarts/v2/render/render.go | 12 + .../go-echarts/v2/templates/base.tpl | 33 +- .../go-echarts/v2/templates/base_element.tpl | 3 + .../go-echarts/v2/templates/base_option.tpl | 1 + .../go-echarts/v2/templates/base_script.tpl | 1 + .../go-echarts/v2/templates/page.tpl | 6 + .../go-echarts/v2/templates/template.go | 9 + .../go-echarts/go-echarts/v2/types/lang.go | 15 - .../go-echarts/v2/types/orderedset.go | 6 + .../go-echarts/v2/types/primitivie.go | 13 + .../go-echarts/v2/util/default_val_setter.go | 42 + .../go-echarts/v2/util/id_gennerator.go | 31 + .../go-playground/validator/v10/Makefile | 6 +- .../go-playground/validator/v10/README.md | 139 +- .../go-playground/validator/v10/baked_in.go | 284 +- .../go-playground/validator/v10/cache.go | 30 +- .../validator/v10/country_codes.go | 2305 +- .../validator/v10/currency_codes.go | 148 +- .../go-playground/validator/v10/doc.go | 38 +- .../go-playground/validator/v10/errors.go | 8 +- .../go-playground/validator/v10/options.go | 26 + .../validator/v10/postcode_regexes.go | 12 +- .../go-playground/validator/v10/regexes.go | 158 +- .../go-playground/validator/v10/util.go | 22 +- .../go-playground/validator/v10/validator.go | 169 +- .../validator/v10/validator_instance.go | 42 +- .../go-task/slim-sprig/{ => v3}/.editorconfig | 0 .../slim-sprig/{ => v3}/.gitattributes | 0 .../go-task/slim-sprig/{ => v3}/.gitignore | 0 .../go-task/slim-sprig/{ => v3}/CHANGELOG.md | 19 + .../go-task/slim-sprig/{ => v3}/LICENSE.txt | 0 .../go-task/slim-sprig/{ => v3}/README.md | 2 +- .../go-task/slim-sprig/{ => v3}/Taskfile.yml | 2 +- .../go-task/slim-sprig/{ => v3}/crypto.go | 0 .../go-task/slim-sprig/{ => v3}/date.go | 0 .../go-task/slim-sprig/{ => v3}/defaults.go | 0 .../go-task/slim-sprig/{ => v3}/dict.go | 0 .../go-task/slim-sprig/{ => v3}/doc.go | 0 .../go-task/slim-sprig/{ => v3}/functions.go | 0 .../go-task/slim-sprig/{ => v3}/list.go | 0 .../go-task/slim-sprig/{ => v3}/network.go | 0 .../go-task/slim-sprig/{ => v3}/numeric.go | 0 .../go-task/slim-sprig/{ => v3}/reflect.go | 0 .../go-task/slim-sprig/{ => v3}/regex.go | 0 .../go-task/slim-sprig/{ => v3}/strings.go | 0 .../go-task/slim-sprig/{ => v3}/url.go | 0 vendor/github.com/gocarina/gocsv/decode.go | 62 +- vendor/github.com/goccy/go-json/.golangci.yml | 3 + vendor/github.com/goccy/go-json/Makefile | 2 +- vendor/github.com/goccy/go-json/encode.go | 4 +- .../goccy/go-json/internal/decoder/ptr.go | 1 + .../internal/decoder/unmarshal_text.go | 2 +- .../goccy/go-json/internal/encoder/compact.go | 2 +- .../go-json/internal/encoder/compiler.go | 2 +- .../goccy/go-json/internal/encoder/int.go | 24 + .../goccy/go-json/internal/encoder/string.go | 24 + .../goccy/go-json/internal/runtime/rtype.go | 1 - vendor/github.com/goccy/go-json/json.go | 35 +- vendor/github.com/golang/snappy/AUTHORS | 1 + vendor/github.com/golang/snappy/CONTRIBUTORS | 2 + vendor/github.com/golang/snappy/decode.go | 83 +- .../github.com/golang/snappy/encode_arm64.s | 4 +- .../flatbuffers/{LICENSE.txt => LICENSE} | 0 .../google/flatbuffers/go/builder.go | 72 +- .../google/flatbuffers/go/encode.go | 2 +- .../github.com/google/flatbuffers/go/lib.go | 37 + .../github.com/google/pprof/profile/encode.go | 8 + .../pprof/profile/legacy_java_profile.go | 4 +- .../github.com/google/pprof/profile/merge.go | 9 +- .../google/pprof/profile/profile.go | 23 +- vendor/github.com/google/uuid/CHANGELOG.md | 31 + vendor/github.com/google/uuid/CONTRIBUTING.md | 2 +- vendor/github.com/google/uuid/hash.go | 6 + vendor/github.com/google/uuid/time.go | 21 +- vendor/github.com/google/uuid/uuid.go | 79 +- vendor/github.com/google/uuid/version6.go | 56 + vendor/github.com/google/uuid/version7.go | 104 + .../gorilla/securecookie/.editorconfig | 20 + .../gorilla/securecookie/.gitignore | 1 + .../gorilla/securecookie/.travis.yml | 19 - .../github.com/gorilla/securecookie/LICENSE | 2 +- .../github.com/gorilla/securecookie/Makefile | 39 + .../github.com/gorilla/securecookie/README.md | 76 +- .../github.com/gorilla/securecookie/fuzz.go | 25 - .../gorilla/securecookie/securecookie.go | 45 +- vendor/github.com/hashicorp/yamux/LICENSE | 2 + vendor/github.com/hashicorp/yamux/mux.go | 8 +- vendor/github.com/hashicorp/yamux/session.go | 21 +- vendor/github.com/hashicorp/yamux/spec.md | 2 +- vendor/github.com/hashicorp/yamux/stream.go | 34 +- vendor/github.com/hashicorp/yamux/util.go | 7 + vendor/github.com/itchyny/gojq/.dockerignore | 1 - vendor/github.com/itchyny/gojq/.gitattributes | 3 +- vendor/github.com/itchyny/gojq/.gitignore | 1 - vendor/github.com/itchyny/gojq/CHANGELOG.md | 37 + vendor/github.com/itchyny/gojq/Dockerfile | 4 +- vendor/github.com/itchyny/gojq/LICENSE | 2 +- vendor/github.com/itchyny/gojq/README.md | 22 +- vendor/github.com/itchyny/gojq/_gojq | 8 +- vendor/github.com/itchyny/gojq/builtin.go | 120 +- vendor/github.com/itchyny/gojq/builtin.jq | 9 +- vendor/github.com/itchyny/gojq/compare.go | 10 +- vendor/github.com/itchyny/gojq/compiler.go | 98 +- vendor/github.com/itchyny/gojq/debug.go | 17 +- vendor/github.com/itchyny/gojq/error.go | 37 +- vendor/github.com/itchyny/gojq/execute.go | 26 +- vendor/github.com/itchyny/gojq/func.go | 114 +- vendor/github.com/itchyny/gojq/go.dev.mod | 6 +- vendor/github.com/itchyny/gojq/go.dev.sum | 8 +- vendor/github.com/itchyny/gojq/lexer.go | 63 +- .../github.com/itchyny/gojq/module_loader.go | 106 +- vendor/github.com/itchyny/gojq/operator.go | 32 +- vendor/github.com/itchyny/gojq/parser.go | 1196 +- vendor/github.com/itchyny/gojq/parser.go.y | 286 +- vendor/github.com/itchyny/gojq/query.go | 71 +- vendor/github.com/itchyny/gojq/release.go | 10 +- .../itchyny/timefmt-go/CHANGELOG.md | 5 + .../github.com/itchyny/timefmt-go/README.md | 6 +- .../github.com/itchyny/timefmt-go/format.go | 42 +- vendor/github.com/itchyny/timefmt-go/parse.go | 260 +- .../jackc/chunkreader/v2/.travis.yml | 9 - .../github.com/jackc/chunkreader/v2/README.md | 8 - .../jackc/chunkreader/v2/chunkreader.go | 104 - vendor/github.com/jackc/pgconn/.gitignore | 3 - vendor/github.com/jackc/pgconn/CHANGELOG.md | 177 - vendor/github.com/jackc/pgconn/LICENSE | 22 - vendor/github.com/jackc/pgconn/README.md | 62 - vendor/github.com/jackc/pgconn/doc.go | 29 - .../github.com/jackc/pgconn/stmtcache/lru.go | 169 - .../jackc/pgconn/stmtcache/stmtcache.go | 58 - vendor/github.com/jackc/pgio/.travis.yml | 9 - vendor/github.com/jackc/pgio/LICENSE | 22 - vendor/github.com/jackc/pgio/README.md | 11 - .../github.com/jackc/pgproto3/v2/.travis.yml | 9 - vendor/github.com/jackc/pgproto3/v2/LICENSE | 22 - vendor/github.com/jackc/pgproto3/v2/README.md | 18 - .../jackc/pgproto3/v2/chunkreader.go | 19 - vendor/github.com/jackc/pgproto3/v2/doc.go | 4 - .../github.com/jackc/pgproto3/v2/frontend.go | 210 - .../jackc/pgservicefile/.travis.yml | 9 - .../github.com/jackc/pgservicefile/README.md | 5 +- .../jackc/pgservicefile/pgservicefile.go | 4 +- vendor/github.com/jackc/pgtype/CHANGELOG.md | 164 - vendor/github.com/jackc/pgtype/README.md | 14 - vendor/github.com/jackc/pgtype/aclitem.go | 138 - .../github.com/jackc/pgtype/aclitem_array.go | 428 - vendor/github.com/jackc/pgtype/array_type.go | 353 - vendor/github.com/jackc/pgtype/bit.go | 45 - vendor/github.com/jackc/pgtype/bool.go | 217 - vendor/github.com/jackc/pgtype/bool_array.go | 517 - vendor/github.com/jackc/pgtype/box.go | 165 - vendor/github.com/jackc/pgtype/bpchar.go | 93 - .../github.com/jackc/pgtype/bpchar_array.go | 517 - vendor/github.com/jackc/pgtype/bytea.go | 163 - vendor/github.com/jackc/pgtype/bytea_array.go | 489 - vendor/github.com/jackc/pgtype/cid.go | 61 - vendor/github.com/jackc/pgtype/cidr.go | 43 - vendor/github.com/jackc/pgtype/cidr_array.go | 546 - vendor/github.com/jackc/pgtype/circle.go | 150 - .../jackc/pgtype/composite_fields.go | 107 - .../github.com/jackc/pgtype/composite_type.go | 682 - vendor/github.com/jackc/pgtype/convert.go | 476 - .../github.com/jackc/pgtype/database_sql.go | 41 - vendor/github.com/jackc/pgtype/date.go | 324 - vendor/github.com/jackc/pgtype/date_array.go | 518 - vendor/github.com/jackc/pgtype/daterange.go | 267 - vendor/github.com/jackc/pgtype/enum_array.go | 428 - vendor/github.com/jackc/pgtype/enum_type.go | 168 - vendor/github.com/jackc/pgtype/float4.go | 282 - .../github.com/jackc/pgtype/float4_array.go | 517 - vendor/github.com/jackc/pgtype/float8.go | 272 - .../github.com/jackc/pgtype/float8_array.go | 517 - .../github.com/jackc/pgtype/generic_binary.go | 39 - .../github.com/jackc/pgtype/generic_text.go | 39 - vendor/github.com/jackc/pgtype/hstore.go | 465 - .../github.com/jackc/pgtype/hstore_array.go | 489 - vendor/github.com/jackc/pgtype/inet.go | 304 - vendor/github.com/jackc/pgtype/inet_array.go | 546 - vendor/github.com/jackc/pgtype/int2.go | 321 - vendor/github.com/jackc/pgtype/int2_array.go | 909 - vendor/github.com/jackc/pgtype/int4.go | 312 - vendor/github.com/jackc/pgtype/int4_array.go | 909 - .../jackc/pgtype/int4_multirange.go | 239 - vendor/github.com/jackc/pgtype/int4range.go | 267 - vendor/github.com/jackc/pgtype/int8.go | 298 - vendor/github.com/jackc/pgtype/int8_array.go | 909 - .../jackc/pgtype/int8_multirange.go | 239 - vendor/github.com/jackc/pgtype/int8range.go | 267 - vendor/github.com/jackc/pgtype/interval.go | 257 - vendor/github.com/jackc/pgtype/json.go | 209 - vendor/github.com/jackc/pgtype/json_array.go | 546 - vendor/github.com/jackc/pgtype/jsonb.go | 85 - vendor/github.com/jackc/pgtype/jsonb_array.go | 546 - vendor/github.com/jackc/pgtype/line.go | 148 - vendor/github.com/jackc/pgtype/lseg.go | 165 - vendor/github.com/jackc/pgtype/ltree.go | 72 - vendor/github.com/jackc/pgtype/macaddr.go | 173 - .../github.com/jackc/pgtype/macaddr_array.go | 518 - vendor/github.com/jackc/pgtype/multirange.go | 83 - vendor/github.com/jackc/pgtype/name.go | 58 - .../github.com/jackc/pgtype/num_multirange.go | 239 - vendor/github.com/jackc/pgtype/numeric.go | 853 - .../github.com/jackc/pgtype/numeric_array.go | 685 - vendor/github.com/jackc/pgtype/numrange.go | 267 - vendor/github.com/jackc/pgtype/oid.go | 81 - vendor/github.com/jackc/pgtype/oid_value.go | 55 - vendor/github.com/jackc/pgtype/path.go | 195 - vendor/github.com/jackc/pgtype/pgtype.go | 1001 - vendor/github.com/jackc/pgtype/pguint32.go | 162 - vendor/github.com/jackc/pgtype/point.go | 214 - vendor/github.com/jackc/pgtype/polygon.go | 226 - vendor/github.com/jackc/pgtype/qchar.go | 152 - vendor/github.com/jackc/pgtype/record.go | 126 - .../github.com/jackc/pgtype/record_array.go | 318 - vendor/github.com/jackc/pgtype/text.go | 212 - vendor/github.com/jackc/pgtype/text_array.go | 517 - vendor/github.com/jackc/pgtype/tid.go | 156 - vendor/github.com/jackc/pgtype/time.go | 231 - vendor/github.com/jackc/pgtype/timestamp.go | 261 - .../jackc/pgtype/timestamp_array.go | 518 - vendor/github.com/jackc/pgtype/timestamptz.go | 322 - .../jackc/pgtype/timestamptz_array.go | 518 - vendor/github.com/jackc/pgtype/tsrange.go | 267 - .../github.com/jackc/pgtype/tsrange_array.go | 470 - vendor/github.com/jackc/pgtype/tstzrange.go | 267 - .../jackc/pgtype/tstzrange_array.go | 470 - .../jackc/pgtype/typed_array.go.erb | 512 - .../jackc/pgtype/typed_array_gen.sh | 31 - .../jackc/pgtype/typed_multirange.go.erb | 239 - .../jackc/pgtype/typed_multirange_gen.sh | 8 - .../jackc/pgtype/typed_range.go.erb | 269 - .../jackc/pgtype/typed_range_gen.sh | 7 - vendor/github.com/jackc/pgtype/unknown.go | 44 - vendor/github.com/jackc/pgtype/uuid.go | 231 - vendor/github.com/jackc/pgtype/uuid_array.go | 573 - vendor/github.com/jackc/pgtype/varbit.go | 133 - vendor/github.com/jackc/pgtype/varchar.go | 66 - .../github.com/jackc/pgtype/varchar_array.go | 517 - vendor/github.com/jackc/pgtype/xid.go | 64 - vendor/github.com/jackc/pgx/v4/CHANGELOG.md | 304 - vendor/github.com/jackc/pgx/v4/LICENSE | 22 - vendor/github.com/jackc/pgx/v4/README.md | 196 - vendor/github.com/jackc/pgx/v4/batch.go | 228 - vendor/github.com/jackc/pgx/v4/conn.go | 857 - vendor/github.com/jackc/pgx/v4/doc.go | 340 - .../jackc/pgx/v4/extended_query_builder.go | 161 - vendor/github.com/jackc/pgx/v4/go_stdlib.go | 61 - vendor/github.com/jackc/pgx/v4/logger.go | 107 - vendor/github.com/jackc/pgx/v4/messages.go | 23 - vendor/github.com/jackc/pgx/v4/rows.go | 351 - vendor/github.com/jackc/pgx/v4/values.go | 280 - .../jackc/pgx/{v4 => v5}/.gitignore | 3 + vendor/github.com/jackc/pgx/v5/CHANGELOG.md | 422 + .../github.com/jackc/pgx/v5/CONTRIBUTING.md | 121 + .../jackc/{pgtype => pgx/v5}/LICENSE | 0 vendor/github.com/jackc/pgx/v5/README.md | 174 + vendor/github.com/jackc/pgx/v5/Rakefile | 18 + vendor/github.com/jackc/pgx/v5/batch.go | 443 + vendor/github.com/jackc/pgx/v5/conn.go | 1413 + .../jackc/pgx/{v4 => v5}/copy_from.go | 127 +- .../github.com/jackc/pgx/v5/derived_types.go | 262 + vendor/github.com/jackc/pgx/v5/doc.go | 194 + .../jackc/pgx/v5/extended_query_builder.go | 146 + .../pgx/v5/internal/iobufpool/iobufpool.go | 70 + .../jackc/pgx/v5/internal/pgio/README.md | 6 + .../jackc/{ => pgx/v5/internal}/pgio/doc.go | 0 .../jackc/{ => pgx/v5/internal}/pgio/write.go | 0 .../{v4 => v5}/internal/sanitize/sanitize.go | 13 +- .../pgx/v5/internal/stmtcache/lru_cache.go | 112 + .../pgx/v5/internal/stmtcache/stmtcache.go | 45 + .../v5/internal/stmtcache/unlimited_cache.go | 77 + .../jackc/pgx/{v4 => v5}/large_objects.go | 76 +- vendor/github.com/jackc/pgx/v5/named_args.go | 295 + .../github.com/jackc/pgx/v5/pgconn/README.md | 29 + .../jackc/{ => pgx/v5}/pgconn/auth_scram.go | 20 +- .../jackc/{ => pgx/v5}/pgconn/config.go | 237 +- .../v5/pgconn}/ctxwatch/context_watcher.go | 25 +- .../jackc/{ => pgx/v5}/pgconn/defaults.go | 2 - .../{ => pgx/v5}/pgconn/defaults_windows.go | 2 - vendor/github.com/jackc/pgx/v5/pgconn/doc.go | 38 + .../jackc/{ => pgx/v5}/pgconn/errors.go | 134 +- .../v5/pgconn/internal/bgreader/bgreader.go | 139 + .../jackc/{ => pgx/v5}/pgconn/krb5.go | 9 +- .../jackc/{ => pgx/v5}/pgconn/pgconn.go | 1441 +- .../jackc/pgx/v5/pgproto3/README.md | 7 + .../authentication_cleartext_password.go | 2 +- .../v5/pgproto3}/authentication_gss.go | 2 +- .../pgproto3}/authentication_gss_continue.go | 2 +- .../pgproto3}/authentication_md5_password.go | 2 +- .../v5/pgproto3}/authentication_ok.go | 2 +- .../v5/pgproto3}/authentication_sasl.go | 9 +- .../pgproto3}/authentication_sasl_continue.go | 2 +- .../v5/pgproto3}/authentication_sasl_final.go | 2 +- .../v2 => pgx/v5/pgproto3}/backend.go | 102 +- .../v5/pgproto3}/backend_key_data.go | 2 +- .../v2 => pgx/v5/pgproto3}/big_endian.go | 0 .../{pgproto3/v2 => pgx/v5/pgproto3}/bind.go | 2 +- .../v2 => pgx/v5/pgproto3}/bind_complete.go | 0 .../v2 => pgx/v5/pgproto3}/cancel_request.go | 2 +- .../jackc/pgx/v5/pgproto3/chunkreader.go | 90 + .../{pgproto3/v2 => pgx/v5/pgproto3}/close.go | 0 .../v2 => pgx/v5/pgproto3}/close_complete.go | 0 .../v5/pgproto3}/command_complete.go | 5 +- .../v5/pgproto3}/copy_both_response.go | 2 +- .../v2 => pgx/v5/pgproto3}/copy_data.go | 0 .../v2 => pgx/v5/pgproto3}/copy_done.go | 0 .../v2 => pgx/v5/pgproto3}/copy_fail.go | 0 .../v5/pgproto3}/copy_in_response.go | 2 +- .../v5/pgproto3}/copy_out_response.go | 2 +- .../v2 => pgx/v5/pgproto3}/data_row.go | 12 +- .../v2 => pgx/v5/pgproto3}/describe.go | 0 .../github.com/jackc/pgx/v5/pgproto3/doc.go | 11 + .../v5/pgproto3}/empty_query_response.go | 0 .../v2 => pgx/v5/pgproto3}/error_response.go | 0 .../v2 => pgx/v5/pgproto3}/execute.go | 2 +- .../{pgproto3/v2 => pgx/v5/pgproto3}/flush.go | 0 .../jackc/pgx/v5/pgproto3/frontend.go | 454 + .../v2 => pgx/v5/pgproto3}/function_call.go | 2 +- .../v5/pgproto3}/function_call_response.go | 2 +- .../v2 => pgx/v5/pgproto3}/gss_enc_request.go | 2 +- .../v2 => pgx/v5/pgproto3}/gss_response.go | 0 .../v2 => pgx/v5/pgproto3}/no_data.go | 0 .../v2 => pgx/v5/pgproto3}/notice_response.go | 0 .../v5/pgproto3}/notification_response.go | 6 +- .../v5/pgproto3}/parameter_description.go | 2 +- .../v5/pgproto3}/parameter_status.go | 0 .../{pgproto3/v2 => pgx/v5/pgproto3}/parse.go | 2 +- .../v2 => pgx/v5/pgproto3}/parse_complete.go | 0 .../v5/pgproto3}/password_message.go | 0 .../v2 => pgx/v5/pgproto3}/pgproto3.go | 35 +- .../v5/pgproto3}/portal_suspended.go | 0 .../{pgproto3/v2 => pgx/v5/pgproto3}/query.go | 0 .../v2 => pgx/v5/pgproto3}/ready_for_query.go | 0 .../v2 => pgx/v5/pgproto3}/row_description.go | 2 +- .../v5/pgproto3}/sasl_initial_response.go | 11 +- .../v2 => pgx/v5/pgproto3}/sasl_response.go | 9 +- .../v2 => pgx/v5/pgproto3}/ssl_request.go | 2 +- .../v2 => pgx/v5/pgproto3}/startup_message.go | 6 +- .../{pgproto3/v2 => pgx/v5/pgproto3}/sync.go | 0 .../v2 => pgx/v5/pgproto3}/terminate.go | 0 .../github.com/jackc/pgx/v5/pgproto3/trace.go | 416 + .../jackc/{ => pgx/v5}/pgtype/array.go | 177 +- .../jackc/pgx/v5/pgtype/array_codec.go | 405 + vendor/github.com/jackc/pgx/v5/pgtype/bits.go | 210 + vendor/github.com/jackc/pgx/v5/pgtype/bool.go | 343 + vendor/github.com/jackc/pgx/v5/pgtype/box.go | 238 + .../jackc/pgx/v5/pgtype/builtin_wrappers.go | 952 + .../github.com/jackc/pgx/v5/pgtype/bytea.go | 255 + .../github.com/jackc/pgx/v5/pgtype/circle.go | 222 + .../jackc/pgx/v5/pgtype/composite.go | 602 + .../github.com/jackc/pgx/v5/pgtype/convert.go | 108 + vendor/github.com/jackc/pgx/v5/pgtype/date.go | 351 + vendor/github.com/jackc/pgx/v5/pgtype/doc.go | 191 + .../jackc/pgx/v5/pgtype/enum_codec.go | 109 + .../github.com/jackc/pgx/v5/pgtype/float4.go | 319 + .../github.com/jackc/pgx/v5/pgtype/float8.go | 365 + .../github.com/jackc/pgx/v5/pgtype/hstore.go | 486 + vendor/github.com/jackc/pgx/v5/pgtype/inet.go | 200 + vendor/github.com/jackc/pgx/v5/pgtype/int.go | 1980 + .../github.com/jackc/pgx/v5/pgtype/int.go.erb | 548 + .../jackc/pgx/v5/pgtype/int_test.go.erb | 93 + .../pgtype/integration_benchmark_test.go.erb | 62 + .../pgtype/integration_benchmark_test_gen.sh | 2 + .../jackc/pgx/v5/pgtype/interval.go | 297 + vendor/github.com/jackc/pgx/v5/pgtype/json.go | 223 + .../github.com/jackc/pgx/v5/pgtype/jsonb.go | 129 + vendor/github.com/jackc/pgx/v5/pgtype/line.go | 225 + vendor/github.com/jackc/pgx/v5/pgtype/lseg.go | 238 + .../github.com/jackc/pgx/v5/pgtype/ltree.go | 122 + .../github.com/jackc/pgx/v5/pgtype/macaddr.go | 162 + .../jackc/pgx/v5/pgtype/multirange.go | 443 + .../github.com/jackc/pgx/v5/pgtype/numeric.go | 823 + vendor/github.com/jackc/pgx/v5/pgtype/path.go | 272 + .../github.com/jackc/pgx/v5/pgtype/pgtype.go | 2065 + .../jackc/pgx/v5/pgtype/pgtype_default.go | 229 + .../github.com/jackc/pgx/v5/pgtype/point.go | 266 + .../github.com/jackc/pgx/v5/pgtype/polygon.go | 253 + .../github.com/jackc/pgx/v5/pgtype/qchar.go | 141 + .../jackc/{ => pgx/v5}/pgtype/range.go | 71 +- .../jackc/pgx/v5/pgtype/range_codec.go | 379 + .../jackc/pgx/v5/pgtype/record_codec.go | 125 + .../v5/pgtype/register_default_pg_types.go | 35 + .../register_default_pg_types_disabled.go | 6 + vendor/github.com/jackc/pgx/v5/pgtype/text.go | 223 + .../pgx/v5/pgtype/text_format_only_codec.go | 13 + vendor/github.com/jackc/pgx/v5/pgtype/tid.go | 241 + vendor/github.com/jackc/pgx/v5/pgtype/time.go | 274 + .../jackc/pgx/v5/pgtype/timestamp.go | 356 + .../jackc/pgx/v5/pgtype/timestamptz.go | 366 + .../github.com/jackc/pgx/v5/pgtype/uint32.go | 325 + vendor/github.com/jackc/pgx/v5/pgtype/uuid.go | 281 + vendor/github.com/jackc/pgx/v5/pgtype/xml.go | 198 + .../jackc/pgx/v5/pgxpool/batch_results.go | 52 + .../github.com/jackc/pgx/v5/pgxpool/conn.go | 134 + vendor/github.com/jackc/pgx/v5/pgxpool/doc.go | 27 + .../github.com/jackc/pgx/v5/pgxpool/pool.go | 717 + .../github.com/jackc/pgx/v5/pgxpool/rows.go | 116 + .../github.com/jackc/pgx/v5/pgxpool/stat.go | 84 + .../github.com/jackc/pgx/v5/pgxpool/tracer.go | 33 + vendor/github.com/jackc/pgx/v5/pgxpool/tx.go | 83 + vendor/github.com/jackc/pgx/v5/rows.go | 856 + .../jackc/pgx/{v4 => v5}/stdlib/sql.go | 392 +- vendor/github.com/jackc/pgx/v5/tracer.go | 107 + vendor/github.com/jackc/pgx/{v4 => v5}/tx.go | 216 +- vendor/github.com/jackc/pgx/v5/values.go | 63 + .../github.com/jackc/puddle/v2/CHANGELOG.md | 79 + .../jackc/{chunkreader => puddle}/v2/LICENSE | 2 +- vendor/github.com/jackc/puddle/v2/README.md | 80 + vendor/github.com/jackc/puddle/v2/context.go | 24 + vendor/github.com/jackc/puddle/v2/doc.go | 11 + .../puddle/v2/internal/genstack/gen_stack.go | 85 + .../puddle/v2/internal/genstack/stack.go | 39 + vendor/github.com/jackc/puddle/v2/log.go | 32 + vendor/github.com/jackc/puddle/v2/nanotime.go | 16 + vendor/github.com/jackc/puddle/v2/pool.go | 710 + .../jackc/puddle/v2/resource_list.go | 28 + vendor/github.com/jaypipes/ghw/Dockerfile | 6 +- vendor/github.com/jaypipes/ghw/Makefile | 31 +- vendor/github.com/jaypipes/ghw/README.md | 958 +- vendor/github.com/jaypipes/ghw/SECURITY.md | 23 + vendor/github.com/jaypipes/ghw/alias.go | 51 +- vendor/github.com/jaypipes/ghw/doc.go | 308 +- .../jaypipes/ghw/pkg/block/block.go | 228 +- .../jaypipes/ghw/pkg/block/block_darwin.go | 15 +- .../jaypipes/ghw/pkg/block/block_linux.go | 92 +- .../jaypipes/ghw/pkg/block/block_windows.go | 48 +- .../jaypipes/ghw/pkg/cpu/cpu_darwin.go | 135 + .../jaypipes/ghw/pkg/cpu/cpu_linux.go | 18 +- .../jaypipes/ghw/pkg/cpu/cpu_stub.go | 4 +- .../jaypipes/ghw/pkg/gpu/gpu_linux.go | 3 +- .../jaypipes/ghw/pkg/linuxdmi/dmi_linux.go | 4 +- .../jaypipes/ghw/pkg/marshal/marshal.go | 20 +- .../jaypipes/ghw/pkg/memory/memory.go | 13 + .../jaypipes/ghw/pkg/memory/memory_cache.go | 60 +- .../ghw/pkg/memory/memory_cache_linux.go | 21 +- .../jaypipes/ghw/pkg/memory/memory_linux.go | 19 +- vendor/github.com/jaypipes/ghw/pkg/net/net.go | 73 +- .../jaypipes/ghw/pkg/net/net_linux.go | 14 +- .../jaypipes/ghw/pkg/net/net_windows.go | 2 +- .../jaypipes/ghw/pkg/option/option.go | 5 +- .../jaypipes/ghw/pkg/pci/address/address.go | 11 +- vendor/github.com/jaypipes/ghw/pkg/pci/pci.go | 20 +- .../jaypipes/ghw/pkg/pci/pci_linux.go | 36 +- .../jaypipes/ghw/pkg/snapshot/clonetree.go | 3 +- .../ghw/pkg/snapshot/clonetree_block_linux.go | 13 +- .../ghw/pkg/snapshot/clonetree_linux.go | 3 +- .../ghw/pkg/snapshot/clonetree_pci_linux.go | 5 +- .../jaypipes/ghw/pkg/snapshot/unpack.go | 5 +- .../ghw/pkg/topology/topology_linux.go | 6 +- .../github.com/jaypipes/ghw/pkg/util/util.go | 3 +- vendor/github.com/jaypipes/pcidb/README.md | 2 +- .../klauspost/compress/.goreleaser.yml | 28 +- .../github.com/klauspost/compress/README.md | 89 +- .../klauspost/compress/flate/deflate.go | 988 - .../klauspost/compress/flate/dict_decoder.go | 184 - .../klauspost/compress/flate/fast_encoder.go | 216 - .../compress/flate/huffman_bit_writer.go | 1182 - .../klauspost/compress/flate/huffman_code.go | 417 - .../compress/flate/huffman_sortByFreq.go | 159 - .../compress/flate/huffman_sortByLiteral.go | 201 - .../klauspost/compress/flate/inflate.go | 793 - .../klauspost/compress/flate/inflate_gen.go | 1283 - .../klauspost/compress/flate/level1.go | 241 - .../klauspost/compress/flate/level2.go | 214 - .../klauspost/compress/flate/level3.go | 241 - .../klauspost/compress/flate/level4.go | 221 - .../klauspost/compress/flate/level5.go | 310 - .../klauspost/compress/flate/level6.go | 325 - .../klauspost/compress/flate/regmask_amd64.go | 37 - .../klauspost/compress/flate/regmask_other.go | 40 - .../klauspost/compress/flate/stateless.go | 318 - .../klauspost/compress/flate/token.go | 379 - .../klauspost/compress/fse/bitwriter.go | 3 +- .../klauspost/compress/fse/compress.go | 5 +- .../klauspost/compress/fse/decompress.go | 2 +- .../klauspost/compress/huff0/bitwriter.go | 3 +- .../klauspost/compress/huff0/bytereader.go | 44 - .../klauspost/compress/huff0/compress.go | 25 +- .../klauspost/compress/huff0/decompress.go | 4 +- .../klauspost/compress/huff0/huff0.go | 4 +- .../compress/internal/snapref/encode_other.go | 2 +- vendor/github.com/klauspost/compress/s2sx.mod | 2 +- .../klauspost/compress/zstd/README.md | 2 +- .../klauspost/compress/zstd/bitreader.go | 34 +- .../klauspost/compress/zstd/bitwriter.go | 3 +- .../klauspost/compress/zstd/blockdec.go | 7 +- .../klauspost/compress/zstd/blockenc.go | 49 +- .../klauspost/compress/zstd/decodeheader.go | 56 +- .../klauspost/compress/zstd/decoder.go | 2 +- .../klauspost/compress/zstd/dict.go | 410 +- .../klauspost/compress/zstd/enc_best.go | 94 +- .../klauspost/compress/zstd/enc_better.go | 62 +- .../klauspost/compress/zstd/enc_dfast.go | 16 +- .../klauspost/compress/zstd/encoder.go | 58 +- .../compress/zstd/encoder_options.go | 6 +- .../klauspost/compress/zstd/framedec.go | 4 +- .../klauspost/compress/zstd/frameenc.go | 6 +- .../compress/zstd/fse_decoder_generic.go | 11 +- .../zstd/internal/xxhash/xxhash_arm64.s | 4 +- .../klauspost/compress/zstd/matchlen_amd64.s | 10 +- .../klauspost/compress/zstd/seqdec.go | 17 +- .../klauspost/compress/zstd/seqdec_amd64.go | 4 +- .../klauspost/compress/zstd/seqdec_amd64.s | 272 +- .../klauspost/compress/zstd/seqdec_generic.go | 2 +- .../klauspost/compress/zstd/snappy.go | 5 +- .../klauspost/compress/zstd/zstd.go | 4 + .../github.com/klauspost/cpuid/v2/README.md | 15 +- vendor/github.com/klauspost/cpuid/v2/cpuid.go | 459 +- .../klauspost/cpuid/v2/detect_x86.go | 2 + .../klauspost/cpuid/v2/featureid_string.go | 413 +- .../klauspost/reedsolomon/README.md | 22 +- .../klauspost/reedsolomon/galois.go | 42 +- .../klauspost/reedsolomon/galois_amd64.go | 135 +- .../klauspost/reedsolomon/galois_amd64.s | 86 +- .../klauspost/reedsolomon/galois_arm64.go | 51 +- .../klauspost/reedsolomon/galois_arm64.s | 38 +- .../klauspost/reedsolomon/galois_gen_amd64.go | 1243 +- .../klauspost/reedsolomon/galois_gen_amd64.s | 42334 ++++++++-- .../klauspost/reedsolomon/galois_gen_arm64.go | 125 + .../klauspost/reedsolomon/galois_gen_arm64.s | 27052 ++++++ .../klauspost/reedsolomon/galois_gen_none.go | 31 +- .../reedsolomon/galois_gen_nopshufb_amd64.go | 2264 + .../reedsolomon/galois_gen_nopshufb_amd64.s | 67987 ++++++++++++++++ .../reedsolomon/galois_gen_switch_amd64.go | 1160 +- .../reedsolomon/galois_gen_switch_arm64.go | 219 + .../galois_gen_switch_nopshufb_amd64.go | 1415 + .../galois_gen_switch_nopshufb_arm64.go | 22 + .../klauspost/reedsolomon/galois_noasm.go | 12 +- .../reedsolomon/galois_nopshufb_amd64.go | 146 + .../klauspost/reedsolomon/galois_notamd64.go | 14 - .../klauspost/reedsolomon/galois_ppc64le.go | 10 +- .../klauspost/reedsolomon/galois_ppc64le.s | 1 + .../klauspost/reedsolomon/leopard.go | 25 +- .../klauspost/reedsolomon/leopard8.go | 25 +- .../klauspost/reedsolomon/matrix.go | 6 +- .../klauspost/reedsolomon/options.go | 91 +- .../github.com/klauspost/reedsolomon/race.go | 61 + .../klauspost/reedsolomon/race_none.go | 17 + .../klauspost/reedsolomon/reedsolomon.go | 203 +- .../klauspost/reedsolomon/xor_arm64.go | 23 + .../klauspost/reedsolomon/xor_arm64.s | 29 + .../klauspost/reedsolomon/xor_noasm.go | 7 + vendor/github.com/leodido/go-urn/.gitignore | 3 +- vendor/github.com/leodido/go-urn/README.md | 126 +- vendor/github.com/leodido/go-urn/kind.go | 10 + vendor/github.com/leodido/go-urn/machine.go | 6178 +- .../github.com/leodido/go-urn/machine.go.rl | 283 +- vendor/github.com/leodido/go-urn/makefile | 20 +- vendor/github.com/leodido/go-urn/options.go | 9 + .../github.com/leodido/go-urn/parsing_mode.go | 12 + vendor/github.com/leodido/go-urn/scim.go | 48 + .../leodido/go-urn/scim/schema/type.go | 36 + vendor/github.com/leodido/go-urn/urn.go | 75 +- vendor/github.com/leodido/go-urn/urn8141.go | 30 + .../github.com/mattn/go-isatty/isatty_bsd.go | 3 +- .../mattn/go-isatty/isatty_others.go | 5 +- .../mattn/go-isatty/isatty_tcgets.go | 3 +- .../mazznoer/csscolorparser/.gitignore | 16 + .../mazznoer/csscolorparser/CHANGELOG.md | 19 + .../mazznoer/csscolorparser/LICENSE | 21 + .../mazznoer/csscolorparser/Makefile | 11 + .../mazznoer/csscolorparser/README.md | 78 + .../mazznoer/csscolorparser/colorparser.go | 480 + .../mazznoer/csscolorparser/named_colors.go | 152 + .../ginkgo/v2/ginkgo/build/build_command.go | 15 +- .../v2/ginkgo/generators/bootstrap_command.go | 2 +- .../v2/ginkgo/generators/generate_command.go | 3 +- .../onsi/ginkgo/v2/ginkgo/internal/compile.go | 14 +- .../ginkgo/v2/ginkgo/internal/gocovmerge.go | 129 + .../ginkgo/internal/profiles_and_reports.go | 46 +- .../ginkgo/v2/ginkgo/internal/test_suite.go | 9 +- .../onsi/ginkgo/v2/ginkgo/outline/ginkgo.go | 3 +- .../onsi/ginkgo/v2/ginkgo/outline/import.go | 9 +- .../ginkgo/v2/ginkgo/watch/dependencies.go | 2 +- .../ginkgo/v2/ginkgo/watch/package_hash.go | 13 +- .../ginkgo/v2/reporters/default_reporter.go | 61 +- .../onsi/ginkgo/v2/reporters/json_report.go | 6 +- .../onsi/ginkgo/v2/reporters/junit_report.go | 14 + .../onsi/ginkgo/v2/types/code_location.go | 2 +- .../github.com/onsi/ginkgo/v2/types/config.go | 26 +- .../github.com/onsi/ginkgo/v2/types/errors.go | 9 + .../github.com/onsi/ginkgo/v2/types/flags.go | 15 +- .../onsi/ginkgo/v2/types/label_filter.go | 229 +- .../onsi/ginkgo/v2/types/version.go | 2 +- .../pelletier/go-toml/v2/.gitignore | 3 +- .../pelletier/go-toml/v2/.goreleaser.yaml | 4 + .../pelletier/go-toml/v2/CONTRIBUTING.md | 31 +- .../github.com/pelletier/go-toml/v2/LICENSE | 3 +- .../github.com/pelletier/go-toml/v2/README.md | 161 +- .../pelletier/go-toml/v2/SECURITY.md | 3 - vendor/github.com/pelletier/go-toml/v2/ci.sh | 22 +- .../github.com/pelletier/go-toml/v2/decode.go | 2 +- .../go-toml/v2/internal/tracker/seen.go | 74 +- .../pelletier/go-toml/v2/marshaler.go | 97 +- .../pelletier/go-toml/v2/unmarshaler.go | 114 +- .../pelletier/go-toml/v2/unstable/parser.go | 6 + .../go-toml/v2/unstable/unmarshaler.go | 7 + .../github.com/pires/go-proxyproto/header.go | 6 +- .../github.com/pires/go-proxyproto/policy.go | 57 + .../pires/go-proxyproto/protocol.go | 150 +- vendor/github.com/pires/go-proxyproto/v1.go | 24 +- vendor/github.com/quic-go/quic-go/.gitignore | 1 + .../github.com/quic-go/quic-go/.golangci.yml | 21 +- vendor/github.com/quic-go/quic-go/README.md | 205 +- vendor/github.com/quic-go/quic-go/client.go | 4 +- vendor/github.com/quic-go/quic-go/config.go | 11 + .../github.com/quic-go/quic-go/connection.go | 358 +- .../quic-go/quic-go/connection_logging.go | 173 + .../quic-go/quic-go/crypto_stream.go | 49 +- .../quic-go/quic-go/crypto_stream_manager.go | 51 +- vendor/github.com/quic-go/quic-go/errors.go | 2 +- vendor/github.com/quic-go/quic-go/framer.go | 138 +- .../github.com/quic-go/quic-go/interface.go | 55 +- .../ackhandler/received_packet_history.go | 104 +- .../ackhandler/sent_packet_handler.go | 10 +- .../ackhandler/sent_packet_history.go | 11 +- .../quic-go/internal/congestion/cubic.go | 8 +- .../internal/congestion/cubic_sender.go | 2 +- .../flowcontrol/connection_flow_controller.go | 10 +- .../quic-go/internal/flowcontrol/interface.go | 6 +- .../flowcontrol/stream_flow_controller.go | 22 +- .../internal/handshake/crypto_setup.go | 69 +- .../quic-go/internal/handshake/interface.go | 26 +- .../internal/handshake/session_ticket.go | 12 +- .../internal/handshake/token_generator.go | 2 +- .../internal/handshake/token_protector.go | 20 +- .../quic-go/internal/logutils/frame.go | 50 - .../internal/protocol/packet_number.go | 68 +- .../quic-go/internal/protocol/params.go | 11 +- .../internal/{handshake => qtls}/conn.go | 2 +- .../quic-go/quic-go/internal/qtls/qtls.go | 34 +- .../quic-go/internal/utils/byteorder.go | 21 - .../internal/utils/byteorder_big_endian.go | 103 - .../quic-go/quic-go/internal/utils/ip.go | 10 - .../quic-go/quic-go/internal/utils/minmax.go | 36 - .../quic-go/internal/utils/rtt_stats.go | 23 +- .../quic-go/internal/wire/ack_frame.go | 67 +- .../internal/wire/connection_close_frame.go | 37 +- .../quic-go/internal/wire/crypto_frame.go | 29 +- .../internal/wire/data_blocked_frame.go | 12 +- .../quic-go/internal/wire/datagram_frame.go | 25 +- .../quic-go/internal/wire/extended_header.go | 70 +- .../quic-go/internal/wire/frame_parser.go | 85 +- .../quic-go/quic-go/internal/wire/header.go | 168 +- .../quic-go/internal/wire/max_data_frame.go | 12 +- .../internal/wire/max_stream_data_frame.go | 23 +- .../internal/wire/max_streams_frame.go | 13 +- .../internal/wire/new_connection_id_frame.go | 49 +- .../quic-go/internal/wire/new_token_frame.go | 24 +- .../internal/wire/path_challenge_frame.go | 15 +- .../internal/wire/path_response_frame.go | 15 +- .../internal/wire/reset_stream_frame.go | 25 +- .../wire/retire_connection_id_frame.go | 12 +- .../quic-go/internal/wire/short_header.go | 17 +- .../internal/wire/stop_sending_frame.go | 19 +- .../wire/stream_data_blocked_frame.go | 20 +- .../quic-go/internal/wire/stream_frame.go | 50 +- .../internal/wire/streams_blocked_frame.go | 13 +- .../internal/wire/transport_parameters.go | 138 +- .../quic-go/logging/connection_tracer.go | 16 +- .../quic-go/quic-go/logging/interface.go | 20 +- .../quic-go/quic-go/logging/tracer.go | 4 +- vendor/github.com/quic-go/quic-go/mockgen.go | 11 +- .../quic-go/quic-go/mtu_discoverer.go | 170 +- vendor/github.com/quic-go/quic-go/oss-fuzz.sh | 6 +- .../quic-go/quic-go/packet_packer.go | 11 +- .../quic-go/quic-go/packet_unpacker.go | 23 +- .../quic-go/quic-go/quicvarint/varint.go | 43 +- .../quic-go/quic-go/receive_stream.go | 187 +- .../github.com/quic-go/quic-go/send_stream.go | 213 +- vendor/github.com/quic-go/quic-go/server.go | 42 +- vendor/github.com/quic-go/quic-go/stream.go | 48 +- .../github.com/quic-go/quic-go/streams_map.go | 40 +- .../quic-go/quic-go/streams_map_outgoing.go | 2 +- .../quic-go/quic-go/sys_conn_helper_darwin.go | 4 +- .../quic-go/sys_conn_helper_freebsd.go | 4 +- .../quic-go/quic-go/sys_conn_helper_linux.go | 50 +- .../quic-go/quic-go/sys_conn_oob.go | 8 +- .../github.com/quic-go/quic-go/transport.go | 12 + .../quic-go/quic-go/window_update_queue.go | 71 - vendor/github.com/rs/cors/README.md | 31 +- vendor/github.com/rs/cors/cors.go | 244 +- .../github.com/rs/cors/internal/sortedset.go | 201 + vendor/github.com/rs/cors/utils.go | 66 +- vendor/github.com/spf13/cobra/.golangci.yml | 29 +- vendor/github.com/spf13/cobra/README.md | 8 +- vendor/github.com/spf13/cobra/active_help.go | 13 +- vendor/github.com/spf13/cobra/active_help.md | 157 - vendor/github.com/spf13/cobra/args.go | 4 +- .../spf13/cobra/bash_completions.go | 27 +- .../spf13/cobra/bash_completions.md | 93 - .../spf13/cobra/bash_completionsV2.go | 2 +- vendor/github.com/spf13/cobra/cobra.go | 15 +- vendor/github.com/spf13/cobra/command.go | 118 +- vendor/github.com/spf13/cobra/completions.go | 91 +- .../spf13/cobra/fish_completions.go | 2 +- .../spf13/cobra/fish_completions.md | 4 - vendor/github.com/spf13/cobra/flag_groups.go | 86 +- .../spf13/cobra/powershell_completions.go | 10 +- .../spf13/cobra/powershell_completions.md | 3 - .../spf13/cobra/projects_using_cobra.md | 64 - .../spf13/cobra/shell_completions.md | 576 - vendor/github.com/spf13/cobra/user_guide.md | 726 - .../github.com/spf13/cobra/zsh_completions.md | 48 - .../tkrajina/gpxgo/gpx/converters.go | 22 +- .../tkrajina/gpxgo/gpx/fixedpoint_float64.go | 21 + vendor/github.com/tkrajina/gpxgo/gpx/geo.go | 3 +- vendor/github.com/tkrajina/gpxgo/gpx/gpx.go | 139 +- vendor/github.com/tkrajina/gpxgo/gpx/gpx10.go | 4 +- vendor/github.com/tkrajina/gpxgo/gpx/gpx11.go | 33 +- .../tkrajina/gpxgo/gpx/nullable_float64.go | 27 +- vendor/github.com/tkrajina/gpxgo/gpx/xml.go | 81 +- vendor/github.com/ugorji/go/codec/cbor.go | 23 +- vendor/github.com/ugorji/go/codec/decode.go | 13 +- vendor/github.com/ugorji/go/codec/encode.go | 11 +- vendor/github.com/ugorji/go/codec/gen.go | 16 +- vendor/github.com/ugorji/go/codec/helper.go | 31 +- vendor/github.com/ugorji/go/codec/json.go | 3 + vendor/github.com/ugorji/go/codec/msgpack.go | 2 +- vendor/github.com/ugorji/go/codec/reader.go | 4 +- vendor/github.com/zcalusic/sysinfo/README.md | 5 +- vendor/github.com/zcalusic/sysinfo/cpu.go | 6 - vendor/github.com/zcalusic/sysinfo/memory.go | 4 +- vendor/github.com/zcalusic/sysinfo/network.go | 3 +- vendor/github.com/zcalusic/sysinfo/node.go | 16 - vendor/github.com/zcalusic/sysinfo/os.go | 19 + vendor/github.com/zcalusic/sysinfo/product.go | 18 +- vendor/github.com/zcalusic/sysinfo/storage.go | 3 +- vendor/github.com/zcalusic/sysinfo/util.go | 5 +- vendor/github.com/zcalusic/sysinfo/version.go | 2 +- vendor/go.etcd.io/bbolt/.go-version | 1 + vendor/go.etcd.io/bbolt/Makefile | 13 + vendor/go.etcd.io/bbolt/README.md | 23 +- vendor/go.etcd.io/bbolt/bolt_openbsd.go | 15 +- vendor/go.etcd.io/bbolt/bucket.go | 30 +- vendor/go.etcd.io/bbolt/cursor.go | 11 +- vendor/go.etcd.io/bbolt/db.go | 54 +- vendor/go.etcd.io/bbolt/freelist.go | 19 +- vendor/go.etcd.io/bbolt/page.go | 6 +- vendor/go.etcd.io/bbolt/tx.go | 7 + vendor/go.etcd.io/bbolt/unsafe.go | 12 - vendor/go.opencensus.io/Makefile | 8 +- vendor/go.opencensus.io/opencensus.go | 2 +- vendor/go.opencensus.io/trace/doc.go | 13 +- vendor/go.opencensus.io/trace/lrumap.go | 2 +- vendor/go.opencensus.io/trace/trace_go11.go | 1 + .../go.opencensus.io/trace/trace_nongo11.go | 1 + vendor/go.uber.org/mock/mockgen/deprecated.go | 41 + .../mockgen/{generic_go118.go => generic.go} | 27 - .../mock/mockgen/generic_notgo118.go | 41 - vendor/go.uber.org/mock/mockgen/gob.go | 21 + vendor/go.uber.org/mock/mockgen/mockgen.go | 77 +- .../go.uber.org/mock/mockgen/package_mode.go | 358 + vendor/go.uber.org/mock/mockgen/reflect.go | 256 - vendor/golang.org/x/arch/LICENSE | 4 +- vendor/golang.org/x/arch/x86/x86asm/gnu.go | 2 +- vendor/golang.org/x/arch/x86/x86asm/inst.go | 2 +- vendor/golang.org/x/crypto/LICENSE | 4 +- .../x/crypto/blake2b/blake2bAVX2_amd64.s | 5167 +- .../x/crypto/blake2b/blake2b_amd64.s | 1681 +- vendor/golang.org/x/crypto/blake2s/blake2s.go | 10 +- .../x/crypto/blake2s/blake2s_amd64.s | 2571 +- .../golang.org/x/crypto/blake2s/register.go | 21 - vendor/golang.org/x/crypto/blowfish/cipher.go | 2 +- vendor/golang.org/x/crypto/cast5/cast5.go | 2 +- .../x/crypto/chacha20/chacha_ppc64le.s | 110 +- .../chacha20poly1305/chacha20poly1305.go | 2 +- .../chacha20poly1305/chacha20poly1305_amd64.s | 11503 ++- .../x/crypto/curve25519/curve25519.go | 39 +- .../x/crypto/curve25519/curve25519_compat.go | 105 - .../x/crypto/curve25519/curve25519_go120.go | 46 - .../x/crypto/curve25519/internal/field/README | 7 - .../x/crypto/curve25519/internal/field/fe.go | 416 - .../curve25519/internal/field/fe_amd64.go | 15 - .../curve25519/internal/field/fe_amd64.s | 378 - .../internal/field/fe_amd64_noasm.go | 11 - .../curve25519/internal/field/fe_arm64.go | 15 - .../curve25519/internal/field/fe_arm64.s | 42 - .../internal/field/fe_arm64_noasm.go | 11 - .../curve25519/internal/field/fe_generic.go | 264 - .../curve25519/internal/field/sync.checkpoint | 1 - .../crypto/curve25519/internal/field/sync.sh | 19 - vendor/golang.org/x/crypto/hkdf/hkdf.go | 2 +- .../x/crypto/internal/poly1305/sum_amd64.s | 133 +- vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go | 2 +- .../x/crypto/salsa20/salsa/hsalsa20.go | 2 +- .../x/crypto/salsa20/salsa/salsa20_amd64.s | 1742 +- vendor/golang.org/x/crypto/salsa20/salsa20.go | 2 +- vendor/golang.org/x/crypto/sha3/doc.go | 2 +- vendor/golang.org/x/crypto/sha3/hashes.go | 42 +- .../x/crypto/sha3/hashes_generic.go | 27 - .../golang.org/x/crypto/sha3/hashes_noasm.go | 23 + .../golang.org/x/crypto/sha3/keccakf_amd64.s | 5787 +- vendor/golang.org/x/crypto/sha3/register.go | 18 - vendor/golang.org/x/crypto/sha3/sha3.go | 62 +- vendor/golang.org/x/crypto/sha3/sha3_s390x.go | 67 +- vendor/golang.org/x/crypto/sha3/shake.go | 20 +- .../golang.org/x/crypto/sha3/shake_generic.go | 19 - .../golang.org/x/crypto/sha3/shake_noasm.go | 15 + vendor/golang.org/x/crypto/sha3/xor.go | 45 +- .../golang.org/x/crypto/sha3/xor_generic.go | 28 - .../golang.org/x/crypto/sha3/xor_unaligned.go | 66 - vendor/golang.org/x/crypto/twofish/twofish.go | 2 +- vendor/golang.org/x/crypto/xtea/cipher.go | 2 +- vendor/golang.org/x/exp/LICENSE | 4 +- vendor/golang.org/x/image/AUTHORS | 3 - vendor/golang.org/x/image/CONTRIBUTORS | 3 - vendor/golang.org/x/image/LICENSE | 4 +- vendor/golang.org/x/image/draw/draw.go | 6 + vendor/golang.org/x/image/draw/draw_go117.go | 27 - vendor/golang.org/x/image/draw/impl.go | 2820 +- vendor/golang.org/x/image/draw/scale.go | 56 +- .../x/image/font/basicfont/basicfont.go | 65 +- vendor/golang.org/x/image/font/font.go | 79 +- vendor/golang.org/x/mod/LICENSE | 4 +- vendor/golang.org/x/mod/modfile/read.go | 9 +- vendor/golang.org/x/mod/modfile/rule.go | 205 +- vendor/golang.org/x/mod/modfile/work.go | 54 +- vendor/golang.org/x/net/LICENSE | 4 +- vendor/golang.org/x/net/html/doc.go | 2 +- .../golang.org/x/net/http/httpguts/httplex.go | 13 +- vendor/golang.org/x/net/http2/config.go | 122 + vendor/golang.org/x/net/http2/config_go124.go | 61 + .../x/net/http2/config_pre_go124.go | 16 + vendor/golang.org/x/net/http2/frame.go | 40 +- vendor/golang.org/x/net/http2/http2.go | 72 +- vendor/golang.org/x/net/http2/pipe.go | 11 +- vendor/golang.org/x/net/http2/server.go | 271 +- vendor/golang.org/x/net/http2/timer.go | 20 + vendor/golang.org/x/net/http2/transport.go | 373 +- vendor/golang.org/x/net/http2/write.go | 10 + .../x/net/http2/writesched_priority.go | 4 +- vendor/golang.org/x/net/proxy/per_host.go | 8 +- .../ghodss/yaml => golang.org/x/sync}/LICENSE | 27 +- vendor/golang.org/x/sync/PATENTS | 22 + vendor/golang.org/x/sync/errgroup/errgroup.go | 135 + vendor/golang.org/x/sync/errgroup/go120.go | 13 + .../golang.org/x/sync/errgroup/pre_go120.go | 14 + .../golang.org/x/sync/semaphore/semaphore.go | 160 + vendor/golang.org/x/sys/LICENSE | 4 +- vendor/golang.org/x/sys/cpu/cpu.go | 21 + vendor/golang.org/x/sys/cpu/cpu_arm64.go | 12 + .../golang.org/x/sys/cpu/cpu_linux_arm64.go | 5 + .../golang.org/x/sys/cpu/cpu_linux_noinit.go | 2 +- .../golang.org/x/sys/cpu/cpu_linux_riscv64.go | 137 + vendor/golang.org/x/sys/cpu/cpu_riscv64.go | 11 +- vendor/golang.org/x/sys/execabs/execabs.go | 102 - .../golang.org/x/sys/execabs/execabs_go118.go | 17 - .../golang.org/x/sys/execabs/execabs_go119.go | 20 - vendor/golang.org/x/sys/unix/README.md | 2 +- vendor/golang.org/x/sys/unix/mkerrors.sh | 8 +- vendor/golang.org/x/sys/unix/mremap.go | 5 + vendor/golang.org/x/sys/unix/syscall_aix.go | 2 +- .../golang.org/x/sys/unix/syscall_darwin.go | 61 + vendor/golang.org/x/sys/unix/syscall_hurd.go | 1 + vendor/golang.org/x/sys/unix/syscall_linux.go | 64 +- .../x/sys/unix/syscall_linux_arm64.go | 2 + .../x/sys/unix/syscall_linux_loong64.go | 2 + .../x/sys/unix/syscall_linux_riscv64.go | 2 + .../golang.org/x/sys/unix/syscall_openbsd.go | 1 + vendor/golang.org/x/sys/unix/syscall_unix.go | 9 + .../golang.org/x/sys/unix/vgetrandom_linux.go | 13 + .../x/sys/unix/vgetrandom_unsupported.go | 11 + .../x/sys/unix/zerrors_darwin_amd64.go | 12 + .../x/sys/unix/zerrors_darwin_arm64.go | 12 + vendor/golang.org/x/sys/unix/zerrors_linux.go | 69 +- .../x/sys/unix/zerrors_linux_386.go | 8 + .../x/sys/unix/zerrors_linux_amd64.go | 8 + .../x/sys/unix/zerrors_linux_arm.go | 7 + .../x/sys/unix/zerrors_linux_arm64.go | 8 + .../x/sys/unix/zerrors_linux_loong64.go | 7 + .../x/sys/unix/zerrors_linux_mips.go | 7 + .../x/sys/unix/zerrors_linux_mips64.go | 7 + .../x/sys/unix/zerrors_linux_mips64le.go | 7 + .../x/sys/unix/zerrors_linux_mipsle.go | 7 + .../x/sys/unix/zerrors_linux_ppc.go | 7 + .../x/sys/unix/zerrors_linux_ppc64.go | 7 + .../x/sys/unix/zerrors_linux_ppc64le.go | 7 + .../x/sys/unix/zerrors_linux_riscv64.go | 7 + .../x/sys/unix/zerrors_linux_s390x.go | 7 + .../x/sys/unix/zerrors_linux_sparc64.go | 7 + .../x/sys/unix/zerrors_zos_s390x.go | 2 + .../x/sys/unix/zsyscall_darwin_amd64.go | 101 + .../x/sys/unix/zsyscall_darwin_amd64.s | 25 + .../x/sys/unix/zsyscall_darwin_arm64.go | 101 + .../x/sys/unix/zsyscall_darwin_arm64.s | 25 + .../golang.org/x/sys/unix/zsyscall_linux.go | 33 +- .../x/sys/unix/zsyscall_openbsd_386.go | 24 + .../x/sys/unix/zsyscall_openbsd_386.s | 5 + .../x/sys/unix/zsyscall_openbsd_amd64.go | 24 + .../x/sys/unix/zsyscall_openbsd_amd64.s | 5 + .../x/sys/unix/zsyscall_openbsd_arm.go | 24 + .../x/sys/unix/zsyscall_openbsd_arm.s | 5 + .../x/sys/unix/zsyscall_openbsd_arm64.go | 24 + .../x/sys/unix/zsyscall_openbsd_arm64.s | 5 + .../x/sys/unix/zsyscall_openbsd_mips64.go | 24 + .../x/sys/unix/zsyscall_openbsd_mips64.s | 5 + .../x/sys/unix/zsyscall_openbsd_ppc64.go | 24 + .../x/sys/unix/zsyscall_openbsd_ppc64.s | 6 + .../x/sys/unix/zsyscall_openbsd_riscv64.go | 24 + .../x/sys/unix/zsyscall_openbsd_riscv64.s | 5 + .../x/sys/unix/zsysnum_linux_386.go | 1 + .../x/sys/unix/zsysnum_linux_amd64.go | 2 + .../x/sys/unix/zsysnum_linux_arm.go | 1 + .../x/sys/unix/zsysnum_linux_arm64.go | 3 +- .../x/sys/unix/zsysnum_linux_loong64.go | 3 + .../x/sys/unix/zsysnum_linux_mips.go | 1 + .../x/sys/unix/zsysnum_linux_mips64.go | 1 + .../x/sys/unix/zsysnum_linux_mips64le.go | 1 + .../x/sys/unix/zsysnum_linux_mipsle.go | 1 + .../x/sys/unix/zsysnum_linux_ppc.go | 1 + .../x/sys/unix/zsysnum_linux_ppc64.go | 1 + .../x/sys/unix/zsysnum_linux_ppc64le.go | 1 + .../x/sys/unix/zsysnum_linux_riscv64.go | 3 +- .../x/sys/unix/zsysnum_linux_s390x.go | 1 + .../x/sys/unix/zsysnum_linux_sparc64.go | 1 + .../x/sys/unix/ztypes_darwin_amd64.go | 13 + .../x/sys/unix/ztypes_darwin_arm64.go | 13 + .../x/sys/unix/ztypes_freebsd_386.go | 1 + .../x/sys/unix/ztypes_freebsd_amd64.go | 1 + .../x/sys/unix/ztypes_freebsd_arm.go | 1 + .../x/sys/unix/ztypes_freebsd_arm64.go | 1 + .../x/sys/unix/ztypes_freebsd_riscv64.go | 1 + vendor/golang.org/x/sys/unix/ztypes_linux.go | 129 +- .../x/sys/unix/ztypes_linux_riscv64.go | 33 + .../golang.org/x/sys/windows/dll_windows.go | 2 +- .../x/sys/windows/security_windows.go | 25 +- .../x/sys/windows/syscall_windows.go | 16 +- .../golang.org/x/sys/windows/types_windows.go | 72 +- .../x/sys/windows/zsyscall_windows.go | 89 + vendor/golang.org/x/term/LICENSE | 4 +- vendor/golang.org/x/term/term_windows.go | 1 + vendor/golang.org/x/text/LICENSE | 4 +- vendor/golang.org/x/tools/LICENSE | 4 +- .../x/tools/cmd/stringer/stringer.go | 657 - vendor/golang.org/x/tools/cover/profile.go | 266 + .../x/tools/go/ast/astutil/enclosing.go | 32 +- .../x/tools/go/ast/astutil/rewrite.go | 8 +- .../golang.org/x/tools/go/ast/astutil/util.go | 11 +- .../x/tools/go/ast/inspector/inspector.go | 9 + .../x/tools/go/ast/inspector/iter.go | 85 + .../x/tools/go/ast/inspector/typeof.go | 4 +- .../x/tools/go/gcexportdata/gcexportdata.go | 2 +- .../tools/go/internal/packagesdriver/sizes.go | 48 - vendor/golang.org/x/tools/go/packages/doc.go | 65 +- .../x/tools/go/packages/external.go | 103 +- .../golang.org/x/tools/go/packages/golist.go | 238 +- .../x/tools/go/packages/golist_overlay.go | 492 - .../x/tools/go/packages/loadmode_string.go | 69 +- .../x/tools/go/packages/packages.go | 497 +- .../golang.org/x/tools/go/packages/visit.go | 9 + .../x/tools/go/types/objectpath/objectpath.go | 237 +- .../x/tools/go/types/typeutil/callee.go | 68 + .../x/tools/go/types/typeutil/imports.go | 30 + .../x/tools/go/types/typeutil/map.go | 517 + .../tools/go/types/typeutil/methodsetcache.go | 71 + .../x/tools/go/types/typeutil/ui.go | 53 + vendor/golang.org/x/tools/imports/forward.go | 4 +- .../x/tools/internal/aliases/aliases.go | 38 + .../x/tools/internal/aliases/aliases_go122.go | 80 + .../x/tools/internal/event/keys/util.go | 21 + .../x/tools/internal/event/tag/tag.go | 59 - .../x/tools/internal/fastwalk/fastwalk.go | 196 - .../internal/fastwalk/fastwalk_darwin.go | 119 - .../fastwalk/fastwalk_dirent_fileno.go | 14 - .../internal/fastwalk/fastwalk_dirent_ino.go | 15 - .../fastwalk/fastwalk_dirent_namlen_bsd.go | 14 - .../fastwalk/fastwalk_dirent_namlen_linux.go | 29 - .../internal/fastwalk/fastwalk_portable.go | 38 - .../tools/internal/fastwalk/fastwalk_unix.go | 153 - .../x/tools/internal/gcimporter/bimport.go | 61 - .../x/tools/internal/gcimporter/gcimporter.go | 21 +- .../x/tools/internal/gcimporter/iexport.go | 310 +- .../x/tools/internal/gcimporter/iimport.go | 118 +- .../internal/gcimporter/newInterface10.go | 22 - .../internal/gcimporter/newInterface11.go | 14 - .../tools/internal/gcimporter/predeclared.go | 91 + .../internal/gcimporter/support_go117.go | 16 - .../internal/gcimporter/support_go118.go | 37 - .../x/tools/internal/gcimporter/unified_no.go | 10 - .../tools/internal/gcimporter/unified_yes.go | 10 - .../x/tools/internal/gcimporter/ureader_no.go | 19 - .../tools/internal/gcimporter/ureader_yes.go | 50 +- .../x/tools/internal/gocommand/invoke.go | 173 +- .../x/tools/internal/gocommand/vendor.go | 54 + .../x/tools/internal/gopathwalk/walk.go | 289 +- .../x/tools/internal/imports/fix.go | 674 +- .../x/tools/internal/imports/imports.go | 10 +- .../x/tools/internal/imports/mod.go | 366 +- .../x/tools/internal/imports/mod_cache.go | 119 +- .../x/tools/internal/imports/sortimports.go | 2 +- .../x/tools/internal/imports/zstdlib.go | 11345 --- .../internal/packagesinternal/packages.go | 8 - .../x/tools/internal/pkgbits/decoder.go | 34 +- .../x/tools/internal/pkgbits/encoder.go | 43 +- .../x/tools/internal/pkgbits/frames_go1.go | 21 - .../x/tools/internal/pkgbits/frames_go17.go | 28 - .../x/tools/internal/pkgbits/support.go | 2 +- .../x/tools/internal/pkgbits/sync.go | 23 + .../internal/pkgbits/syncmarker_string.go | 7 +- .../x/tools/internal/pkgbits/version.go | 85 + .../x/tools/internal/stdlib/manifest.go | 17431 ++++ .../x/tools/internal/stdlib/stdlib.go | 97 + .../internal/tokeninternal/tokeninternal.go | 151 - .../x/tools/internal/typeparams/common.go | 102 +- .../x/tools/internal/typeparams/coretype.go | 58 +- .../internal/typeparams/enabled_go117.go | 12 - .../internal/typeparams/enabled_go118.go | 15 - .../x/tools/internal/typeparams/free.go | 118 + .../x/tools/internal/typeparams/normalize.go | 20 +- .../x/tools/internal/typeparams/termlist.go | 2 +- .../internal/typeparams/typeparams_go117.go | 197 - .../internal/typeparams/typeparams_go118.go | 151 - .../x/tools/internal/typeparams/typeterm.go | 9 +- .../x/tools/internal/typesinternal/element.go | 133 + .../tools/internal/typesinternal/errorcode.go | 14 +- .../internal/typesinternal/objectpath.go | 24 - .../x/tools/internal/typesinternal/recv.go | 41 + .../x/tools/internal/typesinternal/toonew.go | 89 + .../x/tools/internal/typesinternal/types.go | 15 +- .../tools/internal/typesinternal/types_118.go | 19 - .../x/tools/internal/versions/constraint.go | 13 + .../internal/versions/constraint_go121.go | 14 + .../x/tools/internal/versions/features.go | 43 + .../x/tools/internal/versions/gover.go | 172 + .../x/tools/internal/versions/types.go | 38 + .../x/tools/internal/versions/versions.go | 57 + .../wireguard/conn/bind_std.go | 544 + .../wireguard/conn/bind_windows.go | 601 + .../wireguard/conn/boundif_android.go | 34 + .../golang.zx2c4.com/wireguard/conn/conn.go | 133 + .../wireguard/conn/controlfns.go | 43 + .../wireguard/conn/controlfns_linux.go | 69 + .../wireguard/conn/controlfns_unix.go | 35 + .../wireguard/conn/controlfns_windows.go | 23 + .../wireguard/conn/default.go | 10 + .../wireguard/conn/errors_default.go | 12 + .../wireguard/conn/errors_linux.go | 26 + .../wireguard/conn/features_default.go | 15 + .../wireguard/conn/features_linux.go | 29 + .../wireguard/conn/gso_default.go | 21 + .../wireguard/conn/gso_linux.go | 65 + .../wireguard/conn/mark_default.go | 12 + .../wireguard/conn/mark_unix.go | 65 + .../wireguard/conn/sticky_default.go | 42 + .../wireguard/conn/sticky_linux.go | 112 + .../wireguard/conn/winrio/rio_windows.go | 254 + .../wireguard/rwcancel/rwcancel.go | 2 +- .../wireguard/rwcancel/rwcancel_stub.go | 2 +- .../wireguard/tun/checksum.go | 118 + .../golang.zx2c4.com/wireguard/tun/errors.go | 12 + .../wireguard/tun/offload_linux.go | 993 + vendor/golang.zx2c4.com/wireguard/tun/tun.go | 40 +- .../wireguard/tun/tun_darwin.go | 67 +- .../wireguard/tun/tun_freebsd.go | 55 +- .../wireguard/tun/tun_linux.go | 285 +- .../wireguard/tun/tun_openbsd.go | 60 +- .../wireguard/tun/tun_windows.go | 58 +- .../protobuf/encoding/prototext/decode.go | 12 +- .../protobuf/encoding/prototext/encode.go | 24 +- .../protobuf/encoding/protowire/wire.go | 28 +- .../protobuf/internal/descfmt/stringer.go | 184 +- .../protobuf/internal/descopts/options.go | 20 +- .../internal/editiondefaults/defaults.go | 12 + .../editiondefaults/editions_defaults.binpb | Bin 0 -> 93 bytes .../internal/editionssupport/editions.go | 13 + .../protobuf/internal/encoding/tag/tag.go | 4 +- .../protobuf/internal/encoding/text/decode.go | 2 +- .../protobuf/internal/errors/errors.go | 21 +- .../protobuf/internal/filedesc/desc.go | 170 +- .../protobuf/internal/filedesc/desc_init.go | 91 +- .../protobuf/internal/filedesc/desc_lazy.go | 47 +- .../internal/filedesc/desc_list_gen.go | 11 + .../protobuf/internal/filedesc/editions.go | 156 + .../protobuf/internal/filedesc/placeholder.go | 1 + .../protobuf/internal/filetype/build.go | 4 +- .../protobuf/internal/genid/descriptor_gen.go | 401 +- .../protobuf/internal/genid/doc.go | 2 +- .../internal/genid/go_features_gen.go | 36 + .../protobuf/internal/genid/map_entry.go | 2 +- .../protobuf/internal/genid/struct_gen.go | 5 + .../protobuf/internal/genid/type_gen.go | 38 + .../protobuf/internal/genid/wrappers.go | 2 +- .../protobuf/internal/impl/api_export.go | 6 +- .../protobuf/internal/impl/checkinit.go | 2 +- .../protobuf/internal/impl/codec_extension.go | 55 +- .../protobuf/internal/impl/codec_field.go | 67 +- .../protobuf/internal/impl/codec_gen.go | 113 +- .../protobuf/internal/impl/codec_map.go | 15 +- .../protobuf/internal/impl/codec_message.go | 3 + .../internal/impl/codec_messageset.go | 22 + .../protobuf/internal/impl/codec_reflect.go | 210 - .../protobuf/internal/impl/codec_tables.go | 2 +- .../protobuf/internal/impl/codec_unsafe.go | 3 - .../protobuf/internal/impl/convert.go | 4 +- .../protobuf/internal/impl/convert_list.go | 2 +- .../protobuf/internal/impl/convert_map.go | 2 +- .../protobuf/internal/impl/encode.go | 50 +- .../protobuf/internal/impl/equal.go | 224 + .../protobuf/internal/impl/extension.go | 8 +- .../protobuf/internal/impl/legacy_enum.go | 3 +- .../internal/impl/legacy_extension.go | 3 +- .../protobuf/internal/impl/legacy_file.go | 4 +- .../protobuf/internal/impl/legacy_message.go | 31 +- .../protobuf/internal/impl/message.go | 27 +- .../protobuf/internal/impl/message_reflect.go | 45 +- .../internal/impl/message_reflect_field.go | 2 +- .../internal/impl/message_reflect_gen.go | 146 +- .../protobuf/internal/impl/pointer_reflect.go | 179 - .../protobuf/internal/impl/pointer_unsafe.go | 47 +- .../protobuf/internal/order/range.go | 4 +- .../protobuf/internal/strs/strings.go | 2 +- .../protobuf/internal/strs/strings_pure.go | 28 - ...ings_unsafe.go => strings_unsafe_go120.go} | 3 +- .../internal/strs/strings_unsafe_go121.go | 73 + .../protobuf/internal/version/version.go | 4 +- .../protobuf/proto/decode.go | 4 +- .../google.golang.org/protobuf/proto/doc.go | 58 +- .../protobuf/proto/encode.go | 46 +- .../google.golang.org/protobuf/proto/equal.go | 9 + .../protobuf/proto/extension.go | 90 +- .../google.golang.org/protobuf/proto/merge.go | 2 +- .../protobuf/proto/messageset.go | 7 +- .../google.golang.org/protobuf/proto/proto.go | 18 +- .../google.golang.org/protobuf/proto/size.go | 2 + .../protobuf/reflect/protodesc/desc.go | 34 +- .../protobuf/reflect/protodesc/desc_init.go | 47 +- .../reflect/protodesc/desc_resolve.go | 9 +- .../reflect/protodesc/desc_validate.go | 75 +- .../protobuf/reflect/protodesc/editions.go | 145 + .../protobuf/reflect/protodesc/proto.go | 40 +- .../protobuf/reflect/protoreflect/methods.go | 10 + .../protobuf/reflect/protoreflect/proto.go | 87 +- .../reflect/protoreflect/source_gen.go | 85 +- .../protobuf/reflect/protoreflect/type.go | 56 +- .../protobuf/reflect/protoreflect/value.go | 24 +- .../reflect/protoreflect/value_equal.go | 8 +- .../reflect/protoreflect/value_pure.go | 60 - .../reflect/protoreflect/value_union.go | 58 +- ...{value_unsafe.go => value_unsafe_go120.go} | 9 +- .../protoreflect/value_unsafe_go121.go | 86 + .../reflect/protoregistry/registry.go | 38 +- .../protobuf/runtime/protoiface/methods.go | 18 + .../types/descriptorpb/descriptor.pb.go | 3102 +- .../types/gofeaturespb/go_features.pb.go | 165 + vendor/gopkg.in/yaml.v2/.travis.yml | 17 - vendor/gopkg.in/yaml.v2/LICENSE.libyaml | 31 - vendor/gopkg.in/yaml.v2/NOTICE | 13 - vendor/gopkg.in/yaml.v2/README.md | 133 - vendor/gopkg.in/yaml.v2/apic.go | 744 - vendor/gopkg.in/yaml.v2/decode.go | 815 - vendor/gopkg.in/yaml.v2/emitterc.go | 1685 - vendor/gopkg.in/yaml.v2/encode.go | 390 - vendor/gopkg.in/yaml.v2/parserc.go | 1095 - vendor/gopkg.in/yaml.v2/readerc.go | 412 - vendor/gopkg.in/yaml.v2/resolve.go | 258 - vendor/gopkg.in/yaml.v2/scannerc.go | 2711 - vendor/gopkg.in/yaml.v2/sorter.go | 113 - vendor/gopkg.in/yaml.v2/writerc.go | 26 - vendor/gopkg.in/yaml.v2/yaml.go | 478 - vendor/gopkg.in/yaml.v2/yamlh.go | 739 - vendor/gopkg.in/yaml.v2/yamlprivateh.go | 173 - vendor/gorm.io/driver/postgres/.gitignore | 1 + .../driver/postgres/error_translator.go | 50 + vendor/gorm.io/driver/postgres/migrator.go | 435 +- vendor/gorm.io/driver/postgres/postgres.go | 94 +- vendor/gorm.io/gorm/.gitignore | 3 +- vendor/gorm.io/gorm/.golangci.yml | 9 + vendor/gorm.io/gorm/{License => LICENSE} | 0 vendor/gorm.io/gorm/README.md | 11 +- vendor/gorm.io/gorm/association.go | 89 +- vendor/gorm.io/gorm/callbacks.go | 37 +- vendor/gorm.io/gorm/callbacks/associations.go | 41 +- vendor/gorm.io/gorm/callbacks/callmethod.go | 13 +- vendor/gorm.io/gorm/callbacks/create.go | 101 +- vendor/gorm.io/gorm/callbacks/preload.go | 172 + vendor/gorm.io/gorm/callbacks/query.go | 238 +- vendor/gorm.io/gorm/callbacks/row.go | 2 +- vendor/gorm.io/gorm/callbacks/update.go | 51 +- vendor/gorm.io/gorm/chainable_api.go | 209 +- vendor/gorm.io/gorm/clause/clause.go | 1 + vendor/gorm.io/gorm/clause/expression.go | 18 +- vendor/gorm.io/gorm/clause/joins.go | 2 +- vendor/gorm.io/gorm/clause/limit.go | 14 +- vendor/gorm.io/gorm/clause/locking.go | 7 + vendor/gorm.io/gorm/clause/on_conflict.go | 34 +- vendor/gorm.io/gorm/clause/where.go | 79 +- vendor/gorm.io/gorm/errors.go | 10 + vendor/gorm.io/gorm/finisher_api.go | 207 +- vendor/gorm.io/gorm/gorm.go | 107 +- vendor/gorm.io/gorm/interfaces.go | 8 + vendor/gorm.io/gorm/logger/logger.go | 31 +- vendor/gorm.io/gorm/logger/sql.go | 54 +- vendor/gorm.io/gorm/migrator.go | 24 +- vendor/gorm.io/gorm/migrator/index.go | 6 +- vendor/gorm.io/gorm/migrator/migrator.go | 483 +- vendor/gorm.io/gorm/migrator/table_type.go | 33 + vendor/gorm.io/gorm/model.go | 7 +- vendor/gorm.io/gorm/prepare_stmt.go | 154 +- vendor/gorm.io/gorm/scan.go | 153 +- vendor/gorm.io/gorm/schema/check.go | 35 - vendor/gorm.io/gorm/schema/constraint.go | 66 + vendor/gorm.io/gorm/schema/field.go | 117 +- vendor/gorm.io/gorm/schema/index.go | 10 +- vendor/gorm.io/gorm/schema/interfaces.go | 6 + vendor/gorm.io/gorm/schema/naming.go | 33 +- vendor/gorm.io/gorm/schema/relationship.go | 261 +- vendor/gorm.io/gorm/schema/schema.go | 143 +- vendor/gorm.io/gorm/schema/serializer.go | 40 +- vendor/gorm.io/gorm/schema/utils.go | 7 +- vendor/gorm.io/gorm/soft_delete.go | 27 +- vendor/gorm.io/gorm/statement.go | 107 +- vendor/gorm.io/gorm/utils/utils.go | 93 +- vendor/howett.net/plist/bplist_parser.go | 5 +- vendor/howett.net/plist/marshal.go | 15 +- vendor/howett.net/plist/text_parser.go | 6 + vendor/modules.txt | 321 +- vendor/mvdan.cc/sh/v3/expand/arith.go | 34 +- vendor/mvdan.cc/sh/v3/expand/braces.go | 21 +- vendor/mvdan.cc/sh/v3/expand/environ.go | 34 +- vendor/mvdan.cc/sh/v3/expand/expand.go | 127 +- vendor/mvdan.cc/sh/v3/expand/param.go | 11 +- vendor/mvdan.cc/sh/v3/fileutil/file.go | 4 +- vendor/mvdan.cc/sh/v3/pattern/pattern.go | 6 +- vendor/mvdan.cc/sh/v3/syntax/braces.go | 3 +- vendor/mvdan.cc/sh/v3/syntax/lexer.go | 44 +- vendor/mvdan.cc/sh/v3/syntax/nodes.go | 10 +- vendor/mvdan.cc/sh/v3/syntax/parser.go | 122 +- vendor/mvdan.cc/sh/v3/syntax/parser_arithm.go | 6 +- vendor/mvdan.cc/sh/v3/syntax/printer.go | 372 +- vendor/mvdan.cc/sh/v3/syntax/simplify.go | 62 +- vendor/mvdan.cc/sh/v3/syntax/tokens.go | 1 + vendor/mvdan.cc/sh/v3/syntax/walk.go | 197 +- vendor/nhooyr.io/websocket/.gitignore | 1 - vendor/nhooyr.io/websocket/LICENSE.txt | 34 +- vendor/nhooyr.io/websocket/README.md | 133 +- vendor/nhooyr.io/websocket/accept.go | 116 +- vendor/nhooyr.io/websocket/accept_js.go | 20 - vendor/nhooyr.io/websocket/close.go | 283 + vendor/nhooyr.io/websocket/close_notjs.go | 211 - vendor/nhooyr.io/websocket/compress.go | 246 +- vendor/nhooyr.io/websocket/compress_notjs.go | 181 - vendor/nhooyr.io/websocket/conn.go | 290 + vendor/nhooyr.io/websocket/conn_notjs.go | 265 - vendor/nhooyr.io/websocket/dial.go | 94 +- vendor/nhooyr.io/websocket/doc.go | 16 +- vendor/nhooyr.io/websocket/frame.go | 125 +- .../nhooyr.io/websocket/internal/util/util.go | 15 + .../websocket/internal/wsjs/wsjs_js.go | 3 +- .../nhooyr.io/websocket/internal/xsync/go.go | 3 +- vendor/nhooyr.io/websocket/make.sh | 12 + vendor/nhooyr.io/websocket/mask.go | 128 + vendor/nhooyr.io/websocket/mask_amd64.s | 127 + vendor/nhooyr.io/websocket/mask_arm64.s | 72 + vendor/nhooyr.io/websocket/mask_asm.go | 26 + vendor/nhooyr.io/websocket/mask_go.go | 7 + vendor/nhooyr.io/websocket/netconn.go | 181 +- vendor/nhooyr.io/websocket/netconn_js.go | 11 + vendor/nhooyr.io/websocket/netconn_notjs.go | 20 + vendor/nhooyr.io/websocket/read.go | 126 +- vendor/nhooyr.io/websocket/stringer.go | 2 + vendor/nhooyr.io/websocket/write.go | 149 +- vendor/nhooyr.io/websocket/ws_js.go | 255 +- 1922 files changed, 446848 insertions(+), 162407 deletions(-) delete mode 100644 vendor/github.com/Microsoft/go-winio/tools.go delete mode 100644 vendor/github.com/bytedance/sonic/Makefile rename vendor/github.com/bytedance/sonic/ast/{api_amd64.go => api.go} (71%) create mode 100644 vendor/github.com/bytedance/sonic/ast/stubs.go delete mode 100644 vendor/github.com/bytedance/sonic/ast/stubs_go115.go delete mode 100644 vendor/github.com/bytedance/sonic/ast/stubs_go120.go rename vendor/github.com/bytedance/sonic/decoder/{decoder_amd64.go => decoder_native.go} (64%) rename vendor/github.com/bytedance/sonic/encoder/{encoder_amd64.go => encoder_native.go} (86%) create mode 100644 vendor/github.com/bytedance/sonic/go.work.sum create mode 100644 vendor/github.com/bytedance/sonic/internal/base64/b64_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/base64/b64_compat.go rename vendor/github.com/bytedance/sonic/internal/decoder/{ => api}/decoder.go (57%) create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/api/decoder_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/api/decoder_arm64.go rename vendor/github.com/bytedance/sonic/internal/decoder/{ => api}/stream.go (60%) delete mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/asm_stubs_amd64_go116.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/assembler_stkabi_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/consts/option.go rename vendor/github.com/bytedance/sonic/internal/decoder/{ => errors}/errors.go (90%) delete mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/generic_stkabi_amd64.go rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/asm.s (100%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/asm_stubs_amd64_go117.go (99%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/asm_stubs_amd64_go121.go (98%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/assembler_regabi_amd64.go (96%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/compiler.go (93%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/debug.go (99%) create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/jitdec/decoder.go rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/generic_regabi_amd64.go (99%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/generic_regabi_amd64_test.s (97%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/pools.go (97%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/primitives.go (84%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/stubs_go116.go (91%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/stubs_go120.go (91%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/types.go (99%) rename vendor/github.com/bytedance/sonic/internal/decoder/{ => jitdec}/utils.go (98%) create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/compile_struct.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/compiler.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/const.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/context.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/decoder.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/errors.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/functor.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/helper.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/interface.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/map.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/native.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/node.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/slice.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/stringopts.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/structs.go create mode 100644 vendor/github.com/bytedance/sonic/internal/decoder/optdec/types.go rename vendor/github.com/bytedance/sonic/internal/encoder/{ => alg}/mapiter.go (52%) create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/alg/opts.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/alg/primitives.go rename vendor/github.com/bytedance/sonic/internal/encoder/{ => alg}/sort.go (99%) create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/alg/spec.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/alg/spec_compat.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/asm_stubs_amd64_go116.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/assembler_regabi_amd64.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/assembler_stkabi_amd64.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/debug_go117.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/encode_norace.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/encode_race.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/ir/op.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/pools.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/pools_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/pools_compt.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/primitives.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/stubs_go116.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/stubs_go117.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/stubs_go120.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/stubs_go121.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/vars/cache.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/vars/const.go rename vendor/github.com/bytedance/sonic/internal/encoder/{ => vars}/errors.go (68%) create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/vars/stack.go rename vendor/github.com/bytedance/sonic/internal/encoder/{ => vars}/types.go (63%) create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/vm/stbus.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/vm/vm.go rename vendor/github.com/bytedance/sonic/internal/encoder/{ => x86}/asm_stubs_amd64_go117.go (74%) rename vendor/github.com/bytedance/sonic/internal/encoder/{ => x86}/asm_stubs_amd64_go121.go (72%) create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/x86/assembler_regabi_amd64.go rename vendor/github.com/bytedance/sonic/internal/encoder/{ => x86}/debug_go116.go (97%) create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/x86/debug_go117.go create mode 100644 vendor/github.com/bytedance/sonic/internal/encoder/x86/stbus.go create mode 100644 vendor/github.com/bytedance/sonic/internal/envs/decode.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx/native_amd64.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx/native_subr_amd64.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx/native_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/f32toa.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/f32toa_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/f32toa_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/f64toa.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/f64toa_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/f64toa_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/get_by_path.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/get_by_path_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/get_by_path_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/html_escape.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/html_escape_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/html_escape_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/i64toa.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/i64toa_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/i64toa_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/lookup_small_key.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/lookup_small_key_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/lookup_small_key_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/lspace.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/lspace_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/lspace_text_amd64.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/native_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/native_export.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/native_subr_amd64.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/native_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/parse_with_padding.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/parse_with_padding_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/parse_with_padding_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/quote.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/quote_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/quote_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_array.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_array_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_array_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_number.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_number_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_number_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_object.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_object_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_object_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_one.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_one_fast.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_one_fast_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_one_fast_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_one_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/skip_one_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/u64toa.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/u64toa_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/u64toa_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/unquote.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/unquote_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/unquote_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/validate_one.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/validate_one_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/validate_one_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/validate_utf8.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/validate_utf8_fast.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/validate_utf8_fast_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/validate_utf8_fast_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/validate_utf8_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/validate_utf8_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/value.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/value_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/value_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vnumber.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vnumber_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vnumber_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vsigned.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vsigned_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vsigned_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vstring.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vstring_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vstring_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vunsigned.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vunsigned_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/avx2/vunsigned_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/dispatch_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/f32toa.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/f64toa.tmpl rename vendor/github.com/bytedance/sonic/internal/native/{fastfloat_amd64_test.tmpl => fastfloat_test.tmpl} (99%) rename vendor/github.com/bytedance/sonic/internal/native/{fastint_amd64_test.tmpl => fastint_test.tmpl} (99%) create mode 100644 vendor/github.com/bytedance/sonic/internal/native/get_by_path.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/html_escape.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/i64toa.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/lookup_small_key.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/lspace.tmpl delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/native_amd64.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/native_export.tmpl rename vendor/github.com/bytedance/sonic/internal/native/{native_amd64_test.tmpl => native_test.tmpl} (98%) create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/f32toa_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/f32toa_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/f32toa_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/f64toa_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/f64toa_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/f64toa_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/get_by_path_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/get_by_path_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/get_by_path_subr_arm64.go rename vendor/github.com/bytedance/sonic/internal/{encoder/utils.go => native/neon/html_escape_arm64.go} (52%) create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/html_escape_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/html_escape_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/i64toa_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/i64toa_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/i64toa_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/lookup_small_key_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/lookup_small_key_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/lookup_small_key_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/lspace_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/lspace_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/lspace_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/native_export_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/parse_with_padding_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/parse_with_padding_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/parse_with_padding_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/quote_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/quote_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/quote_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_array_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_array_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_array_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_number_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_number_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_number_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_object_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_object_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_object_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_one_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_one_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_one_fast_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_one_fast_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_one_fast_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/skip_one_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/u64toa_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/u64toa_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/u64toa_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/unquote_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/unquote_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/unquote_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/validate_one_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/validate_one_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/validate_one_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/validate_utf8_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/validate_utf8_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/validate_utf8_fast_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/validate_utf8_fast_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/validate_utf8_fast_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/validate_utf8_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/value_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/value_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/value_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vnumber_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vnumber_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vnumber_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vsigned_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vsigned_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vsigned_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vstring_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vstring_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vstring_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vunsigned_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vunsigned_arm64.s create mode 100644 vendor/github.com/bytedance/sonic/internal/native/neon/vunsigned_subr_arm64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/parse_with_padding.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/quote.tmpl delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/recover_amd64_test.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/recover_test.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/skip_array.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/skip_number.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/skip_object.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/skip_one.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/skip_one_fast.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/f32toa.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/f32toa_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/f32toa_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/f64toa.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/f64toa_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/f64toa_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/get_by_path.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/get_by_path_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/get_by_path_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/html_escape.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/html_escape_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/html_escape_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/i64toa.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/i64toa_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/i64toa_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/lookup_small_key.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/lookup_small_key_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/lookup_small_key_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/lspace.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/lspace_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/lspace_text_amd64.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/native_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/native_export.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/native_subr_amd64.go delete mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/native_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/parse_with_padding.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/parse_with_padding_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/parse_with_padding_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/quote.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/quote_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/quote_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_array.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_array_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_array_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_number.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_number_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_number_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_object.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_object_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_object_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_one.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_one_fast.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_one_fast_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_one_fast_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_one_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/skip_one_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/u64toa.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/u64toa_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/u64toa_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/unquote.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/unquote_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/unquote_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/validate_one.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/validate_one_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/validate_one_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/validate_utf8.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/validate_utf8_fast.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/validate_utf8_fast_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/validate_utf8_fast_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/validate_utf8_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/validate_utf8_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/value.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/value_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/value_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vnumber.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vnumber_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vnumber_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vsigned.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vsigned_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vsigned_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vstring.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vstring_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vstring_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vunsigned.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vunsigned_subr.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/sse/vunsigned_text_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/native/traceback_test.mock_tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/u64toa.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/unquote.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/validate_one.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/validate_utf8.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/validate_utf8_fast.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/value.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/vnumber.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/vsigned.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/vstring.tmpl create mode 100644 vendor/github.com/bytedance/sonic/internal/native/vunsigned.tmpl rename vendor/github.com/bytedance/sonic/internal/{encoder => optcaching}/asm.s (100%) create mode 100644 vendor/github.com/bytedance/sonic/internal/optcaching/fcache.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/assertI2I.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/base64_amd64.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/base64_compat.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/fastconv.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/gcwb_legacy.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/growslice.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/growslice_legacy.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/pool.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/stubs.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/table.go create mode 100644 vendor/github.com/bytedance/sonic/internal/rt/types.go rename vendor/github.com/{chenzhuoyu/base64x => bytedance/sonic/loader}/LICENSE (100%) create mode 100644 vendor/github.com/bytedance/sonic/loader/funcdata_go123.go rename vendor/github.com/bytedance/sonic/{ => loader}/internal/abi/abi.go (98%) rename vendor/github.com/bytedance/sonic/{ => loader}/internal/abi/abi_amd64.go (99%) rename vendor/github.com/bytedance/sonic/{ => loader}/internal/abi/abi_legacy_amd64.go (99%) rename vendor/github.com/bytedance/sonic/{ => loader}/internal/abi/abi_regabi_amd64.go (99%) rename vendor/github.com/bytedance/sonic/{ => loader}/internal/abi/stubs.go (94%) create mode 100644 vendor/github.com/bytedance/sonic/loader/internal/rt/fastmem.go create mode 100644 vendor/github.com/bytedance/sonic/loader/internal/rt/fastvalue.go create mode 100644 vendor/github.com/bytedance/sonic/loader/internal/rt/stackmap.go create mode 100644 vendor/github.com/cespare/xxhash/v2/testall.sh create mode 100644 vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s rename vendor/github.com/cespare/xxhash/v2/{xxhash_amd64.go => xxhash_asm.go} (53%) delete mode 100644 vendor/github.com/chenzhuoyu/base64x/.gitignore delete mode 100644 vendor/github.com/chenzhuoyu/base64x/.gitmodules delete mode 100644 vendor/github.com/chenzhuoyu/base64x/cpuid.go delete mode 100644 vendor/github.com/chenzhuoyu/iasm/expr/pools.go delete mode 100644 vendor/github.com/chenzhuoyu/iasm/expr/term.go delete mode 100644 vendor/github.com/chenzhuoyu/iasm/x86_64/asm.s delete mode 100644 vendor/github.com/chenzhuoyu/iasm/x86_64/operands.go delete mode 100644 vendor/github.com/chenzhuoyu/iasm/x86_64/pools.go delete mode 100644 vendor/github.com/chenzhuoyu/iasm/x86_64/program.go create mode 100644 vendor/github.com/cloudwego/base64x/.gitignore create mode 100644 vendor/github.com/cloudwego/base64x/.golangci.yaml create mode 100644 vendor/github.com/cloudwego/base64x/.licenserc.yaml create mode 100644 vendor/github.com/cloudwego/base64x/CODE_OF_CONDUCT.md create mode 100644 vendor/github.com/cloudwego/base64x/CONTRIBUTING.md rename vendor/github.com/{chenzhuoyu/iasm => cloudwego/base64x}/LICENSE (100%) rename vendor/{gopkg.in/yaml.v2/LICENSE => github.com/cloudwego/base64x/LICENSE-APACHE} (89%) rename vendor/github.com/{chenzhuoyu => cloudwego}/base64x/Makefile (100%) rename vendor/github.com/{chenzhuoyu => cloudwego}/base64x/README.md (100%) create mode 100644 vendor/github.com/cloudwego/base64x/_typos.toml rename vendor/github.com/{chenzhuoyu => cloudwego}/base64x/base64x.go (88%) create mode 100644 vendor/github.com/cloudwego/base64x/check_branch_name.sh create mode 100644 vendor/github.com/cloudwego/base64x/cpuid.go rename vendor/github.com/{chenzhuoyu => cloudwego}/base64x/faststr.go (65%) rename vendor/github.com/{chenzhuoyu => cloudwego}/base64x/native_amd64.go (62%) rename vendor/github.com/{chenzhuoyu => cloudwego}/base64x/native_subr_amd64.go (100%) rename vendor/github.com/{chenzhuoyu => cloudwego}/base64x/native_text_amd64.go (100%) create mode 100644 vendor/github.com/cloudwego/iasm/LICENSE-APACHE rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/expr/ast.go (91%) rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/expr/errors.go (54%) rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/expr/ops.go (62%) rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/expr/parser.go (92%) rename vendor/github.com/{bytedance/sonic/internal/decoder/generic_stkabi_amd64_test.s => cloudwego/iasm/expr/pools.go} (53%) create mode 100644 vendor/github.com/cloudwego/iasm/expr/term.go rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/expr/utils.go (64%) rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/x86_64/arch.go (91%) create mode 100644 vendor/github.com/cloudwego/iasm/x86_64/asm.s rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/x86_64/assembler.go (98%) rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/x86_64/assembler_alias.go (60%) rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/x86_64/eface.go (64%) rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/x86_64/encodings.go (97%) rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/x86_64/instructions.go (99%) rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/x86_64/instructions_table.go (99%) create mode 100644 vendor/github.com/cloudwego/iasm/x86_64/operands.go create mode 100644 vendor/github.com/cloudwego/iasm/x86_64/pools.go create mode 100644 vendor/github.com/cloudwego/iasm/x86_64/program.go rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/x86_64/registers.go (95%) rename vendor/github.com/{chenzhuoyu => cloudwego}/iasm/x86_64/utils.go (82%) create mode 100644 vendor/github.com/creack/pty/.editorconfig create mode 100644 vendor/github.com/creack/pty/.golangci.yml delete mode 100644 vendor/github.com/creack/pty/Dockerfile.riscv create mode 100644 vendor/github.com/creack/pty/ioctl_inner.go create mode 100644 vendor/github.com/creack/pty/ioctl_legacy.go create mode 100644 vendor/github.com/creack/pty/ztypes_freebsd_riscv64.go create mode 100644 vendor/github.com/creack/pty/ztypes_ppc.go create mode 100644 vendor/github.com/creack/pty/ztypes_sparcx.go delete mode 100644 vendor/github.com/flopp/go-coordsparser/.travis.yml create mode 100644 vendor/github.com/flopp/go-coordsparser/renovate.json create mode 100644 vendor/github.com/flopp/go-staticmaps/Makefile create mode 100644 vendor/github.com/flopp/go-staticmaps/renovate.json delete mode 100644 vendor/github.com/gabriel-vasile/mimetype/mimetype.gif delete mode 100644 vendor/github.com/ghodss/yaml/.gitignore delete mode 100644 vendor/github.com/ghodss/yaml/.travis.yml delete mode 100644 vendor/github.com/ghodss/yaml/README.md delete mode 100644 vendor/github.com/ghodss/yaml/fields.go delete mode 100644 vendor/github.com/ghodss/yaml/yaml.go create mode 100644 vendor/github.com/gin-gonic/gin/codecov.yml create mode 100644 vendor/github.com/go-chi/chi/v5/path_value.go create mode 100644 vendor/github.com/go-chi/chi/v5/path_value_fallback.go create mode 100644 vendor/github.com/go-chi/httprate/limit_key.go create mode 100644 vendor/github.com/go-chi/httprate/local_counter.go create mode 100644 vendor/github.com/go-chi/httprate/local_counter_go1.20.go create mode 100644 vendor/github.com/go-chi/httprate/local_counter_go1.21.go delete mode 100644 vendor/github.com/go-echarts/go-echarts/v2/actions/global.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/event/event.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/angle_axis.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/axis_pointer.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/brush.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/calendar.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/data_zoom.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/dataset.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/geo.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/gl_3d.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/grid.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/js.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/legend.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/parallel.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/parallel_axis.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/polar.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/primitivie.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/radar.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/radius_axis.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/series_bar.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/series_effect_scatter.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/series_line.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/series_pie.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/series_radar.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/series_scatter.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/single_axis.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/text_style.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/title.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/toolbox.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/tooltip.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/visual_map.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/x_axis.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/opts/y_axis.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/render/chart.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/render/page.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/render/render.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/templates/base_element.tpl create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/templates/base_option.tpl create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/templates/base_script.tpl delete mode 100644 vendor/github.com/go-echarts/go-echarts/v2/types/lang.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/types/primitivie.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/util/default_val_setter.go create mode 100644 vendor/github.com/go-echarts/go-echarts/v2/util/id_gennerator.go create mode 100644 vendor/github.com/go-playground/validator/v10/options.go rename vendor/github.com/go-task/slim-sprig/{ => v3}/.editorconfig (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/.gitattributes (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/.gitignore (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/CHANGELOG.md (95%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/LICENSE.txt (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/README.md (88%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/Taskfile.yml (89%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/crypto.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/date.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/defaults.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/dict.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/doc.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/functions.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/list.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/network.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/numeric.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/reflect.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/regex.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/strings.go (100%) rename vendor/github.com/go-task/slim-sprig/{ => v3}/url.go (100%) rename vendor/github.com/google/flatbuffers/{LICENSE.txt => LICENSE} (100%) create mode 100644 vendor/github.com/google/uuid/version6.go create mode 100644 vendor/github.com/google/uuid/version7.go create mode 100644 vendor/github.com/gorilla/securecookie/.editorconfig create mode 100644 vendor/github.com/gorilla/securecookie/.gitignore delete mode 100644 vendor/github.com/gorilla/securecookie/.travis.yml create mode 100644 vendor/github.com/gorilla/securecookie/Makefile delete mode 100644 vendor/github.com/gorilla/securecookie/fuzz.go delete mode 100644 vendor/github.com/jackc/chunkreader/v2/.travis.yml delete mode 100644 vendor/github.com/jackc/chunkreader/v2/README.md delete mode 100644 vendor/github.com/jackc/chunkreader/v2/chunkreader.go delete mode 100644 vendor/github.com/jackc/pgconn/.gitignore delete mode 100644 vendor/github.com/jackc/pgconn/CHANGELOG.md delete mode 100644 vendor/github.com/jackc/pgconn/LICENSE delete mode 100644 vendor/github.com/jackc/pgconn/README.md delete mode 100644 vendor/github.com/jackc/pgconn/doc.go delete mode 100644 vendor/github.com/jackc/pgconn/stmtcache/lru.go delete mode 100644 vendor/github.com/jackc/pgconn/stmtcache/stmtcache.go delete mode 100644 vendor/github.com/jackc/pgio/.travis.yml delete mode 100644 vendor/github.com/jackc/pgio/LICENSE delete mode 100644 vendor/github.com/jackc/pgio/README.md delete mode 100644 vendor/github.com/jackc/pgproto3/v2/.travis.yml delete mode 100644 vendor/github.com/jackc/pgproto3/v2/LICENSE delete mode 100644 vendor/github.com/jackc/pgproto3/v2/README.md delete mode 100644 vendor/github.com/jackc/pgproto3/v2/chunkreader.go delete mode 100644 vendor/github.com/jackc/pgproto3/v2/doc.go delete mode 100644 vendor/github.com/jackc/pgproto3/v2/frontend.go delete mode 100644 vendor/github.com/jackc/pgservicefile/.travis.yml delete mode 100644 vendor/github.com/jackc/pgtype/CHANGELOG.md delete mode 100644 vendor/github.com/jackc/pgtype/README.md delete mode 100644 vendor/github.com/jackc/pgtype/aclitem.go delete mode 100644 vendor/github.com/jackc/pgtype/aclitem_array.go delete mode 100644 vendor/github.com/jackc/pgtype/array_type.go delete mode 100644 vendor/github.com/jackc/pgtype/bit.go delete mode 100644 vendor/github.com/jackc/pgtype/bool.go delete mode 100644 vendor/github.com/jackc/pgtype/bool_array.go delete mode 100644 vendor/github.com/jackc/pgtype/box.go delete mode 100644 vendor/github.com/jackc/pgtype/bpchar.go delete mode 100644 vendor/github.com/jackc/pgtype/bpchar_array.go delete mode 100644 vendor/github.com/jackc/pgtype/bytea.go delete mode 100644 vendor/github.com/jackc/pgtype/bytea_array.go delete mode 100644 vendor/github.com/jackc/pgtype/cid.go delete mode 100644 vendor/github.com/jackc/pgtype/cidr.go delete mode 100644 vendor/github.com/jackc/pgtype/cidr_array.go delete mode 100644 vendor/github.com/jackc/pgtype/circle.go delete mode 100644 vendor/github.com/jackc/pgtype/composite_fields.go delete mode 100644 vendor/github.com/jackc/pgtype/composite_type.go delete mode 100644 vendor/github.com/jackc/pgtype/convert.go delete mode 100644 vendor/github.com/jackc/pgtype/database_sql.go delete mode 100644 vendor/github.com/jackc/pgtype/date.go delete mode 100644 vendor/github.com/jackc/pgtype/date_array.go delete mode 100644 vendor/github.com/jackc/pgtype/daterange.go delete mode 100644 vendor/github.com/jackc/pgtype/enum_array.go delete mode 100644 vendor/github.com/jackc/pgtype/enum_type.go delete mode 100644 vendor/github.com/jackc/pgtype/float4.go delete mode 100644 vendor/github.com/jackc/pgtype/float4_array.go delete mode 100644 vendor/github.com/jackc/pgtype/float8.go delete mode 100644 vendor/github.com/jackc/pgtype/float8_array.go delete mode 100644 vendor/github.com/jackc/pgtype/generic_binary.go delete mode 100644 vendor/github.com/jackc/pgtype/generic_text.go delete mode 100644 vendor/github.com/jackc/pgtype/hstore.go delete mode 100644 vendor/github.com/jackc/pgtype/hstore_array.go delete mode 100644 vendor/github.com/jackc/pgtype/inet.go delete mode 100644 vendor/github.com/jackc/pgtype/inet_array.go delete mode 100644 vendor/github.com/jackc/pgtype/int2.go delete mode 100644 vendor/github.com/jackc/pgtype/int2_array.go delete mode 100644 vendor/github.com/jackc/pgtype/int4.go delete mode 100644 vendor/github.com/jackc/pgtype/int4_array.go delete mode 100644 vendor/github.com/jackc/pgtype/int4_multirange.go delete mode 100644 vendor/github.com/jackc/pgtype/int4range.go delete mode 100644 vendor/github.com/jackc/pgtype/int8.go delete mode 100644 vendor/github.com/jackc/pgtype/int8_array.go delete mode 100644 vendor/github.com/jackc/pgtype/int8_multirange.go delete mode 100644 vendor/github.com/jackc/pgtype/int8range.go delete mode 100644 vendor/github.com/jackc/pgtype/interval.go delete mode 100644 vendor/github.com/jackc/pgtype/json.go delete mode 100644 vendor/github.com/jackc/pgtype/json_array.go delete mode 100644 vendor/github.com/jackc/pgtype/jsonb.go delete mode 100644 vendor/github.com/jackc/pgtype/jsonb_array.go delete mode 100644 vendor/github.com/jackc/pgtype/line.go delete mode 100644 vendor/github.com/jackc/pgtype/lseg.go delete mode 100644 vendor/github.com/jackc/pgtype/ltree.go delete mode 100644 vendor/github.com/jackc/pgtype/macaddr.go delete mode 100644 vendor/github.com/jackc/pgtype/macaddr_array.go delete mode 100644 vendor/github.com/jackc/pgtype/multirange.go delete mode 100644 vendor/github.com/jackc/pgtype/name.go delete mode 100644 vendor/github.com/jackc/pgtype/num_multirange.go delete mode 100644 vendor/github.com/jackc/pgtype/numeric.go delete mode 100644 vendor/github.com/jackc/pgtype/numeric_array.go delete mode 100644 vendor/github.com/jackc/pgtype/numrange.go delete mode 100644 vendor/github.com/jackc/pgtype/oid.go delete mode 100644 vendor/github.com/jackc/pgtype/oid_value.go delete mode 100644 vendor/github.com/jackc/pgtype/path.go delete mode 100644 vendor/github.com/jackc/pgtype/pgtype.go delete mode 100644 vendor/github.com/jackc/pgtype/pguint32.go delete mode 100644 vendor/github.com/jackc/pgtype/point.go delete mode 100644 vendor/github.com/jackc/pgtype/polygon.go delete mode 100644 vendor/github.com/jackc/pgtype/qchar.go delete mode 100644 vendor/github.com/jackc/pgtype/record.go delete mode 100644 vendor/github.com/jackc/pgtype/record_array.go delete mode 100644 vendor/github.com/jackc/pgtype/text.go delete mode 100644 vendor/github.com/jackc/pgtype/text_array.go delete mode 100644 vendor/github.com/jackc/pgtype/tid.go delete mode 100644 vendor/github.com/jackc/pgtype/time.go delete mode 100644 vendor/github.com/jackc/pgtype/timestamp.go delete mode 100644 vendor/github.com/jackc/pgtype/timestamp_array.go delete mode 100644 vendor/github.com/jackc/pgtype/timestamptz.go delete mode 100644 vendor/github.com/jackc/pgtype/timestamptz_array.go delete mode 100644 vendor/github.com/jackc/pgtype/tsrange.go delete mode 100644 vendor/github.com/jackc/pgtype/tsrange_array.go delete mode 100644 vendor/github.com/jackc/pgtype/tstzrange.go delete mode 100644 vendor/github.com/jackc/pgtype/tstzrange_array.go delete mode 100644 vendor/github.com/jackc/pgtype/typed_array.go.erb delete mode 100644 vendor/github.com/jackc/pgtype/typed_array_gen.sh delete mode 100644 vendor/github.com/jackc/pgtype/typed_multirange.go.erb delete mode 100644 vendor/github.com/jackc/pgtype/typed_multirange_gen.sh delete mode 100644 vendor/github.com/jackc/pgtype/typed_range.go.erb delete mode 100644 vendor/github.com/jackc/pgtype/typed_range_gen.sh delete mode 100644 vendor/github.com/jackc/pgtype/unknown.go delete mode 100644 vendor/github.com/jackc/pgtype/uuid.go delete mode 100644 vendor/github.com/jackc/pgtype/uuid_array.go delete mode 100644 vendor/github.com/jackc/pgtype/varbit.go delete mode 100644 vendor/github.com/jackc/pgtype/varchar.go delete mode 100644 vendor/github.com/jackc/pgtype/varchar_array.go delete mode 100644 vendor/github.com/jackc/pgtype/xid.go delete mode 100644 vendor/github.com/jackc/pgx/v4/CHANGELOG.md delete mode 100644 vendor/github.com/jackc/pgx/v4/LICENSE delete mode 100644 vendor/github.com/jackc/pgx/v4/README.md delete mode 100644 vendor/github.com/jackc/pgx/v4/batch.go delete mode 100644 vendor/github.com/jackc/pgx/v4/conn.go delete mode 100644 vendor/github.com/jackc/pgx/v4/doc.go delete mode 100644 vendor/github.com/jackc/pgx/v4/extended_query_builder.go delete mode 100644 vendor/github.com/jackc/pgx/v4/go_stdlib.go delete mode 100644 vendor/github.com/jackc/pgx/v4/logger.go delete mode 100644 vendor/github.com/jackc/pgx/v4/messages.go delete mode 100644 vendor/github.com/jackc/pgx/v4/rows.go delete mode 100644 vendor/github.com/jackc/pgx/v4/values.go rename vendor/github.com/jackc/pgx/{v4 => v5}/.gitignore (92%) create mode 100644 vendor/github.com/jackc/pgx/v5/CHANGELOG.md create mode 100644 vendor/github.com/jackc/pgx/v5/CONTRIBUTING.md rename vendor/github.com/jackc/{pgtype => pgx/v5}/LICENSE (100%) create mode 100644 vendor/github.com/jackc/pgx/v5/README.md create mode 100644 vendor/github.com/jackc/pgx/v5/Rakefile create mode 100644 vendor/github.com/jackc/pgx/v5/batch.go create mode 100644 vendor/github.com/jackc/pgx/v5/conn.go rename vendor/github.com/jackc/pgx/{v4 => v5}/copy_from.go (51%) create mode 100644 vendor/github.com/jackc/pgx/v5/derived_types.go create mode 100644 vendor/github.com/jackc/pgx/v5/doc.go create mode 100644 vendor/github.com/jackc/pgx/v5/extended_query_builder.go create mode 100644 vendor/github.com/jackc/pgx/v5/internal/iobufpool/iobufpool.go create mode 100644 vendor/github.com/jackc/pgx/v5/internal/pgio/README.md rename vendor/github.com/jackc/{ => pgx/v5/internal}/pgio/doc.go (100%) rename vendor/github.com/jackc/{ => pgx/v5/internal}/pgio/write.go (100%) rename vendor/github.com/jackc/pgx/{v4 => v5}/internal/sanitize/sanitize.go (96%) create mode 100644 vendor/github.com/jackc/pgx/v5/internal/stmtcache/lru_cache.go create mode 100644 vendor/github.com/jackc/pgx/v5/internal/stmtcache/stmtcache.go create mode 100644 vendor/github.com/jackc/pgx/v5/internal/stmtcache/unlimited_cache.go rename vendor/github.com/jackc/pgx/{v4 => v5}/large_objects.go (63%) create mode 100644 vendor/github.com/jackc/pgx/v5/named_args.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgconn/README.md rename vendor/github.com/jackc/{ => pgx/v5}/pgconn/auth_scram.go (95%) rename vendor/github.com/jackc/{ => pgx/v5}/pgconn/config.go (82%) rename vendor/github.com/jackc/{pgconn/internal => pgx/v5/pgconn}/ctxwatch/context_watcher.go (71%) rename vendor/github.com/jackc/{ => pgx/v5}/pgconn/defaults.go (97%) rename vendor/github.com/jackc/{ => pgx/v5}/pgconn/defaults_windows.go (97%) create mode 100644 vendor/github.com/jackc/pgx/v5/pgconn/doc.go rename vendor/github.com/jackc/{ => pgx/v5}/pgconn/errors.go (54%) create mode 100644 vendor/github.com/jackc/pgx/v5/pgconn/internal/bgreader/bgreader.go rename vendor/github.com/jackc/{ => pgx/v5}/pgconn/krb5.go (94%) rename vendor/github.com/jackc/{ => pgx/v5}/pgconn/pgconn.go (54%) create mode 100644 vendor/github.com/jackc/pgx/v5/pgproto3/README.md rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/authentication_cleartext_password.go (97%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/authentication_gss.go (96%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/authentication_gss_continue.go (96%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/authentication_md5_password.go (97%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/authentication_ok.go (97%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/authentication_sasl.go (86%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/authentication_sasl_continue.go (97%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/authentication_sasl_final.go (97%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/backend.go (66%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/backend_key_data.go (97%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/big_endian.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/bind.go (99%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/bind_complete.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/cancel_request.go (96%) create mode 100644 vendor/github.com/jackc/pgx/v5/pgproto3/chunkreader.go rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/close.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/close_complete.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/command_complete.go (90%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/copy_both_response.go (98%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/copy_data.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/copy_done.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/copy_fail.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/copy_in_response.go (98%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/copy_out_response.go (98%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/data_row.go (92%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/describe.go (100%) create mode 100644 vendor/github.com/jackc/pgx/v5/pgproto3/doc.go rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/empty_query_response.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/error_response.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/execute.go (97%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/flush.go (100%) create mode 100644 vendor/github.com/jackc/pgx/v5/pgproto3/frontend.go rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/function_call.go (98%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/function_call_response.go (98%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/gss_enc_request.go (96%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/gss_response.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/no_data.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/notice_response.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/notification_response.go (90%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/parameter_description.go (97%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/parameter_status.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/parse.go (98%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/parse_complete.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/password_message.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/pgproto3.go (73%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/portal_suspended.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/query.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/ready_for_query.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/row_description.go (99%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/sasl_initial_response.go (91%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/sasl_response.go (89%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/ssl_request.go (96%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/startup_message.go (92%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/sync.go (100%) rename vendor/github.com/jackc/{pgproto3/v2 => pgx/v5/pgproto3}/terminate.go (100%) create mode 100644 vendor/github.com/jackc/pgx/v5/pgproto3/trace.go rename vendor/github.com/jackc/{ => pgx/v5}/pgtype/array.go (63%) create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/array_codec.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/bits.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/bool.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/box.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/builtin_wrappers.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/bytea.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/circle.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/composite.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/convert.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/date.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/doc.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/enum_codec.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/float4.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/float8.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/hstore.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/inet.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/int.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/int.go.erb create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/int_test.go.erb create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/integration_benchmark_test.go.erb create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/integration_benchmark_test_gen.sh create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/interval.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/json.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/jsonb.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/line.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/lseg.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/ltree.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/macaddr.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/multirange.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/numeric.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/path.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/pgtype.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/pgtype_default.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/point.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/polygon.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/qchar.go rename vendor/github.com/jackc/{ => pgx/v5}/pgtype/range.go (76%) create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/range_codec.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/record_codec.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/register_default_pg_types.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/register_default_pg_types_disabled.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/text.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/text_format_only_codec.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/tid.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/time.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/timestamp.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/timestamptz.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/uint32.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/uuid.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgtype/xml.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgxpool/batch_results.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgxpool/conn.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgxpool/doc.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgxpool/pool.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgxpool/rows.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgxpool/stat.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgxpool/tracer.go create mode 100644 vendor/github.com/jackc/pgx/v5/pgxpool/tx.go create mode 100644 vendor/github.com/jackc/pgx/v5/rows.go rename vendor/github.com/jackc/pgx/{v4 => v5}/stdlib/sql.go (70%) create mode 100644 vendor/github.com/jackc/pgx/v5/tracer.go rename vendor/github.com/jackc/pgx/{v4 => v5}/tx.go (70%) create mode 100644 vendor/github.com/jackc/pgx/v5/values.go create mode 100644 vendor/github.com/jackc/puddle/v2/CHANGELOG.md rename vendor/github.com/jackc/{chunkreader => puddle}/v2/LICENSE (96%) create mode 100644 vendor/github.com/jackc/puddle/v2/README.md create mode 100644 vendor/github.com/jackc/puddle/v2/context.go create mode 100644 vendor/github.com/jackc/puddle/v2/doc.go create mode 100644 vendor/github.com/jackc/puddle/v2/internal/genstack/gen_stack.go create mode 100644 vendor/github.com/jackc/puddle/v2/internal/genstack/stack.go create mode 100644 vendor/github.com/jackc/puddle/v2/log.go create mode 100644 vendor/github.com/jackc/puddle/v2/nanotime.go create mode 100644 vendor/github.com/jackc/puddle/v2/pool.go create mode 100644 vendor/github.com/jackc/puddle/v2/resource_list.go create mode 100644 vendor/github.com/jaypipes/ghw/SECURITY.md create mode 100644 vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_darwin.go delete mode 100644 vendor/github.com/klauspost/compress/flate/deflate.go delete mode 100644 vendor/github.com/klauspost/compress/flate/dict_decoder.go delete mode 100644 vendor/github.com/klauspost/compress/flate/fast_encoder.go delete mode 100644 vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go delete mode 100644 vendor/github.com/klauspost/compress/flate/huffman_code.go delete mode 100644 vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go delete mode 100644 vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go delete mode 100644 vendor/github.com/klauspost/compress/flate/inflate.go delete mode 100644 vendor/github.com/klauspost/compress/flate/inflate_gen.go delete mode 100644 vendor/github.com/klauspost/compress/flate/level1.go delete mode 100644 vendor/github.com/klauspost/compress/flate/level2.go delete mode 100644 vendor/github.com/klauspost/compress/flate/level3.go delete mode 100644 vendor/github.com/klauspost/compress/flate/level4.go delete mode 100644 vendor/github.com/klauspost/compress/flate/level5.go delete mode 100644 vendor/github.com/klauspost/compress/flate/level6.go delete mode 100644 vendor/github.com/klauspost/compress/flate/regmask_amd64.go delete mode 100644 vendor/github.com/klauspost/compress/flate/regmask_other.go delete mode 100644 vendor/github.com/klauspost/compress/flate/stateless.go delete mode 100644 vendor/github.com/klauspost/compress/flate/token.go delete mode 100644 vendor/github.com/klauspost/compress/huff0/bytereader.go create mode 100644 vendor/github.com/klauspost/reedsolomon/galois_gen_arm64.go create mode 100644 vendor/github.com/klauspost/reedsolomon/galois_gen_arm64.s create mode 100644 vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.go create mode 100644 vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.s create mode 100644 vendor/github.com/klauspost/reedsolomon/galois_gen_switch_arm64.go create mode 100644 vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_amd64.go create mode 100644 vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_arm64.go create mode 100644 vendor/github.com/klauspost/reedsolomon/galois_nopshufb_amd64.go delete mode 100644 vendor/github.com/klauspost/reedsolomon/galois_notamd64.go create mode 100644 vendor/github.com/klauspost/reedsolomon/race.go create mode 100644 vendor/github.com/klauspost/reedsolomon/race_none.go create mode 100644 vendor/github.com/klauspost/reedsolomon/xor_arm64.go create mode 100644 vendor/github.com/klauspost/reedsolomon/xor_arm64.s create mode 100644 vendor/github.com/klauspost/reedsolomon/xor_noasm.go create mode 100644 vendor/github.com/leodido/go-urn/kind.go create mode 100644 vendor/github.com/leodido/go-urn/options.go create mode 100644 vendor/github.com/leodido/go-urn/parsing_mode.go create mode 100644 vendor/github.com/leodido/go-urn/scim.go create mode 100644 vendor/github.com/leodido/go-urn/scim/schema/type.go create mode 100644 vendor/github.com/leodido/go-urn/urn8141.go create mode 100644 vendor/github.com/mazznoer/csscolorparser/.gitignore create mode 100644 vendor/github.com/mazznoer/csscolorparser/CHANGELOG.md create mode 100644 vendor/github.com/mazznoer/csscolorparser/LICENSE create mode 100644 vendor/github.com/mazznoer/csscolorparser/Makefile create mode 100644 vendor/github.com/mazznoer/csscolorparser/README.md create mode 100644 vendor/github.com/mazznoer/csscolorparser/colorparser.go create mode 100644 vendor/github.com/mazznoer/csscolorparser/named_colors.go create mode 100644 vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/gocovmerge.go create mode 100644 vendor/github.com/pelletier/go-toml/v2/unstable/unmarshaler.go create mode 100644 vendor/github.com/quic-go/quic-go/connection_logging.go delete mode 100644 vendor/github.com/quic-go/quic-go/internal/logutils/frame.go rename vendor/github.com/quic-go/quic-go/internal/{handshake => qtls}/conn.go (97%) delete mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/byteorder.go delete mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/byteorder_big_endian.go delete mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/ip.go delete mode 100644 vendor/github.com/quic-go/quic-go/internal/utils/minmax.go delete mode 100644 vendor/github.com/quic-go/quic-go/window_update_queue.go create mode 100644 vendor/github.com/rs/cors/internal/sortedset.go delete mode 100644 vendor/github.com/spf13/cobra/active_help.md delete mode 100644 vendor/github.com/spf13/cobra/bash_completions.md delete mode 100644 vendor/github.com/spf13/cobra/fish_completions.md delete mode 100644 vendor/github.com/spf13/cobra/powershell_completions.md delete mode 100644 vendor/github.com/spf13/cobra/projects_using_cobra.md delete mode 100644 vendor/github.com/spf13/cobra/shell_completions.md delete mode 100644 vendor/github.com/spf13/cobra/user_guide.md delete mode 100644 vendor/github.com/spf13/cobra/zsh_completions.md create mode 100644 vendor/github.com/tkrajina/gpxgo/gpx/fixedpoint_float64.go create mode 100644 vendor/go.etcd.io/bbolt/.go-version create mode 100644 vendor/go.uber.org/mock/mockgen/deprecated.go rename vendor/go.uber.org/mock/mockgen/{generic_go118.go => generic.go} (84%) delete mode 100644 vendor/go.uber.org/mock/mockgen/generic_notgo118.go create mode 100644 vendor/go.uber.org/mock/mockgen/gob.go create mode 100644 vendor/go.uber.org/mock/mockgen/package_mode.go delete mode 100644 vendor/go.uber.org/mock/mockgen/reflect.go delete mode 100644 vendor/golang.org/x/crypto/blake2s/register.go delete mode 100644 vendor/golang.org/x/crypto/curve25519/curve25519_compat.go delete mode 100644 vendor/golang.org/x/crypto/curve25519/curve25519_go120.go delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/README delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/fe.go delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64.go delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64.s delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64_noasm.go delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64.go delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64.s delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64_noasm.go delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/fe_generic.go delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/sync.checkpoint delete mode 100644 vendor/golang.org/x/crypto/curve25519/internal/field/sync.sh delete mode 100644 vendor/golang.org/x/crypto/sha3/hashes_generic.go create mode 100644 vendor/golang.org/x/crypto/sha3/hashes_noasm.go delete mode 100644 vendor/golang.org/x/crypto/sha3/register.go delete mode 100644 vendor/golang.org/x/crypto/sha3/shake_generic.go create mode 100644 vendor/golang.org/x/crypto/sha3/shake_noasm.go delete mode 100644 vendor/golang.org/x/crypto/sha3/xor_generic.go delete mode 100644 vendor/golang.org/x/crypto/sha3/xor_unaligned.go delete mode 100644 vendor/golang.org/x/image/AUTHORS delete mode 100644 vendor/golang.org/x/image/CONTRIBUTORS delete mode 100644 vendor/golang.org/x/image/draw/draw_go117.go create mode 100644 vendor/golang.org/x/net/http2/config.go create mode 100644 vendor/golang.org/x/net/http2/config_go124.go create mode 100644 vendor/golang.org/x/net/http2/config_pre_go124.go create mode 100644 vendor/golang.org/x/net/http2/timer.go rename vendor/{github.com/ghodss/yaml => golang.org/x/sync}/LICENSE (53%) create mode 100644 vendor/golang.org/x/sync/PATENTS create mode 100644 vendor/golang.org/x/sync/errgroup/errgroup.go create mode 100644 vendor/golang.org/x/sync/errgroup/go120.go create mode 100644 vendor/golang.org/x/sync/errgroup/pre_go120.go create mode 100644 vendor/golang.org/x/sync/semaphore/semaphore.go create mode 100644 vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go delete mode 100644 vendor/golang.org/x/sys/execabs/execabs.go delete mode 100644 vendor/golang.org/x/sys/execabs/execabs_go118.go delete mode 100644 vendor/golang.org/x/sys/execabs/execabs_go119.go create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_linux.go create mode 100644 vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go delete mode 100644 vendor/golang.org/x/tools/cmd/stringer/stringer.go create mode 100644 vendor/golang.org/x/tools/cover/profile.go create mode 100644 vendor/golang.org/x/tools/go/ast/inspector/iter.go delete mode 100644 vendor/golang.org/x/tools/go/internal/packagesdriver/sizes.go create mode 100644 vendor/golang.org/x/tools/go/types/typeutil/callee.go create mode 100644 vendor/golang.org/x/tools/go/types/typeutil/imports.go create mode 100644 vendor/golang.org/x/tools/go/types/typeutil/map.go create mode 100644 vendor/golang.org/x/tools/go/types/typeutil/methodsetcache.go create mode 100644 vendor/golang.org/x/tools/go/types/typeutil/ui.go create mode 100644 vendor/golang.org/x/tools/internal/aliases/aliases.go create mode 100644 vendor/golang.org/x/tools/internal/aliases/aliases_go122.go create mode 100644 vendor/golang.org/x/tools/internal/event/keys/util.go delete mode 100644 vendor/golang.org/x/tools/internal/event/tag/tag.go delete mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk.go delete mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_darwin.go delete mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_fileno.go delete mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_ino.go delete mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_namlen_bsd.go delete mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_namlen_linux.go delete mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_portable.go delete mode 100644 vendor/golang.org/x/tools/internal/fastwalk/fastwalk_unix.go delete mode 100644 vendor/golang.org/x/tools/internal/gcimporter/newInterface10.go delete mode 100644 vendor/golang.org/x/tools/internal/gcimporter/newInterface11.go create mode 100644 vendor/golang.org/x/tools/internal/gcimporter/predeclared.go delete mode 100644 vendor/golang.org/x/tools/internal/gcimporter/support_go117.go delete mode 100644 vendor/golang.org/x/tools/internal/gcimporter/support_go118.go delete mode 100644 vendor/golang.org/x/tools/internal/gcimporter/unified_no.go delete mode 100644 vendor/golang.org/x/tools/internal/gcimporter/unified_yes.go delete mode 100644 vendor/golang.org/x/tools/internal/gcimporter/ureader_no.go delete mode 100644 vendor/golang.org/x/tools/internal/imports/zstdlib.go delete mode 100644 vendor/golang.org/x/tools/internal/pkgbits/frames_go1.go delete mode 100644 vendor/golang.org/x/tools/internal/pkgbits/frames_go17.go create mode 100644 vendor/golang.org/x/tools/internal/pkgbits/version.go create mode 100644 vendor/golang.org/x/tools/internal/stdlib/manifest.go create mode 100644 vendor/golang.org/x/tools/internal/stdlib/stdlib.go delete mode 100644 vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go delete mode 100644 vendor/golang.org/x/tools/internal/typeparams/enabled_go117.go delete mode 100644 vendor/golang.org/x/tools/internal/typeparams/enabled_go118.go create mode 100644 vendor/golang.org/x/tools/internal/typeparams/free.go delete mode 100644 vendor/golang.org/x/tools/internal/typeparams/typeparams_go117.go delete mode 100644 vendor/golang.org/x/tools/internal/typeparams/typeparams_go118.go create mode 100644 vendor/golang.org/x/tools/internal/typesinternal/element.go delete mode 100644 vendor/golang.org/x/tools/internal/typesinternal/objectpath.go create mode 100644 vendor/golang.org/x/tools/internal/typesinternal/recv.go create mode 100644 vendor/golang.org/x/tools/internal/typesinternal/toonew.go delete mode 100644 vendor/golang.org/x/tools/internal/typesinternal/types_118.go create mode 100644 vendor/golang.org/x/tools/internal/versions/constraint.go create mode 100644 vendor/golang.org/x/tools/internal/versions/constraint_go121.go create mode 100644 vendor/golang.org/x/tools/internal/versions/features.go create mode 100644 vendor/golang.org/x/tools/internal/versions/gover.go create mode 100644 vendor/golang.org/x/tools/internal/versions/types.go create mode 100644 vendor/golang.org/x/tools/internal/versions/versions.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/bind_std.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/bind_windows.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/boundif_android.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/conn.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/controlfns.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/controlfns_linux.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/controlfns_unix.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/controlfns_windows.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/default.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/errors_default.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/errors_linux.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/features_default.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/features_linux.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/gso_default.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/gso_linux.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/mark_default.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/mark_unix.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/sticky_default.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/sticky_linux.go create mode 100644 vendor/golang.zx2c4.com/wireguard/conn/winrio/rio_windows.go create mode 100644 vendor/golang.zx2c4.com/wireguard/tun/checksum.go create mode 100644 vendor/golang.zx2c4.com/wireguard/tun/errors.go create mode 100644 vendor/golang.zx2c4.com/wireguard/tun/offload_linux.go create mode 100644 vendor/google.golang.org/protobuf/internal/editiondefaults/defaults.go create mode 100644 vendor/google.golang.org/protobuf/internal/editiondefaults/editions_defaults.binpb create mode 100644 vendor/google.golang.org/protobuf/internal/editionssupport/editions.go create mode 100644 vendor/google.golang.org/protobuf/internal/filedesc/editions.go create mode 100644 vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go delete mode 100644 vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go create mode 100644 vendor/google.golang.org/protobuf/internal/impl/equal.go delete mode 100644 vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go delete mode 100644 vendor/google.golang.org/protobuf/internal/strs/strings_pure.go rename vendor/google.golang.org/protobuf/internal/strs/{strings_unsafe.go => strings_unsafe_go120.go} (97%) create mode 100644 vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go create mode 100644 vendor/google.golang.org/protobuf/reflect/protodesc/editions.go delete mode 100644 vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go rename vendor/google.golang.org/protobuf/reflect/protoreflect/{value_unsafe.go => value_unsafe_go120.go} (93%) create mode 100644 vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go create mode 100644 vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go delete mode 100644 vendor/gopkg.in/yaml.v2/.travis.yml delete mode 100644 vendor/gopkg.in/yaml.v2/LICENSE.libyaml delete mode 100644 vendor/gopkg.in/yaml.v2/NOTICE delete mode 100644 vendor/gopkg.in/yaml.v2/README.md delete mode 100644 vendor/gopkg.in/yaml.v2/apic.go delete mode 100644 vendor/gopkg.in/yaml.v2/decode.go delete mode 100644 vendor/gopkg.in/yaml.v2/emitterc.go delete mode 100644 vendor/gopkg.in/yaml.v2/encode.go delete mode 100644 vendor/gopkg.in/yaml.v2/parserc.go delete mode 100644 vendor/gopkg.in/yaml.v2/readerc.go delete mode 100644 vendor/gopkg.in/yaml.v2/resolve.go delete mode 100644 vendor/gopkg.in/yaml.v2/scannerc.go delete mode 100644 vendor/gopkg.in/yaml.v2/sorter.go delete mode 100644 vendor/gopkg.in/yaml.v2/writerc.go delete mode 100644 vendor/gopkg.in/yaml.v2/yaml.go delete mode 100644 vendor/gopkg.in/yaml.v2/yamlh.go delete mode 100644 vendor/gopkg.in/yaml.v2/yamlprivateh.go create mode 100644 vendor/gorm.io/driver/postgres/.gitignore create mode 100644 vendor/gorm.io/driver/postgres/error_translator.go rename vendor/gorm.io/gorm/{License => LICENSE} (100%) create mode 100644 vendor/gorm.io/gorm/migrator/table_type.go delete mode 100644 vendor/gorm.io/gorm/schema/check.go create mode 100644 vendor/gorm.io/gorm/schema/constraint.go delete mode 100644 vendor/nhooyr.io/websocket/.gitignore delete mode 100644 vendor/nhooyr.io/websocket/accept_js.go delete mode 100644 vendor/nhooyr.io/websocket/close_notjs.go delete mode 100644 vendor/nhooyr.io/websocket/compress_notjs.go delete mode 100644 vendor/nhooyr.io/websocket/conn_notjs.go create mode 100644 vendor/nhooyr.io/websocket/internal/util/util.go create mode 100644 vendor/nhooyr.io/websocket/make.sh create mode 100644 vendor/nhooyr.io/websocket/mask.go create mode 100644 vendor/nhooyr.io/websocket/mask_amd64.s create mode 100644 vendor/nhooyr.io/websocket/mask_arm64.s create mode 100644 vendor/nhooyr.io/websocket/mask_asm.go create mode 100644 vendor/nhooyr.io/websocket/mask_go.go create mode 100644 vendor/nhooyr.io/websocket/netconn_js.go create mode 100644 vendor/nhooyr.io/websocket/netconn_notjs.go diff --git a/go.mod b/go.mod index 6c2e0d89..f67492fb 100644 --- a/go.mod +++ b/go.mod @@ -1,123 +1,120 @@ module github.com/skycoin/skywire-services -go 1.21 +go 1.22.0 -toolchain go1.21.12 +toolchain go1.23.1 require ( - github.com/bitfield/script v0.22.1 + github.com/bitfield/script v0.23.0 github.com/dgraph-io/badger/v3 v3.2103.2 github.com/docker/docker v1.13.1 - github.com/flopp/go-staticmaps v0.0.0-20220221183018-c226716bec53 - github.com/go-chi/chi/v5 v5.0.11 - github.com/go-chi/httprate v0.9.0 - github.com/go-echarts/go-echarts/v2 v2.3.3 - github.com/go-playground/validator/v10 v10.15.1 + github.com/flopp/go-staticmaps v0.0.0-20240606055734-0bdd9c1c1478 + github.com/go-chi/chi/v5 v5.1.0 + github.com/go-chi/httprate v0.14.1 + github.com/go-echarts/go-echarts/v2 v2.4.3 + github.com/go-playground/validator/v10 v10.22.1 github.com/go-redis/redis v6.15.9+incompatible github.com/go-redis/redis/v8 v8.11.5 github.com/golang/geo v0.0.0-20230421003525-6adc56603217 - github.com/google/uuid v1.3.1 - github.com/hashicorp/yamux v0.1.1 + github.com/google/uuid v1.6.0 + github.com/hashicorp/yamux v0.1.2 github.com/ivanpirog/coloredcobra v1.0.1 github.com/james-barrow/golang-ipc v1.2.4 github.com/json-iterator/go v1.1.12 - github.com/rs/cors v1.8.2 + github.com/rs/cors v1.11.1 github.com/sirupsen/logrus v1.9.3 github.com/skycoin/dmsg v1.3.28 github.com/skycoin/skywire v1.3.28 github.com/skycoin/skywire-utilities v1.3.25 github.com/songgao/water v0.0.0-20200317203138-2b4b6d7c09d8 - github.com/spf13/cobra v1.7.0 + github.com/spf13/cobra v1.8.1 github.com/stretchr/testify v1.9.0 github.com/tidwall/pretty v1.2.1 github.com/xtaci/kcp-go v5.4.20+incompatible - golang.org/x/net v0.21.0 - golang.zx2c4.com/wireguard v0.0.0-20230223181233-21636207a675 - gorm.io/driver/postgres v1.3.8 - gorm.io/gorm v1.23.8 + golang.org/x/net v0.30.0 + golang.zx2c4.com/wireguard v0.0.0-20231211153847-12269c276173 + gorm.io/driver/postgres v1.5.9 + gorm.io/gorm v1.25.12 ) require ( github.com/ActiveState/termtest/conpty v0.5.0 // indirect github.com/AudriusButkevicius/pfilter v0.0.11 // indirect github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect - github.com/Microsoft/go-winio v0.6.1 // indirect + github.com/Microsoft/go-winio v0.6.2 // indirect github.com/StackExchange/wmi v1.2.1 // indirect - github.com/VictoriaMetrics/metrics v1.24.0 // indirect + github.com/VictoriaMetrics/metrics v1.35.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect - github.com/bytedance/sonic v1.10.0 // indirect + github.com/bytedance/sonic v1.12.3 // indirect + github.com/bytedance/sonic/loader v0.2.0 // indirect github.com/ccding/go-stun/stun v0.0.0-20200514191101-4dc67bcdb029 // indirect github.com/cespare/xxhash v1.1.0 // indirect - github.com/cespare/xxhash/v2 v2.1.2 // indirect - github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d // indirect - github.com/chenzhuoyu/iasm v0.9.0 // indirect - github.com/creack/pty v1.1.18 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/cloudwego/base64x v0.1.4 // indirect + github.com/cloudwego/iasm v0.2.0 // indirect + github.com/creack/pty v1.1.23 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dgraph-io/ristretto v0.1.0 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/docker/distribution v2.7.1+incompatible // indirect github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-units v0.4.0 // indirect - github.com/dustin/go-humanize v1.0.0 // indirect - github.com/fatih/color v1.15.0 // indirect - github.com/flopp/go-coordsparser v0.0.0-20201115094714-8baaeb7062d5 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/fatih/color v1.17.0 // indirect + github.com/flopp/go-coordsparser v0.0.0-20240403152942-4891dc40d0a7 // indirect github.com/fogleman/gg v1.3.0 // indirect - github.com/gabriel-vasile/mimetype v1.4.2 // indirect + github.com/gabriel-vasile/mimetype v1.4.6 // indirect github.com/gen2brain/dlgs v0.0.0-20220603100644-40c77870fa8d // indirect - github.com/ghodss/yaml v1.0.0 // indirect github.com/gin-contrib/sse v0.1.0 // indirect - github.com/gin-gonic/gin v1.9.1 // indirect + github.com/gin-gonic/gin v1.10.0 // indirect github.com/go-ole/go-ole v1.3.0 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect - github.com/gocarina/gocsv v0.0.0-20230616125104-99d496ca653d // indirect - github.com/goccy/go-json v0.10.2 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect + github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1 // indirect + github.com/goccy/go-json v0.10.3 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/protobuf v1.5.3 // indirect - github.com/golang/snappy v0.0.3 // indirect - github.com/google/flatbuffers v1.12.1 // indirect - github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/google/flatbuffers v24.3.25+incompatible // indirect + github.com/google/pprof v0.0.0-20241017200806-017d972448fc // indirect github.com/gopherjs/gopherjs v1.17.2 // indirect - github.com/gorilla/securecookie v1.1.1 // indirect + github.com/gorilla/securecookie v1.1.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/itchyny/gojq v0.12.13 // indirect - github.com/itchyny/timefmt-go v0.1.5 // indirect - github.com/jackc/chunkreader/v2 v2.0.1 // indirect - github.com/jackc/pgconn v1.14.3 // indirect - github.com/jackc/pgio v1.0.0 // indirect + github.com/itchyny/gojq v0.12.16 // indirect + github.com/itchyny/timefmt-go v0.1.6 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect - github.com/jackc/pgproto3/v2 v2.3.3 // indirect - github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect - github.com/jackc/pgtype v1.14.0 // indirect - github.com/jackc/pgx/v4 v4.18.2 // indirect - github.com/jaypipes/ghw v0.12.0 // indirect - github.com/jaypipes/pcidb v1.0.0 // indirect + github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect + github.com/jackc/pgx/v5 v5.7.1 // indirect + github.com/jackc/puddle/v2 v2.2.2 // indirect + github.com/jaypipes/ghw v0.13.0 // indirect + github.com/jaypipes/pcidb v1.0.1 // indirect github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect - github.com/klauspost/compress v1.16.7 // indirect - github.com/klauspost/cpuid/v2 v2.2.5 // indirect - github.com/klauspost/reedsolomon v1.11.8 // indirect - github.com/leodido/go-urn v1.2.4 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/klauspost/cpuid/v2 v2.2.8 // indirect + github.com/klauspost/reedsolomon v1.12.4 // indirect + github.com/leodido/go-urn v1.4.0 // indirect github.com/lib/pq v1.10.9 // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.19 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mazznoer/csscolorparser v0.1.5 // indirect github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/onsi/ginkgo/v2 v2.12.0 // indirect + github.com/onsi/ginkgo/v2 v2.20.2 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/orandin/lumberjackrus v1.0.1 // indirect - github.com/pelletier/go-toml/v2 v2.0.9 // indirect - github.com/pires/go-proxyproto v0.6.2 // indirect + github.com/pelletier/go-toml/v2 v2.2.3 // indirect + github.com/pires/go-proxyproto v0.8.0 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/quic-go/quic-go v0.42.0 // indirect + github.com/quic-go/quic-go v0.48.0 // indirect github.com/skycoin/noise v0.0.0-20180327030543-2492fe189ae6 // indirect github.com/skycoin/skycoin v0.28.0 // indirect github.com/skycoin/systray v1.10.0 // indirect @@ -127,33 +124,33 @@ require ( github.com/templexxx/cpufeat v0.0.0-20180724012125-cef66df7f161 // indirect github.com/templexxx/xor v0.0.0-20191217153810-f85b25db303b // indirect github.com/tjfoc/gmsm v1.4.1 // indirect - github.com/tkrajina/gpxgo v1.1.2 // indirect + github.com/tkrajina/gpxgo v1.4.0 // indirect github.com/toqueteos/webbrowser v1.2.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect - github.com/ugorji/go/codec v1.2.11 // indirect + github.com/ugorji/go/codec v1.2.12 // indirect github.com/valyala/fastrand v1.1.0 // indirect github.com/valyala/histogram v1.2.0 // indirect - github.com/zcalusic/sysinfo v1.0.1 // indirect - go.etcd.io/bbolt v1.3.7 // indirect - go.opencensus.io v0.23.0 // indirect - go.uber.org/mock v0.4.0 // indirect - golang.org/x/arch v0.4.0 // indirect - golang.org/x/crypto v0.20.0 // indirect - golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 // indirect - golang.org/x/image v0.0.0-20210628002857-a66eb6448b8d // indirect - golang.org/x/mod v0.12.0 // indirect - golang.org/x/sys v0.20.0 // indirect - golang.org/x/term v0.17.0 // indirect - golang.org/x/text v0.14.0 // indirect - golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 // indirect + github.com/zcalusic/sysinfo v1.1.2 // indirect + go.etcd.io/bbolt v1.3.11 // indirect + go.opencensus.io v0.24.0 // indirect + go.uber.org/mock v0.5.0 // indirect + golang.org/x/arch v0.11.0 // indirect + golang.org/x/crypto v0.28.0 // indirect + golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect + golang.org/x/image v0.21.0 // indirect + golang.org/x/mod v0.21.0 // indirect + golang.org/x/sync v0.8.0 // indirect + golang.org/x/sys v0.26.0 // indirect + golang.org/x/term v0.25.0 // indirect + golang.org/x/text v0.19.0 // indirect + golang.org/x/tools v0.26.0 // indirect golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect - google.golang.org/protobuf v1.31.0 // indirect + google.golang.org/protobuf v1.35.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - howett.net/plist v1.0.0 // indirect - mvdan.cc/sh/v3 v3.7.0 // indirect - nhooyr.io/websocket v1.8.7 // indirect + howett.net/plist v1.0.1 // indirect + mvdan.cc/sh/v3 v3.9.0 // indirect + nhooyr.io/websocket v1.8.17 // indirect ) // Uncomment it for tests with alternative branches and run `make dep` diff --git a/go.sum b/go.sum index a3b103c5..c2aaae36 100644 --- a/go.sum +++ b/go.sum @@ -7,28 +7,28 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030IGemrRc= -github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= -github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA= github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8= -github.com/VictoriaMetrics/metrics v1.24.0 h1:ILavebReOjYctAGY5QU2F9X0MYvkcrG3aEn2RKa1Zkw= -github.com/VictoriaMetrics/metrics v1.24.0/go.mod h1:eFT25kvsTidQFHb6U0oa0rTrDRdz4xTYjpL8+UPohys= +github.com/VictoriaMetrics/metrics v1.35.1 h1:o84wtBKQbzLdDy14XeskkCZih6anG+veZ1SwJHFGwrU= +github.com/VictoriaMetrics/metrics v1.35.1/go.mod h1:r7hveu6xMdUACXvB8TYdAj8WEsKzWB0EkpJN+RDtOf8= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= -github.com/bitfield/script v0.22.1 h1:DphxoC5ssYciwd0ZS+N0Xae46geAD/0mVWh6a2NUxM4= -github.com/bitfield/script v0.22.1/go.mod h1:fv+6x4OzVsRs6qAlc7wiGq8fq1b5orhtQdtW0dwjUHI= +github.com/bitfield/script v0.23.0 h1:N0R5yLEl6wJIS9PR/A6xXwjMsplMubyxdi05N5l0X28= +github.com/bitfield/script v0.23.0/go.mod h1:fv+6x4OzVsRs6qAlc7wiGq8fq1b5orhtQdtW0dwjUHI= github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= -github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= -github.com/bytedance/sonic v1.10.0-rc/go.mod h1:ElCzW+ufi8qKqNW0FY314xriJhyJhuoJ3gFZdAHF7NM= -github.com/bytedance/sonic v1.10.0 h1:qtNZduETEIWJVIyDl01BeNxur2rW9OwTQ/yBqFRkKEk= -github.com/bytedance/sonic v1.10.0/go.mod h1:iZcSUejdk5aukTND/Eu/ivjQuEL0Cu9/rf50Hi0u/g4= +github.com/bytedance/sonic v1.12.3 h1:W2MGa7RCU1QTeYRTPE3+88mVC0yXmsRQRChiyVocVjU= +github.com/bytedance/sonic v1.12.3/go.mod h1:B8Gt/XvtZ3Fqj+iSKMypzymZxw/FVwgIGKzMzT9r/rk= +github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= +github.com/bytedance/sonic/loader v0.2.0 h1:zNprn+lsIP06C/IqCHs3gPQIvnvpKbbxyXQP1iU4kWM= +github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= github.com/ccding/go-stun/stun v0.0.0-20200514191101-4dc67bcdb029 h1:POmUHfxXdeyM8Aomg4tKDcwATCFuW+cYLkj6pwsw9pc= github.com/ccding/go-stun/stun v0.0.0-20200514191101-4dc67bcdb029/go.mod h1:Rpr5n9cGHYdM3S3IK8ROSUUUYjQOu+MSUCZDcJbYWi8= github.com/cenkalti/backoff v1.1.0/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= @@ -36,29 +36,22 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= -github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= -github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= -github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d h1:77cEq6EriyTZ0g/qfRdp61a3Uu/AWrgIq2s0ClJV1g0= -github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d/go.mod h1:8EPpVsBuRksnlj1mLy4AWzRNQYxauNi62uWcE3to6eA= -github.com/chenzhuoyu/iasm v0.9.0 h1:9fhXjVzq5hUy2gkhhgHl95zG2cEAhw9OSGs8toWWAwo= -github.com/chenzhuoyu/iasm v0.9.0/go.mod h1:Xjy2NpN3h7aUqeqM+woSuuvxmIe6+DDsiNLIrkAmYog= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= +github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= +github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= +github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I= -github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/go-systemd v0.0.0-20190719114852-fd7a80b32e1f/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= -github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= -github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.23 h1:4M6+isWdcStXEf15G/RbrMPOQj1dZ7HPZCGwE4kOeP0= +github.com/creack/pty v1.1.23/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -78,91 +71,70 @@ github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKoh github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= -github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= -github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= -github.com/flopp/go-coordsparser v0.0.0-20201115094714-8baaeb7062d5 h1:o5yuyiGtJ4c9ECOq12K6EqsQNnsGF6I+WqBZynB2Hlw= -github.com/flopp/go-coordsparser v0.0.0-20201115094714-8baaeb7062d5/go.mod h1:t5EAdR9sDhKR06Ix2ZS/8jt8INpzeV3P5uVyEkCDYJc= -github.com/flopp/go-staticmaps v0.0.0-20220221183018-c226716bec53 h1:bpgLIxOpmht6HkBsajYmp+CvNAtdXnb+uGZQw3pIxtU= -github.com/flopp/go-staticmaps v0.0.0-20220221183018-c226716bec53/go.mod h1:vGgI6wKa1TTiN9iumpzYZgNc/C7KxqsZbw9OH8O10iQ= +github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= +github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= +github.com/flopp/go-coordsparser v0.0.0-20240403152942-4891dc40d0a7 h1:ZYEbOgGPFGLZwkLxRjV9zxZAIR1lExrNfFnObG/peek= +github.com/flopp/go-coordsparser v0.0.0-20240403152942-4891dc40d0a7/go.mod h1:7y/2PxXfR1mGtIQFNtFE1daHIka2e8J480Bsm+MiCpk= +github.com/flopp/go-staticmaps v0.0.0-20240606055734-0bdd9c1c1478 h1:rKlN4NCEUFdQ+pFGHchbZmRarzflkKtupcmqbxZNOZY= +github.com/flopp/go-staticmaps v0.0.0-20240606055734-0bdd9c1c1478/go.mod h1:Rr7/JmBsMZMKycTY56uTrDJtTPp3dbqxIudNljFNYs4= github.com/fogleman/gg v1.3.0 h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= -github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= -github.com/frankban/quicktest v1.14.5/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= -github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= +github.com/gabriel-vasile/mimetype v1.4.6 h1:3+PzJTKLkvgjeTbts6msPJt4DixhT4YtFNf1gtGe3zc= +github.com/gabriel-vasile/mimetype v1.4.6/go.mod h1:JX1qVKqZd40hUPpAfiNTe0Sne7hdfKSbOqqmkq8GCXc= github.com/gen2brain/dlgs v0.0.0-20220603100644-40c77870fa8d h1:dHYKX8CBAs1zSGXm3q3M15CLAEwPEkwrK1ed8FCo+Xo= github.com/gen2brain/dlgs v0.0.0-20220603100644-40c77870fa8d/go.mod h1:/eFcjDXaU2THSOOqLxOPETIbHETnamk8FA/hMjhg/gU= -github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= -github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= -github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= -github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= -github.com/go-chi/chi/v5 v5.0.11 h1:BnpYbFZ3T3S1WMpD79r7R5ThWX40TaFB7L31Y8xqSwA= -github.com/go-chi/chi/v5 v5.0.11/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= -github.com/go-chi/httprate v0.9.0 h1:21A+4WDMDA5FyWcg7mNrhj63aNT8CGh+Z1alOE/piU8= -github.com/go-chi/httprate v0.9.0/go.mod h1:6GOYBSwnpra4CQfAKXu8sQZg+nZ0M1g9QnyFvxrAB8A= -github.com/go-echarts/go-echarts/v2 v2.3.3 h1:uImZAk6qLkC6F9ju6mZ5SPBqTyK8xjZKwSmwnCg4bxg= -github.com/go-echarts/go-echarts/v2 v2.3.3/go.mod h1:56YlvzhW/a+du15f3S2qUGNDfKnFOeJSThBIrVFHDtI= -github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= -github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= -github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= +github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= +github.com/go-chi/chi/v5 v5.1.0 h1:acVI1TYaD+hhedDJ3r54HyA6sExp3HfXq7QWEEY/xMw= +github.com/go-chi/chi/v5 v5.1.0/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= +github.com/go-chi/httprate v0.14.1 h1:EKZHYEZ58Cg6hWcYzoZILsv7ppb46Wt4uQ738IRtpZs= +github.com/go-chi/httprate v0.14.1/go.mod h1:TUepLXaz/pCjmCtf/obgOQJ2Sz6rC8fSf5cAt5cnTt0= +github.com/go-echarts/go-echarts/v2 v2.4.3 h1:JVxD8idXAZHIGGvrmDT2LQKGKe2HgZLUfMxqmyJRdqY= +github.com/go-echarts/go-echarts/v2 v2.4.3/go.mod h1:56YlvzhW/a+du15f3S2qUGNDfKnFOeJSThBIrVFHDtI= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= -github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= -github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= -github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= -github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= -github.com/go-playground/validator/v10 v10.15.1 h1:BSe8uhN+xQ4r5guV/ywQI4gO59C2raYcGffYWZEjZzM= -github.com/go-playground/validator/v10 v10.15.1/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= +github.com/go-playground/validator/v10 v10.22.1 h1:40JcKH+bBNGFczGuoBYgX4I6m/i27HYW8P9FDk5PbgA= +github.com/go-playground/validator/v10 v10.22.1/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= +github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI= +github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow= github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg= github.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA= github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC0oI= github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= -github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= -github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= -github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= -github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= -github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= -github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= -github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= -github.com/gobwas/ws v1.2.1 h1:F2aeBZrm2NDsc7vbovKrWSogd4wvfAxg0FQ89/iqOTk= -github.com/gobwas/ws v1.2.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= -github.com/gocarina/gocsv v0.0.0-20230616125104-99d496ca653d h1:KbPOUXFUDJxwZ04vbmDOc3yuruGvVO+LOa7cVER3yWw= -github.com/gocarina/gocsv v0.0.0-20230616125104-99d496ca653d/go.mod h1:5YoVOkjYAQumqlV356Hj3xeYh4BdZuLE0/nRkf2NKkI= -github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1 h1:FWNFq4fM1wPfcK40yHE5UO3RUdSNPaBC+j3PokzA6OQ= +github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1/go.mod h1:5YoVOkjYAQumqlV356Hj3xeYh4BdZuLE0/nRkf2NKkI= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= +github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gofrs/uuid v4.0.0+incompatible h1:1SD/1F5pU8p29ybwgQSwpQk+mwdRrXCYuPhW6m+TnJw= -github.com/gofrs/uuid v4.0.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= -github.com/golang/geo v0.0.0-20210211234256-740aa86cb551/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI= github.com/golang/geo v0.0.0-20230421003525-6adc56603217 h1:HKlyj6in2JV6wVkmQ4XmG/EIm+SCYlPZ+V4GWit7Z+I= github.com/golang/geo v0.0.0-20230421003525-6adc56603217/go.mod h1:8wI0hitZ3a1IxZfeH3/5I97CI8i5cLGsYe7xNhQGs9U= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= @@ -176,7 +148,6 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= @@ -185,13 +156,16 @@ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvq github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/flatbuffers v1.12.1 h1:MVlul7pQNoDzWRLTw5imwYsl+usrS1TXG2H4jg6ImGw= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= +github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= github.com/google/flatbuffers v1.12.1/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= +github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -199,150 +173,90 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f h1:pDhu5sgp8yJlEF/g6osliIIpF9K4F5jvkULXa4daRDQ= -github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= -github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20241017200806-017d972448fc h1:NGyrhhFhwvRAZg02jnYVg3GBQy0qGBKmFQJwaPmpmxs= +github.com/google/pprof v0.0.0-20241017200806-017d972448fc/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g= github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k= -github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyCS8BvQ= -github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= -github.com/gorilla/websocket v1.4.1 h1:q7AeDBpnBk8AogcD4DSag/Ukw/KV+YhzLj2bP5HvKCM= -github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gorilla/securecookie v1.1.2 h1:YCIWL56dvtr73r6715mJs5ZvhtnY73hBvEF8kXD8ePA= +github.com/gorilla/securecookie v1.1.2/go.mod h1:NfCASbcHqRSY+3a8tlWJwsQap2VX5pwzwo4h3eOamfo= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/hashicorp/yamux v0.1.1 h1:yrQxtgseBDrq9Y652vSRDvsKCJKOUD+GzTS4Y0Y8pvE= -github.com/hashicorp/yamux v0.1.1/go.mod h1:CtWFDAQgb7dxtzFs4tWbplKIe2jSi3+5vKbgIO0SLnQ= +github.com/hashicorp/yamux v0.1.2 h1:XtB8kyFOyHXYVFnwT5C3+Bdo8gArse7j2AQ0DA0Uey8= +github.com/hashicorp/yamux v0.1.2/go.mod h1:C+zze2n6e/7wshOZep2A70/aQU6QBRWJO/G6FT1wIns= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/itchyny/gojq v0.12.13 h1:IxyYlHYIlspQHHTE0f3cJF0NKDMfajxViuhBLnHd/QU= -github.com/itchyny/gojq v0.12.13/go.mod h1:JzwzAqenfhrPUuwbmEz3nu3JQmFLlQTQMUcOdnu/Sf4= -github.com/itchyny/timefmt-go v0.1.5 h1:G0INE2la8S6ru/ZI5JecgyzbbJNs5lG1RcBqa7Jm6GE= -github.com/itchyny/timefmt-go v0.1.5/go.mod h1:nEP7L+2YmAbT2kZ2HfSs1d8Xtw9LY8D2stDBckWakZ8= +github.com/itchyny/gojq v0.12.16 h1:yLfgLxhIr/6sJNVmYfQjTIv0jGctu6/DgDoivmxTr7g= +github.com/itchyny/gojq v0.12.16/go.mod h1:6abHbdC2uB9ogMS38XsErnfqJ94UlngIJGlRAIj4jTM= +github.com/itchyny/timefmt-go v0.1.6 h1:ia3s54iciXDdzWzwaVKXZPbiXzxxnv1SPGFfM/myJ5Q= +github.com/itchyny/timefmt-go v0.1.6/go.mod h1:RRDZYC5s9ErkjQvTvvU7keJjxUYzIISJGxm9/mAERQg= github.com/ivanpirog/coloredcobra v1.0.1 h1:aURSdEmlR90/tSiWS0dMjdwOvCVUeYLfltLfbgNxrN4= github.com/ivanpirog/coloredcobra v1.0.1/go.mod h1:iho4nEKcnwZFiniGSdcgdvRgZNjxm+h20acv8vqmN6Q= -github.com/jackc/chunkreader v1.0.0/go.mod h1:RT6O25fNZIuasFJRyZ4R/Y2BbhasbmZXF9QQ7T3kePo= -github.com/jackc/chunkreader/v2 v2.0.0/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= -github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8= -github.com/jackc/chunkreader/v2 v2.0.1/go.mod h1:odVSm741yZoC3dpHEUXIqA9tQRhFrgOHwnPIn9lDKlk= -github.com/jackc/pgconn v0.0.0-20190420214824-7e0022ef6ba3/go.mod h1:jkELnwuX+w9qN5YIfX0fl88Ehu4XC3keFuOJJk9pcnA= -github.com/jackc/pgconn v0.0.0-20190824142844-760dd75542eb/go.mod h1:lLjNuW/+OfW9/pnVKPazfWOgNfH2aPem8YQ7ilXGvJE= -github.com/jackc/pgconn v0.0.0-20190831204454-2fabfa3c18b7/go.mod h1:ZJKsE/KZfsUgOEh9hBm+xYTstcNHg7UPMVJqRfQxq4s= -github.com/jackc/pgconn v1.8.0/go.mod h1:1C2Pb36bGIP9QHGBYCjnyhqu7Rv3sGshaQUvmfGIB/o= -github.com/jackc/pgconn v1.9.0/go.mod h1:YctiPyvzfU11JFxoXokUOOKQXQmDMoJL9vJzHH8/2JY= -github.com/jackc/pgconn v1.9.1-0.20210724152538-d89c8390a530/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI= -github.com/jackc/pgconn v1.12.1/go.mod h1:ZkhRC59Llhrq3oSfrikvwQ5NaxYExr6twkdkMLaKono= -github.com/jackc/pgconn v1.14.3 h1:bVoTr12EGANZz66nZPkMInAV/KHD2TxH9npjXXgiB3w= -github.com/jackc/pgconn v1.14.3/go.mod h1:RZbme4uasqzybK2RK5c65VsHxoyaml09lx3tXOcO/VM= -github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE= -github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8= -github.com/jackc/pgmock v0.0.0-20190831213851-13a1b77aafa2/go.mod h1:fGZlG77KXmcq05nJLRkk0+p82V8B8Dw8KN2/V9c/OAE= -github.com/jackc/pgmock v0.0.0-20201204152224-4fe30f7445fd/go.mod h1:hrBW0Enj2AZTNpt/7Y5rr2xe/9Mn757Wtb2xeBzPv2c= -github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65 h1:DadwsjnMwFjfWc9y5Wi/+Zz7xoE5ALHsRQlOctkOiHc= -github.com/jackc/pgmock v0.0.0-20210724152146-4ad1a8207f65/go.mod h1:5R2h2EEX+qri8jOWMbJCtaPWkrrNc7OHwsp2TCqp7ak= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= -github.com/jackc/pgproto3 v1.1.0/go.mod h1:eR5FA3leWg7p9aeAqi37XOTgTIbkABlvcPB3E5rlc78= -github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190420180111-c116219b62db/go.mod h1:bhq50y+xrl9n5mRYyCBFKkpRVTLYJVWeCc+mEAI3yXA= -github.com/jackc/pgproto3/v2 v2.0.0-alpha1.0.20190609003834-432c2951c711/go.mod h1:uH0AWtUmuShn0bcesswc4aBTWGvw0cAxIJp+6OB//Wg= -github.com/jackc/pgproto3/v2 v2.0.0-rc3/go.mod h1:ryONWYqW6dqSg1Lw6vXNMXoBJhpzvWKnT95C46ckYeM= -github.com/jackc/pgproto3/v2 v2.0.0-rc3.0.20190831210041-4c03ce451f29/go.mod h1:ryONWYqW6dqSg1Lw6vXNMXoBJhpzvWKnT95C46ckYeM= -github.com/jackc/pgproto3/v2 v2.0.6/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= -github.com/jackc/pgproto3/v2 v2.1.1/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= -github.com/jackc/pgproto3/v2 v2.3.0/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= -github.com/jackc/pgproto3/v2 v2.3.3 h1:1HLSx5H+tXR9pW3in3zaztoEwQYRC9SQaYUHjTSUOag= -github.com/jackc/pgproto3/v2 v2.3.3/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= -github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b/go.mod h1:vsD4gTJCa9TptPL8sPkXrLZ+hDuNrZCnj29CQpr4X1E= -github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk= -github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= -github.com/jackc/pgtype v0.0.0-20190421001408-4ed0de4755e0/go.mod h1:hdSHsc1V01CGwFsrv11mJRHWJ6aifDLfdV3aVjFF0zg= -github.com/jackc/pgtype v0.0.0-20190824184912-ab885b375b90/go.mod h1:KcahbBH1nCMSo2DXpzsoWOAfFkdEtEJpPbVLq8eE+mc= -github.com/jackc/pgtype v0.0.0-20190828014616-a8802b16cc59/go.mod h1:MWlu30kVJrUS8lot6TQqcg7mtthZ9T0EoIBFiJcmcyw= -github.com/jackc/pgtype v1.8.1-0.20210724151600-32e20a603178/go.mod h1:C516IlIV9NKqfsMCXTdChteoXmwgUceqaLfjg2e3NlM= -github.com/jackc/pgtype v1.11.0/go.mod h1:LUMuVrfsFfdKGLw+AFFVv6KtHOFMwRgDDzBt76IqCA4= -github.com/jackc/pgtype v1.14.0 h1:y+xUdabmyMkJLyApYuPj38mW+aAIqCe5uuBB51rH3Vw= -github.com/jackc/pgtype v1.14.0/go.mod h1:LUMuVrfsFfdKGLw+AFFVv6KtHOFMwRgDDzBt76IqCA4= -github.com/jackc/pgx/v4 v4.0.0-20190420224344-cc3461e65d96/go.mod h1:mdxmSJJuR08CZQyj1PVQBHy9XOp5p8/SHH6a0psbY9Y= -github.com/jackc/pgx/v4 v4.0.0-20190421002000-1b8f0016e912/go.mod h1:no/Y67Jkk/9WuGR0JG/JseM9irFbnEPbuWV2EELPNuM= -github.com/jackc/pgx/v4 v4.0.0-pre1.0.20190824185557-6972a5742186/go.mod h1:X+GQnOEnf1dqHGpw7JmHqHc1NxDoalibchSk9/RWuDc= -github.com/jackc/pgx/v4 v4.12.1-0.20210724153913-640aa07df17c/go.mod h1:1QD0+tgSXP7iUjYm9C1NxKhny7lq6ee99u/z+IHFcgs= -github.com/jackc/pgx/v4 v4.16.1/go.mod h1:SIhx0D5hoADaiXZVyv+3gSm3LCIIINTVO0PficsvWGQ= -github.com/jackc/pgx/v4 v4.18.2 h1:xVpYkNR5pk5bMCZGfClbO962UIqVABcAGt7ha1s/FeU= -github.com/jackc/pgx/v4 v4.18.2/go.mod h1:Ey4Oru5tH5sB6tV7hDmfWFahwF15Eb7DNXlRKx2CkVw= -github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= -github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= -github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= -github.com/jackc/puddle v1.2.1/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.7.1 h1:x7SYsPBYDkHDksogeSmZZ5xzThcTgRz++I5E+ePFUcs= +github.com/jackc/pgx/v5 v5.7.1/go.mod h1:e7O26IywZZ+naJtWWos6i6fvWK+29etgITqrqHLfoZA= +github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= +github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/james-barrow/golang-ipc v1.2.4 h1:d4NXRQxq6OWviWU8uAaob8R0YZGy/PhAkXGLpBNpkA4= github.com/james-barrow/golang-ipc v1.2.4/go.mod h1:+egiWSbOWmiPucFGSl4GNB1YSzrVGehyl7/7pW4N8F0= -github.com/jaypipes/ghw v0.12.0 h1:xU2/MDJfWmBhJnujHY9qwXQLs3DBsf0/Xa9vECY0Tho= -github.com/jaypipes/ghw v0.12.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g= -github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8= -github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk= +github.com/jaypipes/ghw v0.13.0 h1:log8MXuB8hzTNnSktqpXMHc0c/2k/WgjOMSUtnI1RV4= +github.com/jaypipes/ghw v0.13.0/go.mod h1:In8SsaDqlb1oTyrbmTC14uy+fbBMvp+xdqX51MidlD8= +github.com/jaypipes/pcidb v1.0.1 h1:WB2zh27T3nwg8AE8ei81sNRb9yWBii3JGNJtT7K9Oic= +github.com/jaypipes/pcidb v1.0.1/go.mod h1:6xYUz/yYEyOkIkUt2t2J2folIuZ4Yg6uByCGFXMCeE4= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= -github.com/jessevdk/go-flags v1.5.0/go.mod h1:Fw0T6WPc1dYxT4mKEZRfG5kJhaTDP9pj1c2EWnYs/m4= github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= -github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= -github.com/joeshaw/gengen v0.0.0-20190604015154-c77d87825f5a/go.mod h1:v2qvRL8Xwk4OlARK6gPlf2JreZXzv0dYp/8+kUJ0y7Q= -github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= -github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= -github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= -github.com/klauspost/reedsolomon v1.11.8 h1:s8RpUW5TK4hjr+djiOpbZJB4ksx+TdYbRH7vHQpwPOY= -github.com/klauspost/reedsolomon v1.11.8/go.mod h1:4bXRN+cVzMdml6ti7qLouuYi32KHJ5MGv0Qd8a47h6A= +github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= +github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klauspost/reedsolomon v1.12.4 h1:5aDr3ZGoJbgu/8+j45KtUJxzYm8k08JGtB9Wx1VQ4OA= +github.com/klauspost/reedsolomon v1.12.4/go.mod h1:d3CzOMOt0JXGIFZm1StgkyF14EYr3xneR2rNWo7NcMU= github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= github.com/konsorten/go-windows-terminal-sequences v0.0.0-20180402223658-b729f2633dfe/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= -github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= -github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= -github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= -github.com/lib/pq v1.1.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= -github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= -github.com/lib/pq v1.10.2/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= +github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= -github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ= -github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= -github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= -github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mazznoer/csscolorparser v0.1.5 h1:Wr4uNIE+pHWN3TqZn2SGpA2nLRG064gB7WdSfSS5cz4= +github.com/mazznoer/csscolorparser v0.1.5/go.mod h1:OQRVvgCyHDCAquR1YWfSwwaDcM0LhnSffGnlbOew/3I= github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQE9x6ikvDFZS2mDVS3drnohI= github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= @@ -353,53 +267,42 @@ github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= -github.com/onsi/ginkgo/v2 v2.12.0 h1:UIVDowFPwpg6yMUpPjGkYvf06K3RAiJXUhCxEwQVHRI= -github.com/onsi/ginkgo/v2 v2.12.0/go.mod h1:ZNEzXISYlqpb8S36iN71ifqLi3vVD1rVJGvWRCJOUpQ= -github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= -github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M= +github.com/onsi/ginkgo/v2 v2.20.2 h1:7NVCeyIWROIAheY21RLS+3j2bb52W0W82tkberYytp4= +github.com/onsi/ginkgo/v2 v2.20.2/go.mod h1:K9gyxPIlb+aIvnZ8bd9Ak+YP18w3APlR+5coaZoE2ag= +github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= +github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/orandin/lumberjackrus v1.0.1 h1:7ysDQ0MHD79zIFN9/EiDHjUcgopNi5ehtxFDy8rUkWo= github.com/orandin/lumberjackrus v1.0.1/go.mod h1:xYLt6H8W93pKnQgUQaxsApS0Eb4BwHLOkxk5DVzf5H0= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pelletier/go-toml/v2 v2.0.9 h1:uH2qQXheeefCCkuBBSLi7jCiSmj3VRh2+Goq2N7Xxu0= -github.com/pelletier/go-toml/v2 v2.0.9/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= -github.com/pires/go-proxyproto v0.6.2 h1:KAZ7UteSOt6urjme6ZldyFm4wDe/z0ZUP0Yv0Dos0d8= -github.com/pires/go-proxyproto v0.6.2/go.mod h1:Odh9VFOZJCf9G8cLW5o435Xf1J95Jw9Gw5rnCjcwzAY= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= +github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= +github.com/pires/go-proxyproto v0.8.0 h1:5unRmEAPbHXHuLjDg01CxJWf91cw3lKHc/0xzKpXEe0= +github.com/pires/go-proxyproto v0.8.0/go.mod h1:iknsfgnH8EkjrMeMyvfKByp9TiBZCKZM0jx2xmKqnVY= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/quic-go/quic-go v0.42.0 h1:uSfdap0eveIl8KXnipv9K7nlwZ5IqLlYOpJ58u5utpM= -github.com/quic-go/quic-go v0.42.0/go.mod h1:132kz4kL3F9vxhW3CtQJLDVwcFe5wdWeJXXijhsO57M= -github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/quic-go/quic-go v0.48.0 h1:2TCyvBrMu1Z25rvIAlnp2dPT4lgh/uTqLqiXVpp5AeU= +github.com/quic-go/quic-go v0.48.0/go.mod h1:yBgs3rWBOADpga7F+jJsb6Ybg1LSYiQvwWlLX+/6HMs= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/rs/cors v1.6.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= -github.com/rs/cors v1.8.2 h1:KCooALfAYGs415Cwu5ABvv9n9509fSiG5SQJn/AQo4U= -github.com/rs/cors v1.8.2/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= -github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= -github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OKkWU= -github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc= +github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA= +github.com/rs/cors v1.11.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4= -github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= -github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/sirupsen/logrus v1.1.1/go.mod h1:zrgwTnHtNr00buQ1vSptGe8m1f/BbgsPukg8qsT7A+A= -github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= -github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= @@ -427,8 +330,8 @@ github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkU github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g= -github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= -github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/pflag v1.0.2/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= @@ -438,7 +341,6 @@ github.com/spf13/viper v1.2.1/go.mod h1:P4AexN0a+C9tGAnUFNwDMYYZv3pjFuvmeiMyKRaN github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= @@ -446,14 +348,10 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI= @@ -466,18 +364,16 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tjfoc/gmsm v1.4.1 h1:aMe1GlZb+0bLjn+cKTPEvvn9oUEBlJitaZiiBwsbgho= github.com/tjfoc/gmsm v1.4.1/go.mod h1:j4INPkHWMrhJb38G+J6W4Tw0AbuN8Thu3PbdVYhVcTE= -github.com/tkrajina/gpxgo v1.1.2 h1:il6rjS6IGm3yqa/yr7+fKBlF3ufWDEPZrYi/kxI1Jv0= -github.com/tkrajina/gpxgo v1.1.2/go.mod h1:795sjVRFo5wWyN6oOZp0RYienGGBJjpAlgOz2nCngA0= +github.com/tkrajina/gpxgo v1.4.0 h1:cSD5uSwy3VZuNFieTEZLyRnuIwhonQEkGPkPGW4XNag= +github.com/tkrajina/gpxgo v1.4.0/go.mod h1:BXSMfUAvKiEhMEXAFM2NvNsbjsSvp394mOvdcNjettg= github.com/toqueteos/webbrowser v1.1.0/go.mod h1:Hqqqmzj8AHn+VlZyVjaRWY20i25hoOZGAABCcg2el4A= github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ= github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= -github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= -github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= -github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= +github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/valyala/fastrand v1.1.0 h1:f+5HkLW4rsgzdNoleUOB69hyT9IlD2ZQh9GyDMfb5G8= github.com/valyala/fastrand v1.1.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ= @@ -491,64 +387,42 @@ github.com/xtaci/lossyconn v0.0.0-20200209145036-adba10fffc37/go.mod h1:HpMP7DB2 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -github.com/zcalusic/sysinfo v1.0.1 h1:cVh8q3codjh43AGRTa54dJ2Zq+qPejv8n2VWpxKViwc= -github.com/zcalusic/sysinfo v1.0.1/go.mod h1:LxwKwtQdbTIQc65drhjQzYzt0o7jfB80LrrZm7SWn8o= -github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q= -go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ= -go.etcd.io/bbolt v1.3.7/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw= +github.com/zcalusic/sysinfo v1.1.2 h1:38KUgZQmCxlN9vUTt4miis4rU5ISJXGXOJ2rY7bMC8g= +github.com/zcalusic/sysinfo v1.1.2/go.mod h1:NX+qYnWGtJVPV0yWldff9uppNKU4h40hJIRPf/pGLv4= +go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= +go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= -go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= -go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= -go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= -go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= -go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= -go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= -go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= -go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= -golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= -golang.org/x/arch v0.4.0 h1:A8WCeEWhLwPBKNbFi5Wv5UTCBx5zzubnXDlMOFAzFMc= -golang.org/x/arch v0.4.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU= +go.uber.org/mock v0.5.0/go.mod h1:ge71pBPLYDk7QIi1LupWxdAykm7KIEFchiOqd6z7qMM= +golang.org/x/arch v0.11.0 h1:KXV8WWKCXm6tRpLirl2szsO5j/oOODwZf4hATmGVNs4= +golang.org/x/arch v0.11.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181015023909-0c41d7ab0a0e/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20201012173705-84dcc777aaee/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20201203163018-be400aefbc4c/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= -golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.20.0 h1:jmAMJJZXr5KiCw05dfYK9QnqaqKLYXijU23lsEdcQqg= -golang.org/x/crypto v0.20.0/go.mod h1:Xwo95rrVNIoSMx9wa1JroENMToLWn3RNVrTBpLHgZPQ= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ= -golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8= -golang.org/x/image v0.0.0-20210628002857-a66eb6448b8d h1:RNPAfi2nHY7C2srAV8A49jpsYr0ADedCk1wq6fTMTvs= -golang.org/x/image v0.0.0-20210628002857-a66eb6448b8d/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= +golang.org/x/image v0.21.0 h1:c5qV36ajHpdj4Qi0GnE0jUc/yuo33OLFaa0d+crTD5s= +golang.org/x/image v0.21.0/go.mod h1:vUbsLavqK/W303ZroQQVKQ+Af3Yl6Uz1Ppu5J/cLz78= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -556,7 +430,6 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= @@ -566,8 +439,8 @@ golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4= -golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -578,29 +451,23 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180906133057-8cf3aee42992/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181023152157-44b849a8bc13/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200428200454-593003d681fa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -613,58 +480,45 @@ golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= -golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= -golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U= -golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= +golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190425163242-31fd60d6bfdc/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190603231351-8aaa1484dc10/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190823170909-c4a336ef6a2f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= -golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 h1:Vve/L0v7CXXuxUmaMGIEK/dEeq7uiqb5qBgQrZzIE7E= -golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= -golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg= golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI= -golang.zx2c4.com/wireguard v0.0.0-20230223181233-21636207a675 h1:/J/RVnr7ng4fWPRH3xa4WtBJ1Jp+Auu4YNLmGiPv5QU= -golang.zx2c4.com/wireguard v0.0.0-20230223181233-21636207a675/go.mod h1:whfbyDBt09xhCYQWtO2+3UVjlaq6/9hDZrjg2ZE6SyA= +golang.zx2c4.com/wireguard v0.0.0-20231211153847-12269c276173 h1:/jFs0duh4rdb8uIfPMv78iAJGcPKDeqAFnaLBropIC4= +golang.zx2c4.com/wireguard v0.0.0-20231211153847-12269c276173/go.mod h1:tkCQ4FQXmpAgYVh++1cq16/dH4QJtmvpRv19DWGAHSA= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= @@ -687,17 +541,12 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/inconshreveable/log15.v2 v2.0.0-20180818164646-67afb5ed74ec/go.mod h1:aPpfJ7XW+gOuirDoZ8gHhLh3kZ1B08FtV2bbmy7Jv3s= gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= @@ -705,25 +554,22 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWD gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gorm.io/driver/postgres v1.3.8 h1:8bEphSAB69t3odsCR4NDzt581iZEWQuRM27Cg6KgfPY= -gorm.io/driver/postgres v1.3.8/go.mod h1:qB98Aj6AhRO/oyu/jmZsi/YM9g6UzVCjMxO/6frFvcA= -gorm.io/gorm v1.23.6/go.mod h1:l2lP/RyAtc1ynaTjFksBde/O8v9oOGIApu2/xRitmZk= -gorm.io/gorm v1.23.8 h1:h8sGJ+biDgBA1AD1Ha9gFCx7h8npU7AsLdlkX0n2TpE= -gorm.io/gorm v1.23.8/go.mod h1:l2lP/RyAtc1ynaTjFksBde/O8v9oOGIApu2/xRitmZk= +gorm.io/driver/postgres v1.5.9 h1:DkegyItji119OlcaLjqN11kHoUgZ/j13E0jkJZgD6A8= +gorm.io/driver/postgres v1.5.9/go.mod h1:DX3GReXH+3FPWGrrgffdvCk3DQ1dwDPdmbenSkweRGI= +gorm.io/gorm v1.25.12 h1:I0u8i2hWQItBq1WfE0o2+WuL9+8L21K9e2HHSTE/0f8= +gorm.io/gorm v1.25.12/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ= +gvisor.dev/gvisor v0.0.0-20230927004350-cbd86285d259 h1:TbRPT0HtzFP3Cno1zZo7yPzEEnfu8EjLfl6IU9VfqkQ= +gvisor.dev/gvisor v0.0.0-20230927004350-cbd86285d259/go.mod h1:AVgIgHMwK63XvmAzWG9vLQ41YnVHN0du0tEC46fI7yY= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM= -howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g= -mvdan.cc/sh/v3 v3.7.0 h1:lSTjdP/1xsddtaKfGg7Myu7DnlHItd3/M2tomOcNNBg= -mvdan.cc/sh/v3 v3.7.0/go.mod h1:K2gwkaesF/D7av7Kxl0HbF5kGOd2ArupNTX3X44+8l8= -nhooyr.io/websocket v1.8.7 h1:usjR2uOr/zjjkVMy0lW+PPohFok7PCow5sDjLgX4P4g= -nhooyr.io/websocket v1.8.7/go.mod h1:B70DZP8IakI65RVQ51MsWP/8jndNma26DVA/nFSCgW0= +howett.net/plist v1.0.1 h1:37GdZ8tP09Q35o9ych3ehygcsL+HqKSwzctveSlarvM= +howett.net/plist v1.0.1/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g= +mvdan.cc/sh/v3 v3.9.0 h1:it14fyjCdQUk4jf/aYxLO3FG8jFarR9GzMCtnlvvD7c= +mvdan.cc/sh/v3 v3.9.0/go.mod h1:cdBk8bgoiBI7lSZqK5JhUuq7OB64VQ7fgm85xelw3Nk= +nhooyr.io/websocket v1.8.17 h1:KEVeLJkUywCKVsnLIDlD/5gtayKp8VoCkksHCGGfT9Y= +nhooyr.io/websocket v1.8.17/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c= nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= -rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/vendor/github.com/Microsoft/go-winio/.golangci.yml b/vendor/github.com/Microsoft/go-winio/.golangci.yml index 7b503d26..faedfe93 100644 --- a/vendor/github.com/Microsoft/go-winio/.golangci.yml +++ b/vendor/github.com/Microsoft/go-winio/.golangci.yml @@ -1,7 +1,3 @@ -run: - skip-dirs: - - pkg/etw/sample - linters: enable: # style @@ -20,9 +16,13 @@ linters: - gofmt # files are gofmt'ed - gosec # security - nilerr # returns nil even with non-nil error + - thelper # test helpers without t.Helper() - unparam # unused function params issues: + exclude-dirs: + - pkg/etw/sample + exclude-rules: # err is very often shadowed in nested scopes - linters: @@ -69,9 +69,7 @@ linters-settings: # struct order is often for Win32 compat # also, ignore pointer bytes/GC issues for now until performance becomes an issue - fieldalignment - check-shadowing: true nolintlint: - allow-leading-space: false require-explanation: true require-specific: true revive: diff --git a/vendor/github.com/Microsoft/go-winio/backup.go b/vendor/github.com/Microsoft/go-winio/backup.go index 09621c88..b54341da 100644 --- a/vendor/github.com/Microsoft/go-winio/backup.go +++ b/vendor/github.com/Microsoft/go-winio/backup.go @@ -10,14 +10,14 @@ import ( "io" "os" "runtime" - "syscall" "unicode/utf16" + "github.com/Microsoft/go-winio/internal/fs" "golang.org/x/sys/windows" ) -//sys backupRead(h syscall.Handle, b []byte, bytesRead *uint32, abort bool, processSecurity bool, context *uintptr) (err error) = BackupRead -//sys backupWrite(h syscall.Handle, b []byte, bytesWritten *uint32, abort bool, processSecurity bool, context *uintptr) (err error) = BackupWrite +//sys backupRead(h windows.Handle, b []byte, bytesRead *uint32, abort bool, processSecurity bool, context *uintptr) (err error) = BackupRead +//sys backupWrite(h windows.Handle, b []byte, bytesWritten *uint32, abort bool, processSecurity bool, context *uintptr) (err error) = BackupWrite const ( BackupData = uint32(iota + 1) @@ -104,7 +104,7 @@ func (r *BackupStreamReader) Next() (*BackupHeader, error) { if err := binary.Read(r.r, binary.LittleEndian, name); err != nil { return nil, err } - hdr.Name = syscall.UTF16ToString(name) + hdr.Name = windows.UTF16ToString(name) } if wsi.StreamID == BackupSparseBlock { if err := binary.Read(r.r, binary.LittleEndian, &hdr.Offset); err != nil { @@ -205,7 +205,7 @@ func NewBackupFileReader(f *os.File, includeSecurity bool) *BackupFileReader { // Read reads a backup stream from the file by calling the Win32 API BackupRead(). func (r *BackupFileReader) Read(b []byte) (int, error) { var bytesRead uint32 - err := backupRead(syscall.Handle(r.f.Fd()), b, &bytesRead, false, r.includeSecurity, &r.ctx) + err := backupRead(windows.Handle(r.f.Fd()), b, &bytesRead, false, r.includeSecurity, &r.ctx) if err != nil { return 0, &os.PathError{Op: "BackupRead", Path: r.f.Name(), Err: err} } @@ -220,7 +220,7 @@ func (r *BackupFileReader) Read(b []byte) (int, error) { // the underlying file. func (r *BackupFileReader) Close() error { if r.ctx != 0 { - _ = backupRead(syscall.Handle(r.f.Fd()), nil, nil, true, false, &r.ctx) + _ = backupRead(windows.Handle(r.f.Fd()), nil, nil, true, false, &r.ctx) runtime.KeepAlive(r.f) r.ctx = 0 } @@ -244,7 +244,7 @@ func NewBackupFileWriter(f *os.File, includeSecurity bool) *BackupFileWriter { // Write restores a portion of the file using the provided backup stream. func (w *BackupFileWriter) Write(b []byte) (int, error) { var bytesWritten uint32 - err := backupWrite(syscall.Handle(w.f.Fd()), b, &bytesWritten, false, w.includeSecurity, &w.ctx) + err := backupWrite(windows.Handle(w.f.Fd()), b, &bytesWritten, false, w.includeSecurity, &w.ctx) if err != nil { return 0, &os.PathError{Op: "BackupWrite", Path: w.f.Name(), Err: err} } @@ -259,7 +259,7 @@ func (w *BackupFileWriter) Write(b []byte) (int, error) { // close the underlying file. func (w *BackupFileWriter) Close() error { if w.ctx != 0 { - _ = backupWrite(syscall.Handle(w.f.Fd()), nil, nil, true, false, &w.ctx) + _ = backupWrite(windows.Handle(w.f.Fd()), nil, nil, true, false, &w.ctx) runtime.KeepAlive(w.f) w.ctx = 0 } @@ -271,17 +271,14 @@ func (w *BackupFileWriter) Close() error { // // If the file opened was a directory, it cannot be used with Readdir(). func OpenForBackup(path string, access uint32, share uint32, createmode uint32) (*os.File, error) { - winPath, err := syscall.UTF16FromString(path) - if err != nil { - return nil, err - } - h, err := syscall.CreateFile(&winPath[0], - access, - share, + h, err := fs.CreateFile(path, + fs.AccessMask(access), + fs.FileShareMode(share), nil, - createmode, - syscall.FILE_FLAG_BACKUP_SEMANTICS|syscall.FILE_FLAG_OPEN_REPARSE_POINT, - 0) + fs.FileCreationDisposition(createmode), + fs.FILE_FLAG_BACKUP_SEMANTICS|fs.FILE_FLAG_OPEN_REPARSE_POINT, + 0, + ) if err != nil { err = &os.PathError{Op: "open", Path: path, Err: err} return nil, err diff --git a/vendor/github.com/Microsoft/go-winio/file.go b/vendor/github.com/Microsoft/go-winio/file.go index 175a99d3..fe82a180 100644 --- a/vendor/github.com/Microsoft/go-winio/file.go +++ b/vendor/github.com/Microsoft/go-winio/file.go @@ -15,26 +15,11 @@ import ( "golang.org/x/sys/windows" ) -//sys cancelIoEx(file syscall.Handle, o *syscall.Overlapped) (err error) = CancelIoEx -//sys createIoCompletionPort(file syscall.Handle, port syscall.Handle, key uintptr, threadCount uint32) (newport syscall.Handle, err error) = CreateIoCompletionPort -//sys getQueuedCompletionStatus(port syscall.Handle, bytes *uint32, key *uintptr, o **ioOperation, timeout uint32) (err error) = GetQueuedCompletionStatus -//sys setFileCompletionNotificationModes(h syscall.Handle, flags uint8) (err error) = SetFileCompletionNotificationModes -//sys wsaGetOverlappedResult(h syscall.Handle, o *syscall.Overlapped, bytes *uint32, wait bool, flags *uint32) (err error) = ws2_32.WSAGetOverlappedResult - -type atomicBool int32 - -func (b *atomicBool) isSet() bool { return atomic.LoadInt32((*int32)(b)) != 0 } -func (b *atomicBool) setFalse() { atomic.StoreInt32((*int32)(b), 0) } -func (b *atomicBool) setTrue() { atomic.StoreInt32((*int32)(b), 1) } - -//revive:disable-next-line:predeclared Keep "new" to maintain consistency with "atomic" pkg -func (b *atomicBool) swap(new bool) bool { - var newInt int32 - if new { - newInt = 1 - } - return atomic.SwapInt32((*int32)(b), newInt) == 1 -} +//sys cancelIoEx(file windows.Handle, o *windows.Overlapped) (err error) = CancelIoEx +//sys createIoCompletionPort(file windows.Handle, port windows.Handle, key uintptr, threadCount uint32) (newport windows.Handle, err error) = CreateIoCompletionPort +//sys getQueuedCompletionStatus(port windows.Handle, bytes *uint32, key *uintptr, o **ioOperation, timeout uint32) (err error) = GetQueuedCompletionStatus +//sys setFileCompletionNotificationModes(h windows.Handle, flags uint8) (err error) = SetFileCompletionNotificationModes +//sys wsaGetOverlappedResult(h windows.Handle, o *windows.Overlapped, bytes *uint32, wait bool, flags *uint32) (err error) = ws2_32.WSAGetOverlappedResult var ( ErrFileClosed = errors.New("file has already been closed") @@ -50,7 +35,7 @@ func (*timeoutError) Temporary() bool { return true } type timeoutChan chan struct{} var ioInitOnce sync.Once -var ioCompletionPort syscall.Handle +var ioCompletionPort windows.Handle // ioResult contains the result of an asynchronous IO operation. type ioResult struct { @@ -60,12 +45,12 @@ type ioResult struct { // ioOperation represents an outstanding asynchronous Win32 IO. type ioOperation struct { - o syscall.Overlapped + o windows.Overlapped ch chan ioResult } func initIO() { - h, err := createIoCompletionPort(syscall.InvalidHandle, 0, 0, 0xffffffff) + h, err := createIoCompletionPort(windows.InvalidHandle, 0, 0, 0xffffffff) if err != nil { panic(err) } @@ -76,10 +61,10 @@ func initIO() { // win32File implements Reader, Writer, and Closer on a Win32 handle without blocking in a syscall. // It takes ownership of this handle and will close it if it is garbage collected. type win32File struct { - handle syscall.Handle + handle windows.Handle wg sync.WaitGroup wgLock sync.RWMutex - closing atomicBool + closing atomic.Bool socket bool readDeadline deadlineHandler writeDeadline deadlineHandler @@ -90,11 +75,11 @@ type deadlineHandler struct { channel timeoutChan channelLock sync.RWMutex timer *time.Timer - timedout atomicBool + timedout atomic.Bool } // makeWin32File makes a new win32File from an existing file handle. -func makeWin32File(h syscall.Handle) (*win32File, error) { +func makeWin32File(h windows.Handle) (*win32File, error) { f := &win32File{handle: h} ioInitOnce.Do(initIO) _, err := createIoCompletionPort(h, ioCompletionPort, 0, 0xffffffff) @@ -110,7 +95,12 @@ func makeWin32File(h syscall.Handle) (*win32File, error) { return f, nil } +// Deprecated: use NewOpenFile instead. func MakeOpenFile(h syscall.Handle) (io.ReadWriteCloser, error) { + return NewOpenFile(windows.Handle(h)) +} + +func NewOpenFile(h windows.Handle) (io.ReadWriteCloser, error) { // If we return the result of makeWin32File directly, it can result in an // interface-wrapped nil, rather than a nil interface value. f, err := makeWin32File(h) @@ -124,13 +114,13 @@ func MakeOpenFile(h syscall.Handle) (io.ReadWriteCloser, error) { func (f *win32File) closeHandle() { f.wgLock.Lock() // Atomically set that we are closing, releasing the resources only once. - if !f.closing.swap(true) { + if !f.closing.Swap(true) { f.wgLock.Unlock() // cancel all IO and wait for it to complete _ = cancelIoEx(f.handle, nil) f.wg.Wait() // at this point, no new IO can start - syscall.Close(f.handle) + windows.Close(f.handle) f.handle = 0 } else { f.wgLock.Unlock() @@ -145,14 +135,14 @@ func (f *win32File) Close() error { // IsClosed checks if the file has been closed. func (f *win32File) IsClosed() bool { - return f.closing.isSet() + return f.closing.Load() } // prepareIO prepares for a new IO operation. // The caller must call f.wg.Done() when the IO is finished, prior to Close() returning. func (f *win32File) prepareIO() (*ioOperation, error) { f.wgLock.RLock() - if f.closing.isSet() { + if f.closing.Load() { f.wgLock.RUnlock() return nil, ErrFileClosed } @@ -164,12 +154,12 @@ func (f *win32File) prepareIO() (*ioOperation, error) { } // ioCompletionProcessor processes completed async IOs forever. -func ioCompletionProcessor(h syscall.Handle) { +func ioCompletionProcessor(h windows.Handle) { for { var bytes uint32 var key uintptr var op *ioOperation - err := getQueuedCompletionStatus(h, &bytes, &key, &op, syscall.INFINITE) + err := getQueuedCompletionStatus(h, &bytes, &key, &op, windows.INFINITE) if op == nil { panic(err) } @@ -182,11 +172,11 @@ func ioCompletionProcessor(h syscall.Handle) { // asyncIO processes the return value from ReadFile or WriteFile, blocking until // the operation has actually completed. func (f *win32File) asyncIO(c *ioOperation, d *deadlineHandler, bytes uint32, err error) (int, error) { - if err != syscall.ERROR_IO_PENDING { //nolint:errorlint // err is Errno + if err != windows.ERROR_IO_PENDING { //nolint:errorlint // err is Errno return int(bytes), err } - if f.closing.isSet() { + if f.closing.Load() { _ = cancelIoEx(f.handle, &c.o) } @@ -201,8 +191,8 @@ func (f *win32File) asyncIO(c *ioOperation, d *deadlineHandler, bytes uint32, er select { case r = <-c.ch: err = r.err - if err == syscall.ERROR_OPERATION_ABORTED { //nolint:errorlint // err is Errno - if f.closing.isSet() { + if err == windows.ERROR_OPERATION_ABORTED { //nolint:errorlint // err is Errno + if f.closing.Load() { err = ErrFileClosed } } else if err != nil && f.socket { @@ -214,7 +204,7 @@ func (f *win32File) asyncIO(c *ioOperation, d *deadlineHandler, bytes uint32, er _ = cancelIoEx(f.handle, &c.o) r = <-c.ch err = r.err - if err == syscall.ERROR_OPERATION_ABORTED { //nolint:errorlint // err is Errno + if err == windows.ERROR_OPERATION_ABORTED { //nolint:errorlint // err is Errno err = ErrTimeout } } @@ -235,23 +225,22 @@ func (f *win32File) Read(b []byte) (int, error) { } defer f.wg.Done() - if f.readDeadline.timedout.isSet() { + if f.readDeadline.timedout.Load() { return 0, ErrTimeout } var bytes uint32 - err = syscall.ReadFile(f.handle, b, &bytes, &c.o) + err = windows.ReadFile(f.handle, b, &bytes, &c.o) n, err := f.asyncIO(c, &f.readDeadline, bytes, err) runtime.KeepAlive(b) // Handle EOF conditions. if err == nil && n == 0 && len(b) != 0 { return 0, io.EOF - } else if err == syscall.ERROR_BROKEN_PIPE { //nolint:errorlint // err is Errno + } else if err == windows.ERROR_BROKEN_PIPE { //nolint:errorlint // err is Errno return 0, io.EOF - } else { - return n, err } + return n, err } // Write writes to a file handle. @@ -262,12 +251,12 @@ func (f *win32File) Write(b []byte) (int, error) { } defer f.wg.Done() - if f.writeDeadline.timedout.isSet() { + if f.writeDeadline.timedout.Load() { return 0, ErrTimeout } var bytes uint32 - err = syscall.WriteFile(f.handle, b, &bytes, &c.o) + err = windows.WriteFile(f.handle, b, &bytes, &c.o) n, err := f.asyncIO(c, &f.writeDeadline, bytes, err) runtime.KeepAlive(b) return n, err @@ -282,7 +271,7 @@ func (f *win32File) SetWriteDeadline(deadline time.Time) error { } func (f *win32File) Flush() error { - return syscall.FlushFileBuffers(f.handle) + return windows.FlushFileBuffers(f.handle) } func (f *win32File) Fd() uintptr { @@ -299,7 +288,7 @@ func (d *deadlineHandler) set(deadline time.Time) error { } d.timer = nil } - d.timedout.setFalse() + d.timedout.Store(false) select { case <-d.channel: @@ -314,7 +303,7 @@ func (d *deadlineHandler) set(deadline time.Time) error { } timeoutIO := func() { - d.timedout.setTrue() + d.timedout.Store(true) close(d.channel) } diff --git a/vendor/github.com/Microsoft/go-winio/fileinfo.go b/vendor/github.com/Microsoft/go-winio/fileinfo.go index 702950e7..c860eb99 100644 --- a/vendor/github.com/Microsoft/go-winio/fileinfo.go +++ b/vendor/github.com/Microsoft/go-winio/fileinfo.go @@ -18,9 +18,18 @@ type FileBasicInfo struct { _ uint32 // padding } +// alignedFileBasicInfo is a FileBasicInfo, but aligned to uint64 by containing +// uint64 rather than windows.Filetime. Filetime contains two uint32s. uint64 +// alignment is necessary to pass this as FILE_BASIC_INFO. +type alignedFileBasicInfo struct { + CreationTime, LastAccessTime, LastWriteTime, ChangeTime uint64 + FileAttributes uint32 + _ uint32 // padding +} + // GetFileBasicInfo retrieves times and attributes for a file. func GetFileBasicInfo(f *os.File) (*FileBasicInfo, error) { - bi := &FileBasicInfo{} + bi := &alignedFileBasicInfo{} if err := windows.GetFileInformationByHandleEx( windows.Handle(f.Fd()), windows.FileBasicInfo, @@ -30,16 +39,21 @@ func GetFileBasicInfo(f *os.File) (*FileBasicInfo, error) { return nil, &os.PathError{Op: "GetFileInformationByHandleEx", Path: f.Name(), Err: err} } runtime.KeepAlive(f) - return bi, nil + // Reinterpret the alignedFileBasicInfo as a FileBasicInfo so it matches the + // public API of this module. The data may be unnecessarily aligned. + return (*FileBasicInfo)(unsafe.Pointer(bi)), nil } // SetFileBasicInfo sets times and attributes for a file. func SetFileBasicInfo(f *os.File, bi *FileBasicInfo) error { + // Create an alignedFileBasicInfo based on a FileBasicInfo. The copy is + // suitable to pass to GetFileInformationByHandleEx. + biAligned := *(*alignedFileBasicInfo)(unsafe.Pointer(bi)) if err := windows.SetFileInformationByHandle( windows.Handle(f.Fd()), windows.FileBasicInfo, - (*byte)(unsafe.Pointer(bi)), - uint32(unsafe.Sizeof(*bi)), + (*byte)(unsafe.Pointer(&biAligned)), + uint32(unsafe.Sizeof(biAligned)), ); err != nil { return &os.PathError{Op: "SetFileInformationByHandle", Path: f.Name(), Err: err} } diff --git a/vendor/github.com/Microsoft/go-winio/hvsock.go b/vendor/github.com/Microsoft/go-winio/hvsock.go index c8819165..c4fdd9d4 100644 --- a/vendor/github.com/Microsoft/go-winio/hvsock.go +++ b/vendor/github.com/Microsoft/go-winio/hvsock.go @@ -10,7 +10,6 @@ import ( "io" "net" "os" - "syscall" "time" "unsafe" @@ -181,13 +180,13 @@ type HvsockConn struct { var _ net.Conn = &HvsockConn{} func newHVSocket() (*win32File, error) { - fd, err := syscall.Socket(afHVSock, syscall.SOCK_STREAM, 1) + fd, err := windows.Socket(afHVSock, windows.SOCK_STREAM, 1) if err != nil { return nil, os.NewSyscallError("socket", err) } f, err := makeWin32File(fd) if err != nil { - syscall.Close(fd) + windows.Close(fd) return nil, err } f.socket = true @@ -197,16 +196,24 @@ func newHVSocket() (*win32File, error) { // ListenHvsock listens for connections on the specified hvsock address. func ListenHvsock(addr *HvsockAddr) (_ *HvsockListener, err error) { l := &HvsockListener{addr: *addr} - sock, err := newHVSocket() + + var sock *win32File + sock, err = newHVSocket() if err != nil { return nil, l.opErr("listen", err) } + defer func() { + if err != nil { + _ = sock.Close() + } + }() + sa := addr.raw() - err = socket.Bind(windows.Handle(sock.handle), &sa) + err = socket.Bind(sock.handle, &sa) if err != nil { return nil, l.opErr("listen", os.NewSyscallError("socket", err)) } - err = syscall.Listen(sock.handle, 16) + err = windows.Listen(sock.handle, 16) if err != nil { return nil, l.opErr("listen", os.NewSyscallError("listen", err)) } @@ -246,7 +253,7 @@ func (l *HvsockListener) Accept() (_ net.Conn, err error) { var addrbuf [addrlen * 2]byte var bytes uint32 - err = syscall.AcceptEx(l.sock.handle, sock.handle, &addrbuf[0], 0 /* rxdatalen */, addrlen, addrlen, &bytes, &c.o) + err = windows.AcceptEx(l.sock.handle, sock.handle, &addrbuf[0], 0 /* rxdatalen */, addrlen, addrlen, &bytes, &c.o) if _, err = l.sock.asyncIO(c, nil, bytes, err); err != nil { return nil, l.opErr("accept", os.NewSyscallError("acceptex", err)) } @@ -263,7 +270,7 @@ func (l *HvsockListener) Accept() (_ net.Conn, err error) { conn.remote.fromRaw((*rawHvsockAddr)(unsafe.Pointer(&addrbuf[addrlen]))) // initialize the accepted socket and update its properties with those of the listening socket - if err = windows.Setsockopt(windows.Handle(sock.handle), + if err = windows.Setsockopt(sock.handle, windows.SOL_SOCKET, windows.SO_UPDATE_ACCEPT_CONTEXT, (*byte)(unsafe.Pointer(&l.sock.handle)), int32(unsafe.Sizeof(l.sock.handle))); err != nil { return nil, conn.opErr("accept", os.NewSyscallError("setsockopt", err)) @@ -334,7 +341,7 @@ func (d *HvsockDialer) Dial(ctx context.Context, addr *HvsockAddr) (conn *Hvsock }() sa := addr.raw() - err = socket.Bind(windows.Handle(sock.handle), &sa) + err = socket.Bind(sock.handle, &sa) if err != nil { return nil, conn.opErr(op, os.NewSyscallError("bind", err)) } @@ -347,7 +354,7 @@ func (d *HvsockDialer) Dial(ctx context.Context, addr *HvsockAddr) (conn *Hvsock var bytes uint32 for i := uint(0); i <= d.Retries; i++ { err = socket.ConnectEx( - windows.Handle(sock.handle), + sock.handle, &sa, nil, // sendBuf 0, // sendDataLen @@ -367,7 +374,7 @@ func (d *HvsockDialer) Dial(ctx context.Context, addr *HvsockAddr) (conn *Hvsock // update the connection properties, so shutdown can be used if err = windows.Setsockopt( - windows.Handle(sock.handle), + sock.handle, windows.SOL_SOCKET, windows.SO_UPDATE_CONNECT_CONTEXT, nil, // optvalue @@ -378,7 +385,7 @@ func (d *HvsockDialer) Dial(ctx context.Context, addr *HvsockAddr) (conn *Hvsock // get the local name var sal rawHvsockAddr - err = socket.GetSockName(windows.Handle(sock.handle), &sal) + err = socket.GetSockName(sock.handle, &sal) if err != nil { return nil, conn.opErr(op, os.NewSyscallError("getsockname", err)) } @@ -421,7 +428,7 @@ func (d *HvsockDialer) redialWait(ctx context.Context) (err error) { return ctx.Err() } -// assumes error is a plain, unwrapped syscall.Errno provided by direct syscall. +// assumes error is a plain, unwrapped windows.Errno provided by direct syscall. func canRedial(err error) bool { //nolint:errorlint // guaranteed to be an Errno switch err { @@ -447,9 +454,9 @@ func (conn *HvsockConn) Read(b []byte) (int, error) { return 0, conn.opErr("read", err) } defer conn.sock.wg.Done() - buf := syscall.WSABuf{Buf: &b[0], Len: uint32(len(b))} + buf := windows.WSABuf{Buf: &b[0], Len: uint32(len(b))} var flags, bytes uint32 - err = syscall.WSARecv(conn.sock.handle, &buf, 1, &bytes, &flags, &c.o, nil) + err = windows.WSARecv(conn.sock.handle, &buf, 1, &bytes, &flags, &c.o, nil) n, err := conn.sock.asyncIO(c, &conn.sock.readDeadline, bytes, err) if err != nil { var eno windows.Errno @@ -482,9 +489,9 @@ func (conn *HvsockConn) write(b []byte) (int, error) { return 0, conn.opErr("write", err) } defer conn.sock.wg.Done() - buf := syscall.WSABuf{Buf: &b[0], Len: uint32(len(b))} + buf := windows.WSABuf{Buf: &b[0], Len: uint32(len(b))} var bytes uint32 - err = syscall.WSASend(conn.sock.handle, &buf, 1, &bytes, 0, &c.o, nil) + err = windows.WSASend(conn.sock.handle, &buf, 1, &bytes, 0, &c.o, nil) n, err := conn.sock.asyncIO(c, &conn.sock.writeDeadline, bytes, err) if err != nil { var eno windows.Errno @@ -511,7 +518,7 @@ func (conn *HvsockConn) shutdown(how int) error { return socket.ErrSocketClosed } - err := syscall.Shutdown(conn.sock.handle, how) + err := windows.Shutdown(conn.sock.handle, how) if err != nil { // If the connection was closed, shutdowns fail with "not connected" if errors.Is(err, windows.WSAENOTCONN) || @@ -525,7 +532,7 @@ func (conn *HvsockConn) shutdown(how int) error { // CloseRead shuts down the read end of the socket, preventing future read operations. func (conn *HvsockConn) CloseRead() error { - err := conn.shutdown(syscall.SHUT_RD) + err := conn.shutdown(windows.SHUT_RD) if err != nil { return conn.opErr("closeread", err) } @@ -535,7 +542,7 @@ func (conn *HvsockConn) CloseRead() error { // CloseWrite shuts down the write end of the socket, preventing future write operations and // notifying the other endpoint that no more data will be written. func (conn *HvsockConn) CloseWrite() error { - err := conn.shutdown(syscall.SHUT_WR) + err := conn.shutdown(windows.SHUT_WR) if err != nil { return conn.opErr("closewrite", err) } diff --git a/vendor/github.com/Microsoft/go-winio/internal/fs/fs.go b/vendor/github.com/Microsoft/go-winio/internal/fs/fs.go index 509b3ec6..0cd9621d 100644 --- a/vendor/github.com/Microsoft/go-winio/internal/fs/fs.go +++ b/vendor/github.com/Microsoft/go-winio/internal/fs/fs.go @@ -11,12 +11,14 @@ import ( //go:generate go run github.com/Microsoft/go-winio/tools/mkwinsyscall -output zsyscall_windows.go fs.go // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilew -//sys CreateFile(name string, access AccessMask, mode FileShareMode, sa *syscall.SecurityAttributes, createmode FileCreationDisposition, attrs FileFlagOrAttribute, templatefile windows.Handle) (handle windows.Handle, err error) [failretval==windows.InvalidHandle] = CreateFileW +//sys CreateFile(name string, access AccessMask, mode FileShareMode, sa *windows.SecurityAttributes, createmode FileCreationDisposition, attrs FileFlagOrAttribute, templatefile windows.Handle) (handle windows.Handle, err error) [failretval==windows.InvalidHandle] = CreateFileW const NullHandle windows.Handle = 0 // AccessMask defines standard, specific, and generic rights. // +// Used with CreateFile and NtCreateFile (and co.). +// // Bitmask: // 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 // 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 @@ -47,6 +49,12 @@ const ( // https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilew#parameters FILE_ANY_ACCESS AccessMask = 0 + GENERIC_READ AccessMask = 0x8000_0000 + GENERIC_WRITE AccessMask = 0x4000_0000 + GENERIC_EXECUTE AccessMask = 0x2000_0000 + GENERIC_ALL AccessMask = 0x1000_0000 + ACCESS_SYSTEM_SECURITY AccessMask = 0x0100_0000 + // Specific Object Access // from ntioapi.h @@ -124,14 +132,32 @@ const ( TRUNCATE_EXISTING FileCreationDisposition = 0x05 ) +// Create disposition values for NtCreate* +type NTFileCreationDisposition uint32 + +//nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API. +const ( + // From ntioapi.h + + FILE_SUPERSEDE NTFileCreationDisposition = 0x00 + FILE_OPEN NTFileCreationDisposition = 0x01 + FILE_CREATE NTFileCreationDisposition = 0x02 + FILE_OPEN_IF NTFileCreationDisposition = 0x03 + FILE_OVERWRITE NTFileCreationDisposition = 0x04 + FILE_OVERWRITE_IF NTFileCreationDisposition = 0x05 + FILE_MAXIMUM_DISPOSITION NTFileCreationDisposition = 0x05 +) + // CreateFile and co. take flags or attributes together as one parameter. // Define alias until we can use generics to allow both - +// // https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants type FileFlagOrAttribute uint32 //nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API. -const ( // from winnt.h +const ( + // from winnt.h + FILE_FLAG_WRITE_THROUGH FileFlagOrAttribute = 0x8000_0000 FILE_FLAG_OVERLAPPED FileFlagOrAttribute = 0x4000_0000 FILE_FLAG_NO_BUFFERING FileFlagOrAttribute = 0x2000_0000 @@ -145,17 +171,51 @@ const ( // from winnt.h FILE_FLAG_FIRST_PIPE_INSTANCE FileFlagOrAttribute = 0x0008_0000 ) +// NtCreate* functions take a dedicated CreateOptions parameter. +// +// https://learn.microsoft.com/en-us/windows/win32/api/Winternl/nf-winternl-ntcreatefile +// +// https://learn.microsoft.com/en-us/windows/win32/devnotes/nt-create-named-pipe-file +type NTCreateOptions uint32 + +//nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API. +const ( + // From ntioapi.h + + FILE_DIRECTORY_FILE NTCreateOptions = 0x0000_0001 + FILE_WRITE_THROUGH NTCreateOptions = 0x0000_0002 + FILE_SEQUENTIAL_ONLY NTCreateOptions = 0x0000_0004 + FILE_NO_INTERMEDIATE_BUFFERING NTCreateOptions = 0x0000_0008 + + FILE_SYNCHRONOUS_IO_ALERT NTCreateOptions = 0x0000_0010 + FILE_SYNCHRONOUS_IO_NONALERT NTCreateOptions = 0x0000_0020 + FILE_NON_DIRECTORY_FILE NTCreateOptions = 0x0000_0040 + FILE_CREATE_TREE_CONNECTION NTCreateOptions = 0x0000_0080 + + FILE_COMPLETE_IF_OPLOCKED NTCreateOptions = 0x0000_0100 + FILE_NO_EA_KNOWLEDGE NTCreateOptions = 0x0000_0200 + FILE_DISABLE_TUNNELING NTCreateOptions = 0x0000_0400 + FILE_RANDOM_ACCESS NTCreateOptions = 0x0000_0800 + + FILE_DELETE_ON_CLOSE NTCreateOptions = 0x0000_1000 + FILE_OPEN_BY_FILE_ID NTCreateOptions = 0x0000_2000 + FILE_OPEN_FOR_BACKUP_INTENT NTCreateOptions = 0x0000_4000 + FILE_NO_COMPRESSION NTCreateOptions = 0x0000_8000 +) + type FileSQSFlag = FileFlagOrAttribute //nolint:revive // SNAKE_CASE is not idiomatic in Go, but aligned with Win32 API. -const ( // from winbase.h +const ( + // from winbase.h + SECURITY_ANONYMOUS FileSQSFlag = FileSQSFlag(SecurityAnonymous << 16) SECURITY_IDENTIFICATION FileSQSFlag = FileSQSFlag(SecurityIdentification << 16) SECURITY_IMPERSONATION FileSQSFlag = FileSQSFlag(SecurityImpersonation << 16) SECURITY_DELEGATION FileSQSFlag = FileSQSFlag(SecurityDelegation << 16) - SECURITY_SQOS_PRESENT FileSQSFlag = 0x00100000 - SECURITY_VALID_SQOS_FLAGS FileSQSFlag = 0x001F0000 + SECURITY_SQOS_PRESENT FileSQSFlag = 0x0010_0000 + SECURITY_VALID_SQOS_FLAGS FileSQSFlag = 0x001F_0000 ) // GetFinalPathNameByHandle flags diff --git a/vendor/github.com/Microsoft/go-winio/internal/fs/zsyscall_windows.go b/vendor/github.com/Microsoft/go-winio/internal/fs/zsyscall_windows.go index e2f7bb24..a94e234c 100644 --- a/vendor/github.com/Microsoft/go-winio/internal/fs/zsyscall_windows.go +++ b/vendor/github.com/Microsoft/go-winio/internal/fs/zsyscall_windows.go @@ -33,9 +33,6 @@ func errnoErr(e syscall.Errno) error { case errnoERROR_IO_PENDING: return errERROR_IO_PENDING } - // TODO: add more here, after collecting data on the common - // error values see on Windows. (perhaps when running - // all.bat?) return e } @@ -45,7 +42,7 @@ var ( procCreateFileW = modkernel32.NewProc("CreateFileW") ) -func CreateFile(name string, access AccessMask, mode FileShareMode, sa *syscall.SecurityAttributes, createmode FileCreationDisposition, attrs FileFlagOrAttribute, templatefile windows.Handle) (handle windows.Handle, err error) { +func CreateFile(name string, access AccessMask, mode FileShareMode, sa *windows.SecurityAttributes, createmode FileCreationDisposition, attrs FileFlagOrAttribute, templatefile windows.Handle) (handle windows.Handle, err error) { var _p0 *uint16 _p0, err = syscall.UTF16PtrFromString(name) if err != nil { @@ -54,8 +51,8 @@ func CreateFile(name string, access AccessMask, mode FileShareMode, sa *syscall. return _CreateFile(_p0, access, mode, sa, createmode, attrs, templatefile) } -func _CreateFile(name *uint16, access AccessMask, mode FileShareMode, sa *syscall.SecurityAttributes, createmode FileCreationDisposition, attrs FileFlagOrAttribute, templatefile windows.Handle) (handle windows.Handle, err error) { - r0, _, e1 := syscall.Syscall9(procCreateFileW.Addr(), 7, uintptr(unsafe.Pointer(name)), uintptr(access), uintptr(mode), uintptr(unsafe.Pointer(sa)), uintptr(createmode), uintptr(attrs), uintptr(templatefile), 0, 0) +func _CreateFile(name *uint16, access AccessMask, mode FileShareMode, sa *windows.SecurityAttributes, createmode FileCreationDisposition, attrs FileFlagOrAttribute, templatefile windows.Handle) (handle windows.Handle, err error) { + r0, _, e1 := syscall.SyscallN(procCreateFileW.Addr(), uintptr(unsafe.Pointer(name)), uintptr(access), uintptr(mode), uintptr(unsafe.Pointer(sa)), uintptr(createmode), uintptr(attrs), uintptr(templatefile)) handle = windows.Handle(r0) if handle == windows.InvalidHandle { err = errnoErr(e1) diff --git a/vendor/github.com/Microsoft/go-winio/internal/socket/socket.go b/vendor/github.com/Microsoft/go-winio/internal/socket/socket.go index aeb7b725..88580d97 100644 --- a/vendor/github.com/Microsoft/go-winio/internal/socket/socket.go +++ b/vendor/github.com/Microsoft/go-winio/internal/socket/socket.go @@ -156,9 +156,7 @@ func connectEx( bytesSent *uint32, overlapped *windows.Overlapped, ) (err error) { - // todo: after upgrading to 1.18, switch from syscall.Syscall9 to syscall.SyscallN - r1, _, e1 := syscall.Syscall9(connectExFunc.addr, - 7, + r1, _, e1 := syscall.SyscallN(connectExFunc.addr, uintptr(s), uintptr(name), uintptr(namelen), @@ -166,8 +164,8 @@ func connectEx( uintptr(sendDataLen), uintptr(unsafe.Pointer(bytesSent)), uintptr(unsafe.Pointer(overlapped)), - 0, - 0) + ) + if r1 == 0 { if e1 != 0 { err = error(e1) diff --git a/vendor/github.com/Microsoft/go-winio/internal/socket/zsyscall_windows.go b/vendor/github.com/Microsoft/go-winio/internal/socket/zsyscall_windows.go index 6d2e1a9e..e1504126 100644 --- a/vendor/github.com/Microsoft/go-winio/internal/socket/zsyscall_windows.go +++ b/vendor/github.com/Microsoft/go-winio/internal/socket/zsyscall_windows.go @@ -33,9 +33,6 @@ func errnoErr(e syscall.Errno) error { case errnoERROR_IO_PENDING: return errERROR_IO_PENDING } - // TODO: add more here, after collecting data on the common - // error values see on Windows. (perhaps when running - // all.bat?) return e } @@ -48,7 +45,7 @@ var ( ) func bind(s windows.Handle, name unsafe.Pointer, namelen int32) (err error) { - r1, _, e1 := syscall.Syscall(procbind.Addr(), 3, uintptr(s), uintptr(name), uintptr(namelen)) + r1, _, e1 := syscall.SyscallN(procbind.Addr(), uintptr(s), uintptr(name), uintptr(namelen)) if r1 == socketError { err = errnoErr(e1) } @@ -56,7 +53,7 @@ func bind(s windows.Handle, name unsafe.Pointer, namelen int32) (err error) { } func getpeername(s windows.Handle, name unsafe.Pointer, namelen *int32) (err error) { - r1, _, e1 := syscall.Syscall(procgetpeername.Addr(), 3, uintptr(s), uintptr(name), uintptr(unsafe.Pointer(namelen))) + r1, _, e1 := syscall.SyscallN(procgetpeername.Addr(), uintptr(s), uintptr(name), uintptr(unsafe.Pointer(namelen))) if r1 == socketError { err = errnoErr(e1) } @@ -64,7 +61,7 @@ func getpeername(s windows.Handle, name unsafe.Pointer, namelen *int32) (err err } func getsockname(s windows.Handle, name unsafe.Pointer, namelen *int32) (err error) { - r1, _, e1 := syscall.Syscall(procgetsockname.Addr(), 3, uintptr(s), uintptr(name), uintptr(unsafe.Pointer(namelen))) + r1, _, e1 := syscall.SyscallN(procgetsockname.Addr(), uintptr(s), uintptr(name), uintptr(unsafe.Pointer(namelen))) if r1 == socketError { err = errnoErr(e1) } diff --git a/vendor/github.com/Microsoft/go-winio/internal/stringbuffer/wstring.go b/vendor/github.com/Microsoft/go-winio/internal/stringbuffer/wstring.go index 7ad50570..42ebc019 100644 --- a/vendor/github.com/Microsoft/go-winio/internal/stringbuffer/wstring.go +++ b/vendor/github.com/Microsoft/go-winio/internal/stringbuffer/wstring.go @@ -62,7 +62,7 @@ func (b *WString) Free() { // ResizeTo grows the buffer to at least c and returns the new capacity, freeing the // previous buffer back into pool. func (b *WString) ResizeTo(c uint32) uint32 { - // allready sufficient (or n is 0) + // already sufficient (or n is 0) if c <= b.Cap() { return b.Cap() } diff --git a/vendor/github.com/Microsoft/go-winio/pipe.go b/vendor/github.com/Microsoft/go-winio/pipe.go index 25cc8110..a2da6639 100644 --- a/vendor/github.com/Microsoft/go-winio/pipe.go +++ b/vendor/github.com/Microsoft/go-winio/pipe.go @@ -11,7 +11,6 @@ import ( "net" "os" "runtime" - "syscall" "time" "unsafe" @@ -20,20 +19,44 @@ import ( "github.com/Microsoft/go-winio/internal/fs" ) -//sys connectNamedPipe(pipe syscall.Handle, o *syscall.Overlapped) (err error) = ConnectNamedPipe -//sys createNamedPipe(name string, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *syscall.SecurityAttributes) (handle syscall.Handle, err error) [failretval==syscall.InvalidHandle] = CreateNamedPipeW -//sys getNamedPipeInfo(pipe syscall.Handle, flags *uint32, outSize *uint32, inSize *uint32, maxInstances *uint32) (err error) = GetNamedPipeInfo -//sys getNamedPipeHandleState(pipe syscall.Handle, state *uint32, curInstances *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32, userName *uint16, maxUserNameSize uint32) (err error) = GetNamedPipeHandleStateW -//sys localAlloc(uFlags uint32, length uint32) (ptr uintptr) = LocalAlloc -//sys ntCreateNamedPipeFile(pipe *syscall.Handle, access uint32, oa *objectAttributes, iosb *ioStatusBlock, share uint32, disposition uint32, options uint32, typ uint32, readMode uint32, completionMode uint32, maxInstances uint32, inboundQuota uint32, outputQuota uint32, timeout *int64) (status ntStatus) = ntdll.NtCreateNamedPipeFile +//sys connectNamedPipe(pipe windows.Handle, o *windows.Overlapped) (err error) = ConnectNamedPipe +//sys createNamedPipe(name string, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *windows.SecurityAttributes) (handle windows.Handle, err error) [failretval==windows.InvalidHandle] = CreateNamedPipeW +//sys disconnectNamedPipe(pipe windows.Handle) (err error) = DisconnectNamedPipe +//sys getNamedPipeInfo(pipe windows.Handle, flags *uint32, outSize *uint32, inSize *uint32, maxInstances *uint32) (err error) = GetNamedPipeInfo +//sys getNamedPipeHandleState(pipe windows.Handle, state *uint32, curInstances *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32, userName *uint16, maxUserNameSize uint32) (err error) = GetNamedPipeHandleStateW +//sys ntCreateNamedPipeFile(pipe *windows.Handle, access ntAccessMask, oa *objectAttributes, iosb *ioStatusBlock, share ntFileShareMode, disposition ntFileCreationDisposition, options ntFileOptions, typ uint32, readMode uint32, completionMode uint32, maxInstances uint32, inboundQuota uint32, outputQuota uint32, timeout *int64) (status ntStatus) = ntdll.NtCreateNamedPipeFile //sys rtlNtStatusToDosError(status ntStatus) (winerr error) = ntdll.RtlNtStatusToDosErrorNoTeb //sys rtlDosPathNameToNtPathName(name *uint16, ntName *unicodeString, filePart uintptr, reserved uintptr) (status ntStatus) = ntdll.RtlDosPathNameToNtPathName_U //sys rtlDefaultNpAcl(dacl *uintptr) (status ntStatus) = ntdll.RtlDefaultNpAcl +type PipeConn interface { + net.Conn + Disconnect() error + Flush() error +} + +// type aliases for mkwinsyscall code +type ( + ntAccessMask = fs.AccessMask + ntFileShareMode = fs.FileShareMode + ntFileCreationDisposition = fs.NTFileCreationDisposition + ntFileOptions = fs.NTCreateOptions +) + type ioStatusBlock struct { Status, Information uintptr } +// typedef struct _OBJECT_ATTRIBUTES { +// ULONG Length; +// HANDLE RootDirectory; +// PUNICODE_STRING ObjectName; +// ULONG Attributes; +// PVOID SecurityDescriptor; +// PVOID SecurityQualityOfService; +// } OBJECT_ATTRIBUTES; +// +// https://learn.microsoft.com/en-us/windows/win32/api/ntdef/ns-ntdef-_object_attributes type objectAttributes struct { Length uintptr RootDirectory uintptr @@ -49,6 +72,17 @@ type unicodeString struct { Buffer uintptr } +// typedef struct _SECURITY_DESCRIPTOR { +// BYTE Revision; +// BYTE Sbz1; +// SECURITY_DESCRIPTOR_CONTROL Control; +// PSID Owner; +// PSID Group; +// PACL Sacl; +// PACL Dacl; +// } SECURITY_DESCRIPTOR, *PISECURITY_DESCRIPTOR; +// +// https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-security_descriptor type securityDescriptor struct { Revision byte Sbz1 byte @@ -80,6 +114,8 @@ type win32Pipe struct { path string } +var _ PipeConn = (*win32Pipe)(nil) + type win32MessageBytePipe struct { win32Pipe writeClosed bool @@ -103,6 +139,10 @@ func (f *win32Pipe) SetDeadline(t time.Time) error { return f.SetWriteDeadline(t) } +func (f *win32Pipe) Disconnect() error { + return disconnectNamedPipe(f.win32File.handle) +} + // CloseWrite closes the write side of a message pipe in byte mode. func (f *win32MessageBytePipe) CloseWrite() error { if f.writeClosed { @@ -146,7 +186,7 @@ func (f *win32MessageBytePipe) Read(b []byte) (int, error) { // zero-byte message, ensure that all future Read() calls // also return EOF. f.readEOF = true - } else if err == syscall.ERROR_MORE_DATA { //nolint:errorlint // err is Errno + } else if err == windows.ERROR_MORE_DATA { //nolint:errorlint // err is Errno // ERROR_MORE_DATA indicates that the pipe's read mode is message mode // and the message still has more bytes. Treat this as a success, since // this package presents all named pipes as byte streams. @@ -164,21 +204,20 @@ func (s pipeAddress) String() string { } // tryDialPipe attempts to dial the pipe at `path` until `ctx` cancellation or timeout. -func tryDialPipe(ctx context.Context, path *string, access fs.AccessMask) (syscall.Handle, error) { +func tryDialPipe(ctx context.Context, path *string, access fs.AccessMask, impLevel PipeImpLevel) (windows.Handle, error) { for { select { case <-ctx.Done(): - return syscall.Handle(0), ctx.Err() + return windows.Handle(0), ctx.Err() default: - wh, err := fs.CreateFile(*path, + h, err := fs.CreateFile(*path, access, 0, // mode nil, // security attributes fs.OPEN_EXISTING, - fs.FILE_FLAG_OVERLAPPED|fs.SECURITY_SQOS_PRESENT|fs.SECURITY_ANONYMOUS, + fs.FILE_FLAG_OVERLAPPED|fs.SECURITY_SQOS_PRESENT|fs.FileSQSFlag(impLevel), 0, // template file handle ) - h := syscall.Handle(wh) if err == nil { return h, nil } @@ -214,15 +253,33 @@ func DialPipe(path string, timeout *time.Duration) (net.Conn, error) { // DialPipeContext attempts to connect to a named pipe by `path` until `ctx` // cancellation or timeout. func DialPipeContext(ctx context.Context, path string) (net.Conn, error) { - return DialPipeAccess(ctx, path, syscall.GENERIC_READ|syscall.GENERIC_WRITE) + return DialPipeAccess(ctx, path, uint32(fs.GENERIC_READ|fs.GENERIC_WRITE)) } +// PipeImpLevel is an enumeration of impersonation levels that may be set +// when calling DialPipeAccessImpersonation. +type PipeImpLevel uint32 + +const ( + PipeImpLevelAnonymous = PipeImpLevel(fs.SECURITY_ANONYMOUS) + PipeImpLevelIdentification = PipeImpLevel(fs.SECURITY_IDENTIFICATION) + PipeImpLevelImpersonation = PipeImpLevel(fs.SECURITY_IMPERSONATION) + PipeImpLevelDelegation = PipeImpLevel(fs.SECURITY_DELEGATION) +) + // DialPipeAccess attempts to connect to a named pipe by `path` with `access` until `ctx` // cancellation or timeout. func DialPipeAccess(ctx context.Context, path string, access uint32) (net.Conn, error) { + return DialPipeAccessImpLevel(ctx, path, access, PipeImpLevelAnonymous) +} + +// DialPipeAccessImpLevel attempts to connect to a named pipe by `path` with +// `access` at `impLevel` until `ctx` cancellation or timeout. The other +// DialPipe* implementations use PipeImpLevelAnonymous. +func DialPipeAccessImpLevel(ctx context.Context, path string, access uint32, impLevel PipeImpLevel) (net.Conn, error) { var err error - var h syscall.Handle - h, err = tryDialPipe(ctx, &path, fs.AccessMask(access)) + var h windows.Handle + h, err = tryDialPipe(ctx, &path, fs.AccessMask(access), impLevel) if err != nil { return nil, err } @@ -235,7 +292,7 @@ func DialPipeAccess(ctx context.Context, path string, access uint32) (net.Conn, f, err := makeWin32File(h) if err != nil { - syscall.Close(h) + windows.Close(h) return nil, err } @@ -255,7 +312,7 @@ type acceptResponse struct { } type win32PipeListener struct { - firstHandle syscall.Handle + firstHandle windows.Handle path string config PipeConfig acceptCh chan (chan acceptResponse) @@ -263,8 +320,8 @@ type win32PipeListener struct { doneCh chan int } -func makeServerPipeHandle(path string, sd []byte, c *PipeConfig, first bool) (syscall.Handle, error) { - path16, err := syscall.UTF16FromString(path) +func makeServerPipeHandle(path string, sd []byte, c *PipeConfig, first bool) (windows.Handle, error) { + path16, err := windows.UTF16FromString(path) if err != nil { return 0, &os.PathError{Op: "open", Path: path, Err: err} } @@ -280,16 +337,20 @@ func makeServerPipeHandle(path string, sd []byte, c *PipeConfig, first bool) (sy ).Err(); err != nil { return 0, &os.PathError{Op: "open", Path: path, Err: err} } - defer localFree(ntPath.Buffer) + defer windows.LocalFree(windows.Handle(ntPath.Buffer)) //nolint:errcheck oa.ObjectName = &ntPath oa.Attributes = windows.OBJ_CASE_INSENSITIVE // The security descriptor is only needed for the first pipe. if first { if sd != nil { + //todo: does `sdb` need to be allocated on the heap, or can go allocate it? l := uint32(len(sd)) - sdb := localAlloc(0, l) - defer localFree(sdb) + sdb, err := windows.LocalAlloc(0, l) + if err != nil { + return 0, fmt.Errorf("LocalAlloc for security descriptor with of length %d: %w", l, err) + } + defer windows.LocalFree(windows.Handle(sdb)) //nolint:errcheck copy((*[0xffff]byte)(unsafe.Pointer(sdb))[:], sd) oa.SecurityDescriptor = (*securityDescriptor)(unsafe.Pointer(sdb)) } else { @@ -298,7 +359,7 @@ func makeServerPipeHandle(path string, sd []byte, c *PipeConfig, first bool) (sy if err := rtlDefaultNpAcl(&dacl).Err(); err != nil { return 0, fmt.Errorf("getting default named pipe ACL: %w", err) } - defer localFree(dacl) + defer windows.LocalFree(windows.Handle(dacl)) //nolint:errcheck sdb := &securityDescriptor{ Revision: 1, @@ -314,27 +375,27 @@ func makeServerPipeHandle(path string, sd []byte, c *PipeConfig, first bool) (sy typ |= windows.FILE_PIPE_MESSAGE_TYPE } - disposition := uint32(windows.FILE_OPEN) - access := uint32(syscall.GENERIC_READ | syscall.GENERIC_WRITE | syscall.SYNCHRONIZE) + disposition := fs.FILE_OPEN + access := fs.GENERIC_READ | fs.GENERIC_WRITE | fs.SYNCHRONIZE if first { - disposition = windows.FILE_CREATE + disposition = fs.FILE_CREATE // By not asking for read or write access, the named pipe file system // will put this pipe into an initially disconnected state, blocking // client connections until the next call with first == false. - access = syscall.SYNCHRONIZE + access = fs.SYNCHRONIZE } timeout := int64(-50 * 10000) // 50ms var ( - h syscall.Handle + h windows.Handle iosb ioStatusBlock ) err = ntCreateNamedPipeFile(&h, access, &oa, &iosb, - syscall.FILE_SHARE_READ|syscall.FILE_SHARE_WRITE, + fs.FILE_SHARE_READ|fs.FILE_SHARE_WRITE, disposition, 0, typ, @@ -359,7 +420,7 @@ func (l *win32PipeListener) makeServerPipe() (*win32File, error) { } f, err := makeWin32File(h) if err != nil { - syscall.Close(h) + windows.Close(h) return nil, err } return f, nil @@ -418,7 +479,7 @@ func (l *win32PipeListener) listenerRoutine() { closed = err == ErrPipeListenerClosed //nolint:errorlint // err is Errno } } - syscall.Close(l.firstHandle) + windows.Close(l.firstHandle) l.firstHandle = 0 // Notify Close() and Accept() callers that the handle has been closed. close(l.doneCh) diff --git a/vendor/github.com/Microsoft/go-winio/privilege.go b/vendor/github.com/Microsoft/go-winio/privilege.go index 0ff9dac9..d9b90b6e 100644 --- a/vendor/github.com/Microsoft/go-winio/privilege.go +++ b/vendor/github.com/Microsoft/go-winio/privilege.go @@ -9,7 +9,6 @@ import ( "fmt" "runtime" "sync" - "syscall" "unicode/utf16" "golang.org/x/sys/windows" @@ -18,8 +17,8 @@ import ( //sys adjustTokenPrivileges(token windows.Token, releaseAll bool, input *byte, outputSize uint32, output *byte, requiredSize *uint32) (success bool, err error) [true] = advapi32.AdjustTokenPrivileges //sys impersonateSelf(level uint32) (err error) = advapi32.ImpersonateSelf //sys revertToSelf() (err error) = advapi32.RevertToSelf -//sys openThreadToken(thread syscall.Handle, accessMask uint32, openAsSelf bool, token *windows.Token) (err error) = advapi32.OpenThreadToken -//sys getCurrentThread() (h syscall.Handle) = GetCurrentThread +//sys openThreadToken(thread windows.Handle, accessMask uint32, openAsSelf bool, token *windows.Token) (err error) = advapi32.OpenThreadToken +//sys getCurrentThread() (h windows.Handle) = GetCurrentThread //sys lookupPrivilegeValue(systemName string, name string, luid *uint64) (err error) = advapi32.LookupPrivilegeValueW //sys lookupPrivilegeName(systemName string, luid *uint64, buffer *uint16, size *uint32) (err error) = advapi32.LookupPrivilegeNameW //sys lookupPrivilegeDisplayName(systemName string, name *uint16, buffer *uint16, size *uint32, languageId *uint32) (err error) = advapi32.LookupPrivilegeDisplayNameW @@ -29,7 +28,7 @@ const ( SE_PRIVILEGE_ENABLED = windows.SE_PRIVILEGE_ENABLED //revive:disable-next-line:var-naming ALL_CAPS - ERROR_NOT_ALL_ASSIGNED syscall.Errno = windows.ERROR_NOT_ALL_ASSIGNED + ERROR_NOT_ALL_ASSIGNED windows.Errno = windows.ERROR_NOT_ALL_ASSIGNED SeBackupPrivilege = "SeBackupPrivilege" SeRestorePrivilege = "SeRestorePrivilege" @@ -177,7 +176,7 @@ func newThreadToken() (windows.Token, error) { } var token windows.Token - err = openThreadToken(getCurrentThread(), syscall.TOKEN_ADJUST_PRIVILEGES|syscall.TOKEN_QUERY, false, &token) + err = openThreadToken(getCurrentThread(), windows.TOKEN_ADJUST_PRIVILEGES|windows.TOKEN_QUERY, false, &token) if err != nil { rerr := revertToSelf() if rerr != nil { diff --git a/vendor/github.com/Microsoft/go-winio/sd.go b/vendor/github.com/Microsoft/go-winio/sd.go index 5550ef6b..c3685e98 100644 --- a/vendor/github.com/Microsoft/go-winio/sd.go +++ b/vendor/github.com/Microsoft/go-winio/sd.go @@ -5,7 +5,7 @@ package winio import ( "errors" - "syscall" + "fmt" "unsafe" "golang.org/x/sys/windows" @@ -15,10 +15,6 @@ import ( //sys lookupAccountSid(systemName *uint16, sid *byte, name *uint16, nameSize *uint32, refDomain *uint16, refDomainSize *uint32, sidNameUse *uint32) (err error) = advapi32.LookupAccountSidW //sys convertSidToStringSid(sid *byte, str **uint16) (err error) = advapi32.ConvertSidToStringSidW //sys convertStringSidToSid(str *uint16, sid **byte) (err error) = advapi32.ConvertStringSidToSidW -//sys convertStringSecurityDescriptorToSecurityDescriptor(str string, revision uint32, sd *uintptr, size *uint32) (err error) = advapi32.ConvertStringSecurityDescriptorToSecurityDescriptorW -//sys convertSecurityDescriptorToStringSecurityDescriptor(sd *byte, revision uint32, secInfo uint32, sddl **uint16, sddlSize *uint32) (err error) = advapi32.ConvertSecurityDescriptorToStringSecurityDescriptorW -//sys localFree(mem uintptr) = LocalFree -//sys getSecurityDescriptorLength(sd uintptr) (len uint32) = advapi32.GetSecurityDescriptorLength type AccountLookupError struct { Name string @@ -64,7 +60,7 @@ func LookupSidByName(name string) (sid string, err error) { var sidSize, sidNameUse, refDomainSize uint32 err = lookupAccountName(nil, name, nil, &sidSize, nil, &refDomainSize, &sidNameUse) - if err != nil && err != syscall.ERROR_INSUFFICIENT_BUFFER { //nolint:errorlint // err is Errno + if err != nil && err != windows.ERROR_INSUFFICIENT_BUFFER { //nolint:errorlint // err is Errno return "", &AccountLookupError{name, err} } sidBuffer := make([]byte, sidSize) @@ -78,8 +74,8 @@ func LookupSidByName(name string) (sid string, err error) { if err != nil { return "", &AccountLookupError{name, err} } - sid = syscall.UTF16ToString((*[0xffff]uint16)(unsafe.Pointer(strBuffer))[:]) - localFree(uintptr(unsafe.Pointer(strBuffer))) + sid = windows.UTF16ToString((*[0xffff]uint16)(unsafe.Pointer(strBuffer))[:]) + _, _ = windows.LocalFree(windows.Handle(unsafe.Pointer(strBuffer))) return sid, nil } @@ -100,7 +96,7 @@ func LookupNameBySid(sid string) (name string, err error) { if err = convertStringSidToSid(sidBuffer, &sidPtr); err != nil { return "", &AccountLookupError{sid, err} } - defer localFree(uintptr(unsafe.Pointer(sidPtr))) + defer windows.LocalFree(windows.Handle(unsafe.Pointer(sidPtr))) //nolint:errcheck var nameSize, refDomainSize, sidNameUse uint32 err = lookupAccountSid(nil, sidPtr, nil, &nameSize, nil, &refDomainSize, &sidNameUse) @@ -120,25 +116,18 @@ func LookupNameBySid(sid string) (name string, err error) { } func SddlToSecurityDescriptor(sddl string) ([]byte, error) { - var sdBuffer uintptr - err := convertStringSecurityDescriptorToSecurityDescriptor(sddl, 1, &sdBuffer, nil) + sd, err := windows.SecurityDescriptorFromString(sddl) if err != nil { - return nil, &SddlConversionError{sddl, err} + return nil, &SddlConversionError{Sddl: sddl, Err: err} } - defer localFree(sdBuffer) - sd := make([]byte, getSecurityDescriptorLength(sdBuffer)) - copy(sd, (*[0xffff]byte)(unsafe.Pointer(sdBuffer))[:len(sd)]) - return sd, nil + b := unsafe.Slice((*byte)(unsafe.Pointer(sd)), sd.Length()) + return b, nil } func SecurityDescriptorToSddl(sd []byte) (string, error) { - var sddl *uint16 - // The returned string length seems to include an arbitrary number of terminating NULs. - // Don't use it. - err := convertSecurityDescriptorToStringSecurityDescriptor(&sd[0], 1, 0xff, &sddl, nil) - if err != nil { - return "", err + if l := int(unsafe.Sizeof(windows.SECURITY_DESCRIPTOR{})); len(sd) < l { + return "", fmt.Errorf("SecurityDescriptor (%d) smaller than expected (%d): %w", len(sd), l, windows.ERROR_INCORRECT_SIZE) } - defer localFree(uintptr(unsafe.Pointer(sddl))) - return syscall.UTF16ToString((*[0xffff]uint16)(unsafe.Pointer(sddl))[:]), nil + s := (*windows.SECURITY_DESCRIPTOR)(unsafe.Pointer(&sd[0])) + return s.String(), nil } diff --git a/vendor/github.com/Microsoft/go-winio/tools.go b/vendor/github.com/Microsoft/go-winio/tools.go deleted file mode 100644 index 2aa04584..00000000 --- a/vendor/github.com/Microsoft/go-winio/tools.go +++ /dev/null @@ -1,5 +0,0 @@ -//go:build tools - -package winio - -import _ "golang.org/x/tools/cmd/stringer" diff --git a/vendor/github.com/Microsoft/go-winio/zsyscall_windows.go b/vendor/github.com/Microsoft/go-winio/zsyscall_windows.go index 469b16f6..89b66eda 100644 --- a/vendor/github.com/Microsoft/go-winio/zsyscall_windows.go +++ b/vendor/github.com/Microsoft/go-winio/zsyscall_windows.go @@ -33,9 +33,6 @@ func errnoErr(e syscall.Errno) error { case errnoERROR_IO_PENDING: return errERROR_IO_PENDING } - // TODO: add more here, after collecting data on the common - // error values see on Windows. (perhaps when running - // all.bat?) return e } @@ -45,38 +42,34 @@ var ( modntdll = windows.NewLazySystemDLL("ntdll.dll") modws2_32 = windows.NewLazySystemDLL("ws2_32.dll") - procAdjustTokenPrivileges = modadvapi32.NewProc("AdjustTokenPrivileges") - procConvertSecurityDescriptorToStringSecurityDescriptorW = modadvapi32.NewProc("ConvertSecurityDescriptorToStringSecurityDescriptorW") - procConvertSidToStringSidW = modadvapi32.NewProc("ConvertSidToStringSidW") - procConvertStringSecurityDescriptorToSecurityDescriptorW = modadvapi32.NewProc("ConvertStringSecurityDescriptorToSecurityDescriptorW") - procConvertStringSidToSidW = modadvapi32.NewProc("ConvertStringSidToSidW") - procGetSecurityDescriptorLength = modadvapi32.NewProc("GetSecurityDescriptorLength") - procImpersonateSelf = modadvapi32.NewProc("ImpersonateSelf") - procLookupAccountNameW = modadvapi32.NewProc("LookupAccountNameW") - procLookupAccountSidW = modadvapi32.NewProc("LookupAccountSidW") - procLookupPrivilegeDisplayNameW = modadvapi32.NewProc("LookupPrivilegeDisplayNameW") - procLookupPrivilegeNameW = modadvapi32.NewProc("LookupPrivilegeNameW") - procLookupPrivilegeValueW = modadvapi32.NewProc("LookupPrivilegeValueW") - procOpenThreadToken = modadvapi32.NewProc("OpenThreadToken") - procRevertToSelf = modadvapi32.NewProc("RevertToSelf") - procBackupRead = modkernel32.NewProc("BackupRead") - procBackupWrite = modkernel32.NewProc("BackupWrite") - procCancelIoEx = modkernel32.NewProc("CancelIoEx") - procConnectNamedPipe = modkernel32.NewProc("ConnectNamedPipe") - procCreateIoCompletionPort = modkernel32.NewProc("CreateIoCompletionPort") - procCreateNamedPipeW = modkernel32.NewProc("CreateNamedPipeW") - procGetCurrentThread = modkernel32.NewProc("GetCurrentThread") - procGetNamedPipeHandleStateW = modkernel32.NewProc("GetNamedPipeHandleStateW") - procGetNamedPipeInfo = modkernel32.NewProc("GetNamedPipeInfo") - procGetQueuedCompletionStatus = modkernel32.NewProc("GetQueuedCompletionStatus") - procLocalAlloc = modkernel32.NewProc("LocalAlloc") - procLocalFree = modkernel32.NewProc("LocalFree") - procSetFileCompletionNotificationModes = modkernel32.NewProc("SetFileCompletionNotificationModes") - procNtCreateNamedPipeFile = modntdll.NewProc("NtCreateNamedPipeFile") - procRtlDefaultNpAcl = modntdll.NewProc("RtlDefaultNpAcl") - procRtlDosPathNameToNtPathName_U = modntdll.NewProc("RtlDosPathNameToNtPathName_U") - procRtlNtStatusToDosErrorNoTeb = modntdll.NewProc("RtlNtStatusToDosErrorNoTeb") - procWSAGetOverlappedResult = modws2_32.NewProc("WSAGetOverlappedResult") + procAdjustTokenPrivileges = modadvapi32.NewProc("AdjustTokenPrivileges") + procConvertSidToStringSidW = modadvapi32.NewProc("ConvertSidToStringSidW") + procConvertStringSidToSidW = modadvapi32.NewProc("ConvertStringSidToSidW") + procImpersonateSelf = modadvapi32.NewProc("ImpersonateSelf") + procLookupAccountNameW = modadvapi32.NewProc("LookupAccountNameW") + procLookupAccountSidW = modadvapi32.NewProc("LookupAccountSidW") + procLookupPrivilegeDisplayNameW = modadvapi32.NewProc("LookupPrivilegeDisplayNameW") + procLookupPrivilegeNameW = modadvapi32.NewProc("LookupPrivilegeNameW") + procLookupPrivilegeValueW = modadvapi32.NewProc("LookupPrivilegeValueW") + procOpenThreadToken = modadvapi32.NewProc("OpenThreadToken") + procRevertToSelf = modadvapi32.NewProc("RevertToSelf") + procBackupRead = modkernel32.NewProc("BackupRead") + procBackupWrite = modkernel32.NewProc("BackupWrite") + procCancelIoEx = modkernel32.NewProc("CancelIoEx") + procConnectNamedPipe = modkernel32.NewProc("ConnectNamedPipe") + procCreateIoCompletionPort = modkernel32.NewProc("CreateIoCompletionPort") + procCreateNamedPipeW = modkernel32.NewProc("CreateNamedPipeW") + procDisconnectNamedPipe = modkernel32.NewProc("DisconnectNamedPipe") + procGetCurrentThread = modkernel32.NewProc("GetCurrentThread") + procGetNamedPipeHandleStateW = modkernel32.NewProc("GetNamedPipeHandleStateW") + procGetNamedPipeInfo = modkernel32.NewProc("GetNamedPipeInfo") + procGetQueuedCompletionStatus = modkernel32.NewProc("GetQueuedCompletionStatus") + procSetFileCompletionNotificationModes = modkernel32.NewProc("SetFileCompletionNotificationModes") + procNtCreateNamedPipeFile = modntdll.NewProc("NtCreateNamedPipeFile") + procRtlDefaultNpAcl = modntdll.NewProc("RtlDefaultNpAcl") + procRtlDosPathNameToNtPathName_U = modntdll.NewProc("RtlDosPathNameToNtPathName_U") + procRtlNtStatusToDosErrorNoTeb = modntdll.NewProc("RtlNtStatusToDosErrorNoTeb") + procWSAGetOverlappedResult = modws2_32.NewProc("WSAGetOverlappedResult") ) func adjustTokenPrivileges(token windows.Token, releaseAll bool, input *byte, outputSize uint32, output *byte, requiredSize *uint32) (success bool, err error) { @@ -84,7 +77,7 @@ func adjustTokenPrivileges(token windows.Token, releaseAll bool, input *byte, ou if releaseAll { _p0 = 1 } - r0, _, e1 := syscall.Syscall6(procAdjustTokenPrivileges.Addr(), 6, uintptr(token), uintptr(_p0), uintptr(unsafe.Pointer(input)), uintptr(outputSize), uintptr(unsafe.Pointer(output)), uintptr(unsafe.Pointer(requiredSize))) + r0, _, e1 := syscall.SyscallN(procAdjustTokenPrivileges.Addr(), uintptr(token), uintptr(_p0), uintptr(unsafe.Pointer(input)), uintptr(outputSize), uintptr(unsafe.Pointer(output)), uintptr(unsafe.Pointer(requiredSize))) success = r0 != 0 if true { err = errnoErr(e1) @@ -92,33 +85,8 @@ func adjustTokenPrivileges(token windows.Token, releaseAll bool, input *byte, ou return } -func convertSecurityDescriptorToStringSecurityDescriptor(sd *byte, revision uint32, secInfo uint32, sddl **uint16, sddlSize *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procConvertSecurityDescriptorToStringSecurityDescriptorW.Addr(), 5, uintptr(unsafe.Pointer(sd)), uintptr(revision), uintptr(secInfo), uintptr(unsafe.Pointer(sddl)), uintptr(unsafe.Pointer(sddlSize)), 0) - if r1 == 0 { - err = errnoErr(e1) - } - return -} - func convertSidToStringSid(sid *byte, str **uint16) (err error) { - r1, _, e1 := syscall.Syscall(procConvertSidToStringSidW.Addr(), 2, uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(str)), 0) - if r1 == 0 { - err = errnoErr(e1) - } - return -} - -func convertStringSecurityDescriptorToSecurityDescriptor(str string, revision uint32, sd *uintptr, size *uint32) (err error) { - var _p0 *uint16 - _p0, err = syscall.UTF16PtrFromString(str) - if err != nil { - return - } - return _convertStringSecurityDescriptorToSecurityDescriptor(_p0, revision, sd, size) -} - -func _convertStringSecurityDescriptorToSecurityDescriptor(str *uint16, revision uint32, sd *uintptr, size *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procConvertStringSecurityDescriptorToSecurityDescriptorW.Addr(), 4, uintptr(unsafe.Pointer(str)), uintptr(revision), uintptr(unsafe.Pointer(sd)), uintptr(unsafe.Pointer(size)), 0, 0) + r1, _, e1 := syscall.SyscallN(procConvertSidToStringSidW.Addr(), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(str))) if r1 == 0 { err = errnoErr(e1) } @@ -126,21 +94,15 @@ func _convertStringSecurityDescriptorToSecurityDescriptor(str *uint16, revision } func convertStringSidToSid(str *uint16, sid **byte) (err error) { - r1, _, e1 := syscall.Syscall(procConvertStringSidToSidW.Addr(), 2, uintptr(unsafe.Pointer(str)), uintptr(unsafe.Pointer(sid)), 0) + r1, _, e1 := syscall.SyscallN(procConvertStringSidToSidW.Addr(), uintptr(unsafe.Pointer(str)), uintptr(unsafe.Pointer(sid))) if r1 == 0 { err = errnoErr(e1) } return } -func getSecurityDescriptorLength(sd uintptr) (len uint32) { - r0, _, _ := syscall.Syscall(procGetSecurityDescriptorLength.Addr(), 1, uintptr(sd), 0, 0) - len = uint32(r0) - return -} - func impersonateSelf(level uint32) (err error) { - r1, _, e1 := syscall.Syscall(procImpersonateSelf.Addr(), 1, uintptr(level), 0, 0) + r1, _, e1 := syscall.SyscallN(procImpersonateSelf.Addr(), uintptr(level)) if r1 == 0 { err = errnoErr(e1) } @@ -157,7 +119,7 @@ func lookupAccountName(systemName *uint16, accountName string, sid *byte, sidSiz } func _lookupAccountName(systemName *uint16, accountName *uint16, sid *byte, sidSize *uint32, refDomain *uint16, refDomainSize *uint32, sidNameUse *uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procLookupAccountNameW.Addr(), 7, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(accountName)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(sidSize)), uintptr(unsafe.Pointer(refDomain)), uintptr(unsafe.Pointer(refDomainSize)), uintptr(unsafe.Pointer(sidNameUse)), 0, 0) + r1, _, e1 := syscall.SyscallN(procLookupAccountNameW.Addr(), uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(accountName)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(sidSize)), uintptr(unsafe.Pointer(refDomain)), uintptr(unsafe.Pointer(refDomainSize)), uintptr(unsafe.Pointer(sidNameUse))) if r1 == 0 { err = errnoErr(e1) } @@ -165,7 +127,7 @@ func _lookupAccountName(systemName *uint16, accountName *uint16, sid *byte, sidS } func lookupAccountSid(systemName *uint16, sid *byte, name *uint16, nameSize *uint32, refDomain *uint16, refDomainSize *uint32, sidNameUse *uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procLookupAccountSidW.Addr(), 7, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(nameSize)), uintptr(unsafe.Pointer(refDomain)), uintptr(unsafe.Pointer(refDomainSize)), uintptr(unsafe.Pointer(sidNameUse)), 0, 0) + r1, _, e1 := syscall.SyscallN(procLookupAccountSidW.Addr(), uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(sid)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(nameSize)), uintptr(unsafe.Pointer(refDomain)), uintptr(unsafe.Pointer(refDomainSize)), uintptr(unsafe.Pointer(sidNameUse))) if r1 == 0 { err = errnoErr(e1) } @@ -182,7 +144,7 @@ func lookupPrivilegeDisplayName(systemName string, name *uint16, buffer *uint16, } func _lookupPrivilegeDisplayName(systemName *uint16, name *uint16, buffer *uint16, size *uint32, languageId *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procLookupPrivilegeDisplayNameW.Addr(), 5, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(buffer)), uintptr(unsafe.Pointer(size)), uintptr(unsafe.Pointer(languageId)), 0) + r1, _, e1 := syscall.SyscallN(procLookupPrivilegeDisplayNameW.Addr(), uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(buffer)), uintptr(unsafe.Pointer(size)), uintptr(unsafe.Pointer(languageId))) if r1 == 0 { err = errnoErr(e1) } @@ -199,7 +161,7 @@ func lookupPrivilegeName(systemName string, luid *uint64, buffer *uint16, size * } func _lookupPrivilegeName(systemName *uint16, luid *uint64, buffer *uint16, size *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procLookupPrivilegeNameW.Addr(), 4, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(luid)), uintptr(unsafe.Pointer(buffer)), uintptr(unsafe.Pointer(size)), 0, 0) + r1, _, e1 := syscall.SyscallN(procLookupPrivilegeNameW.Addr(), uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(luid)), uintptr(unsafe.Pointer(buffer)), uintptr(unsafe.Pointer(size))) if r1 == 0 { err = errnoErr(e1) } @@ -221,19 +183,19 @@ func lookupPrivilegeValue(systemName string, name string, luid *uint64) (err err } func _lookupPrivilegeValue(systemName *uint16, name *uint16, luid *uint64) (err error) { - r1, _, e1 := syscall.Syscall(procLookupPrivilegeValueW.Addr(), 3, uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(luid))) + r1, _, e1 := syscall.SyscallN(procLookupPrivilegeValueW.Addr(), uintptr(unsafe.Pointer(systemName)), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(luid))) if r1 == 0 { err = errnoErr(e1) } return } -func openThreadToken(thread syscall.Handle, accessMask uint32, openAsSelf bool, token *windows.Token) (err error) { +func openThreadToken(thread windows.Handle, accessMask uint32, openAsSelf bool, token *windows.Token) (err error) { var _p0 uint32 if openAsSelf { _p0 = 1 } - r1, _, e1 := syscall.Syscall6(procOpenThreadToken.Addr(), 4, uintptr(thread), uintptr(accessMask), uintptr(_p0), uintptr(unsafe.Pointer(token)), 0, 0) + r1, _, e1 := syscall.SyscallN(procOpenThreadToken.Addr(), uintptr(thread), uintptr(accessMask), uintptr(_p0), uintptr(unsafe.Pointer(token))) if r1 == 0 { err = errnoErr(e1) } @@ -241,14 +203,14 @@ func openThreadToken(thread syscall.Handle, accessMask uint32, openAsSelf bool, } func revertToSelf() (err error) { - r1, _, e1 := syscall.Syscall(procRevertToSelf.Addr(), 0, 0, 0, 0) + r1, _, e1 := syscall.SyscallN(procRevertToSelf.Addr()) if r1 == 0 { err = errnoErr(e1) } return } -func backupRead(h syscall.Handle, b []byte, bytesRead *uint32, abort bool, processSecurity bool, context *uintptr) (err error) { +func backupRead(h windows.Handle, b []byte, bytesRead *uint32, abort bool, processSecurity bool, context *uintptr) (err error) { var _p0 *byte if len(b) > 0 { _p0 = &b[0] @@ -261,14 +223,14 @@ func backupRead(h syscall.Handle, b []byte, bytesRead *uint32, abort bool, proce if processSecurity { _p2 = 1 } - r1, _, e1 := syscall.Syscall9(procBackupRead.Addr(), 7, uintptr(h), uintptr(unsafe.Pointer(_p0)), uintptr(len(b)), uintptr(unsafe.Pointer(bytesRead)), uintptr(_p1), uintptr(_p2), uintptr(unsafe.Pointer(context)), 0, 0) + r1, _, e1 := syscall.SyscallN(procBackupRead.Addr(), uintptr(h), uintptr(unsafe.Pointer(_p0)), uintptr(len(b)), uintptr(unsafe.Pointer(bytesRead)), uintptr(_p1), uintptr(_p2), uintptr(unsafe.Pointer(context))) if r1 == 0 { err = errnoErr(e1) } return } -func backupWrite(h syscall.Handle, b []byte, bytesWritten *uint32, abort bool, processSecurity bool, context *uintptr) (err error) { +func backupWrite(h windows.Handle, b []byte, bytesWritten *uint32, abort bool, processSecurity bool, context *uintptr) (err error) { var _p0 *byte if len(b) > 0 { _p0 = &b[0] @@ -281,39 +243,39 @@ func backupWrite(h syscall.Handle, b []byte, bytesWritten *uint32, abort bool, p if processSecurity { _p2 = 1 } - r1, _, e1 := syscall.Syscall9(procBackupWrite.Addr(), 7, uintptr(h), uintptr(unsafe.Pointer(_p0)), uintptr(len(b)), uintptr(unsafe.Pointer(bytesWritten)), uintptr(_p1), uintptr(_p2), uintptr(unsafe.Pointer(context)), 0, 0) + r1, _, e1 := syscall.SyscallN(procBackupWrite.Addr(), uintptr(h), uintptr(unsafe.Pointer(_p0)), uintptr(len(b)), uintptr(unsafe.Pointer(bytesWritten)), uintptr(_p1), uintptr(_p2), uintptr(unsafe.Pointer(context))) if r1 == 0 { err = errnoErr(e1) } return } -func cancelIoEx(file syscall.Handle, o *syscall.Overlapped) (err error) { - r1, _, e1 := syscall.Syscall(procCancelIoEx.Addr(), 2, uintptr(file), uintptr(unsafe.Pointer(o)), 0) +func cancelIoEx(file windows.Handle, o *windows.Overlapped) (err error) { + r1, _, e1 := syscall.SyscallN(procCancelIoEx.Addr(), uintptr(file), uintptr(unsafe.Pointer(o))) if r1 == 0 { err = errnoErr(e1) } return } -func connectNamedPipe(pipe syscall.Handle, o *syscall.Overlapped) (err error) { - r1, _, e1 := syscall.Syscall(procConnectNamedPipe.Addr(), 2, uintptr(pipe), uintptr(unsafe.Pointer(o)), 0) +func connectNamedPipe(pipe windows.Handle, o *windows.Overlapped) (err error) { + r1, _, e1 := syscall.SyscallN(procConnectNamedPipe.Addr(), uintptr(pipe), uintptr(unsafe.Pointer(o))) if r1 == 0 { err = errnoErr(e1) } return } -func createIoCompletionPort(file syscall.Handle, port syscall.Handle, key uintptr, threadCount uint32) (newport syscall.Handle, err error) { - r0, _, e1 := syscall.Syscall6(procCreateIoCompletionPort.Addr(), 4, uintptr(file), uintptr(port), uintptr(key), uintptr(threadCount), 0, 0) - newport = syscall.Handle(r0) +func createIoCompletionPort(file windows.Handle, port windows.Handle, key uintptr, threadCount uint32) (newport windows.Handle, err error) { + r0, _, e1 := syscall.SyscallN(procCreateIoCompletionPort.Addr(), uintptr(file), uintptr(port), uintptr(key), uintptr(threadCount)) + newport = windows.Handle(r0) if newport == 0 { err = errnoErr(e1) } return } -func createNamedPipe(name string, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *syscall.SecurityAttributes) (handle syscall.Handle, err error) { +func createNamedPipe(name string, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *windows.SecurityAttributes) (handle windows.Handle, err error) { var _p0 *uint16 _p0, err = syscall.UTF16PtrFromString(name) if err != nil { @@ -322,96 +284,93 @@ func createNamedPipe(name string, flags uint32, pipeMode uint32, maxInstances ui return _createNamedPipe(_p0, flags, pipeMode, maxInstances, outSize, inSize, defaultTimeout, sa) } -func _createNamedPipe(name *uint16, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *syscall.SecurityAttributes) (handle syscall.Handle, err error) { - r0, _, e1 := syscall.Syscall9(procCreateNamedPipeW.Addr(), 8, uintptr(unsafe.Pointer(name)), uintptr(flags), uintptr(pipeMode), uintptr(maxInstances), uintptr(outSize), uintptr(inSize), uintptr(defaultTimeout), uintptr(unsafe.Pointer(sa)), 0) - handle = syscall.Handle(r0) - if handle == syscall.InvalidHandle { +func _createNamedPipe(name *uint16, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *windows.SecurityAttributes) (handle windows.Handle, err error) { + r0, _, e1 := syscall.SyscallN(procCreateNamedPipeW.Addr(), uintptr(unsafe.Pointer(name)), uintptr(flags), uintptr(pipeMode), uintptr(maxInstances), uintptr(outSize), uintptr(inSize), uintptr(defaultTimeout), uintptr(unsafe.Pointer(sa))) + handle = windows.Handle(r0) + if handle == windows.InvalidHandle { err = errnoErr(e1) } return } -func getCurrentThread() (h syscall.Handle) { - r0, _, _ := syscall.Syscall(procGetCurrentThread.Addr(), 0, 0, 0, 0) - h = syscall.Handle(r0) - return -} - -func getNamedPipeHandleState(pipe syscall.Handle, state *uint32, curInstances *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32, userName *uint16, maxUserNameSize uint32) (err error) { - r1, _, e1 := syscall.Syscall9(procGetNamedPipeHandleStateW.Addr(), 7, uintptr(pipe), uintptr(unsafe.Pointer(state)), uintptr(unsafe.Pointer(curInstances)), uintptr(unsafe.Pointer(maxCollectionCount)), uintptr(unsafe.Pointer(collectDataTimeout)), uintptr(unsafe.Pointer(userName)), uintptr(maxUserNameSize), 0, 0) +func disconnectNamedPipe(pipe windows.Handle) (err error) { + r1, _, e1 := syscall.SyscallN(procDisconnectNamedPipe.Addr(), uintptr(pipe)) if r1 == 0 { err = errnoErr(e1) } return } -func getNamedPipeInfo(pipe syscall.Handle, flags *uint32, outSize *uint32, inSize *uint32, maxInstances *uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetNamedPipeInfo.Addr(), 5, uintptr(pipe), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(outSize)), uintptr(unsafe.Pointer(inSize)), uintptr(unsafe.Pointer(maxInstances)), 0) - if r1 == 0 { - err = errnoErr(e1) - } +func getCurrentThread() (h windows.Handle) { + r0, _, _ := syscall.SyscallN(procGetCurrentThread.Addr()) + h = windows.Handle(r0) return } -func getQueuedCompletionStatus(port syscall.Handle, bytes *uint32, key *uintptr, o **ioOperation, timeout uint32) (err error) { - r1, _, e1 := syscall.Syscall6(procGetQueuedCompletionStatus.Addr(), 5, uintptr(port), uintptr(unsafe.Pointer(bytes)), uintptr(unsafe.Pointer(key)), uintptr(unsafe.Pointer(o)), uintptr(timeout), 0) +func getNamedPipeHandleState(pipe windows.Handle, state *uint32, curInstances *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32, userName *uint16, maxUserNameSize uint32) (err error) { + r1, _, e1 := syscall.SyscallN(procGetNamedPipeHandleStateW.Addr(), uintptr(pipe), uintptr(unsafe.Pointer(state)), uintptr(unsafe.Pointer(curInstances)), uintptr(unsafe.Pointer(maxCollectionCount)), uintptr(unsafe.Pointer(collectDataTimeout)), uintptr(unsafe.Pointer(userName)), uintptr(maxUserNameSize)) if r1 == 0 { err = errnoErr(e1) } return } -func localAlloc(uFlags uint32, length uint32) (ptr uintptr) { - r0, _, _ := syscall.Syscall(procLocalAlloc.Addr(), 2, uintptr(uFlags), uintptr(length), 0) - ptr = uintptr(r0) +func getNamedPipeInfo(pipe windows.Handle, flags *uint32, outSize *uint32, inSize *uint32, maxInstances *uint32) (err error) { + r1, _, e1 := syscall.SyscallN(procGetNamedPipeInfo.Addr(), uintptr(pipe), uintptr(unsafe.Pointer(flags)), uintptr(unsafe.Pointer(outSize)), uintptr(unsafe.Pointer(inSize)), uintptr(unsafe.Pointer(maxInstances))) + if r1 == 0 { + err = errnoErr(e1) + } return } -func localFree(mem uintptr) { - syscall.Syscall(procLocalFree.Addr(), 1, uintptr(mem), 0, 0) +func getQueuedCompletionStatus(port windows.Handle, bytes *uint32, key *uintptr, o **ioOperation, timeout uint32) (err error) { + r1, _, e1 := syscall.SyscallN(procGetQueuedCompletionStatus.Addr(), uintptr(port), uintptr(unsafe.Pointer(bytes)), uintptr(unsafe.Pointer(key)), uintptr(unsafe.Pointer(o)), uintptr(timeout)) + if r1 == 0 { + err = errnoErr(e1) + } return } -func setFileCompletionNotificationModes(h syscall.Handle, flags uint8) (err error) { - r1, _, e1 := syscall.Syscall(procSetFileCompletionNotificationModes.Addr(), 2, uintptr(h), uintptr(flags), 0) +func setFileCompletionNotificationModes(h windows.Handle, flags uint8) (err error) { + r1, _, e1 := syscall.SyscallN(procSetFileCompletionNotificationModes.Addr(), uintptr(h), uintptr(flags)) if r1 == 0 { err = errnoErr(e1) } return } -func ntCreateNamedPipeFile(pipe *syscall.Handle, access uint32, oa *objectAttributes, iosb *ioStatusBlock, share uint32, disposition uint32, options uint32, typ uint32, readMode uint32, completionMode uint32, maxInstances uint32, inboundQuota uint32, outputQuota uint32, timeout *int64) (status ntStatus) { - r0, _, _ := syscall.Syscall15(procNtCreateNamedPipeFile.Addr(), 14, uintptr(unsafe.Pointer(pipe)), uintptr(access), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(share), uintptr(disposition), uintptr(options), uintptr(typ), uintptr(readMode), uintptr(completionMode), uintptr(maxInstances), uintptr(inboundQuota), uintptr(outputQuota), uintptr(unsafe.Pointer(timeout)), 0) +func ntCreateNamedPipeFile(pipe *windows.Handle, access ntAccessMask, oa *objectAttributes, iosb *ioStatusBlock, share ntFileShareMode, disposition ntFileCreationDisposition, options ntFileOptions, typ uint32, readMode uint32, completionMode uint32, maxInstances uint32, inboundQuota uint32, outputQuota uint32, timeout *int64) (status ntStatus) { + r0, _, _ := syscall.SyscallN(procNtCreateNamedPipeFile.Addr(), uintptr(unsafe.Pointer(pipe)), uintptr(access), uintptr(unsafe.Pointer(oa)), uintptr(unsafe.Pointer(iosb)), uintptr(share), uintptr(disposition), uintptr(options), uintptr(typ), uintptr(readMode), uintptr(completionMode), uintptr(maxInstances), uintptr(inboundQuota), uintptr(outputQuota), uintptr(unsafe.Pointer(timeout))) status = ntStatus(r0) return } func rtlDefaultNpAcl(dacl *uintptr) (status ntStatus) { - r0, _, _ := syscall.Syscall(procRtlDefaultNpAcl.Addr(), 1, uintptr(unsafe.Pointer(dacl)), 0, 0) + r0, _, _ := syscall.SyscallN(procRtlDefaultNpAcl.Addr(), uintptr(unsafe.Pointer(dacl))) status = ntStatus(r0) return } func rtlDosPathNameToNtPathName(name *uint16, ntName *unicodeString, filePart uintptr, reserved uintptr) (status ntStatus) { - r0, _, _ := syscall.Syscall6(procRtlDosPathNameToNtPathName_U.Addr(), 4, uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(ntName)), uintptr(filePart), uintptr(reserved), 0, 0) + r0, _, _ := syscall.SyscallN(procRtlDosPathNameToNtPathName_U.Addr(), uintptr(unsafe.Pointer(name)), uintptr(unsafe.Pointer(ntName)), uintptr(filePart), uintptr(reserved)) status = ntStatus(r0) return } func rtlNtStatusToDosError(status ntStatus) (winerr error) { - r0, _, _ := syscall.Syscall(procRtlNtStatusToDosErrorNoTeb.Addr(), 1, uintptr(status), 0, 0) + r0, _, _ := syscall.SyscallN(procRtlNtStatusToDosErrorNoTeb.Addr(), uintptr(status)) if r0 != 0 { winerr = syscall.Errno(r0) } return } -func wsaGetOverlappedResult(h syscall.Handle, o *syscall.Overlapped, bytes *uint32, wait bool, flags *uint32) (err error) { +func wsaGetOverlappedResult(h windows.Handle, o *windows.Overlapped, bytes *uint32, wait bool, flags *uint32) (err error) { var _p0 uint32 if wait { _p0 = 1 } - r1, _, e1 := syscall.Syscall6(procWSAGetOverlappedResult.Addr(), 5, uintptr(h), uintptr(unsafe.Pointer(o)), uintptr(unsafe.Pointer(bytes)), uintptr(_p0), uintptr(unsafe.Pointer(flags)), 0) + r1, _, e1 := syscall.SyscallN(procWSAGetOverlappedResult.Addr(), uintptr(h), uintptr(unsafe.Pointer(o)), uintptr(unsafe.Pointer(bytes)), uintptr(_p0), uintptr(unsafe.Pointer(flags))) if r1 == 0 { err = errnoErr(e1) } diff --git a/vendor/github.com/VictoriaMetrics/metrics/README.md b/vendor/github.com/VictoriaMetrics/metrics/README.md index e1a2537c..b01d81e8 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/README.md +++ b/vendor/github.com/VictoriaMetrics/metrics/README.md @@ -73,8 +73,11 @@ http.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) { metrics.InitPush("http://victoria-metrics:8428/api/v1/import/prometheus", 10*time.Second, `instance="foobar"`, true) ``` -See [docs](http://godoc.org/github.com/VictoriaMetrics/metrics) for more info. +By default, exposed metrics [do not have](https://github.com/VictoriaMetrics/metrics/issues/48#issuecomment-1620765811) +`TYPE` or `HELP` meta information. Call [`ExposeMetadata(true)`](https://pkg.go.dev/github.com/VictoriaMetrics/metrics#ExposeMetadata) +in order to generate `TYPE` and `HELP` meta information per each metric. +See [docs](https://pkg.go.dev/github.com/VictoriaMetrics/metrics) for more info. ### Users diff --git a/vendor/github.com/VictoriaMetrics/metrics/counter.go b/vendor/github.com/VictoriaMetrics/metrics/counter.go index dfe94779..1076e80c 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/counter.go +++ b/vendor/github.com/VictoriaMetrics/metrics/counter.go @@ -42,6 +42,11 @@ func (c *Counter) Add(n int) { atomic.AddUint64(&c.n, uint64(n)) } +// AddInt64 adds n to c. +func (c *Counter) AddInt64(n int64) { + atomic.AddUint64(&c.n, uint64(n)) +} + // Get returns the current value for c. func (c *Counter) Get() uint64 { return atomic.LoadUint64(&c.n) @@ -58,6 +63,10 @@ func (c *Counter) marshalTo(prefix string, w io.Writer) { fmt.Fprintf(w, "%s %d\n", prefix, v) } +func (c *Counter) metricType() string { + return "counter" +} + // GetOrCreateCounter returns registered counter with the given name // or creates new counter if the registry doesn't contain counter with // the given name. diff --git a/vendor/github.com/VictoriaMetrics/metrics/floatcounter.go b/vendor/github.com/VictoriaMetrics/metrics/floatcounter.go index f8987909..8bd9fa67 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/floatcounter.go +++ b/vendor/github.com/VictoriaMetrics/metrics/floatcounter.go @@ -63,6 +63,10 @@ func (fc *FloatCounter) marshalTo(prefix string, w io.Writer) { fmt.Fprintf(w, "%s %g\n", prefix, v) } +func (fc *FloatCounter) metricType() string { + return "counter" +} + // GetOrCreateFloatCounter returns registered FloatCounter with the given name // or creates new FloatCounter if the registry doesn't contain FloatCounter with // the given name. diff --git a/vendor/github.com/VictoriaMetrics/metrics/gauge.go b/vendor/github.com/VictoriaMetrics/metrics/gauge.go index 9084fc4d..3573e144 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/gauge.go +++ b/vendor/github.com/VictoriaMetrics/metrics/gauge.go @@ -3,10 +3,11 @@ package metrics import ( "fmt" "io" + "math" + "sync/atomic" ) -// NewGauge registers and returns gauge with the given name, which calls f -// to obtain gauge value. +// NewGauge registers and returns gauge with the given name, which calls f to obtain gauge value. // // name must be valid Prometheus-compatible metric with possible labels. // For instance, @@ -16,6 +17,7 @@ import ( // - foo{bar="baz",aaa="b"} // // f must be safe for concurrent calls. +// if f is nil, then it is expected that the gauge value is changed via Set(), Inc(), Dec() and Add() calls. // // The returned gauge is safe to use from concurrent goroutines. // @@ -25,19 +27,68 @@ func NewGauge(name string, f func() float64) *Gauge { } // Gauge is a float64 gauge. -// -// See also Counter, which could be used as a gauge with Set and Dec calls. type Gauge struct { + // valueBits contains uint64 representation of float64 passed to Gauge.Set. + valueBits uint64 + + // f is a callback, which is called for returning the gauge value. f func() float64 } // Get returns the current value for g. func (g *Gauge) Get() float64 { - return g.f() + if f := g.f; f != nil { + return f() + } + n := atomic.LoadUint64(&g.valueBits) + return math.Float64frombits(n) +} + +// Set sets g value to v. +// +// The g must be created with nil callback in order to be able to call this function. +func (g *Gauge) Set(v float64) { + if g.f != nil { + panic(fmt.Errorf("cannot call Set on gauge created with non-nil callback")) + } + n := math.Float64bits(v) + atomic.StoreUint64(&g.valueBits, n) +} + +// Inc increments g by 1. +// +// The g must be created with nil callback in order to be able to call this function. +func (g *Gauge) Inc() { + g.Add(1) +} + +// Dec decrements g by 1. +// +// The g must be created with nil callback in order to be able to call this function. +func (g *Gauge) Dec() { + g.Add(-1) +} + +// Add adds fAdd to g. fAdd may be positive and negative. +// +// The g must be created with nil callback in order to be able to call this function. +func (g *Gauge) Add(fAdd float64) { + if g.f != nil { + panic(fmt.Errorf("cannot call Set on gauge created with non-nil callback")) + } + for { + n := atomic.LoadUint64(&g.valueBits) + f := math.Float64frombits(n) + fNew := f + fAdd + nNew := math.Float64bits(fNew) + if atomic.CompareAndSwapUint64(&g.valueBits, n, nNew) { + break + } + } } func (g *Gauge) marshalTo(prefix string, w io.Writer) { - v := g.f() + v := g.Get() if float64(int64(v)) == v { // Marshal integer values without scientific notation fmt.Fprintf(w, "%s %d\n", prefix, int64(v)) @@ -46,6 +97,10 @@ func (g *Gauge) marshalTo(prefix string, w io.Writer) { } } +func (g *Gauge) metricType() string { + return "gauge" +} + // GetOrCreateGauge returns registered gauge with the given name // or creates new gauge if the registry doesn't contain gauge with // the given name. diff --git a/vendor/github.com/VictoriaMetrics/metrics/go_metrics.go b/vendor/github.com/VictoriaMetrics/metrics/go_metrics.go index f8b60673..d8b949de 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/go_metrics.go +++ b/vendor/github.com/VictoriaMetrics/metrics/go_metrics.go @@ -3,41 +3,78 @@ package metrics import ( "fmt" "io" + "log" + "math" "runtime" + runtimemetrics "runtime/metrics" + "strings" "github.com/valyala/histogram" ) +// See https://pkg.go.dev/runtime/metrics#hdr-Supported_metrics +var runtimeMetrics = [][2]string{ + {"/sched/latencies:seconds", "go_sched_latencies_seconds"}, + {"/sync/mutex/wait/total:seconds", "go_mutex_wait_seconds_total"}, + {"/cpu/classes/gc/mark/assist:cpu-seconds", "go_gc_mark_assist_cpu_seconds_total"}, + {"/cpu/classes/gc/total:cpu-seconds", "go_gc_cpu_seconds_total"}, + {"/gc/pauses:seconds", "go_gc_pauses_seconds"}, + {"/cpu/classes/scavenge/total:cpu-seconds", "go_scavenge_cpu_seconds_total"}, + {"/gc/gomemlimit:bytes", "go_memlimit_bytes"}, +} + +var supportedRuntimeMetrics = initSupportedRuntimeMetrics(runtimeMetrics) + +func initSupportedRuntimeMetrics(rms [][2]string) [][2]string { + exposedMetrics := make(map[string]struct{}) + for _, d := range runtimemetrics.All() { + exposedMetrics[d.Name] = struct{}{} + } + var supportedMetrics [][2]string + for _, rm := range rms { + metricName := rm[0] + if _, ok := exposedMetrics[metricName]; ok { + supportedMetrics = append(supportedMetrics, rm) + } else { + log.Printf("github.com/VictoriaMetrics/metrics: do not expose %s metric, since the corresponding metric %s isn't supported in the current Go runtime", rm[1], metricName) + } + } + return supportedMetrics +} + func writeGoMetrics(w io.Writer) { + writeRuntimeMetrics(w) + var ms runtime.MemStats runtime.ReadMemStats(&ms) - fmt.Fprintf(w, "go_memstats_alloc_bytes %d\n", ms.Alloc) - fmt.Fprintf(w, "go_memstats_alloc_bytes_total %d\n", ms.TotalAlloc) - fmt.Fprintf(w, "go_memstats_buck_hash_sys_bytes %d\n", ms.BuckHashSys) - fmt.Fprintf(w, "go_memstats_frees_total %d\n", ms.Frees) - fmt.Fprintf(w, "go_memstats_gc_cpu_fraction %g\n", ms.GCCPUFraction) - fmt.Fprintf(w, "go_memstats_gc_sys_bytes %d\n", ms.GCSys) - fmt.Fprintf(w, "go_memstats_heap_alloc_bytes %d\n", ms.HeapAlloc) - fmt.Fprintf(w, "go_memstats_heap_idle_bytes %d\n", ms.HeapIdle) - fmt.Fprintf(w, "go_memstats_heap_inuse_bytes %d\n", ms.HeapInuse) - fmt.Fprintf(w, "go_memstats_heap_objects %d\n", ms.HeapObjects) - fmt.Fprintf(w, "go_memstats_heap_released_bytes %d\n", ms.HeapReleased) - fmt.Fprintf(w, "go_memstats_heap_sys_bytes %d\n", ms.HeapSys) - fmt.Fprintf(w, "go_memstats_last_gc_time_seconds %g\n", float64(ms.LastGC)/1e9) - fmt.Fprintf(w, "go_memstats_lookups_total %d\n", ms.Lookups) - fmt.Fprintf(w, "go_memstats_mallocs_total %d\n", ms.Mallocs) - fmt.Fprintf(w, "go_memstats_mcache_inuse_bytes %d\n", ms.MCacheInuse) - fmt.Fprintf(w, "go_memstats_mcache_sys_bytes %d\n", ms.MCacheSys) - fmt.Fprintf(w, "go_memstats_mspan_inuse_bytes %d\n", ms.MSpanInuse) - fmt.Fprintf(w, "go_memstats_mspan_sys_bytes %d\n", ms.MSpanSys) - fmt.Fprintf(w, "go_memstats_next_gc_bytes %d\n", ms.NextGC) - fmt.Fprintf(w, "go_memstats_other_sys_bytes %d\n", ms.OtherSys) - fmt.Fprintf(w, "go_memstats_stack_inuse_bytes %d\n", ms.StackInuse) - fmt.Fprintf(w, "go_memstats_stack_sys_bytes %d\n", ms.StackSys) - fmt.Fprintf(w, "go_memstats_sys_bytes %d\n", ms.Sys) - - fmt.Fprintf(w, "go_cgo_calls_count %d\n", runtime.NumCgoCall()) - fmt.Fprintf(w, "go_cpu_count %d\n", runtime.NumCPU()) + WriteGaugeUint64(w, "go_memstats_alloc_bytes", ms.Alloc) + WriteCounterUint64(w, "go_memstats_alloc_bytes_total", ms.TotalAlloc) + WriteGaugeUint64(w, "go_memstats_buck_hash_sys_bytes", ms.BuckHashSys) + WriteCounterUint64(w, "go_memstats_frees_total", ms.Frees) + WriteGaugeFloat64(w, "go_memstats_gc_cpu_fraction", ms.GCCPUFraction) + WriteGaugeUint64(w, "go_memstats_gc_sys_bytes", ms.GCSys) + + WriteGaugeUint64(w, "go_memstats_heap_alloc_bytes", ms.HeapAlloc) + WriteGaugeUint64(w, "go_memstats_heap_idle_bytes", ms.HeapIdle) + WriteGaugeUint64(w, "go_memstats_heap_inuse_bytes", ms.HeapInuse) + WriteGaugeUint64(w, "go_memstats_heap_objects", ms.HeapObjects) + WriteGaugeUint64(w, "go_memstats_heap_released_bytes", ms.HeapReleased) + WriteGaugeUint64(w, "go_memstats_heap_sys_bytes", ms.HeapSys) + WriteGaugeFloat64(w, "go_memstats_last_gc_time_seconds", float64(ms.LastGC)/1e9) + WriteCounterUint64(w, "go_memstats_lookups_total", ms.Lookups) + WriteCounterUint64(w, "go_memstats_mallocs_total", ms.Mallocs) + WriteGaugeUint64(w, "go_memstats_mcache_inuse_bytes", ms.MCacheInuse) + WriteGaugeUint64(w, "go_memstats_mcache_sys_bytes", ms.MCacheSys) + WriteGaugeUint64(w, "go_memstats_mspan_inuse_bytes", ms.MSpanInuse) + WriteGaugeUint64(w, "go_memstats_mspan_sys_bytes", ms.MSpanSys) + WriteGaugeUint64(w, "go_memstats_next_gc_bytes", ms.NextGC) + WriteGaugeUint64(w, "go_memstats_other_sys_bytes", ms.OtherSys) + WriteGaugeUint64(w, "go_memstats_stack_inuse_bytes", ms.StackInuse) + WriteGaugeUint64(w, "go_memstats_stack_sys_bytes", ms.StackSys) + WriteGaugeUint64(w, "go_memstats_sys_bytes", ms.Sys) + + WriteCounterUint64(w, "go_cgo_calls_count", uint64(runtime.NumCgoCall())) + WriteGaugeUint64(w, "go_cpu_count", uint64(runtime.NumCPU())) gcPauses := histogram.NewFast() for _, pauseNs := range ms.PauseNs[:] { @@ -45,20 +82,103 @@ func writeGoMetrics(w io.Writer) { } phis := []float64{0, 0.25, 0.5, 0.75, 1} quantiles := make([]float64, 0, len(phis)) + WriteMetadataIfNeeded(w, "go_gc_duration_seconds", "summary") for i, q := range gcPauses.Quantiles(quantiles[:0], phis) { fmt.Fprintf(w, `go_gc_duration_seconds{quantile="%g"} %g`+"\n", phis[i], q) } - fmt.Fprintf(w, `go_gc_duration_seconds_sum %g`+"\n", float64(ms.PauseTotalNs)/1e9) - fmt.Fprintf(w, `go_gc_duration_seconds_count %d`+"\n", ms.NumGC) - fmt.Fprintf(w, `go_gc_forced_count %d`+"\n", ms.NumForcedGC) + fmt.Fprintf(w, "go_gc_duration_seconds_sum %g\n", float64(ms.PauseTotalNs)/1e9) + fmt.Fprintf(w, "go_gc_duration_seconds_count %d\n", ms.NumGC) + + WriteCounterUint64(w, "go_gc_forced_count", uint64(ms.NumForcedGC)) - fmt.Fprintf(w, `go_gomaxprocs %d`+"\n", runtime.GOMAXPROCS(0)) - fmt.Fprintf(w, `go_goroutines %d`+"\n", runtime.NumGoroutine()) + WriteGaugeUint64(w, "go_gomaxprocs", uint64(runtime.GOMAXPROCS(0))) + WriteGaugeUint64(w, "go_goroutines", uint64(runtime.NumGoroutine())) numThread, _ := runtime.ThreadCreateProfile(nil) - fmt.Fprintf(w, `go_threads %d`+"\n", numThread) + WriteGaugeUint64(w, "go_threads", uint64(numThread)) // Export build details. + WriteMetadataIfNeeded(w, "go_info", "gauge") fmt.Fprintf(w, "go_info{version=%q} 1\n", runtime.Version()) + + WriteMetadataIfNeeded(w, "go_info_ext", "gauge") fmt.Fprintf(w, "go_info_ext{compiler=%q, GOARCH=%q, GOOS=%q, GOROOT=%q} 1\n", runtime.Compiler, runtime.GOARCH, runtime.GOOS, runtime.GOROOT()) } + +func writeRuntimeMetrics(w io.Writer) { + samples := make([]runtimemetrics.Sample, len(supportedRuntimeMetrics)) + for i, rm := range supportedRuntimeMetrics { + samples[i].Name = rm[0] + } + runtimemetrics.Read(samples) + for i, rm := range supportedRuntimeMetrics { + writeRuntimeMetric(w, rm[1], &samples[i]) + } +} + +func writeRuntimeMetric(w io.Writer, name string, sample *runtimemetrics.Sample) { + kind := sample.Value.Kind() + switch kind { + case runtimemetrics.KindBad: + panic(fmt.Errorf("BUG: unexpected runtimemetrics.KindBad for sample.Name=%q", sample.Name)) + case runtimemetrics.KindUint64: + v := sample.Value.Uint64() + if strings.HasSuffix(name, "_total") { + WriteCounterUint64(w, name, v) + } else { + WriteGaugeUint64(w, name, v) + } + case runtimemetrics.KindFloat64: + v := sample.Value.Float64() + if isCounterName(name) { + WriteCounterFloat64(w, name, v) + } else { + WriteGaugeFloat64(w, name, v) + } + case runtimemetrics.KindFloat64Histogram: + h := sample.Value.Float64Histogram() + writeRuntimeHistogramMetric(w, name, h) + default: + panic(fmt.Errorf("unexpected metric kind=%d", kind)) + } +} + +func writeRuntimeHistogramMetric(w io.Writer, name string, h *runtimemetrics.Float64Histogram) { + buckets := h.Buckets + counts := h.Counts + if len(buckets) != len(counts)+1 { + panic(fmt.Errorf("the number of buckets must be bigger than the number of counts by 1 in histogram %s; got buckets=%d, counts=%d", name, len(buckets), len(counts))) + } + tailCount := uint64(0) + if strings.HasSuffix(name, "_seconds") { + // Limit the maximum bucket to 1 second, since Go runtime exposes buckets with 10K seconds, + // which have little sense. At the same time such buckets may lead to high cardinality issues + // at the scraper side. + for len(buckets) > 0 && buckets[len(buckets)-1] > 1 { + buckets = buckets[:len(buckets)-1] + tailCount += counts[len(counts)-1] + counts = counts[:len(counts)-1] + } + } + + iStep := float64(len(buckets)) / maxRuntimeHistogramBuckets + + totalCount := uint64(0) + iNext := 0.0 + WriteMetadataIfNeeded(w, name, "histogram") + for i, count := range counts { + totalCount += count + if float64(i) >= iNext { + iNext += iStep + le := buckets[i+1] + if !math.IsInf(le, 1) { + fmt.Fprintf(w, `%s_bucket{le="%g"} %d`+"\n", name, le, totalCount) + } + } + } + totalCount += tailCount + fmt.Fprintf(w, `%s_bucket{le="+Inf"} %d`+"\n", name, totalCount) +} + +// Limit the number of buckets for Go runtime histograms in order to prevent from high cardinality issues at scraper side. +const maxRuntimeHistogramBuckets = 30 diff --git a/vendor/github.com/VictoriaMetrics/metrics/histogram.go b/vendor/github.com/VictoriaMetrics/metrics/histogram.go index a5766817..d703ae82 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/histogram.go +++ b/vendor/github.com/VictoriaMetrics/metrics/histogram.go @@ -36,7 +36,7 @@ var bucketMultiplier = math.Pow(10, 1.0/bucketsPerDecimal) // // Histogram buckets can be converted to Prometheus-like buckets with `le` labels // with `prometheus_buckets(_bucket)` function from PromQL extensions in VictoriaMetrics. -// (see https://github.com/VictoriaMetrics/VictoriaMetrics/wiki/MetricsQL ): +// (see https://docs.victoriametrics.com/metricsql/ ): // // prometheus_buckets(request_duration_bucket) // @@ -47,13 +47,21 @@ var bucketMultiplier = math.Pow(10, 1.0/bucketsPerDecimal) // Zero histogram is usable. type Histogram struct { // Mu gurantees synchronous update for all the counters and sum. + // + // Do not use sync.RWMutex, since it has zero sense from performance PoV. + // It only complicates the code. mu sync.Mutex + // decimalBuckets contains counters for histogram buckets decimalBuckets [decimalBucketsCount]*[bucketsPerDecimal]uint64 + // lower is the number of values, which hit the lower bucket lower uint64 + + // upper is the number of values, which hit the upper bucket upper uint64 + // sum is the sum of all the values put into Histogram sum float64 } @@ -109,6 +117,34 @@ func (h *Histogram) Update(v float64) { h.mu.Unlock() } +// Merge merges src to h +func (h *Histogram) Merge(src *Histogram) { + h.mu.Lock() + defer h.mu.Unlock() + + src.mu.Lock() + defer src.mu.Unlock() + + h.lower += src.lower + h.upper += src.upper + h.sum += src.sum + + for i, dbSrc := range src.decimalBuckets { + if dbSrc == nil { + continue + } + dbDst := h.decimalBuckets[i] + if dbDst == nil { + var b [bucketsPerDecimal]uint64 + dbDst = &b + h.decimalBuckets[i] = dbDst + } + for j := range dbSrc { + dbDst[j] += dbSrc[j] + } + } +} + // VisitNonZeroBuckets calls f for all buckets with non-zero counters. // // vmrange contains "..." string with bucket bounds. The lower bound @@ -228,3 +264,7 @@ func (h *Histogram) getSum() float64 { h.mu.Unlock() return sum } + +func (h *Histogram) metricType() string { + return "histogram" +} diff --git a/vendor/github.com/VictoriaMetrics/metrics/metrics.go b/vendor/github.com/VictoriaMetrics/metrics/metrics.go index 7dfa9721..74e97352 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/metrics.go +++ b/vendor/github.com/VictoriaMetrics/metrics/metrics.go @@ -13,9 +13,12 @@ package metrics import ( + "fmt" "io" "sort" + "strings" "sync" + "sync/atomic" "unsafe" ) @@ -27,6 +30,7 @@ type namedMetric struct { type metric interface { marshalTo(prefix string, w io.Writer) + metricType() string } var defaultSet = NewSet() @@ -51,16 +55,33 @@ func RegisterSet(s *Set) { // UnregisterSet stops exporting metrics for the given s via global WritePrometheus() call. // -// Call s.UnregisterAllMetrics() after unregistering s if it is no longer used. -func UnregisterSet(s *Set) { +// If destroySet is set to true, then s.UnregisterAllMetrics() is called on s after unregistering it, +// so s becomes destroyed. Otherwise the s can be registered again in the set by passing it to RegisterSet(). +func UnregisterSet(s *Set, destroySet bool) { registeredSetsLock.Lock() delete(registeredSets, s) registeredSetsLock.Unlock() + + if destroySet { + s.UnregisterAllMetrics() + } } -// WritePrometheus writes all the metrics from default set and all the registered sets in Prometheus format to w. +// RegisterMetricsWriter registers writeMetrics callback for including metrics in the output generated by WritePrometheus. +// +// The writeMetrics callback must write metrics to w in Prometheus text exposition format without timestamps and trailing comments. +// The last line generated by writeMetrics must end with \n. +// See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format +// +// It is OK to register multiple writeMetrics callbacks - all of them will be called sequentially for gererating the output at WritePrometheus. +func RegisterMetricsWriter(writeMetrics func(w io.Writer)) { + defaultSet.RegisterMetricsWriter(writeMetrics) +} + +// WritePrometheus writes all the metrics in Prometheus format from the default set, all the added sets and metrics writers to w. // // Additional sets can be registered via RegisterSet() call. +// Additional metric writers can be registered via RegisterMetricsWriter() call. // // If exposeProcessMetrics is true, then various `go_*` and `process_*` metrics // are exposed for the current process. @@ -134,10 +155,26 @@ func WritePrometheus(w io.Writer, exposeProcessMetrics bool) { // // - process_io_storage_written_bytes_total - the number of bytes actually written to disk // +// - go_sched_latencies_seconds - time spent by goroutines in ready state before they start execution +// +// - go_mutex_wait_seconds_total - summary time spent by all the goroutines while waiting for locked mutex +// +// - go_gc_mark_assist_cpu_seconds_total - summary CPU time spent by goroutines in GC mark assist state +// +// - go_gc_cpu_seconds_total - summary time spent in GC +// +// - go_gc_pauses_seconds - duration of GC pauses +// +// - go_scavenge_cpu_seconds_total - CPU time spent on returning the memory to OS +// +// - go_memlimit_bytes - the GOMEMLIMIT env var value +// // - go_memstats_alloc_bytes - memory usage for Go objects in the heap // // - go_memstats_alloc_bytes_total - the cumulative counter for total size of allocated Go objects // +// - go_memstats_buck_hash_sys_bytes - bytes of memory in profiling bucket hash tables +// // - go_memstats_frees_total - the cumulative counter for number of freed Go objects // // - go_memstats_gc_cpu_fraction - the fraction of CPU spent in Go garbage collector @@ -148,20 +185,42 @@ func WritePrometheus(w io.Writer, exposeProcessMetrics bool) { // // - go_memstats_heap_idle_bytes - idle memory ready for new Go object allocations // +// - go_memstats_heap_inuse_bytes - bytes in in-use spans +// // - go_memstats_heap_objects - the number of Go objects in the heap // +// - go_memstats_heap_released_bytes - bytes of physical memory returned to the OS +// // - go_memstats_heap_sys_bytes - memory requested for Go objects from the OS // +// - go_memstats_last_gc_time_seconds - unix timestamp the last garbage collection finished +// +// - go_memstats_lookups_total - the number of pointer lookups performed by the runtime +// // - go_memstats_mallocs_total - the number of allocations for Go objects // +// - go_memstats_mcache_inuse_bytes - bytes of allocated mcache structures +// +// - go_memstats_mcache_sys_bytes - bytes of memory obtained from the OS for mcache structures +// +// - go_memstats_mspan_inuse_bytes - bytes of allocated mspan structures +// +// - go_memstats_mspan_sys_bytes - bytes of memory obtained from the OS for mspan structures +// // - go_memstats_next_gc_bytes - the target heap size when the next garbage collection should start // +// - go_memstats_other_sys_bytes - bytes of memory in miscellaneous off-heap runtime allocations +// // - go_memstats_stack_inuse_bytes - memory used for goroutine stacks // // - go_memstats_stack_sys_bytes - memory requested fromthe OS for goroutine stacks // // - go_memstats_sys_bytes - memory requested by Go runtime from the OS // +// - go_cgo_calls_count - the total number of CGO calls +// +// - go_cpu_count - the number of CPU cores on the host where the app runs +// // The WriteProcessMetrics func is usually called in combination with writing Set metrics // inside "/metrics" handler: // @@ -170,7 +229,7 @@ func WritePrometheus(w io.Writer, exposeProcessMetrics bool) { // metrics.WriteProcessMetrics(w) // }) // -// See also WrteFDMetrics. +// See also WriteFDMetrics. func WriteProcessMetrics(w io.Writer) { writeGoMetrics(w) writeProcessMetrics(w) @@ -190,6 +249,8 @@ func UnregisterMetric(name string) bool { } // UnregisterAllMetrics unregisters all the metrics from default set. +// +// It also unregisters writeMetrics callbacks passed to RegisterMetricsWriter. func UnregisterAllMetrics() { defaultSet.UnregisterAllMetrics() } @@ -203,3 +264,76 @@ func ListMetricNames() []string { func GetDefaultSet() *Set { return defaultSet } + +// ExposeMetadata allows enabling adding TYPE and HELP metadata to the exposed metrics globally. +// +// It is safe to call this method multiple times. It is allowed to change it in runtime. +// ExposeMetadata is set to false by default. +func ExposeMetadata(v bool) { + n := 0 + if v { + n = 1 + } + atomic.StoreUint32(&exposeMetadata, uint32(n)) +} + +func isMetadataEnabled() bool { + n := atomic.LoadUint32(&exposeMetadata) + return n != 0 +} + +var exposeMetadata uint32 + +func isCounterName(name string) bool { + return strings.HasSuffix(name, "_total") +} + +// WriteGaugeUint64 writes gauge metric with the given name and value to w in Prometheus text exposition format. +func WriteGaugeUint64(w io.Writer, name string, value uint64) { + writeMetricUint64(w, name, "gauge", value) +} + +// WriteGaugeFloat64 writes gauge metric with the given name and value to w in Prometheus text exposition format. +func WriteGaugeFloat64(w io.Writer, name string, value float64) { + writeMetricFloat64(w, name, "gauge", value) +} + +// WriteCounterUint64 writes counter metric with the given name and value to w in Prometheus text exposition format. +func WriteCounterUint64(w io.Writer, name string, value uint64) { + writeMetricUint64(w, name, "counter", value) +} + +// WriteCounterFloat64 writes counter metric with the given name and value to w in Prometheus text exposition format. +func WriteCounterFloat64(w io.Writer, name string, value float64) { + writeMetricFloat64(w, name, "counter", value) +} + +func writeMetricUint64(w io.Writer, metricName, metricType string, value uint64) { + WriteMetadataIfNeeded(w, metricName, metricType) + fmt.Fprintf(w, "%s %d\n", metricName, value) +} + +func writeMetricFloat64(w io.Writer, metricName, metricType string, value float64) { + WriteMetadataIfNeeded(w, metricName, metricType) + fmt.Fprintf(w, "%s %g\n", metricName, value) +} + +// WriteMetadataIfNeeded writes HELP and TYPE metadata for the given metricName and metricType if this is globally enabled via ExposeMetadata(). +// +// If the metadata exposition isn't enabled, then this function is no-op. +func WriteMetadataIfNeeded(w io.Writer, metricName, metricType string) { + if !isMetadataEnabled() { + return + } + metricFamily := getMetricFamily(metricName) + fmt.Fprintf(w, "# HELP %s\n", metricFamily) + fmt.Fprintf(w, "# TYPE %s %s\n", metricFamily, metricType) +} + +func getMetricFamily(metricName string) string { + n := strings.IndexByte(metricName, '{') + if n < 0 { + return metricName + } + return metricName[:n] +} diff --git a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go index 48def1cb..e4587b71 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go +++ b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go @@ -16,6 +16,11 @@ import ( // See https://github.com/prometheus/procfs/blob/a4ac0826abceb44c40fc71daed2b301db498b93e/proc_stat.go#L40 . const userHZ = 100 +// Different environments may have different page size. +// +// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6457 +var pageSizeBytes = uint64(os.Getpagesize()) + // See http://man7.org/linux/man-pages/man5/proc.5.html type procStat struct { State byte @@ -74,15 +79,15 @@ func writeProcessMetrics(w io.Writer) { utime := float64(p.Utime) / userHZ stime := float64(p.Stime) / userHZ - fmt.Fprintf(w, "process_cpu_seconds_system_total %g\n", stime) - fmt.Fprintf(w, "process_cpu_seconds_total %g\n", utime+stime) - fmt.Fprintf(w, "process_cpu_seconds_user_total %g\n", utime) - fmt.Fprintf(w, "process_major_pagefaults_total %d\n", p.Majflt) - fmt.Fprintf(w, "process_minor_pagefaults_total %d\n", p.Minflt) - fmt.Fprintf(w, "process_num_threads %d\n", p.NumThreads) - fmt.Fprintf(w, "process_resident_memory_bytes %d\n", p.Rss*4096) - fmt.Fprintf(w, "process_start_time_seconds %d\n", startTimeSeconds) - fmt.Fprintf(w, "process_virtual_memory_bytes %d\n", p.Vsize) + WriteCounterFloat64(w, "process_cpu_seconds_system_total", stime) + WriteCounterFloat64(w, "process_cpu_seconds_total", utime+stime) + WriteCounterFloat64(w, "process_cpu_seconds_user_total", utime) + WriteCounterUint64(w, "process_major_pagefaults_total", uint64(p.Majflt)) + WriteCounterUint64(w, "process_minor_pagefaults_total", uint64(p.Minflt)) + WriteGaugeUint64(w, "process_num_threads", uint64(p.NumThreads)) + WriteGaugeUint64(w, "process_resident_memory_bytes", uint64(p.Rss)*pageSizeBytes) + WriteGaugeUint64(w, "process_start_time_seconds", uint64(startTimeSeconds)) + WriteGaugeUint64(w, "process_virtual_memory_bytes", uint64(p.Vsize)) writeProcessMemMetrics(w) writeIOMetrics(w) } @@ -133,12 +138,12 @@ func writeIOMetrics(w io.Writer) { writeBytes = getInt(s) } } - fmt.Fprintf(w, "process_io_read_bytes_total %d\n", rchar) - fmt.Fprintf(w, "process_io_written_bytes_total %d\n", wchar) - fmt.Fprintf(w, "process_io_read_syscalls_total %d\n", syscr) - fmt.Fprintf(w, "process_io_write_syscalls_total %d\n", syscw) - fmt.Fprintf(w, "process_io_storage_read_bytes_total %d\n", readBytes) - fmt.Fprintf(w, "process_io_storage_written_bytes_total %d\n", writeBytes) + WriteGaugeUint64(w, "process_io_read_bytes_total", uint64(rchar)) + WriteGaugeUint64(w, "process_io_written_bytes_total", uint64(wchar)) + WriteGaugeUint64(w, "process_io_read_syscalls_total", uint64(syscr)) + WriteGaugeUint64(w, "process_io_write_syscalls_total", uint64(syscw)) + WriteGaugeUint64(w, "process_io_storage_read_bytes_total", uint64(readBytes)) + WriteGaugeUint64(w, "process_io_storage_written_bytes_total", uint64(writeBytes)) } var startTimeSeconds = time.Now().Unix() @@ -155,8 +160,8 @@ func writeFDMetrics(w io.Writer) { log.Printf("ERROR: metrics: cannot determine the limit on open file descritors: %s", err) return } - fmt.Fprintf(w, "process_max_fds %d\n", maxOpenFDs) - fmt.Fprintf(w, "process_open_fds %d\n", totalOpenFDs) + WriteGaugeUint64(w, "process_max_fds", maxOpenFDs) + WriteGaugeUint64(w, "process_open_fds", totalOpenFDs) } func getOpenFDsCount(path string) (uint64, error) { @@ -224,11 +229,11 @@ func writeProcessMemMetrics(w io.Writer) { log.Printf("ERROR: metrics: cannot determine memory status: %s", err) return } - fmt.Fprintf(w, "process_virtual_memory_peak_bytes %d\n", ms.vmPeak) - fmt.Fprintf(w, "process_resident_memory_peak_bytes %d\n", ms.rssPeak) - fmt.Fprintf(w, "process_resident_memory_anon_bytes %d\n", ms.rssAnon) - fmt.Fprintf(w, "process_resident_memory_file_bytes %d\n", ms.rssFile) - fmt.Fprintf(w, "process_resident_memory_shared_bytes %d\n", ms.rssShmem) + WriteGaugeUint64(w, "process_virtual_memory_peak_bytes", ms.vmPeak) + WriteGaugeUint64(w, "process_resident_memory_peak_bytes", ms.rssPeak) + WriteGaugeUint64(w, "process_resident_memory_anon_bytes", ms.rssAnon) + WriteGaugeUint64(w, "process_resident_memory_file_bytes", ms.rssFile) + WriteGaugeUint64(w, "process_resident_memory_shared_bytes", ms.rssShmem) } diff --git a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_windows.go b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_windows.go index e824ada9..bda7c82f 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/process_metrics_windows.go +++ b/vendor/github.com/VictoriaMetrics/metrics/process_metrics_windows.go @@ -4,7 +4,6 @@ package metrics import ( - "fmt" "io" "log" "syscall" @@ -55,16 +54,16 @@ func writeProcessMetrics(w io.Writer) { log.Printf("ERROR: metrics: cannot read process memory information: %s", err) return } - stimeSeconds := (uint64(stime.HighDateTime)<<32 + uint64(stime.LowDateTime)) / 1e7 - utimeSeconds := (uint64(utime.HighDateTime)<<32 + uint64(utime.LowDateTime)) / 1e7 - fmt.Fprintf(w, "process_cpu_seconds_system_total %d\n", stimeSeconds) - fmt.Fprintf(w, "process_cpu_seconds_total %d\n", stimeSeconds+utimeSeconds) - fmt.Fprintf(w, "process_cpu_seconds_user_total %d\n", stimeSeconds) - fmt.Fprintf(w, "process_pagefaults_total %d\n", mc.PageFaultCount) - fmt.Fprintf(w, "process_start_time_seconds %d\n", startTime.Nanoseconds()/1e9) - fmt.Fprintf(w, "process_virtual_memory_bytes %d\n", mc.PrivateUsage) - fmt.Fprintf(w, "process_resident_memory_peak_bytes %d\n", mc.PeakWorkingSetSize) - fmt.Fprintf(w, "process_resident_memory_bytes %d\n", mc.WorkingSetSize) + stimeSeconds := float64(uint64(stime.HighDateTime)<<32+uint64(stime.LowDateTime)) / 1e7 + utimeSeconds := float64(uint64(utime.HighDateTime)<<32+uint64(utime.LowDateTime)) / 1e7 + WriteCounterFloat64(w, "process_cpu_seconds_system_total", stimeSeconds) + WriteCounterFloat64(w, "process_cpu_seconds_total", stimeSeconds+utimeSeconds) + WriteCounterFloat64(w, "process_cpu_seconds_user_total", stimeSeconds) + WriteCounterUint64(w, "process_pagefaults_total", uint64(mc.PageFaultCount)) + WriteGaugeUint64(w, "process_start_time_seconds", uint64(startTime.Nanoseconds())/1e9) + WriteGaugeUint64(w, "process_virtual_memory_bytes", uint64(mc.PrivateUsage)) + WriteGaugeUint64(w, "process_resident_memory_peak_bytes", uint64(mc.PeakWorkingSetSize)) + WriteGaugeUint64(w, "process_resident_memory_bytes", uint64(mc.WorkingSetSize)) } func writeFDMetrics(w io.Writer) { @@ -80,6 +79,6 @@ func writeFDMetrics(w io.Writer) { } // it seems to be hard-coded limit for 64-bit systems // https://learn.microsoft.com/en-us/archive/blogs/markrussinovich/pushing-the-limits-of-windows-handles#maximum-number-of-handles - fmt.Fprintf(w, "process_max_fds %d\n", 16777216) - fmt.Fprintf(w, "process_open_fds %d\n", count) + WriteGaugeUint64(w, "process_max_fds", 16777216) + WriteGaugeUint64(w, "process_open_fds", uint64(count)) } diff --git a/vendor/github.com/VictoriaMetrics/metrics/push.go b/vendor/github.com/VictoriaMetrics/metrics/push.go index 4215f48a..f33886f9 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/push.go +++ b/vendor/github.com/VictoriaMetrics/metrics/push.go @@ -2,17 +2,69 @@ package metrics import ( "bytes" + "context" + "errors" "fmt" "io" "io/ioutil" "log" "net/http" "net/url" + "strings" + "sync" "time" "compress/gzip" ) +// PushOptions is the list of options, which may be applied to InitPushWithOptions(). +type PushOptions struct { + // ExtraLabels is an optional comma-separated list of `label="value"` labels, which must be added to all the metrics before pushing them to pushURL. + ExtraLabels string + + // Headers is an optional list of HTTP headers to add to every push request to pushURL. + // + // Every item in the list must have the form `Header: value`. For example, `Authorization: Custom my-top-secret`. + Headers []string + + // Whether to disable HTTP request body compression before sending the metrics to pushURL. + // + // By default the compression is enabled. + DisableCompression bool + + // Method is HTTP request method to use when pushing metrics to pushURL. + // + // By default the Method is GET. + Method string + + // Optional WaitGroup for waiting until all the push workers created with this WaitGroup are stopped. + WaitGroup *sync.WaitGroup +} + +// InitPushWithOptions sets up periodic push for globally registered metrics to the given pushURL with the given interval. +// +// The periodic push is stopped when ctx is canceled. +// It is possible to wait until the background metrics push worker is stopped on a WaitGroup passed via opts.WaitGroup. +// +// If pushProcessMetrics is set to true, then 'process_*' and `go_*` metrics are also pushed to pushURL. +// +// opts may contain additional configuration options if non-nil. +// +// The metrics are pushed to pushURL in Prometheus text exposition format. +// See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format +// +// It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to +// https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format +// +// It is OK calling InitPushWithOptions multiple times with different pushURL - +// in this case metrics are pushed to all the provided pushURL urls. +func InitPushWithOptions(ctx context.Context, pushURL string, interval time.Duration, pushProcessMetrics bool, opts *PushOptions) error { + writeMetrics := func(w io.Writer) { + WritePrometheus(w, pushProcessMetrics) + } + return InitPushExtWithOptions(ctx, pushURL, interval, writeMetrics, opts) +} + // InitPushProcessMetrics sets up periodic push for 'process_*' metrics to the given pushURL with the given interval. // // extraLabels may contain comma-separated list of `label="value"` labels, which will be added @@ -27,10 +79,7 @@ import ( // It is OK calling InitPushProcessMetrics multiple times with different pushURL - // in this case metrics are pushed to all the provided pushURL urls. func InitPushProcessMetrics(pushURL string, interval time.Duration, extraLabels string) error { - writeMetrics := func(w io.Writer) { - WriteProcessMetrics(w) - } - return InitPushExt(pushURL, interval, extraLabels, writeMetrics) + return InitPushExt(pushURL, interval, extraLabels, WriteProcessMetrics) } // InitPush sets up periodic push for globally registered metrics to the given pushURL with the given interval. @@ -38,7 +87,7 @@ func InitPushProcessMetrics(pushURL string, interval time.Duration, extraLabels // extraLabels may contain comma-separated list of `label="value"` labels, which will be added // to all the metrics before pushing them to pushURL. // -// If pushProcessMetrics is set to true, then 'process_*' metrics are also pushed to pushURL. +// If pushProcessMetrics is set to true, then 'process_*' and `go_*` metrics are also pushed to pushURL. // // The metrics are pushed to pushURL in Prometheus text exposition format. // See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format @@ -55,12 +104,46 @@ func InitPush(pushURL string, interval time.Duration, extraLabels string, pushPr return InitPushExt(pushURL, interval, extraLabels, writeMetrics) } +// PushMetrics pushes globally registered metrics to pushURL. +// +// If pushProcessMetrics is set to true, then 'process_*' and `go_*` metrics are also pushed to pushURL. +// +// opts may contain additional configuration options if non-nil. +// +// It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to +// https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format +func PushMetrics(ctx context.Context, pushURL string, pushProcessMetrics bool, opts *PushOptions) error { + writeMetrics := func(w io.Writer) { + WritePrometheus(w, pushProcessMetrics) + } + return PushMetricsExt(ctx, pushURL, writeMetrics, opts) +} + +// InitPushWithOptions sets up periodic push for metrics from s to the given pushURL with the given interval. +// +// The periodic push is stopped when the ctx is canceled. +// It is possible to wait until the background metrics push worker is stopped on a WaitGroup passed via opts.WaitGroup. +// +// opts may contain additional configuration options if non-nil. +// +// The metrics are pushed to pushURL in Prometheus text exposition format. +// See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format +// +// It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to +// https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format +// +// It is OK calling InitPushWithOptions multiple times with different pushURL - +// in this case metrics are pushed to all the provided pushURL urls. +func (s *Set) InitPushWithOptions(ctx context.Context, pushURL string, interval time.Duration, opts *PushOptions) error { + return InitPushExtWithOptions(ctx, pushURL, interval, s.WritePrometheus, opts) +} + // InitPush sets up periodic push for metrics from s to the given pushURL with the given interval. // // extraLabels may contain comma-separated list of `label="value"` labels, which will be added // to all the metrics before pushing them to pushURL. // -// / The metrics are pushed to pushURL in Prometheus text exposition format. +// The metrics are pushed to pushURL in Prometheus text exposition format. // See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format // // It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to @@ -69,10 +152,17 @@ func InitPush(pushURL string, interval time.Duration, extraLabels string, pushPr // It is OK calling InitPush multiple times with different pushURL - // in this case metrics are pushed to all the provided pushURL urls. func (s *Set) InitPush(pushURL string, interval time.Duration, extraLabels string) error { - writeMetrics := func(w io.Writer) { - s.WritePrometheus(w) - } - return InitPushExt(pushURL, interval, extraLabels, writeMetrics) + return InitPushExt(pushURL, interval, extraLabels, s.WritePrometheus) +} + +// PushMetrics pushes s metrics to pushURL. +// +// opts may contain additional configuration options if non-nil. +// +// It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to +// https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format +func (s *Set) PushMetrics(ctx context.Context, pushURL string, opts *PushOptions) error { + return PushMetricsExt(ctx, pushURL, s.WritePrometheus, opts) } // InitPushExt sets up periodic push for metrics obtained by calling writeMetrics with the given interval. @@ -90,94 +180,246 @@ func (s *Set) InitPush(pushURL string, interval time.Duration, extraLabels strin // in this case metrics are pushed to all the provided pushURL urls. // // It is OK calling InitPushExt multiple times with different writeMetrics - -// in this case all the metrics generated by writeMetrics callbacks are writte to pushURL. +// in this case all the metrics generated by writeMetrics callbacks are written to pushURL. func InitPushExt(pushURL string, interval time.Duration, extraLabels string, writeMetrics func(w io.Writer)) error { + opts := &PushOptions{ + ExtraLabels: extraLabels, + } + return InitPushExtWithOptions(context.Background(), pushURL, interval, writeMetrics, opts) +} + +// InitPushExtWithOptions sets up periodic push for metrics obtained by calling writeMetrics with the given interval. +// +// The writeMetrics callback must write metrics to w in Prometheus text exposition format without timestamps and trailing comments. +// See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format +// +// The periodic push is stopped when the ctx is canceled. +// It is possible to wait until the background metrics push worker is stopped on a WaitGroup passed via opts.WaitGroup. +// +// opts may contain additional configuration options if non-nil. +// +// It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to +// https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format +// +// It is OK calling InitPushExtWithOptions multiple times with different pushURL - +// in this case metrics are pushed to all the provided pushURL urls. +// +// It is OK calling InitPushExtWithOptions multiple times with different writeMetrics - +// in this case all the metrics generated by writeMetrics callbacks are written to pushURL. +func InitPushExtWithOptions(ctx context.Context, pushURL string, interval time.Duration, writeMetrics func(w io.Writer), opts *PushOptions) error { + pc, err := newPushContext(pushURL, opts) + if err != nil { + return err + } + + // validate interval if interval <= 0 { return fmt.Errorf("interval must be positive; got %s", interval) } - if err := validateTags(extraLabels); err != nil { - return fmt.Errorf("invalid extraLabels=%q: %w", extraLabels, err) + pushMetricsSet.GetOrCreateFloatCounter(fmt.Sprintf(`metrics_push_interval_seconds{url=%q}`, pc.pushURLRedacted)).Set(interval.Seconds()) + + var wg *sync.WaitGroup + if opts != nil { + wg = opts.WaitGroup + if wg != nil { + wg.Add(1) + } + } + go func() { + ticker := time.NewTicker(interval) + defer ticker.Stop() + stopCh := ctx.Done() + for { + select { + case <-ticker.C: + ctxLocal, cancel := context.WithTimeout(ctx, interval+time.Second) + err := pc.pushMetrics(ctxLocal, writeMetrics) + cancel() + if err != nil { + log.Printf("ERROR: metrics.push: %s", err) + } + case <-stopCh: + if wg != nil { + wg.Done() + } + return + } + } + }() + + return nil +} + +// PushMetricsExt pushes metrics generated by wirteMetrics to pushURL. +// +// The writeMetrics callback must write metrics to w in Prometheus text exposition format without timestamps and trailing comments. +// See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format +// +// opts may contain additional configuration options if non-nil. +// +// It is recommended pushing metrics to /api/v1/import/prometheus endpoint according to +// https://docs.victoriametrics.com/#how-to-import-data-in-prometheus-exposition-format +func PushMetricsExt(ctx context.Context, pushURL string, writeMetrics func(w io.Writer), opts *PushOptions) error { + pc, err := newPushContext(pushURL, opts) + if err != nil { + return err } + return pc.pushMetrics(ctx, writeMetrics) +} + +type pushContext struct { + pushURL *url.URL + method string + pushURLRedacted string + extraLabels string + headers http.Header + disableCompression bool + + client *http.Client + + pushesTotal *Counter + bytesPushedTotal *Counter + pushBlockSize *Histogram + pushDuration *Histogram + pushErrors *Counter +} + +func newPushContext(pushURL string, opts *PushOptions) (*pushContext, error) { + if opts == nil { + opts = &PushOptions{} + } + + // validate pushURL pu, err := url.Parse(pushURL) if err != nil { - return fmt.Errorf("cannot parse pushURL=%q: %w", pushURL, err) + return nil, fmt.Errorf("cannot parse pushURL=%q: %w", pushURL, err) } if pu.Scheme != "http" && pu.Scheme != "https" { - return fmt.Errorf("unsupported scheme in pushURL=%q; expecting 'http' or 'https'", pushURL) + return nil, fmt.Errorf("unsupported scheme in pushURL=%q; expecting 'http' or 'https'", pushURL) } if pu.Host == "" { - return fmt.Errorf("missing host in pushURL=%q", pushURL) + return nil, fmt.Errorf("missing host in pushURL=%q", pushURL) + } + + method := opts.Method + if method == "" { + method = http.MethodGet + } + + // validate ExtraLabels + extraLabels := opts.ExtraLabels + if err := validateTags(extraLabels); err != nil { + return nil, fmt.Errorf("invalid extraLabels=%q: %w", extraLabels, err) + } + + // validate Headers + headers := make(http.Header) + for _, h := range opts.Headers { + n := strings.IndexByte(h, ':') + if n < 0 { + return nil, fmt.Errorf("missing `:` delimiter in the header %q", h) + } + name := strings.TrimSpace(h[:n]) + value := strings.TrimSpace(h[n+1:]) + headers.Add(name, value) } + pushURLRedacted := pu.Redacted() - c := &http.Client{ - Timeout: interval, - } - pushesTotal := pushMetrics.GetOrCreateCounter(fmt.Sprintf(`metrics_push_total{url=%q}`, pushURLRedacted)) - pushErrorsTotal := pushMetrics.GetOrCreateCounter(fmt.Sprintf(`metrics_push_errors_total{url=%q}`, pushURLRedacted)) - bytesPushedTotal := pushMetrics.GetOrCreateCounter(fmt.Sprintf(`metrics_push_bytes_pushed_total{url=%q}`, pushURLRedacted)) - pushDuration := pushMetrics.GetOrCreateHistogram(fmt.Sprintf(`metrics_push_duration_seconds{url=%q}`, pushURLRedacted)) - pushBlockSize := pushMetrics.GetOrCreateHistogram(fmt.Sprintf(`metrics_push_block_size_bytes{url=%q}`, pushURLRedacted)) - pushMetrics.GetOrCreateFloatCounter(fmt.Sprintf(`metrics_push_interval_seconds{url=%q}`, pushURLRedacted)).Set(interval.Seconds()) - go func() { - ticker := time.NewTicker(interval) - var bb bytes.Buffer - var tmpBuf []byte - zw := gzip.NewWriter(&bb) - for range ticker.C { - bb.Reset() - writeMetrics(&bb) - if len(extraLabels) > 0 { - tmpBuf = addExtraLabels(tmpBuf[:0], bb.Bytes(), extraLabels) - bb.Reset() - if _, err := bb.Write(tmpBuf); err != nil { - panic(fmt.Errorf("BUG: cannot write %d bytes to bytes.Buffer: %s", len(tmpBuf), err)) - } - } - tmpBuf = append(tmpBuf[:0], bb.Bytes()...) - bb.Reset() - zw.Reset(&bb) - if _, err := zw.Write(tmpBuf); err != nil { - panic(fmt.Errorf("BUG: cannot write %d bytes to gzip writer: %s", len(tmpBuf), err)) - } - if err := zw.Close(); err != nil { - panic(fmt.Errorf("BUG: cannot flush metrics to gzip writer: %s", err)) - } - pushesTotal.Inc() - blockLen := bb.Len() - bytesPushedTotal.Add(blockLen) - pushBlockSize.Update(float64(blockLen)) - req, err := http.NewRequest("GET", pushURL, &bb) - if err != nil { - panic(fmt.Errorf("BUG: metrics.push: cannot initialize request for metrics push to %q: %w", pushURLRedacted, err)) - } - req.Header.Set("Content-Type", "text/plain") - req.Header.Set("Content-Encoding", "gzip") - startTime := time.Now() - resp, err := c.Do(req) - pushDuration.UpdateDuration(startTime) - if err != nil { - log.Printf("ERROR: metrics.push: cannot push metrics to %q: %s", pushURLRedacted, err) - pushErrorsTotal.Inc() - continue - } - if resp.StatusCode/100 != 2 { - body, _ := ioutil.ReadAll(resp.Body) - _ = resp.Body.Close() - log.Printf("ERROR: metrics.push: unexpected status code in response from %q: %d; expecting 2xx; response body: %q", - pushURLRedacted, resp.StatusCode, body) - pushErrorsTotal.Inc() - continue - } - _ = resp.Body.Close() + client := &http.Client{} + return &pushContext{ + pushURL: pu, + method: method, + pushURLRedacted: pushURLRedacted, + extraLabels: extraLabels, + headers: headers, + disableCompression: opts.DisableCompression, + + client: client, + + pushesTotal: pushMetricsSet.GetOrCreateCounter(fmt.Sprintf(`metrics_push_total{url=%q}`, pushURLRedacted)), + bytesPushedTotal: pushMetricsSet.GetOrCreateCounter(fmt.Sprintf(`metrics_push_bytes_pushed_total{url=%q}`, pushURLRedacted)), + pushBlockSize: pushMetricsSet.GetOrCreateHistogram(fmt.Sprintf(`metrics_push_block_size_bytes{url=%q}`, pushURLRedacted)), + pushDuration: pushMetricsSet.GetOrCreateHistogram(fmt.Sprintf(`metrics_push_duration_seconds{url=%q}`, pushURLRedacted)), + pushErrors: pushMetricsSet.GetOrCreateCounter(fmt.Sprintf(`metrics_push_errors_total{url=%q}`, pushURLRedacted)), + }, nil +} + +func (pc *pushContext) pushMetrics(ctx context.Context, writeMetrics func(w io.Writer)) error { + bb := getBytesBuffer() + defer putBytesBuffer(bb) + + writeMetrics(bb) + + if len(pc.extraLabels) > 0 { + bbTmp := getBytesBuffer() + bbTmp.B = append(bbTmp.B[:0], bb.B...) + bb.B = addExtraLabels(bb.B[:0], bbTmp.B, pc.extraLabels) + putBytesBuffer(bbTmp) + } + if !pc.disableCompression { + bbTmp := getBytesBuffer() + bbTmp.B = append(bbTmp.B[:0], bb.B...) + bb.B = bb.B[:0] + zw := getGzipWriter(bb) + if _, err := zw.Write(bbTmp.B); err != nil { + panic(fmt.Errorf("BUG: cannot write %d bytes to gzip writer: %s", len(bbTmp.B), err)) } - }() + if err := zw.Close(); err != nil { + panic(fmt.Errorf("BUG: cannot flush metrics to gzip writer: %s", err)) + } + putGzipWriter(zw) + putBytesBuffer(bbTmp) + } + + // Update metrics + pc.pushesTotal.Inc() + blockLen := len(bb.B) + pc.bytesPushedTotal.Add(blockLen) + pc.pushBlockSize.Update(float64(blockLen)) + + // Prepare the request to sent to pc.pushURL + reqBody := bytes.NewReader(bb.B) + req, err := http.NewRequestWithContext(ctx, pc.method, pc.pushURL.String(), reqBody) + if err != nil { + panic(fmt.Errorf("BUG: metrics.push: cannot initialize request for metrics push to %q: %w", pc.pushURLRedacted, err)) + } + + req.Header.Set("Content-Type", "text/plain") + // Set the needed headers, and `Content-Type` allowed be overwrited. + for name, values := range pc.headers { + for _, value := range values { + req.Header.Add(name, value) + } + } + if !pc.disableCompression { + req.Header.Set("Content-Encoding", "gzip") + } + + // Perform the request + startTime := time.Now() + resp, err := pc.client.Do(req) + pc.pushDuration.UpdateDuration(startTime) + if err != nil { + if errors.Is(err, context.Canceled) { + return nil + } + pc.pushErrors.Inc() + return fmt.Errorf("cannot push metrics to %q: %s", pc.pushURLRedacted, err) + } + if resp.StatusCode/100 != 2 { + body, _ := ioutil.ReadAll(resp.Body) + _ = resp.Body.Close() + pc.pushErrors.Inc() + return fmt.Errorf("unexpected status code in response from %q: %d; expecting 2xx; response body: %q", pc.pushURLRedacted, resp.StatusCode, body) + } + _ = resp.Body.Close() return nil } -var pushMetrics = NewSet() +var pushMetricsSet = NewSet() func writePushMetrics(w io.Writer) { - pushMetrics.WritePrometheus(w) + pushMetricsSet.WritePrometheus(w) } func addExtraLabels(dst, src []byte, extraLabels string) []byte { @@ -225,3 +467,44 @@ func addExtraLabels(dst, src []byte, extraLabels string) []byte { } var bashBytes = []byte("#") + +func getBytesBuffer() *bytesBuffer { + v := bytesBufferPool.Get() + if v == nil { + return &bytesBuffer{} + } + return v.(*bytesBuffer) +} + +func putBytesBuffer(bb *bytesBuffer) { + bb.B = bb.B[:0] + bytesBufferPool.Put(bb) +} + +var bytesBufferPool sync.Pool + +type bytesBuffer struct { + B []byte +} + +func (bb *bytesBuffer) Write(p []byte) (int, error) { + bb.B = append(bb.B, p...) + return len(p), nil +} + +func getGzipWriter(w io.Writer) *gzip.Writer { + v := gzipWriterPool.Get() + if v == nil { + return gzip.NewWriter(w) + } + zw := v.(*gzip.Writer) + zw.Reset(w) + return zw +} + +func putGzipWriter(zw *gzip.Writer) { + zw.Reset(io.Discard) + gzipWriterPool.Put(zw) +} + +var gzipWriterPool sync.Pool diff --git a/vendor/github.com/VictoriaMetrics/metrics/set.go b/vendor/github.com/VictoriaMetrics/metrics/set.go index 79355ea3..868a01c9 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/set.go +++ b/vendor/github.com/VictoriaMetrics/metrics/set.go @@ -19,6 +19,8 @@ type Set struct { a []*namedMetric m map[string]*namedMetric summaries []*Summary + + metricsWriters []func(w io.Writer) } // NewSet creates new set of metrics. @@ -45,14 +47,27 @@ func (s *Set) WritePrometheus(w io.Writer) { sort.Slice(s.a, lessFunc) } sa := append([]*namedMetric(nil), s.a...) + metricsWriters := s.metricsWriters s.mu.Unlock() - // Call marshalTo without the global lock, since certain metric types such as Gauge - // can call a callback, which, in turn, can try calling s.mu.Lock again. + prevMetricFamily := "" for _, nm := range sa { + metricFamily := getMetricFamily(nm.name) + if metricFamily != prevMetricFamily { + // write meta info only once per metric family + metricType := nm.metric.metricType() + WriteMetadataIfNeeded(&bb, nm.name, metricType) + prevMetricFamily = metricFamily + } + // Call marshalTo without the global lock, since certain metric types such as Gauge + // can call a callback, which, in turn, can try calling s.mu.Lock again. nm.metric.marshalTo(nm.name, &bb) } w.Write(bb.Bytes()) + + for _, writeMetrics := range metricsWriters { + writeMetrics(w) + } } // NewHistogram creates and returns new histogram in s with the given name. @@ -243,9 +258,6 @@ func (s *Set) GetOrCreateFloatCounter(name string) *FloatCounter { // // The returned gauge is safe to use from concurrent goroutines. func (s *Set) NewGauge(name string, f func() float64) *Gauge { - if f == nil { - panic(fmt.Errorf("BUG: f cannot be nil")) - } g := &Gauge{ f: f, } @@ -272,9 +284,6 @@ func (s *Set) GetOrCreateGauge(name string, f func() float64) *Gauge { s.mu.Unlock() if nm == nil { // Slow path - create and register missing gauge. - if f == nil { - panic(fmt.Errorf("BUG: f cannot be nil")) - } if err := validateMetric(name); err != nil { panic(fmt.Errorf("BUG: invalid metric name %q: %s", name, err)) } @@ -521,14 +530,22 @@ func (s *Set) unregisterMetricLocked(nm *namedMetric) bool { } // UnregisterAllMetrics de-registers all metrics registered in s. +// +// It also de-registers writeMetrics callbacks passed to RegisterMetricsWriter. func (s *Set) UnregisterAllMetrics() { metricNames := s.ListMetricNames() for _, name := range metricNames { s.UnregisterMetric(name) } + + s.mu.Lock() + s.metricsWriters = nil + s.mu.Unlock() } // ListMetricNames returns sorted list of all the metrics in s. +// +// The returned list doesn't include metrics generated by metricsWriter passed to RegisterMetricsWriter. func (s *Set) ListMetricNames() []string { s.mu.Lock() defer s.mu.Unlock() @@ -542,3 +559,17 @@ func (s *Set) ListMetricNames() []string { sort.Strings(metricNames) return metricNames } + +// RegisterMetricsWriter registers writeMetrics callback for including metrics in the output generated by s.WritePrometheus. +// +// The writeMetrics callback must write metrics to w in Prometheus text exposition format without timestamps and trailing comments. +// The last line generated by writeMetrics must end with \n. +// See https://github.com/prometheus/docs/blob/main/content/docs/instrumenting/exposition_formats.md#text-based-format +// +// It is OK to reguster multiple writeMetrics callbacks - all of them will be called sequentially for gererating the output at s.WritePrometheus. +func (s *Set) RegisterMetricsWriter(writeMetrics func(w io.Writer)) { + s.mu.Lock() + defer s.mu.Unlock() + + s.metricsWriters = append(s.metricsWriters, writeMetrics) +} diff --git a/vendor/github.com/VictoriaMetrics/metrics/summary.go b/vendor/github.com/VictoriaMetrics/metrics/summary.go index 52183d22..057b67bc 100644 --- a/vendor/github.com/VictoriaMetrics/metrics/summary.go +++ b/vendor/github.com/VictoriaMetrics/metrics/summary.go @@ -119,6 +119,10 @@ func (sm *Summary) marshalTo(prefix string, w io.Writer) { } } +func (sm *Summary) metricType() string { + return "summary" +} + func splitMetricName(name string) (string, string) { n := strings.IndexByte(name, '{') if n < 0 { @@ -196,6 +200,10 @@ func (qv *quantileValue) marshalTo(prefix string, w io.Writer) { } } +func (qv *quantileValue) metricType() string { + return "unsupported" +} + func addTag(name, tag string) string { if len(name) == 0 || name[len(name)-1] != '}' { return fmt.Sprintf("%s{%s}", name, tag) diff --git a/vendor/github.com/bitfield/script/README.md b/vendor/github.com/bitfield/script/README.md index a06a70c1..98e87ec4 100644 --- a/vendor/github.com/bitfield/script/README.md +++ b/vendor/github.com/bitfield/script/README.md @@ -1,7 +1,8 @@ [![Go Reference](https://pkg.go.dev/badge/github.com/bitfield/script.svg)](https://pkg.go.dev/github.com/bitfield/script) [![Go Report Card](https://goreportcard.com/badge/github.com/bitfield/script)](https://goreportcard.com/report/github.com/bitfield/script) [![Mentioned in Awesome Go](https://awesome.re/mentioned-badge-flat.svg)](https://github.com/avelino/awesome-go) -![Tests](https://github.com/bitfield/script/actions/workflows/test.yml/badge.svg) +![CI](https://github.com/bitfield/script/actions/workflows/ci.yml/badge.svg) +![Audit](https://github.com/bitfield/script/actions/workflows/audit.yml/badge.svg) ```go import "github.com/bitfield/script" @@ -33,6 +34,7 @@ If you're already familiar with shell scripting and the Unix toolset, here is a | `>` | [`WriteFile`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WriteFile) | | `>>` | [`AppendFile`](https://pkg.go.dev/github.com/bitfield/script#Pipe.AppendFile) | | `$*` | [`Args`](https://pkg.go.dev/github.com/bitfield/script#Args) | +| `base64` | [`DecodeBase64`](https://pkg.go.dev/github.com/bitfield/script#Pipe.DecodeBase64) / [`EncodeBase64`](https://pkg.go.dev/github.com/bitfield/script#Pipe.EncodeBase64) | | `basename` | [`Basename`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Basename) | | `cat` | [`File`](https://pkg.go.dev/github.com/bitfield/script#File) / [`Concat`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Concat) | | `curl` | [`Do`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Do) / [`Get`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Get) / [`Post`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Post) | @@ -267,18 +269,31 @@ These are functions that create a pipe with a given contents: | Source | Contents | | -------- | ------------- | -| [`Args`](https://pkg.go.dev/github.com/bitfield/script#Args) | command-line arguments -| [`Do`](https://pkg.go.dev/github.com/bitfield/script#Do) | HTTP response -| [`Echo`](https://pkg.go.dev/github.com/bitfield/script#Echo) | a string -| [`Exec`](https://pkg.go.dev/github.com/bitfield/script#Exec) | command output -| [`File`](https://pkg.go.dev/github.com/bitfield/script#File) | file contents -| [`FindFiles`](https://pkg.go.dev/github.com/bitfield/script#FindFiles) | recursive file listing -| [`Get`](https://pkg.go.dev/github.com/bitfield/script#Get) | HTTP response -| [`IfExists`](https://pkg.go.dev/github.com/bitfield/script#IfExists) | do something only if some file exists -| [`ListFiles`](https://pkg.go.dev/github.com/bitfield/script#ListFiles) | file listing (including wildcards) -| [`Post`](https://pkg.go.dev/github.com/bitfield/script#Post) | HTTP response -| [`Slice`](https://pkg.go.dev/github.com/bitfield/script#Slice) | slice elements, one per line -| [`Stdin`](https://pkg.go.dev/github.com/bitfield/script#Stdin) | standard input +| [`Args`](https://pkg.go.dev/github.com/bitfield/script#Args) | command-line arguments | +| [`Do`](https://pkg.go.dev/github.com/bitfield/script#Do) | HTTP response | +| [`Echo`](https://pkg.go.dev/github.com/bitfield/script#Echo) | a string | +| [`Exec`](https://pkg.go.dev/github.com/bitfield/script#Exec) | command output | +| [`File`](https://pkg.go.dev/github.com/bitfield/script#File) | file contents | +| [`FindFiles`](https://pkg.go.dev/github.com/bitfield/script#FindFiles) | recursive file listing | +| [`Get`](https://pkg.go.dev/github.com/bitfield/script#Get) | HTTP response | +| [`IfExists`](https://pkg.go.dev/github.com/bitfield/script#IfExists) | do something only if some file exists | +| [`ListFiles`](https://pkg.go.dev/github.com/bitfield/script#ListFiles) | file listing (including wildcards) | +| [`Post`](https://pkg.go.dev/github.com/bitfield/script#Post) | HTTP response | +| [`Slice`](https://pkg.go.dev/github.com/bitfield/script#Slice) | slice elements, one per line | +| [`Stdin`](https://pkg.go.dev/github.com/bitfield/script#Stdin) | standard input | + +## Modifiers + +These are methods on a pipe that change its configuration: + +| Source | Modifies | +| -------- | ------------- | +| [`WithEnv`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WithEnv) | environment for commands | +| [`WithError`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WithError) | pipe error status | +| [`WithHTTPClient`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WithHTTPClient) | client for HTTP requests | +| [`WithReader`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WithReader) | pipe source | +| [`WithStderr`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WithStderr) | standard error output stream for command | +| [`WithStdout`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WithStdout) | standard output stream for pipe | ## Filters @@ -289,9 +304,11 @@ Filters are methods on an existing pipe that also return a pipe, allowing you to | [`Basename`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Basename) | removes leading path components from each line, leaving only the filename | | [`Column`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Column) | Nth column of input | | [`Concat`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Concat) | contents of multiple files | +| [`DecodeBase64`](https://pkg.go.dev/github.com/bitfield/script#Pipe.DecodeBase64) | input decoded from base64 | | [`Dirname`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Dirname) | removes filename from each line, leaving only leading path components | | [`Do`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Do) | response to supplied HTTP request | | [`Echo`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Echo) | all input replaced by given string | +| [`EncodeBase64`](https://pkg.go.dev/github.com/bitfield/script#Pipe.EncodeBase64) | input encoded to base64 | | [`Exec`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Exec) | filtered through external command | | [`ExecForEach`](https://pkg.go.dev/github.com/bitfield/script#Pipe.ExecForEach) | execute given command template for each line of input | | [`Filter`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Filter) | user-supplied function filtering a reader to a writer | @@ -329,13 +346,16 @@ Sinks are methods that return some data from a pipe, ending the pipeline and ext | [`Slice`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Slice) | | data as `[]string`, error | | [`Stdout`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Stdout) | standard output | bytes written, error | | [`String`](https://pkg.go.dev/github.com/bitfield/script#Pipe.String) | | data as `string`, error | -| [`Wait`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Wait) | | none | +| [`Wait`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Wait) | | error | | [`WriteFile`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WriteFile) | specified file, truncating if it exists | bytes written, error | # What's new | Version | New | | ----------- | ------- | +| 0.23.0 | [`WithEnv`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WithEnv) | +| | [`DecodeBase64`](https://pkg.go.dev/github.com/bitfield/script#Pipe.DecodeBase64) / [`EncodeBase64`](https://pkg.go.dev/github.com/bitfield/script#Pipe.EncodeBase64) | +| | [`Wait`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Wait) returns error | | v0.22.0 | [`Tee`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Tee), [`WithStderr`](https://pkg.go.dev/github.com/bitfield/script#Pipe.WithStderr) | | v0.21.0 | HTTP support: [`Do`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Do), [`Get`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Get), [`Post`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Post) | | v0.20.0 | [`JQ`](https://pkg.go.dev/github.com/bitfield/script#Pipe.JQ) | @@ -346,7 +366,7 @@ See the [contributor's guide](CONTRIBUTING.md) for some helpful tips if you'd li # Links -- [Scripting with Go](https://bitfieldconsulting.com/golang/scripting) +- [Scripting with Go](https://bitfieldconsulting.com/posts/scripting) - [Code Club: Script](https://www.youtube.com/watch?v=6S5EqzVwpEg) - [Bitfield Consulting](https://bitfieldconsulting.com/) - [Go books by John Arundel](https://bitfieldconsulting.com/books) diff --git a/vendor/github.com/bitfield/script/script.go b/vendor/github.com/bitfield/script/script.go index c471f74b..e25b1c48 100644 --- a/vendor/github.com/bitfield/script/script.go +++ b/vendor/github.com/bitfield/script/script.go @@ -4,6 +4,7 @@ import ( "bufio" "container/ring" "crypto/sha256" + "encoding/base64" "encoding/hex" "encoding/json" "fmt" @@ -27,13 +28,14 @@ import ( // Pipe represents a pipe object with an associated [ReadAutoCloser]. type Pipe struct { // Reader is the underlying reader. - Reader ReadAutoCloser - stdout, stderr io.Writer - httpClient *http.Client + Reader ReadAutoCloser + stdout io.Writer + httpClient *http.Client - // because pipe stages are concurrent, protect 'err' - mu *sync.Mutex - err error + mu *sync.Mutex + err error + stderr io.Writer + env []string } // Args creates a pipe containing the program's command-line arguments from @@ -166,6 +168,7 @@ func NewPipe() *Pipe { mu: new(sync.Mutex), stdout: os.Stdout, httpClient: http.DefaultClient, + env: nil, } } @@ -275,6 +278,18 @@ func (p *Pipe) CountLines() (lines int, err error) { return lines, p.Error() } +// DecodeBase64 produces the string represented by the base64 encoded input. +func (p *Pipe) DecodeBase64() *Pipe { + return p.Filter(func(r io.Reader, w io.Writer) error { + decoder := base64.NewDecoder(base64.StdEncoding, r) + _, err := io.Copy(w, decoder) + if err != nil { + return err + } + return nil + }) +} + // Dirname reads paths from the pipe, one per line, and produces only the // parent directories of each path. For example, /usr/local/bin/foo would // become just /usr/local/bin. This is the complementary operation to @@ -347,7 +362,29 @@ func (p *Pipe) Echo(s string) *Pipe { return p.WithReader(strings.NewReader(s)) } +// EncodeBase64 produces the base64 encoding of the input. +func (p *Pipe) EncodeBase64() *Pipe { + return p.Filter(func(r io.Reader, w io.Writer) error { + encoder := base64.NewEncoder(base64.StdEncoding, w) + defer encoder.Close() + _, err := io.Copy(encoder, r) + if err != nil { + return err + } + return nil + }) +} + +func (p *Pipe) environment() []string { + p.mu.Lock() + defer p.mu.Unlock() + return p.env +} + // Error returns any error present on the pipe, or nil otherwise. +// Error is not a sink and does not wait until the pipe reaches +// completion. To wait for completion before returning the error, +// see [Pipe.Wait]. func (p *Pipe) Error() error { if p.mu == nil { // uninitialised pipe return nil @@ -362,6 +399,11 @@ func (p *Pipe) Error() error { // error output). The effect of this is to filter the contents of the pipe // through the external command. // +// # Environment +// +// The command inherits the current process's environment, optionally modified +// by [Pipe.WithEnv]. +// // # Error handling // // If the command had a non-zero exit status, the pipe's error status will also @@ -385,8 +427,13 @@ func (p *Pipe) Exec(cmdLine string) *Pipe { cmd.Stdin = r cmd.Stdout = w cmd.Stderr = w - if p.stderr != nil { - cmd.Stderr = p.stderr + pipeStderr := p.stdErr() + if pipeStderr != nil { + cmd.Stderr = pipeStderr + } + pipeEnv := p.environment() + if pipeEnv != nil { + cmd.Env = pipeEnv } err = cmd.Start() if err != nil { @@ -399,7 +446,8 @@ func (p *Pipe) Exec(cmdLine string) *Pipe { // ExecForEach renders cmdLine as a Go template for each line of input, running // the resulting command, and produces the combined output of all these -// commands in sequence. See [Pipe.Exec] for error handling details. +// commands in sequence. See [Pipe.Exec] for details on error handling and +// environment variables. // // This is mostly useful for substituting data into commands using Go template // syntax. For example: @@ -425,8 +473,12 @@ func (p *Pipe) ExecForEach(cmdLine string) *Pipe { cmd := exec.Command(args[0], args[1:]...) cmd.Stdout = w cmd.Stderr = w - if p.stderr != nil { - cmd.Stderr = p.stderr + pipeStderr := p.stdErr() + if pipeStderr != nil { + cmd.Stderr = pipeStderr + } + if p.env != nil { + cmd.Env = p.env } err = cmd.Start() if err != nil { @@ -810,6 +862,18 @@ func (p *Pipe) Slice() ([]string, error) { return result, p.Error() } +// stdErr returns the pipe's configured standard error writer for commands run +// via [Pipe.Exec] and [Pipe.ExecForEach]. The default is nil, which means that +// error output will go to the pipe. +func (p *Pipe) stdErr() io.Writer { + if p.mu == nil { // uninitialised pipe + return nil + } + p.mu.Lock() + defer p.mu.Unlock() + return p.stderr +} + // Stdout copies the pipe's contents to its configured standard output (using // [Pipe.WithStdout]), or to [os.Stdout] otherwise, and returns the number of // bytes successfully written, together with any error. @@ -848,14 +912,25 @@ func (p *Pipe) Tee(writers ...io.Writer) *Pipe { return p.WithReader(io.TeeReader(p.Reader, teeWriter)) } -// Wait reads the pipe to completion and discards the result. This is mostly -// useful for waiting until concurrent filters have completed (see -// [Pipe.Filter]). -func (p *Pipe) Wait() { +// Wait reads the pipe to completion and returns any error present on +// the pipe, or nil otherwise. This is mostly useful for waiting until +// concurrent filters have completed (see [Pipe.Filter]). +func (p *Pipe) Wait() error { _, err := io.Copy(io.Discard, p) if err != nil { p.SetError(err) } + return p.Error() +} + +// WithEnv sets the environment for subsequent [Pipe.Exec] and [Pipe.ExecForEach] +// commands to the string slice env, using the same format as [os/exec.Cmd.Env]. +// An empty slice unsets all existing environment variables. +func (p *Pipe) WithEnv(env []string) *Pipe { + p.mu.Lock() + defer p.mu.Unlock() + p.env = env + return p } // WithError sets the error err on the pipe. @@ -883,10 +958,11 @@ func (p *Pipe) WithReader(r io.Reader) *Pipe { return p } -// WithStderr redirects the standard error output for commands run via -// [Pipe.Exec] or [Pipe.ExecForEach] to the writer w, instead of going to the -// pipe as it normally would. +// WithStderr sets the standard error output for [Pipe.Exec] or +// [Pipe.ExecForEach] commands to w, instead of the pipe. func (p *Pipe) WithStderr(w io.Writer) *Pipe { + p.mu.Lock() + defer p.mu.Unlock() p.stderr = w return p } diff --git a/vendor/github.com/bytedance/sonic/.gitignore b/vendor/github.com/bytedance/sonic/.gitignore index 0d884470..fa60f43a 100644 --- a/vendor/github.com/bytedance/sonic/.gitignore +++ b/vendor/github.com/bytedance/sonic/.gitignore @@ -49,4 +49,7 @@ ast/bench.sh !testdata/*.json.gz fuzz/testdata -*__debug_bin \ No newline at end of file +*__debug_bin* +*pprof +*coverage.txt +tools/venv/* \ No newline at end of file diff --git a/vendor/github.com/bytedance/sonic/.gitmodules b/vendor/github.com/bytedance/sonic/.gitmodules index b8d11c92..ea84b991 100644 --- a/vendor/github.com/bytedance/sonic/.gitmodules +++ b/vendor/github.com/bytedance/sonic/.gitmodules @@ -1,3 +1,6 @@ -[submodule "tools/asm2asm"] +[submodule "cloudwego"] path = tools/asm2asm - url = https://github.com/chenzhuoyu/asm2asm + url = https://github.com/cloudwego/asm2asm.git +[submodule "tools/simde"] + path = tools/simde + url = https://github.com/simd-everywhere/simde.git diff --git a/vendor/github.com/bytedance/sonic/Makefile b/vendor/github.com/bytedance/sonic/Makefile deleted file mode 100644 index c672c313..00000000 --- a/vendor/github.com/bytedance/sonic/Makefile +++ /dev/null @@ -1,111 +0,0 @@ -# -# Copyright 2021 ByteDance Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -ARCH := avx avx2 sse -TMP_DIR := output -OUT_DIR := internal/native -SRC_FILE := native/native.c - -CPU_avx := amd64 -CPU_avx2 := amd64 -CPU_sse := amd64 - -TMPL_avx := fastint_amd64_test fastfloat_amd64_test native_amd64_test recover_amd64_test -TMPL_avx2 := fastint_amd64_test fastfloat_amd64_test native_amd64_test recover_amd64_test -TMPL_sse := fastint_amd64_test fastfloat_amd64_test native_amd64_test recover_amd64_test - -CFLAGS_avx := -msse -mno-sse4 -mavx -mpclmul -mno-avx2 -mstack-alignment=0 -DUSE_AVX=1 -DUSE_AVX2=0 -CFLAGS_avx2 := -msse -mno-sse4 -mavx -mpclmul -mavx2 -mstack-alignment=0 -DUSE_AVX=1 -DUSE_AVX2=1 -CFLAGS_sse := -msse -mno-sse4 -mno-avx -mno-avx2 -mpclmul - -CC_amd64 := clang -ASM2ASM_amd64 := tools/asm2asm/asm2asm.py - -CFLAGS := -mno-red-zone -CFLAGS += -target x86_64-apple-macos11 -CFLAGS += -fno-asynchronous-unwind-tables -CFLAGS += -fno-builtin -CFLAGS += -fno-exceptions -CFLAGS += -fno-rtti -CFLAGS += -fno-stack-protector -CFLAGS += -nostdlib -CFLAGS += -O3 -CFLAGS += -Wall -Werror - -NATIVE_SRC := $(wildcard native/*.h) -NATIVE_SRC += $(wildcard native/*.c) - -.PHONY: all clean ${ARCH} - -define build_tmpl - $(eval @arch := $(1)) - $(eval @tmpl := $(2)) - $(eval @dest := $(3)) - -${@dest}: ${@tmpl} - mkdir -p $(dir ${@dest}) - echo '// Code generated by Makefile, DO NOT EDIT.' > ${@dest} - echo >> ${@dest} - sed -e 's/{{PACKAGE}}/${@arch}/g' ${@tmpl} >> ${@dest} -endef - -define build_arch - $(eval @cpu := $(value CPU_$(1))) - $(eval @deps := $(foreach tmpl,$(value TMPL_$(1)),${OUT_DIR}/$(1)/${tmpl}.go)) - $(eval @asmin := ${TMP_DIR}/$(1)/native.s) - $(eval @asmout := ${OUT_DIR}/$(1)/native_text_${@cpu}.go) - $(eval @stubin := ${OUT_DIR}/native_${@cpu}.tmpl) - $(eval @stubout := ${OUT_DIR}/$(1)/native_${@cpu}.go) - -$(1): ${@asmout} ${@deps} - -${@asmout}: ${@stubout} ${NATIVE_SRC} - mkdir -p ${TMP_DIR}/$(1) - $${CC_${@cpu}} $${CFLAGS} $${CFLAGS_$(1)} -S -o ${TMP_DIR}/$(1)/native.s ${SRC_FILE} - python3 $${ASM2ASM_${@cpu}} -r ${@stubout} ${TMP_DIR}/$(1)/native.s - -$(eval $(call \ - build_tmpl, \ - $(1), \ - ${@stubin}, \ - ${@stubout} \ -)) - -$(foreach \ - tmpl, \ - $(value TMPL_$(1)), \ - $(eval $(call \ - build_tmpl, \ - $(1), \ - ${OUT_DIR}/${tmpl}.tmpl, \ - ${OUT_DIR}/$(1)/${tmpl}.go \ - )) \ -) -endef - -all: ${ARCH} - -clean: - for arch in ${ARCH}; do \ - rm -vfr ${TMP_DIR}/$${arch}; \ - rm -vfr ${OUT_DIR}/$${arch}; \ - done - -$(foreach \ - arch, \ - ${ARCH}, \ - $(eval $(call build_arch,${arch})) \ -) diff --git a/vendor/github.com/bytedance/sonic/README.md b/vendor/github.com/bytedance/sonic/README.md index 42772018..5f609b1c 100644 --- a/vendor/github.com/bytedance/sonic/README.md +++ b/vendor/github.com/bytedance/sonic/README.md @@ -5,18 +5,27 @@ English | [中文](README_ZH_CN.md) A blazingly fast JSON serializing & deserializing library, accelerated by JIT (just-in-time compiling) and SIMD (single-instruction-multiple-data). ## Requirement -- Go 1.16~1.21 -- Linux / MacOS / Windows(need go1.17 above) -- Amd64 ARCH + +- Go: 1.17~1.23 +- OS: Linux / MacOS / Windows +- CPU: AMD64 / ARM64(need go1.20 above) ## Features + - Runtime object binding without code generation - Complete APIs for JSON value manipulation - Fast, fast, fast! +## APIs + +see [go.dev](https://pkg.go.dev/github.com/bytedance/sonic) + ## Benchmarks + For **all sizes** of json and **all scenarios** of usage, **Sonic performs best**. + - [Medium](https://github.com/bytedance/sonic/blob/main/decoder/testdata_test.go#L19) (13KB, 300+ key, 6 layers) + ```powershell goversion: 1.17.1 goos: darwin @@ -81,14 +90,16 @@ BenchmarkLoadNode_Parallel/LoadAll()-16 5493 ns/op 2370.6 BenchmarkLoadNode/Interface()-16 17722 ns/op 734.85 MB/s 13323 B/op 88 allocs/op BenchmarkLoadNode_Parallel/Interface()-16 10330 ns/op 1260.70 MB/s 15178 B/op 88 allocs/op ``` + - [Small](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 keys, 3 layers) ![small benchmarks](./docs/imgs/bench-small.png) - [Large](https://github.com/bytedance/sonic/blob/main/testdata/twitter.json) (635KB, 10000+ key, 6 layers) ![large benchmarks](./docs/imgs/bench-large.png) -See [bench.sh](https://github.com/bytedance/sonic/blob/main/bench.sh) for benchmark codes. +See [bench.sh](https://github.com/bytedance/sonic/blob/main/scripts/bench.sh) for benchmark codes. ## How it works + See [INTRODUCTION.md](./docs/INTRODUCTION.md). ## Usage @@ -96,6 +107,7 @@ See [INTRODUCTION.md](./docs/INTRODUCTION.md). ### Marshal/Unmarshal Default behaviors are mostly consistent with `encoding/json`, except HTML escaping form (see [Escape HTML](https://github.com/bytedance/sonic/blob/main/README.md#escape-html)) and `SortKeys` feature (optional support see [Sort Keys](https://github.com/bytedance/sonic/blob/main/README.md#sort-keys)) that is **NOT** in conformity to [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259). + ```go import "github.com/bytedance/sonic" @@ -107,8 +119,11 @@ err := sonic.Unmarshal(output, &data) ``` ### Streaming IO -Sonic supports decoding json from `io.Reader` or encoding objects into `io.`Writer`, aims at handling multiple values as well as reducing memory consumption. + +Sonic supports decoding json from `io.Reader` or encoding objects into `io.Writer`, aims at handling multiple values as well as reducing memory consumption. + - encoder + ```go var o1 = map[string]interface{}{ "a": "b", @@ -123,7 +138,9 @@ fmt.Println(w.String()) // {"a":"b"} // 1 ``` + - decoder + ```go var o = map[string]interface{}{} var r = strings.NewReader(`{"a":"b"}{"1":"2"}`) @@ -136,6 +153,7 @@ fmt.Printf("%+v", o) ``` ### Use Number/Use Int64 + ```go import "github.com/bytedance/sonic/decoder" @@ -164,7 +182,9 @@ fm := root.Interface().(float64) // jn == jm ``` ### Sort Keys + On account of the performance loss from sorting (roughly 10%), sonic doesn't enable this feature by default. If your component depends on it to work (like [zstd](https://github.com/facebook/zstd)), Use it like this: + ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/encoder" @@ -177,19 +197,26 @@ v, err := encoder.Encode(m, encoder.SortMapKeys) var root := sonic.Get(JSON) err := root.SortKeys() ``` + ### Escape HTML + On account of the performance loss (roughly 15%), sonic doesn't enable this feature by default. You can use `encoder.EscapeHTML` option to open this feature (align with `encoding/json.HTMLEscape`). + ```go import "github.com/bytedance/sonic" v := map[string]string{"&&":"<>"} ret, err := Encode(v, EscapeHTML) // ret == `{"\u0026\u0026":{"X":"\u003c\u003e"}}` ``` + ### Compact Format + Sonic encodes primitive objects (struct/map...) as compact-format JSON by default, except marshaling `json.RawMessage` or `json.Marshaler`: sonic ensures validating their output JSON but **DONOT** compacting them for performance concerns. We provide the option `encoder.CompactMarshaler` to add compacting process. ### Print Error + If there invalid syntax in input JSON, sonic will return `decoder.SyntaxError`, which supports pretty-printing of error position + ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/decoder" @@ -215,7 +242,9 @@ if err != nil { ``` #### Mismatched Types [Sonic v1.6.0] + If there a **mismatch-typed** value for a given key, sonic will report `decoder.MismatchTypeError` (if there are many, report the last one), but still skip wrong the value and keep decoding next JSON. + ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/decoder" @@ -228,10 +257,15 @@ err := UnmarshalString(`{"A":"1","B":1}`, &data) println(err.Error()) // Mismatch type int with value string "at index 5: mismatched type with value\n\n\t{\"A\":\"1\",\"B\":1}\n\t.....^.........\n" fmt.Printf("%+v", data) // {A:0 B:1} ``` + ### Ast.Node + Sonic/ast.Node is a completely self-contained AST for JSON. It implements serialization and deserialization both and provides robust APIs for obtaining and modification of generic data. + #### Get/Index + Search partial JSON by given paths, which must be non-negative integer or string, or nil + ```go import "github.com/bytedance/sonic" @@ -245,10 +279,29 @@ raw := root.Raw() // == string(input) root, err := sonic.Get(input, "key1", 1, "key2") sub := root.Get("key3").Index(2).Int64() // == 3 ``` + **Tip**: since `Index()` uses offset to locate data, which is much faster than scanning like `Get()`, we suggest you use it as much as possible. And sonic also provides another API `IndexOrGet()` to underlying use offset as well as ensure the key is matched. +#### SearchOption + +`Searcher` provides some options for user to meet different needs: + +```go +opts := ast.SearchOption{ CopyReturn: true ... } +val, err := sonic.GetWithOptions(JSON, opts, "key") +``` + +- CopyReturn +Indicate the searcher to copy the result JSON string instead of refer from the input. This can help to reduce memory usage if you cache the results +- ConcurentRead +Since `ast.Node` use `Lazy-Load` design, it doesn't support Concurrently-Read by default. If you want to read it concurrently, please specify it. +- ValidateJSON +Indicate the searcher to validate the entire JSON. This option is enabled by default, which slow down the search speed a little. + #### Set/Unset + Modify the json content by Set()/Unset() + ```go import "github.com/bytedance/sonic" @@ -265,7 +318,9 @@ println(root.Get("key4").Check()) // "value not exist" ``` #### Serialize + To encode `ast.Node` as json, use `MarshalJson()` or `json.Marshal()` (MUST pass the node's pointer) + ```go import ( "encoding/json" @@ -279,6 +334,7 @@ println(string(buf) == string(exp)) // true ``` #### APIs + - validation: `Check()`, `Error()`, `Valid()`, `Exist()` - searching: `Index()`, `Get()`, `IndexPair()`, `IndexOrGet()`, `GetByPath()` - go-type casting: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map[UseNumber|UseNode]()`, `Array[UseNumber|UseNode]()`, `Interface[UseNumber|UseNode]()` @@ -287,7 +343,9 @@ println(string(buf) == string(exp)) // true - modification: `Set()`, `SetByIndex()`, `Add()` ### Ast.Visitor + Sonic provides an advanced API for fully parsing JSON into non-standard types (neither `struct` not `map[string]interface{}`) without using any intermediate representation (`ast.Node` or `interface{}`). For example, you might have the following types which are like `interface{}` but actually not `interface{}`: + ```go type UserNode interface {} @@ -302,7 +360,9 @@ type ( UserArray struct{ Value []UserNode } ) ``` + Sonic provides the following API to return **the preorder traversal of a JSON AST**. The `ast.Visitor` is a SAX style interface which is used in some C++ JSON library. You should implement `ast.Visitor` by yourself and pass it to `ast.Preorder()` method. In your visitor you can make your custom types to represent JSON values. There may be an O(n) space container (such as stack) in your visitor to record the object / array hierarchy. + ```go func Preorder(str string, visitor Visitor, opts *VisitorOptions) error @@ -323,20 +383,20 @@ type Visitor interface { See [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) for detailed usage. We also implement a demo visitor for `UserNode` in [ast/visitor_test.go](https://github.com/bytedance/sonic/blob/main/ast/visitor_test.go). ## Compatibility -Sonic **DOES NOT** ensure to support all environments, due to the difficulty of developing high-performance codes. For developers who use sonic to build their applications in different environments, we have the following suggestions: -- Developing on **Mac M1**: Make sure you have Rosetta 2 installed on your machine, and set `GOARCH=amd64` when building your application. Rosetta 2 can automatically translate x86 binaries to arm64 binaries and run x86 applications on Mac M1. -- Developing on **Linux arm64**: You can install qemu and use the `qemu-x86_64 -cpu max` command to convert x86 binaries to amr64 binaries for applications built with sonic. The qemu can achieve a similar transfer effect to Rosetta 2 on Mac M1. +For developers who want to use sonic to meet diffirent scenarios, we provide some integrated configs as `sonic.API` -For developers who want to use sonic on Linux arm64 without qemu, or those who want to handle JSON strictly consistent with `encoding/json`, we provide some compatible APIs as `sonic.API` -- `ConfigDefault`: the sonic's default config (`EscapeHTML=false`,`SortKeys=false`...) to run on sonic-supporting environment. It will fall back to `encoding/json` with the corresponding config, and some options like `SortKeys=false` will be invalid. -- `ConfigStd`: the std-compatible config (`EscapeHTML=true`,`SortKeys=true`...) to run on sonic-supporting environment. It will fall back to `encoding/json`. -- `ConfigFastest`: the fastest config (`NoQuoteTextMarshaler=true`) to run on sonic-supporting environment. It will fall back to `encoding/json` with the corresponding config, and some options will be invalid. +- `ConfigDefault`: the sonic's default config (`EscapeHTML=false`,`SortKeys=false`...) to run sonic fast meanwhile ensure security. +- `ConfigStd`: the std-compatible config (`EscapeHTML=true`,`SortKeys=true`...) +- `ConfigFastest`: the fastest config (`NoQuoteTextMarshaler=true`) to run on sonic as fast as possible. +Sonic **DOES NOT** ensure to support all environments, due to the difficulty of developing high-performance codes. On non-sonic-supporting environment, the implementation will fall back to `encoding/json`. Thus beflow configs will all equal to `ConfigStd`. ## Tips ### Pretouch + Since Sonic uses [golang-asm](https://github.com/twitchyliquid64/golang-asm) as a JIT assembler, which is NOT very suitable for runtime compiling, first-hit running of a huge schema may cause request-timeout or even process-OOM. For better stability, we advise **using `Pretouch()` for huge-schema or compact-memory applications** before `Marshal()/Unmarshal()`. + ```go import ( "reflect" @@ -362,17 +422,23 @@ func init() { ``` ### Copy string -When decoding **string values without any escaped characters**, sonic references them from the origin JSON buffer instead of mallocing a new buffer to copy. This helps a lot for CPU performance but may leave the whole JSON buffer in memory as long as the decoded objects are being used. In practice, we found the extra memory introduced by referring JSON buffer is usually 20% ~ 80% of decoded objects. Once an application holds these objects for a long time (for example, cache the decoded objects for reusing), its in-use memory on the server may go up. We provide the option `decoder.CopyString()` for users to choose not to reference the JSON buffer, which may cause a decline in CPU performance to some degree. + +When decoding **string values without any escaped characters**, sonic references them from the origin JSON buffer instead of mallocing a new buffer to copy. This helps a lot for CPU performance but may leave the whole JSON buffer in memory as long as the decoded objects are being used. In practice, we found the extra memory introduced by referring JSON buffer is usually 20% ~ 80% of decoded objects. Once an application holds these objects for a long time (for example, cache the decoded objects for reusing), its in-use memory on the server may go up. - `Config.CopyString`/`decoder.CopyString()`: We provide the option for `Decode()` / `Unmarshal()` users to choose not to reference the JSON buffer, which may cause a decline in CPU performance to some degree. + +- `GetFromStringNoCopy()`: For memory safety, `sonic.Get()` / `sonic.GetFromString()` now copies return JSON. If users want to get json more quickly and not care about memory usage, you can use `GetFromStringNoCopy()` to return a JSON directly referenced from source. ### Pass string or []byte? + For alignment to `encoding/json`, we provide API to pass `[]byte` as an argument, but the string-to-bytes copy is conducted at the same time considering safety, which may lose performance when the origin JSON is huge. Therefore, you can use `UnmarshalString()` and `GetFromString()` to pass a string, as long as your origin data is a string or **nocopy-cast** is safe for your []byte. We also provide API `MarshalString()` for convenient **nocopy-cast** of encoded JSON []byte, which is safe since sonic's output bytes is always duplicated and unique. ### Accelerate `encoding.TextMarshaler` -To ensure data security, sonic.Encoder quotes and escapes string values from `encoding.TextMarshaler` interfaces by default, which may degrade performance much if most of your data is in form of them. We provide `encoder.NoQuoteTextMarshaler` to skip these operations, which means you **MUST** ensure their output string escaped and quoted following [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259). +To ensure data security, sonic.Encoder quotes and escapes string values from `encoding.TextMarshaler` interfaces by default, which may degrade performance much if most of your data is in form of them. We provide `encoder.NoQuoteTextMarshaler` to skip these operations, which means you **MUST** ensure their output string escaped and quoted following [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259). ### Better performance for generic data + In **fully-parsed** scenario, `Unmarshal()` performs better than `Get()`+`Node.Interface()`. But if you only have a part of the schema for specific json, you can combine `Get()` and `Unmarshal()` together: + ```go import "github.com/bytedance/sonic" @@ -380,7 +446,9 @@ node, err := sonic.GetFromString(_TwitterJson, "statuses", 3, "user") var user User // your partial schema... err = sonic.UnmarshalString(node.Raw(), &user) ``` + Even if you don't have any schema, use `ast.Node` as the container of generic values instead of `map` or `interface`: + ```go import "github.com/bytedance/sonic" @@ -391,7 +459,9 @@ err = user.Check() // err = user.LoadAll() // only call this when you want to use 'user' concurrently... go someFunc(user) ``` + Why? Because `ast.Node` stores its children using `array`: + - `Array`'s performance is **much better** than `Map` when Inserting (Deserialize) and Scanning (Serialize) data; - **Hashing** (`map[x]`) is not as efficient as **Indexing** (`array[x]`), which `ast.Node` can conduct on **both array and object**; - Using `Interface()`/`Map()` means Sonic must parse all the underlying values, while `ast.Node` can parse them **on demand**. @@ -399,6 +469,7 @@ Why? Because `ast.Node` stores its children using `array`: **CAUTION:** `ast.Node` **DOESN'T** ensure concurrent security directly, due to its **lazy-load** design. However, you can call `Node.Load()`/`Node.LoadAll()` to achieve that, which may bring performance reduction while it still works faster than converting to `map` or `interface{}` ### Ast.Node or Ast.Visitor? + For generic data, `ast.Node` should be enough for your needs in most cases. However, `ast.Node` is designed for partially processing JSON string. It has some special designs such as lazy-load which might not be suitable for directly parsing the whole JSON string like `Unmarshal()`. Although `ast.Node` is better then `map` or `interface{}`, it's also a kind of intermediate representation after all if your final types are customized and you have to convert the above types to your custom types after parsing. @@ -407,5 +478,18 @@ For better performance, in previous case the `ast.Visitor` will be the better ch But `ast.Visitor` is not a very handy API. You might need to write a lot of code to implement your visitor and carefully maintain the tree hierarchy during decoding. Please read the comments in [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) carefully if you decide to use this API. +### Buffer Size + +Sonic use memory pool in many places like `encoder.Encode`, `ast.Node.MarshalJSON` to improve performace, which may produce more memory usage (in-use) when server's load is high. See [issue 614](https://github.com/bytedance/sonic/issues/614). Therefore, we introduce some options to let user control the behavior of memory pool. See [option](https://pkg.go.dev/github.com/bytedance/sonic@v1.11.9/option#pkg-variables) package. + +### Faster JSON skip + +For security, sonic use [FSM](native/skip_one.c) algorithm to validate JSON when decoding raw JSON or encoding `json.Marshaler`, which is much slower (1~10x) than [SIMD-searching-pair](native/skip_one_fast.c) algorithm. If user has many redundant JSON value and DO NOT NEED to strictly validate JSON correctness, you can enable below options: + +- `Config.NoValidateSkipJSON`: for faster skipping JSON when decoding, such as unknown fields, json.Unmarshaler(json.RawMessage), mismatched values, and redundant array elements +- `Config.NoValidateJSONMarshaler`: avoid validating JSON when encoding `json.Marshaler` +- `SearchOption.ValidateJSON`: indicates if validate located JSON value when `Get` + ## Community + Sonic is a subproject of [CloudWeGo](https://www.cloudwego.io/). We are committed to building a cloud native ecosystem. diff --git a/vendor/github.com/bytedance/sonic/README_ZH_CN.md b/vendor/github.com/bytedance/sonic/README_ZH_CN.md index dc5cfb5d..4f8980c5 100644 --- a/vendor/github.com/bytedance/sonic/README_ZH_CN.md +++ b/vendor/github.com/bytedance/sonic/README_ZH_CN.md @@ -6,9 +6,13 @@ ## 依赖 -- Go 1.16~1.20 -- Linux / MacOS / Windows(需要 Go1.17 以上) -- Amd64 架构 +- Go: 1.17~1.23 +- OS: Linux / MacOS / Windows +- CPU: AMD64 / ARM64(需要 Go1.20 以上) + +## 接口 + +详见 [go.dev](https://pkg.go.dev/github.com/bytedance/sonic) ## 特色 @@ -19,7 +23,9 @@ ## 基准测试 对于**所有大小**的 json 和**所有使用场景**, **Sonic 表现均为最佳**。 + - [中型](https://github.com/bytedance/sonic/blob/main/decoder/testdata_test.go#L19) (13kB, 300+ 键, 6 层) + ```powershell goversion: 1.17.1 goos: darwin @@ -84,12 +90,13 @@ BenchmarkLoadNode_Parallel/LoadAll()-16 5493 ns/op 2370.6 BenchmarkLoadNode/Interface()-16 17722 ns/op 734.85 MB/s 13323 B/op 88 allocs/op BenchmarkLoadNode_Parallel/Interface()-16 10330 ns/op 1260.70 MB/s 15178 B/op 88 allocs/op ``` + - [小型](https://github.com/bytedance/sonic/blob/main/testdata/small.go) (400B, 11 个键, 3 层) ![small benchmarks](./docs/imgs/bench-small.png) - [大型](https://github.com/bytedance/sonic/blob/main/testdata/twitter.json) (635kB, 10000+ 个键, 6 层) ![large benchmarks](./docs/imgs/bench-large.png) -要查看基准测试代码,请参阅 [bench.sh](https://github.com/bytedance/sonic/blob/main/bench.sh) 。 +要查看基准测试代码,请参阅 [bench.sh](https://github.com/bytedance/sonic/blob/main/scripts/bench.sh) 。 ## 工作原理 @@ -100,6 +107,7 @@ BenchmarkLoadNode_Parallel/Interface()-16 10330 ns/op 1260.7 ### 序列化/反序列化 默认的行为基本上与 `encoding/json` 相一致,除了 HTML 转义形式(参见 [Escape HTML](https://github.com/bytedance/sonic/blob/main/README.md#escape-html)) 和 `SortKeys` 功能(参见 [Sort Keys](https://github.com/bytedance/sonic/blob/main/README.md#sort-keys))**没有**遵循 [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259) 。 + ```go import "github.com/bytedance/sonic" @@ -113,7 +121,9 @@ err := sonic.Unmarshal(output, &data) ### 流式输入输出 Sonic 支持解码 `io.Reader` 中输入的 json,或将对象编码为 json 后输出至 `io.Writer`,以处理多个值并减少内存消耗。 + - 编码器 + ```go var o1 = map[string]interface{}{ "a": "b", @@ -128,7 +138,9 @@ fmt.Println(w.String()) // {"a":"b"} // 1 ``` + - 解码器 + ```go var o = map[string]interface{}{} var r = strings.NewReader(`{"a":"b"}{"1":"2"}`) @@ -172,6 +184,7 @@ fm := root.Interface().(float64) // jn == jm ### 对键排序 考虑到排序带来的性能损失(约 10% ), sonic 默认不会启用这个功能。如果你的组件依赖这个行为(如 [zstd](https://github.com/facebook/zstd)) ,可以仿照下面的例子: + ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/encoder" @@ -188,6 +201,7 @@ err := root.SortKeys() ### HTML 转义 考虑到性能损失(约15%), sonic 默认不会启用这个功能。你可以使用 `encoder.EscapeHTML` 选项来开启(与 `encoding/json.HTMLEscape` 行为一致)。 + ```go import "github.com/bytedance/sonic" @@ -196,11 +210,13 @@ ret, err := Encode(v, EscapeHTML) // ret == `{"\u0026\u0026":{"X":"\u003c\u003e" ``` ### 紧凑格式 + Sonic 默认将基本类型( `struct` , `map` 等)编码为紧凑格式的 JSON ,除非使用 `json.RawMessage` or `json.Marshaler` 进行编码: sonic 确保输出的 JSON 合法,但出于性能考虑,**不会**加工成紧凑格式。我们提供选项 `encoder.CompactMarshaler` 来添加此过程, ### 打印错误 如果输入的 JSON 存在无效的语法,sonic 将返回 `decoder.SyntaxError`,该错误支持错误位置的美化输出。 + ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/decoder" @@ -228,6 +244,7 @@ if err != nil { #### 类型不匹配 [Sonic v1.6.0] 如果给定键中存在**类型不匹配**的值, sonic 会抛出 `decoder.MismatchTypeError` (如果有多个,只会报告最后一个),但仍会跳过错误的值并解码下一个 JSON 。 + ```go import "github.com/bytedance/sonic" import "github.com/bytedance/sonic/decoder" @@ -240,13 +257,15 @@ err := UnmarshalString(`{"A":"1","B":1}`, &data) println(err.Error()) // Mismatch type int with value string "at index 5: mismatched type with value\n\n\t{\"A\":\"1\",\"B\":1}\n\t.....^.........\n" fmt.Printf("%+v", data) // {A:0 B:1} ``` + ### `Ast.Node` -Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列化和反序列化,并提供了获取和修改通用数据的鲁棒的 API。 +Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列化和反序列化,并提供了获取和修改JSON数据的鲁棒的 API。 #### 查找/索引 通过给定的路径搜索 JSON 片段,路径必须为非负整数,字符串或 `nil` 。 + ```go import "github.com/bytedance/sonic" @@ -260,11 +279,29 @@ raw := root.Raw() // == string(input) root, err := sonic.Get(input, "key1", 1, "key2") sub := root.Get("key3").Index(2).Int64() // == 3 ``` + **注意**:由于 `Index()` 使用偏移量来定位数据,比使用扫描的 `Get()` 要快的多,建议尽可能的使用 `Index` 。 Sonic 也提供了另一个 API, `IndexOrGet()` ,以偏移量为基础并且也确保键的匹配。 +#### 查找选项 + +`ast.Searcher`提供了一些选项,以满足用户的不同需求: + +``` +opts:= ast.SearchOption{CopyReturn: true…} +Val, err:= sonic。gettwithoptions (JSON, opts, "key") +``` + +- CopyReturn +指示搜索器复制结果JSON字符串,而不是从输入引用。如果用户缓存结果,这有助于减少内存使用 +- ConcurentRead +因为`ast.Node`使用`Lazy-Load`设计,默认不支持并发读取。如果您想同时读取,请指定它。 +- ValidateJSON +指示搜索器来验证整个JSON。默认情况下启用该选项, 但是对于查找速度有一定影响。 + #### 修改 -使用 ` Set()` / `Unset()` 修改 json 的内容 +使用 `Set()` / `Unset()` 修改 json 的内容 + ```go import "github.com/bytedance/sonic" @@ -281,7 +318,9 @@ println(root.Get("key4").Check()) // "value not exist" ``` #### 序列化 + 要将 `ast.Node` 编码为 json ,使用 `MarshalJson()` 或者 `json.Marshal()` (必须传递指向节点的指针) + ```go import ( "encoding/json" @@ -295,6 +334,7 @@ println(string(buf) == string(exp)) // true ``` #### APIs + - 合法性检查: `Check()`, `Error()`, `Valid()`, `Exist()` - 索引: `Index()`, `Get()`, `IndexPair()`, `IndexOrGet()`, `GetByPath()` - 转换至 go 内置类型: `Int64()`, `Float64()`, `String()`, `Number()`, `Bool()`, `Map[UseNumber|UseNode]()`, `Array[UseNumber|UseNode]()`, `Interface[UseNumber|UseNode]()` @@ -303,7 +343,9 @@ println(string(buf) == string(exp)) // true - 修改: `Set()`, `SetByIndex()`, `Add()` ### `Ast.Visitor` + Sonic 提供了一个高级的 API 用于直接全量解析 JSON 到非标准容器里 (既不是 `struct` 也不是 `map[string]interface{}`) 且不需要借助任何中间表示 (`ast.Node` 或 `interface{}`)。举个例子,你可能定义了下述的类型,它们看起来像 `interface{}`,但实际上并不是: + ```go type UserNode interface {} @@ -318,7 +360,9 @@ type ( UserArray struct{ Value []UserNode } ) ``` + Sonic 提供了下述的 API 来返回 **“对 JSON AST 的前序遍历”**。`ast.Visitor` 是一个 SAX 风格的接口,这在某些 C++ 的 JSON 解析库中被使用到。你需要自己实现一个 `ast.Visitor`,将它传递给 `ast.Preorder()` 方法。在你的实现中你可以使用自定义的类型来表示 JSON 的值。在你的 `ast.Visitor` 中,可能需要有一个 O(n) 空间复杂度的容器(比如说栈)来记录 object / array 的层级。 + ```go func Preorder(str string, visitor Visitor, opts *VisitorOptions) error @@ -335,23 +379,24 @@ type Visitor interface { OnArrayEnd() error } ``` + 详细用法参看 [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go),我们还为 `UserNode` 实现了一个示例 `ast.Visitor`,你可以在 [ast/visitor_test.go](https://github.com/bytedance/sonic/blob/main/ast/visitor_test.go) 中找到它。 ## 兼容性 -由于开发高性能代码的困难性, Sonic **不**保证对所有环境的支持。对于在不同环境中使用 Sonic 构建应用程序的开发者,我们有以下建议: -- 在 **Mac M1** 上开发:确保在您的计算机上安装了 Rosetta 2,并在构建时设置 `GOARCH=amd64` 。 Rosetta 2 可以自动将 x86 二进制文件转换为 arm64 二进制文件,并在 Mac M1 上运行 x86 应用程序。 -- 在 **Linux arm64** 上开发:您可以安装 qemu 并使用 `qemu-x86_64 -cpu max` 命令来将 x86 二进制文件转换为 arm64 二进制文件。qemu可以实现与Mac M1上的Rosetta 2类似的转换效果。 +对于想要使用sonic来满足不同场景的开发人员,我们提供了一些集成配置: -对于希望在不使用 qemu 下使用 sonic 的开发者,或者希望处理 JSON 时与 `encoding/JSON` 严格保持一致的开发者,我们在 `sonic.API` 中提供了一些兼容性 API -- `ConfigDefault`: 在支持 sonic 的环境下 sonic 的默认配置(`EscapeHTML=false`,`SortKeys=false`等)。行为与具有相应配置的 `encoding/json` 一致,一些选项,如 `SortKeys=false` 将无效。 -- `ConfigStd`: 在支持 sonic 的环境下与标准库兼容的配置(`EscapeHTML=true`,`SortKeys=true`等)。行为与 `encoding/json` 一致。 -- `ConfigFastest`: 在支持 sonic 的环境下运行最快的配置(`NoQuoteTextMarshaler=true`)。行为与具有相应配置的 `encoding/json` 一致,某些选项将无效。 +- `ConfigDefault`: sonic的默认配置 (`EscapeHTML=false`, `SortKeys=false`…) 保证性能同时兼顾安全性。 +- `ConfigStd`: 与 `encoding/json` 保证完全兼容的配置 +- `ConfigFastest`: 最快的配置(`NoQuoteTextMarshaler=true...`) 保证性能最优但是会缺少一些安全性检查(validate UTF8 等) +Sonic **不**确保支持所有环境,由于开发高性能代码的困难。在不支持声音的环境中,实现将回落到 `encoding/json`。因此上述配置将全部等于`ConfigStd`。 ## 注意事项 ### 预热 + 由于 Sonic 使用 [golang-asm](https://github.com/twitchyliquid64/golang-asm) 作为 JIT 汇编器,这个库并不适用于运行时编译,第一次运行一个大型模式可能会导致请求超时甚至进程内存溢出。为了更好地稳定性,我们建议在运行大型模式或在内存有限的应用中,在使用 `Marshal()/Unmarshal()` 前运行 `Pretouch()`。 + ```go import ( "reflect" @@ -381,16 +426,17 @@ func init() { 当解码 **没有转义字符的字符串**时, sonic 会从原始的 JSON 缓冲区内引用而不是复制到新的一个缓冲区中。这对 CPU 的性能方面很有帮助,但是可能因此在解码后对象仍在使用的时候将整个 JSON 缓冲区保留在内存中。实践中我们发现,通过引用 JSON 缓冲区引入的额外内存通常是解码后对象的 20% 至 80% ,一旦应用长期保留这些对象(如缓存以备重用),服务器所使用的内存可能会增加。我们提供了选项 `decoder.CopyString()` 供用户选择,不引用 JSON 缓冲区。这可能在一定程度上降低 CPU 性能。 ### 传递字符串还是字节数组? + 为了和 `encoding/json` 保持一致,我们提供了传递 `[]byte` 作为参数的 API ,但考虑到安全性,字符串到字节的复制是同时进行的,这在原始 JSON 非常大时可能会导致性能损失。因此,你可以使用 `UnmarshalString()` 和 `GetFromString()` 来传递字符串,只要你的原始数据是字符串,或**零拷贝类型转换**对于你的字节数组是安全的。我们也提供了 `MarshalString()` 的 API ,以便对编码的 JSON 字节数组进行**零拷贝类型转换**,因为 sonic 输出的字节始终是重复并且唯一的,所以这样是安全的。 ### 加速 `encoding.TextMarshaler` 为了保证数据安全性, `sonic.Encoder` 默认会对来自 `encoding.TextMarshaler` 接口的字符串进行引用和转义,如果大部分数据都是这种形式那可能会导致很大的性能损失。我们提供了 `encoder.NoQuoteTextMarshaler` 选项来跳过这些操作,但你**必须**保证他们的输出字符串依照 [RFC8259](https://datatracker.ietf.org/doc/html/rfc8259) 进行了转义和引用。 - ### 泛型的性能优化 在 **完全解析**的场景下, `Unmarshal()` 表现得比 `Get()`+`Node.Interface()` 更好。但是如果你只有特定 JSON 的部分模式,你可以将 `Get()` 和 `Unmarshal()` 结合使用: + ```go import "github.com/bytedance/sonic" @@ -398,7 +444,9 @@ node, err := sonic.GetFromString(_TwitterJson, "statuses", 3, "user") var user User // your partial schema... err = sonic.UnmarshalString(node.Raw(), &user) ``` + 甚至如果你没有任何模式,可以用 `ast.Node` 代替 `map` 或 `interface` 作为泛型的容器: + ```go import "github.com/bytedance/sonic" @@ -409,7 +457,9 @@ err = user.Check() // err = user.LoadAll() // only call this when you want to use 'user' concurrently... go someFunc(user) ``` + 为什么?因为 `ast.Node` 使用 `array` 来存储其子节点: + - 在插入(反序列化)和扫描(序列化)数据时,`Array` 的性能比 `Map` **好得多**; - **哈希**(`map[x]`)的效率不如**索引**(`array[x]`)高效,而 `ast.Node` 可以在数组和对象上使用索引; - 使用 `Interface()` / `Map()` 意味着 sonic 必须解析所有的底层值,而 `ast.Node` 可以**按需解析**它们。 @@ -417,6 +467,7 @@ go someFunc(user) **注意**:由于 `ast.Node` 的惰性加载设计,其**不能**直接保证并发安全性,但你可以调用 `Node.Load()` / `Node.LoadAll()` 来实现并发安全。尽管可能会带来性能损失,但仍比转换成 `map` 或 `interface{}` 更为高效。 ### 使用 `ast.Node` 还是 `ast.Visitor`? + 对于泛型数据的解析,`ast.Node` 在大多数场景上应该能够满足你的需求。 然而,`ast.Node` 是一种针对部分解析 JSON 而设计的泛型容器,它包含一些特殊设计,比如惰性加载,如果你希望像 `Unmarshal()` 那样直接解析整个 JSON,这些设计可能并不合适。尽管 `ast.Node` 相较于 `map` 或 `interface{}` 来说是更好的一种泛型容器,但它毕竟也是一种中间表示,如果你的最终类型是自定义的,你还得在解析完成后将上述类型转化成你自定义的类型。 @@ -425,6 +476,18 @@ go someFunc(user) 但是,`ast.Visitor` 并不是一个很易用的 API。你可能需要写大量的代码去实现自己的 `ast.Visitor`,并且需要在解析过程中仔细维护树的层级。如果你决定要使用这个 API,请先仔细阅读 [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) 中的注释。 +### 缓冲区大小 + +Sonic在许多地方使用内存池,如`encoder.Encode`, `ast.Node.MarshalJSON`等来提高性能,这可能会在服务器负载高时产生更多的内存使用(in-use)。参见[issue 614](https://github.com/bytedance/sonic/issues/614)。因此,我们引入了一些选项来让用户配置内存池的行为。参见[option](https://pkg.go.dev/github.com/bytedance/sonic@v1.11.9/option#pkg-variables)包。 + +### 更快的 JSON Skip + +为了安全起见,在跳过原始JSON 时,sonic decoder 默认使用[FSM](native/skip_one.c)算法扫描来跳过同时校验 JSON。它相比[SIMD-searching-pair](native/skip_one_fast.c)算法跳过要慢得多(1~10倍)。如果用户有很多冗余的JSON值,并且不需要严格验证JSON的正确性,你可以启用以下选项: + +- `Config.NoValidateSkipJSON`: 用于在解码时更快地跳过JSON,例如未知字段,`json.RawMessage`,不匹配的值和冗余的数组元素等 +- `Config.NoValidateJSONMarshaler`: 编码JSON时避免验证JSON。封送拆收器 +- `SearchOption.ValidateJSON`: 指示当`Get`时是否验证定位的JSON值 + ## 社区 Sonic 是 [CloudWeGo](https://www.cloudwego.io/) 下的一个子项目。我们致力于构建云原生生态系统。 diff --git a/vendor/github.com/bytedance/sonic/api.go b/vendor/github.com/bytedance/sonic/api.go index fa738f21..af6be70a 100644 --- a/vendor/github.com/bytedance/sonic/api.go +++ b/vendor/github.com/bytedance/sonic/api.go @@ -20,8 +20,19 @@ import ( `io` `github.com/bytedance/sonic/ast` + `github.com/bytedance/sonic/internal/rt` ) +const ( + // UseStdJSON indicates you are using fallback implementation (encoding/json) + UseStdJSON = iota + // UseSonicJSON indicates you are using real sonic implementation + UseSonicJSON +) + +// APIKind is the kind of API, 0 is std json, 1 is sonic. +const APIKind = apiKind + // Config is a combination of sonic/encoder.Options and sonic/decoder.Options type Config struct { // EscapeHTML indicates encoder to escape all HTML characters @@ -68,11 +79,25 @@ type Config struct { // ValidateString indicates decoder and encoder to valid string values: decoder will return errors // when unescaped control chars(\u0000-\u001f) in the string value of JSON. - ValidateString bool + ValidateString bool + + // NoValidateJSONMarshaler indicates that the encoder should not validate the output string + // after encoding the JSONMarshaler to JSON. + NoValidateJSONMarshaler bool + + // NoValidateJSONSkip indicates the decoder should not validate the JSON value when skipping it, + // such as unknown-fields, mismatched-type, redundant elements.. + NoValidateJSONSkip bool + + // NoEncoderNewline indicates that the encoder should not add a newline after every message + NoEncoderNewline bool + + // Encode Infinity or Nan float into `null`, instead of returning an error. + EncodeNullForInfOrNan bool } var ( - // ConfigDefault is the default config of APIs, aiming at efficiency and safty. + // ConfigDefault is the default config of APIs, aiming at efficiency and safety. ConfigDefault = Config{}.Froze() // ConfigStd is the standard config of APIs, aiming at being compatible with encoding/json. @@ -87,6 +112,8 @@ var ( // ConfigFastest is the fastest config of APIs, aiming at speed. ConfigFastest = Config{ NoQuoteTextMarshaler: true, + NoValidateJSONMarshaler: true, + NoValidateJSONSkip: true, }.Froze() ) @@ -109,7 +136,7 @@ type API interface { NewEncoder(writer io.Writer) Encoder // NewDecoder create a Decoder holding reader NewDecoder(reader io.Reader) Decoder - // Valid validates the JSON-encoded bytes and reportes if it is valid + // Valid validates the JSON-encoded bytes and reports if it is valid Valid(data []byte) bool } @@ -148,6 +175,13 @@ func Marshal(val interface{}) ([]byte, error) { return ConfigDefault.Marshal(val) } +// MarshalIndent is like Marshal but applies Indent to format the output. +// Each JSON element in the output will begin on a new line beginning with prefix +// followed by one or more copies of indent according to the indentation nesting. +func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) { + return ConfigDefault.MarshalIndent(v, prefix, indent) +} + // MarshalString returns the JSON encoding string of v. func MarshalString(val interface{}) (string, error) { return ConfigDefault.MarshalToString(val) @@ -165,27 +199,49 @@ func UnmarshalString(buf string, val interface{}) error { return ConfigDefault.UnmarshalFromString(buf, val) } -// Get searches the given path from json, -// and returns its representing ast.Node. +// Get searches and locates the given path from src json, +// and returns a ast.Node representing the partially json. // // Each path arg must be integer or string: // - Integer is target index(>=0), means searching current node as array. // - String is target key, means searching current node as object. // // -// Note, the api expects the json is well-formed at least, -// otherwise it may return unexpected result. +// Notice: It expects the src json is **Well-formed** and **Immutable** when calling, +// otherwise it may return unexpected result. +// Considering memory safety, the returned JSON is **Copied** from the input func Get(src []byte, path ...interface{}) (ast.Node, error) { - return GetFromString(string(src), path...) + return GetCopyFromString(rt.Mem2Str(src), path...) } -// GetFromString is same with Get except src is string, -// which can reduce unnecessary memory copy. +//GetWithOptions searches and locates the given path from src json, +// with specific options of ast.Searcher +func GetWithOptions(src []byte, opts ast.SearchOptions, path ...interface{}) (ast.Node, error) { + s := ast.NewSearcher(rt.Mem2Str(src)) + s.SearchOptions = opts + return s.GetByPath(path...) +} + +// GetFromString is same with Get except src is string. +// +// WARNING: The returned JSON is **Referenced** from the input. +// Caching or long-time holding the returned node may cause OOM. +// If your src is big, consider use GetFromStringCopy(). func GetFromString(src string, path ...interface{}) (ast.Node, error) { return ast.NewSearcher(src).GetByPath(path...) } +// GetCopyFromString is same with Get except src is string +func GetCopyFromString(src string, path ...interface{}) (ast.Node, error) { + return ast.NewSearcher(src).GetByPathCopy(path...) +} + // Valid reports whether data is a valid JSON encoding. func Valid(data []byte) bool { return ConfigDefault.Valid(data) } + +// Valid reports whether data is a valid JSON encoding. +func ValidString(data string) bool { + return ConfigDefault.Valid(rt.Str2Mem(data)) +} diff --git a/vendor/github.com/bytedance/sonic/ast/api_amd64.go b/vendor/github.com/bytedance/sonic/ast/api.go similarity index 71% rename from vendor/github.com/bytedance/sonic/ast/api_amd64.go rename to vendor/github.com/bytedance/sonic/ast/api.go index da6738ef..7c8253aa 100644 --- a/vendor/github.com/bytedance/sonic/ast/api_amd64.go +++ b/vendor/github.com/bytedance/sonic/ast/api.go @@ -1,4 +1,5 @@ -// +build amd64,go1.16,!go1.22 +//go:build (amd64 && go1.17 && !go1.24) || (arm64 && go1.20 && !go1.24) +// +build amd64,go1.17,!go1.24 arm64,go1.20,!go1.24 /* * Copyright 2022 ByteDance Inc. @@ -27,7 +28,7 @@ import ( `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` uq `github.com/bytedance/sonic/unquote` - `github.com/chenzhuoyu/base64x` + `github.com/bytedance/sonic/utf8` ) var typeByte = rt.UnpackEface(byte(0)).Type @@ -60,7 +61,7 @@ func quote(buf *[]byte, val string) { } // double buf size - *b = growslice(typeByte, *b, b.Cap*2) + *b = rt.GrowSlice(typeByte, *b, b.Cap*2) // ret is the complement of consumed input ret = ^ret // update input buffer @@ -77,14 +78,6 @@ func unquote(src string) (string, types.ParsingError) { return uq.String(src) } -func decodeBase64(src string) ([]byte, error) { - return base64x.StdEncoding.DecodeString(src) -} - -func encodeBase64(src []byte) string { - return base64x.StdEncoding.EncodeToString(src) -} - func (self *Parser) decodeValue() (val types.JsonState) { sv := (*rt.GoString)(unsafe.Pointer(&self.s)) flag := types.F_USE_NUMBER @@ -110,7 +103,7 @@ func (self *Parser) skip() (int, types.ParsingError) { func (self *Node) encodeInterface(buf *[]byte) error { //WARN: NOT compatible with json.Encoder - return encoder.EncodeInto(buf, self.packAny(), 0) + return encoder.EncodeInto(buf, self.packAny(), encoder.NoEncoderNewline) } func (self *Parser) skipFast() (int, types.ParsingError) { @@ -121,10 +114,15 @@ func (self *Parser) skipFast() (int, types.ParsingError) { return start, 0 } -func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) { - fsm := types.NewStateMachine() +func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) { + var fsm *types.StateMachine + if validate { + fsm = types.NewStateMachine() + } start := native.GetByPath(&self.s, &self.p, &path, fsm) - types.FreeStateMachine(fsm) + if validate { + types.FreeStateMachine(fsm) + } runtime.KeepAlive(path) if start < 0 { return self.p, types.ParsingError(-start) @@ -132,26 +130,6 @@ func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) { return start, 0 } -func (self *Searcher) GetByPath(path ...interface{}) (Node, error) { - var err types.ParsingError - var start int - - self.parser.p = 0 - start, err = self.parser.getByPath(path...) - if err != 0 { - // for compatibility with old version - if err == types.ERR_NOT_FOUND { - return Node{}, ErrNotExist - } - if err == types.ERR_UNSUPPORT_TYPE { - panic("path must be either int(>=0) or string") - } - return Node{}, self.parser.syntaxError(err) - } - - t := switchRawType(self.parser.s[start]) - if t == _V_NONE { - return Node{}, self.parser.ExportError(err) - } - return newRawNode(self.parser.s[start:self.parser.p], t), nil +func validate_utf8(str string) bool { + return utf8.ValidateString(str) } diff --git a/vendor/github.com/bytedance/sonic/ast/api_compat.go b/vendor/github.com/bytedance/sonic/ast/api_compat.go index 7b475eb6..a349afc0 100644 --- a/vendor/github.com/bytedance/sonic/ast/api_compat.go +++ b/vendor/github.com/bytedance/sonic/ast/api_compat.go @@ -1,40 +1,40 @@ -// +build !amd64 !go1.16 go1.22 +// +build !amd64,!arm64 go1.24 !go1.17 arm64,!go1.20 /* - * Copyright 2022 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* Copyright 2022 ByteDance Inc. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ package ast import ( - `encoding/base64` `encoding/json` - `fmt` + `unicode/utf8` `github.com/bytedance/sonic/internal/native/types` `github.com/bytedance/sonic/internal/rt` ) func init() { - println("WARNING: sonic only supports Go1.16~1.20 && CPU amd64, but your environment is not suitable") + println("WARNING:(ast) sonic only supports go1.17~1.23, but your environment is not suitable") } func quote(buf *[]byte, val string) { quoteString(buf, val) } +// unquote unescapes a internal JSON string (it doesn't count quotas at the begining and end) func unquote(src string) (string, types.ParsingError) { sp := rt.IndexChar(src, -1) out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2)) @@ -44,13 +44,6 @@ func unquote(src string) (string, types.ParsingError) { return rt.Mem2Str(out), 0 } -func decodeBase64(src string) ([]byte, error) { - return base64.StdEncoding.DecodeString(src) -} - -func encodeBase64(src []byte) string { - return base64.StdEncoding.EncodeToString(src) -} func (self *Parser) decodeValue() (val types.JsonState) { e, v := decodeValue(self.s, self.p, self.dbuf == nil) @@ -88,37 +81,34 @@ func (self *Node) encodeInterface(buf *[]byte) error { return nil } -func (self *Searcher) GetByPath(path ...interface{}) (Node, error) { - self.parser.p = 0 - - var err types.ParsingError +func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) { for _, p := range path { if idx, ok := p.(int); ok && idx >= 0 { - if err = self.parser.searchIndex(idx); err != 0 { - return Node{}, self.parser.ExportError(err) + if err := self.searchIndex(idx); err != 0 { + return self.p, err } } else if key, ok := p.(string); ok { - if err = self.parser.searchKey(key); err != 0 { - return Node{}, self.parser.ExportError(err) + if err := self.searchKey(key); err != 0 { + return self.p, err } } else { panic("path must be either int(>=0) or string") } } - var start = self.parser.p - if start, err = self.parser.skip(); err != 0 { - return Node{}, self.parser.ExportError(err) + var start int + var e types.ParsingError + if validate { + start, e = self.skip() + } else { + start, e = self.skipFast() } - ns := len(self.parser.s) - if self.parser.p > ns || start >= ns || start>=self.parser.p { - return Node{}, fmt.Errorf("skip %d char out of json boundary", start) - } - - t := switchRawType(self.parser.s[start]) - if t == _V_NONE { - return Node{}, self.parser.ExportError(err) + if e != 0 { + return self.p, e } + return start, 0 +} - return newRawNode(self.parser.s[start:self.parser.p], t), nil -} \ No newline at end of file +func validate_utf8(str string) bool { + return utf8.ValidString(str) +} diff --git a/vendor/github.com/bytedance/sonic/ast/buffer.go b/vendor/github.com/bytedance/sonic/ast/buffer.go index 93f4ff47..04701ef5 100644 --- a/vendor/github.com/bytedance/sonic/ast/buffer.go +++ b/vendor/github.com/bytedance/sonic/ast/buffer.go @@ -17,8 +17,10 @@ package ast import ( - `sort` - `unsafe` + "sort" + "unsafe" + + "github.com/bytedance/sonic/internal/caching" ) type nodeChunk [_DEFAULT_NODE_CAP]Node @@ -58,29 +60,82 @@ func (self *linkedNodes) At(i int) (*Node) { return nil } -func (self *linkedNodes) Add(v Node) { - if self.size < _DEFAULT_NODE_CAP { - self.head[self.size] = v - self.size++ +func (self *linkedNodes) MoveOne(source int, target int) { + if source == target { + return + } + if source < 0 || source >= self.size || target < 0 || target >= self.size { + return + } + // reserve source + n := *self.At(source) + if source < target { + // move every element (source,target] one step back + for i:=source; itarget; i-- { + *self.At(i) = *self.At(i-1) + } + } + // set target + *self.At(target) = n +} + +func (self *linkedNodes) Pop() { + if self == nil || self.size == 0 { + return + } + self.Set(self.size-1, Node{}) + self.size-- +} + +func (self *linkedNodes) Push(v Node) { + self.Set(self.size, v) +} + + +func (self *linkedNodes) Set(i int, v Node) { + if i < _DEFAULT_NODE_CAP { + self.head[i] = v + if self.size <= i { + self.size = i+1 + } return } + a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP + if a < 0 { + self.head[b] = v + } else { + self.growTailLength(a+1) + var n = &self.tail[a] + if *n == nil { + *n = new(nodeChunk) + } + (*n)[b] = v + } + if self.size <= i { + self.size = i+1 + } +} - a, b, c := self.size/_DEFAULT_NODE_CAP-1 , self.size%_DEFAULT_NODE_CAP, cap(self.tail) - if a - c >= 0 { +func (self *linkedNodes) growTailLength(l int) { + if l <= len(self.tail) { + return + } + c := cap(self.tail) + for c < l { c += 1 + c>>_APPEND_GROW_SHIFT - tmp := make([]*nodeChunk, a + 1, c) - copy(tmp, self.tail) - self.tail = tmp - } else if a >= len(self.tail) { - self.tail = self.tail[:a+1] } - - var n = &self.tail[a] - if *n == nil { - *n = new(nodeChunk) + if c == cap(self.tail) { + self.tail = self.tail[:l] + return } - (*n)[b] = v - self.size++ + tmp := make([]*nodeChunk, l, c) + copy(tmp, self.tail) + self.tail = tmp } func (self *linkedNodes) ToSlice(con []Node) { @@ -135,11 +190,22 @@ func (self *linkedNodes) FromSlice(con []Node) { type pairChunk [_DEFAULT_NODE_CAP]Pair type linkedPairs struct { + index map[uint64]int head pairChunk tail []*pairChunk size int } +func (self *linkedPairs) BuildIndex() { + if self.index == nil { + self.index = make(map[uint64]int, self.size) + } + for i:=0; i= 0 { - c += 1 + c>>_APPEND_GROW_SHIFT - tmp := make([]*pairChunk, a + 1, c) - copy(tmp, self.tail) - self.tail = tmp - } else if a >= len(self.tail) { - self.tail = self.tail[:a+1] +func (self *linkedPairs) Unset(i int) { + if self.index != nil { + p := self.At(i) + delete(self.index, p.hash) } + self.set(i, Pair{}) +} - var n = &self.tail[a] - if *n == nil { - *n = new(pairChunk) +func (self *linkedPairs) Set(i int, v Pair) { + if self.index != nil { + h := v.hash + self.index[h] = i } - (*n)[b] = v - self.size++ + self.set(i, v) +} + +func (self *linkedPairs) set(i int, v Pair) { + if i < _DEFAULT_NODE_CAP { + self.head[i] = v + if self.size <= i { + self.size = i+1 + } + return + } + a, b := i/_DEFAULT_NODE_CAP-1, i%_DEFAULT_NODE_CAP + if a < 0 { + self.head[b] = v + } else { + self.growTailLength(a+1) + var n = &self.tail[a] + if *n == nil { + *n = new(pairChunk) + } + (*n)[b] = v + } + if self.size <= i { + self.size = i+1 + } +} + +func (self *linkedPairs) growTailLength(l int) { + if l <= len(self.tail) { + return + } + c := cap(self.tail) + for c < l { + c += 1 + c>>_APPEND_GROW_SHIFT + } + if c == cap(self.tail) { + self.tail = self.tail[:l] + return + } + tmp := make([]*pairChunk, l, c) + copy(tmp, self.tail) + self.tail = tmp } // linear search func (self *linkedPairs) Get(key string) (*Pair, int) { + if self.index != nil { + // fast-path + i, ok := self.index[caching.StrHash(key)] + if ok { + n := self.At(i) + if n.Key == key { + return n, i + } + // hash conflicts + goto linear_search + } else { + return nil, -1 + } + } +linear_search: for i:=0; i len(src) { return -int(types.ERR_EOF) } - if src[pos:ret] == bytesNull { + if src[pos:ret] == strNull { return ret } else { return -int(types.ERR_INVALID_CHAR) @@ -583,3 +586,36 @@ func skipArray(src string, pos int) (ret int, start int) { pos++ } } + +// DecodeString decodes a JSON string from pos and return golang string. +// - needEsc indicates if to unescaped escaping chars +// - hasEsc tells if the returned string has escaping chars +// - validStr enables validating UTF8 charset +// +func _DecodeString(src string, pos int, needEsc bool, validStr bool) (v string, ret int, hasEsc bool) { + p := NewParserObj(src) + p.p = pos + switch val := p.decodeValue(); val.Vt { + case types.V_STRING: + str := p.s[val.Iv : p.p-1] + if validStr && !validate_utf8(str) { + return "", -int(types.ERR_INVALID_UTF8), false + } + /* fast path: no escape sequence */ + if val.Ep == -1 { + return str, p.p, false + } else if !needEsc { + return str, p.p, true + } + /* unquote the string */ + out, err := unquote(str) + /* check for errors */ + if err != 0 { + return "", -int(err), true + } else { + return out, p.p, true + } + default: + return "", -int(_ERR_UNSUPPORT_TYPE), false + } +} diff --git a/vendor/github.com/bytedance/sonic/ast/encode.go b/vendor/github.com/bytedance/sonic/ast/encode.go index 263ae5a9..eae0bd25 100644 --- a/vendor/github.com/bytedance/sonic/ast/encode.go +++ b/vendor/github.com/bytedance/sonic/ast/encode.go @@ -17,12 +17,11 @@ package ast import ( - `sync` - `unicode/utf8` -) + "sync" + "unicode/utf8" -const ( - _MaxBuffer = 1024 // 1KB buffer size + "github.com/bytedance/sonic/internal/rt" + "github.com/bytedance/sonic/option" ) func quoteString(e *[]byte, s string) { @@ -30,7 +29,7 @@ func quoteString(e *[]byte, s string) { start := 0 for i := 0; i < len(s); { if b := s[i]; b < utf8.RuneSelf { - if safeSet[b] { + if rt.SafeSet[b] { i++ continue } @@ -54,8 +53,8 @@ func quoteString(e *[]byte, s string) { // user-controlled strings are rendered into JSON // and served to some browsers. *e = append(*e, `u00`...) - *e = append(*e, hex[b>>4]) - *e = append(*e, hex[b&0xF]) + *e = append(*e, rt.Hex[b>>4]) + *e = append(*e, rt.Hex[b&0xF]) } i++ start = i @@ -76,7 +75,7 @@ func quoteString(e *[]byte, s string) { *e = append(*e, s[start:i]...) } *e = append(*e, `\u202`...) - *e = append(*e, hex[c&0xF]) + *e = append(*e, rt.Hex[c&0xF]) i += size start = i continue @@ -92,16 +91,24 @@ func quoteString(e *[]byte, s string) { var bytesPool = sync.Pool{} func (self *Node) MarshalJSON() ([]byte, error) { + if self == nil { + return bytesNull, nil + } + buf := newBuffer() err := self.encode(buf) if err != nil { freeBuffer(buf) return nil, err } - - ret := make([]byte, len(*buf)) - copy(ret, *buf) - freeBuffer(buf) + var ret []byte + if !rt.CanSizeResue(cap(*buf)) { + ret = *buf + } else { + ret = make([]byte, len(*buf)) + copy(ret, *buf) + freeBuffer(buf) + } return ret, err } @@ -109,21 +116,24 @@ func newBuffer() *[]byte { if ret := bytesPool.Get(); ret != nil { return ret.(*[]byte) } else { - buf := make([]byte, 0, _MaxBuffer) + buf := make([]byte, 0, option.DefaultAstBufferSize) return &buf } } func freeBuffer(buf *[]byte) { + if !rt.CanSizeResue(cap(*buf)) { + return + } *buf = (*buf)[:0] bytesPool.Put(buf) } func (self *Node) encode(buf *[]byte) error { - if self.IsRaw() { + if self.isRaw() { return self.encodeRaw(buf) } - switch self.Type() { + switch int(self.itype()) { case V_NONE : return ErrNotExist case V_ERROR : return self.Check() case V_NULL : return self.encodeNull(buf) @@ -139,16 +149,21 @@ func (self *Node) encode(buf *[]byte) error { } func (self *Node) encodeRaw(buf *[]byte) error { - raw, err := self.Raw() - if err != nil { - return err + lock := self.rlock() + if !self.isRaw() { + self.runlock() + return self.encode(buf) + } + raw := self.toString() + if lock { + self.runlock() } *buf = append(*buf, raw...) return nil } func (self *Node) encodeNull(buf *[]byte) error { - *buf = append(*buf, bytesNull...) + *buf = append(*buf, strNull...) return nil } @@ -193,20 +208,9 @@ func (self *Node) encodeArray(buf *[]byte) error { *buf = append(*buf, '[') - var s = (*linkedNodes)(self.p) var started bool - if nb > 0 { - n := s.At(0) - if n.Exists() { - if err := n.encode(buf); err != nil { - return err - } - started = true - } - } - - for i := 1; i < nb; i++ { - n := s.At(i) + for i := 0; i < nb; i++ { + n := self.nodeAt(i) if !n.Exists() { continue } @@ -250,21 +254,10 @@ func (self *Node) encodeObject(buf *[]byte) error { *buf = append(*buf, '{') - var s = (*linkedPairs)(self.p) var started bool - if nb > 0 { - n := s.At(0) - if n.Value.Exists() { - if err := n.encode(buf); err != nil { - return err - } - started = true - } - } - - for i := 1; i < nb; i++ { - n := s.At(i) - if !n.Value.Exists() { + for i := 0; i < nb; i++ { + n := self.pairAt(i) + if n == nil || !n.Value.Exists() { continue } if started { diff --git a/vendor/github.com/bytedance/sonic/ast/error.go b/vendor/github.com/bytedance/sonic/ast/error.go index 00a04468..3716e7a9 100644 --- a/vendor/github.com/bytedance/sonic/ast/error.go +++ b/vendor/github.com/bytedance/sonic/ast/error.go @@ -17,6 +17,10 @@ func newError(err types.ParsingError, msg string) *Node { } } +func newErrorPair(err SyntaxError) *Pair { + return &Pair{0, "", *newSyntaxError(err)} +} + // Error returns error message if the node is invalid func (self Node) Error() string { if self.t != V_ERROR { @@ -79,7 +83,7 @@ func (self SyntaxError) description() string { /* check for empty source */ if self.Src == "" { - return fmt.Sprintf("no sources available: %#v", self) + return fmt.Sprintf("no sources available, the input json is empty: %#v", self) } /* prevent slicing before the beginning */ diff --git a/vendor/github.com/bytedance/sonic/ast/iterator.go b/vendor/github.com/bytedance/sonic/ast/iterator.go index 3c4187a9..07664715 100644 --- a/vendor/github.com/bytedance/sonic/ast/iterator.go +++ b/vendor/github.com/bytedance/sonic/ast/iterator.go @@ -17,30 +17,48 @@ package ast import ( - `fmt` + "fmt" - `github.com/bytedance/sonic/internal/native/types` + "github.com/bytedance/sonic/internal/caching" + "github.com/bytedance/sonic/internal/native/types" ) type Pair struct { + hash uint64 Key string Value Node } +func NewPair(key string, val Node) Pair { + return Pair{ + hash: caching.StrHash(key), + Key: key, + Value: val, + } +} + // Values returns iterator for array's children traversal func (self *Node) Values() (ListIterator, error) { - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return ListIterator{}, err } - return ListIterator{Iterator{p: self}}, nil + return self.values(), nil +} + +func (self *Node) values() ListIterator { + return ListIterator{Iterator{p: self}} } // Properties returns iterator for object's children traversal func (self *Node) Properties() (ObjectIterator, error) { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return ObjectIterator{}, err } - return ObjectIterator{Iterator{p: self}}, nil + return self.properties(), nil +} + +func (self *Node) properties() ObjectIterator { + return ObjectIterator{Iterator{p: self}} } type Iterator struct { @@ -114,7 +132,7 @@ next_start: } else { n := self.p.pairAt(self.i) self.i++ - if !n.Value.Exists() { + if n == nil || !n.Value.Exists() { goto next_start } return n @@ -160,6 +178,9 @@ type Scanner func(path Sequence, node *Node) bool // // NOTICE: A unsetted node WON'T trigger sc, but its index still counts into Path.Index func (self *Node) ForEach(sc Scanner) error { + if err := self.checkRaw(); err != nil { + return err + } switch self.itype() { case types.V_ARRAY: iter, err := self.Values() diff --git a/vendor/github.com/bytedance/sonic/ast/node.go b/vendor/github.com/bytedance/sonic/ast/node.go index 444c8fe4..0fbcf783 100644 --- a/vendor/github.com/bytedance/sonic/ast/node.go +++ b/vendor/github.com/bytedance/sonic/ast/node.go @@ -17,22 +17,15 @@ package ast import ( - `encoding/json` - `fmt` - `strconv` - `unsafe` - `reflect` - - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` -) - -const ( - _CAP_BITS = 32 - _LEN_MASK = 1 << _CAP_BITS - 1 - - _NODE_SIZE = unsafe.Sizeof(Node{}) - _PAIR_SIZE = unsafe.Sizeof(Pair{}) + "encoding/json" + "fmt" + "strconv" + "sync" + "sync/atomic" + "unsafe" + + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" ) const ( @@ -45,7 +38,7 @@ const ( _V_ARRAY_LAZY = _V_LAZY | types.V_ARRAY _V_OBJECT_LAZY = _V_LAZY | types.V_OBJECT _MASK_LAZY = _V_LAZY - 1 - _MASK_RAW = _V_RAW - 1 + _MASK_RAW = _V_RAW - 1 ) const ( @@ -61,21 +54,18 @@ const ( V_ANY = int(_V_ANY) ) -var ( - byteType = rt.UnpackType(reflect.TypeOf(byte(0))) -) - type Node struct { t types.ValueType l uint p unsafe.Pointer + m *sync.RWMutex } // UnmarshalJSON is just an adapter to json.Unmarshaler. // If you want better performance, use Searcher.GetByPath() directly func (self *Node) UnmarshalJSON(data []byte) (err error) { - *self, err = NewSearcher(string(data)).GetByPath() - return + *self = NewRaw(string(data)) + return self.Check() } /** Node Type Accessor **/ @@ -92,17 +82,39 @@ func (self *Node) UnmarshalJSON(data []byte) (err error) { // V_STRING = 7 (json value string) // V_NUMBER = 33 (json value number ) // V_ANY = 34 (golang interface{}) +// +// Deprecated: not concurrent safe. Use TypeSafe instead func (self Node) Type() int { return int(self.t & _MASK_LAZY & _MASK_RAW) } -func (self Node) itype() types.ValueType { +// Type concurrently-safe returns json type represented by the node +// It will be one of belows: +// V_NONE = 0 (empty node, key not exists) +// V_ERROR = 1 (error node) +// V_NULL = 2 (json value `null`, key exists) +// V_TRUE = 3 (json value `true`) +// V_FALSE = 4 (json value `false`) +// V_ARRAY = 5 (json value array) +// V_OBJECT = 6 (json value object) +// V_STRING = 7 (json value string) +// V_NUMBER = 33 (json value number ) +// V_ANY = 34 (golang interface{}) +func (self *Node) TypeSafe() int { + return int(self.loadt() & _MASK_LAZY & _MASK_RAW) +} + +func (self *Node) itype() types.ValueType { return self.t & _MASK_LAZY & _MASK_RAW } // Exists returns false only if the self is nil or empty node V_NONE func (self *Node) Exists() bool { - return self.Valid() && self.t != _V_NONE + if self == nil { + return false + } + t := self.loadt() + return t != V_ERROR && t != _V_NONE } // Valid reports if self is NOT V_ERROR or nil @@ -110,7 +122,7 @@ func (self *Node) Valid() bool { if self == nil { return false } - return self.t != V_ERROR + return self.loadt() != V_ERROR } // Check checks if the node itself is valid, and return: @@ -119,45 +131,63 @@ func (self *Node) Valid() bool { func (self *Node) Check() error { if self == nil { return ErrNotExist - } else if self.t != V_ERROR { + } else if self.loadt() != V_ERROR { return nil } else { return self } } -// IsRaw returns true if node's underlying value is raw json +// isRaw returns true if node's underlying value is raw json +// +// Deprecated: not concurent safe func (self Node) IsRaw() bool { - return self.t&_V_RAW != 0 + return self.t & _V_RAW != 0 +} + +// IsRaw returns true if node's underlying value is raw json +func (self *Node) isRaw() bool { + return self.loadt() & _V_RAW != 0 } func (self *Node) isLazy() bool { - return self != nil && self.t&_V_LAZY != 0 + return self != nil && self.t & _V_LAZY != 0 } func (self *Node) isAny() bool { - return self != nil && self.t == _V_ANY + return self != nil && self.loadt() == _V_ANY } /** Simple Value Methods **/ // Raw returns json representation of the node, func (self *Node) Raw() (string, error) { - if !self.IsRaw() { + if self == nil { + return "", ErrNotExist + } + lock := self.rlock() + if !self.isRaw() { + if lock { + self.runlock() + } buf, err := self.MarshalJSON() return rt.Mem2Str(buf), err } - return self.toString(), nil + ret := self.toString() + if lock { + self.runlock() + } + return ret, nil } func (self *Node) checkRaw() error { if err := self.Check(); err != nil { return err } - if self.IsRaw() { + if self.isRaw() { self.parseRaw(false) } - return nil + return self.Check() } // Bool returns bool value represented by this node, @@ -501,7 +531,6 @@ func (self *Node) StrictFloat64() (float64, error) { // Len returns children count of a array|object|string node // WARN: For partially loaded node, it also works but only counts the parsed children -// WARN: For ARRAY|OBJECT nodes which has been conducted `UnsetXX()`, its length WON'T change func (self *Node) Len() (int, error) { if err := self.checkRaw(); err != nil { return 0, err @@ -515,7 +544,7 @@ func (self *Node) Len() (int, error) { } } -func (self Node) len() int { +func (self *Node) len() int { return int(self.l) } @@ -538,14 +567,19 @@ func (self *Node) Cap() (int, error) { // // If self is V_NONE or V_NULL, it becomes V_OBJECT and sets the node at the key. func (self *Node) Set(key string, node Node) (bool, error) { - if self != nil && (self.t == _V_NONE || self.t == types.V_NULL) { - *self = NewObject([]Pair{{key, node}}) - return false, nil + if err := self.checkRaw(); err != nil { + return false, err } - if err := node.Check(); err != nil { return false, err } + + if self.t == _V_NONE || self.t == types.V_NULL { + *self = NewObject([]Pair{NewPair(key, node)}) + return false, nil + } else if self.itype() != types.V_OBJECT { + return false, ErrUnsupportType + } p := self.Get(key) @@ -555,7 +589,7 @@ func (self *Node) Set(key string, node Node) (bool, error) { *self = newObject(new(linkedPairs)) } s := (*linkedPairs)(self.p) - s.Add(Pair{key, node}) + s.Push(NewPair(key, node)) self.l++ return false, nil @@ -572,18 +606,22 @@ func (self *Node) SetAny(key string, val interface{}) (bool, error) { return self.Set(key, NewAny(val)) } -// Unset RESET the node of given key under object parent, and reports if the key has existed. -// WARN: After conducting `UnsetXX()`, the node's length WON'T change +// Unset REMOVE (soft) the node of given key under object parent, and reports if the key has existed. func (self *Node) Unset(key string) (bool, error) { - self.must(types.V_OBJECT, "an object") + if err := self.should(types.V_OBJECT); err != nil { + return false, err + } + // NOTICE: must get acurate length before deduct + if err := self.skipAllKey(); err != nil { + return false, err + } p, i := self.skipKey(key) if !p.Exists() { return false, nil } else if err := p.Check(); err != nil { return false, err } - - self.removePair(i) + self.removePairAt(i) return true, nil } @@ -591,10 +629,18 @@ func (self *Node) Unset(key string) (bool, error) { // // The index must be within self's children. func (self *Node) SetByIndex(index int, node Node) (bool, error) { + if err := self.checkRaw(); err != nil { + return false, err + } if err := node.Check(); err != nil { return false, err } + if index == 0 && (self.t == _V_NONE || self.t == types.V_NULL) { + *self = NewArray([]Node{node}) + return false, nil + } + p := self.Index(index) if !p.Exists() { return false, ErrNotExist @@ -611,18 +657,28 @@ func (self *Node) SetAnyByIndex(index int, val interface{}) (bool, error) { return self.SetByIndex(index, NewAny(val)) } -// UnsetByIndex remove the node of given index -// WARN: After conducting `UnsetXX()`, the node's length WON'T change +// UnsetByIndex REOMVE (softly) the node of given index. +// +// WARN: this will change address of elements, which is a dangerous action. +// Use Unset() for object or Pop() for array instead. func (self *Node) UnsetByIndex(index int) (bool, error) { + if err := self.checkRaw(); err != nil { + return false, err + } + var p *Node it := self.itype() + if it == types.V_ARRAY { - p = self.Index(index) - }else if it == types.V_OBJECT { - if err := self.checkRaw(); err != nil { + if err := self.skipAllIndex(); err != nil { + return false, err + } + p = self.nodeAt(index) + } else if it == types.V_OBJECT { + if err := self.skipAllKey(); err != nil { return false, err } - pr := self.skipIndexPair(index) + pr := self.pairAt(index) if pr == nil { return false, ErrNotExist } @@ -635,6 +691,12 @@ func (self *Node) UnsetByIndex(index int) (bool, error) { return false, ErrNotExist } + // last elem + if index == self.len() - 1 { + return true, self.Pop() + } + + // not last elem, self.len() change but linked-chunk not change if it == types.V_ARRAY { self.removeNode(index) }else if it == types.V_OBJECT { @@ -647,21 +709,112 @@ func (self *Node) UnsetByIndex(index int) (bool, error) { // // If self is V_NONE or V_NULL, it becomes V_ARRAY and sets the node at index 0. func (self *Node) Add(node Node) error { + if err := self.checkRaw(); err != nil { + return err + } + if self != nil && (self.t == _V_NONE || self.t == types.V_NULL) { *self = NewArray([]Node{node}) return nil } + if err := self.should(types.V_ARRAY); err != nil { + return err + } s, err := self.unsafeArray() if err != nil { return err } - s.Add(node) + // Notice: array won't have unset node in tail + s.Push(node) self.l++ return nil } +// Pop remove the last child of the V_Array or V_Object node. +func (self *Node) Pop() error { + if err := self.checkRaw(); err != nil { + return err + } + + if it := self.itype(); it == types.V_ARRAY { + s, err := self.unsafeArray() + if err != nil { + return err + } + // remove tail unset nodes + for i := s.Len()-1; i >= 0; i-- { + if s.At(i).Exists() { + s.Pop() + self.l-- + break + } + s.Pop() + } + + } else if it == types.V_OBJECT { + s, err := self.unsafeMap() + if err != nil { + return err + } + // remove tail unset nodes + for i := s.Len()-1; i >= 0; i-- { + if p := s.At(i); p != nil && p.Value.Exists() { + s.Pop() + self.l-- + break + } + s.Pop() + } + + } else { + return ErrUnsupportType + } + + return nil +} + +// Move moves the child at src index to dst index, +// meanwhile slides sliblings from src+1 to dst. +// +// WARN: this will change address of elements, which is a dangerous action. +func (self *Node) Move(dst, src int) error { + if err := self.should(types.V_ARRAY); err != nil { + return err + } + + s, err := self.unsafeArray() + if err != nil { + return err + } + + // check if any unset node exists + if l := s.Len(); self.len() != l { + di, si := dst, src + // find real pos of src and dst + for i := 0; i < l; i++ { + if s.At(i).Exists() { + di-- + si-- + } + if di == -1 { + dst = i + di-- + } + if si == -1 { + src = i + si-- + } + if di == -2 && si == -2 { + break + } + } + } + + s.MoveOne(src, dst) + return nil +} // SetAny wraps val with V_ANY node, and Add() the node. func (self *Node) AddAny(val interface{}) error { @@ -699,7 +852,7 @@ func (self *Node) GetByPath(path ...interface{}) *Node { // Get loads given key of an object node on demands func (self *Node) Get(key string) *Node { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return unwrapError(err) } n, _ := self.skipKey(key) @@ -708,8 +861,6 @@ func (self *Node) Get(key string) *Node { // Index indexies node at given idx, // node type CAN be either V_OBJECT or V_ARRAY -// WARN: After conducting `UnsetXX()`, the node's length WON'T change, -// thus its children's indexing WON'T change too func (self *Node) Index(idx int) *Node { if err := self.checkRaw(); err != nil { return unwrapError(err) @@ -733,28 +884,37 @@ func (self *Node) Index(idx int) *Node { // IndexPair indexies pair at given idx, // node type MUST be either V_OBJECT -// WARN: After conducting `UnsetXX()`, the node's length WON'T change, -// thus its children's indexing WON'T change too func (self *Node) IndexPair(idx int) *Pair { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil } return self.skipIndexPair(idx) } -// IndexOrGet firstly use idx to index a value and check if its key matches -// If not, then use the key to search value -func (self *Node) IndexOrGet(idx int, key string) *Node { - if err := self.should(types.V_OBJECT, "an object"); err != nil { - return unwrapError(err) +func (self *Node) indexOrGet(idx int, key string) (*Node, int) { + if err := self.should(types.V_OBJECT); err != nil { + return unwrapError(err), idx } pr := self.skipIndexPair(idx) if pr != nil && pr.Key == key { - return &pr.Value + return &pr.Value, idx } - n, _ := self.skipKey(key) - return n + + return self.skipKey(key) +} + +// IndexOrGet firstly use idx to index a value and check if its key matches +// If not, then use the key to search value +func (self *Node) IndexOrGet(idx int, key string) *Node { + node, _ := self.indexOrGet(idx, key) + return node +} + +// IndexOrGetWithIdx attempts to retrieve a node by index and key, returning the node and its correct index. +// If the key does not match at the given index, it searches by key and returns the node with its updated index. +func (self *Node) IndexOrGetWithIdx(idx int, key string) (*Node, int) { + return self.indexOrGet(idx, key) } /** Generic Value Converters **/ @@ -769,10 +929,10 @@ func (self *Node) Map() (map[string]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil, err } - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObject() @@ -788,10 +948,10 @@ func (self *Node) MapUseNumber() (map[string]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil, err } - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObjectUseNumber() @@ -808,7 +968,7 @@ func (self *Node) MapUseNode() (map[string]Node, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil, err } if err := self.skipAllKey(); err != nil { @@ -831,10 +991,8 @@ func (self *Node) MapUseNode() (map[string]Node, error) { // return self.toGenericObjectUsePair() // } +//go:nocheckptr func (self *Node) unsafeMap() (*linkedPairs, error) { - if err := self.should(types.V_OBJECT, "an object"); err != nil { - return nil, err - } if err := self.skipAllKey(); err != nil { return nil, err } @@ -846,7 +1004,39 @@ func (self *Node) unsafeMap() (*linkedPairs, error) { // SortKeys sorts children of a V_OBJECT node in ascending key-order. // If recurse is true, it recursively sorts children's children as long as a V_OBJECT node is found. -func (self *Node) SortKeys(recurse bool) (err error) { +func (self *Node) SortKeys(recurse bool) error { + // check raw node first + if err := self.checkRaw(); err != nil { + return err + } + if self.itype() == types.V_OBJECT { + return self.sortKeys(recurse) + } else if self.itype() == types.V_ARRAY { + var err error + err2 := self.ForEach(func(path Sequence, node *Node) bool { + it := node.itype() + if it == types.V_ARRAY || it == types.V_OBJECT { + err = node.SortKeys(recurse) + if err != nil { + return false + } + } + return true + }) + if err != nil { + return err + } + return err2 + } else { + return nil + } +} + +func (self *Node) sortKeys(recurse bool) (err error) { + // check raw node first + if err := self.checkRaw(); err != nil { + return err + } ps, err := self.unsafeMap() if err != nil { return err @@ -856,7 +1046,7 @@ func (self *Node) SortKeys(recurse bool) (err error) { var sc Scanner sc = func(path Sequence, node *Node) bool { if node.itype() == types.V_OBJECT { - if err := node.SortKeys(recurse); err != nil { + if err := node.sortKeys(recurse); err != nil { return false } } @@ -867,7 +1057,9 @@ func (self *Node) SortKeys(recurse bool) (err error) { } return true } - self.ForEach(sc) + if err := self.ForEach(sc); err != nil { + return err + } } return nil } @@ -882,10 +1074,10 @@ func (self *Node) Array() ([]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return nil, err } - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArray() @@ -901,10 +1093,10 @@ func (self *Node) ArrayUseNumber() ([]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return nil, err } - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArrayUseNumber() @@ -921,7 +1113,7 @@ func (self *Node) ArrayUseNode() ([]Node, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return nil, err } if err := self.skipAllIndex(); err != nil { @@ -946,9 +1138,6 @@ func (self *Node) ArrayUseNode() ([]Node, error) { // } func (self *Node) unsafeArray() (*linkedNodes, error) { - if err := self.should(types.V_ARRAY, "an array"); err != nil { - return nil, err - } if err := self.skipAllIndex(); err != nil { return nil, err } @@ -980,12 +1169,12 @@ func (self *Node) Interface() (interface{}, error) { } return v, nil case _V_ARRAY_LAZY : - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArray() case _V_OBJECT_LAZY : - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObject() @@ -1019,12 +1208,12 @@ func (self *Node) InterfaceUseNumber() (interface{}, error) { case types.V_STRING : return self.toString(), nil case _V_NUMBER : return self.toNumber(), nil case _V_ARRAY_LAZY : - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArrayUseNumber() case _V_OBJECT_LAZY : - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObjectUseNumber() @@ -1056,105 +1245,30 @@ func (self *Node) InterfaceUseNode() (interface{}, error) { } } -// LoadAll loads all the node's children and children's children as parsed. -// After calling it, the node can be safely used on concurrency +// LoadAll loads the node's children +// and ensure all its children can be READ concurrently (include its children's children) func (self *Node) LoadAll() error { - if self.IsRaw() { - self.parseRaw(true) - return self.Check() - } - - switch self.itype() { - case types.V_ARRAY: - e := self.len() - if err := self.loadAllIndex(); err != nil { - return err - } - for i := 0; i < e; i++ { - n := self.nodeAt(i) - if n.IsRaw() { - n.parseRaw(true) - } - if err := n.Check(); err != nil { - return err - } - } - return nil - case types.V_OBJECT: - e := self.len() - if err := self.loadAllKey(); err != nil { - return err - } - for i := 0; i < e; i++ { - n := self.pairAt(i) - if n.Value.IsRaw() { - n.Value.parseRaw(true) - } - if err := n.Value.Check(); err != nil { - return err - } - } - return nil - default: - return self.Check() - } + return self.Load() } // Load loads the node's children as parsed. -// After calling it, only the node itself can be used on concurrency (not include its children) +// and ensure all its children can be READ concurrently (include its children's children) func (self *Node) Load() error { - if self.IsRaw() { - self.parseRaw(false) - return self.Load() - } - switch self.t { - case _V_ARRAY_LAZY: - return self.skipAllIndex() - case _V_OBJECT_LAZY: - return self.skipAllKey() - default: - return self.Check() + case _V_ARRAY_LAZY: self.loadAllIndex(true) + case _V_OBJECT_LAZY: self.loadAllKey(true) + case V_ERROR: return self + case V_NONE: return nil + } + if self.m == nil { + self.m = new(sync.RWMutex) } + return self.checkRaw() } /**---------------------------------- Internal Helper Methods ----------------------------------**/ -var ( - _NODE_TYPE = rt.UnpackEface(Node{}).Type - _PAIR_TYPE = rt.UnpackEface(Pair{}).Type -) - -// func (self *Node) setCapAndLen(cap int, len int) { -// if self.t == types.V_ARRAY || self.t == types.V_OBJECT || self.t == _V_ARRAY_LAZY || self.t == _V_OBJECT_LAZY { -// self.l = uint32(len) -// self.c = uint32(cap) -// } else { -// panic("value does not have a length") -// } -// } - -func (self *Node) unsafe_next() *Node { - return (*Node)(unsafe.Pointer(uintptr(unsafe.Pointer(self)) + _NODE_SIZE)) -} - -func (self *Pair) unsafe_next() *Pair { - return (*Pair)(unsafe.Pointer(uintptr(unsafe.Pointer(self)) + _PAIR_SIZE)) -} - -func (self *Node) must(t types.ValueType, s string) { - if err := self.checkRaw(); err != nil { - panic(err) - } - if err := self.Check(); err != nil { - panic(err) - } - if self.itype() != t { - panic("value cannot be represented as " + s) - } -} - -func (self *Node) should(t types.ValueType, s string) error { +func (self *Node) should(t types.ValueType) error { if err := self.checkRaw(); err != nil { return err } @@ -1171,6 +1285,19 @@ func (self *Node) nodeAt(i int) *Node { p = &stack.v } else { p = (*linkedNodes)(self.p) + if l := p.Len(); l != self.len() { + // some nodes got unset, iterate to skip them + for j:=0; j _Threshold_Index { + v.BuildIndex() + } return Node{ t: types.V_OBJECT, l: uint(v.Len()), @@ -1640,53 +1802,42 @@ func newObject(v *linkedPairs) Node { } func (self *Node) setObject(v *linkedPairs) { + if v.size > _Threshold_Index { + v.BuildIndex() + } self.t = types.V_OBJECT self.l = uint(v.Len()) self.p = unsafe.Pointer(v) } -func newRawNode(str string, typ types.ValueType) Node { - return Node{ - t: _V_RAW | typ, - p: rt.StrPtr(str), - l: uint(len(str)), - } -} - func (self *Node) parseRaw(full bool) { + lock := self.lock() + defer self.unlock() + if !self.isRaw() { + return + } raw := self.toString() parser := NewParserObj(raw) + var e types.ParsingError if full { parser.noLazy = true - parser.skipValue = false + *self, e = parser.Parse() + } else if lock { + var n Node + parser.noLazy = true + parser.loadOnce = true + n, e = parser.Parse() + self.assign(n) + } else { + *self, e = parser.Parse() } - var e types.ParsingError - *self, e = parser.Parse() if e != 0 { *self = *newSyntaxError(parser.syntaxError(e)) } } -var typeJumpTable = [256]types.ValueType{ - '"' : types.V_STRING, - '-' : _V_NUMBER, - '0' : _V_NUMBER, - '1' : _V_NUMBER, - '2' : _V_NUMBER, - '3' : _V_NUMBER, - '4' : _V_NUMBER, - '5' : _V_NUMBER, - '6' : _V_NUMBER, - '7' : _V_NUMBER, - '8' : _V_NUMBER, - '9' : _V_NUMBER, - '[' : types.V_ARRAY, - 'f' : types.V_FALSE, - 'n' : types.V_NULL, - 't' : types.V_TRUE, - '{' : types.V_OBJECT, -} - -func switchRawType(c byte) types.ValueType { - return typeJumpTable[c] +func (self *Node) assign(n Node) { + self.l = n.l + self.p = n.p + atomic.StoreInt64(&self.t, n.t) } diff --git a/vendor/github.com/bytedance/sonic/ast/parser.go b/vendor/github.com/bytedance/sonic/ast/parser.go index cb16f20b..30bd1f45 100644 --- a/vendor/github.com/bytedance/sonic/ast/parser.go +++ b/vendor/github.com/bytedance/sonic/ast/parser.go @@ -17,14 +17,16 @@ package ast import ( - `fmt` + "fmt" + "sync" + "sync/atomic" - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" ) const ( - _DEFAULT_NODE_CAP int = 8 + _DEFAULT_NODE_CAP int = 16 _APPEND_GROW_SHIFT = 1 ) @@ -45,6 +47,7 @@ type Parser struct { p int s string noLazy bool + loadOnce bool skipValue bool dbuf *byte } @@ -115,6 +118,10 @@ func (self *Parser) lspace(sp int) int { return sp } +func (self *Parser) backward() { + for ; self.p >= 0 && isSpace(self.s[self.p]); self.p-=1 {} +} + func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) { sp := self.p ns := len(self.s) @@ -148,7 +155,7 @@ func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) { if t == _V_NONE { return Node{}, types.ERR_INVALID_CHAR } - val = newRawNode(self.s[start:self.p], t) + val = newRawNode(self.s[start:self.p], t, false) }else{ /* decode the value */ if val, err = self.Parse(); err != 0 { @@ -157,7 +164,7 @@ func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) { } /* add the value to result */ - ret.Add(val) + ret.Push(val) self.p = self.lspace(self.p) /* check for EOF */ @@ -234,7 +241,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) { if t == _V_NONE { return Node{}, types.ERR_INVALID_CHAR } - val = newRawNode(self.s[start:self.p], t) + val = newRawNode(self.s[start:self.p], t, false) } else { /* decode the value */ if val, err = self.Parse(); err != 0 { @@ -244,7 +251,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) { /* add the value to result */ // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !! - ret.Add(Pair{Key: key, Value: val}) + ret.Push(NewPair(key, val)) self.p = self.lspace(self.p) /* check for EOF */ @@ -291,6 +298,10 @@ func (self *Parser) Pos() int { return self.p } + +// Parse returns a ast.Node representing the parser's JSON. +// NOTICE: the specific parsing lazy dependens parser's option +// It only parse first layer and first child for Object or Array be default func (self *Parser) Parse() (Node, types.ParsingError) { switch val := self.decodeValue(); val.Vt { case types.V_EOF : return Node{}, types.ERR_EOF @@ -299,22 +310,48 @@ func (self *Parser) Parse() (Node, types.ParsingError) { case types.V_FALSE : return falseNode, 0 case types.V_STRING : return self.decodeString(val.Iv, val.Ep) case types.V_ARRAY: + s := self.p - 1; if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' { self.p = p + 1 return Node{t: types.V_ARRAY}, 0 } if self.noLazy { + if self.loadOnce { + self.noLazy = false + } return self.decodeArray(new(linkedNodes)) } + // NOTICE: loadOnce always keep raw json for object or array + if self.loadOnce { + self.p = s + s, e := self.skipFast() + if e != 0 { + return Node{}, e + } + return newRawNode(self.s[s:self.p], types.V_ARRAY, true), 0 + } return newLazyArray(self), 0 case types.V_OBJECT: + s := self.p - 1; if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' { self.p = p + 1 return Node{t: types.V_OBJECT}, 0 } + // NOTICE: loadOnce always keep raw json for object or array if self.noLazy { + if self.loadOnce { + self.noLazy = false + } return self.decodeObject(new(linkedPairs)) } + if self.loadOnce { + self.p = s + s, e := self.skipFast() + if e != 0 { + return Node{}, e + } + return newRawNode(self.s[s:self.p], types.V_OBJECT, true), 0 + } return newLazyObject(self), 0 case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0 case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0 @@ -471,11 +508,11 @@ func (self *Node) skipNextNode() *Node { if t == _V_NONE { return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) } - val = newRawNode(parser.s[start:parser.p], t) + val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ - ret.Add(val) + ret.Push(val) self.l++ parser.p = parser.lspace(parser.p) @@ -510,7 +547,7 @@ func (self *Node) skipNextPair() (*Pair) { /* check for EOF */ if parser.p = parser.lspace(sp); parser.p >= ns { - return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))} + return newErrorPair(parser.syntaxError(types.ERR_EOF)) } /* check for empty object */ @@ -527,7 +564,7 @@ func (self *Node) skipNextPair() (*Pair) { /* decode the key */ if njs = parser.decodeValue(); njs.Vt != types.V_STRING { - return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } /* extract the key */ @@ -537,34 +574,34 @@ func (self *Node) skipNextPair() (*Pair) { /* check for escape sequence */ if njs.Ep != -1 { if key, err = unquote(key); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + return newErrorPair(parser.syntaxError(err)) } } /* expect a ':' delimiter */ if err = parser.delim(); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + return newErrorPair(parser.syntaxError(err)) } /* skip the value */ if start, err := parser.skipFast(); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + return newErrorPair(parser.syntaxError(err)) } else { t := switchRawType(parser.s[start]) if t == _V_NONE { - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } - val = newRawNode(parser.s[start:parser.p], t) + val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ - ret.Add(Pair{Key: key, Value: val}) + ret.Push(NewPair(key, val)) self.l++ parser.p = parser.lspace(parser.p) /* check for EOF */ if parser.p >= ns { - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))} + return newErrorPair(parser.syntaxError(types.ERR_EOF)) } /* check for the next character */ @@ -577,7 +614,7 @@ func (self *Node) skipNextPair() (*Pair) { self.setObject(ret) return ret.At(ret.Len()-1) default: - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } } @@ -653,3 +690,77 @@ func (self *Parser) ExportError(err types.ParsingError) error { Code: err, }.Description()) } + +func backward(src string, i int) int { + for ; i>=0 && isSpace(src[i]); i-- {} + return i +} + + +func newRawNode(str string, typ types.ValueType, lock bool) Node { + ret := Node{ + t: typ | _V_RAW, + p: rt.StrPtr(str), + l: uint(len(str)), + } + if lock { + ret.m = new(sync.RWMutex) + } + return ret +} + +var typeJumpTable = [256]types.ValueType{ + '"' : types.V_STRING, + '-' : _V_NUMBER, + '0' : _V_NUMBER, + '1' : _V_NUMBER, + '2' : _V_NUMBER, + '3' : _V_NUMBER, + '4' : _V_NUMBER, + '5' : _V_NUMBER, + '6' : _V_NUMBER, + '7' : _V_NUMBER, + '8' : _V_NUMBER, + '9' : _V_NUMBER, + '[' : types.V_ARRAY, + 'f' : types.V_FALSE, + 'n' : types.V_NULL, + 't' : types.V_TRUE, + '{' : types.V_OBJECT, +} + +func switchRawType(c byte) types.ValueType { + return typeJumpTable[c] +} + +func (self *Node) loadt() types.ValueType { + return (types.ValueType)(atomic.LoadInt64(&self.t)) +} + +func (self *Node) lock() bool { + if m := self.m; m != nil { + m.Lock() + return true + } + return false +} + +func (self *Node) unlock() { + if m := self.m; m != nil { + m.Unlock() + } +} + +func (self *Node) rlock() bool { + if m := self.m; m != nil { + m.RLock() + return true + } + return false +} + +func (self *Node) runlock() { + if m := self.m; m != nil { + m.RUnlock() + } +} diff --git a/vendor/github.com/bytedance/sonic/ast/search.go b/vendor/github.com/bytedance/sonic/ast/search.go index bb6fceaa..9a5fb942 100644 --- a/vendor/github.com/bytedance/sonic/ast/search.go +++ b/vendor/github.com/bytedance/sonic/ast/search.go @@ -16,8 +16,28 @@ package ast +import ( + `github.com/bytedance/sonic/internal/rt` + `github.com/bytedance/sonic/internal/native/types` +) + +// SearchOptions controls Searcher's behavior +type SearchOptions struct { + // ValidateJSON indicates the searcher to validate the entire JSON + ValidateJSON bool + + // CopyReturn indicates the searcher to copy the result JSON instead of refer from the input + // This can help to reduce memory usage if you cache the results + CopyReturn bool + + // ConcurrentRead indicates the searcher to return a concurrently-READ-safe node, + // including: GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON + ConcurrentRead bool +} + type Searcher struct { parser Parser + SearchOptions } func NewSearcher(str string) *Searcher { @@ -26,5 +46,112 @@ func NewSearcher(str string) *Searcher { s: str, noLazy: false, }, + SearchOptions: SearchOptions{ + ValidateJSON: true, + }, + } +} + +// GetByPathCopy search in depth from top json and returns a **Copied** json node at the path location +func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) { + self.CopyReturn = true + return self.getByPath(path...) +} + +// GetByPathNoCopy search in depth from top json and returns a **Referenced** json node at the path location +// +// WARN: this search directly refer partial json from top json, which has faster speed, +// may consumes more memory. +func (self *Searcher) GetByPath(path ...interface{}) (Node, error) { + return self.getByPath(path...) +} + +func (self *Searcher) getByPath(path ...interface{}) (Node, error) { + var err types.ParsingError + var start int + + self.parser.p = 0 + start, err = self.parser.getByPath(self.ValidateJSON, path...) + if err != 0 { + // for compatibility with old version + if err == types.ERR_NOT_FOUND { + return Node{}, ErrNotExist + } + if err == types.ERR_UNSUPPORT_TYPE { + panic("path must be either int(>=0) or string") + } + return Node{}, self.parser.syntaxError(err) + } + + t := switchRawType(self.parser.s[start]) + if t == _V_NONE { + return Node{}, self.parser.ExportError(err) + } + + // copy string to reducing memory usage + var raw string + if self.CopyReturn { + raw = rt.Mem2Str([]byte(self.parser.s[start:self.parser.p])) + } else { + raw = self.parser.s[start:self.parser.p] + } + return newRawNode(raw, t, self.ConcurrentRead), nil +} + +// GetByPath searches a path and returns relaction and types of target +func _GetByPath(src string, path ...interface{}) (start int, end int, typ int, err error) { + p := NewParserObj(src) + s, e := p.getByPath(false, path...) + if e != 0 { + // for compatibility with old version + if e == types.ERR_NOT_FOUND { + return -1, -1, 0, ErrNotExist + } + if e == types.ERR_UNSUPPORT_TYPE { + panic("path must be either int(>=0) or string") + } + return -1, -1, 0, p.syntaxError(e) + } + + t := switchRawType(p.s[s]) + if t == _V_NONE { + return -1, -1, 0, ErrNotExist + } + if t == _V_NUMBER { + p.p = 1 + backward(p.s, p.p-1) + } + return s, p.p, int(t), nil +} + +// ValidSyntax check if a json has a valid JSON syntax, +// while not validate UTF-8 charset +func _ValidSyntax(json string) bool { + p := NewParserObj(json) + _, e := p.skip() + if e != 0 { + return false + } + if skipBlank(p.s, p.p) != -int(types.ERR_EOF) { + return false + } + return true +} + +// SkipFast skip a json value in fast-skip algs, +// while not strictly validate JSON syntax and UTF-8 charset. +func _SkipFast(src string, i int) (int, int, error) { + p := NewParserObj(src) + p.p = i + s, e := p.skipFast() + if e != 0 { + return -1, -1, p.ExportError(e) + } + t := switchRawType(p.s[s]) + if t == _V_NONE { + return -1, -1, ErrNotExist + } + if t == _V_NUMBER { + p.p = 1 + backward(p.s, p.p-1) } + return s, p.p, nil } diff --git a/vendor/github.com/bytedance/sonic/ast/stubs.go b/vendor/github.com/bytedance/sonic/ast/stubs.go new file mode 100644 index 00000000..53bf3b8a --- /dev/null +++ b/vendor/github.com/bytedance/sonic/ast/stubs.go @@ -0,0 +1,142 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ast + +import ( + "unicode/utf8" + "unsafe" + + "github.com/bytedance/sonic/internal/rt" +) + +//go:noescape +//go:linkname memmove runtime.memmove +//goland:noinspection GoUnusedParameter +func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr) + +//go:linkname unsafe_NewArray reflect.unsafe_NewArray +//goland:noinspection GoUnusedParameter +func unsafe_NewArray(typ *rt.GoType, n int) unsafe.Pointer + +//go:nosplit +func mem2ptr(s []byte) unsafe.Pointer { + return (*rt.GoSlice)(unsafe.Pointer(&s)).Ptr +} + +var safeSet = [utf8.RuneSelf]bool{ + ' ': true, + '!': true, + '"': false, + '#': true, + '$': true, + '%': true, + '&': true, + '\'': true, + '(': true, + ')': true, + '*': true, + '+': true, + ',': true, + '-': true, + '.': true, + '/': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + ':': true, + ';': true, + '<': true, + '=': true, + '>': true, + '?': true, + '@': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'V': true, + 'W': true, + 'X': true, + 'Y': true, + 'Z': true, + '[': true, + '\\': false, + ']': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '{': true, + '|': true, + '}': true, + '~': true, + '\u007f': true, +} + +var hex = "0123456789abcdef" + +//go:linkname unquoteBytes encoding/json.unquoteBytes +func unquoteBytes(s []byte) (t []byte, ok bool) diff --git a/vendor/github.com/bytedance/sonic/ast/stubs_go115.go b/vendor/github.com/bytedance/sonic/ast/stubs_go115.go deleted file mode 100644 index 37b9451f..00000000 --- a/vendor/github.com/bytedance/sonic/ast/stubs_go115.go +++ /dev/null @@ -1,55 +0,0 @@ -// +build !go1.20 - -/* - * Copyright 2021 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package ast - -import ( - `unsafe` - `unicode/utf8` - - `github.com/bytedance/sonic/internal/rt` -) - -//go:noescape -//go:linkname memmove runtime.memmove -//goland:noinspection GoUnusedParameter -func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr) - -//go:linkname unsafe_NewArray reflect.unsafe_NewArray -//goland:noinspection GoUnusedParameter -func unsafe_NewArray(typ *rt.GoType, n int) unsafe.Pointer - -//go:linkname growslice runtime.growslice -//goland:noinspection GoUnusedParameter -func growslice(et *rt.GoType, old rt.GoSlice, cap int) rt.GoSlice - -//go:nosplit -func mem2ptr(s []byte) unsafe.Pointer { - return (*rt.GoSlice)(unsafe.Pointer(&s)).Ptr -} - -var ( - //go:linkname safeSet encoding/json.safeSet - safeSet [utf8.RuneSelf]bool - - //go:linkname hex encoding/json.hex - hex string -) - -//go:linkname unquoteBytes encoding/json.unquoteBytes -func unquoteBytes(s []byte) (t []byte, ok bool) \ No newline at end of file diff --git a/vendor/github.com/bytedance/sonic/ast/stubs_go120.go b/vendor/github.com/bytedance/sonic/ast/stubs_go120.go deleted file mode 100644 index 6f830529..00000000 --- a/vendor/github.com/bytedance/sonic/ast/stubs_go120.go +++ /dev/null @@ -1,55 +0,0 @@ -// +build go1.20 - -/* - * Copyright 2021 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package ast - -import ( - `unsafe` - `unicode/utf8` - - `github.com/bytedance/sonic/internal/rt` -) - -//go:noescape -//go:linkname memmove runtime.memmove -//goland:noinspection GoUnusedParameter -func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr) - -//go:linkname unsafe_NewArray reflect.unsafe_NewArray -//goland:noinspection GoUnusedParameter -func unsafe_NewArray(typ *rt.GoType, n int) unsafe.Pointer - -//go:linkname growslice reflect.growslice -//goland:noinspection GoUnusedParameter -func growslice(et *rt.GoType, old rt.GoSlice, cap int) rt.GoSlice - -//go:nosplit -func mem2ptr(s []byte) unsafe.Pointer { - return (*rt.GoSlice)(unsafe.Pointer(&s)).Ptr -} - -var ( - //go:linkname safeSet encoding/json.safeSet - safeSet [utf8.RuneSelf]bool - - //go:linkname hex encoding/json.hex - hex string -) - -//go:linkname unquoteBytes encoding/json.unquoteBytes -func unquoteBytes(s []byte) (t []byte, ok bool) diff --git a/vendor/github.com/bytedance/sonic/ast/visitor.go b/vendor/github.com/bytedance/sonic/ast/visitor.go index 4019c31a..dc047851 100644 --- a/vendor/github.com/bytedance/sonic/ast/visitor.go +++ b/vendor/github.com/bytedance/sonic/ast/visitor.go @@ -18,6 +18,7 @@ package ast import ( `encoding/json` + `errors` `github.com/bytedance/sonic/internal/native/types` ) @@ -25,7 +26,7 @@ import ( // Visitor handles the callbacks during preorder traversal of a JSON AST. // // According to the JSON RFC8259, a JSON AST can be defined by -// the following rules without seperator / whitespace tokens. +// the following rules without separator / whitespace tokens. // // JSON-AST = value // value = false / null / true / object / array / number / string @@ -174,6 +175,19 @@ func (self *traverser) decodeArray() error { sp := self.parser.p ns := len(self.parser.s) + /* allocate array space and parse every element */ + if err := self.visitor.OnArrayBegin(_DEFAULT_NODE_CAP); err != nil { + if err == VisitOPSkip { + // NOTICE: for user needs to skip entiry object + self.parser.p -= 1 + if _, e := self.parser.skipFast(); e != 0 { + return e + } + return self.visitor.OnArrayEnd() + } + return err + } + /* check for EOF */ self.parser.p = self.parser.lspace(sp) if self.parser.p >= ns { @@ -183,16 +197,9 @@ func (self *traverser) decodeArray() error { /* check for empty array */ if self.parser.s[self.parser.p] == ']' { self.parser.p++ - if err := self.visitor.OnArrayBegin(0); err != nil { - return err - } return self.visitor.OnArrayEnd() } - /* allocate array space and parse every element */ - if err := self.visitor.OnArrayBegin(_DEFAULT_NODE_CAP); err != nil { - return err - } for { /* decode the value */ if err := self.decodeValue(); err != nil { @@ -223,6 +230,19 @@ func (self *traverser) decodeObject() error { sp := self.parser.p ns := len(self.parser.s) + /* allocate object space and decode each pair */ + if err := self.visitor.OnObjectBegin(_DEFAULT_NODE_CAP); err != nil { + if err == VisitOPSkip { + // NOTICE: for user needs to skip entiry object + self.parser.p -= 1 + if _, e := self.parser.skipFast(); e != 0 { + return e + } + return self.visitor.OnObjectEnd() + } + return err + } + /* check for EOF */ self.parser.p = self.parser.lspace(sp) if self.parser.p >= ns { @@ -232,16 +252,9 @@ func (self *traverser) decodeObject() error { /* check for empty object */ if self.parser.s[self.parser.p] == '}' { self.parser.p++ - if err := self.visitor.OnObjectBegin(0); err != nil { - return err - } return self.visitor.OnObjectEnd() } - /* allocate object space and decode each pair */ - if err := self.visitor.OnObjectBegin(_DEFAULT_NODE_CAP); err != nil { - return err - } for { var njs types.JsonState var err types.ParsingError @@ -313,3 +326,7 @@ func (self *traverser) decodeString(iv int64, ep int) error { } return self.visitor.OnString(out) } + +// If visitor return this error on `OnObjectBegin()` or `OnArrayBegin()`, +// the transverer will skip entiry object or array +var VisitOPSkip = errors.New("") diff --git a/vendor/github.com/bytedance/sonic/compat.go b/vendor/github.com/bytedance/sonic/compat.go index ec414c0c..b32342a8 100644 --- a/vendor/github.com/bytedance/sonic/compat.go +++ b/vendor/github.com/bytedance/sonic/compat.go @@ -1,4 +1,4 @@ -// +build !amd64 !go1.16 go1.22 +// +build !amd64,!arm64 go1.24 !go1.17 arm64,!go1.20 /* * Copyright 2021 ByteDance Inc. @@ -27,6 +27,8 @@ import ( `github.com/bytedance/sonic/option` ) +const apiKind = UseStdJSON + type frozenConfig struct { Config } diff --git a/vendor/github.com/bytedance/sonic/decoder/decoder_compat.go b/vendor/github.com/bytedance/sonic/decoder/decoder_compat.go index 466d842e..81e1ae4e 100644 --- a/vendor/github.com/bytedance/sonic/decoder/decoder_compat.go +++ b/vendor/github.com/bytedance/sonic/decoder/decoder_compat.go @@ -1,4 +1,4 @@ -// +build !amd64 !go1.16 go1.22 +// +build !amd64,!arm64 go1.24 !go1.17 arm64,!go1.20 /* * Copyright 2023 ByteDance Inc. @@ -30,7 +30,7 @@ import ( ) func init() { - println("WARNING: sonic only supports Go1.16~1.20 && CPU amd64, but your environment is not suitable") + println("WARNING: sonic/decoder only supports (Go1.17~1.23 && CPU amd64) or (go1.20~1.23 && CPU arm64), but your environment is not suitable") } const ( @@ -42,6 +42,7 @@ const ( _F_use_number = types.B_USE_NUMBER _F_validate_string = types.B_VALIDATE_STRING _F_allow_control = types.B_ALLOW_CONTROL + _F_no_validate_json = types.B_NO_VALIDATE_JSON ) type Options uint64 @@ -53,6 +54,7 @@ const ( OptionDisableUnknown Options = 1 << _F_disable_unknown OptionCopyString Options = 1 << _F_copy_string OptionValidateString Options = 1 << _F_validate_string + OptionNoValidateJSON Options = 1 << _F_no_validate_json ) func (self *Decoder) SetOptions(opts Options) { @@ -112,10 +114,10 @@ func (self *Decoder) CheckTrailings() error { func (self *Decoder) Decode(val interface{}) error { r := bytes.NewBufferString(self.s) dec := json.NewDecoder(r) - if (self.f | uint64(OptionUseNumber)) != 0 { + if (self.f & uint64(OptionUseNumber)) != 0 { dec.UseNumber() } - if (self.f | uint64(OptionDisableUnknown)) != 0 { + if (self.f & uint64(OptionDisableUnknown)) != 0 { dec.DisallowUnknownFields() } return dec.Decode(val) diff --git a/vendor/github.com/bytedance/sonic/decoder/decoder_amd64.go b/vendor/github.com/bytedance/sonic/decoder/decoder_native.go similarity index 64% rename from vendor/github.com/bytedance/sonic/decoder/decoder_amd64.go rename to vendor/github.com/bytedance/sonic/decoder/decoder_native.go index 7c284551..9317d57f 100644 --- a/vendor/github.com/bytedance/sonic/decoder/decoder_amd64.go +++ b/vendor/github.com/bytedance/sonic/decoder/decoder_native.go @@ -1,4 +1,6 @@ -// +build amd64,go1.16,!go1.22 +//go:build (amd64 && go1.17 && !go1.24) || (arm64 && go1.20 && !go1.24) +// +build amd64,go1.17,!go1.24 arm64,go1.20,!go1.24 + /* * Copyright 2023 ByteDance Inc. @@ -19,50 +21,51 @@ package decoder import ( - `github.com/bytedance/sonic/internal/decoder` + `github.com/bytedance/sonic/internal/decoder/api` ) // Decoder is the decoder context object -type Decoder = decoder.Decoder +type Decoder = api.Decoder // SyntaxError represents json syntax error -type SyntaxError = decoder.SyntaxError +type SyntaxError = api.SyntaxError // MismatchTypeError represents dismatching between json and object -type MismatchTypeError = decoder.MismatchTypeError +type MismatchTypeError = api.MismatchTypeError // Options for decode. -type Options = decoder.Options +type Options = api.Options const ( - OptionUseInt64 Options = decoder.OptionUseInt64 - OptionUseNumber Options = decoder.OptionUseNumber - OptionUseUnicodeErrors Options = decoder.OptionUseUnicodeErrors - OptionDisableUnknown Options = decoder.OptionDisableUnknown - OptionCopyString Options = decoder.OptionCopyString - OptionValidateString Options = decoder.OptionValidateString + OptionUseInt64 Options = api.OptionUseInt64 + OptionUseNumber Options = api.OptionUseNumber + OptionUseUnicodeErrors Options = api.OptionUseUnicodeErrors + OptionDisableUnknown Options = api.OptionDisableUnknown + OptionCopyString Options = api.OptionCopyString + OptionValidateString Options = api.OptionValidateString + OptionNoValidateJSON Options = api.OptionNoValidateJSON ) // StreamDecoder is the decoder context object for streaming input. -type StreamDecoder = decoder.StreamDecoder +type StreamDecoder = api.StreamDecoder var ( // NewDecoder creates a new decoder instance. - NewDecoder = decoder.NewDecoder + NewDecoder = api.NewDecoder // NewStreamDecoder adapts to encoding/json.NewDecoder API. // // NewStreamDecoder returns a new decoder that reads from r. - NewStreamDecoder = decoder.NewStreamDecoder + NewStreamDecoder = api.NewStreamDecoder // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in // order to reduce the first-hit latency. // // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is // a compile option to set the depth of recursive compile for the nested struct type. - Pretouch = decoder.Pretouch + Pretouch = api.Pretouch // Skip skips only one json value, and returns first non-blank character position and its ending position if it is valid. // Otherwise, returns negative error code using start and invalid character position using end - Skip = decoder.Skip + Skip = api.Skip ) diff --git a/vendor/github.com/bytedance/sonic/encoder/encoder_compat.go b/vendor/github.com/bytedance/sonic/encoder/encoder_compat.go index 222eea5b..254defa2 100644 --- a/vendor/github.com/bytedance/sonic/encoder/encoder_compat.go +++ b/vendor/github.com/bytedance/sonic/encoder/encoder_compat.go @@ -1,4 +1,4 @@ -// +build !amd64 !go1.16 go1.22 +// +build !amd64,!arm64 go1.24 !go1.17 arm64,!go1.20 /* * Copyright 2023 ByteDance Inc. @@ -28,9 +28,12 @@ import ( ) func init() { - println("WARNING: sonic only supports Go1.16~1.20 && CPU amd64, but your environment is not suitable") + println("WARNING:(encoder) sonic only supports (Go1.17~1.23 && CPU amd64) or (G01.20~1.23 && CPU arm64) , but your environment is not suitable") } +// EnableFallback indicates if encoder use fallback +const EnableFallback = true + // Options is a set of encoding options. type Options uint64 @@ -41,6 +44,8 @@ const ( bitNoQuoteTextMarshaler bitNoNullSliceOrMap bitValidateString + bitNoValidateJSONMarshaler + bitNoEncoderNewline // used for recursive compile bitPointerValue = 63 @@ -72,6 +77,13 @@ const ( // ValidateString indicates that encoder should validate the input string // before encoding it into JSON. ValidateString Options = 1 << bitValidateString + + // NoValidateJSONMarshaler indicates that the encoder should not validate the output string + // after encoding the JSONMarshaler to JSON. + NoValidateJSONMarshaler Options = 1 << bitNoValidateJSONMarshaler + + // NoEncoderNewline indicates that the encoder should not add a newline after every message + NoEncoderNewline Options = 1 << bitNoEncoderNewline // CompatibleWithStd is used to be compatible with std encoder. CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler @@ -116,6 +128,24 @@ func (self *Encoder) SetValidateString(f bool) { } } +// SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens +func (self *Encoder) SetNoValidateJSONMarshaler(f bool) { + if f { + self.Opts |= NoValidateJSONMarshaler + } else { + self.Opts &= ^NoValidateJSONMarshaler + } +} + +// SetNoEncoderNewline specifies if option NoEncoderNewline opens +func (self *Encoder) SetNoEncoderNewline(f bool) { + if f { + self.Opts |= NoEncoderNewline + } else { + self.Opts &= ^NoEncoderNewline + } +} + // SetCompactMarshaler specifies if option CompactMarshaler opens func (self *Encoder) SetCompactMarshaler(f bool) { if f { @@ -161,15 +191,19 @@ func Encode(val interface{}, opts Options) ([]byte, error) { // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating // a new one. func EncodeInto(buf *[]byte, val interface{}, opts Options) error { - if buf == nil { - panic("user-supplied buffer buf is nil") - } - w := bytes.NewBuffer(*buf) - enc := json.NewEncoder(w) - enc.SetEscapeHTML((opts & EscapeHTML) != 0) - err := enc.Encode(val) - *buf = w.Bytes() - return err + if buf == nil { + panic("user-supplied buffer buf is nil") + } + w := bytes.NewBuffer(*buf) + enc := json.NewEncoder(w) + enc.SetEscapeHTML((opts & EscapeHTML) != 0) + err := enc.Encode(val) + *buf = w.Bytes() + l := len(*buf) + if l > 0 && (opts & NoEncoderNewline != 0) && (*buf)[l-1] == '\n' { + *buf = (*buf)[:l-1] + } + return err } // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 diff --git a/vendor/github.com/bytedance/sonic/encoder/encoder_amd64.go b/vendor/github.com/bytedance/sonic/encoder/encoder_native.go similarity index 86% rename from vendor/github.com/bytedance/sonic/encoder/encoder_amd64.go rename to vendor/github.com/bytedance/sonic/encoder/encoder_native.go index 0ef336b7..b300ebf0 100644 --- a/vendor/github.com/bytedance/sonic/encoder/encoder_amd64.go +++ b/vendor/github.com/bytedance/sonic/encoder/encoder_native.go @@ -1,4 +1,4 @@ -// +build amd64,go1.16,!go1.22 +// +build amd64,go1.17,!go1.24 arm64,go1.20,!go1.24 /* * Copyright 2023 ByteDance Inc. @@ -22,6 +22,8 @@ import ( `github.com/bytedance/sonic/internal/encoder` ) +// EnableFallback indicates if encoder use fallback +const EnableFallback = false // Encoder represents a specific set of encoder configurations. type Encoder = encoder.Encoder @@ -59,8 +61,18 @@ const ( // before encoding it into JSON. ValidateString Options = encoder.ValidateString + // NoValidateJSONMarshaler indicates that the encoder should not validate the output string + // after encoding the JSONMarshaler to JSON. + NoValidateJSONMarshaler Options = encoder.NoValidateJSONMarshaler + + // NoEncoderNewline indicates that the encoder should not add a newline after every message + NoEncoderNewline Options = encoder.NoEncoderNewline + // CompatibleWithStd is used to be compatible with std encoder. CompatibleWithStd Options = encoder.CompatibleWithStd + + // Encode Infinity or Nan float into `null`, instead of returning an error. + EncodeNullForInfOrNan Options = encoder.EncodeNullForInfOrNan ) diff --git a/vendor/github.com/bytedance/sonic/go.work b/vendor/github.com/bytedance/sonic/go.work index e21d6f87..8d2af51b 100644 --- a/vendor/github.com/bytedance/sonic/go.work +++ b/vendor/github.com/bytedance/sonic/go.work @@ -2,7 +2,8 @@ go 1.18 use ( . - ./generic_test - ./fuzz ./external_jsonlib_test + ./fuzz + ./generic_test + ./loader ) diff --git a/vendor/github.com/bytedance/sonic/go.work.sum b/vendor/github.com/bytedance/sonic/go.work.sum new file mode 100644 index 00000000..d5962587 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/go.work.sum @@ -0,0 +1 @@ +github.com/bytedance/sonic/loader v0.2.0/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= diff --git a/vendor/github.com/bytedance/sonic/internal/base64/b64_amd64.go b/vendor/github.com/bytedance/sonic/internal/base64/b64_amd64.go new file mode 100644 index 00000000..01f99f93 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/base64/b64_amd64.go @@ -0,0 +1,46 @@ +// +build amd64,go1.16 + +/** + * Copyright 2023 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package base64 + +import ( + "github.com/cloudwego/base64x" +) + +func DecodeBase64(src string) ([]byte, error) { + return base64x.StdEncoding.DecodeString(src) +} + +func EncodeBase64(buf []byte, src []byte) []byte { + if len(src) == 0 { + return append(buf, '"', '"') + } + buf = append(buf, '"') + need := base64x.StdEncoding.EncodedLen(len(src)) + if cap(buf) - len(buf) < need { + tmp := make([]byte, len(buf), len(buf) + need*2) + copy(tmp, buf) + buf = tmp + } + base64x.StdEncoding.Encode(buf[len(buf):cap(buf)], src) + buf = buf[:len(buf) + need] + buf = append(buf, '"') + return buf +} + + \ No newline at end of file diff --git a/vendor/github.com/bytedance/sonic/internal/base64/b64_compat.go b/vendor/github.com/bytedance/sonic/internal/base64/b64_compat.go new file mode 100644 index 00000000..ba8f8b56 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/base64/b64_compat.go @@ -0,0 +1,44 @@ +// +build !amd64 !go1.16 + +/* + * Copyright 2022 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package base64 + +import ( + "encoding/base64" +) + +func EncodeBase64(buf []byte, src []byte) []byte { + if len(src) == 0 { + return append(buf, '"', '"') + } + buf = append(buf, '"') + need := base64.StdEncoding.EncodedLen(len(src)) + if cap(buf) - len(buf) < need { + tmp := make([]byte, len(buf), len(buf) + need*2) + copy(tmp, buf) + buf = tmp + } + base64.StdEncoding.Encode(buf[len(buf):cap(buf)], src) + buf = buf[:len(buf) + need] + buf = append(buf, '"') + return buf +} + +func DecodeBase64(src string) ([]byte, error) { + return base64.StdEncoding.DecodeString(src) +} diff --git a/vendor/github.com/bytedance/sonic/internal/cpu/features.go b/vendor/github.com/bytedance/sonic/internal/cpu/features.go index f9ee3b8f..fd4dbda3 100644 --- a/vendor/github.com/bytedance/sonic/internal/cpu/features.go +++ b/vendor/github.com/bytedance/sonic/internal/cpu/features.go @@ -24,7 +24,6 @@ import ( ) var ( - HasAVX = cpuid.CPU.Has(cpuid.AVX) HasAVX2 = cpuid.CPU.Has(cpuid.AVX2) HasSSE = cpuid.CPU.Has(cpuid.SSE) ) @@ -33,7 +32,8 @@ func init() { switch v := os.Getenv("SONIC_MODE"); v { case "" : break case "auto" : break - case "noavx" : HasAVX = false; fallthrough + case "noavx" : HasAVX2 = false + // will also disable avx, act as `noavx`, we remain it to make sure forward compatibility case "noavx2" : HasAVX2 = false default : panic(fmt.Sprintf("invalid mode: '%s', should be one of 'auto', 'noavx', 'noavx2'", v)) } diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/decoder.go b/vendor/github.com/bytedance/sonic/internal/decoder/api/decoder.go similarity index 57% rename from vendor/github.com/bytedance/sonic/internal/decoder/decoder.go rename to vendor/github.com/bytedance/sonic/internal/decoder/api/decoder.go index 8453db86..0dc01998 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/decoder.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/api/decoder.go @@ -14,51 +14,52 @@ * limitations under the License. */ -package decoder +package api import ( - `unsafe` - `encoding/json` `reflect` - `runtime` `github.com/bytedance/sonic/internal/native` `github.com/bytedance/sonic/internal/native/types` + `github.com/bytedance/sonic/internal/decoder/consts` + `github.com/bytedance/sonic/internal/decoder/errors` `github.com/bytedance/sonic/internal/rt` `github.com/bytedance/sonic/option` - `github.com/bytedance/sonic/utf8` ) const ( - _F_use_int64 = 0 - _F_disable_urc = 2 - _F_disable_unknown = 3 - _F_copy_string = 4 - - _F_use_number = types.B_USE_NUMBER - _F_validate_string = types.B_VALIDATE_STRING - _F_allow_control = types.B_ALLOW_CONTROL + _F_allow_control = consts.F_allow_control + _F_copy_string = consts.F_copy_string + _F_disable_unknown = consts.F_disable_unknown + _F_disable_urc = consts.F_disable_urc + _F_use_int64 = consts.F_use_int64 + _F_use_number = consts.F_use_number + _F_validate_string = consts.F_validate_string + + _MaxStack = consts.MaxStack + + OptionUseInt64 = consts.OptionUseInt64 + OptionUseNumber = consts.OptionUseNumber + OptionUseUnicodeErrors = consts.OptionUseUnicodeErrors + OptionDisableUnknown = consts.OptionDisableUnknown + OptionCopyString = consts.OptionCopyString + OptionValidateString = consts.OptionValidateString + OptionNoValidateJSON = consts.OptionNoValidateJSON ) -type Options uint64 - -const ( - OptionUseInt64 Options = 1 << _F_use_int64 - OptionUseNumber Options = 1 << _F_use_number - OptionUseUnicodeErrors Options = 1 << _F_disable_urc - OptionDisableUnknown Options = 1 << _F_disable_unknown - OptionCopyString Options = 1 << _F_copy_string - OptionValidateString Options = 1 << _F_validate_string +type ( + Options = consts.Options + MismatchTypeError = errors.MismatchTypeError + SyntaxError = errors.SyntaxError ) func (self *Decoder) SetOptions(opts Options) { - if (opts & OptionUseNumber != 0) && (opts & OptionUseInt64 != 0) { + if (opts & consts.OptionUseNumber != 0) && (opts & consts.OptionUseInt64 != 0) { panic("can't set OptionUseInt64 and OptionUseNumber both!") } self.f = uint64(opts) } - // Decoder is the decoder context object type Decoder struct { i int @@ -109,44 +110,7 @@ func (self *Decoder) CheckTrailings() error { // Decode parses the JSON-encoded data from current position and stores the result // in the value pointed to by val. func (self *Decoder) Decode(val interface{}) error { - /* validate json if needed */ - if (self.f & (1 << _F_validate_string)) != 0 && !utf8.ValidateString(self.s){ - dbuf := utf8.CorrectWith(nil, rt.Str2Mem(self.s), "\ufffd") - self.s = rt.Mem2Str(dbuf) - } - - vv := rt.UnpackEface(val) - vp := vv.Value - - /* check for nil type */ - if vv.Type == nil { - return &json.InvalidUnmarshalError{} - } - - /* must be a non-nil pointer */ - if vp == nil || vv.Type.Kind() != reflect.Ptr { - return &json.InvalidUnmarshalError{Type: vv.Type.Pack()} - } - - etp := rt.PtrElem(vv.Type) - - /* check the defined pointer type for issue 379 */ - if vv.Type.IsNamed() { - newp := vp - etp = vv.Type - vp = unsafe.Pointer(&newp) - } - - /* create a new stack, and call the decoder */ - sb := newStack() - nb, err := decodeTypedPointer(self.s, self.i, etp, vp, sb, self.f) - /* return the stack back */ - self.i = nb - freeStack(sb) - - /* avoid GC ahead */ - runtime.KeepAlive(vv) - return err + return decodeImpl(&self.s, &self.i, self.f, val) } // UseInt64 indicates the Decoder to unmarshal an integer into an interface{} as an @@ -194,53 +158,7 @@ func (self *Decoder) ValidateString() { // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is // a compile option to set the depth of recursive compile for the nested struct type. func Pretouch(vt reflect.Type, opts ...option.CompileOption) error { - cfg := option.DefaultCompileOptions() - for _, opt := range opts { - opt(&cfg) - } - return pretouchRec(map[reflect.Type]bool{vt:true}, cfg) -} - -func pretouchType(_vt reflect.Type, opts option.CompileOptions) (map[reflect.Type]bool, error) { - /* compile function */ - compiler := newCompiler().apply(opts) - decoder := func(vt *rt.GoType, _ ...interface{}) (interface{}, error) { - if pp, err := compiler.compile(_vt); err != nil { - return nil, err - } else { - as := newAssembler(pp) - as.name = _vt.String() - return as.Load(), nil - } - } - - /* find or compile */ - vt := rt.UnpackType(_vt) - if val := programCache.Get(vt); val != nil { - return nil, nil - } else if _, err := programCache.Compute(vt, decoder); err == nil { - return compiler.rec, nil - } else { - return nil, err - } -} - -func pretouchRec(vtm map[reflect.Type]bool, opts option.CompileOptions) error { - if opts.RecursiveDepth < 0 || len(vtm) == 0 { - return nil - } - next := make(map[reflect.Type]bool) - for vt := range(vtm) { - sub, err := pretouchType(vt, opts) - if err != nil { - return err - } - for svt := range(sub) { - next[svt] = true - } - } - opts.RecursiveDepth -= 1 - return pretouchRec(next, opts) + return pretouchImpl(vt, opts...) } // Skip skips only one json value, and returns first non-blank character position and its ending position if it is valid. diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/api/decoder_amd64.go b/vendor/github.com/bytedance/sonic/internal/decoder/api/decoder_amd64.go new file mode 100644 index 00000000..4e1c3f42 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/api/decoder_amd64.go @@ -0,0 +1,38 @@ +//go:build go1.17 && !go1.24 +// +build go1.17,!go1.24 + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package api + +import ( + "github.com/bytedance/sonic/internal/envs" + "github.com/bytedance/sonic/internal/decoder/jitdec" + "github.com/bytedance/sonic/internal/decoder/optdec" +) + +var ( + pretouchImpl = jitdec.Pretouch + decodeImpl = jitdec.Decode +) + + func init() { + if envs.UseOptDec { + pretouchImpl = optdec.Pretouch + decodeImpl = optdec.Decode + } + } diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/api/decoder_arm64.go b/vendor/github.com/bytedance/sonic/internal/decoder/api/decoder_arm64.go new file mode 100644 index 00000000..65a9478b --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/api/decoder_arm64.go @@ -0,0 +1,38 @@ +// +build go1.17,!go1.24 + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package api + +import ( + `github.com/bytedance/sonic/internal/decoder/optdec` + `github.com/bytedance/sonic/internal/envs` +) + +var ( + pretouchImpl = optdec.Pretouch + decodeImpl = optdec.Decode +) + + +func init() { + // whe in aarch64. we enable all optimize + envs.EnableOptDec() + envs.EnableFastMap() +} + + diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/stream.go b/vendor/github.com/bytedance/sonic/internal/decoder/api/stream.go similarity index 60% rename from vendor/github.com/bytedance/sonic/internal/decoder/stream.go rename to vendor/github.com/bytedance/sonic/internal/decoder/api/stream.go index a3716435..8a8102dd 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/stream.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/api/stream.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package decoder +package api import ( `bytes` @@ -23,11 +23,12 @@ import ( `github.com/bytedance/sonic/internal/native` `github.com/bytedance/sonic/internal/native/types` + `github.com/bytedance/sonic/internal/rt` `github.com/bytedance/sonic/option` ) var ( - minLeftBufferShift uint = 1 + minLeftBufferShift uint = 1 ) // StreamDecoder is the decoder context object for streaming input. @@ -46,6 +47,12 @@ var bufPool = sync.Pool{ }, } +func freeBytes(buf []byte) { + if rt.CanSizeResue(cap(buf)) { + bufPool.Put(buf[:0]) + } +} + // NewStreamDecoder adapts to encoding/json.NewDecoder API. // // NewStreamDecoder returns a new decoder that reads from r. @@ -58,90 +65,53 @@ func NewStreamDecoder(r io.Reader) *StreamDecoder { // Either io error from underlying io.Reader (except io.EOF) // or syntax error from data will be recorded and stop subsequently decoding. func (self *StreamDecoder) Decode(val interface{}) (err error) { - if self.err != nil { - return self.err - } - - var buf = self.buf[self.scanp:] - var p = 0 - var recycle bool - if cap(buf) == 0 { - buf = bufPool.Get().([]byte) - recycle = true - } - - var first = true - var repeat = true - -read_more: - for { - l := len(buf) - realloc(&buf) - n, err := self.r.Read(buf[l:cap(buf)]) - buf = buf[:l+n] - if err != nil { - repeat = false - if err == io.EOF { - if len(buf) == 0 { - return err - } - break - } - self.err = err - return err - } - if n > 0 || first { - break - } - } - first = false - - l := len(buf) - if l > 0 { - self.Decoder.Reset(string(buf)) - - var x int - if ret := native.SkipOneFast(&self.s, &x); ret < 0 { - if repeat { - goto read_more + // read more data into buf + if self.More() { + var s = self.scanp + try_skip: + var e = len(self.buf) + var src = rt.Mem2Str(self.buf[s:e]) + // try skip + var x = 0; + if y := native.SkipOneFast(&src, &x); y < 0 { + if self.readMore() { + goto try_skip } else { - err = SyntaxError{x, self.s, types.ParsingError(-ret), ""} - self.err = err + err = SyntaxError{e, self.s, types.ParsingError(-s), ""} + self.setErr(err) return } + } else { + s = y + s + e = x + s } - + + // must copy string here for safety + self.Decoder.Reset(string(self.buf[s:e])) err = self.Decoder.Decode(val) if err != nil { - self.err = err + self.setErr(err) + return } - p = self.Decoder.Pos() - self.scanned += int64(p) - self.scanp = 0 - } - - if l > p { - // remain undecoded bytes, so copy them into self.buf - self.buf = append(self.buf[:0], buf[p:]...) - } else { - self.buf = nil - recycle = true - } + self.scanp = e + _, empty := self.scan() + if empty { + // no remain valid bytes, thus we just recycle buffer + mem := self.buf + self.buf = nil + freeBytes(mem) + } else { + // remain undecoded bytes, move them onto head + n := copy(self.buf, self.buf[self.scanp:]) + self.buf = self.buf[:n] + } - if recycle { - buf = buf[:0] - bufPool.Put(buf) - } - return err -} + self.scanned += int64(self.scanp) + self.scanp = 0 + } -func (self StreamDecoder) repeatable(err error) bool { - if ee, ok := err.(SyntaxError); ok && - (ee.Code == types.ERR_EOF || (ee.Code == types.ERR_INVALID_CHAR && self.i >= len(self.s)-1)) { - return true - } - return false + return self.err } // InputOffset returns the input stream byte offset of the current decoder position. @@ -166,28 +136,72 @@ func (self *StreamDecoder) More() bool { return err == nil && c != ']' && c != '}' } +// More reports whether there is another element in the +// current array or object being parsed. +func (self *StreamDecoder) readMore() bool { + if self.err != nil { + return false + } + + var err error + var n int + for { + // Grow buffer if not large enough. + l := len(self.buf) + realloc(&self.buf) + + n, err = self.r.Read(self.buf[l:cap(self.buf)]) + self.buf = self.buf[: l+n] + + self.scanp = l + _, empty := self.scan() + if !empty { + return true + } + + // buffer has been scanned, now report any error + if err != nil { + self.setErr(err) + return false + } + } +} + +func (self *StreamDecoder) setErr(err error) { + self.err = err + mem := self.buf[:0] + self.buf = nil + freeBytes(mem) +} + func (self *StreamDecoder) peek() (byte, error) { var err error for { - for i := self.scanp; i < len(self.buf); i++ { - c := self.buf[i] - if isSpace(c) { - continue - } - self.scanp = i - return c, nil + c, empty := self.scan() + if !empty { + return byte(c), nil } // buffer has been scanned, now report any error if err != nil { - if err != io.EOF { - self.err = err - } + self.setErr(err) return 0, err } err = self.refill() } } +func (self *StreamDecoder) scan() (byte, bool) { + for i := self.scanp; i < len(self.buf); i++ { + c := self.buf[i] + if isSpace(c) { + continue + } + self.scanp = i + return c, false + } + return 0, true +} + func isSpace(c byte) bool { return types.SPACE_MASK & (1 << c) != 0 } @@ -212,17 +226,23 @@ func (self *StreamDecoder) refill() error { return err } -func realloc(buf *[]byte) { +func realloc(buf *[]byte) bool { l := uint(len(*buf)) c := uint(cap(*buf)) + if c == 0 { + *buf = bufPool.Get().([]byte) + return true + } if c - l <= c >> minLeftBufferShift { e := l+(l>>minLeftBufferShift) - if e < option.DefaultDecoderBufferSize { - e = option.DefaultDecoderBufferSize + if e <= c { + e = c*2 } tmp := make([]byte, l, e) copy(tmp, *buf) *buf = tmp + return true } + return false } diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/asm_stubs_amd64_go116.go b/vendor/github.com/bytedance/sonic/internal/decoder/asm_stubs_amd64_go116.go deleted file mode 100644 index 4c4c850a..00000000 --- a/vendor/github.com/bytedance/sonic/internal/decoder/asm_stubs_amd64_go116.go +++ /dev/null @@ -1,130 +0,0 @@ -// +build go1.16,!go1.17 - -// Copyright 2023 CloudWeGo Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package decoder - -import ( - `strconv` - _ `unsafe` - - `github.com/bytedance/sonic/internal/jit` - `github.com/bytedance/sonic/internal/rt` - `github.com/twitchyliquid64/golang-asm/obj` - `github.com/twitchyliquid64/golang-asm/obj/x86` -) - -var _runtime_writeBarrier uintptr = rt.GcwbAddr() - -//go:linkname gcWriteBarrierAX runtime.gcWriteBarrier -func gcWriteBarrierAX() - -var ( - _V_writeBarrier = jit.Imm(int64(_runtime_writeBarrier)) - - _F_gcWriteBarrierAX = jit.Func(gcWriteBarrierAX) -) - -func (self *_Assembler) WritePtrAX(i int, rec obj.Addr, saveDI bool) { - self.Emit("MOVQ", _V_writeBarrier, _R10) - self.Emit("CMPL", jit.Ptr(_R10, 0), jit.Imm(0)) - self.Sjmp("JE", "_no_writeBarrier" + strconv.Itoa(i) + "_{n}") - if saveDI { - self.save(_DI) - } - self.Emit("LEAQ", rec, _DI) - self.Emit("MOVQ", _F_gcWriteBarrierAX, _R10) // MOVQ ${fn}, AX - self.Rjmp("CALL", _R10) - if saveDI { - self.load(_DI) - } - self.Sjmp("JMP", "_end_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Link("_no_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Emit("MOVQ", _AX, rec) - self.Link("_end_writeBarrier" + strconv.Itoa(i) + "_{n}") -} - -func (self *_Assembler) WriteRecNotAX(i int, ptr obj.Addr, rec obj.Addr, saveDI bool, saveAX bool) { - if rec.Reg == x86.REG_AX || rec.Index == x86.REG_AX { - panic("rec contains AX!") - } - self.Emit("MOVQ", _V_writeBarrier, _R10) - self.Emit("CMPL", jit.Ptr(_R10, 0), jit.Imm(0)) - self.Sjmp("JE", "_no_writeBarrier" + strconv.Itoa(i) + "_{n}") - if saveAX { - self.Emit("XCHGQ", ptr, _AX) - } else { - self.Emit("MOVQ", ptr, _AX) - } - if saveDI { - self.save(_DI) - } - self.Emit("LEAQ", rec, _DI) - self.Emit("MOVQ", _F_gcWriteBarrierAX, _R10) // MOVQ ${fn}, AX - self.Rjmp("CALL", _R10) - if saveDI { - self.load(_DI) - } - if saveAX { - self.Emit("XCHGQ", ptr, _AX) - } - self.Sjmp("JMP", "_end_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Link("_no_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Emit("MOVQ", ptr, rec) - self.Link("_end_writeBarrier" + strconv.Itoa(i) + "_{n}") -} - - -func (self *_ValueDecoder) WritePtrAX(i int, rec obj.Addr, saveDI bool) { - self.Emit("MOVQ", _V_writeBarrier, _R10) - self.Emit("CMPL", jit.Ptr(_R10, 0), jit.Imm(0)) - self.Sjmp("JE", "_no_writeBarrier" + strconv.Itoa(i) + "_{n}") - if saveDI { - self.save(_DI) - } - self.Emit("LEAQ", rec, _DI) - self.Emit("MOVQ", _F_gcWriteBarrierAX, _R10) // MOVQ ${fn}, AX - self.Rjmp("CALL", _R10) - if saveDI { - self.load(_DI) - } - self.Sjmp("JMP", "_end_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Link("_no_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Emit("MOVQ", _AX, rec) - self.Link("_end_writeBarrier" + strconv.Itoa(i) + "_{n}") -} - -func (self *_ValueDecoder) WriteRecNotAX(i int, ptr obj.Addr, rec obj.Addr, saveDI bool) { - if rec.Reg == x86.REG_AX || rec.Index == x86.REG_AX { - panic("rec contains AX!") - } - self.Emit("MOVQ", _V_writeBarrier, _R10) - self.Emit("CMPL", jit.Ptr(_R10, 0), jit.Imm(0)) - self.Sjmp("JE", "_no_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Emit("MOVQ", ptr, _AX) - if saveDI { - self.save(_DI) - } - self.Emit("LEAQ", rec, _DI) - self.Emit("MOVQ", _F_gcWriteBarrierAX, _R10) // MOVQ ${fn}, AX - self.Rjmp("CALL", _R10) - if saveDI { - self.load(_DI) - } - self.Sjmp("JMP", "_end_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Link("_no_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Emit("MOVQ", ptr, rec) - self.Link("_end_writeBarrier" + strconv.Itoa(i) + "_{n}") -} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/assembler_stkabi_amd64.go b/vendor/github.com/bytedance/sonic/internal/decoder/assembler_stkabi_amd64.go deleted file mode 100644 index 57a38b42..00000000 --- a/vendor/github.com/bytedance/sonic/internal/decoder/assembler_stkabi_amd64.go +++ /dev/null @@ -1,1949 +0,0 @@ -// +build go1.16,!go1.17 - -/* - * Copyright 2021 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package decoder - -import ( - `encoding/json` - `fmt` - `math` - `reflect` - `unsafe` - - `github.com/bytedance/sonic/internal/caching` - `github.com/bytedance/sonic/internal/jit` - `github.com/bytedance/sonic/internal/native` - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` - `github.com/twitchyliquid64/golang-asm/obj` -) - -/** Register Allocations - * - * State Registers: - * - * %rbx : stack base - * %r12 : input pointer - * %r13 : input length - * %r14 : input cursor - * %r15 : value pointer - * - * Error Registers: - * - * %r10 : error type register - * %r11 : error pointer register - */ - -/** Function Prototype & Stack Map - * - * func (s string, ic int, vp unsafe.Pointer, sb *_Stack, fv uint64, sv string) (rc int, err error) - * - * s.buf : (FP) - * s.len : 8(FP) - * ic : 16(FP) - * vp : 24(FP) - * sb : 32(FP) - * fv : 40(FP) - * sv : 56(FP) - * err.vt : 72(FP) - * err.vp : 80(FP) - */ - -const ( - _FP_args = 96 // 96 bytes to pass arguments and return values for this function - _FP_fargs = 80 // 80 bytes for passing arguments to other Go functions - _FP_saves = 40 // 40 bytes for saving the registers before CALL instructions - _FP_locals = 144 // 144 bytes for local variables -) - -const ( - _FP_offs = _FP_fargs + _FP_saves + _FP_locals - _FP_size = _FP_offs + 8 // 8 bytes for the parent frame pointer - _FP_base = _FP_size + 8 // 8 bytes for the return address -) - -const ( - _IM_null = 0x6c6c756e // 'null' - _IM_true = 0x65757274 // 'true' - _IM_alse = 0x65736c61 // 'alse' ('false' without the 'f') -) - -const ( - _BM_space = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n') -) - -const ( - _MODE_JSON = 1 << 3 // base64 mode -) - -const ( - _LB_error = "_error" - _LB_im_error = "_im_error" - _LB_eof_error = "_eof_error" - _LB_type_error = "_type_error" - _LB_field_error = "_field_error" - _LB_range_error = "_range_error" - _LB_stack_error = "_stack_error" - _LB_base64_error = "_base64_error" - _LB_unquote_error = "_unquote_error" - _LB_parsing_error = "_parsing_error" - _LB_parsing_error_v = "_parsing_error_v" - _LB_mismatch_error = "_mismatch_error" -) - -const ( - _LB_char_0_error = "_char_0_error" - _LB_char_1_error = "_char_1_error" - _LB_char_2_error = "_char_2_error" - _LB_char_3_error = "_char_3_error" - _LB_char_4_error = "_char_4_error" - _LB_char_m2_error = "_char_m2_error" - _LB_char_m3_error = "_char_m3_error" -) - -const ( - _LB_skip_one = "_skip_one" - _LB_skip_key_value = "_skip_key_value" -) - -var ( - _AX = jit.Reg("AX") - _CX = jit.Reg("CX") - _DX = jit.Reg("DX") - _DI = jit.Reg("DI") - _SI = jit.Reg("SI") - _BP = jit.Reg("BP") - _SP = jit.Reg("SP") - _R8 = jit.Reg("R8") - _R9 = jit.Reg("R9") - _X0 = jit.Reg("X0") - _X1 = jit.Reg("X1") -) - -var ( - _ST = jit.Reg("BX") - _IP = jit.Reg("R12") - _IL = jit.Reg("R13") - _IC = jit.Reg("R14") - _VP = jit.Reg("R15") -) - -var ( - _R10 = jit.Reg("R10") // used for gcWriteBarrier - _DF = jit.Reg("R10") // reuse R10 in generic decoder for flags - _ET = jit.Reg("R10") - _EP = jit.Reg("R11") -) - -var ( - _ARG_s = _ARG_sp - _ARG_sp = jit.Ptr(_SP, _FP_base) - _ARG_sl = jit.Ptr(_SP, _FP_base + 8) - _ARG_ic = jit.Ptr(_SP, _FP_base + 16) - _ARG_vp = jit.Ptr(_SP, _FP_base + 24) - _ARG_sb = jit.Ptr(_SP, _FP_base + 32) - _ARG_fv = jit.Ptr(_SP, _FP_base + 40) -) - -var ( - _VAR_sv = _VAR_sv_p - _VAR_sv_p = jit.Ptr(_SP, _FP_base + 48) - _VAR_sv_n = jit.Ptr(_SP, _FP_base + 56) - _VAR_vk = jit.Ptr(_SP, _FP_base + 64) -) - -var ( - _RET_rc = jit.Ptr(_SP, _FP_base + 72) - _RET_et = jit.Ptr(_SP, _FP_base + 80) - _RET_ep = jit.Ptr(_SP, _FP_base + 88) -) - -var ( - _VAR_st = _VAR_st_Vt - _VAR_sr = jit.Ptr(_SP, _FP_fargs + _FP_saves) -) - - -var ( - _VAR_st_Vt = jit.Ptr(_SP, _FP_fargs + _FP_saves + 0) - _VAR_st_Dv = jit.Ptr(_SP, _FP_fargs + _FP_saves + 8) - _VAR_st_Iv = jit.Ptr(_SP, _FP_fargs + _FP_saves + 16) - _VAR_st_Ep = jit.Ptr(_SP, _FP_fargs + _FP_saves + 24) - _VAR_st_Db = jit.Ptr(_SP, _FP_fargs + _FP_saves + 32) - _VAR_st_Dc = jit.Ptr(_SP, _FP_fargs + _FP_saves + 40) -) - -var ( - _VAR_ss_AX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 48) - _VAR_ss_CX = jit.Ptr(_SP, _FP_fargs + _FP_saves + 56) - _VAR_ss_SI = jit.Ptr(_SP, _FP_fargs + _FP_saves + 64) - _VAR_ss_R8 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 72) - _VAR_ss_R9 = jit.Ptr(_SP, _FP_fargs + _FP_saves + 80) -) - -var ( - _VAR_bs_p = jit.Ptr(_SP, _FP_fargs + _FP_saves + 88) - _VAR_bs_n = jit.Ptr(_SP, _FP_fargs + _FP_saves + 96) - _VAR_bs_LR = jit.Ptr(_SP, _FP_fargs + _FP_saves + 104) -) - -var _VAR_fl = jit.Ptr(_SP, _FP_fargs + _FP_saves + 112) - -var ( - _VAR_et = jit.Ptr(_SP, _FP_fargs + _FP_saves + 120) // save dismatched type - _VAR_ic = jit.Ptr(_SP, _FP_fargs + _FP_saves + 128) // save dismatched position - _VAR_pc = jit.Ptr(_SP, _FP_fargs + _FP_saves + 136) // save skip return pc -) - -type _Assembler struct { - jit.BaseAssembler - p _Program - name string -} - -func newAssembler(p _Program) *_Assembler { - return new(_Assembler).Init(p) -} - -/** Assembler Interface **/ - -func (self *_Assembler) Load() _Decoder { - return ptodec(self.BaseAssembler.Load("decode_"+self.name, _FP_size, _FP_args, argPtrs, localPtrs)) -} - -func (self *_Assembler) Init(p _Program) *_Assembler { - self.p = p - self.BaseAssembler.Init(self.compile) - return self -} - -func (self *_Assembler) compile() { - self.prologue() - self.instrs() - self.epilogue() - self.copy_string() - self.escape_string() - self.escape_string_twice() - self.skip_one() - self.skip_key_value() - self.mismatch_error() - self.type_error() - self.field_error() - self.range_error() - self.stack_error() - self.base64_error() - self.parsing_error() -} - -/** Assembler Stages **/ - -var _OpFuncTab = [256]func(*_Assembler, *_Instr) { - _OP_any : (*_Assembler)._asm_OP_any, - _OP_dyn : (*_Assembler)._asm_OP_dyn, - _OP_str : (*_Assembler)._asm_OP_str, - _OP_bin : (*_Assembler)._asm_OP_bin, - _OP_bool : (*_Assembler)._asm_OP_bool, - _OP_num : (*_Assembler)._asm_OP_num, - _OP_i8 : (*_Assembler)._asm_OP_i8, - _OP_i16 : (*_Assembler)._asm_OP_i16, - _OP_i32 : (*_Assembler)._asm_OP_i32, - _OP_i64 : (*_Assembler)._asm_OP_i64, - _OP_u8 : (*_Assembler)._asm_OP_u8, - _OP_u16 : (*_Assembler)._asm_OP_u16, - _OP_u32 : (*_Assembler)._asm_OP_u32, - _OP_u64 : (*_Assembler)._asm_OP_u64, - _OP_f32 : (*_Assembler)._asm_OP_f32, - _OP_f64 : (*_Assembler)._asm_OP_f64, - _OP_unquote : (*_Assembler)._asm_OP_unquote, - _OP_nil_1 : (*_Assembler)._asm_OP_nil_1, - _OP_nil_2 : (*_Assembler)._asm_OP_nil_2, - _OP_nil_3 : (*_Assembler)._asm_OP_nil_3, - _OP_deref : (*_Assembler)._asm_OP_deref, - _OP_index : (*_Assembler)._asm_OP_index, - _OP_is_null : (*_Assembler)._asm_OP_is_null, - _OP_is_null_quote : (*_Assembler)._asm_OP_is_null_quote, - _OP_map_init : (*_Assembler)._asm_OP_map_init, - _OP_map_key_i8 : (*_Assembler)._asm_OP_map_key_i8, - _OP_map_key_i16 : (*_Assembler)._asm_OP_map_key_i16, - _OP_map_key_i32 : (*_Assembler)._asm_OP_map_key_i32, - _OP_map_key_i64 : (*_Assembler)._asm_OP_map_key_i64, - _OP_map_key_u8 : (*_Assembler)._asm_OP_map_key_u8, - _OP_map_key_u16 : (*_Assembler)._asm_OP_map_key_u16, - _OP_map_key_u32 : (*_Assembler)._asm_OP_map_key_u32, - _OP_map_key_u64 : (*_Assembler)._asm_OP_map_key_u64, - _OP_map_key_f32 : (*_Assembler)._asm_OP_map_key_f32, - _OP_map_key_f64 : (*_Assembler)._asm_OP_map_key_f64, - _OP_map_key_str : (*_Assembler)._asm_OP_map_key_str, - _OP_map_key_utext : (*_Assembler)._asm_OP_map_key_utext, - _OP_map_key_utext_p : (*_Assembler)._asm_OP_map_key_utext_p, - _OP_array_skip : (*_Assembler)._asm_OP_array_skip, - _OP_array_clear : (*_Assembler)._asm_OP_array_clear, - _OP_array_clear_p : (*_Assembler)._asm_OP_array_clear_p, - _OP_slice_init : (*_Assembler)._asm_OP_slice_init, - _OP_slice_append : (*_Assembler)._asm_OP_slice_append, - _OP_object_skip : (*_Assembler)._asm_OP_object_skip, - _OP_object_next : (*_Assembler)._asm_OP_object_next, - _OP_struct_field : (*_Assembler)._asm_OP_struct_field, - _OP_unmarshal : (*_Assembler)._asm_OP_unmarshal, - _OP_unmarshal_p : (*_Assembler)._asm_OP_unmarshal_p, - _OP_unmarshal_text : (*_Assembler)._asm_OP_unmarshal_text, - _OP_unmarshal_text_p : (*_Assembler)._asm_OP_unmarshal_text_p, - _OP_lspace : (*_Assembler)._asm_OP_lspace, - _OP_match_char : (*_Assembler)._asm_OP_match_char, - _OP_check_char : (*_Assembler)._asm_OP_check_char, - _OP_load : (*_Assembler)._asm_OP_load, - _OP_save : (*_Assembler)._asm_OP_save, - _OP_drop : (*_Assembler)._asm_OP_drop, - _OP_drop_2 : (*_Assembler)._asm_OP_drop_2, - _OP_recurse : (*_Assembler)._asm_OP_recurse, - _OP_goto : (*_Assembler)._asm_OP_goto, - _OP_switch : (*_Assembler)._asm_OP_switch, - _OP_check_char_0 : (*_Assembler)._asm_OP_check_char_0, - _OP_dismatch_err : (*_Assembler)._asm_OP_dismatch_err, - _OP_go_skip : (*_Assembler)._asm_OP_go_skip, - _OP_add : (*_Assembler)._asm_OP_add, - _OP_check_empty : (*_Assembler)._asm_OP_check_empty, -} - -func (self *_Assembler) instr(v *_Instr) { - if fn := _OpFuncTab[v.op()]; fn != nil { - fn(self, v) - } else { - panic(fmt.Sprintf("invalid opcode: %d", v.op())) - } -} - -func (self *_Assembler) instrs() { - for i, v := range self.p { - self.Mark(i) - self.instr(&v) - self.debug_instr(i, &v) - } -} - -func (self *_Assembler) epilogue() { - self.Mark(len(self.p)) - self.Emit("XORL", _EP, _EP) // XORL EP, EP - self.Emit("MOVQ", _VAR_et, _ET) // MOVQ VAR_et, ET - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ", _LB_mismatch_error) // JNZ _LB_mismatch_error - self.Link(_LB_error) // _error: - self.Emit("MOVQ", _IC, _RET_rc) // MOVQ IC, rc<>+40(FP) - self.Emit("MOVQ", _ET, _RET_et) // MOVQ ET, et<>+48(FP) - self.Emit("MOVQ", _EP, _RET_ep) // MOVQ EP, ep<>+56(FP) - self.Emit("MOVQ", jit.Ptr(_SP, _FP_offs), _BP) // MOVQ _FP_offs(SP), BP - self.Emit("ADDQ", jit.Imm(_FP_size), _SP) // ADDQ $_FP_size, SP - self.Emit("RET") // RET -} - -func (self *_Assembler) prologue() { - self.Emit("SUBQ", jit.Imm(_FP_size), _SP) // SUBQ $_FP_size, SP - self.Emit("MOVQ", _BP, jit.Ptr(_SP, _FP_offs)) // MOVQ BP, _FP_offs(SP) - self.Emit("LEAQ", jit.Ptr(_SP, _FP_offs), _BP) // LEAQ _FP_offs(SP), BP - self.Emit("MOVQ", _ARG_sp, _IP) // MOVQ s.p<>+0(FP), IP - self.Emit("MOVQ", _ARG_sl, _IL) // MOVQ s.l<>+8(FP), IL - self.Emit("MOVQ", _ARG_ic, _IC) // MOVQ ic<>+16(FP), IC - self.Emit("MOVQ", _ARG_vp, _VP) // MOVQ vp<>+24(FP), VP - self.Emit("MOVQ", _ARG_sb, _ST) // MOVQ vp<>+32(FP), ST - // initialize digital buffer first - self.Emit("MOVQ", jit.Imm(_MaxDigitNums), _VAR_st_Dc) // MOVQ $_MaxDigitNums, ss.Dcap - self.Emit("LEAQ", jit.Ptr(_ST, _DbufOffset), _AX) // LEAQ _DbufOffset(ST), AX - self.Emit("MOVQ", _AX, _VAR_st_Db) // MOVQ AX, ss.Dbuf - self.Emit("XORL", _AX, _AX) // XORL AX, AX - self.Emit("MOVQ", _AX, _VAR_et) // MOVQ AX, ss.Dp -} - -/** Function Calling Helpers **/ - -var _REG_go = []obj.Addr { - _ST, - _VP, - _IP, - _IL, - _IC, -} - -func (self *_Assembler) save(r ...obj.Addr) { - for i, v := range r { - if i > _FP_saves / 8 - 1 { - panic("too many registers to save") - } else { - self.Emit("MOVQ", v, jit.Ptr(_SP, _FP_fargs + int64(i) * 8)) - } - } -} - -func (self *_Assembler) load(r ...obj.Addr) { - for i, v := range r { - if i > _FP_saves / 8 - 1 { - panic("too many registers to load") - } else { - self.Emit("MOVQ", jit.Ptr(_SP, _FP_fargs + int64(i) * 8), v) - } - } -} - -func (self *_Assembler) call(fn obj.Addr) { - self.Emit("MOVQ", fn, _AX) // MOVQ ${fn}, AX - self.Rjmp("CALL", _AX) // CALL AX -} - -func (self *_Assembler) call_go(fn obj.Addr) { - self.save(_REG_go...) // SAVE $REG_go - self.call(fn) // CALL ${fn} - self.load(_REG_go...) // LOAD $REG_go -} - -func (self *_Assembler) call_sf(fn obj.Addr) { - self.Emit("LEAQ", _ARG_s, _DI) // LEAQ s<>+0(FP), DI - self.Emit("MOVQ", _IC, _ARG_ic) // MOVQ IC, ic<>+16(FP) - self.Emit("LEAQ", _ARG_ic, _SI) // LEAQ ic<>+16(FP), SI - self.Emit("LEAQ", jit.Ptr(_ST, _FsmOffset), _DX) // LEAQ _FsmOffset(ST), DX - self.Emit("MOVQ", _ARG_fv, _CX) - self.call(fn) // CALL ${fn} - self.Emit("MOVQ", _ARG_ic, _IC) // MOVQ ic<>+16(FP), IC -} - -func (self *_Assembler) call_vf(fn obj.Addr) { - self.Emit("LEAQ", _ARG_s, _DI) // LEAQ s<>+0(FP), DI - self.Emit("MOVQ", _IC, _ARG_ic) // MOVQ IC, ic<>+16(FP) - self.Emit("LEAQ", _ARG_ic, _SI) // LEAQ ic<>+16(FP), SI - self.Emit("LEAQ", _VAR_st, _DX) // LEAQ st, DX - self.call(fn) // CALL ${fn} - self.Emit("MOVQ", _ARG_ic, _IC) // MOVQ ic<>+16(FP), IC -} - -/** Assembler Error Handlers **/ - -var ( - _F_convT64 = jit.Func(convT64) - _F_error_wrap = jit.Func(error_wrap) - _F_error_type = jit.Func(error_type) - _F_error_field = jit.Func(error_field) - _F_error_value = jit.Func(error_value) - _F_error_mismatch = jit.Func(error_mismatch) -) - -var ( - _I_int8 , _T_int8 = rtype(reflect.TypeOf(int8(0))) - _I_int16 , _T_int16 = rtype(reflect.TypeOf(int16(0))) - _I_int32 , _T_int32 = rtype(reflect.TypeOf(int32(0))) - _I_uint8 , _T_uint8 = rtype(reflect.TypeOf(uint8(0))) - _I_uint16 , _T_uint16 = rtype(reflect.TypeOf(uint16(0))) - _I_uint32 , _T_uint32 = rtype(reflect.TypeOf(uint32(0))) - _I_float32 , _T_float32 = rtype(reflect.TypeOf(float32(0))) -) - -var ( - _T_error = rt.UnpackType(errorType) - _I_base64_CorruptInputError = jit.Itab(_T_error, base64CorruptInputError) -) - -var ( - _V_stackOverflow = jit.Imm(int64(uintptr(unsafe.Pointer(&stackOverflow)))) - _I_json_UnsupportedValueError = jit.Itab(_T_error, reflect.TypeOf(new(json.UnsupportedValueError))) - _I_json_MismatchTypeError = jit.Itab(_T_error, reflect.TypeOf(new(MismatchTypeError))) -) - -func (self *_Assembler) type_error() { - self.Link(_LB_type_error) // _type_error: - self.Emit("MOVQ", _ET, jit.Ptr(_SP, 0)) // MOVQ ET, (SP) - self.call_go(_F_error_type) // CALL_GO error_type - self.Emit("MOVQ", jit.Ptr(_SP, 8), _ET) // MOVQ 8(SP), ET - self.Emit("MOVQ", jit.Ptr(_SP, 16), _EP) // MOVQ 16(SP), EP - self.Sjmp("JMP" , _LB_error) // JMP _error -} - - -func (self *_Assembler) mismatch_error() { - self.Link(_LB_mismatch_error) // _type_error: - self.Emit("MOVQ", _VAR_et, _ET) // MOVQ _VAR_et, ET - self.Emit("MOVQ", _VAR_ic, _EP) // MOVQ _VAR_ic, EP - self.Emit("MOVQ", _I_json_MismatchTypeError, _AX) // MOVQ _I_json_MismatchTypeError, AX - self.Emit("CMPQ", _ET, _AX) // CMPQ ET, AX - self.Sjmp("JE" , _LB_error) // JE _LB_error - self.Emit("MOVQ", _ARG_sp, _AX) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ", _ARG_sl, _CX) - self.Emit("MOVQ", _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) - self.Emit("MOVQ", _VAR_ic, _AX) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 16)) // MOVQ AX, 16(SP) - self.Emit("MOVQ", _VAR_et, _CX) - self.Emit("MOVQ", _CX, jit.Ptr(_SP, 24)) // MOVQ CX, 24(SP) - self.call_go(_F_error_mismatch) // CALL_GO error_type - self.Emit("MOVQ", jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET - self.Emit("MOVQ", jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -func (self *_Assembler) _asm_OP_dismatch_err(p *_Instr) { - self.Emit("MOVQ", _IC, _VAR_ic) - self.Emit("MOVQ", jit.Type(p.vt()), _ET) - self.Emit("MOVQ", _ET, _VAR_et) -} - -func (self *_Assembler) _asm_OP_go_skip(p *_Instr) { - self.Byte(0x4c, 0x8d, 0x0d) // LEAQ (PC), R9 - self.Xref(p.vi(), 4) - self.Emit("MOVQ", _R9, _VAR_pc) - self.Sjmp("JMP" , _LB_skip_one) // JMP _skip_one -} - -func (self *_Assembler) skip_one() { - self.Link(_LB_skip_one) // _skip: - self.Emit("MOVQ", _VAR_ic, _IC) // MOVQ _VAR_ic, IC - self.call_sf(_F_skip_one) // CALL_SF skip_one - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v - self.Emit("MOVQ" , _VAR_pc, _R9) // MOVQ pc, R9 - self.Rjmp("JMP" , _R9) // JMP (R9) -} - - -func (self *_Assembler) skip_key_value() { - self.Link(_LB_skip_key_value) // _skip: - // skip the key - self.Emit("MOVQ", _VAR_ic, _IC) // MOVQ _VAR_ic, IC - self.call_sf(_F_skip_one) // CALL_SF skip_one - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v - // match char ':' - self.lspace("_global_1") - self.Emit("CMPB", jit.Sib(_IP, _IC, 1, 0), jit.Imm(':')) - self.Sjmp("JNE" , _LB_parsing_error_v) // JNE _parse_error_v - self.Emit("ADDQ", jit.Imm(1), _IC) // ADDQ $1, IC - self.lspace("_global_2") - // skip the value - self.call_sf(_F_skip_one) // CALL_SF skip_one - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v - // jump back to specified address - self.Emit("MOVQ" , _VAR_pc, _R9) // MOVQ pc, R9 - self.Rjmp("JMP" , _R9) // JMP (R9) -} - -func (self *_Assembler) field_error() { - self.Link(_LB_field_error) // _field_error: - self.Emit("MOVOU", _VAR_sv, _X0) // MOVOU sv, X0 - self.Emit("MOVOU", _X0, jit.Ptr(_SP, 0)) // MOVOU X0, (SP) - self.call_go(_F_error_field) // CALL_GO error_field - self.Emit("MOVQ" , jit.Ptr(_SP, 16), _ET) // MOVQ 16(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 24), _EP) // MOVQ 24(SP), EP - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -func (self *_Assembler) range_error() { - self.Link(_LB_range_error) // _range_error: - self.slice_from(_VAR_st_Ep, 0) // SLICE st.Ep, $0 - self.Emit("MOVQ", _DI, jit.Ptr(_SP, 0)) // MOVQ DI, (SP) - self.Emit("MOVQ", _SI, jit.Ptr(_SP, 8)) // MOVQ SI, 8(SP) - self.Emit("MOVQ", _ET, jit.Ptr(_SP, 16)) // MOVQ ET, 16(SP) - self.Emit("MOVQ", _EP, jit.Ptr(_SP, 24)) // MOVQ EP, 24(SP) - self.call_go(_F_error_value) // CALL_GO error_value - self.Emit("MOVQ", jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET - self.Emit("MOVQ", jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -func (self *_Assembler) stack_error() { - self.Link(_LB_stack_error) // _stack_error: - self.Emit("MOVQ", _V_stackOverflow, _EP) // MOVQ ${_V_stackOverflow}, EP - self.Emit("MOVQ", _I_json_UnsupportedValueError, _ET) // MOVQ ${_I_json_UnsupportedValueError}, ET - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -func (self *_Assembler) base64_error() { - self.Link(_LB_base64_error) - self.Emit("NEGQ", _AX) // NEGQ AX - self.Emit("SUBQ", jit.Imm(1), _AX) // SUBQ $1, AX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.call_go(_F_convT64) // CALL_GO convT64 - self.Emit("MOVQ", jit.Ptr(_SP, 8), _EP) // MOVQ 8(SP), EP - self.Emit("MOVQ", _I_base64_CorruptInputError, _ET) // MOVQ ${itab(base64.CorruptInputError)}, ET - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -func (self *_Assembler) parsing_error() { - self.Link(_LB_eof_error) // _eof_error: - self.Emit("MOVQ" , _IL, _IC) // MOVQ IL, IC - self.Emit("MOVL" , jit.Imm(int64(types.ERR_EOF)), _EP) // MOVL ${types.ERR_EOF}, EP - self.Sjmp("JMP" , _LB_parsing_error) // JMP _parsing_error - self.Link(_LB_unquote_error) // _unquote_error: - self.Emit("SUBQ" , _VAR_sr, _SI) // SUBQ sr, SI - self.Emit("SUBQ" , _SI, _IC) // SUBQ IL, IC - self.Link(_LB_parsing_error_v) // _parsing_error_v: - self.Emit("MOVQ" , _AX, _EP) // MOVQ AX, EP - self.Emit("NEGQ" , _EP) // NEGQ EP - self.Sjmp("JMP" , _LB_parsing_error) // JMP _parsing_error - self.Link(_LB_char_m3_error) // _char_m3_error: - self.Emit("SUBQ" , jit.Imm(1), _IC) // SUBQ $1, IC - self.Link(_LB_char_m2_error) // _char_m2_error: - self.Emit("SUBQ" , jit.Imm(2), _IC) // SUBQ $2, IC - self.Sjmp("JMP" , _LB_char_0_error) // JMP _char_0_error - self.Link(_LB_im_error) // _im_error: - self.Emit("CMPB" , _CX, jit.Sib(_IP, _IC, 1, 0)) // CMPB CX, (IP)(IC) - self.Sjmp("JNE" , _LB_char_0_error) // JNE _char_0_error - self.Emit("SHRL" , jit.Imm(8), _CX) // SHRL $8, CX - self.Emit("CMPB" , _CX, jit.Sib(_IP, _IC, 1, 1)) // CMPB CX, 1(IP)(IC) - self.Sjmp("JNE" , _LB_char_1_error) // JNE _char_1_error - self.Emit("SHRL" , jit.Imm(8), _CX) // SHRL $8, CX - self.Emit("CMPB" , _CX, jit.Sib(_IP, _IC, 1, 2)) // CMPB CX, 2(IP)(IC) - self.Sjmp("JNE" , _LB_char_2_error) // JNE _char_2_error - self.Sjmp("JMP" , _LB_char_3_error) // JNE _char_3_error - self.Link(_LB_char_4_error) // _char_4_error: - self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC - self.Link(_LB_char_3_error) // _char_3_error: - self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC - self.Link(_LB_char_2_error) // _char_2_error: - self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC - self.Link(_LB_char_1_error) // _char_1_error: - self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC - self.Link(_LB_char_0_error) // _char_0_error: - self.Emit("MOVL" , jit.Imm(int64(types.ERR_INVALID_CHAR)), _EP) // MOVL ${types.ERR_INVALID_CHAR}, EP - self.Link(_LB_parsing_error) // _parsing_error: - self.Emit("MOVOU", _ARG_s, _X0) // MOVOU s, X0 - self.Emit("MOVOU", _X0, jit.Ptr(_SP, 0)) // MOVOU X0, (SP) - self.Emit("MOVQ" , _IC, jit.Ptr(_SP, 16)) // MOVQ IC, 16(SP) - self.Emit("MOVQ" , _EP, jit.Ptr(_SP, 24)) // MOVQ EP, 24(SP) - self.call_go(_F_error_wrap) // CALL_GO error_wrap - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -/** Memory Management Routines **/ - -var ( - _T_byte = jit.Type(byteType) - _F_mallocgc = jit.Func(mallocgc) -) - -func (self *_Assembler) malloc(nb obj.Addr, ret obj.Addr) { - self.Emit("XORL", _AX, _AX) // XORL AX, AX - self.Emit("MOVQ", _T_byte, _CX) // MOVQ ${type(byte)}, CX - self.Emit("MOVQ", nb, jit.Ptr(_SP, 0)) // MOVQ ${nb}, (SP) - self.Emit("MOVQ", _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 16)) // MOVQ AX, 16(SP) - self.call_go(_F_mallocgc) // CALL_GO mallocgc - self.Emit("MOVQ", jit.Ptr(_SP, 24), ret) // MOVQ 24(SP), ${ret} -} - -func (self *_Assembler) valloc(vt reflect.Type, ret obj.Addr) { - self.Emit("MOVQ", jit.Imm(int64(vt.Size())), _AX) // MOVQ ${vt.Size()}, AX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ", jit.Type(vt), _AX) // MOVQ ${vt}, AX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - self.Emit("MOVB", jit.Imm(1), jit.Ptr(_SP, 16)) // MOVB $1, 16(SP) - self.call_go(_F_mallocgc) // CALL_GO mallocgc - self.Emit("MOVQ", jit.Ptr(_SP, 24), ret) // MOVQ 24(SP), ${ret} -} - -func (self *_Assembler) vfollow(vt reflect.Type) { - self.Emit("MOVQ" , jit.Ptr(_VP, 0), _AX) // MOVQ (VP), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JNZ" , "_end_{n}") // JNZ _end_{n} - self.valloc(vt, _AX) // VALLOC ${vt}, AX - self.WritePtrAX(1, jit.Ptr(_VP, 0), false) // MOVQ AX, (VP) - self.Link("_end_{n}") // _end_{n}: - self.Emit("MOVQ" , _AX, _VP) // MOVQ AX, VP -} - -/** Value Parsing Routines **/ - -var ( - _F_vstring = jit.Imm(int64(native.S_vstring)) - _F_vnumber = jit.Imm(int64(native.S_vnumber)) - _F_vsigned = jit.Imm(int64(native.S_vsigned)) - _F_vunsigned = jit.Imm(int64(native.S_vunsigned)) -) - -func (self *_Assembler) check_err(vt reflect.Type, pin string, pin2 int) { - self.Emit("MOVQ" , _VAR_st_Vt, _AX) // MOVQ st.Vt, AX - self.Emit("TESTQ", _AX, _AX) // CMPQ AX, ${native.V_STRING} - // try to skip the value - if vt != nil { - self.Sjmp("JNS" , "_check_err_{n}") // JNE _parsing_error_v - self.Emit("MOVQ", jit.Type(vt), _ET) - self.Emit("MOVQ", _ET, _VAR_et) - if pin2 != -1 { - self.Emit("SUBQ", jit.Imm(1), _BP) - self.Emit("MOVQ", _BP, _VAR_ic) - self.Byte(0x4c , 0x8d, 0x0d) // LEAQ (PC), R9 - self.Xref(pin2, 4) - self.Emit("MOVQ", _R9, _VAR_pc) - self.Sjmp("JMP" , _LB_skip_key_value) - } else { - self.Emit("MOVQ", _BP, _VAR_ic) - self.Byte(0x4c , 0x8d, 0x0d) // LEAQ (PC), R9 - self.Sref(pin, 4) - self.Emit("MOVQ", _R9, _VAR_pc) - self.Sjmp("JMP" , _LB_skip_one) - } - self.Link("_check_err_{n}") - } else { - self.Sjmp("JS" , _LB_parsing_error_v) // JNE _parsing_error_v - } -} - -func (self *_Assembler) check_eof(d int64) { - if d == 1 { - self.Emit("CMPQ", _IC, _IL) // CMPQ IC, IL - self.Sjmp("JAE" , _LB_eof_error) // JAE _eof_error - } else { - self.Emit("LEAQ", jit.Ptr(_IC, d), _AX) // LEAQ ${d}(IC), AX - self.Emit("CMPQ", _AX, _IL) // CMPQ AX, IL - self.Sjmp("JA" , _LB_eof_error) // JA _eof_error - } -} - -func (self *_Assembler) parse_string() { // parse_string has a validate flag params in the last - self.Emit("MOVQ", _ARG_fv, _CX) - self.call_vf(_F_vstring) - self.check_err(nil, "", -1) -} - -func (self *_Assembler) parse_number(vt reflect.Type, pin string, pin2 int) { - self.Emit("MOVQ", _IC, _BP) - self.call_vf(_F_vnumber) // call vnumber - self.check_err(vt, pin, pin2) -} - -func (self *_Assembler) parse_signed(vt reflect.Type, pin string, pin2 int) { - self.Emit("MOVQ", _IC, _BP) - self.call_vf(_F_vsigned) - self.check_err(vt, pin, pin2) -} - -func (self *_Assembler) parse_unsigned(vt reflect.Type, pin string, pin2 int) { - self.Emit("MOVQ", _IC, _BP) - self.call_vf(_F_vunsigned) - self.check_err(vt, pin, pin2) -} - -// Pointer: DI, Size: SI, Return: R9 -func (self *_Assembler) copy_string() { - self.Link("_copy_string") - self.Emit("MOVQ", _DI, _VAR_bs_p) - self.Emit("MOVQ", _SI, _VAR_bs_n) - self.Emit("MOVQ", _R9, _VAR_bs_LR) - self.malloc(_SI, _AX) - self.Emit("MOVQ", _AX, _VAR_sv_p) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) - self.Emit("MOVQ", _VAR_bs_p, _DI) - self.Emit("MOVQ", _DI, jit.Ptr(_SP, 8)) - self.Emit("MOVQ", _VAR_bs_n, _SI) - self.Emit("MOVQ", _SI, jit.Ptr(_SP, 16)) - self.call_go(_F_memmove) - self.Emit("MOVQ", _VAR_sv_p, _DI) - self.Emit("MOVQ", _VAR_bs_n, _SI) - self.Emit("MOVQ", _VAR_bs_LR, _R9) - self.Rjmp("JMP", _R9) -} - -// Pointer: DI, Size: SI, Return: R9 -func (self *_Assembler) escape_string() { - self.Link("_escape_string") - self.Emit("MOVQ" , _DI, _VAR_bs_p) - self.Emit("MOVQ" , _SI, _VAR_bs_n) - self.Emit("MOVQ" , _R9, _VAR_bs_LR) - self.malloc(_SI, _DX) // MALLOC SI, DX - self.Emit("MOVQ" , _DX, _VAR_sv_p) - self.Emit("MOVQ" , _VAR_bs_p, _DI) - self.Emit("MOVQ" , _VAR_bs_n, _SI) - self.Emit("LEAQ" , _VAR_sr, _CX) // LEAQ sr, CX - self.Emit("XORL" , _R8, _R8) // XORL R8, R8 - self.Emit("BTQ" , jit.Imm(_F_disable_urc), _ARG_fv) // BTQ ${_F_disable_urc}, fv - self.Emit("SETCC", _R8) // SETCC R8 - self.Emit("SHLQ" , jit.Imm(types.B_UNICODE_REPLACE), _R8) // SHLQ ${types.B_UNICODE_REPLACE}, R8 - self.call(_F_unquote) // CALL unquote - self.Emit("MOVQ" , _VAR_bs_n, _SI) // MOVQ ${n}, SI - self.Emit("ADDQ" , jit.Imm(1), _SI) // ADDQ $1, SI - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_unquote_error) // JS _unquote_error - self.Emit("MOVQ" , _AX, _SI) - self.Emit("MOVQ" , _VAR_sv_p, _DI) - self.Emit("MOVQ" , _VAR_bs_LR, _R9) - self.Rjmp("JMP", _R9) -} - -func (self *_Assembler) escape_string_twice() { - self.Link("_escape_string_twice") - self.Emit("MOVQ" , _DI, _VAR_bs_p) - self.Emit("MOVQ" , _SI, _VAR_bs_n) - self.Emit("MOVQ" , _R9, _VAR_bs_LR) - self.malloc(_SI, _DX) // MALLOC SI, DX - self.Emit("MOVQ" , _DX, _VAR_sv_p) - self.Emit("MOVQ" , _VAR_bs_p, _DI) - self.Emit("MOVQ" , _VAR_bs_n, _SI) - self.Emit("LEAQ" , _VAR_sr, _CX) // LEAQ sr, CX - self.Emit("MOVL" , jit.Imm(types.F_DOUBLE_UNQUOTE), _R8) // MOVL ${types.F_DOUBLE_UNQUOTE}, R8 - self.Emit("BTQ" , jit.Imm(_F_disable_urc), _ARG_fv) // BTQ ${_F_disable_urc}, AX - self.Emit("XORL" , _AX, _AX) // XORL AX, AX - self.Emit("SETCC", _AX) // SETCC AX - self.Emit("SHLQ" , jit.Imm(types.B_UNICODE_REPLACE), _AX) // SHLQ ${types.B_UNICODE_REPLACE}, AX - self.Emit("ORQ" , _AX, _R8) // ORQ AX, R8 - self.call(_F_unquote) // CALL unquote - self.Emit("MOVQ" , _VAR_bs_n, _SI) // MOVQ ${n}, SI - self.Emit("ADDQ" , jit.Imm(3), _SI) // ADDQ $3, SI - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_unquote_error) // JS _unquote_error - self.Emit("MOVQ" , _AX, _SI) - self.Emit("MOVQ" , _VAR_sv_p, _DI) - self.Emit("MOVQ" , _VAR_bs_LR, _R9) - self.Rjmp("JMP", _R9) -} - -/** Range Checking Routines **/ - -var ( - _V_max_f32 = jit.Imm(int64(uintptr(unsafe.Pointer(_Vp_max_f32)))) - _V_min_f32 = jit.Imm(int64(uintptr(unsafe.Pointer(_Vp_min_f32)))) -) - -var ( - _Vp_max_f32 = new(float32) - _Vp_min_f32 = new(float32) -) - -func init() { - *_Vp_max_f32 = math.MaxFloat32 - *_Vp_min_f32 = -math.MaxFloat32 -} - -func (self *_Assembler) range_single() { - self.Emit("CVTSD2SS", _VAR_st_Dv, _X0) // CVTSD2SS st.Dv, X0 - self.Emit("MOVQ" , _V_max_f32, _AX) // MOVQ _max_f32, AX - self.Emit("MOVQ" , jit.Gitab(_I_float32), _ET) // MOVQ ${itab(float32)}, ET - self.Emit("MOVQ" , jit.Gtype(_T_float32), _EP) // MOVQ ${type(float32)}, EP - self.Emit("UCOMISS" , jit.Ptr(_AX, 0), _X0) // UCOMISS (AX), X0 - self.Sjmp("JA" , _LB_range_error) // JA _range_error - self.Emit("MOVQ" , _V_min_f32, _AX) // MOVQ _min_f32, AX - self.Emit("UCOMISS" , jit.Ptr(_AX, 0), _X0) // UCOMISS (AX), X0 - self.Sjmp("JB" , _LB_range_error) // JB _range_error -} - -func (self *_Assembler) range_signed(i *rt.GoItab, t *rt.GoType, a int64, b int64) { - self.Emit("MOVQ", _VAR_st_Iv, _AX) // MOVQ st.Iv, AX - self.Emit("MOVQ", jit.Gitab(i), _ET) // MOVQ ${i}, ET - self.Emit("MOVQ", jit.Gtype(t), _EP) // MOVQ ${t}, EP - self.Emit("CMPQ", _AX, jit.Imm(a)) // CMPQ AX, ${a} - self.Sjmp("JL" , _LB_range_error) // JL _range_error - self.Emit("CMPQ", _AX, jit.Imm(b)) // CMPQ AX, ${B} - self.Sjmp("JG" , _LB_range_error) // JG _range_error -} - -func (self *_Assembler) range_unsigned(i *rt.GoItab, t *rt.GoType, v uint64) { - self.Emit("MOVQ" , _VAR_st_Iv, _AX) // MOVQ st.Iv, AX - self.Emit("MOVQ" , jit.Gitab(i), _ET) // MOVQ ${i}, ET - self.Emit("MOVQ" , jit.Gtype(t), _EP) // MOVQ ${t}, EP - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_range_error) // JS _range_error - self.Emit("CMPQ" , _AX, jit.Imm(int64(v))) // CMPQ AX, ${a} - self.Sjmp("JA" , _LB_range_error) // JA _range_error -} - -/** String Manipulating Routines **/ - -var ( - _F_unquote = jit.Imm(int64(native.S_unquote)) -) - -func (self *_Assembler) slice_from(p obj.Addr, d int64) { - self.Emit("MOVQ", p, _SI) // MOVQ ${p}, SI - self.slice_from_r(_SI, d) // SLICE_R SI, ${d} -} - -func (self *_Assembler) slice_from_r(p obj.Addr, d int64) { - self.Emit("LEAQ", jit.Sib(_IP, p, 1, 0), _DI) // LEAQ (IP)(${p}), DI - self.Emit("NEGQ", p) // NEGQ ${p} - self.Emit("LEAQ", jit.Sib(_IC, p, 1, d), _SI) // LEAQ d(IC)(${p}), SI -} - -func (self *_Assembler) unquote_once(p obj.Addr, n obj.Addr, stack bool, copy bool) { - self.slice_from(_VAR_st_Iv, -1) // SLICE st.Iv, $-1 - self.Emit("CMPQ" , _VAR_st_Ep, jit.Imm(-1)) // CMPQ st.Ep, $-1 - self.Sjmp("JE" , "_noescape_{n}") // JE _noescape_{n} - self.Byte(0x4c, 0x8d, 0x0d) // LEAQ (PC), R9 - self.Sref("_unquote_once_write_{n}", 4) - self.Sjmp("JMP" , "_escape_string") - self.Link("_noescape_{n}") // _noescape_{n}: - if copy { - self.Emit("BTQ" , jit.Imm(_F_copy_string), _ARG_fv) - self.Sjmp("JNC", "_unquote_once_write_{n}") - self.Byte(0x4c, 0x8d, 0x0d) // LEAQ (PC), R9 - self.Sref("_unquote_once_write_{n}", 4) - self.Sjmp("JMP", "_copy_string") - } - self.Link("_unquote_once_write_{n}") - self.Emit("MOVQ" , _SI, n) // MOVQ SI, ${n} - if stack { - self.Emit("MOVQ", _DI, p) - } else { - self.WriteRecNotAX(10, _DI, p, false, false) - } -} - -func (self *_Assembler) unquote_twice(p obj.Addr, n obj.Addr, stack bool) { - self.Emit("CMPQ" , _VAR_st_Ep, jit.Imm(-1)) // CMPQ st.Ep, $-1 - self.Sjmp("JE" , _LB_eof_error) // JE _eof_error - self.Emit("CMPB" , jit.Sib(_IP, _IC, 1, -3), jit.Imm('\\')) // CMPB -3(IP)(IC), $'\\' - self.Sjmp("JNE" , _LB_char_m3_error) // JNE _char_m3_error - self.Emit("CMPB" , jit.Sib(_IP, _IC, 1, -2), jit.Imm('"')) // CMPB -2(IP)(IC), $'"' - self.Sjmp("JNE" , _LB_char_m2_error) // JNE _char_m2_error - self.slice_from(_VAR_st_Iv, -3) // SLICE st.Iv, $-3 - self.Emit("MOVQ" , _SI, _AX) // MOVQ SI, AX - self.Emit("ADDQ" , _VAR_st_Iv, _AX) // ADDQ st.Iv, AX - self.Emit("CMPQ" , _VAR_st_Ep, _AX) // CMPQ st.Ep, AX - self.Sjmp("JE" , "_noescape_{n}") // JE _noescape_{n} - self.Byte(0x4c, 0x8d, 0x0d) // LEAQ (PC), R9 - self.Sref("_unquote_twice_write_{n}", 4) - self.Sjmp("JMP" , "_escape_string_twice") - self.Link("_noescape_{n}") // _noescape_{n}: - self.Emit("BTQ" , jit.Imm(_F_copy_string), _ARG_fv) - self.Sjmp("JNC", "_unquote_twice_write_{n}") - self.Byte(0x4c, 0x8d, 0x0d) // LEAQ (PC), R9 - self.Sref("_unquote_twice_write_{n}", 4) - self.Sjmp("JMP", "_copy_string") - self.Link("_unquote_twice_write_{n}") - self.Emit("MOVQ" , _SI, n) // MOVQ SI, ${n} - if stack { - self.Emit("MOVQ", _DI, p) - } else { - self.WriteRecNotAX(12, _DI, p, false, false) - } -} - -/** Memory Clearing Routines **/ - -var ( - _F_memclrHasPointers = jit.Func(memclrHasPointers) - _F_memclrNoHeapPointers = jit.Func(memclrNoHeapPointers) -) - -func (self *_Assembler) mem_clear_fn(ptrfree bool) { - if !ptrfree { - self.call_go(_F_memclrHasPointers) - } else { - self.call_go(_F_memclrNoHeapPointers) - } -} - -func (self *_Assembler) mem_clear_rem(size int64, ptrfree bool) { - self.Emit("MOVQ", jit.Imm(size), _CX) // MOVQ ${size}, CX - self.Emit("MOVQ", jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("MOVQ", jit.Sib(_ST, _AX, 1, 0), _AX) // MOVQ (ST)(AX), AX - self.Emit("SUBQ", _VP, _AX) // SUBQ VP, AX - self.Emit("ADDQ", _AX, _CX) // ADDQ AX, CX - self.Emit("MOVQ", _VP, jit.Ptr(_SP, 0)) // MOVQ VP, (SP) - self.Emit("MOVQ", _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) - self.mem_clear_fn(ptrfree) // CALL_GO memclr{Has,NoHeap}Pointers -} - -/** Map Assigning Routines **/ - -var ( - _F_mapassign = jit.Func(mapassign) - _F_mapassign_fast32 = jit.Func(mapassign_fast32) - _F_mapassign_faststr = jit.Func(mapassign_faststr) - _F_mapassign_fast64ptr = jit.Func(mapassign_fast64ptr) -) - -var ( - _F_decodeJsonUnmarshaler obj.Addr - _F_decodeTextUnmarshaler obj.Addr -) - -func init() { - _F_decodeJsonUnmarshaler = jit.Func(decodeJsonUnmarshaler) - _F_decodeTextUnmarshaler = jit.Func(decodeTextUnmarshaler) -} - -func (self *_Assembler) mapaccess_ptr(t reflect.Type) { - if rt.MapType(rt.UnpackType(t)).IndirectElem() { - self.vfollow(t.Elem()) - } -} - -func (self *_Assembler) mapassign_std(t reflect.Type, v obj.Addr) { - self.Emit("LEAQ", v, _AX) // LEAQ ${v}, AX - self.mapassign_call(t, _F_mapassign) // MAPASSIGN ${t}, mapassign -} - -func (self *_Assembler) mapassign_str_fast(t reflect.Type, p obj.Addr, n obj.Addr) { - self.Emit("MOVQ", jit.Type(t), _AX) // MOVQ ${t}, AX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ", _VP, jit.Ptr(_SP, 8)) // MOVQ VP, 8(SP) - self.Emit("MOVQ", p, jit.Ptr(_SP, 16)) // MOVQ ${p}, 16(SP) - self.Emit("MOVQ", n, jit.Ptr(_SP, 24)) // MOVQ ${n}, 24(SP) - self.call_go(_F_mapassign_faststr) // CALL_GO ${fn} - self.Emit("MOVQ", jit.Ptr(_SP, 32), _VP) // MOVQ 32(SP), VP - self.mapaccess_ptr(t) -} - -func (self *_Assembler) mapassign_call(t reflect.Type, fn obj.Addr) { - self.Emit("MOVQ", jit.Type(t), _SI) // MOVQ ${t}, SI - self.Emit("MOVQ", _SI, jit.Ptr(_SP, 0)) // MOVQ SI, (SP) - self.Emit("MOVQ", _VP, jit.Ptr(_SP, 8)) // MOVQ VP, 8(SP) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 16)) // MOVQ AX, 16(SP) - self.call_go(fn) // CALL_GO ${fn} - self.Emit("MOVQ", jit.Ptr(_SP, 24), _VP) // MOVQ 24(SP), VP -} - -func (self *_Assembler) mapassign_fastx(t reflect.Type, fn obj.Addr) { - self.mapassign_call(t, fn) - self.mapaccess_ptr(t) -} - -func (self *_Assembler) mapassign_utext(t reflect.Type, addressable bool) { - pv := false - vk := t.Key() - tk := t.Key() - - /* deref pointer if needed */ - if vk.Kind() == reflect.Ptr { - pv = true - vk = vk.Elem() - } - - /* addressable value with pointer receiver */ - if addressable { - pv = false - tk = reflect.PtrTo(tk) - } - - /* allocate the key, and call the unmarshaler */ - self.valloc(vk, _DI) // VALLOC ${vk}, DI - // must spill vk pointer since next call_go may invoke GC - self.Emit("MOVQ" , _DI, _VAR_vk) - self.Emit("MOVQ" , jit.Type(tk), _AX) // MOVQ ${tk}, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ" , _DI, jit.Ptr(_SP, 8)) // MOVQ DI, 8(SP) - self.Emit("MOVOU", _VAR_sv, _X0) // MOVOU sv, X0 - self.Emit("MOVOU", _X0, jit.Ptr(_SP, 16)) // MOVOU X0, 16(SP) - self.call_go(_F_decodeTextUnmarshaler) // CALL_GO decodeTextUnmarshaler - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error - self.Emit("MOVQ" , _VAR_vk, _AX) - - /* select the correct assignment function */ - if !pv { - self.mapassign_call(t, _F_mapassign) - } else { - self.mapassign_fastx(t, _F_mapassign_fast64ptr) - } -} - -/** External Unmarshaler Routines **/ - -var ( - _F_skip_one = jit.Imm(int64(native.S_skip_one)) - _F_skip_number = jit.Imm(int64(native.S_skip_number)) -) - -func (self *_Assembler) unmarshal_json(t reflect.Type, deref bool) { - self.call_sf(_F_skip_one) // CALL_SF skip_one - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v - self.slice_from_r(_AX, 0) // SLICE_R AX, $0 - self.Emit("MOVQ" , _DI, _VAR_sv_p) // MOVQ DI, sv.p - self.Emit("MOVQ" , _SI, _VAR_sv_n) // MOVQ SI, sv.n - self.unmarshal_func(t, _F_decodeJsonUnmarshaler, deref) // UNMARSHAL json, ${t}, ${deref} -} - -func (self *_Assembler) unmarshal_text(t reflect.Type, deref bool) { - self.parse_string() // PARSE STRING - self.unquote_once(_VAR_sv_p, _VAR_sv_n, true, true) // UNQUOTE once, sv.p, sv.n - self.unmarshal_func(t, _F_decodeTextUnmarshaler, deref) // UNMARSHAL text, ${t}, ${deref} -} - -func (self *_Assembler) unmarshal_func(t reflect.Type, fn obj.Addr, deref bool) { - pt := t - vk := t.Kind() - - /* allocate the field if needed */ - if deref && vk == reflect.Ptr { - self.Emit("MOVQ" , _VP, _AX) // MOVQ VP, AX - self.Emit("MOVQ" , jit.Ptr(_AX, 0), _AX) // MOVQ (AX), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JNZ" , "_deref_{n}") // JNZ _deref_{n} - self.valloc(t.Elem(), _AX) // VALLOC ${t.Elem()}, AX - self.WritePtrAX(3, jit.Ptr(_VP, 0), false) // MOVQ AX, (VP) - self.Link("_deref_{n}") // _deref_{n}: - } - - /* set value type */ - self.Emit("MOVQ", jit.Type(pt), _CX) // MOVQ ${pt}, CX - self.Emit("MOVQ", _CX, jit.Ptr(_SP, 0)) // MOVQ CX, (SP) - - /* set value pointer */ - if deref && vk == reflect.Ptr { - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - } else { - self.Emit("MOVQ", _VP, jit.Ptr(_SP, 8)) // MOVQ VP, 8(SP) - } - - /* set the source string and call the unmarshaler */ - self.Emit("MOVOU", _VAR_sv, _X0) // MOVOU sv, X0 - self.Emit("MOVOU", _X0, jit.Ptr(_SP, 16)) // MOVOU X0, 16(SP) - self.call_go(fn) // CALL_GO ${fn} - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error -} - -/** Dynamic Decoding Routine **/ - -var ( - _F_decodeTypedPointer obj.Addr -) - -func init() { - _F_decodeTypedPointer = jit.Func(decodeTypedPointer) -} - -func (self *_Assembler) decode_dynamic(vt obj.Addr, vp obj.Addr) { - self.Emit("MOVQ" , _ARG_fv, _CX) // MOVQ fv, CX - self.Emit("MOVOU", _ARG_sp, _X0) // MOVOU sp, X0 - self.Emit("MOVOU", _X0, jit.Ptr(_SP, 0)) // MOVOU X0, (SP) - self.Emit("MOVQ" , _IC, jit.Ptr(_SP, 16)) // MOVQ IC, 16(SP) - self.Emit("MOVQ" , vt, jit.Ptr(_SP, 24)) // MOVQ ${vt}, 24(SP) - self.Emit("MOVQ" , vp, jit.Ptr(_SP, 32)) // MOVQ ${vp}, 32(SP) - self.Emit("MOVQ" , _ST, jit.Ptr(_SP, 40)) // MOVQ ST, 40(SP) - self.Emit("MOVQ" , _CX, jit.Ptr(_SP, 48)) // MOVQ CX, 48(SP) - self.call_go(_F_decodeTypedPointer) // CALL_GO decodeTypedPointer - self.Emit("MOVQ" , jit.Ptr(_SP, 64), _ET) // MOVQ 64(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 72), _EP) // MOVQ 72(SP), EP - self.Emit("MOVQ" , jit.Ptr(_SP, 56), _IC) // MOVQ 56(SP), IC - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JE", "_decode_dynamic_end_{n}") // JE, _decode_dynamic_end_{n} - self.Emit("MOVQ", _I_json_MismatchTypeError, _AX) // MOVQ _I_json_MismatchTypeError, AX - self.Emit("CMPQ", _ET, _AX) // CMPQ ET, AX - self.Sjmp("JNE" , _LB_error) // JNE LB_error - self.Emit("MOVQ", _EP, _VAR_ic) // MOVQ EP, VAR_ic - self.Emit("MOVQ", _ET, _VAR_et) // MOVQ ET, VAR_et - self.Link("_decode_dynamic_end_{n}") - -} - -/** OpCode Assembler Functions **/ - -var ( - _F_memequal = jit.Func(memequal) - _F_memmove = jit.Func(memmove) - _F_growslice = jit.Func(growslice) - _F_makeslice = jit.Func(makeslice) - _F_makemap_small = jit.Func(makemap_small) - _F_mapassign_fast64 = jit.Func(mapassign_fast64) -) - -var ( - _F_lspace = jit.Imm(int64(native.S_lspace)) - _F_strhash = jit.Imm(int64(caching.S_strhash)) -) - -var ( - _F_b64decode = jit.Imm(int64(_subr__b64decode)) - _F_decodeValue = jit.Imm(int64(_subr_decode_value)) -) - -var ( - _F_skip_array = jit.Imm(int64(native.S_skip_array)) - _F_skip_object = jit.Imm(int64(native.S_skip_object)) -) - -var ( - _F_FieldMap_GetCaseInsensitive obj.Addr - _Empty_Slice = make([]byte, 0) - _Zero_Base = int64(uintptr(((*rt.GoSlice)(unsafe.Pointer(&_Empty_Slice))).Ptr)) -) - -const ( - _MODE_AVX2 = 1 << 2 -) - -const ( - _Fe_ID = int64(unsafe.Offsetof(caching.FieldEntry{}.ID)) - _Fe_Name = int64(unsafe.Offsetof(caching.FieldEntry{}.Name)) - _Fe_Hash = int64(unsafe.Offsetof(caching.FieldEntry{}.Hash)) -) - -const ( - _Vk_Ptr = int64(reflect.Ptr) - _Gt_KindFlags = int64(unsafe.Offsetof(rt.GoType{}.KindFlags)) -) - -func init() { - _F_FieldMap_GetCaseInsensitive = jit.Func((*caching.FieldMap).GetCaseInsensitive) -} - -func (self *_Assembler) _asm_OP_any(_ *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_VP, 8), _CX) // MOVQ 8(VP), CX - self.Emit("TESTQ" , _CX, _CX) // TESTQ CX, CX - self.Sjmp("JZ" , "_decode_{n}") // JZ _decode_{n} - self.Emit("CMPQ" , _CX, _VP) // CMPQ CX, VP - self.Sjmp("JE" , "_decode_{n}") // JE _decode_{n} - self.Emit("MOVQ" , jit.Ptr(_VP, 0), _AX) // MOVQ (VP), AX - self.Emit("MOVBLZX", jit.Ptr(_AX, _Gt_KindFlags), _DX) // MOVBLZX _Gt_KindFlags(AX), DX - self.Emit("ANDL" , jit.Imm(rt.F_kind_mask), _DX) // ANDL ${F_kind_mask}, DX - self.Emit("CMPL" , _DX, jit.Imm(_Vk_Ptr)) // CMPL DX, ${reflect.Ptr} - self.Sjmp("JNE" , "_decode_{n}") // JNE _decode_{n} - self.Emit("LEAQ" , jit.Ptr(_VP, 8), _DI) // LEAQ 8(VP), DI - self.decode_dynamic(_AX, _DI) // DECODE AX, DI - self.Sjmp("JMP" , "_decode_end_{n}") // JMP _decode_end_{n} - self.Link("_decode_{n}") // _decode_{n}: - self.Emit("MOVQ" , _ARG_fv, _DF) // MOVQ fv, DF - self.Emit("MOVQ" , _ST, jit.Ptr(_SP, 0)) // MOVQ _ST, (SP) - self.call(_F_decodeValue) // CALL decodeValue - self.Emit("TESTQ" , _EP, _EP) // TESTQ EP, EP - self.Sjmp("JNZ" , _LB_parsing_error) // JNZ _parsing_error - self.Link("_decode_end_{n}") // _decode_end_{n}: -} - -func (self *_Assembler) _asm_OP_dyn(p *_Instr) { - self.Emit("MOVQ" , jit.Type(p.vt()), _ET) // MOVQ ${p.vt()}, ET - self.Emit("CMPQ" , jit.Ptr(_VP, 8), jit.Imm(0)) // CMPQ 8(VP), $0 - self.Sjmp("JE" , _LB_type_error) // JE _type_error - self.Emit("MOVQ" , jit.Ptr(_VP, 0), _AX) // MOVQ (VP), AX - self.Emit("MOVQ" , jit.Ptr(_AX, 8), _AX) // MOVQ 8(AX), AX - self.Emit("MOVBLZX", jit.Ptr(_AX, _Gt_KindFlags), _DX) // MOVBLZX _Gt_KindFlags(AX), DX - self.Emit("ANDL" , jit.Imm(rt.F_kind_mask), _DX) // ANDL ${F_kind_mask}, DX - self.Emit("CMPL" , _DX, jit.Imm(_Vk_Ptr)) // CMPL DX, ${reflect.Ptr} - self.Sjmp("JNE" , _LB_type_error) // JNE _type_error - self.Emit("LEAQ" , jit.Ptr(_VP, 8), _DI) // LEAQ 8(VP), DI - self.decode_dynamic(_AX, _DI) // DECODE AX, DI - self.Link("_decode_end_{n}") // _decode_end_{n}: -} - -func (self *_Assembler) _asm_OP_str(_ *_Instr) { - self.parse_string() // PARSE STRING - self.unquote_once(jit.Ptr(_VP, 0), jit.Ptr(_VP, 8), false, true) // UNQUOTE once, (VP), 8(VP) -} - -func (self *_Assembler) _asm_OP_bin(_ *_Instr) { - self.parse_string() // PARSE STRING - self.slice_from(_VAR_st_Iv, -1) // SLICE st.Iv, $-1 - self.Emit("MOVQ" , _DI, jit.Ptr(_VP, 0)) // MOVQ DI, (VP) - self.Emit("MOVQ" , _SI, jit.Ptr(_VP, 8)) // MOVQ SI, 8(VP) - self.Emit("SHRQ" , jit.Imm(2), _SI) // SHRQ $2, SI - self.Emit("LEAQ" , jit.Sib(_SI, _SI, 2, 0), _SI) // LEAQ (SI)(SI*2), SI - self.Emit("MOVQ" , _SI, jit.Ptr(_VP, 16)) // MOVQ SI, 16(VP) - self.malloc(_SI, _SI) // MALLOC SI, SI - - // TODO: due to base64x's bug, only use AVX mode now - self.Emit("MOVL", jit.Imm(_MODE_JSON), _CX) // MOVL $_MODE_JSON, CX - - /* call the decoder */ - self.Emit("XORL" , _DX, _DX) // XORL DX, DX - self.Emit("MOVQ" , _VP, _DI) // MOVQ VP, DI - - self.Emit("MOVQ" , jit.Ptr(_VP, 0), _R9) // MOVQ SI, (VP) - self.WriteRecNotAX(4, _SI, jit.Ptr(_VP, 0), true, false) // XCHGQ SI, (VP) - self.Emit("MOVQ" , _R9, _SI) - - self.Emit("XCHGQ", _DX, jit.Ptr(_VP, 8)) // XCHGQ DX, 8(VP) - self.call(_F_b64decode) // CALL b64decode - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_base64_error) // JS _base64_error - self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 8)) // MOVQ AX, 8(VP) -} - -func (self *_Assembler) _asm_OP_bool(_ *_Instr) { - self.Emit("LEAQ", jit.Ptr(_IC, 4), _AX) // LEAQ 4(IC), AX - self.Emit("CMPQ", _AX, _IL) // CMPQ AX, IL - self.Sjmp("JA" , _LB_eof_error) // JA _eof_error - self.Emit("CMPB", jit.Sib(_IP, _IC, 1, 0), jit.Imm('f')) // CMPB (IP)(IC), $'f' - self.Sjmp("JE" , "_false_{n}") // JE _false_{n} - self.Emit("MOVL", jit.Imm(_IM_true), _CX) // MOVL $"true", CX - self.Emit("CMPL", _CX, jit.Sib(_IP, _IC, 1, 0)) // CMPL CX, (IP)(IC) - self.Sjmp("JE" , "_bool_true_{n}") - - // try to skip the value - self.Emit("MOVQ", _IC, _VAR_ic) - self.Emit("MOVQ", _T_bool, _ET) - self.Emit("MOVQ", _ET, _VAR_et) - self.Byte(0x4c, 0x8d, 0x0d) // LEAQ (PC), R9 - self.Sref("_end_{n}", 4) - self.Emit("MOVQ", _R9, _VAR_pc) - self.Sjmp("JMP" , _LB_skip_one) - - self.Link("_bool_true_{n}") - self.Emit("MOVQ", _AX, _IC) // MOVQ AX, IC - self.Emit("MOVB", jit.Imm(1), jit.Ptr(_VP, 0)) // MOVB $1, (VP) - self.Sjmp("JMP" , "_end_{n}") // JMP _end_{n} - self.Link("_false_{n}") // _false_{n}: - self.Emit("ADDQ", jit.Imm(1), _AX) // ADDQ $1, AX - self.Emit("ADDQ", jit.Imm(1), _IC) // ADDQ $1, IC - self.Emit("CMPQ", _AX, _IL) // CMPQ AX, IL - self.Sjmp("JA" , _LB_eof_error) // JA _eof_error - self.Emit("MOVL", jit.Imm(_IM_alse), _CX) // MOVL $"alse", CX - self.Emit("CMPL", _CX, jit.Sib(_IP, _IC, 1, 0)) // CMPL CX, (IP)(IC) - self.Sjmp("JNE" , _LB_im_error) // JNE _im_error - self.Emit("MOVQ", _AX, _IC) // MOVQ AX, IC - self.Emit("XORL", _AX, _AX) // XORL AX, AX - self.Emit("MOVB", _AX, jit.Ptr(_VP, 0)) // MOVB AX, (VP) - self.Link("_end_{n}") // _end_{n}: -} - -func (self *_Assembler) _asm_OP_num(_ *_Instr) { - self.Emit("MOVQ", jit.Imm(0), _VAR_fl) - self.Emit("CMPB", jit.Sib(_IP, _IC, 1, 0), jit.Imm('"')) - self.Emit("MOVQ", _IC, _BP) - self.Sjmp("JNE", "_skip_number_{n}") - self.Emit("MOVQ", jit.Imm(1), _VAR_fl) - self.Emit("ADDQ", jit.Imm(1), _IC) - self.Link("_skip_number_{n}") - - /* call skip_number */ - self.call_sf(_F_skip_number) // CALL_SF skip_one - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JNS" , "_num_next_{n}") - - /* call skip one */ - self.Emit("MOVQ", _BP, _VAR_ic) - self.Emit("MOVQ", _T_number, _ET) - self.Emit("MOVQ", _ET, _VAR_et) - self.Byte(0x4c, 0x8d, 0x0d) - self.Sref("_num_end_{n}", 4) - self.Emit("MOVQ", _R9, _VAR_pc) - self.Sjmp("JMP" , _LB_skip_one) - - /* assgin string */ - self.Link("_num_next_{n}") - self.slice_from_r(_AX, 0) - self.Emit("BTQ", jit.Imm(_F_copy_string), _ARG_fv) - self.Sjmp("JNC", "_num_write_{n}") - self.Byte(0x4c, 0x8d, 0x0d) // LEAQ (PC), R9 - self.Sref("_num_write_{n}", 4) - self.Sjmp("JMP", "_copy_string") - self.Link("_num_write_{n}") - self.Emit("MOVQ", _SI, jit.Ptr(_VP, 8)) // MOVQ SI, 8(VP) - self.WriteRecNotAX(13, _DI, jit.Ptr(_VP, 0), false, false) - - /* check if quoted */ - self.Emit("CMPQ", _VAR_fl, jit.Imm(1)) - self.Sjmp("JNE", "_num_end_{n}") - self.Emit("CMPB", jit.Sib(_IP, _IC, 1, 0), jit.Imm('"')) - self.Sjmp("JNE", _LB_char_0_error) - self.Emit("ADDQ", jit.Imm(1), _IC) - self.Link("_num_end_{n}") -} - -func (self *_Assembler) _asm_OP_i8(ins *_Instr) { - var pin = "_i8_end_{n}" - self.parse_signed(int8Type, pin, -1) // PARSE int8 - self.range_signed(_I_int8, _T_int8, math.MinInt8, math.MaxInt8) // RANGE int8 - self.Emit("MOVB", _AX, jit.Ptr(_VP, 0)) // MOVB AX, (VP) - self.Link(pin) -} - -func (self *_Assembler) _asm_OP_i16(ins *_Instr) { - var pin = "_i16_end_{n}" - self.parse_signed(int16Type, pin, -1) // PARSE int16 - self.range_signed(_I_int16, _T_int16, math.MinInt16, math.MaxInt16) // RANGE int16 - self.Emit("MOVW", _AX, jit.Ptr(_VP, 0)) // MOVW AX, (VP) - self.Link(pin) -} - -func (self *_Assembler) _asm_OP_i32(ins *_Instr) { - var pin = "_i32_end_{n}" - self.parse_signed(int32Type, pin, -1) // PARSE int32 - self.range_signed(_I_int32, _T_int32, math.MinInt32, math.MaxInt32) // RANGE int32 - self.Emit("MOVL", _AX, jit.Ptr(_VP, 0)) // MOVL AX, (VP) - self.Link(pin) -} - -func (self *_Assembler) _asm_OP_i64(ins *_Instr) { - var pin = "_i64_end_{n}" - self.parse_signed(int64Type, pin, -1) // PARSE int64 - self.Emit("MOVQ", _VAR_st_Iv, _AX) // MOVQ st.Iv, AX - self.Emit("MOVQ", _AX, jit.Ptr(_VP, 0)) // MOVQ AX, (VP) - self.Link(pin) -} - -func (self *_Assembler) _asm_OP_u8(ins *_Instr) { - var pin = "_u8_end_{n}" - self.parse_unsigned(uint8Type, pin, -1) // PARSE uint8 - self.range_unsigned(_I_uint8, _T_uint8, math.MaxUint8) // RANGE uint8 - self.Emit("MOVB", _AX, jit.Ptr(_VP, 0)) // MOVB AX, (VP) - self.Link(pin) -} - -func (self *_Assembler) _asm_OP_u16(ins *_Instr) { - var pin = "_u16_end_{n}" - self.parse_unsigned(uint16Type, pin, -1) // PARSE uint16 - self.range_unsigned(_I_uint16, _T_uint16, math.MaxUint16) // RANGE uint16 - self.Emit("MOVW", _AX, jit.Ptr(_VP, 0)) // MOVW AX, (VP) - self.Link(pin) -} - -func (self *_Assembler) _asm_OP_u32(ins *_Instr) { - var pin = "_u32_end_{n}" - self.parse_unsigned(uint32Type, pin, -1) // PARSE uint32 - self.range_unsigned(_I_uint32, _T_uint32, math.MaxUint32) // RANGE uint32 - self.Emit("MOVL", _AX, jit.Ptr(_VP, 0)) // MOVL AX, (VP) - self.Link(pin) -} - -func (self *_Assembler) _asm_OP_u64(ins *_Instr) { - var pin = "_u64_end_{n}" - self.parse_unsigned(uint64Type, pin, -1) // PARSE uint64 - self.Emit("MOVQ", _VAR_st_Iv, _AX) // MOVQ st.Iv, AX - self.Emit("MOVQ", _AX, jit.Ptr(_VP, 0)) // MOVQ AX, (VP) - self.Link(pin) -} - -func (self *_Assembler) _asm_OP_f32(ins *_Instr) { - var pin = "_f32_end_{n}" - self.parse_number(float32Type, pin, -1) // PARSE NUMBER - self.range_single() // RANGE float32 - self.Emit("MOVSS", _X0, jit.Ptr(_VP, 0)) // MOVSS X0, (VP) - self.Link(pin) -} - -func (self *_Assembler) _asm_OP_f64(ins *_Instr) { - var pin = "_f64_end_{n}" - self.parse_number(float64Type, pin, -1) // PARSE NUMBER - self.Emit("MOVSD", _VAR_st_Dv, _X0) // MOVSD st.Dv, X0 - self.Emit("MOVSD", _X0, jit.Ptr(_VP, 0)) // MOVSD X0, (VP) - self.Link(pin) -} - -func (self *_Assembler) _asm_OP_unquote(ins *_Instr) { - self.check_eof(2) - self.Emit("CMPB", jit.Sib(_IP, _IC, 1, 0), jit.Imm('\\')) // CMPB (IP)(IC), $'\\' - self.Sjmp("JNE" , _LB_char_0_error) // JNE _char_0_error - self.Emit("CMPB", jit.Sib(_IP, _IC, 1, 1), jit.Imm('"')) // CMPB 1(IP)(IC), $'"' - self.Sjmp("JNE" , _LB_char_1_error) // JNE _char_1_error - self.Emit("ADDQ", jit.Imm(2), _IC) // ADDQ $2, IC - self.parse_string() // PARSE STRING - self.unquote_twice(jit.Ptr(_VP, 0), jit.Ptr(_VP, 8), false) // UNQUOTE twice, (VP), 8(VP) -} - -func (self *_Assembler) _asm_OP_nil_1(_ *_Instr) { - self.Emit("XORL", _AX, _AX) // XORL AX, AX - self.Emit("MOVQ", _AX, jit.Ptr(_VP, 0)) // MOVQ AX, (VP) -} - -func (self *_Assembler) _asm_OP_nil_2(_ *_Instr) { - self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 - self.Emit("MOVOU", _X0, jit.Ptr(_VP, 0)) // MOVOU X0, (VP) -} - -func (self *_Assembler) _asm_OP_nil_3(_ *_Instr) { - self.Emit("XORL" , _AX, _AX) // XORL AX, AX - self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 - self.Emit("MOVOU", _X0, jit.Ptr(_VP, 0)) // MOVOU X0, (VP) - self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 16)) // MOVOU X0, 16(VP) -} - -func (self *_Assembler) _asm_OP_deref(p *_Instr) { - self.vfollow(p.vt()) -} - -func (self *_Assembler) _asm_OP_index(p *_Instr) { - self.Emit("MOVQ", jit.Imm(p.i64()), _AX) // MOVQ ${p.vi()}, AX - self.Emit("ADDQ", _AX, _VP) // ADDQ _AX, _VP -} - -func (self *_Assembler) _asm_OP_is_null(p *_Instr) { - self.Emit("LEAQ" , jit.Ptr(_IC, 4), _AX) // LEAQ 4(IC), AX - self.Emit("CMPQ" , _AX, _IL) // CMPQ AX, IL - self.Sjmp("JA" , "_not_null_{n}") // JA _not_null_{n} - self.Emit("CMPL" , jit.Sib(_IP, _IC, 1, 0), jit.Imm(_IM_null)) // CMPL (IP)(IC), $"null" - self.Emit("CMOVQEQ", _AX, _IC) // CMOVQEQ AX, IC - self.Xjmp("JE" , p.vi()) // JE {p.vi()} - self.Link("_not_null_{n}") // _not_null_{n}: -} - -func (self *_Assembler) _asm_OP_is_null_quote(p *_Instr) { - self.Emit("LEAQ" , jit.Ptr(_IC, 5), _AX) // LEAQ 4(IC), AX - self.Emit("CMPQ" , _AX, _IL) // CMPQ AX, IL - self.Sjmp("JA" , "_not_null_quote_{n}") // JA _not_null_quote_{n} - self.Emit("CMPL" , jit.Sib(_IP, _IC, 1, 0), jit.Imm(_IM_null)) // CMPL (IP)(IC), $"null" - self.Sjmp("JNE" , "_not_null_quote_{n}") // JNE _not_null_quote_{n} - self.Emit("CMPB" , jit.Sib(_IP, _IC, 1, 4), jit.Imm('"')) // CMPB 4(IP)(IC), $'"' - self.Emit("CMOVQEQ", _AX, _IC) // CMOVQEQ AX, IC - self.Xjmp("JE" , p.vi()) // JE {p.vi()} - self.Link("_not_null_quote_{n}") // _not_null_quote_{n}: -} - -func (self *_Assembler) _asm_OP_map_init(_ *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_VP, 0), _AX) // MOVQ (VP), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JNZ" , "_end_{n}") // JNZ _end_{n} - self.call_go(_F_makemap_small) // CALL_GO makemap_small - self.Emit("MOVQ" , jit.Ptr(_SP, 0), _AX) // MOVQ (SP), AX - self.WritePtrAX(6, jit.Ptr(_VP, 0), false) // MOVQ AX, (VP) - self.Link("_end_{n}") // _end_{n}: - self.Emit("MOVQ" , _AX, _VP) // MOVQ AX, VP -} - -func (self *_Assembler) _asm_OP_map_key_i8(p *_Instr) { - self.parse_signed(int8Type, "", p.vi()) // PARSE int8 - self.range_signed(_I_int8, _T_int8, math.MinInt8, math.MaxInt8) // RANGE int8 - self.match_char('"') - self.mapassign_std(p.vt(), _VAR_st_Iv) // MAPASSIGN int8, mapassign, st.Iv -} - -func (self *_Assembler) _asm_OP_map_key_i16(p *_Instr) { - self.parse_signed(int16Type, "", p.vi()) // PARSE int16 - self.range_signed(_I_int16, _T_int16, math.MinInt16, math.MaxInt16) // RANGE int16 - self.match_char('"') - self.mapassign_std(p.vt(), _VAR_st_Iv) // MAPASSIGN int16, mapassign, st.Iv -} - -func (self *_Assembler) _asm_OP_map_key_i32(p *_Instr) { - self.parse_signed(int32Type, "", p.vi()) // PARSE int32 - self.range_signed(_I_int32, _T_int32, math.MinInt32, math.MaxInt32) // RANGE int32 - self.match_char('"') - if vt := p.vt(); !mapfast(vt) { - self.mapassign_std(vt, _VAR_st_Iv) // MAPASSIGN int32, mapassign, st.Iv - } else { - self.mapassign_fastx(vt, _F_mapassign_fast32) // MAPASSIGN int32, mapassign_fast32 - } -} - -func (self *_Assembler) _asm_OP_map_key_i64(p *_Instr) { - self.parse_signed(int64Type, "", p.vi()) // PARSE int64 - self.match_char('"') - if vt := p.vt(); !mapfast(vt) { - self.mapassign_std(vt, _VAR_st_Iv) // MAPASSIGN int64, mapassign, st.Iv - } else { - self.Emit("MOVQ", _VAR_st_Iv, _AX) // MOVQ st.Iv, AX - self.mapassign_fastx(vt, _F_mapassign_fast64) // MAPASSIGN int64, mapassign_fast64 - } -} - -func (self *_Assembler) _asm_OP_map_key_u8(p *_Instr) { - self.parse_unsigned(uint8Type, "", p.vi()) // PARSE uint8 - self.range_unsigned(_I_uint8, _T_uint8, math.MaxUint8) // RANGE uint8 - self.match_char('"') - self.mapassign_std(p.vt(), _VAR_st_Iv) // MAPASSIGN uint8, vt.Iv -} - -func (self *_Assembler) _asm_OP_map_key_u16(p *_Instr) { - self.parse_unsigned(uint16Type, "", p.vi()) // PARSE uint16 - self.range_unsigned(_I_uint16, _T_uint16, math.MaxUint16) // RANGE uint16 - self.match_char('"') - self.mapassign_std(p.vt(), _VAR_st_Iv) // MAPASSIGN uint16, vt.Iv -} - -func (self *_Assembler) _asm_OP_map_key_u32(p *_Instr) { - self.parse_unsigned(uint32Type, "", p.vi()) // PARSE uint32 - self.range_unsigned(_I_uint32, _T_uint32, math.MaxUint32) // RANGE uint32 - self.match_char('"') - if vt := p.vt(); !mapfast(vt) { - self.mapassign_std(vt, _VAR_st_Iv) // MAPASSIGN uint32, vt.Iv - } else { - self.mapassign_fastx(vt, _F_mapassign_fast32) // MAPASSIGN uint32, mapassign_fast32 - } -} - -func (self *_Assembler) _asm_OP_map_key_u64(p *_Instr) { - self.parse_unsigned(uint64Type, "", p.vi()) // PARSE uint64 - self.match_char('"') - if vt := p.vt(); !mapfast(vt) { - self.mapassign_std(vt, _VAR_st_Iv) // MAPASSIGN uint64, vt.Iv - } else { - self.Emit("MOVQ", _VAR_st_Iv, _AX) // MOVQ st.Iv, AX - self.mapassign_fastx(vt, _F_mapassign_fast64) // MAPASSIGN uint64, mapassign_fast64 - } -} - -func (self *_Assembler) _asm_OP_map_key_f32(p *_Instr) { - self.parse_number(float32Type, "", p.vi()) // PARSE NUMBER - self.range_single() // RANGE float32 - self.Emit("MOVSS", _X0, _VAR_st_Dv) // MOVSS X0, st.Dv - self.match_char('"') - self.mapassign_std(p.vt(), _VAR_st_Dv) // MAPASSIGN ${p.vt()}, mapassign, st.Dv -} - -func (self *_Assembler) _asm_OP_map_key_f64(p *_Instr) { - self.parse_number(float64Type, "", p.vi()) // PARSE NUMBER - self.match_char('"') - self.mapassign_std(p.vt(), _VAR_st_Dv) // MAPASSIGN ${p.vt()}, mapassign, st.Dv -} - -func (self *_Assembler) _asm_OP_map_key_str(p *_Instr) { - self.parse_string() // PARSE STRING - self.unquote_once(_VAR_sv_p, _VAR_sv_n, true, true) // UNQUOTE once, sv.p, sv.n - if vt := p.vt(); !mapfast(vt) { - self.valloc(vt.Key(), _DI) - self.Emit("MOVOU", _VAR_sv, _X0) - self.Emit("MOVOU", _X0, jit.Ptr(_DI, 0)) - self.mapassign_std(vt, jit.Ptr(_DI, 0)) - } else { - self.Emit("MOVQ", _VAR_sv_p, _DI) // MOVQ sv.p, DI - self.Emit("MOVQ", _VAR_sv_n, _SI) // MOVQ sv.n, SI - self.mapassign_str_fast(vt, _DI, _SI) // MAPASSIGN string, DI, SI - } -} - -func (self *_Assembler) _asm_OP_map_key_utext(p *_Instr) { - self.parse_string() // PARSE STRING - self.unquote_once(_VAR_sv_p, _VAR_sv_n, true, true) // UNQUOTE once, sv.p, sv.n - self.mapassign_utext(p.vt(), false) // MAPASSIGN utext, ${p.vt()}, false -} - -func (self *_Assembler) _asm_OP_map_key_utext_p(p *_Instr) { - self.parse_string() // PARSE STRING - self.unquote_once(_VAR_sv_p, _VAR_sv_n, true, false) // UNQUOTE once, sv.p, sv.n - self.mapassign_utext(p.vt(), true) // MAPASSIGN utext, ${p.vt()}, true -} - -func (self *_Assembler) _asm_OP_array_skip(_ *_Instr) { - self.call_sf(_F_skip_array) // CALL_SF skip_array - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v -} - -func (self *_Assembler) _asm_OP_array_clear(p *_Instr) { - self.mem_clear_rem(p.i64(), true) -} - -func (self *_Assembler) _asm_OP_array_clear_p(p *_Instr) { - self.mem_clear_rem(p.i64(), false) -} - -func (self *_Assembler) _asm_OP_slice_init(p *_Instr) { - self.Emit("XORL" , _AX, _AX) // XORL AX, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 8)) // MOVQ AX, 8(VP) - self.Emit("MOVQ" , jit.Ptr(_VP, 16), _AX) // MOVQ 16(VP), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JNZ" , "_done_{n}") // JNZ _done_{n} - self.Emit("MOVQ" , jit.Imm(_MinSlice), _CX) // MOVQ ${_MinSlice}, CX - self.Emit("MOVQ" , _CX, jit.Ptr(_VP, 16)) // MOVQ CX, 16(VP) - self.Emit("MOVQ" , jit.Type(p.vt()), _DX) // MOVQ ${p.vt()}, DX - self.Emit("MOVQ" , _DX, jit.Ptr(_SP, 0)) // MOVQ DX, (SP) - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - self.Emit("MOVQ" , _CX, jit.Ptr(_SP, 16)) // MOVQ CX, 16(SP) - self.call_go(_F_makeslice) // CALL_GO makeslice - self.Emit("MOVQ" , jit.Ptr(_SP, 24), _AX) // MOVQ 24(SP), AX - self.WritePtrAX(7, jit.Ptr(_VP, 0), false) // MOVQ AX, (VP) - self.Link("_done_{n}") // _done_{n}: - self.Emit("XORL" , _AX, _AX) // XORL AX, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 8)) // MOVQ AX, 8(VP) -} - -func (self *_Assembler) _asm_OP_check_empty(p *_Instr) { - rbracket := p.vb() - if rbracket == ']' { - self.check_eof(1) - self.Emit("LEAQ", jit.Ptr(_IC, 1), _AX) // LEAQ 1(IC), AX - self.Emit("CMPB", jit.Sib(_IP, _IC, 1, 0), jit.Imm(int64(rbracket))) // CMPB (IP)(IC), ']' - self.Sjmp("JNE" , "_not_empty_array_{n}") // JNE _not_empty_array_{n} - self.Emit("MOVQ", _AX, _IC) // MOVQ AX, IC - self.StorePtr(_Zero_Base, jit.Ptr(_VP, 0), _AX) // MOVQ $zerobase, (VP) - self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 - self.Emit("MOVOU", _X0, jit.Ptr(_VP, 8)) // MOVOU X0, 8(VP) - self.Xjmp("JMP" , p.vi()) // JMP {p.vi()} - self.Link("_not_empty_array_{n}") - } else { - panic("only implement check empty array here!") - } -} - -func (self *_Assembler) _asm_OP_slice_append(p *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_VP, 8), _AX) // MOVQ 8(VP), AX - self.Emit("CMPQ" , _AX, jit.Ptr(_VP, 16)) // CMPQ AX, 16(VP) - self.Sjmp("JB" , "_index_{n}") // JB _index_{n} - self.Emit("MOVQ" , jit.Type(p.vt()), _AX) // MOVQ ${p.vt()}, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVOU", jit.Ptr(_VP, 0), _X0) // MOVOU (VP), X0 - self.Emit("MOVOU", _X0, jit.Ptr(_SP, 8)) // MOVOU X0, 8(SP) - self.Emit("MOVQ" , jit.Ptr(_VP, 16), _AX) // MOVQ 16(VP), AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 24)) // MOVQ AX, 24(SP) - self.Emit("SHLQ" , jit.Imm(1), _AX) // SHLQ $1, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 32)) // MOVQ AX, 32(SP) - self.call_go(_F_growslice) // CALL_GO growslice - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _DI) // MOVQ 40(SP), DI - self.Emit("MOVQ" , jit.Ptr(_SP, 48), _AX) // MOVQ 48(SP), AX - self.Emit("MOVQ" , jit.Ptr(_SP, 56), _SI) // MOVQ 56(SP), SI - self.WriteRecNotAX(8, _DI, jit.Ptr(_VP, 0), true, true)// MOVQ DI, (VP) - self.Emit("MOVQ" , _AX, jit.Ptr(_VP, 8)) // MOVQ AX, 8(VP) - self.Emit("MOVQ" , _SI, jit.Ptr(_VP, 16)) // MOVQ SI, 16(VP) - - // because growslice not zero memory {oldcap, newlen} when append et not has ptrdata. - // but we should zero it, avoid decode it as random values. - if rt.UnpackType(p.vt()).PtrData == 0 { - self.Emit("SUBQ" , _AX, _SI) // MOVQ AX, SI - - self.Emit("ADDQ" , jit.Imm(1), jit.Ptr(_VP, 8)) // ADDQ $1, 8(VP) - self.Emit("MOVQ" , _DI, _VP) // MOVQ DI, VP - self.Emit("MOVQ" , jit.Imm(int64(p.vlen())), _CX) // MOVQ ${p.vlen()}, CX - self.From("MULQ" , _CX) // MULQ CX - self.Emit("ADDQ" , _AX, _VP) // ADDQ AX, VP - - self.Emit("MOVQ" , _SI, _AX) // MOVQ SI, AX - self.From("MULQ" , _CX) // MULQ CX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - - self.Emit("MOVQ" , _VP, jit.Ptr(_SP, 0)) // MOVQ VP, (SP) - self.mem_clear_fn(true) // CALL_GO memclr{Has,NoHeap} - self.Sjmp("JMP", "_append_slice_end_{n}") // JMP _append_slice_end_{n} - } - - self.Link("_index_{n}") // _index_{n}: - self.Emit("ADDQ" , jit.Imm(1), jit.Ptr(_VP, 8)) // ADDQ $1, 8(VP) - self.Emit("MOVQ" , jit.Ptr(_VP, 0), _VP) // MOVQ (VP), VP - self.Emit("MOVQ" , jit.Imm(int64(p.vlen())), _CX) // MOVQ ${p.vlen()}, CX - self.From("MULQ" , _CX) // MULQ CX - self.Emit("ADDQ" , _AX, _VP) // ADDQ AX, VP - self.Link("_append_slice_end_{n}") -} - -func (self *_Assembler) _asm_OP_object_skip(_ *_Instr) { - self.call_sf(_F_skip_object) // CALL_SF skip_object - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v -} - -func (self *_Assembler) _asm_OP_object_next(_ *_Instr) { - self.call_sf(_F_skip_one) // CALL_SF skip_one - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v -} - -func (self *_Assembler) _asm_OP_struct_field(p *_Instr) { - assert_eq(caching.FieldEntrySize, 32, "invalid field entry size") - self.Emit("MOVQ" , jit.Imm(-1), _AX) // MOVQ $-1, AX - self.Emit("MOVQ" , _AX, _VAR_sr) // MOVQ AX, sr - self.parse_string() // PARSE STRING - self.unquote_once(_VAR_sv_p, _VAR_sv_n, true, false) // UNQUOTE once, sv.p, sv.n - self.Emit("LEAQ" , _VAR_sv, _AX) // LEAQ sv, AX - self.Emit("XORL" , _CX, _CX) // XORL CX, CX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ" , _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) - self.call_go(_F_strhash) // CALL_GO strhash - self.Emit("MOVQ" , jit.Ptr(_SP, 16), _AX) // MOVQ 16(SP), AX - self.Emit("MOVQ" , _AX, _R9) // MOVQ AX, R9 - self.Emit("MOVQ" , jit.Imm(freezeFields(p.vf())), _CX) // MOVQ ${p.vf()}, CX - self.Emit("MOVQ" , jit.Ptr(_CX, caching.FieldMap_b), _SI) // MOVQ FieldMap.b(CX), SI - self.Emit("MOVQ" , jit.Ptr(_CX, caching.FieldMap_N), _CX) // MOVQ FieldMap.N(CX), CX - self.Emit("TESTQ", _CX, _CX) // TESTQ CX, CX - self.Sjmp("JZ" , "_try_lowercase_{n}") // JZ _try_lowercase_{n} - self.Link("_loop_{n}") // _loop_{n}: - self.Emit("XORL" , _DX, _DX) // XORL DX, DX - self.From("DIVQ" , _CX) // DIVQ CX - self.Emit("LEAQ" , jit.Ptr(_DX, 1), _AX) // LEAQ 1(DX), AX - self.Emit("SHLQ" , jit.Imm(5), _DX) // SHLQ $5, DX - self.Emit("LEAQ" , jit.Sib(_SI, _DX, 1, 0), _DI) // LEAQ (SI)(DX), DI - self.Emit("MOVQ" , jit.Ptr(_DI, _Fe_Hash), _R8) // MOVQ FieldEntry.Hash(DI), R8 - self.Emit("TESTQ", _R8, _R8) // TESTQ R8, R8 - self.Sjmp("JZ" , "_try_lowercase_{n}") // JZ _try_lowercase_{n} - self.Emit("CMPQ" , _R8, _R9) // CMPQ R8, R9 - self.Sjmp("JNE" , "_loop_{n}") // JNE _loop_{n} - self.Emit("MOVQ" , jit.Ptr(_DI, _Fe_Name + 8), _DX) // MOVQ FieldEntry.Name+8(DI), DX - self.Emit("CMPQ" , _DX, _VAR_sv_n) // CMPQ DX, sv.n - self.Sjmp("JNE" , "_loop_{n}") // JNE _loop_{n} - self.Emit("MOVQ" , jit.Ptr(_DI, _Fe_ID), _R8) // MOVQ FieldEntry.ID(DI), R8 - self.Emit("MOVQ" , _AX, _VAR_ss_AX) // MOVQ AX, ss.AX - self.Emit("MOVQ" , _CX, _VAR_ss_CX) // MOVQ CX, ss.CX - self.Emit("MOVQ" , _SI, _VAR_ss_SI) // MOVQ SI, ss.SI - self.Emit("MOVQ" , _R8, _VAR_ss_R8) // MOVQ R8, ss.R8 - self.Emit("MOVQ" , _R9, _VAR_ss_R9) // MOVQ R9, ss.R9 - self.Emit("MOVQ" , _VAR_sv_p, _AX) // MOVQ _VAR_sv_p, AX - self.Emit("MOVQ" , jit.Ptr(_DI, _Fe_Name), _CX) // MOVQ FieldEntry.Name(DI), CX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ" , _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) - self.Emit("MOVQ" , _DX, jit.Ptr(_SP, 16)) // MOVQ DX, 16(SP) - self.call_go(_F_memequal) // CALL_GO memequal - self.Emit("MOVQ" , _VAR_ss_AX, _AX) // MOVQ ss.AX, AX - self.Emit("MOVQ" , _VAR_ss_CX, _CX) // MOVQ ss.CX, CX - self.Emit("MOVQ" , _VAR_ss_SI, _SI) // MOVQ ss.SI, SI - self.Emit("MOVQ" , _VAR_ss_R9, _R9) // MOVQ ss.R9, R9 - self.Emit("MOVB" , jit.Ptr(_SP, 24), _DX) // MOVB 24(SP), DX - self.Emit("TESTB", _DX, _DX) // TESTB DX, DX - self.Sjmp("JZ" , "_loop_{n}") // JZ _loop_{n} - self.Emit("MOVQ" , _VAR_ss_R8, _R8) // MOVQ ss.R8, R8 - self.Emit("MOVQ" , _R8, _VAR_sr) // MOVQ R8, sr - self.Sjmp("JMP" , "_end_{n}") // JMP _end_{n} - self.Link("_try_lowercase_{n}") // _try_lowercase_{n}: - self.Emit("MOVQ" , jit.Imm(referenceFields(p.vf())), _AX) // MOVQ ${p.vf()}, AX - self.Emit("MOVOU", _VAR_sv, _X0) // MOVOU sv, X0 - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVOU", _X0, jit.Ptr(_SP, 8)) // MOVOU X0, 8(SP) - self.call_go(_F_FieldMap_GetCaseInsensitive) // CALL_GO FieldMap::GetCaseInsensitive - self.Emit("MOVQ" , jit.Ptr(_SP, 24), _AX) // MOVQ 24(SP), AX - self.Emit("MOVQ" , _AX, _VAR_sr) // MOVQ AX, _VAR_sr - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JNS" , "_end_{n}") // JNS _end_{n} - self.Emit("BTQ" , jit.Imm(_F_disable_unknown), _ARG_fv) // BTQ ${_F_disable_unknown}, fv - self.Sjmp("JC" , _LB_field_error) // JC _field_error - self.Link("_end_{n}") // _end_{n}: -} - -func (self *_Assembler) _asm_OP_unmarshal(p *_Instr) { - self.unmarshal_json(p.vt(), true) -} - -func (self *_Assembler) _asm_OP_unmarshal_p(p *_Instr) { - self.unmarshal_json(p.vt(), false) -} - -func (self *_Assembler) _asm_OP_unmarshal_text(p *_Instr) { - self.unmarshal_text(p.vt(), true) -} - -func (self *_Assembler) _asm_OP_unmarshal_text_p(p *_Instr) { - self.unmarshal_text(p.vt(), false) -} - -func (self *_Assembler) _asm_OP_lspace(_ *_Instr) { - self.lspace("_{n}") -} - -func (self *_Assembler) lspace(subfix string) { - var label = "_lspace" + subfix - - self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL - self.Sjmp("JAE" , _LB_eof_error) // JAE _eof_error - self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX - self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX - self.Emit("CMPQ" , _AX, jit.Imm(' ')) // CMPQ AX, $' ' - self.Sjmp("JA" , label) // JA _nospace_{n} - self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX - self.Sjmp("JNC" , label) // JNC _nospace_{n} - - /* test up to 4 characters */ - for i := 0; i < 3; i++ { - self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC - self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL - self.Sjmp("JAE" , _LB_eof_error) // JAE _eof_error - self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX - self.Emit("CMPQ" , _AX, jit.Imm(' ')) // CMPQ AX, $' ' - self.Sjmp("JA" , label) // JA _nospace_{n} - self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX - self.Sjmp("JNC" , label) // JNC _nospace_{n} - } - - /* handle over to the native function */ - self.Emit("MOVQ" , _IP, _DI) // MOVQ IP, DI - self.Emit("MOVQ" , _IL, _SI) // MOVQ IL, SI - self.Emit("MOVQ" , _IC, _DX) // MOVQ IC, DX - self.call(_F_lspace) // CALL lspace - self.Emit("TESTQ" , _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_parsing_error_v) // JS _parsing_error_v - self.Emit("CMPQ" , _AX, _IL) // CMPQ AX, IL - self.Sjmp("JAE" , _LB_eof_error) // JAE _eof_error - self.Emit("MOVQ" , _AX, _IC) // MOVQ AX, IC - self.Link(label) // _nospace_{n}: -} - -func (self *_Assembler) _asm_OP_match_char(p *_Instr) { - self.match_char(p.vb()) -} - -func (self *_Assembler) match_char(char byte) { - self.check_eof(1) - self.Emit("CMPB", jit.Sib(_IP, _IC, 1, 0), jit.Imm(int64(char))) // CMPB (IP)(IC), ${p.vb()} - self.Sjmp("JNE" , _LB_char_0_error) // JNE _char_0_error - self.Emit("ADDQ", jit.Imm(1), _IC) // ADDQ $1, IC -} - -func (self *_Assembler) _asm_OP_check_char(p *_Instr) { - self.check_eof(1) - self.Emit("LEAQ" , jit.Ptr(_IC, 1), _AX) // LEAQ 1(IC), AX - self.Emit("CMPB" , jit.Sib(_IP, _IC, 1, 0), jit.Imm(int64(p.vb()))) // CMPB (IP)(IC), ${p.vb()} - self.Emit("CMOVQEQ", _AX, _IC) // CMOVQEQ AX, IC - self.Xjmp("JE" , p.vi()) // JE {p.vi()} -} - -func (self *_Assembler) _asm_OP_check_char_0(p *_Instr) { - self.check_eof(1) - self.Emit("CMPB", jit.Sib(_IP, _IC, 1, 0), jit.Imm(int64(p.vb()))) // CMPB (IP)(IC), ${p.vb()} - self.Xjmp("JE" , p.vi()) // JE {p.vi()} -} - -func (self *_Assembler) _asm_OP_add(p *_Instr) { - self.Emit("ADDQ", jit.Imm(int64(p.vi())), _IC) // ADDQ ${p.vi()}, IC -} - -func (self *_Assembler) _asm_OP_load(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("MOVQ", jit.Sib(_ST, _AX, 1, 0), _VP) // MOVQ (ST)(AX), VP -} - -func (self *_Assembler) _asm_OP_save(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_ST, 0), _CX) // MOVQ (ST), CX - self.Emit("CMPQ", _CX, jit.Imm(_MaxStackBytes)) // CMPQ CX, ${_MaxStackBytes} - self.Sjmp("JAE" , _LB_stack_error) // JA _stack_error - self.WriteRecNotAX(0 , _VP, jit.Sib(_ST, _CX, 1, 8), false, false) // MOVQ VP, 8(ST)(CX) - self.Emit("ADDQ", jit.Imm(8), _CX) // ADDQ $8, CX - self.Emit("MOVQ", _CX, jit.Ptr(_ST, 0)) // MOVQ CX, (ST) -} - -func (self *_Assembler) _asm_OP_drop(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("SUBQ", jit.Imm(8), _AX) // SUBQ $8, AX - self.Emit("MOVQ", jit.Sib(_ST, _AX, 1, 8), _VP) // MOVQ 8(ST)(AX), VP - self.Emit("MOVQ", _AX, jit.Ptr(_ST, 0)) // MOVQ AX, (ST) - self.Emit("XORL", _ET, _ET) // XORL ET, ET - self.Emit("MOVQ", _ET, jit.Sib(_ST, _AX, 1, 8)) // MOVQ ET, 8(ST)(AX) -} - -func (self *_Assembler) _asm_OP_drop_2(_ *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("SUBQ" , jit.Imm(16), _AX) // SUBQ $16, AX - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 8), _VP) // MOVQ 8(ST)(AX), VP - self.Emit("MOVQ" , _AX, jit.Ptr(_ST, 0)) // MOVQ AX, (ST) - self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 - self.Emit("MOVOU", _X0, jit.Sib(_ST, _AX, 1, 8)) // MOVOU X0, 8(ST)(AX) -} - -func (self *_Assembler) _asm_OP_recurse(p *_Instr) { - self.Emit("MOVQ", jit.Type(p.vt()), _AX) // MOVQ ${p.vt()}, AX - self.decode_dynamic(_AX, _VP) // DECODE AX, VP -} - -func (self *_Assembler) _asm_OP_goto(p *_Instr) { - self.Xjmp("JMP", p.vi()) -} - -func (self *_Assembler) _asm_OP_switch(p *_Instr) { - self.Emit("MOVQ", _VAR_sr, _AX) // MOVQ sr, AX - self.Emit("CMPQ", _AX, jit.Imm(p.i64())) // CMPQ AX, ${len(p.vs())} - self.Sjmp("JAE" , "_default_{n}") // JAE _default_{n} - - /* jump table selector */ - self.Byte(0x48, 0x8d, 0x3d) // LEAQ ?(PC), DI - self.Sref("_switch_table_{n}", 4) // .... &_switch_table_{n} - self.Emit("MOVLQSX", jit.Sib(_DI, _AX, 4, 0), _AX) // MOVLQSX (DI)(AX*4), AX - self.Emit("ADDQ" , _DI, _AX) // ADDQ DI, AX - self.Rjmp("JMP" , _AX) // JMP AX - self.Link("_switch_table_{n}") // _switch_table_{n}: - - /* generate the jump table */ - for i, v := range p.vs() { - self.Xref(v, int64(-i) * 4) - } - - /* default case */ - self.Link("_default_{n}") - self.NOP() -} - -func (self *_Assembler) print_gc(i int, p1 *_Instr, p2 *_Instr) { - self.Emit("MOVQ", jit.Imm(int64(p2.op())), jit.Ptr(_SP, 16))// MOVQ $(p2.op()), 16(SP) - self.Emit("MOVQ", jit.Imm(int64(p1.op())), jit.Ptr(_SP, 8)) // MOVQ $(p1.op()), 8(SP) - self.Emit("MOVQ", jit.Imm(int64(i)), jit.Ptr(_SP, 0)) // MOVQ $(i), (SP) - self.call_go(_F_println) -} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/consts/option.go b/vendor/github.com/bytedance/sonic/internal/decoder/consts/option.go new file mode 100644 index 00000000..4195ebda --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/consts/option.go @@ -0,0 +1,36 @@ + +package consts + +import ( + `github.com/bytedance/sonic/internal/native/types` +) + + +const ( + F_use_int64 = 0 + F_disable_urc = 2 + F_disable_unknown = 3 + F_copy_string = 4 + + + F_use_number = types.B_USE_NUMBER + F_validate_string = types.B_VALIDATE_STRING + F_allow_control = types.B_ALLOW_CONTROL + F_no_validate_json = types.B_NO_VALIDATE_JSON +) + +type Options uint64 + +const ( + OptionUseInt64 Options = 1 << F_use_int64 + OptionUseNumber Options = 1 << F_use_number + OptionUseUnicodeErrors Options = 1 << F_disable_urc + OptionDisableUnknown Options = 1 << F_disable_unknown + OptionCopyString Options = 1 << F_copy_string + OptionValidateString Options = 1 << F_validate_string + OptionNoValidateJSON Options = 1 << F_no_validate_json +) + +const ( + MaxStack = 4096 +) \ No newline at end of file diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/errors.go b/vendor/github.com/bytedance/sonic/internal/decoder/errors/errors.go similarity index 90% rename from vendor/github.com/bytedance/sonic/internal/decoder/errors.go rename to vendor/github.com/bytedance/sonic/internal/decoder/errors/errors.go index 4453f5cf..9f05e8b6 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/errors.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/errors/errors.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package decoder +package errors import ( `encoding/json` @@ -46,7 +46,7 @@ func (self SyntaxError) Description() string { func (self SyntaxError) description() string { /* check for empty source */ if self.Src == "" { - return fmt.Sprintf("no sources available: %#v", self) + return fmt.Sprintf("no sources available, the input json is empty: %#v", self) } p, x, q, y := calcBounds(len(self.Src), self.Pos) @@ -112,12 +112,12 @@ func clamp_zero(v int) int { /** JIT Error Helpers **/ -var stackOverflow = &json.UnsupportedValueError { +var StackOverflow = &json.UnsupportedValueError { Str : "Value nesting too deep", Value : reflect.ValueOf("..."), } -func error_wrap(src string, pos int, code types.ParsingError) error { +func ErrorWrap(src string, pos int, code types.ParsingError) error { return *error_wrap_heap(src, pos, code) } @@ -130,7 +130,7 @@ func error_wrap_heap(src string, pos int, code types.ParsingError) *SyntaxError } } -func error_type(vt *rt.GoType) error { +func ErrorType(vt *rt.GoType) error { return &json.UnmarshalTypeError{Type: vt.Pack()} } @@ -171,7 +171,7 @@ func (self MismatchTypeError) Description() string { return fmt.Sprintf("Mismatch type %s with value %s %s", self.Type.String(), swithchJSONType(self.Src, self.Pos), se.description()) } -func error_mismatch(src string, pos int, vt *rt.GoType) error { +func ErrorMismatch(src string, pos int, vt *rt.GoType) error { return &MismatchTypeError { Pos : pos, Src : src, @@ -179,11 +179,11 @@ func error_mismatch(src string, pos int, vt *rt.GoType) error { } } -func error_field(name string) error { +func ErrorField(name string) error { return errors.New("json: unknown field " + strconv.Quote(name)) } -func error_value(value string, vtype reflect.Type) error { +func ErrorValue(value string, vtype reflect.Type) error { return &json.UnmarshalTypeError { Type : vtype, Value : value, diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/generic_stkabi_amd64.go b/vendor/github.com/bytedance/sonic/internal/decoder/generic_stkabi_amd64.go deleted file mode 100644 index 8ce5c292..00000000 --- a/vendor/github.com/bytedance/sonic/internal/decoder/generic_stkabi_amd64.go +++ /dev/null @@ -1,733 +0,0 @@ -// +build go1.16,!go1.17 - -/* - * Copyright 2021 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package decoder - -import ( - `encoding/json` - `fmt` - `reflect` - - `github.com/bytedance/sonic/internal/jit` - `github.com/bytedance/sonic/internal/native` - `github.com/bytedance/sonic/internal/native/types` - `github.com/twitchyliquid64/golang-asm/obj` -) - -/** Crucial Registers: - * - * ST(BX) : ro, decoder stack - * DF(R10) : ro, decoder flags - * EP(R11) : wo, error pointer - * IP(R12) : ro, input pointer - * IL(R13) : ro, input length - * IC(R14) : rw, input cursor - * VP(R15) : ro, value pointer (to an interface{}) - */ - -const ( - _VD_args = 8 // 8 bytes for passing arguments to this functions - _VD_fargs = 64 // 64 bytes for passing arguments to other Go functions - _VD_saves = 40 // 40 bytes for saving the registers before CALL instructions - _VD_locals = 88 // 88 bytes for local variables -) - -const ( - _VD_offs = _VD_fargs + _VD_saves + _VD_locals - _VD_size = _VD_offs + 8 // 8 bytes for the parent frame pointer -) - -var ( - _VAR_ss = _VAR_ss_Vt - _VAR_df = jit.Ptr(_SP, _VD_fargs + _VD_saves) -) - -var ( - _VAR_ss_Vt = jit.Ptr(_SP, _VD_fargs + _VD_saves + 8) - _VAR_ss_Dv = jit.Ptr(_SP, _VD_fargs + _VD_saves + 16) - _VAR_ss_Iv = jit.Ptr(_SP, _VD_fargs + _VD_saves + 24) - _VAR_ss_Ep = jit.Ptr(_SP, _VD_fargs + _VD_saves + 32) - _VAR_ss_Db = jit.Ptr(_SP, _VD_fargs + _VD_saves + 40) - _VAR_ss_Dc = jit.Ptr(_SP, _VD_fargs + _VD_saves + 48) -) - -var ( - _VAR_cs_LR = jit.Ptr(_SP, _VD_fargs + _VD_saves + 56) - _VAR_cs_p = jit.Ptr(_SP, _VD_fargs + _VD_saves + 64) - _VAR_cs_n = jit.Ptr(_SP, _VD_fargs + _VD_saves + 72) - _VAR_cs_d = jit.Ptr(_SP, _VD_fargs + _VD_saves + 80) -) - -type _ValueDecoder struct { - jit.BaseAssembler -} - -func (self *_ValueDecoder) build() uintptr { - self.Init(self.compile) - return *(*uintptr)(self.Load("decode_value", _VD_size, _VD_args, argPtrs_generic, localPtrs_generic)) -} - -/** Function Calling Helpers **/ - -func (self *_ValueDecoder) save(r ...obj.Addr) { - for i, v := range r { - if i > _VD_saves / 8 - 1 { - panic("too many registers to save") - } else { - self.Emit("MOVQ", v, jit.Ptr(_SP, _VD_fargs + int64(i) * 8)) - } - } -} - -func (self *_ValueDecoder) load(r ...obj.Addr) { - for i, v := range r { - if i > _VD_saves / 8 - 1 { - panic("too many registers to load") - } else { - self.Emit("MOVQ", jit.Ptr(_SP, _VD_fargs + int64(i) * 8), v) - } - } -} - -func (self *_ValueDecoder) call(fn obj.Addr) { - self.Emit("MOVQ", fn, _AX) // MOVQ ${fn}, AX - self.Rjmp("CALL", _AX) // CALL AX -} - -func (self *_ValueDecoder) call_go(fn obj.Addr) { - self.save(_REG_go...) // SAVE $REG_go - self.call(fn) // CALL ${fn} - self.load(_REG_go...) // LOAD $REG_go -} - -/** Decoder Assembler **/ - -const ( - _S_val = iota + 1 - _S_arr - _S_arr_0 - _S_obj - _S_obj_0 - _S_obj_delim - _S_obj_sep -) - -const ( - _S_omask_key = (1 << _S_obj_0) | (1 << _S_obj_sep) - _S_omask_end = (1 << _S_obj_0) | (1 << _S_obj) - _S_vmask = (1 << _S_val) | (1 << _S_arr_0) -) - -const ( - _A_init_len = 1 - _A_init_cap = 16 -) - -const ( - _ST_Sp = 0 - _ST_Vt = _PtrBytes - _ST_Vp = _PtrBytes * (types.MAX_RECURSE + 1) -) - -var ( - _V_true = jit.Imm(int64(pbool(true))) - _V_false = jit.Imm(int64(pbool(false))) - _F_value = jit.Imm(int64(native.S_value)) -) - -var ( - _V_max = jit.Imm(int64(types.V_MAX)) - _E_eof = jit.Imm(int64(types.ERR_EOF)) - _E_invalid = jit.Imm(int64(types.ERR_INVALID_CHAR)) - _E_recurse = jit.Imm(int64(types.ERR_RECURSE_EXCEED_MAX)) -) - -var ( - _F_convTslice = jit.Func(convTslice) - _F_convTstring = jit.Func(convTstring) - _F_invalid_vtype = jit.Func(invalid_vtype) -) - -var ( - _T_map = jit.Type(reflect.TypeOf((map[string]interface{})(nil))) - _T_bool = jit.Type(reflect.TypeOf(false)) - _T_int64 = jit.Type(reflect.TypeOf(int64(0))) - _T_eface = jit.Type(reflect.TypeOf((*interface{})(nil)).Elem()) - _T_slice = jit.Type(reflect.TypeOf(([]interface{})(nil))) - _T_string = jit.Type(reflect.TypeOf("")) - _T_number = jit.Type(reflect.TypeOf(json.Number(""))) - _T_float64 = jit.Type(reflect.TypeOf(float64(0))) -) - -var _R_tab = map[int]string { - '[': "_decode_V_ARRAY", - '{': "_decode_V_OBJECT", - ':': "_decode_V_KEY_SEP", - ',': "_decode_V_ELEM_SEP", - ']': "_decode_V_ARRAY_END", - '}': "_decode_V_OBJECT_END", -} - -func (self *_ValueDecoder) compile() { - self.Emit("SUBQ", jit.Imm(_VD_size), _SP) // SUBQ $_VD_size, SP - self.Emit("MOVQ", _BP, jit.Ptr(_SP, _VD_offs)) // MOVQ BP, _VD_offs(SP) - self.Emit("LEAQ", jit.Ptr(_SP, _VD_offs), _BP) // LEAQ _VD_offs(SP), BP - - /* initialize the state machine */ - self.Emit("XORL", _CX, _CX) // XORL CX, CX - self.Emit("MOVQ", _DF, _VAR_df) // MOVQ DF, df - /* initialize digital buffer first */ - self.Emit("MOVQ", jit.Imm(_MaxDigitNums), _VAR_ss_Dc) // MOVQ $_MaxDigitNums, ss.Dcap - self.Emit("LEAQ", jit.Ptr(_ST, _DbufOffset), _AX) // LEAQ _DbufOffset(ST), AX - self.Emit("MOVQ", _AX, _VAR_ss_Db) // MOVQ AX, ss.Dbuf - /* add ST offset */ - self.Emit("ADDQ", jit.Imm(_FsmOffset), _ST) // ADDQ _FsmOffset, _ST - self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp - self.WriteRecNotAX(0, _VP, jit.Ptr(_ST, _ST_Vp), false) // MOVQ VP, ST.Vp[0] - self.Emit("MOVQ", jit.Imm(_S_val), jit.Ptr(_ST, _ST_Vt)) // MOVQ _S_val, ST.Vt[0] - self.Sjmp("JMP" , "_next") // JMP _next - - /* set the value from previous round */ - self.Link("_set_value") // _set_value: - self.Emit("MOVL" , jit.Imm(_S_vmask), _DX) // MOVL _S_vmask, DX - self.Emit("MOVQ" , jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ" , jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX - self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX - self.Sjmp("JNC" , "_vtype_error") // JNC _vtype_error - self.Emit("XORL" , _SI, _SI) // XORL SI, SI - self.Emit("SUBQ" , jit.Imm(1), jit.Ptr(_ST, _ST_Sp)) // SUBQ $1, ST.Sp - self.Emit("XCHGQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // XCHGQ ST.Vp[CX], SI - self.Emit("MOVQ" , _R8, jit.Ptr(_SI, 0)) // MOVQ R8, (SI) - self.WriteRecNotAX(1, _R9, jit.Ptr(_SI, 8), false) // MOVQ R9, 8(SI) - - /* check for value stack */ - self.Link("_next") // _next: - self.Emit("MOVQ" , jit.Ptr(_ST, _ST_Sp), _AX) // MOVQ ST.Sp, AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , "_return") // JS _return - - /* fast path: test up to 4 characters manually */ - self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL - self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF - self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX - self.Emit("MOVQ" , jit.Imm(_BM_space), _DX) // MOVQ _BM_space, DX - self.Emit("CMPQ" , _AX, jit.Imm(' ')) // CMPQ AX, $' ' - self.Sjmp("JA" , "_decode_fast") // JA _decode_fast - self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX - self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast - self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC - - /* at least 1 to 3 spaces */ - for i := 0; i < 3; i++ { - self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL - self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF - self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX - self.Emit("CMPQ" , _AX, jit.Imm(' ')) // CMPQ AX, $' ' - self.Sjmp("JA" , "_decode_fast") // JA _decode_fast - self.Emit("BTQ" , _AX, _DX) // BTQ _AX, _DX - self.Sjmp("JNC" , "_decode_fast") // JNC _decode_fast - self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC - } - - /* at least 4 spaces */ - self.Emit("CMPQ" , _IC, _IL) // CMPQ IC, IL - self.Sjmp("JAE" , "_decode_V_EOF") // JAE _decode_V_EOF - self.Emit("MOVBQZX", jit.Sib(_IP, _IC, 1, 0), _AX) // MOVBQZX (IP)(IC), AX - - /* fast path: use lookup table to select decoder */ - self.Link("_decode_fast") // _decode_fast: - self.Byte(0x48, 0x8d, 0x3d) // LEAQ ?(PC), DI - self.Sref("_decode_tab", 4) // .... &_decode_tab - self.Emit("MOVLQSX", jit.Sib(_DI, _AX, 4, 0), _AX) // MOVLQSX (DI)(AX*4), AX - self.Emit("TESTQ" , _AX, _AX) // TESTQ AX, AX - self.Sjmp("JZ" , "_decode_native") // JZ _decode_native - self.Emit("ADDQ" , jit.Imm(1), _IC) // ADDQ $1, IC - self.Emit("ADDQ" , _DI, _AX) // ADDQ DI, AX - self.Rjmp("JMP" , _AX) // JMP AX - - /* decode with native decoder */ - self.Link("_decode_native") // _decode_native: - self.Emit("MOVQ", _IP, _DI) // MOVQ IP, DI - self.Emit("MOVQ", _IL, _SI) // MOVQ IL, SI - self.Emit("MOVQ", _IC, _DX) // MOVQ IC, DX - self.Emit("LEAQ", _VAR_ss, _CX) // LEAQ ss, CX - self.Emit("MOVQ", _VAR_df, _R8) // MOVQ $df, R8 - self.Emit("BTSQ", jit.Imm(_F_allow_control), _R8) // ANDQ $1<<_F_allow_control, R8 - self.call(_F_value) // CALL value - self.Emit("MOVQ", _AX, _IC) // MOVQ AX, IC - - /* check for errors */ - self.Emit("MOVQ" , _VAR_ss_Vt, _AX) // MOVQ ss.Vt, AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , "_parsing_error") - self.Sjmp("JZ" , "_invalid_vtype") // JZ _invalid_vtype - self.Emit("CMPQ" , _AX, _V_max) // CMPQ AX, _V_max - self.Sjmp("JA" , "_invalid_vtype") // JA _invalid_vtype - - /* jump table selector */ - self.Byte(0x48, 0x8d, 0x3d) // LEAQ ?(PC), DI - self.Sref("_switch_table", 4) // .... &_switch_table - self.Emit("MOVLQSX", jit.Sib(_DI, _AX, 4, -4), _AX) // MOVLQSX -4(DI)(AX*4), AX - self.Emit("ADDQ" , _DI, _AX) // ADDQ DI, AX - self.Rjmp("JMP" , _AX) // JMP AX - - /** V_EOF **/ - self.Link("_decode_V_EOF") // _decode_V_EOF: - self.Emit("MOVL", _E_eof, _EP) // MOVL _E_eof, EP - self.Sjmp("JMP" , "_error") // JMP _error - - /** V_NULL **/ - self.Link("_decode_V_NULL") // _decode_V_NULL: - self.Emit("XORL", _R8, _R8) // XORL R8, R8 - self.Emit("XORL", _R9, _R9) // XORL R9, R9 - self.Emit("LEAQ", jit.Ptr(_IC, -4), _DI) // LEAQ -4(IC), DI - self.Sjmp("JMP" , "_set_value") // JMP _set_value - - /** V_TRUE **/ - self.Link("_decode_V_TRUE") // _decode_V_TRUE: - self.Emit("MOVQ", _T_bool, _R8) // MOVQ _T_bool, R8 - // TODO: maybe modified by users? - self.Emit("MOVQ", _V_true, _R9) // MOVQ _V_true, R9 - self.Emit("LEAQ", jit.Ptr(_IC, -4), _DI) // LEAQ -4(IC), DI - self.Sjmp("JMP" , "_set_value") // JMP _set_value - - /** V_FALSE **/ - self.Link("_decode_V_FALSE") // _decode_V_FALSE: - self.Emit("MOVQ", _T_bool, _R8) // MOVQ _T_bool, R8 - self.Emit("MOVQ", _V_false, _R9) // MOVQ _V_false, R9 - self.Emit("LEAQ", jit.Ptr(_IC, -5), _DI) // LEAQ -5(IC), DI - self.Sjmp("JMP" , "_set_value") // JMP _set_value - - /** V_ARRAY **/ - self.Link("_decode_V_ARRAY") // _decode_V_ARRAY - self.Emit("MOVL", jit.Imm(_S_vmask), _DX) // MOVL _S_vmask, DX - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX - self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX - self.Sjmp("JNC" , "_invalid_char") // JNC _invalid_char - - /* create a new array */ - self.Emit("MOVQ", _T_eface, _AX) // MOVQ _T_eface, AX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ", jit.Imm(_A_init_len), jit.Ptr(_SP, 8)) // MOVQ _A_init_len, 8(SP) - self.Emit("MOVQ", jit.Imm(_A_init_cap), jit.Ptr(_SP, 16)) // MOVQ _A_init_cap, 16(SP) - self.call_go(_F_makeslice) // CALL_GO runtime.makeslice - self.Emit("MOVQ", jit.Ptr(_SP, 24), _DX) // MOVQ 24(SP), DX - - /* pack into an interface */ - self.Emit("MOVQ", _DX, jit.Ptr(_SP, 0)) // MOVQ DX, (SP) - self.Emit("MOVQ", jit.Imm(_A_init_len), jit.Ptr(_SP, 8)) // MOVQ _A_init_len, 8(SP) - self.Emit("MOVQ", jit.Imm(_A_init_cap), jit.Ptr(_SP, 16)) // MOVQ _A_init_cap, 16(SP) - self.call_go(_F_convTslice) // CALL_GO runtime.convTslice - self.Emit("MOVQ", jit.Ptr(_SP, 24), _R8) // MOVQ 24(SP), R8 - - /* replace current state with an array */ - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI - self.Emit("MOVQ", jit.Imm(_S_arr), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_arr, ST.Vt[CX] - self.Emit("MOVQ", _T_slice, _AX) // MOVQ _T_slice, AX - self.Emit("MOVQ", _AX, jit.Ptr(_SI, 0)) // MOVQ AX, (SI) - self.WriteRecNotAX(2, _R8, jit.Ptr(_SI, 8), false) // MOVQ R8, 8(SI) - - /* add a new slot for the first element */ - self.Emit("ADDQ", jit.Imm(1), _CX) // ADDQ $1, CX - self.Emit("CMPQ", _CX, jit.Imm(types.MAX_RECURSE)) // CMPQ CX, ${types.MAX_RECURSE} - self.Sjmp("JAE" , "_stack_overflow") // JA _stack_overflow - self.Emit("MOVQ", jit.Ptr(_R8, 0), _AX) // MOVQ (R8), AX - self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp - self.WritePtrAX(3, jit.Sib(_ST, _CX, 8, _ST_Vp), false) // MOVQ AX, ST.Vp[CX] - self.Emit("MOVQ", jit.Imm(_S_arr_0), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_arr_0, ST.Vt[CX] - self.Sjmp("JMP" , "_next") // JMP _next - - /** V_OBJECT **/ - self.Link("_decode_V_OBJECT") // _decode_V_OBJECT: - self.Emit("MOVL", jit.Imm(_S_vmask), _DX) // MOVL _S_vmask, DX - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX - self.Emit("BTQ" , _AX, _DX) // BTQ AX, DX - self.Sjmp("JNC" , "_invalid_char") // JNC _invalid_char - self.call_go(_F_makemap_small) // CALL_GO runtime.makemap_small - self.Emit("MOVQ", jit.Ptr(_SP, 0), _AX) // MOVQ (SP), AX - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", jit.Imm(_S_obj_0), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_obj, ST.Vt[CX] - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI - self.Emit("MOVQ", _T_map, _DX) // MOVQ _T_map, DX - self.Emit("MOVQ", _DX, jit.Ptr(_SI, 0)) // MOVQ DX, (SI) - self.WritePtrAX(4, jit.Ptr(_SI, 8), false) // MOVQ AX, 8(SI) - self.Sjmp("JMP" , "_next") // JMP _next - - /** V_STRING **/ - self.Link("_decode_V_STRING") // _decode_V_STRING: - self.Emit("MOVQ", _VAR_ss_Iv, _CX) // MOVQ ss.Iv, CX - self.Emit("MOVQ", _IC, _AX) // MOVQ IC, AX - self.Emit("SUBQ", _CX, _AX) // SUBQ CX, AX - - /* check for escapes */ - self.Emit("CMPQ", _VAR_ss_Ep, jit.Imm(-1)) // CMPQ ss.Ep, $-1 - self.Sjmp("JNE" , "_unquote") // JNE _unquote - self.Emit("SUBQ", jit.Imm(1), _AX) // SUBQ $1, AX - self.Emit("LEAQ", jit.Sib(_IP, _CX, 1, 0), _R8) // LEAQ (IP)(CX), R8 - self.Byte(0x48, 0x8d, 0x3d) // LEAQ (PC), DI - self.Sref("_copy_string_end", 4) - self.Emit("BTQ", jit.Imm(_F_copy_string), _VAR_df) - self.Sjmp("JC", "copy_string") - self.Link("_copy_string_end") - self.Emit("XORL", _DX, _DX) // XORL DX, DX - /* strings with no escape sequences */ - self.Link("_noescape") // _noescape: - self.Emit("MOVL", jit.Imm(_S_omask_key), _DI) // MOVL _S_omask, DI - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _SI) // MOVQ ST.Vt[CX], SI - self.Emit("BTQ" , _SI, _DI) // BTQ SI, DI - self.Sjmp("JC" , "_object_key") // JC _object_key - - /* check for pre-packed strings, avoid 1 allocation */ - self.Emit("TESTQ", _DX, _DX) // TESTQ DX, DX - self.Sjmp("JNZ" , "_packed_str") // JNZ _packed_str - self.Emit("MOVQ" , _R8, jit.Ptr(_SP, 0)) // MOVQ R8, (SP) - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - self.call_go(_F_convTstring) // CALL_GO runtime.convTstring - self.Emit("MOVQ" , jit.Ptr(_SP, 16), _R9) // MOVQ 16(SP), R9 - - /* packed string already in R9 */ - self.Link("_packed_str") // _packed_str: - self.Emit("MOVQ", _T_string, _R8) // MOVQ _T_string, R8 - self.Emit("MOVQ", _VAR_ss_Iv, _DI) // MOVQ ss.Iv, DI - self.Emit("SUBQ", jit.Imm(1), _DI) // SUBQ $1, DI - self.Sjmp("JMP" , "_set_value") // JMP _set_value - - /* the string is an object key, get the map */ - self.Link("_object_key") - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI - self.Emit("MOVQ", jit.Ptr(_SI, 8), _SI) // MOVQ 8(SI), SI - - /* add a new delimiter */ - self.Emit("ADDQ", jit.Imm(1), _CX) // ADDQ $1, CX - self.Emit("CMPQ", _CX, jit.Imm(types.MAX_RECURSE)) // CMPQ CX, ${types.MAX_RECURSE} - self.Sjmp("JAE" , "_stack_overflow") // JA _stack_overflow - self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp - self.Emit("MOVQ", jit.Imm(_S_obj_delim), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_obj_delim, ST.Vt[CX] - - /* add a new slot int the map */ - self.Emit("MOVQ", _T_map, _DX) // MOVQ _T_map, DX - self.Emit("MOVQ", _DX, jit.Ptr(_SP, 0)) // MOVQ DX, (SP) - self.Emit("MOVQ", _SI, jit.Ptr(_SP, 8)) // MOVQ SI, 8(SP) - self.Emit("MOVQ", _R8, jit.Ptr(_SP, 16)) // MOVQ R9, 16(SP) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 24)) // MOVQ AX, 24(SP) - self.call_go(_F_mapassign_faststr) // CALL_GO runtime.mapassign_faststr - self.Emit("MOVQ", jit.Ptr(_SP, 32), _AX) // MOVQ 32(SP), AX - - /* add to the pointer stack */ - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.WritePtrAX(6, jit.Sib(_ST, _CX, 8, _ST_Vp), false) // MOVQ AX, ST.Vp[CX] - self.Sjmp("JMP" , "_next") // JMP _next - - /* allocate memory to store the string header and unquoted result */ - self.Link("_unquote") // _unquote: - self.Emit("ADDQ", jit.Imm(15), _AX) // ADDQ $15, AX - self.Emit("MOVQ", _T_byte, _CX) // MOVQ _T_byte, CX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ", _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) - self.Emit("MOVB", jit.Imm(0), jit.Ptr(_SP, 16)) // MOVB $0, 16(SP) - self.call_go(_F_mallocgc) // CALL_GO runtime.mallocgc - self.Emit("MOVQ", jit.Ptr(_SP, 24), _R9) // MOVQ 24(SP), R9 - - /* prepare the unquoting parameters */ - self.Emit("MOVQ" , _VAR_ss_Iv, _CX) // MOVQ ss.Iv, CX - self.Emit("LEAQ" , jit.Sib(_IP, _CX, 1, 0), _DI) // LEAQ (IP)(CX), DI - self.Emit("NEGQ" , _CX) // NEGQ CX - self.Emit("LEAQ" , jit.Sib(_IC, _CX, 1, -1), _SI) // LEAQ -1(IC)(CX), SI - self.Emit("LEAQ" , jit.Ptr(_R9, 16), _DX) // LEAQ 16(R8), DX - self.Emit("LEAQ" , _VAR_ss_Ep, _CX) // LEAQ ss.Ep, CX - self.Emit("XORL" , _R8, _R8) // XORL R8, R8 - self.Emit("BTQ" , jit.Imm(_F_disable_urc), _VAR_df) // BTQ ${_F_disable_urc}, fv - self.Emit("SETCC", _R8) // SETCC R8 - self.Emit("SHLQ" , jit.Imm(types.B_UNICODE_REPLACE), _R8) // SHLQ ${types.B_UNICODE_REPLACE}, R8 - - /* unquote the string, with R9 been preserved */ - self.save(_R9) // SAVE R9 - self.call(_F_unquote) // CALL unquote - self.load(_R9) // LOAD R9 - - /* check for errors */ - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , "_unquote_error") // JS _unquote_error - self.Emit("MOVL" , jit.Imm(1), _DX) // MOVL $1, DX - self.Emit("LEAQ" , jit.Ptr(_R9, 16), _R8) // ADDQ $16, R8 - self.Emit("MOVQ" , _R8, jit.Ptr(_R9, 0)) // MOVQ R8, (R9) - self.Emit("MOVQ" , _AX, jit.Ptr(_R9, 8)) // MOVQ AX, 8(R9) - self.Sjmp("JMP" , "_noescape") // JMP _noescape - - /** V_DOUBLE **/ - self.Link("_decode_V_DOUBLE") // _decode_V_DOUBLE: - self.Emit("BTQ" , jit.Imm(_F_use_number), _VAR_df) // BTQ _F_use_number, df - self.Sjmp("JC" , "_use_number") // JC _use_number - self.Emit("MOVSD", _VAR_ss_Dv, _X0) // MOVSD ss.Dv, X0 - self.Sjmp("JMP" , "_use_float64") // JMP _use_float64 - - /** V_INTEGER **/ - self.Link("_decode_V_INTEGER") // _decode_V_INTEGER: - self.Emit("BTQ" , jit.Imm(_F_use_number), _VAR_df) // BTQ _F_use_number, df - self.Sjmp("JC" , "_use_number") // JC _use_number - self.Emit("BTQ" , jit.Imm(_F_use_int64), _VAR_df) // BTQ _F_use_int64, df - self.Sjmp("JC" , "_use_int64") // JC _use_int64 - self.Emit("MOVQ" , _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX - self.Emit("CVTSQ2SD", _AX, _X0) // CVTSQ2SD AX, X0 - - /* represent numbers as `float64` */ - self.Link("_use_float64") // _use_float64: - self.Emit("MOVSD", _X0, jit.Ptr(_SP, 0)) // MOVSD X0, (SP) - self.call_go(_F_convT64) // CALL_GO runtime.convT64 - self.Emit("MOVQ" , _T_float64, _R8) // MOVQ _T_float64, R8 - self.Emit("MOVQ" , jit.Ptr(_SP, 8), _R9) // MOVQ 8(SP), R9 - self.Emit("MOVQ" , _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI - self.Sjmp("JMP" , "_set_value") // JMP _set_value - - /* represent numbers as `json.Number` */ - self.Link("_use_number") // _use_number - self.Emit("MOVQ", _VAR_ss_Ep, _AX) // MOVQ ss.Ep, AX - self.Emit("LEAQ", jit.Sib(_IP, _AX, 1, 0), _SI) // LEAQ (IP)(AX), SI - self.Emit("MOVQ", _IC, _CX) // MOVQ IC, CX - self.Emit("SUBQ", _AX, _CX) // SUBQ AX, CX - self.Emit("MOVQ", _SI, jit.Ptr(_SP, 0)) // MOVQ SI, (SP) - self.Emit("MOVQ", _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) - self.call_go(_F_convTstring) // CALL_GO runtime.convTstring - self.Emit("MOVQ", _T_number, _R8) // MOVQ _T_number, R8 - self.Emit("MOVQ", jit.Ptr(_SP, 16), _R9) // MOVQ 16(SP), R9 - self.Emit("MOVQ", _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI - self.Sjmp("JMP" , "_set_value") // JMP _set_value - - /* represent numbers as `int64` */ - self.Link("_use_int64") // _use_int64: - self.Emit("MOVQ", _VAR_ss_Iv, _AX) // MOVQ ss.Iv, AX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.call_go(_F_convT64) // CALL_GO runtime.convT64 - self.Emit("MOVQ", _T_int64, _R8) // MOVQ _T_int64, R8 - self.Emit("MOVQ", jit.Ptr(_SP, 8), _R9) // MOVQ 8(SP), R9 - self.Emit("MOVQ", _VAR_ss_Ep, _DI) // MOVQ ss.Ep, DI - self.Sjmp("JMP" , "_set_value") // JMP _set_value - - /** V_KEY_SEP **/ - self.Link("_decode_V_KEY_SEP") // _decode_V_KEY_SEP: - // self.Byte(0xcc) - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX - self.Emit("CMPQ", _AX, jit.Imm(_S_obj_delim)) // CMPQ AX, _S_obj_delim - self.Sjmp("JNE" , "_invalid_char") // JNE _invalid_char - self.Emit("MOVQ", jit.Imm(_S_val), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_val, ST.Vt[CX] - self.Emit("MOVQ", jit.Imm(_S_obj), jit.Sib(_ST, _CX, 8, _ST_Vt - 8)) // MOVQ _S_obj, ST.Vt[CX - 1] - self.Sjmp("JMP" , "_next") // JMP _next - - /** V_ELEM_SEP **/ - self.Link("_decode_V_ELEM_SEP") // _decode_V_ELEM_SEP: - self.Emit("MOVQ" , jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ" , jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX - self.Emit("CMPQ" , _AX, jit.Imm(_S_arr)) // CMPQ _AX, _S_arr - self.Sjmp("JE" , "_array_sep") // JZ _next - self.Emit("CMPQ" , _AX, jit.Imm(_S_obj)) // CMPQ _AX, _S_arr - self.Sjmp("JNE" , "_invalid_char") // JNE _invalid_char - self.Emit("MOVQ" , jit.Imm(_S_obj_sep), jit.Sib(_ST, _CX, 8, _ST_Vt)) - self.Sjmp("JMP" , "_next") // JMP _next - - /* arrays */ - self.Link("_array_sep") - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI - self.Emit("MOVQ", jit.Ptr(_SI, 8), _SI) // MOVQ 8(SI), SI - self.Emit("MOVQ", jit.Ptr(_SI, 8), _DX) // MOVQ 8(SI), DX - self.Emit("CMPQ", _DX, jit.Ptr(_SI, 16)) // CMPQ DX, 16(SI) - self.Sjmp("JAE" , "_array_more") // JAE _array_more - - /* add a slot for the new element */ - self.Link("_array_append") // _array_append: - self.Emit("ADDQ", jit.Imm(1), jit.Ptr(_SI, 8)) // ADDQ $1, 8(SI) - self.Emit("MOVQ", jit.Ptr(_SI, 0), _SI) // MOVQ (SI), SI - self.Emit("ADDQ", jit.Imm(1), _CX) // ADDQ $1, CX - self.Emit("CMPQ", _CX, jit.Imm(types.MAX_RECURSE)) // CMPQ CX, ${types.MAX_RECURSE} - self.Sjmp("JAE" , "_stack_overflow") - self.Emit("SHLQ", jit.Imm(1), _DX) // SHLQ $1, DX - self.Emit("LEAQ", jit.Sib(_SI, _DX, 8, 0), _SI) // LEAQ (SI)(DX*8), SI - self.Emit("MOVQ", _CX, jit.Ptr(_ST, _ST_Sp)) // MOVQ CX, ST.Sp - self.WriteRecNotAX(7 , _SI, jit.Sib(_ST, _CX, 8, _ST_Vp), false) // MOVQ SI, ST.Vp[CX] - self.Emit("MOVQ", jit.Imm(_S_val), jit.Sib(_ST, _CX, 8, _ST_Vt)) // MOVQ _S_val, ST.Vt[CX} - self.Sjmp("JMP" , "_next") // JMP _next - - /** V_ARRAY_END **/ - self.Link("_decode_V_ARRAY_END") // _decode_V_ARRAY_END: - self.Emit("XORL", _DX, _DX) // XORL DX, DX - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX - self.Emit("CMPQ", _AX, jit.Imm(_S_arr_0)) // CMPQ AX, _S_arr_0 - self.Sjmp("JE" , "_first_item") // JE _first_item - self.Emit("CMPQ", _AX, jit.Imm(_S_arr)) // CMPQ AX, _S_arr - self.Sjmp("JNE" , "_invalid_char") // JNE _invalid_char - self.Emit("SUBQ", jit.Imm(1), jit.Ptr(_ST, _ST_Sp)) // SUBQ $1, ST.Sp - self.Emit("MOVQ", _DX, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ DX, ST.Vp[CX] - self.Sjmp("JMP" , "_next") // JMP _next - - /* first element of an array */ - self.Link("_first_item") // _first_item: - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("SUBQ", jit.Imm(2), jit.Ptr(_ST, _ST_Sp)) // SUBQ $2, ST.Sp - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp - 8), _SI) // MOVQ ST.Vp[CX - 1], SI - self.Emit("MOVQ", jit.Ptr(_SI, 8), _SI) // MOVQ 8(SI), SI - self.Emit("MOVQ", _DX, jit.Sib(_ST, _CX, 8, _ST_Vp - 8)) // MOVQ DX, ST.Vp[CX - 1] - self.Emit("MOVQ", _DX, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ DX, ST.Vp[CX] - self.Emit("MOVQ", _DX, jit.Ptr(_SI, 8)) // MOVQ DX, 8(SI) - self.Sjmp("JMP" , "_next") // JMP _next - - /** V_OBJECT_END **/ - self.Link("_decode_V_OBJECT_END") // _decode_V_OBJECT_END: - self.Emit("MOVL", jit.Imm(_S_omask_end), _DX) // MOVL _S_omask, DI - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vt), _AX) // MOVQ ST.Vt[CX], AX - self.Emit("BTQ" , _AX, _DX) - self.Sjmp("JNC" , "_invalid_char") // JNE _invalid_char - self.Emit("XORL", _AX, _AX) // XORL AX, AX - self.Emit("SUBQ", jit.Imm(1), jit.Ptr(_ST, _ST_Sp)) // SUBQ $1, ST.Sp - self.Emit("MOVQ", _AX, jit.Sib(_ST, _CX, 8, _ST_Vp)) // MOVQ AX, ST.Vp[CX] - self.Sjmp("JMP" , "_next") // JMP _next - - /* return from decoder */ - self.Link("_return") // _return: - self.Emit("XORL", _EP, _EP) // XORL EP, EP - self.Emit("MOVQ", _EP, jit.Ptr(_ST, _ST_Vp)) // MOVQ EP, ST.Vp[0] - self.Link("_epilogue") // _epilogue: - self.Emit("SUBQ", jit.Imm(_FsmOffset), _ST) // SUBQ _FsmOffset, _ST - self.Emit("MOVQ", jit.Ptr(_SP, _VD_offs), _BP) // MOVQ _VD_offs(SP), BP - self.Emit("ADDQ", jit.Imm(_VD_size), _SP) // ADDQ $_VD_size, SP - self.Emit("RET") // RET - - /* array expand */ - self.Link("_array_more") // _array_more: - self.Emit("MOVQ" , _T_eface, _AX) // MOVQ _T_eface, AX - self.Emit("MOVOU", jit.Ptr(_SI, 0), _X0) // MOVOU (SI), X0 - self.Emit("MOVQ" , jit.Ptr(_SI, 16), _DX) // MOVQ 16(SI), DX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVOU", _X0, jit.Ptr(_SP, 8)) // MOVOU X0, 8(SP) - self.Emit("MOVQ" , _DX, jit.Ptr(_SP, 24)) // MOVQ DX, 24(SP) - self.Emit("SHLQ" , jit.Imm(1), _DX) // SHLQ $1, DX - self.Emit("MOVQ" , _DX, jit.Ptr(_SP, 32)) // MOVQ DX, 32(SP) - self.call_go(_F_growslice) // CALL_GO runtime.growslice - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _DI) // MOVOU 40(SP), DI - self.Emit("MOVQ" , jit.Ptr(_SP, 48), _DX) // MOVOU 48(SP), DX - self.Emit("MOVQ" , jit.Ptr(_SP, 56), _AX) // MOVQ 56(SP), AX - - /* update the slice */ - self.Emit("MOVQ", jit.Ptr(_ST, _ST_Sp), _CX) // MOVQ ST.Sp, CX - self.Emit("MOVQ", jit.Sib(_ST, _CX, 8, _ST_Vp), _SI) // MOVQ ST.Vp[CX], SI - self.Emit("MOVQ", jit.Ptr(_SI, 8), _SI) // MOVQ 8(SI), SI - self.Emit("MOVQ", _DX, jit.Ptr(_SI, 8)) // MOVQ DX, 8(SI) - self.Emit("MOVQ", _AX, jit.Ptr(_SI, 16)) // MOVQ AX, 16(AX) - self.WriteRecNotAX(8 , _DI, jit.Ptr(_SI, 0), false) // MOVQ R10, (SI) - self.Sjmp("JMP" , "_array_append") // JMP _array_append - - /* copy string */ - self.Link("copy_string") // pointer: R8, length: AX, return addr: DI - // self.Byte(0xcc) - self.Emit("MOVQ", _R8, _VAR_cs_p) - self.Emit("MOVQ", _AX, _VAR_cs_n) - self.Emit("MOVQ", _DI, _VAR_cs_LR) - self.Emit("MOVQ", _T_byte, _R8) - self.Emit("MOVQ", _R8, jit.Ptr(_SP, 0)) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 8)) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 16)) - self.call_go(_F_makeslice) - self.Emit("MOVQ", jit.Ptr(_SP, 24), _R8) - self.Emit("MOVQ", _R8, _VAR_cs_d) - self.Emit("MOVQ", _R8, jit.Ptr(_SP, 0)) - self.Emit("MOVQ", _VAR_cs_p, _R8) - self.Emit("MOVQ", _R8, jit.Ptr(_SP, 8)) - self.Emit("MOVQ", _VAR_cs_n, _AX) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 16)) - self.call_go(_F_memmove) - self.Emit("MOVQ", _VAR_cs_d, _R8) - self.Emit("MOVQ", _VAR_cs_n, _AX) - self.Emit("MOVQ", _VAR_cs_LR, _DI) - // self.Byte(0xcc) - self.Rjmp("JMP", _DI) - - /* error handlers */ - self.Link("_stack_overflow") - self.Emit("MOVL" , _E_recurse, _EP) // MOVQ _E_recurse, EP - self.Sjmp("JMP" , "_error") // JMP _error - self.Link("_vtype_error") // _vtype_error: - self.Emit("MOVQ" , _DI, _IC) // MOVQ DI, IC - self.Emit("MOVL" , _E_invalid, _EP) // MOVL _E_invalid, EP - self.Sjmp("JMP" , "_error") // JMP _error - self.Link("_invalid_char") // _invalid_char: - self.Emit("SUBQ" , jit.Imm(1), _IC) // SUBQ $1, IC - self.Emit("MOVL" , _E_invalid, _EP) // MOVL _E_invalid, EP - self.Sjmp("JMP" , "_error") // JMP _error - self.Link("_unquote_error") // _unquote_error: - self.Emit("MOVQ" , _VAR_ss_Iv, _IC) // MOVQ ss.Iv, IC - self.Emit("SUBQ" , jit.Imm(1), _IC) // SUBQ $1, IC - self.Link("_parsing_error") // _parsing_error: - self.Emit("NEGQ" , _AX) // NEGQ AX - self.Emit("MOVQ" , _AX, _EP) // MOVQ AX, EP - self.Link("_error") // _error: - self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 - self.Emit("MOVOU", _X0, jit.Ptr(_VP, 0)) // MOVOU X0, (VP) - self.Sjmp("JMP" , "_epilogue") // JMP _epilogue - - /* invalid value type, never returns */ - self.Link("_invalid_vtype") - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.call(_F_invalid_vtype) // CALL invalid_type - self.Emit("UD2") // UD2 - - /* switch jump table */ - self.Link("_switch_table") // _switch_table: - self.Sref("_decode_V_EOF", 0) // SREF &_decode_V_EOF, $0 - self.Sref("_decode_V_NULL", -4) // SREF &_decode_V_NULL, $-4 - self.Sref("_decode_V_TRUE", -8) // SREF &_decode_V_TRUE, $-8 - self.Sref("_decode_V_FALSE", -12) // SREF &_decode_V_FALSE, $-12 - self.Sref("_decode_V_ARRAY", -16) // SREF &_decode_V_ARRAY, $-16 - self.Sref("_decode_V_OBJECT", -20) // SREF &_decode_V_OBJECT, $-20 - self.Sref("_decode_V_STRING", -24) // SREF &_decode_V_STRING, $-24 - self.Sref("_decode_V_DOUBLE", -28) // SREF &_decode_V_DOUBLE, $-28 - self.Sref("_decode_V_INTEGER", -32) // SREF &_decode_V_INTEGER, $-32 - self.Sref("_decode_V_KEY_SEP", -36) // SREF &_decode_V_KEY_SEP, $-36 - self.Sref("_decode_V_ELEM_SEP", -40) // SREF &_decode_V_ELEM_SEP, $-40 - self.Sref("_decode_V_ARRAY_END", -44) // SREF &_decode_V_ARRAY_END, $-44 - self.Sref("_decode_V_OBJECT_END", -48) // SREF &_decode_V_OBJECT_END, $-48 - - /* fast character lookup table */ - self.Link("_decode_tab") // _decode_tab: - self.Sref("_decode_V_EOF", 0) // SREF &_decode_V_EOF, $0 - - /* generate rest of the tabs */ - for i := 1; i < 256; i++ { - if to, ok := _R_tab[i]; ok { - self.Sref(to, -int64(i) * 4) - } else { - self.Byte(0x00, 0x00, 0x00, 0x00) - } - } -} - -/** Generic Decoder **/ - -var ( - _subr_decode_value = new(_ValueDecoder).build() -) - -//go:nosplit -func invalid_vtype(vt types.ValueType) { - throw(fmt.Sprintf("invalid value type: %d", vt)) -} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/asm.s b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/asm.s similarity index 100% rename from vendor/github.com/bytedance/sonic/internal/decoder/asm.s rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/asm.s diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/asm_stubs_amd64_go117.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/asm_stubs_amd64_go117.go similarity index 99% rename from vendor/github.com/bytedance/sonic/internal/decoder/asm_stubs_amd64_go117.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/asm_stubs_amd64_go117.go index b0125a79..48f73e5b 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/asm_stubs_amd64_go117.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/asm_stubs_amd64_go117.go @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package decoder +package jitdec import ( `strconv` diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/asm_stubs_amd64_go121.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/asm_stubs_amd64_go121.go similarity index 98% rename from vendor/github.com/bytedance/sonic/internal/decoder/asm_stubs_amd64_go121.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/asm_stubs_amd64_go121.go index 018892f5..cbec3d24 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/asm_stubs_amd64_go121.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/asm_stubs_amd64_go121.go @@ -1,4 +1,4 @@ -// +build go1.21,!go1.22 +// +build go1.21,!go1.24 // Copyright 2023 CloudWeGo Authors // @@ -14,7 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package decoder +package jitdec import ( `strconv` @@ -81,7 +81,6 @@ func (self *_Assembler) WriteRecNotAX(i int, ptr obj.Addr, rec obj.Addr, saveDI self.Emit("MOVQ", ptr, jit.Ptr(_R11, 0)) self.Emit("MOVQ", rec, _AX) self.Emit("MOVQ", _AX, jit.Ptr(_R11, 8)) - self.load(_R11) if saveAX { self.load(_AX, _R11) } else { diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/assembler_regabi_amd64.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/assembler_regabi_amd64.go similarity index 96% rename from vendor/github.com/bytedance/sonic/internal/decoder/assembler_regabi_amd64.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/assembler_regabi_amd64.go index 3d223e14..4ff3b196 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/assembler_regabi_amd64.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/assembler_regabi_amd64.go @@ -1,5 +1,5 @@ -//go:build go1.17 && !go1.22 -// +build go1.17,!go1.22 +//go:build go1.17 && !go1.24 +// +build go1.17,!go1.24 /* * Copyright 2021 ByteDance Inc. @@ -17,21 +17,22 @@ * limitations under the License. */ -package decoder +package jitdec import ( - `encoding/json` - `fmt` - `math` - `reflect` - `unsafe` - - `github.com/bytedance/sonic/internal/caching` - `github.com/bytedance/sonic/internal/jit` - `github.com/bytedance/sonic/internal/native` - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` - `github.com/twitchyliquid64/golang-asm/obj` + "encoding/json" + "fmt" + "math" + "reflect" + "strings" + "unsafe" + + "github.com/bytedance/sonic/internal/caching" + "github.com/bytedance/sonic/internal/jit" + "github.com/bytedance/sonic/internal/native" + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" + "github.com/twitchyliquid64/golang-asm/obj" ) /** Register Allocations @@ -135,6 +136,7 @@ var ( _R9 = jit.Reg("R9") _X0 = jit.Reg("X0") _X1 = jit.Reg("X1") + _X15 = jit.Reg("X15") ) var ( @@ -292,7 +294,6 @@ var _OpFuncTab = [256]func(*_Assembler, *_Instr) { _OP_array_clear_p : (*_Assembler)._asm_OP_array_clear_p, _OP_slice_init : (*_Assembler)._asm_OP_slice_init, _OP_slice_append : (*_Assembler)._asm_OP_slice_append, - _OP_object_skip : (*_Assembler)._asm_OP_object_skip, _OP_object_next : (*_Assembler)._asm_OP_object_next, _OP_struct_field : (*_Assembler)._asm_OP_struct_field, _OP_unmarshal : (*_Assembler)._asm_OP_unmarshal, @@ -312,6 +313,7 @@ var _OpFuncTab = [256]func(*_Assembler, *_Instr) { _OP_check_char_0 : (*_Assembler)._asm_OP_check_char_0, _OP_dismatch_err : (*_Assembler)._asm_OP_dismatch_err, _OP_go_skip : (*_Assembler)._asm_OP_go_skip, + _OP_skip_emtpy : (*_Assembler)._asm_OP_skip_empty, _OP_add : (*_Assembler)._asm_OP_add, _OP_check_empty : (*_Assembler)._asm_OP_check_empty, _OP_debug : (*_Assembler)._asm_OP_debug, @@ -385,7 +387,7 @@ func (self *_Assembler) prologue() { var ( _REG_go = []obj.Addr { _ST, _VP, _IP, _IL, _IC } - _REG_rt = []obj.Addr { _ST, _VP, _IP, _IL, _IC, _IL } + _REG_rt = []obj.Addr { _ST, _VP, _IP, _IL, _IC } ) func (self *_Assembler) save(r ...obj.Addr) { @@ -420,9 +422,10 @@ func (self *_Assembler) call_go(fn obj.Addr) { } func (self *_Assembler) callc(fn obj.Addr) { - self.Emit("XCHGQ", _IP, _BP) + self.save(_IP) self.call(fn) - self.Emit("XCHGQ", _IP, _BP) + self.Emit("XORPS", _X15, _X15) + self.load(_IP) } func (self *_Assembler) call_c(fn obj.Addr) { @@ -491,9 +494,9 @@ func (self *_Assembler) type_error() { func (self *_Assembler) mismatch_error() { self.Link(_LB_mismatch_error) // _type_error: self.Emit("MOVQ", _VAR_et, _ET) // MOVQ _VAR_et, ET - self.Emit("MOVQ", _VAR_ic, _EP) // MOVQ _VAR_ic, EP self.Emit("MOVQ", _I_json_MismatchTypeError, _CX) // MOVQ _I_json_MismatchType, CX self.Emit("CMPQ", _ET, _CX) // CMPQ ET, CX + self.Emit("MOVQ", jit.Ptr(_ST, _EpOffset), _EP) // MOVQ stack.Ep, EP self.Sjmp("JE" , _LB_error) // JE _LB_error self.Emit("MOVQ", _ARG_sp, _AX) self.Emit("MOVQ", _ARG_sl, _BX) @@ -599,6 +602,28 @@ func (self *_Assembler) _asm_OP_go_skip(p *_Instr) { self.Sjmp("JMP" , _LB_skip_one) // JMP _skip_one } +var _F_IndexByte = jit.Func(strings.IndexByte) + +func (self *_Assembler) _asm_OP_skip_empty(p *_Instr) { + // self.Byte(0xcc) + self.call_sf(_F_skip_one) // CALL_SF skip_one + // self.Byte(0xcc) + self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX + self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v + self.Emit("BTQ", jit.Imm(_F_disable_unknown), _ARG_fv) + self.Xjmp("JNC", p.vi()) + self.Emit("LEAQ", jit.Sib(_IC, _AX, 1, 0), _BX) + self.Emit("MOVQ", _BX, _ARG_sv_n) + self.Emit("LEAQ", jit.Sib(_IP, _AX, 1, 0), _AX) + self.Emit("MOVQ", _AX, _ARG_sv_p) + self.Emit("MOVQ", jit.Imm(':'), _CX) + self.call_go(_F_IndexByte) + // self.Byte(0xcc) + self.Emit("TESTQ", _AX, _AX) + // disallow unknown field + self.Sjmp("JNS", _LB_field_error) +} + func (self *_Assembler) skip_one() { self.Link(_LB_skip_one) // _skip: self.Emit("MOVQ", _VAR_ic, _IC) // MOVQ _VAR_ic, IC @@ -606,7 +631,6 @@ func (self *_Assembler) skip_one() { self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v self.Emit("MOVQ" , _VAR_pc, _R9) // MOVQ pc, R9 - // self.Byte(0xcc) self.Rjmp("JMP" , _R9) // JMP (R9) } @@ -972,11 +996,13 @@ var ( var ( _F_decodeJsonUnmarshaler obj.Addr + _F_decodeJsonUnmarshalerQuoted obj.Addr _F_decodeTextUnmarshaler obj.Addr ) func init() { _F_decodeJsonUnmarshaler = jit.Func(decodeJsonUnmarshaler) + _F_decodeJsonUnmarshalerQuoted = jit.Func(decodeJsonUnmarshalerQuoted) _F_decodeTextUnmarshaler = jit.Func(decodeTextUnmarshaler) } @@ -1057,18 +1083,18 @@ func (self *_Assembler) mapassign_utext(t reflect.Type, addressable bool) { var ( _F_skip_one = jit.Imm(int64(native.S_skip_one)) _F_skip_array = jit.Imm(int64(native.S_skip_array)) - _F_skip_object = jit.Imm(int64(native.S_skip_object)) _F_skip_number = jit.Imm(int64(native.S_skip_number)) ) -func (self *_Assembler) unmarshal_json(t reflect.Type, deref bool) { +func (self *_Assembler) unmarshal_json(t reflect.Type, deref bool, f obj.Addr) { self.call_sf(_F_skip_one) // CALL_SF skip_one self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v + self.Emit("MOVQ", _IC, _VAR_ic) // store for mismatche error skip self.slice_from_r(_AX, 0) // SLICE_R AX, $0 self.Emit("MOVQ" , _DI, _ARG_sv_p) // MOVQ DI, sv.p self.Emit("MOVQ" , _SI, _ARG_sv_n) // MOVQ SI, sv.n - self.unmarshal_func(t, _F_decodeJsonUnmarshaler, deref) // UNMARSHAL json, ${t}, ${deref} + self.unmarshal_func(t, f, deref) // UNMARSHAL json, ${t}, ${deref} } func (self *_Assembler) unmarshal_text(t reflect.Type, deref bool) { @@ -1103,7 +1129,15 @@ func (self *_Assembler) unmarshal_func(t reflect.Type, fn obj.Addr, deref bool) self.Emit("MOVQ" , _ARG_sv_n, _DI) // MOVQ sv.n, DI self.call_go(fn) // CALL_GO ${fn} self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error + self.Sjmp("JZ" , "_unmarshal_func_end_{n}") // JNZ _error + self.Emit("MOVQ", _I_json_MismatchTypeError, _CX) // MOVQ ET, VAR.et + self.Emit("CMPQ", _ET, _CX) // check if MismatchedError + self.Sjmp("JNE" , _LB_error) + self.Emit("MOVQ", jit.Type(t), _CX) // store current type + self.Emit("MOVQ", _CX, _VAR_et) // store current type + self.Emit("MOVQ", _VAR_ic, _IC) // recover the pos + self.Emit("XORL", _ET, _ET) + self.Link("_unmarshal_func_end_{n}") } /** Dynamic Decoding Routine **/ @@ -1136,8 +1170,8 @@ func (self *_Assembler) decode_dynamic(vt obj.Addr, vp obj.Addr) { self.Emit("MOVQ", _I_json_MismatchTypeError, _CX) // MOVQ _I_json_MismatchTypeError, CX self.Emit("CMPQ", _ET, _CX) // CMPQ ET, CX self.Sjmp("JNE", _LB_error) // JNE LB_error - self.Emit("MOVQ", _EP, _VAR_ic) // MOVQ EP, VAR_ic self.Emit("MOVQ", _ET, _VAR_et) // MOVQ ET, VAR_et + self.WriteRecNotAX(14, _EP, jit.Ptr(_ST, _EpOffset), false, false) // MOVQ EP, stack.Ep self.Link("_decode_dynamic_end_{n}") } @@ -1146,7 +1180,7 @@ func (self *_Assembler) decode_dynamic(vt obj.Addr, vp obj.Addr) { var ( _F_memequal = jit.Func(memequal) _F_memmove = jit.Func(memmove) - _F_growslice = jit.Func(growslice) + _F_growslice = jit.Func(rt.GrowSlice) _F_makeslice = jit.Func(makeslice) _F_makemap_small = jit.Func(makemap_small) _F_mapassign_fast64 = jit.Func(mapassign_fast64) @@ -1164,7 +1198,7 @@ var ( var ( _F_FieldMap_GetCaseInsensitive obj.Addr - _Empty_Slice = make([]byte, 0) + _Empty_Slice = []byte{} _Zero_Base = int64(uintptr(((*rt.GoSlice)(unsafe.Pointer(&_Empty_Slice))).Ptr)) ) @@ -1641,7 +1675,8 @@ func (self *_Assembler) _asm_OP_check_empty(p *_Instr) { self.Emit("CMPB", jit.Sib(_IP, _IC, 1, 0), jit.Imm(int64(rbracket))) // CMPB (IP)(IC), ']' self.Sjmp("JNE" , "_not_empty_array_{n}") // JNE _not_empty_array_{n} self.Emit("MOVQ", _AX, _IC) // MOVQ AX, IC - self.StorePtr(_Zero_Base, jit.Ptr(_VP, 0), _AX) // MOVQ $zerobase, (VP) + self.Emit("MOVQ", jit.Imm(_Zero_Base), _AX) + self.WritePtrAX(9, jit.Ptr(_VP, 0), false) self.Emit("PXOR", _X0, _X0) // PXOR X0, X0 self.Emit("MOVOU", _X0, jit.Ptr(_VP, 8)) // MOVOU X0, 8(VP) self.Xjmp("JMP" , p.vi()) // JMP {p.vi()} @@ -1697,12 +1732,6 @@ func (self *_Assembler) _asm_OP_slice_append(p *_Instr) { self.Link("_append_slice_end_{n}") } -func (self *_Assembler) _asm_OP_object_skip(_ *_Instr) { - self.call_sf(_F_skip_object) // CALL_SF skip_object - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , _LB_parsing_error_v) // JS _parse_error_v -} - func (self *_Assembler) _asm_OP_object_next(_ *_Instr) { self.call_sf(_F_skip_one) // CALL_SF skip_one self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX @@ -1773,11 +1802,19 @@ func (self *_Assembler) _asm_OP_struct_field(p *_Instr) { } func (self *_Assembler) _asm_OP_unmarshal(p *_Instr) { - self.unmarshal_json(p.vt(), true) + if iv := p.i64(); iv != 0 { + self.unmarshal_json(p.vt(), true, _F_decodeJsonUnmarshalerQuoted) + } else { + self.unmarshal_json(p.vt(), true, _F_decodeJsonUnmarshaler) + } } func (self *_Assembler) _asm_OP_unmarshal_p(p *_Instr) { - self.unmarshal_json(p.vt(), false) + if iv := p.i64(); iv != 0 { + self.unmarshal_json(p.vt(), false, _F_decodeJsonUnmarshalerQuoted) + } else { + self.unmarshal_json(p.vt(), false, _F_decodeJsonUnmarshaler) + } } func (self *_Assembler) _asm_OP_unmarshal_text(p *_Instr) { diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/compiler.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/compiler.go similarity index 93% rename from vendor/github.com/bytedance/sonic/internal/decoder/compiler.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/compiler.go index e9e2b77f..a097d171 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/compiler.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/compiler.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package decoder +package jitdec import ( `encoding/json` @@ -77,7 +77,6 @@ const ( _OP_array_clear_p _OP_slice_init _OP_slice_append - _OP_object_skip _OP_object_next _OP_struct_field _OP_unmarshal @@ -97,6 +96,7 @@ const ( _OP_check_char_0 _OP_dismatch_err _OP_go_skip + _OP_skip_emtpy _OP_add _OP_check_empty _OP_debug @@ -155,7 +155,6 @@ var _OpNames = [256]string { _OP_array_skip : "array_skip", _OP_slice_init : "slice_init", _OP_slice_append : "slice_append", - _OP_object_skip : "object_skip", _OP_object_next : "object_next", _OP_struct_field : "struct_field", _OP_unmarshal : "unmarshal", @@ -271,6 +270,13 @@ func newInsVt(op _Op, vt reflect.Type) _Instr { } } +func newInsVtI(op _Op, vt reflect.Type, iv int) _Instr { + return _Instr { + u: packOp(op) | rt.PackInt(iv), + p: unsafe.Pointer(rt.UnpackType(vt)), + } +} + func newInsVf(op _Op, vf *caching.FieldMap) _Instr { return _Instr { u: packOp(op), @@ -452,6 +458,10 @@ func (self *_Program) rtt(op _Op, vt reflect.Type) { *self = append(*self, newInsVt(op, vt)) } +func (self *_Program) rtti(op _Op, vt reflect.Type, iv int) { + *self = append(*self, newInsVtI(op, vt, iv)) +} + func (self *_Program) fmv(op _Op, vf *caching.FieldMap) { *self = append(*self, newInsVf(op, vf)) } @@ -527,40 +537,66 @@ func (self *_Compiler) compile(vt reflect.Type) (ret _Program, err error) { return } -func (self *_Compiler) compileOne(p *_Program, sp int, vt reflect.Type) { - /* check for recursive nesting */ - ok := self.tab[vt] - if ok { - p.rtt(_OP_recurse, vt) - return - } +const ( + checkMarshalerFlags_quoted = 1 +) +func (self *_Compiler) checkMarshaler(p *_Program, vt reflect.Type, flags int, exec bool) bool { pt := reflect.PtrTo(vt) /* check for `json.Unmarshaler` with pointer receiver */ if pt.Implements(jsonUnmarshalerType) { - p.rtt(_OP_unmarshal_p, pt) - return + if exec { + p.add(_OP_lspace) + p.rtti(_OP_unmarshal_p, pt, flags) + } + return true } /* check for `json.Unmarshaler` */ if vt.Implements(jsonUnmarshalerType) { - p.add(_OP_lspace) - self.compileUnmarshalJson(p, vt) - return + if exec { + p.add(_OP_lspace) + self.compileUnmarshalJson(p, vt, flags) + } + return true + } + + if flags == checkMarshalerFlags_quoted { + // text marshaler shouldn't be supported for quoted string + return false } /* check for `encoding.TextMarshaler` with pointer receiver */ if pt.Implements(encodingTextUnmarshalerType) { - p.add(_OP_lspace) - self.compileUnmarshalTextPtr(p, pt) - return + if exec { + p.add(_OP_lspace) + self.compileUnmarshalTextPtr(p, pt, flags) + } + return true } /* check for `encoding.TextUnmarshaler` */ if vt.Implements(encodingTextUnmarshalerType) { - p.add(_OP_lspace) - self.compileUnmarshalText(p, vt) + if exec { + p.add(_OP_lspace) + self.compileUnmarshalText(p, vt, flags) + } + return true + } + + return false +} + +func (self *_Compiler) compileOne(p *_Program, sp int, vt reflect.Type) { + /* check for recursive nesting */ + ok := self.tab[vt] + if ok { + p.rtt(_OP_recurse, vt) + return + } + + if self.checkMarshaler(p, vt, 0, true) { return } @@ -683,17 +719,9 @@ func (self *_Compiler) compilePtr(p *_Program, sp int, et reflect.Type) { /* dereference all the way down */ for et.Kind() == reflect.Ptr { - if et.Implements(jsonUnmarshalerType) { - p.rtt(_OP_unmarshal_p, et) - return - } - - if et.Implements(encodingTextUnmarshalerType) { - p.add(_OP_lspace) - self.compileUnmarshalTextPtr(p, et) + if self.checkMarshaler(p, et, 0, true) { return } - et = et.Elem() p.rtt(_OP_deref, et) } @@ -706,7 +734,7 @@ func (self *_Compiler) compilePtr(p *_Program, sp int, et reflect.Type) { /* enter the recursion */ p.add(_OP_lspace) self.tab[et] = true - + /* not inline the pointer type * recursing the defined pointer type's elem will casue issue379. */ @@ -716,8 +744,12 @@ func (self *_Compiler) compilePtr(p *_Program, sp int, et reflect.Type) { j := p.pc() p.add(_OP_goto) + + // set val pointer as nil p.pin(i) p.add(_OP_nil_1) + + // nothing todo p.pin(j) } @@ -869,7 +901,24 @@ func (self *_Compiler) compileStructBody(p *_Program, sp int, vt reflect.Type) { n := p.pc() p.add(_OP_is_null) - skip := self.checkIfSkip(p, vt, '{') + j := p.pc() + p.chr(_OP_check_char_0, '{') + p.rtt(_OP_dismatch_err, vt) + + /* special case for empty object */ + if len(fv) == 0 { + p.pin(j) + s := p.pc() + p.add(_OP_skip_emtpy) + p.pin(s) + p.pin(n) + return + } + + skip := p.pc() + p.add(_OP_go_skip) + p.pin(j) + p.int(_OP_add, 1) p.add(_OP_save) p.add(_OP_lspace) @@ -887,11 +936,6 @@ func (self *_Compiler) compileStructBody(p *_Program, sp int, vt reflect.Type) { p.chr(_OP_check_char, '}') p.chr(_OP_match_char, ',') - /* special case of an empty struct */ - if len(fv) == 0 { - p.add(_OP_object_skip) - goto end_of_object - } /* match the remaining fields */ p.add(_OP_lspace) @@ -927,7 +971,6 @@ func (self *_Compiler) compileStructBody(p *_Program, sp int, vt reflect.Type) { p.int(_OP_goto, y0) } -end_of_object: p.pin(x) p.pin(y1) p.add(_OP_drop) @@ -935,7 +978,22 @@ end_of_object: p.pin(skip) } +func (self *_Compiler) compileStructFieldStrUnmarshal(p *_Program, vt reflect.Type) { + p.add(_OP_lspace) + n0 := p.pc() + p.add(_OP_is_null) + self.checkMarshaler(p, vt, checkMarshalerFlags_quoted, true) + p.pin(n0) +} + func (self *_Compiler) compileStructFieldStr(p *_Program, sp int, vt reflect.Type) { + // according to std, json.Unmarshaler should be called before stringize + // see https://github.com/bytedance/sonic/issues/670 + if self.checkMarshaler(p, vt, checkMarshalerFlags_quoted, false) { + self.compileStructFieldStrUnmarshal(p, vt) + return + } + n1 := -1 ft := vt sv := false @@ -1103,7 +1161,7 @@ func (self *_Compiler) compileUnmarshalEnd(p *_Program, vt reflect.Type, i int) p.pin(j) } -func (self *_Compiler) compileUnmarshalJson(p *_Program, vt reflect.Type) { +func (self *_Compiler) compileUnmarshalJson(p *_Program, vt reflect.Type, flags int) { i := p.pc() v := _OP_unmarshal p.add(_OP_is_null) @@ -1114,11 +1172,11 @@ func (self *_Compiler) compileUnmarshalJson(p *_Program, vt reflect.Type) { } /* call the unmarshaler */ - p.rtt(v, vt) + p.rtti(v, vt, flags) self.compileUnmarshalEnd(p, vt, i) } -func (self *_Compiler) compileUnmarshalText(p *_Program, vt reflect.Type) { +func (self *_Compiler) compileUnmarshalText(p *_Program, vt reflect.Type, iv int) { i := p.pc() v := _OP_unmarshal_text p.add(_OP_is_null) @@ -1131,15 +1189,15 @@ func (self *_Compiler) compileUnmarshalText(p *_Program, vt reflect.Type) { } /* call the unmarshaler */ - p.rtt(v, vt) + p.rtti(v, vt, iv) self.compileUnmarshalEnd(p, vt, i) } -func (self *_Compiler) compileUnmarshalTextPtr(p *_Program, vt reflect.Type) { +func (self *_Compiler) compileUnmarshalTextPtr(p *_Program, vt reflect.Type, iv int) { i := p.pc() p.add(_OP_is_null) p.chr(_OP_match_char, '"') - p.rtt(_OP_unmarshal_text_p, vt) + p.rtti(_OP_unmarshal_text_p, vt, iv) p.pin(i) } diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/debug.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/debug.go similarity index 99% rename from vendor/github.com/bytedance/sonic/internal/decoder/debug.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/debug.go index d5537ed9..b59a3e57 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/debug.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/debug.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package decoder +package jitdec import ( `os` diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/decoder.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/decoder.go new file mode 100644 index 00000000..bbb4b4b6 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/decoder.go @@ -0,0 +1,140 @@ +package jitdec + +import ( + `unsafe` + `encoding/json` + `reflect` + `runtime` + + `github.com/bytedance/sonic/internal/decoder/consts` + `github.com/bytedance/sonic/internal/decoder/errors` + `github.com/bytedance/sonic/internal/rt` + `github.com/bytedance/sonic/utf8` + `github.com/bytedance/sonic/option` +) + +type ( + MismatchTypeError = errors.MismatchTypeError + SyntaxError = errors.SyntaxError +) + +const ( + _F_allow_control = consts.F_allow_control + _F_copy_string = consts.F_copy_string + _F_disable_unknown = consts.F_disable_unknown + _F_disable_urc = consts.F_disable_urc + _F_use_int64 = consts.F_use_int64 + _F_use_number = consts.F_use_number + _F_no_validate_json = consts.F_no_validate_json + _F_validate_string = consts.F_validate_string +) + +var ( + error_wrap = errors.ErrorWrap + error_type = errors.ErrorType + error_field = errors.ErrorField + error_value = errors.ErrorValue + error_mismatch = errors.ErrorMismatch + stackOverflow = errors.StackOverflow +) + + +// Decode parses the JSON-encoded data from current position and stores the result +// in the value pointed to by val. +func Decode(s *string, i *int, f uint64, val interface{}) error { + /* validate json if needed */ + if (f & (1 << _F_validate_string)) != 0 && !utf8.ValidateString(*s){ + dbuf := utf8.CorrectWith(nil, rt.Str2Mem(*s), "\ufffd") + *s = rt.Mem2Str(dbuf) + } + + vv := rt.UnpackEface(val) + vp := vv.Value + + /* check for nil type */ + if vv.Type == nil { + return &json.InvalidUnmarshalError{} + } + + /* must be a non-nil pointer */ + if vp == nil || vv.Type.Kind() != reflect.Ptr { + return &json.InvalidUnmarshalError{Type: vv.Type.Pack()} + } + + etp := rt.PtrElem(vv.Type) + + /* check the defined pointer type for issue 379 */ + if vv.Type.IsNamed() { + newp := vp + etp = vv.Type + vp = unsafe.Pointer(&newp) + } + + /* create a new stack, and call the decoder */ + sb := newStack() + nb, err := decodeTypedPointer(*s, *i, etp, vp, sb, f) + /* return the stack back */ + *i = nb + freeStack(sb) + + /* avoid GC ahead */ + runtime.KeepAlive(vv) + return err +} + + +// Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in +// order to reduce the first-hit latency. +// +// Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is +// a compile option to set the depth of recursive compile for the nested struct type. +func Pretouch(vt reflect.Type, opts ...option.CompileOption) error { + cfg := option.DefaultCompileOptions() + for _, opt := range opts { + opt(&cfg) + } + return pretouchRec(map[reflect.Type]bool{vt:true}, cfg) +} + +func pretouchType(_vt reflect.Type, opts option.CompileOptions) (map[reflect.Type]bool, error) { + /* compile function */ + compiler := newCompiler().apply(opts) + decoder := func(vt *rt.GoType, _ ...interface{}) (interface{}, error) { + if pp, err := compiler.compile(_vt); err != nil { + return nil, err + } else { + as := newAssembler(pp) + as.name = _vt.String() + return as.Load(), nil + } + } + + /* find or compile */ + vt := rt.UnpackType(_vt) + if val := programCache.Get(vt); val != nil { + return nil, nil + } else if _, err := programCache.Compute(vt, decoder); err == nil { + return compiler.rec, nil + } else { + return nil, err + } +} + +func pretouchRec(vtm map[reflect.Type]bool, opts option.CompileOptions) error { + if opts.RecursiveDepth < 0 || len(vtm) == 0 { + return nil + } + next := make(map[reflect.Type]bool) + for vt := range(vtm) { + sub, err := pretouchType(vt, opts) + if err != nil { + return err + } + for svt := range(sub) { + next[svt] = true + } + } + opts.RecursiveDepth -= 1 + return pretouchRec(next, opts) +} + diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/generic_regabi_amd64.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/generic_regabi_amd64.go similarity index 99% rename from vendor/github.com/bytedance/sonic/internal/decoder/generic_regabi_amd64.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/generic_regabi_amd64.go index 337af054..e6d5e3e8 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/generic_regabi_amd64.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/generic_regabi_amd64.go @@ -1,4 +1,4 @@ -// +build go1.17,!go1.22 +// +build go1.17,!go1.24 /* * Copyright 2021 ByteDance Inc. @@ -16,7 +16,7 @@ * limitations under the License. */ -package decoder +package jitdec import ( `encoding/json` @@ -119,9 +119,9 @@ func (self *_ValueDecoder) call_go(fn obj.Addr) { } func (self *_ValueDecoder) callc(fn obj.Addr) { - self.Emit("XCHGQ", _IP, _BP) + self.save(_IP) self.call(fn) - self.Emit("XCHGQ", _IP, _BP) + self.load(_IP) } func (self *_ValueDecoder) call_c(fn obj.Addr) { diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/generic_regabi_amd64_test.s b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/generic_regabi_amd64_test.s similarity index 97% rename from vendor/github.com/bytedance/sonic/internal/decoder/generic_regabi_amd64_test.s rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/generic_regabi_amd64_test.s index 1c46928d..19ed3752 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/generic_regabi_amd64_test.s +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/generic_regabi_amd64_test.s @@ -1,4 +1,4 @@ -// +build go1.17,!go1.22 +// +build go1.17,!go1.24 // // Copyright 2021 ByteDance Inc. diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/pools.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/pools.go similarity index 97% rename from vendor/github.com/bytedance/sonic/internal/decoder/pools.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/pools.go index bcd14cc6..01868cb2 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/pools.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/pools.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package decoder +package jitdec import ( `sync` @@ -36,6 +36,7 @@ const ( _PtrBytes = _PTR_SIZE / 8 _FsmOffset = (_MaxStack + 1) * _PtrBytes _DbufOffset = _FsmOffset + int64(unsafe.Sizeof(types.StateMachine{})) + types.MAX_RECURSE * _PtrBytes + _EpOffset = _DbufOffset + _MaxDigitNums _StackSize = unsafe.Sizeof(_Stack{}) ) @@ -53,6 +54,7 @@ type _Stack struct { mm types.StateMachine vp [types.MAX_RECURSE]unsafe.Pointer dp [_MaxDigitNums]byte + ep unsafe.Pointer } type _Decoder func( diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/primitives.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/primitives.go similarity index 84% rename from vendor/github.com/bytedance/sonic/internal/decoder/primitives.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/primitives.go index d6053e2c..5adfc038 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/primitives.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/primitives.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package decoder +package jitdec import ( `encoding` @@ -30,9 +30,7 @@ func decodeTypedPointer(s string, i int, vt *rt.GoType, vp unsafe.Pointer, sb *_ return 0, err } else { rt.MoreStack(_FP_size + _VD_size + native.MaxFrameSize) - rt.StopProf() ret, err := fn(s, i, vp, sb, fv, "", nil) - rt.StartProf() return ret, err } } @@ -41,6 +39,13 @@ func decodeJsonUnmarshaler(vv interface{}, s string) error { return vv.(json.Unmarshaler).UnmarshalJSON(rt.Str2Mem(s)) } +func decodeJsonUnmarshalerQuoted(vv interface{}, s string) error { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { + return &MismatchTypeError{} + } + return vv.(json.Unmarshaler).UnmarshalJSON(rt.Str2Mem(s[1:len(s)-1])) +} + func decodeTextUnmarshaler(vv interface{}, s string) error { return vv.(encoding.TextUnmarshaler).UnmarshalText(rt.Str2Mem(s)) } diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/stubs_go116.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/stubs_go116.go similarity index 91% rename from vendor/github.com/bytedance/sonic/internal/decoder/stubs_go116.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/stubs_go116.go index c6e133d8..8fa7c32f 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/stubs_go116.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/stubs_go116.go @@ -1,4 +1,4 @@ -// +build go1.16,!go1.20 +// +build go1.17,!go1.20 /* * Copyright 2021 ByteDance Inc. @@ -16,18 +16,18 @@ * limitations under the License. */ -package decoder +package jitdec import ( `unsafe` `reflect` - _ `github.com/chenzhuoyu/base64x` + _ `github.com/cloudwego/base64x` `github.com/bytedance/sonic/internal/rt` ) -//go:linkname _subr__b64decode github.com/chenzhuoyu/base64x._subr__b64decode +//go:linkname _subr__b64decode github.com/cloudwego/base64x._subr__b64decode var _subr__b64decode uintptr // runtime.maxElementSize @@ -72,11 +72,6 @@ func mallocgc(size uintptr, typ *rt.GoType, needzero bool) unsafe.Pointer //goland:noinspection GoUnusedParameter func makeslice(et *rt.GoType, len int, cap int) unsafe.Pointer -//go:noescape -//go:linkname growslice runtime.growslice -//goland:noinspection GoUnusedParameter -func growslice(et *rt.GoType, old rt.GoSlice, cap int) rt.GoSlice - //go:linkname makemap_small runtime.makemap_small func makemap_small() unsafe.Pointer diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/stubs_go120.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/stubs_go120.go similarity index 91% rename from vendor/github.com/bytedance/sonic/internal/decoder/stubs_go120.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/stubs_go120.go index 73960ea1..a6dad26d 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/stubs_go120.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/stubs_go120.go @@ -16,18 +16,18 @@ * limitations under the License. */ -package decoder +package jitdec import ( `unsafe` `reflect` - _ `github.com/chenzhuoyu/base64x` + _ `github.com/cloudwego/base64x` `github.com/bytedance/sonic/internal/rt` ) -//go:linkname _subr__b64decode github.com/chenzhuoyu/base64x._subr__b64decode +//go:linkname _subr__b64decode github.com/cloudwego/base64x._subr__b64decode var _subr__b64decode uintptr // runtime.maxElementSize @@ -72,11 +72,6 @@ func mallocgc(size uintptr, typ *rt.GoType, needzero bool) unsafe.Pointer //goland:noinspection GoUnusedParameter func makeslice(et *rt.GoType, len int, cap int) unsafe.Pointer -//go:noescape -//go:linkname growslice reflect.growslice -//goland:noinspection GoUnusedParameter -func growslice(et *rt.GoType, old rt.GoSlice, cap int) rt.GoSlice - //go:linkname makemap_small runtime.makemap_small func makemap_small() unsafe.Pointer diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/types.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/types.go similarity index 99% rename from vendor/github.com/bytedance/sonic/internal/decoder/types.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/types.go index 6fc0e706..c196eb5b 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/types.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/types.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package decoder +package jitdec import ( `encoding` diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/utils.go b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/utils.go similarity index 98% rename from vendor/github.com/bytedance/sonic/internal/decoder/utils.go rename to vendor/github.com/bytedance/sonic/internal/decoder/jitdec/utils.go index 23ee5d50..0a7a2028 100644 --- a/vendor/github.com/bytedance/sonic/internal/decoder/utils.go +++ b/vendor/github.com/bytedance/sonic/internal/decoder/jitdec/utils.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package decoder +package jitdec import ( `unsafe` diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/compile_struct.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/compile_struct.go new file mode 100644 index 00000000..713fb656 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/compile_struct.go @@ -0,0 +1,174 @@ +package optdec + +import ( + "fmt" + "reflect" + + caching "github.com/bytedance/sonic/internal/optcaching" + "github.com/bytedance/sonic/internal/rt" + "github.com/bytedance/sonic/internal/resolver" +) + +const ( + _MAX_FIELDS = 50 // cutoff at 50 fields struct +) + +func (c *compiler) compileIntStringOption(vt reflect.Type) decFunc { + switch vt.Size() { + case 4: + switch vt.Kind() { + case reflect.Uint: + fallthrough + case reflect.Uintptr: + return &u32StringDecoder{} + case reflect.Int: + return &i32StringDecoder{} + } + case 8: + switch vt.Kind() { + case reflect.Uint: + fallthrough + case reflect.Uintptr: + return &u64StringDecoder{} + case reflect.Int: + return &i64StringDecoder{} + } + default: + panic("not supported pointer size: " + fmt.Sprint(vt.Size())) + } + panic("unreachable") +} + +func isInteger(vt reflect.Type) bool { + switch vt.Kind() { + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr, reflect.Int: return true + default: return false + } +} + +func (c *compiler) assertStringOptTypes(vt reflect.Type) { + if c.depth > _CompileMaxDepth { + panic(*stackOverflow) + } + + c.depth += 1 + defer func () { + c.depth -= 1 + }() + + if isInteger(vt) { + return + } + + switch vt.Kind() { + case reflect.String, reflect.Bool, reflect.Float32, reflect.Float64: + return + case reflect.Ptr: c.assertStringOptTypes(vt.Elem()) + default: + panicForInvalidStrType(vt) + } +} + +func (c *compiler) compileFieldStringOption(vt reflect.Type) decFunc { + c.assertStringOptTypes(vt) + unmDec := c.tryCompilePtrUnmarshaler(vt, true) + if unmDec != nil { + return unmDec + } + + switch vt.Kind() { + case reflect.String: + if vt == jsonNumberType { + return &numberStringDecoder{} + } + return &strStringDecoder{} + case reflect.Bool: + return &boolStringDecoder{} + case reflect.Int8: + return &i8StringDecoder{} + case reflect.Int16: + return &i16StringDecoder{} + case reflect.Int32: + return &i32StringDecoder{} + case reflect.Int64: + return &i64StringDecoder{} + case reflect.Uint8: + return &u8StringDecoder{} + case reflect.Uint16: + return &u16StringDecoder{} + case reflect.Uint32: + return &u32StringDecoder{} + case reflect.Uint64: + return &u64StringDecoder{} + case reflect.Float32: + return &f32StringDecoder{} + case reflect.Float64: + return &f64StringDecoder{} + case reflect.Uint: + fallthrough + case reflect.Uintptr: + fallthrough + case reflect.Int: + return c.compileIntStringOption(vt) + case reflect.Ptr: + return &ptrStrDecoder{ + typ: rt.UnpackType(vt.Elem()), + deref: c.compileFieldStringOption(vt.Elem()), + } + default: + panicForInvalidStrType(vt) + return nil + } +} + +func (c *compiler) compileStruct(vt reflect.Type) decFunc { + c.enter(vt) + defer c.exit(vt) + if c.namedPtr { + c.namedPtr = false + return c.compileStructBody(vt) + } + + if c.depth >= c.opts.MaxInlineDepth + 1 || (c.counts > 0 && vt.NumField() >= _MAX_FIELDS) { + return &recuriveDecoder{ + typ: rt.UnpackType(vt), + } + } else { + return c.compileStructBody(vt) + } +} + +func (c *compiler) compileStructBody(vt reflect.Type) decFunc { + fv := resolver.ResolveStruct(vt) + entries := make([]fieldEntry, 0, len(fv)) + + for _, f := range fv { + var dec decFunc + /* dealt with field tag options */ + if f.Opts&resolver.F_stringize != 0 { + dec = c.compileFieldStringOption(f.Type) + } else { + dec = c.compile(f.Type) + } + + /* deal with embedded pointer fields */ + if f.Path[0].Kind == resolver.F_deref { + dec = &embeddedFieldPtrDecoder{ + field: f, + fieldDec: dec, + fieldName: f.Name, + } + } + + entries = append(entries, fieldEntry{ + FieldMeta: f, + fieldDec: dec, + }) + } + return &structDecoder{ + fieldMap: caching.NewFieldLookup(fv), + fields: entries, + structName: vt.Name(), + typ: vt, + } +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/compiler.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/compiler.go new file mode 100644 index 00000000..fd164af9 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/compiler.go @@ -0,0 +1,449 @@ +package optdec + +import ( + "encoding/json" + "fmt" + "reflect" + + "github.com/bytedance/sonic/option" + "github.com/bytedance/sonic/internal/rt" + "github.com/bytedance/sonic/internal/caching" +) + +var ( + programCache = caching.CreateProgramCache() +) + +func findOrCompile(vt *rt.GoType) (decFunc, error) { + makeDecoder := func(vt *rt.GoType, _ ...interface{}) (interface{}, error) { + ret, err := newCompiler().compileType(vt.Pack()) + return ret, err + } + if val := programCache.Get(vt); val != nil { + return val.(decFunc), nil + } else if ret, err := programCache.Compute(vt, makeDecoder); err == nil { + return ret.(decFunc), nil + } else { + return nil, err + } +} + +type compiler struct { + visited map[reflect.Type]bool + depth int + counts int + opts option.CompileOptions + namedPtr bool +} + +func newCompiler() *compiler { + return &compiler{ + visited: make(map[reflect.Type]bool), + opts: option.DefaultCompileOptions(), + } +} + +func (self *compiler) apply(opts option.CompileOptions) *compiler { + self.opts = opts + return self +} + +const _CompileMaxDepth = 4096 + +func (c *compiler) enter(vt reflect.Type) { + c.visited[vt] = true + c.depth += 1 + + if c.depth > _CompileMaxDepth { + panic(*stackOverflow) + } +} + +func (c *compiler) exit(vt reflect.Type) { + c.visited[vt] = false + c.depth -= 1 +} + +func (c *compiler) compileInt(vt reflect.Type) decFunc { + switch vt.Size() { + case 4: + switch vt.Kind() { + case reflect.Uint: + fallthrough + case reflect.Uintptr: + return &u32Decoder{} + case reflect.Int: + return &i32Decoder{} + } + case 8: + switch vt.Kind() { + case reflect.Uint: + fallthrough + case reflect.Uintptr: + return &u64Decoder{} + case reflect.Int: + return &i64Decoder{} + } + default: + panic("not supported pointer size: " + fmt.Sprint(vt.Size())) + } + panic("unreachable") +} + +func (c *compiler) rescue(ep *error) { + if val := recover(); val != nil { + if err, ok := val.(error); ok { + *ep = err + } else { + panic(val) + } + } +} + +func (c *compiler) compileType(vt reflect.Type) (rt decFunc, err error) { + defer c.rescue(&err) + rt = c.compile(vt) + return rt, err +} + +func (c *compiler) compile(vt reflect.Type) decFunc { + if c.visited[vt] { + return &recuriveDecoder{ + typ: rt.UnpackType(vt), + } + } + + dec := c.tryCompilePtrUnmarshaler(vt, false) + if dec != nil { + return dec + } + + return c.compileBasic(vt) +} + +func (c *compiler) compileBasic(vt reflect.Type) decFunc { + defer func() { + c.counts += 1 + }() + switch vt.Kind() { + case reflect.Bool: + return &boolDecoder{} + case reflect.Int8: + return &i8Decoder{} + case reflect.Int16: + return &i16Decoder{} + case reflect.Int32: + return &i32Decoder{} + case reflect.Int64: + return &i64Decoder{} + case reflect.Uint8: + return &u8Decoder{} + case reflect.Uint16: + return &u16Decoder{} + case reflect.Uint32: + return &u32Decoder{} + case reflect.Uint64: + return &u64Decoder{} + case reflect.Float32: + return &f32Decoder{} + case reflect.Float64: + return &f64Decoder{} + case reflect.Uint: + fallthrough + case reflect.Uintptr: + fallthrough + case reflect.Int: + return c.compileInt(vt) + case reflect.String: + return c.compileString(vt) + case reflect.Array: + return c.compileArray(vt) + case reflect.Interface: + return c.compileInterface(vt) + case reflect.Map: + return c.compileMap(vt) + case reflect.Ptr: + return c.compilePtr(vt) + case reflect.Slice: + return c.compileSlice(vt) + case reflect.Struct: + return c.compileStruct(vt) + default: + panic(&json.UnmarshalTypeError{Type: vt}) + } +} + +func (c *compiler) compilePtr(vt reflect.Type) decFunc { + c.enter(vt) + defer c.exit(vt) + + // specail logic for Named Ptr, issue 379 + if reflect.PtrTo(vt.Elem()) != vt { + c.namedPtr = true + return &ptrDecoder{ + typ: rt.UnpackType(vt.Elem()), + deref: c.compileBasic(vt.Elem()), + } + } + + return &ptrDecoder{ + typ: rt.UnpackType(vt.Elem()), + deref: c.compile(vt.Elem()), + } +} + +func (c *compiler) compileArray(vt reflect.Type) decFunc { + c.enter(vt) + defer c.exit(vt) + return &arrayDecoder{ + len: vt.Len(), + elemType: rt.UnpackType(vt.Elem()), + elemDec: c.compile(vt.Elem()), + typ: vt, + } +} + +func (c *compiler) compileString(vt reflect.Type) decFunc { + if vt == jsonNumberType { + return &numberDecoder{} + } + return &stringDecoder{} + +} + +func (c *compiler) tryCompileSliceUnmarshaler(vt reflect.Type) decFunc { + pt := reflect.PtrTo(vt.Elem()) + if pt.Implements(jsonUnmarshalerType) { + return &sliceDecoder{ + elemType: rt.UnpackType(vt.Elem()), + elemDec: c.compile(vt.Elem()), + typ: vt, + } + } + + if pt.Implements(encodingTextUnmarshalerType) { + return &sliceDecoder{ + elemType: rt.UnpackType(vt.Elem()), + elemDec: c.compile(vt.Elem()), + typ: vt, + } + } + return nil +} + +func (c *compiler) compileSlice(vt reflect.Type) decFunc { + c.enter(vt) + defer c.exit(vt) + + // Some common slice, use a decoder, to avoid function calls + et := rt.UnpackType(vt.Elem()) + + /* first checking `[]byte` */ + if et.Kind() == reflect.Uint8 /* []byte */ { + return c.compileSliceBytes(vt) + } + + dec := c.tryCompileSliceUnmarshaler(vt) + if dec != nil { + return dec + } + + if vt == reflect.TypeOf([]interface{}{}) { + return &sliceEfaceDecoder{} + } + if et.IsInt32() { + return &sliceI32Decoder{} + } + if et.IsInt64() { + return &sliceI64Decoder{} + } + if et.IsUint32() { + return &sliceU32Decoder{} + } + if et.IsUint64() { + return &sliceU64Decoder{} + } + if et.Kind() == reflect.String { + return &sliceStringDecoder{} + } + + return &sliceDecoder{ + elemType: rt.UnpackType(vt.Elem()), + elemDec: c.compile(vt.Elem()), + typ: vt, + } +} + +func (c *compiler) compileSliceBytes(vt reflect.Type) decFunc { + ep := reflect.PtrTo(vt.Elem()) + + if ep.Implements(jsonUnmarshalerType) { + return &sliceBytesUnmarshalerDecoder{ + elemType: rt.UnpackType(vt.Elem()), + elemDec: c.compile(vt.Elem()), + typ: vt, + } + } + + if ep.Implements(encodingTextUnmarshalerType) { + return &sliceBytesUnmarshalerDecoder{ + elemType: rt.UnpackType(vt.Elem()), + elemDec: c.compile(vt.Elem()), + typ: vt, + } + } + + return &sliceBytesDecoder{} +} + +func (c *compiler) compileInterface(vt reflect.Type) decFunc { + c.enter(vt) + defer c.exit(vt) + if vt.NumMethod() == 0 { + return &efaceDecoder{} + } + + if vt.Implements(jsonUnmarshalerType) { + return &unmarshalJSONDecoder{ + typ: rt.UnpackType(vt), + } + } + + if vt.Implements(encodingTextUnmarshalerType) { + return &unmarshalTextDecoder{ + typ: rt.UnpackType(vt), + } + } + + return &ifaceDecoder{ + typ: rt.UnpackType(vt), + } +} + +func (c *compiler) compileMap(vt reflect.Type) decFunc { + c.enter(vt) + defer c.exit(vt) + // check the key unmarshaler at first + decKey := tryCompileKeyUnmarshaler(vt) + if decKey != nil { + return &mapDecoder{ + mapType: rt.MapType(rt.UnpackType(vt)), + keyDec: decKey, + elemDec: c.compile(vt.Elem()), + } + } + + // Most common map, use a decoder, to avoid function calls + if vt == reflect.TypeOf(map[string]interface{}{}) { + return &mapEfaceDecoder{} + } else if vt == reflect.TypeOf(map[string]string{}) { + return &mapStringDecoder{} + } + + // Some common integer map later + mt := rt.MapType(rt.UnpackType(vt)) + + if mt.Key.Kind() == reflect.String { + return &mapStrKeyDecoder{ + mapType: mt, + assign: rt.GetMapStrAssign(vt), + elemDec: c.compile(vt.Elem()), + } + } + + if mt.Key.IsInt64() { + return &mapI64KeyDecoder{ + mapType: mt, + elemDec: c.compile(vt.Elem()), + assign: rt.GetMap64Assign(vt), + } + } + + if mt.Key.IsInt32() { + return &mapI32KeyDecoder{ + mapType: mt, + elemDec: c.compile(vt.Elem()), + assign: rt.GetMap32Assign(vt), + } + } + + if mt.Key.IsUint64() { + return &mapU64KeyDecoder{ + mapType: mt, + elemDec: c.compile(vt.Elem()), + assign: rt.GetMap64Assign(vt), + } + } + + if mt.Key.IsUint32() { + return &mapU32KeyDecoder{ + mapType: mt, + elemDec: c.compile(vt.Elem()), + assign: rt.GetMap32Assign(vt), + } + } + + // Generic map + return &mapDecoder{ + mapType: mt, + keyDec: c.compileMapKey(vt), + elemDec: c.compile(vt.Elem()), + } +} + +func tryCompileKeyUnmarshaler(vt reflect.Type) decKey { + pt := reflect.PtrTo(vt.Key()) + + /* check for `encoding.TextUnmarshaler` with pointer receiver */ + if pt.Implements(encodingTextUnmarshalerType) { + return decodeKeyTextUnmarshaler + } + + /* not support map key with `json.Unmarshaler` */ + return nil +} + +func (c *compiler) compileMapKey(vt reflect.Type) decKey { + switch vt.Key().Kind() { + case reflect.Int8: + return decodeKeyI8 + case reflect.Int16: + return decodeKeyI16 + case reflect.Uint8: + return decodeKeyU8 + case reflect.Uint16: + return decodeKeyU16 + default: + panic(&json.UnmarshalTypeError{Type: vt}) + } +} + +// maybe vt is a named type, and not a pointer receiver, see issue 379 +func (c *compiler) tryCompilePtrUnmarshaler(vt reflect.Type, strOpt bool) decFunc { + pt := reflect.PtrTo(vt) + + /* check for `json.Unmarshaler` with pointer receiver */ + if pt.Implements(jsonUnmarshalerType) { + return &unmarshalJSONDecoder{ + typ: rt.UnpackType(pt), + strOpt: strOpt, + } + } + + /* check for `encoding.TextMarshaler` with pointer receiver */ + if pt.Implements(encodingTextUnmarshalerType) { + /* TextUnmarshal not support ,strig tag */ + if strOpt { + panicForInvalidStrType(vt) + } + return &unmarshalTextDecoder{ + typ: rt.UnpackType(pt), + } + } + + return nil +} + +func panicForInvalidStrType(vt reflect.Type) { + panic(error_type(rt.UnpackType(vt))) +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/const.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/const.go new file mode 100644 index 00000000..77879faf --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/const.go @@ -0,0 +1,60 @@ +package optdec + +import "math" + +/* +Copied from sonic-rs +// JSON Value Type +const NULL: u64 = 0; +const BOOL: u64 = 2; +const FALSE: u64 = BOOL; +const TRUE: u64 = (1 << 3) | BOOL; +const NUMBER: u64 = 3; +const UINT: u64 = NUMBER; +const SINT: u64 = (1 << 3) | NUMBER; +const REAL: u64 = (2 << 3) | NUMBER; +const RAWNUMBER: u64 = (3 << 3) | NUMBER; +const STRING: u64 = 4; +const STRING_COMMON: u64 = STRING; +const STRING_HASESCAPED: u64 = (1 << 3) | STRING; +const OBJECT: u64 = 6; +const ARRAY: u64 = 7; + +/// JSON Type Mask +const POS_MASK: u64 = (!0) << 32; +const POS_BITS: u64 = 32; +const TYPE_MASK: u64 = 0xFF; +const TYPE_BITS: u64 = 8; + +*/ + +const ( + // BasicType: 3 bits + KNull = 0 // xxxxx000 + KBool = 2 // xxxxx010 + KNumber = 3 // xxxxx011 + KString = 4 // xxxxx100 + KRaw = 5 // xxxxx101 + KObject = 6 // xxxxx110 + KArray = 7 // xxxxx111 + + // SubType: 2 bits + KFalse = (0 << 3) | KBool // xxx00_010, 2 + KTrue = (1 << 3) | KBool // xxx01_010, 10 + KUint = (0 << 3) | KNumber // xxx00_011, 3 + KSint = (1 << 3) | KNumber // xxx01_011, 11 + KReal = (2 << 3) | KNumber // xxx10_011, 19 + KRawNumber = (3 << 3) | KNumber // xxx11_011, 27 + KStringCommon = KString // xxx00_100, 4 + KStringEscaped = (1 << 3) | KString // xxx01_100, 12 +) + +const ( + PosMask = math.MaxUint64 << 32 + PosBits = 32 + TypeMask = 0xFF + TypeBits = 8 + + ConLenMask = uint64(math.MaxUint32) + ConLenBits = 32 +) diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/context.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/context.go new file mode 100644 index 00000000..93ed9b7e --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/context.go @@ -0,0 +1,3 @@ +package optdec + +type context = Context diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/decoder.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/decoder.go new file mode 100644 index 00000000..81eed34e --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/decoder.go @@ -0,0 +1,160 @@ +package optdec + +import ( + "reflect" + "unsafe" + + "encoding/json" + "github.com/bytedance/sonic/internal/rt" + "github.com/bytedance/sonic/option" + "github.com/bytedance/sonic/internal/decoder/errors" + "github.com/bytedance/sonic/internal/decoder/consts" +) + + +type ( + MismatchTypeError = errors.MismatchTypeError + SyntaxError = errors.SyntaxError +) + +const ( + _F_allow_control = consts.F_allow_control + _F_copy_string = consts.F_copy_string + _F_disable_unknown = consts.F_disable_unknown + _F_disable_urc = consts.F_disable_urc + _F_use_int64 = consts.F_use_int64 + _F_use_number = consts.F_use_number + _F_validate_string = consts.F_validate_string +) + +type Options = consts.Options + +const ( + OptionUseInt64 = consts.OptionUseInt64 + OptionUseNumber = consts.OptionUseNumber + OptionUseUnicodeErrors = consts.OptionUseUnicodeErrors + OptionDisableUnknown = consts.OptionDisableUnknown + OptionCopyString = consts.OptionCopyString + OptionValidateString = consts.OptionValidateString +) + + +func Decode(s *string, i *int, f uint64, val interface{}) error { + vv := rt.UnpackEface(val) + vp := vv.Value + + /* check for nil type */ + if vv.Type == nil { + return &json.InvalidUnmarshalError{} + } + + /* must be a non-nil pointer */ + if vp == nil || vv.Type.Kind() != reflect.Ptr { + return &json.InvalidUnmarshalError{Type: vv.Type.Pack()} + } + + etp := rt.PtrElem(vv.Type) + + /* check the defined pointer type for issue 379 */ + if vv.Type.IsNamed() { + newp := vp + etp = vv.Type + vp = unsafe.Pointer(&newp) + } + + dec, err := findOrCompile(etp) + if err != nil { + return err + } + + /* parse into document */ + ctx, err := NewContext(*s, *i, uint64(f), etp) + defer ctx.Delete() + if ctx.Parser.Utf8Inv { + *s = ctx.Parser.Json + } + if err != nil { + goto fix_error; + } + err = dec.FromDom(vp, ctx.Root(), &ctx) + +fix_error: + err = fix_error(*s, *i, err) + + // update position at last + *i += ctx.Parser.Pos() + return err +} + +func fix_error(json string, pos int, err error) error { + if e, ok := err.(SyntaxError); ok { + return SyntaxError{ + Pos: int(e.Pos) + pos, + Src: json, + Msg: e.Msg, + } + } + + if e, ok := err.(MismatchTypeError); ok { + return &MismatchTypeError { + Pos: int(e.Pos) + pos, + Src: json, + Type: e.Type, + } + } + + return err +} + +// Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in +// order to reduce the first-hit latency. +// +// Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is +// a compile option to set the depth of recursive compile for the nested struct type. +func Pretouch(vt reflect.Type, opts ...option.CompileOption) error { + cfg := option.DefaultCompileOptions() + for _, opt := range opts { + opt(&cfg) + } + return pretouchRec(map[reflect.Type]bool{vt:true}, cfg) +} + +func pretouchType(_vt reflect.Type, opts option.CompileOptions) (map[reflect.Type]bool, error) { + /* compile function */ + compiler := newCompiler().apply(opts) + decoder := func(vt *rt.GoType, _ ...interface{}) (interface{}, error) { + if f, err := compiler.compileType(_vt); err != nil { + return nil, err + } else { + return f, nil + } + } + + /* find or compile */ + vt := rt.UnpackType(_vt) + if val := programCache.Get(vt); val != nil { + return nil, nil + } else if _, err := programCache.Compute(vt, decoder); err == nil { + return compiler.visited, nil + } else { + return nil, err + } +} + +func pretouchRec(vtm map[reflect.Type]bool, opts option.CompileOptions) error { + if opts.RecursiveDepth < 0 || len(vtm) == 0 { + return nil + } + next := make(map[reflect.Type]bool) + for vt := range(vtm) { + sub, err := pretouchType(vt, opts) + if err != nil { + return err + } + for svt := range(sub) { + next[svt] = true + } + } + opts.RecursiveDepth -= 1 + return pretouchRec(next, opts) +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/errors.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/errors.go new file mode 100644 index 00000000..db0af547 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/errors.go @@ -0,0 +1,73 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package optdec + + import ( + "encoding/json" + "errors" + "reflect" + "strconv" + + "github.com/bytedance/sonic/internal/rt" + ) + + /** JIT Error Helpers **/ + + var stackOverflow = &json.UnsupportedValueError{ + Str: "Value nesting too deep", + Value: reflect.ValueOf("..."), + } + + func error_type(vt *rt.GoType) error { + return &json.UnmarshalTypeError{Type: vt.Pack()} + } + + func error_mismatch(node Node, ctx *context, typ reflect.Type) error { + return MismatchTypeError{ + Pos: node.Position(), + Src: ctx.Parser.Json, + Type: typ, + } + } + + func newUnmatched(pos int, vt *rt.GoType) error { + return MismatchTypeError{ + Pos: pos, + Src: "", + Type: vt.Pack(), + } + } + + func error_field(name string) error { + return errors.New("json: unknown field " + strconv.Quote(name)) + } + + func error_value(value string, vtype reflect.Type) error { + return &json.UnmarshalTypeError{ + Type: vtype, + Value: value, + } + } + + func error_syntax(pos int, src string, msg string) error { + return SyntaxError{ + Pos: pos, + Src: src, + Msg: msg, + } + } + \ No newline at end of file diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/functor.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/functor.go new file mode 100644 index 00000000..2a0523d5 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/functor.go @@ -0,0 +1,281 @@ +package optdec + +import ( + "encoding/json" + "math" + "unsafe" + + "github.com/bytedance/sonic/internal/rt" + "github.com/bytedance/sonic/internal/resolver" +) + +type decFunc interface { + FromDom(vp unsafe.Pointer, node Node, ctx *context) error +} + +type ptrDecoder struct { + typ *rt.GoType + deref decFunc +} + +// Pointer Value is allocated in the Caller +func (d *ptrDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + if *(*unsafe.Pointer)(vp) == nil { + *(*unsafe.Pointer)(vp) = rt.Mallocgc(d.typ.Size, d.typ, true) + } + + return d.deref.FromDom(*(*unsafe.Pointer)(vp), node, ctx) +} + +type embeddedFieldPtrDecoder struct { + field resolver.FieldMeta + fieldDec decFunc + fieldName string +} + +// Pointer Value is allocated in the Caller +func (d *embeddedFieldPtrDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + // seek into the pointer + vp = unsafe.Pointer(uintptr(vp) - uintptr(d.field.Path[0].Size)) + for _, f := range d.field.Path { + deref := rt.UnpackType(f.Type) + vp = unsafe.Pointer(uintptr(vp) + f.Size) + if f.Kind == resolver.F_deref { + if *(*unsafe.Pointer)(vp) == nil { + *(*unsafe.Pointer)(vp) = rt.Mallocgc(deref.Size, deref, true) + } + vp = *(*unsafe.Pointer)(vp) + } + } + return d.fieldDec.FromDom(vp, node, ctx) +} + +type i8Decoder struct{} + +func (d *i8Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsI64(ctx) + if !ok || ret > math.MaxInt8 || ret < math.MinInt8 { + return error_mismatch(node, ctx, int8Type) + } + + *(*int8)(vp) = int8(ret) + return nil +} + +type i16Decoder struct{} + +func (d *i16Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsI64(ctx) + if !ok || ret > math.MaxInt16 || ret < math.MinInt16 { + return error_mismatch(node, ctx, int16Type) + } + + *(*int16)(vp) = int16(ret) + return nil +} + +type i32Decoder struct{} + +func (d *i32Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsI64(ctx) + if !ok || ret > math.MaxInt32 || ret < math.MinInt32 { + return error_mismatch(node, ctx, int32Type) + } + + *(*int32)(vp) = int32(ret) + return nil +} + +type i64Decoder struct{} + +func (d *i64Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsI64(ctx) + if !ok { + return error_mismatch(node, ctx, int64Type) + } + + *(*int64)(vp) = int64(ret) + return nil +} + +type u8Decoder struct{} + +func (d *u8Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsU64(ctx) + if !ok || ret > math.MaxUint8 { + err := error_mismatch(node, ctx, uint8Type) + return err + } + + *(*uint8)(vp) = uint8(ret) + return nil +} + +type u16Decoder struct{} + +func (d *u16Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsU64(ctx) + if !ok || ret > math.MaxUint16 { + return error_mismatch(node, ctx, uint16Type) + } + *(*uint16)(vp) = uint16(ret) + return nil +} + +type u32Decoder struct{} + +func (d *u32Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsU64(ctx) + if !ok || ret > math.MaxUint32 { + return error_mismatch(node, ctx, uint32Type) + } + + *(*uint32)(vp) = uint32(ret) + return nil +} + +type u64Decoder struct{} + +func (d *u64Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsU64(ctx) + if !ok { + return error_mismatch(node, ctx, uint64Type) + } + + *(*uint64)(vp) = uint64(ret) + return nil +} + +type f32Decoder struct{} + +func (d *f32Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsF64(ctx) + if !ok || ret > math.MaxFloat32 || ret < -math.MaxFloat32 { + return error_mismatch(node, ctx, float32Type) + } + + *(*float32)(vp) = float32(ret) + return nil +} + +type f64Decoder struct{} + +func (d *f64Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsF64(ctx) + if !ok { + return error_mismatch(node, ctx, float64Type) + } + + *(*float64)(vp) = float64(ret) + return nil +} + +type boolDecoder struct { +} + +func (d *boolDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsBool() + if !ok { + return error_mismatch(node, ctx, boolType) + } + + *(*bool)(vp) = bool(ret) + return nil +} + +type stringDecoder struct { +} + +func (d *stringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + ret, ok := node.AsStr(ctx) + if !ok { + return error_mismatch(node, ctx, stringType) + } + *(*string)(vp) = ret + return nil +} + +type numberDecoder struct { +} + +func (d *numberDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + num, ok := node.AsNumber(ctx) + if !ok { + return error_mismatch(node, ctx, jsonNumberType) + } + *(*json.Number)(vp) = num + return nil +} + +type recuriveDecoder struct { + typ *rt.GoType +} + +func (d *recuriveDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + dec, err := findOrCompile(d.typ) + if err != nil { + return err + } + return dec.FromDom(vp, node, ctx) +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/helper.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/helper.go new file mode 100644 index 00000000..1d76f805 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/helper.go @@ -0,0 +1,101 @@ +package optdec + +import ( + "encoding/json" + "strconv" + + "github.com/bytedance/sonic/internal/native" + "github.com/bytedance/sonic/internal/native/types" +) + + +func SkipNumberFast(json string, start int) (int, error) { + // find the number ending, we pasred in sonic-cpp, it alway valid + pos := start + for pos < len(json) && json[pos] != ']' && json[pos] != '}' && json[pos] != ',' { + if json[pos] >= '0' && json[pos] <= '9' || json[pos] == '.' || json[pos] == '-' || json[pos] == '+' || json[pos] == 'e' || json[pos] == 'E' { + pos += 1 + } else { + return pos, error_syntax(pos, json, "invalid number") + } + } + return pos, nil +} + +func ValidNumberFast(json string) error { + // find the number ending, we pasred in sonic-cpp, it alway valid + pos := 0 + for pos < len(json) && json[pos] != ']' && json[pos] != '}' && json[pos] != ',' { + if json[pos] >= '0' && json[pos] <= '9' || json[pos] == '.' || json[pos] == '-' || json[pos] == '+' || json[pos] == 'e' || json[pos] == 'E' { + pos += 1 + } else { + return error_syntax(pos, json, "invalid number") + } + } + + if pos == 0 { + return error_syntax(pos, json, "invalid number") + } + return nil +} + +func SkipOneFast2(json string, pos *int) (int, error) { + // find the number ending, we pasred in sonic-cpp, it alway valid + start := native.SkipOneFast(&json, pos) + if start < 0 { + return -1, error_syntax(*pos, json, types.ParsingError(-start).Error()) + } + return start, nil +} + +func SkipOneFast(json string, pos int) (string, error) { + // find the number ending, we pasred in sonic-cpp, it alway valid + start := native.SkipOneFast(&json, &pos) + if start < 0 { + // TODO: details error code + return "", error_syntax(pos, json, types.ParsingError(-start).Error()) + } + return json[start:pos], nil +} + +func ParseI64(raw string) (int64, error) { + i64, err := strconv.ParseInt(raw, 10, 64) + if err != nil { + return 0, err + } + return i64, nil +} + +func ParseBool(raw string) (bool, error) { + var b bool + err := json.Unmarshal([]byte(raw), &b) + if err != nil { + return false, err + } + return b, nil +} + +func ParseU64(raw string) (uint64, error) { + u64, err := strconv.ParseUint(raw, 10, 64) + if err != nil { + return 0, err + } + return u64, nil +} + +func ParseF64(raw string) (float64, error) { + f64, err := strconv.ParseFloat(raw, 64) + if err != nil { + return 0, err + } + return f64, nil +} + +func Unquote(raw string) (string, error) { + var u string + err := json.Unmarshal([]byte(raw), &u) + if err != nil { + return "", err + } + return u, nil +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/interface.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/interface.go new file mode 100644 index 00000000..0c063d55 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/interface.go @@ -0,0 +1,169 @@ +package optdec + +import ( + "encoding" + "encoding/json" + "unsafe" + "reflect" + + "github.com/bytedance/sonic/internal/rt" +) + +type efaceDecoder struct { +} + +func (d *efaceDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*interface{})(vp) = interface{}(nil) + return nil + } + + eface := *(*rt.GoEface)(vp) + + // not pointer type, or nil pointer, or *interface{} + if eface.Value == nil || eface.Type.Kind() != reflect.Ptr || rt.PtrElem(eface.Type) == anyType { + ret, err := node.AsEface(ctx) + if err != nil { + return err + } + + *(*interface{})(vp) = ret + return nil + } + + etp := rt.PtrElem(eface.Type) + vp = eface.Value + + /* check the defined pointer type for issue 379 */ + if eface.Type.IsNamed() { + newp := vp + etp = eface.Type + vp = unsafe.Pointer(&newp) + } + + dec, err := findOrCompile(etp) + if err != nil { + return err + } + + return dec.FromDom(vp, node, ctx) +} + +type ifaceDecoder struct { + typ *rt.GoType +} + +func (d *ifaceDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + iface := *(*rt.GoIface)(vp) + if iface.Itab == nil { + return error_type(d.typ) + } + + vt := iface.Itab.Vt + + // not pointer type, or nil pointer, or *interface{} + if vp == nil || vt.Kind() != reflect.Ptr || rt.PtrElem(vt) == anyType { + ret, err := node.AsEface(ctx) + if err != nil { + return err + } + + *(*interface{})(vp) = ret + return nil + } + + + etp := rt.PtrElem(vt) + vp = iface.Value + + /* check the defined pointer type for issue 379 */ + if vt.IsNamed() { + newp := vp + etp = vt + vp = unsafe.Pointer(&newp) + } + + dec, err := findOrCompile(etp) + if err != nil { + return err + } + + return dec.FromDom(vp, node, ctx) +} + +type unmarshalTextDecoder struct { + typ *rt.GoType +} + +func (d *unmarshalTextDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + txt, ok := node.AsStringText(ctx) + if !ok { + return error_mismatch(node, ctx, d.typ.Pack()) + } + + v := *(*interface{})(unsafe.Pointer(&rt.GoEface{ + Type: d.typ, + Value: vp, + })) + + // fast path + if u, ok := v.(encoding.TextUnmarshaler); ok { + return u.UnmarshalText(txt) + } + + // slow path + rv := reflect.ValueOf(v) + if u, ok := rv.Interface().(encoding.TextUnmarshaler); ok { + return u.UnmarshalText(txt) + } + + return error_type(d.typ) +} + +type unmarshalJSONDecoder struct { + typ *rt.GoType + strOpt bool +} + +func (d *unmarshalJSONDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + v := *(*interface{})(unsafe.Pointer(&rt.GoEface{ + Type: d.typ, + Value: vp, + })) + + var input []byte + if d.strOpt && node.IsNull() { + input = []byte("null") + } else if d.strOpt { + s, ok := node.AsStringText(ctx) + if !ok { + return error_mismatch(node, ctx, d.typ.Pack()) + } + input = s + } else { + input = []byte(node.AsRaw(ctx)) + } + + // fast path + if u, ok := v.(json.Unmarshaler); ok { + return u.UnmarshalJSON((input)) + } + + // slow path + rv := reflect.ValueOf(v) + if u, ok := rv.Interface().(json.Unmarshaler); ok { + return u.UnmarshalJSON(input) + } + + return error_type(d.typ) +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/map.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/map.go new file mode 100644 index 00000000..1a2bda8f --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/map.go @@ -0,0 +1,430 @@ +package optdec + +import ( + "encoding" + "encoding/json" + "math" + "reflect" + "unsafe" + + "github.com/bytedance/sonic/internal/rt" +) + +/** Decoder for most common map types: map[string]interface{}, map[string]string **/ + +type mapEfaceDecoder struct { +} + +func (d *mapEfaceDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*map[string]interface{})(vp) = nil + return nil + } + + return node.AsMapEface(ctx, vp) +} + +type mapStringDecoder struct { +} + +func (d *mapStringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*map[string]string)(vp) = nil + return nil + } + + return node.AsMapString(ctx, vp) +} + +/** Decoder for map with string key **/ + +type mapStrKeyDecoder struct { + mapType *rt.GoMapType + elemDec decFunc + assign rt.MapStrAssign + typ reflect.Type +} + +func (d *mapStrKeyDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + obj, ok := node.AsObj() + if !ok { + return error_mismatch(node, ctx, d.mapType.Pack()) + } + + // allocate map + m := *(*unsafe.Pointer)(vp) + if m == nil { + m = rt.Makemap(&d.mapType.GoType, obj.Len()) + } + + var gerr error + next := obj.Children() + for i := 0; i < obj.Len(); i++ { + keyn := NewNode(next) + key, _ := keyn.AsStr(ctx) + + valn := NewNode(PtrOffset(next, 1)) + valp := d.assign(d.mapType, m, key) + err := d.elemDec.FromDom(valp, valn, ctx) + if gerr == nil && err != nil { + gerr = err + } + next = valn.Next() + } + + *(*unsafe.Pointer)(vp) = m + return gerr +} + +/** Decoder for map with int32 or int64 key **/ + +type mapI32KeyDecoder struct { + mapType *rt.GoMapType + elemDec decFunc + assign rt.Map32Assign +} + +func (d *mapI32KeyDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + obj, ok := node.AsObj() + if !ok { + return error_mismatch(node, ctx, d.mapType.Pack()) + } + + // allocate map + m := *(*unsafe.Pointer)(vp) + if m == nil { + m = rt.Makemap(&d.mapType.GoType, obj.Len()) + } + + next := obj.Children() + var gerr error + for i := 0; i < obj.Len(); i++ { + keyn := NewNode(next) + k, ok := keyn.ParseI64(ctx) + if !ok || k > math.MaxInt32 || k < math.MinInt32 { + if gerr == nil { + gerr = error_mismatch(keyn, ctx, d.mapType.Pack()) + } + valn := NewNode(PtrOffset(next, 1)) + next = valn.Next() + continue + } + + key := int32(k) + ku32 := *(*uint32)(unsafe.Pointer(&key)) + valn := NewNode(PtrOffset(next, 1)) + valp := d.assign(d.mapType, m, ku32) + err := d.elemDec.FromDom(valp, valn, ctx) + if gerr == nil && err != nil { + gerr = err + } + + next = valn.Next() + } + + *(*unsafe.Pointer)(vp) = m + return gerr +} + +type mapI64KeyDecoder struct { + mapType *rt.GoMapType + elemDec decFunc + assign rt.Map64Assign +} + +func (d *mapI64KeyDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + obj, ok := node.AsObj() + if !ok { + return error_mismatch(node, ctx, d.mapType.Pack()) + } + + // allocate map + m := *(*unsafe.Pointer)(vp) + if m == nil { + m = rt.Makemap(&d.mapType.GoType, obj.Len()) + } + + var gerr error + next := obj.Children() + for i := 0; i < obj.Len(); i++ { + keyn := NewNode(next) + key, ok := keyn.ParseI64(ctx) + + if !ok { + if gerr == nil { + gerr = error_mismatch(keyn, ctx, d.mapType.Pack()) + } + valn := NewNode(PtrOffset(next, 1)) + next = valn.Next() + continue + } + + ku64 := *(*uint64)(unsafe.Pointer(&key)) + valn := NewNode(PtrOffset(next, 1)) + valp := d.assign(d.mapType, m, ku64) + err := d.elemDec.FromDom(valp, valn, ctx) + if gerr == nil && err != nil { + gerr = err + } + next = valn.Next() + } + + *(*unsafe.Pointer)(vp) = m + return gerr +} + +/** Decoder for map with unt32 or uint64 key **/ + +type mapU32KeyDecoder struct { + mapType *rt.GoMapType + elemDec decFunc + assign rt.Map32Assign +} + +func (d *mapU32KeyDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + obj, ok := node.AsObj() + if !ok { + return error_mismatch(node, ctx, d.mapType.Pack()) + } + + // allocate map + m := *(*unsafe.Pointer)(vp) + if m == nil { + m = rt.Makemap(&d.mapType.GoType, obj.Len()) + } + + var gerr error + next := obj.Children() + for i := 0; i < obj.Len(); i++ { + keyn := NewNode(next) + k, ok := keyn.ParseU64(ctx) + if !ok || k > math.MaxUint32 { + if gerr == nil { + gerr = error_mismatch(keyn, ctx, d.mapType.Pack()) + } + valn := NewNode(PtrOffset(next, 1)) + next = valn.Next() + continue + } + + key := uint32(k) + valn := NewNode(PtrOffset(next, 1)) + valp := d.assign(d.mapType, m, key) + err := d.elemDec.FromDom(valp, valn, ctx) + if gerr == nil && err != nil { + gerr = err + } + next = valn.Next() + } + + *(*unsafe.Pointer)(vp) = m + return gerr +} + +type mapU64KeyDecoder struct { + mapType *rt.GoMapType + elemDec decFunc + assign rt.Map64Assign +} + +func (d *mapU64KeyDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + obj, ok := node.AsObj() + if !ok { + return error_mismatch(node, ctx, d.mapType.Pack()) + } + // allocate map + m := *(*unsafe.Pointer)(vp) + if m == nil { + m = rt.Makemap(&d.mapType.GoType, obj.Len()) + } + + var gerr error + next := obj.Children() + for i := 0; i < obj.Len(); i++ { + keyn := NewNode(next) + key, ok := keyn.ParseU64(ctx) + if !ok { + if gerr == nil { + gerr = error_mismatch(keyn, ctx, d.mapType.Pack()) + } + valn := NewNode(PtrOffset(next, 1)) + next = valn.Next() + continue + } + + valn := NewNode(PtrOffset(next, 1)) + valp := d.assign(d.mapType, m, key) + err := d.elemDec.FromDom(valp, valn, ctx) + if gerr == nil && err != nil { + gerr = err + } + next = valn.Next() + } + + *(*unsafe.Pointer)(vp) = m + return gerr +} + +/** Decoder for generic cases */ + +type decKey func(dec *mapDecoder, raw string, ctx *context) (interface{}, error) + +func decodeKeyU8(dec *mapDecoder, raw string, ctx *context) (interface{}, error) { + key, err := Unquote(raw) + if err != nil { + return nil, err + } + ret, err := ParseU64(key) + if err != nil { + return nil, err + } + if ret > math.MaxUint8 { + return nil, error_value(key, dec.mapType.Key.Pack()) + } + return uint8(ret), nil +} + +func decodeKeyU16(dec *mapDecoder, raw string, ctx *context) (interface{}, error) { + key, err := Unquote(raw) + if err != nil { + return nil, err + } + ret, err := ParseU64(key) + if err != nil { + return nil, err + } + if ret > math.MaxUint16 { + return nil, error_value(key, dec.mapType.Key.Pack()) + } + return uint16(ret), nil +} + +func decodeKeyI8(dec *mapDecoder, raw string, ctx *context) (interface{}, error) { + key, err := Unquote(raw) + if err != nil { + return nil, err + } + ret, err := ParseI64(key) + if err != nil { + return nil, err + } + if ret > math.MaxInt8 || ret < math.MinInt8 { + return nil, error_value(key, dec.mapType.Key.Pack()) + } + return int8(ret), nil +} + +func decodeKeyI16(dec *mapDecoder, raw string, ctx *context) (interface{}, error) { + key, err := Unquote(raw) + if err != nil { + return nil, err + } + ret, err := ParseI64(key) + if err != nil { + return nil, err + } + if ret > math.MaxInt16 || ret < math.MinInt16 { + return nil, error_value(key, dec.mapType.Key.Pack()) + } + return int16(ret), nil +} + +func decodeKeyJSONUnmarshaler(dec *mapDecoder, raw string, _ *context) (interface{}, error) { + ret := reflect.New(dec.mapType.Key.Pack()).Interface() + err := ret.(json.Unmarshaler).UnmarshalJSON([]byte(raw)) + if err != nil { + return nil, err + } + return ret, nil +} + +func decodeKeyTextUnmarshaler(dec *mapDecoder, raw string, ctx *context) (interface{}, error) { + key, err := Unquote(raw) + if err != nil { + return nil, err + } + ret := reflect.New(dec.mapType.Key.Pack()).Interface() + err = ret.(encoding.TextUnmarshaler).UnmarshalText([]byte(key)) + if err != nil { + return nil, err + } + return ret, nil +} + +type mapDecoder struct { + mapType *rt.GoMapType + keyDec decKey + elemDec decFunc +} + +func (d *mapDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + obj, ok := node.AsObj() + if !ok { + return error_mismatch(node, ctx, d.mapType.Pack()) + } + + // allocate map + m := *(*unsafe.Pointer)(vp) + if m == nil { + m = rt.Makemap(&d.mapType.GoType, obj.Len()) + } + + next := obj.Children() + var gerr error + for i := 0; i < obj.Len(); i++ { + keyn := NewNode(next) + raw := keyn.AsRaw(ctx) + key, err := d.keyDec(d, raw, ctx) + if err != nil { + if gerr == nil { + gerr = error_mismatch(keyn, ctx, d.mapType.Pack()) + } + valn := NewNode(PtrOffset(next, 1)) + next = valn.Next() + continue + } + + valn := NewNode(PtrOffset(next, 1)) + keyp := rt.UnpackEface(key).Value + valp := rt.Mapassign(d.mapType, m, keyp) + err = d.elemDec.FromDom(valp, valn, ctx) + if gerr == nil && err != nil { + gerr = err + } + + next = valn.Next() + } + + *(*unsafe.Pointer)(vp) = m + return gerr +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/native.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/native.go new file mode 100644 index 00000000..29a0136a --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/native.go @@ -0,0 +1,269 @@ +package optdec + +import ( + "fmt" + "reflect" + "unsafe" + + "sync" + + "github.com/bytedance/sonic/internal/native" + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" + "github.com/bytedance/sonic/utf8" +) + + +type ErrorCode int + +const ( + SONIC_OK = 0; + SONIC_CONTROL_CHAR = 1; + SONIC_INVALID_ESCAPED = 2; + SONIC_INVALID_NUM = 3; + SONIC_FLOAT_INF = 4; + SONIC_EOF = 5; + SONIC_INVALID_CHAR = 6; + SONIC_EXPECT_KEY = 7; + SONIC_EXPECT_COLON = 8; + SONIC_EXPECT_OBJ_COMMA_OR_END = 9; + SONIC_EXPECT_ARR_COMMA_OR_END = 10; + SONIC_VISIT_FAILED = 11; + SONIC_INVALID_ESCAPED_UTF = 12; + SONIC_INVALID_LITERAL = 13; + SONIC_STACK_OVERFLOW = 14; +) + +var ParsingErrors = []string{ + SONIC_OK : "ok", + SONIC_CONTROL_CHAR : "control chars in string", + SONIC_INVALID_ESCAPED : "invalid escaped chars in string", + SONIC_INVALID_NUM : "invalid number", + SONIC_FLOAT_INF : "float infinity", + SONIC_EOF : "eof", + SONIC_INVALID_CHAR : "invalid chars", + SONIC_EXPECT_KEY : "expect a json key", + SONIC_EXPECT_COLON : "expect a `:`", + SONIC_EXPECT_OBJ_COMMA_OR_END : "expect a `,` or `}`", + SONIC_EXPECT_ARR_COMMA_OR_END : "expect a `,` or `]`", + SONIC_VISIT_FAILED : "failed in json visitor", + SONIC_INVALID_ESCAPED_UTF : "invalid escaped unicodes", + SONIC_INVALID_LITERAL : "invalid literal(true/false/null)", + SONIC_STACK_OVERFLOW : "json is exceeded max depth 4096, cause stack overflow", +} + +func (code ErrorCode) Error() string { + return ParsingErrors[code] +} + +type node struct { + typ uint64 + val uint64 +} + +// should consitent with native/parser.c +type _nospaceBlock struct { + _ [8]byte + _ [8]byte +} + +// should consitent with native/parser.c +type nodeBuf struct { + ncur uintptr + parent int64 + depth uint64 + nstart uintptr + nend uintptr + stat jsonStat +} + +func (self *nodeBuf) init(nodes []node) { + self.ncur = uintptr(unsafe.Pointer(&nodes[0])) + self.nstart = self.ncur + self.nend = self.ncur + uintptr(cap(nodes)) * unsafe.Sizeof(node{}) + self.parent = -1 +} + +// should consitent with native/parser.c +type Parser struct { + Json string + padded []byte + nodes []node + dbuf []byte + backup []node + + options uint64 + // JSON cursor + start uintptr + cur uintptr + end uintptr + _nbk _nospaceBlock + + // node buffer cursor + nbuf nodeBuf + Utf8Inv bool + isEface bool +} + +// only when parse non-empty object/array are needed. +type jsonStat struct { + object uint32 + array uint32 + str uint32 + number uint32 + array_elems uint32 + object_keys uint32 + max_depth uint32 +} + + +var ( + defaultJsonPaddedCap uintptr = 1 << 20 // 1 Mb + defaultNodesCap uintptr = (1 << 20) / unsafe.Sizeof(node{}) // 1 Mb +) + +var parsePool sync.Pool = sync.Pool { + New: func () interface{} { + return &Parser{ + options: 0, + padded: make([]byte, 0, defaultJsonPaddedCap), + nodes: make([]node, defaultNodesCap, defaultNodesCap), + dbuf: make([]byte, types.MaxDigitNums, types.MaxDigitNums), + } + }, +} + +var padding string = "x\"x\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + +func newParser(data string, pos int, opt uint64) *Parser { + p := parsePool.Get().(*Parser) + + /* validate json if needed */ + if (opt & (1 << _F_validate_string)) != 0 && !utf8.ValidateString(data){ + dbuf := utf8.CorrectWith(nil, rt.Str2Mem(data[pos:]), "\ufffd") + dbuf = append(dbuf, padding...) + p.Json = rt.Mem2Str(dbuf[:len(dbuf) - len(padding)]) + p.Utf8Inv = true + p.start = uintptr((*rt.GoString)(unsafe.Pointer(&p.Json)).Ptr) + } else { + p.Json = data + // TODO: prevent too large JSON + p.padded = append(p.padded, data[pos:]...) + p.padded = append(p.padded, padding...) + p.start = uintptr((*rt.GoSlice)(unsafe.Pointer(&p.padded)).Ptr) + } + + p.cur = p.start + p.end = p.cur + uintptr(len(p.Json)) + p.options = opt + p.nbuf.init(p.nodes) + return p +} + + +func (p *Parser) Pos() int { + return int(p.cur - p.start) +} + +func (p *Parser) JsonBytes() []byte { + if p.Utf8Inv { + return (rt.Str2Mem(p.Json)) + } else { + return p.padded + } +} + +var nodeType = rt.UnpackType(reflect.TypeOf(node{})) + +//go:inline +func calMaxNodeCap(jsonSize int) int { + return jsonSize / 2 + 2 +} + +func (p *Parser) parse() ErrorCode { + // when decode into struct, we should decode number as possible + old := p.options + if !p.isEface { + p.options &^= 1 << _F_use_number + } + + // fast path with limited node buffer + err := ErrorCode(native.ParseWithPadding(unsafe.Pointer(p))) + if err != SONIC_VISIT_FAILED { + p.options = old + return err + } + + // check OoB here + offset := p.nbuf.ncur - p.nbuf.nstart + curLen := offset / unsafe.Sizeof(node{}) + if curLen != uintptr(len(p.nodes)) { + panic(fmt.Sprintf("current len: %d, real len: %d cap: %d", curLen, len(p.nodes), cap(p.nodes))) + } + + // node buf is not enough, continue parse + // the maxCap is always meet all valid JSON + maxCap := calMaxNodeCap(len(p.Json)) + slice := rt.GoSlice{ + Ptr: rt.Mallocgc(uintptr(maxCap) * nodeType.Size, nodeType, false), + Len: maxCap, + Cap: maxCap, + } + rt.Memmove(unsafe.Pointer(slice.Ptr), unsafe.Pointer(&p.nodes[0]), offset) + p.backup = p.nodes + p.nodes = *(*[]node)(unsafe.Pointer(&slice)) + + // update node cursor + p.nbuf.nstart = uintptr(unsafe.Pointer(&p.nodes[0])) + p.nbuf.nend = p.nbuf.nstart + uintptr(cap(p.nodes)) * unsafe.Sizeof(node{}) + p.nbuf.ncur = p.nbuf.nstart + offset + + // continue parse json + err = ErrorCode(native.ParseWithPadding(unsafe.Pointer(p))) + p.options = old + return err +} + +func (p *Parser) reset() { + p.options = 0 + p.padded = p.padded[:0] + // nodes is too large here, we will not reset it and use small backup nodes buffer + if p.backup != nil { + p.nodes = p.backup + p.backup = nil + } + p.start = 0 + p.cur = 0 + p.end = 0 + p.Json = "" + p.nbuf = nodeBuf{} + p._nbk = _nospaceBlock{} + p.Utf8Inv = false + p.isEface = false +} + +func (p *Parser) free() { + p.reset() + parsePool.Put(p) +} + +//go:noinline +func (p *Parser) fixError(code ErrorCode) error { + if code == SONIC_OK { + return nil + } + + if p.Pos() == 0 { + code = SONIC_EOF; + } + + pos := p.Pos() - 1 + return error_syntax(pos, p.Json, ParsingErrors[code]) +} + +func Parse(data string, opt uint64) error { + p := newParser(data, 0, opt) + err := p.parse() + p.free() + return err +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/node.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/node.go new file mode 100644 index 00000000..8b49ebb3 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/node.go @@ -0,0 +1,1279 @@ +package optdec + +import ( + "encoding/json" + "math" + "unsafe" + + "github.com/bytedance/sonic/internal/envs" + "github.com/bytedance/sonic/internal/rt" +) + +type Context struct { + Parser *Parser + efacePool *efacePool + Stack bounedStack + Utf8Inv bool +} + +func (ctx *Context) Options() uint64 { + return ctx.Parser.options +} + +/************************* Stack and Pool Helper *******************/ + +type parentStat struct { + con unsafe.Pointer + remain uint64 +} +type bounedStack struct { + stack []parentStat + index int +} + +func newStack(size int) bounedStack { + return bounedStack{ + stack: make([]parentStat, size + 2), + index: 0, + } +} + +//go:nosplit +func (s *bounedStack) Pop() (unsafe.Pointer, int, bool){ + s.index-- + con := s.stack[s.index].con + remain := s.stack[s.index].remain &^ (uint64(1) << 63) + isObj := (s.stack[s.index].remain & (uint64(1) << 63)) != 0 + s.stack[s.index].con = nil + s.stack[s.index].remain = 0 + return con, int(remain), isObj +} + +//go:nosplit +func (s *bounedStack) Push(p unsafe.Pointer, remain int, isObj bool) { + s.stack[s.index].con = p + s.stack[s.index].remain = uint64(remain) + if isObj { + s.stack[s.index].remain |= (uint64(1) << 63) + } + s.index++ +} + +type efacePool struct{ + t64 rt.T64Pool + tslice rt.TslicePool + tstring rt.TstringPool + efaceSlice rt.SlicePool +} + +func newEfacePool(stat *jsonStat, useNumber bool) *efacePool { + strs := int(stat.str) + nums := 0 + if useNumber { + strs += int(stat.number) + } else { + nums = int(stat.number) + } + + return &efacePool{ + t64: rt.NewT64Pool(nums), + tslice: rt.NewTslicePool(int(stat.array)), + tstring: rt.NewTstringPool(strs), + efaceSlice: rt.NewPool(rt.AnyType, int(stat.array_elems)), + } +} + +func (self *efacePool) GetMap(hint int) unsafe.Pointer { + m := make(map[string]interface{}, hint) + return *(*unsafe.Pointer)(unsafe.Pointer(&m)) +} + +func (self *efacePool) GetSlice(hint int) unsafe.Pointer { + return unsafe.Pointer(self.efaceSlice.GetSlice(hint)) +} + +func (self *efacePool) ConvTSlice(val rt.GoSlice, typ *rt.GoType, dst unsafe.Pointer) { + self.tslice.Conv(val, typ, (*interface{})(dst)) +} + +func (self *efacePool) ConvF64(val float64, dst unsafe.Pointer) { + self.t64.Conv(castU64(val), rt.Float64Type, (*interface{})(dst)) +} + +func (self *efacePool) ConvTstring(val string, dst unsafe.Pointer) { + self.tstring.Conv(val, (*interface{})(dst)) +} + +func (self *efacePool) ConvTnum(val json.Number, dst unsafe.Pointer) { + self.tstring.ConvNum(val, (*interface{})(dst)) +} + +/********************************************************/ + +func canUseFastMap( opts uint64, root *rt.GoType) bool { + return envs.UseFastMap && (opts & (1 << _F_copy_string)) == 0 && (opts & (1 << _F_use_int64)) == 0 && (root == rt.AnyType || root == rt.MapEfaceType || root == rt.SliceEfaceType) +} + +func NewContext(json string, pos int, opts uint64, root *rt.GoType) (Context, error) { + ctx := Context{ + Parser: newParser(json, pos, opts), + } + if root == rt.AnyType || root == rt.MapEfaceType || root == rt.SliceEfaceType { + ctx.Parser.isEface = true + } + + ecode := ctx.Parser.parse() + + if ecode != 0 { + return ctx, ctx.Parser.fixError(ecode) + } + + useNumber := (opts & (1 << _F_use_number )) != 0 + if canUseFastMap(opts, root) { + ctx.efacePool = newEfacePool(&ctx.Parser.nbuf.stat, useNumber) + ctx.Stack = newStack(int(ctx.Parser.nbuf.stat.max_depth)) + } + + return ctx, nil +} + +func (ctx *Context) Delete() { + ctx.Parser.free() + ctx.Parser = nil +} + +type Node struct { + cptr uintptr +} + +func NewNode(cptr uintptr) Node { + return Node{cptr: cptr} +} + +type Dom struct { + cdom uintptr +} + +func (ctx *Context) Root() Node { + root := (uintptr)(((*rt.GoSlice)(unsafe.Pointer(&ctx.Parser.nodes))).Ptr) + return Node{cptr: root} +} + +type Array struct { + cptr uintptr +} + +type Object struct { + cptr uintptr +} + +func (obj Object) Len() int { + cobj := ptrCast(obj.cptr) + return int(uint64(cobj.val) & ConLenMask) +} + +func (arr Array) Len() int { + carr := ptrCast(arr.cptr) + return int(uint64(carr.val) & ConLenMask) +} + +// / Helper functions to eliminate CGO calls +func (val Node) Type() uint8 { + ctype := ptrCast(val.cptr) + return uint8(ctype.typ & TypeMask) +} + +func (val Node) Next() uintptr { + if val.Type() != KObject && val.Type() != KArray { + return PtrOffset(val.cptr, 1) + } + cobj := ptrCast(val.cptr) + offset := int64(uint64(cobj.val) >> ConLenBits) + return PtrOffset(val.cptr, offset) +} + +func (val *Node) next() { + *val = NewNode(val.Next()) +} + +type NodeIter struct { + next uintptr +} + +func NewNodeIter(node Node) NodeIter { + return NodeIter{next: node.cptr} +} + +func (iter *NodeIter) Next() Node { + ret := NewNode(iter.next) + iter.next = PtrOffset(iter.next, 1) + return ret +} + + +func (iter *NodeIter) Peek() Node { + return NewNode(iter.next) +} + +func (val Node) U64() uint64 { + cnum := ptrCast(val.cptr) + return *(*uint64)((unsafe.Pointer)(&(cnum.val))) +} + +func (val Node) I64() int64 { + cnum := ptrCast(val.cptr) + return *(*int64)((unsafe.Pointer)(&(cnum.val))) +} + +func (val Node) IsNull() bool { + return val.Type() == KNull +} + +func (val Node) IsNumber() bool { + return val.Type() & KNumber != 0 +} + +func (val Node) F64() float64 { + cnum := ptrCast(val.cptr) + return *(*float64)((unsafe.Pointer)(&(cnum.val))) +} + +func (val Node) Bool() bool { + return val.Type() == KTrue +} + +func (self Node) AsU64(ctx *Context) (uint64, bool) { + if self.Type() == KUint { + return self.U64(), true + } else if self.Type() == KRawNumber { + num, err := ParseU64(self.Raw(ctx)) + if err != nil { + return 0, false + } + return num, true + } else { + return 0, false + } +} + +func (val *Node) AsObj() (Object, bool) { + var ret Object + if val.Type() != KObject { + return ret, false + } + return Object{ + cptr: val.cptr, + }, true +} + +func (val Node) Obj() Object { + return Object{cptr: val.cptr} +} + +func (val Node) Arr() Array { + return Array{cptr: val.cptr} +} + +func (val *Node) AsArr() (Array, bool) { + var ret Array + if val.Type() != KArray { + return ret, false + } + return Array{ + cptr: val.cptr, + }, true +} + +func (self Node) AsI64(ctx *Context) (int64, bool) { + typ := self.Type() + if typ == KUint && self.U64() <= math.MaxInt64 { + return int64(self.U64()), true + } else if typ == KSint { + return self.I64(), true + } else if typ == KRawNumber { + val, err := self.Number(ctx).Int64() + if err != nil { + return 0, false + } + return val, true + } else { + return 0, false + } +} + +/********* Parse Node String into Value ***************/ + +func (val Node) ParseI64(ctx *Context) (int64, bool) { + s, ok := val.AsStrRef(ctx) + if !ok { + return 0, false + } + + if s == "null" { + return 0, true + } + + i, err := ParseI64(s) + if err != nil { + return 0, false + } + return i, true +} + +func (val Node) ParseBool(ctx *Context) (bool, bool) { + s, ok := val.AsStrRef(ctx) + if !ok { + return false, false + } + + if s == "null" { + return false, true + } + + b, err := ParseBool(s) + if err != nil { + return false, false + } + return b, true +} + +func (val Node) ParseU64(ctx *Context) (uint64, bool) { + s, ok := val.AsStrRef(ctx) + if !ok { + return 0, false + } + + if s == "null" { + return 0, true + } + + i, err := ParseU64(s) + if err != nil { + return 0, false + } + return i, true +} + +func (val Node) ParseF64(ctx *Context) (float64, bool) { + s, ok := val.AsStrRef(ctx) + if !ok { + return 0, false + } + + if s == "null" { + return 0, true + } + + i, err := ParseF64(s) + if err != nil { + return 0, false + } + return i, true +} + +func (val Node) ParseString(ctx *Context) (string, bool) { + // shoud not use AsStrRef + s, ok := val.AsStr(ctx) + if !ok { + return "", false + } + + if s == "null" { + return "", true + } + + s, err := Unquote(s) + if err != nil { + return "", false + } + return s, true +} + + +func (val Node) ParseNumber(ctx *Context) (json.Number, bool) { + // shoud not use AsStrRef + s, ok := val.AsStr(ctx) + if !ok { + return json.Number(""), false + } + + if s == "null" { + return json.Number(""), true + } + + end, err := SkipNumberFast(s, 0) + // has error or trailing chars + if err != nil || end != len(s) { + return json.Number(""), false + } + return json.Number(s), true +} + + + +func (val Node) AsF64(ctx *Context) (float64, bool) { + switch val.Type() { + case KUint: return float64(val.U64()), true + case KSint: return float64(val.I64()), true + case KReal: return float64(val.F64()), true + case KRawNumber: f, err := val.Number(ctx).Float64(); return f, err == nil + default: return 0, false + } +} + +func (val Node) AsBool() (bool, bool) { + switch val.Type() { + case KTrue: return true, true + case KFalse: return false, true + default: return false, false + } +} + +func (val Node) AsStr(ctx *Context) (string, bool) { + switch val.Type() { + case KStringCommon: + s := val.StringRef(ctx) + if (ctx.Options() & (1 << _F_copy_string) == 0) { + return s, true + } + return string(rt.Str2Mem(s)), true + case KStringEscaped: + return val.StringCopyEsc(ctx), true + default: return "", false + } +} + +func (val Node) AsStrRef(ctx *Context) (string, bool) { + switch val.Type() { + case KStringEscaped: + node := ptrCast(val.cptr) + offset := val.Position() + len := int(node.val) + return rt.Mem2Str(ctx.Parser.JsonBytes()[offset : offset + len]), true + case KStringCommon: + return val.StringRef(ctx), true + default: + return "", false + } +} + +func (val Node) AsBytesRef(ctx *Context) ([]byte, bool) { + switch val.Type() { + case KStringEscaped: + node := ptrCast(val.cptr) + offset := val.Position() + len := int(node.val) + return ctx.Parser.JsonBytes()[offset : offset + len], true + case KStringCommon: + return rt.Str2Mem(val.StringRef(ctx)), true + default: + return nil, false + } +} + +func (val Node) AsStringText(ctx *Context) ([]byte, bool) { + if !val.IsStr() { + return nil, false + } + + // clone to new bytes + s, b := val.AsStrRef(ctx) + return []byte(s), b +} + +func (val Node) IsStr() bool { + return (val.Type() == KStringCommon) || (val.Type() == KStringEscaped) +} + +func (val Node) IsRawNumber() bool { + return val.Type() == KRawNumber +} + +func (val Node) Number(ctx *Context) json.Number { + return json.Number(val.Raw(ctx)) +} + +func (val Node) Raw(ctx *Context) string { + node := ptrCast(val.cptr) + len := int(node.val) + offset := val.Position() + return ctx.Parser.Json[offset:int(offset+len)] +} + +func (val Node) Position() int { + node := ptrCast(val.cptr) + return int(node.typ >> PosBits) +} + +func (val Node) AsNumber(ctx *Context) (json.Number, bool) { + // parse JSON string as number + if val.IsStr() { + s, _ := val.AsStr(ctx) + err := ValidNumberFast(s) + if err != nil { + return "", false + } + + return json.Number(s), true + } + + return val.NonstrAsNumber(ctx) +} + +func (val Node) NonstrAsNumber(ctx *Context) (json.Number, bool) { + // deal with raw number + if val.IsRawNumber() { + return val.Number(ctx), true + } + + // deal with parse number + if !val.IsNumber() { + return json.Number(""), false + } + + start := val.Position() + end, err := SkipNumberFast(ctx.Parser.Json, start) + if err != nil { + return "", false + } + return json.Number(ctx.Parser.Json[start:end]), true +} + +func (val Node) AsRaw(ctx *Context) string { + // fast path for unescaped strings + switch val.Type() { + case KNull: + return "null" + case KTrue: + return "true" + case KFalse: + return "false" + case KStringCommon: + node := ptrCast(val.cptr) + len := int(node.val) + offset := val.Position() + // add start abd end quote + ref := rt.Str2Mem(ctx.Parser.Json)[offset-1 : offset+len+1] + return rt.Mem2Str(ref) + case KRawNumber: fallthrough + case KRaw: return val.Raw(ctx) + case KStringEscaped: + raw, _ := SkipOneFast(ctx.Parser.Json, val.Position() - 1) + return raw + default: + raw, err := SkipOneFast(ctx.Parser.Json, val.Position()) + if err != nil { + break + } + return raw + } + panic("should always be valid json here") +} + +// reference from the input JSON as possible +func (val Node) StringRef(ctx *Context) string { + return val.Raw(ctx) +} + +//go:nocheckptr +func ptrCast(p uintptr) *node { + return (*node)(unsafe.Pointer(p)) +} + +func (val Node) StringCopyEsc(ctx *Context) string { + // check whether there are in padded + node := ptrCast(val.cptr) + len := int(node.val) + offset := val.Position() + return string(ctx.Parser.JsonBytes()[offset : offset + len]) +} + +func (val Node) Object() Object { + return Object{cptr: val.cptr} +} + +func (val Node) Array() Array { + return Array{cptr: val.cptr} +} + +func (val *Array) Children() uintptr { + return PtrOffset(val.cptr, 1) +} + +func (val *Object) Children() uintptr { + return PtrOffset(val.cptr, 1) +} + +func (val *Node) Equal(ctx *Context, lhs string) bool { + // check whether escaped + cstr := ptrCast(val.cptr) + offset := int(val.Position()) + len := int(cstr.val) + return lhs == ctx.Parser.Json[offset:offset+len] +} + +func (node *Node) AsMapEface(ctx *Context, vp unsafe.Pointer) error { + if node.IsNull() { + return nil + } + + obj, ok := node.AsObj() + if !ok { + return newUnmatched(node.Position(), rt.MapEfaceType) + } + + var err, gerr error + size := obj.Len() + + var m map[string]interface{} + if *(*unsafe.Pointer)(vp) == nil { + if ctx.efacePool != nil { + p := ctx.efacePool.GetMap(size) + m = *(*map[string]interface{})(unsafe.Pointer(&p)) + } else { + m = make(map[string]interface{}, size) + } + } else { + m = *(*map[string]interface{})(vp) + } + + next := obj.Children() + for i := 0; i < size; i++ { + knode := NewNode(next) + key, _ := knode.AsStr(ctx) + val := NewNode(PtrOffset(next, 1)) + m[key], err = val.AsEface(ctx) + next = val.cptr + if gerr == nil && err != nil { + gerr = err + } + } + + *(*map[string]interface{})(vp) = m + return gerr +} + +func (node *Node) AsMapString(ctx *Context, vp unsafe.Pointer) error { + obj, ok := node.AsObj() + if !ok { + return newUnmatched(node.Position(), rt.MapStringType) + } + + size := obj.Len() + + var m map[string]string + if *(*unsafe.Pointer)(vp) == nil { + m = make(map[string]string, size) + } else { + m = *(*map[string]string)(vp) + } + + var gerr error + next := obj.Children() + for i := 0; i < size; i++ { + knode := NewNode(next) + key, _ := knode.AsStr(ctx) + val := NewNode(PtrOffset(next, 1)) + m[key], ok = val.AsStr(ctx) + if !ok { + if gerr == nil { + gerr = newUnmatched(val.Position(), rt.StringType) + } + next = val.Next() + } else { + next = PtrOffset(val.cptr, 1) + } + } + + *(*map[string]string)(vp) = m + return gerr +} + +func (node *Node) AsSliceEface(ctx *Context, vp unsafe.Pointer) error { + arr, ok := node.AsArr() + if !ok { + return newUnmatched(node.Position(), rt.SliceEfaceType) + } + + size := arr.Len() + var s []interface{} + if size != 0 && ctx.efacePool != nil { + slice := rt.GoSlice { + Ptr: ctx.efacePool.GetSlice(size), + Len: size, + Cap: size, + } + *(*rt.GoSlice)(unsafe.Pointer(&s)) = slice + } else { + s = *(*[]interface{})((unsafe.Pointer)(rt.MakeSlice(vp, rt.AnyType, size))) + } + + *node = NewNode(arr.Children()) + + var err, gerr error + for i := 0; i < size; i++ { + s[i], err = node.AsEface(ctx) + if gerr == nil && err != nil { + gerr = err + } + } + + *(*[]interface{})(vp) = s + return nil +} + +func (node *Node) AsSliceI32(ctx *Context, vp unsafe.Pointer) error { + arr, ok := node.AsArr() + if !ok { + return newUnmatched(node.Position(), rt.SliceI32Type) + } + + size := arr.Len() + s := *(*[]int32)((unsafe.Pointer)(rt.MakeSlice(vp, rt.Int32Type, size))) + next := arr.Children() + + var gerr error + for i := 0; i < size; i++ { + val := NewNode(next) + ret, ok := val.AsI64(ctx) + if !ok || ret > math.MaxInt32 || ret < math.MinInt32 { + if gerr == nil { + gerr = newUnmatched(val.Position(), rt.Int32Type) + } + next = val.Next() + } else { + s[i] = int32(ret) + next = PtrOffset(val.cptr, 1) + } + } + + *(*[]int32)(vp) = s + return gerr +} + +func (node *Node) AsSliceI64(ctx *Context, vp unsafe.Pointer) error { + arr, ok := node.AsArr() + if !ok { + return newUnmatched(node.Position(), rt.SliceI64Type) + } + + size := arr.Len() + s := *(*[]int64)((unsafe.Pointer)(rt.MakeSlice(vp, rt.Int64Type, size))) + next := arr.Children() + + var gerr error + for i := 0; i < size; i++ { + val := NewNode(next) + + ret, ok := val.AsI64(ctx) + if !ok { + if gerr == nil { + gerr = newUnmatched(val.Position(), rt.Int64Type) + } + next = val.Next() + } else { + s[i] = ret + next = PtrOffset(val.cptr, 1) + } + } + + *(*[]int64)(vp) = s + return gerr +} + +func (node *Node) AsSliceU32(ctx *Context, vp unsafe.Pointer) error { + arr, ok := node.AsArr() + if !ok { + return newUnmatched(node.Position(), rt.SliceU32Type) + } + + size := arr.Len() + next := arr.Children() + s := *(*[]uint32)((unsafe.Pointer)(rt.MakeSlice(vp, rt.Uint32Type, size))) + + var gerr error + for i := 0; i < size; i++ { + val := NewNode(next) + ret, ok := val.AsU64(ctx) + if !ok || ret > math.MaxUint32 { + if gerr == nil { + gerr = newUnmatched(val.Position(), rt.Uint32Type) + } + next = val.Next() + } else { + s[i] = uint32(ret) + next = PtrOffset(val.cptr, 1) + } + } + + *(*[]uint32)(vp) = s + return gerr +} + +func (node *Node) AsSliceU64(ctx *Context, vp unsafe.Pointer) error { + arr, ok := node.AsArr() + if !ok { + return newUnmatched(node.Position(), rt.SliceU64Type) + } + + size := arr.Len() + next := arr.Children() + + s := *(*[]uint64)((unsafe.Pointer)(rt.MakeSlice(vp, rt.Uint64Type, size))) + var gerr error + for i := 0; i < size; i++ { + val := NewNode(next) + ret, ok := val.AsU64(ctx) + if !ok { + if gerr == nil { + gerr = newUnmatched(val.Position(), rt.Uint64Type) + } + next = val.Next() + } else { + s[i] = ret + next = PtrOffset(val.cptr, 1) + } + } + + *(*[]uint64)(vp) = s + return gerr +} + +func (node *Node) AsSliceString(ctx *Context, vp unsafe.Pointer) error { + arr, ok := node.AsArr() + if !ok { + return newUnmatched(node.Position(), rt.SliceStringType) + } + + size := arr.Len() + next := arr.Children() + s := *(*[]string)((unsafe.Pointer)(rt.MakeSlice(vp, rt.StringType, size))) + + var gerr error + for i := 0; i < size; i++ { + val := NewNode(next) + ret, ok := val.AsStr(ctx) + if !ok { + if gerr == nil { + gerr = newUnmatched(val.Position(), rt.StringType) + } + next = val.Next() + } else { + s[i] = ret + next = PtrOffset(val.cptr, 1) + } + } + + *(*[]string)(vp) = s + return gerr +} + +func (node *Node) AsSliceBytes(ctx *Context) ([]byte, error) { + b, ok := node.AsBytesRef(ctx) + if !ok { + return nil, newUnmatched(node.Position(), rt.BytesType) + } + + b64, err := rt.DecodeBase64(b) + if err != nil { + return nil, newUnmatched(node.Position(), rt.BytesType) + } + return b64, nil +} + +// AsEface will always ok, because we have parse in native. +func (node *Node) AsEface(ctx *Context) (interface{}, error) { + if ctx.efacePool != nil { + iter := NewNodeIter(*node) + v := AsEfaceFast(&iter, ctx) + *node = iter.Peek() + return v, nil + } else { + return node.AsEfaceFallback(ctx) + } +} + +func parseSingleNode(node Node, ctx *Context) interface{} { + var v interface{} + switch node.Type() { + case KObject: v = map[string]interface{}{} + case KArray: v = []interface{}{} + case KStringCommon: v = node.StringRef(ctx) + case KStringEscaped: v = node.StringCopyEsc(ctx) + case KTrue: v = true + case KFalse: v = false + case KNull: v = nil + case KUint: v = float64(node.U64()) + case KSint: v = float64(node.I64()) + case KReal: v = float64(node.F64()) + case KRawNumber: v = node.Number(ctx) + default: panic("unreachable for as eface") + } + return v +} + +func castU64(val float64) uint64 { + return *((*uint64)(unsafe.Pointer((&val)))) +} + +func AsEfaceFast(iter *NodeIter, ctx *Context) interface{} { + var mp, sp, parent unsafe.Pointer // current container pointer + var node Node + var size int + var isObj bool + var slice rt.GoSlice + var val unsafe.Pointer + var vt **rt.GoType + var vp *unsafe.Pointer + var rootM unsafe.Pointer + var rootS rt.GoSlice + var root interface{} + var key string + + node = iter.Next() + + switch node.Type() { + case KObject: + size = node.Object().Len() + if size != 0 { + ctx.Stack.Push(nil, 0, true) + mp = ctx.efacePool.GetMap(size) + rootM = mp + isObj = true + goto _object_key + } else { + return rt.GoEface { + Type: rt.MapEfaceType, + Value: ctx.efacePool.GetMap(0), + }.Pack() + } + case KArray: + size = node.Array().Len() + if size != 0 { + ctx.Stack.Push(nil, 0, false) + sp = ctx.efacePool.GetSlice(size) + slice = rt.GoSlice { + Ptr: sp, + Len: size, + Cap: size, + } + rootS = slice + isObj = false + val = sp + goto _arr_val; + } else { + ctx.efacePool.ConvTSlice(rt.EmptySlice, rt.SliceEfaceType, unsafe.Pointer(&root)) + } + case KStringCommon: ctx.efacePool.ConvTstring(node.StringRef(ctx), unsafe.Pointer(&root)) + case KStringEscaped: ctx.efacePool.ConvTstring(node.StringCopyEsc(ctx), unsafe.Pointer(&root)) + case KTrue: root = true + case KFalse: root = false + case KNull: root = nil + case KUint: ctx.efacePool.ConvF64(float64(node.U64()), unsafe.Pointer(&root)) + case KSint: ctx.efacePool.ConvF64(float64(node.I64()), unsafe.Pointer(&root)) + case KReal: ctx.efacePool.ConvF64(node.F64(), unsafe.Pointer(&root)) + case KRawNumber: ctx.efacePool.ConvTnum(node.Number(ctx), unsafe.Pointer(&root)) + default: panic("unreachable for as eface") + } + return root + +_object_key: + node = iter.Next() + if node.Type() == KStringCommon { + key = node.StringRef(ctx) + } else { + key = node.StringCopyEsc(ctx) + } + + // interface{} slot in map bucket + val = rt.Mapassign_faststr(rt.MapEfaceMapType, mp, key) + vt = &(*rt.GoEface)(val).Type + vp = &(*rt.GoEface)(val).Value + + // parse value node + node = iter.Next() + switch node.Type() { + case KObject: + newSize := node.Object().Len() + newMp := ctx.efacePool.GetMap(newSize) + *vt = rt.MapEfaceType + *vp = newMp + remain := size - 1 + isObj = true + if newSize != 0 { + if remain > 0 { + ctx.Stack.Push(mp, remain, true) + } + mp = newMp + size = newSize + goto _object_key; + } + case KArray: + newSize := node.Array().Len() + if newSize == 0 { + ctx.efacePool.ConvTSlice(rt.EmptySlice, rt.SliceEfaceType, val) + break; + } + + newSp := ctx.efacePool.GetSlice(newSize) + // pack to []interface{} + ctx.efacePool.ConvTSlice(rt.GoSlice{ + Ptr: newSp, + Len: newSize, + Cap: newSize, + }, rt.SliceEfaceType, val) + remain := size - 1 + if remain > 0 { + ctx.Stack.Push(mp, remain, true) + } + val = newSp + isObj = false + size = newSize + goto _arr_val; + case KStringCommon: + ctx.efacePool.ConvTstring(node.StringRef(ctx), val) + case KStringEscaped: + ctx.efacePool.ConvTstring(node.StringCopyEsc(ctx), val) + case KTrue: + rt.ConvTBool(true, (*interface{})(val)) + case KFalse: + rt.ConvTBool(false, (*interface{})(val)) + case KNull: /* skip */ + case KUint: + ctx.efacePool.ConvF64(float64(node.U64()), val) + case KSint: + ctx.efacePool.ConvF64(float64(node.I64()), val) + case KReal: + ctx.efacePool.ConvF64(node.F64(), val) + case KRawNumber: + ctx.efacePool.ConvTnum(node.Number(ctx), val) + default: + panic("unreachable for as eface") + } + + // check size + size -= 1 + if size != 0 { + goto _object_key; + } + + parent, size, isObj = ctx.Stack.Pop() + + // parent is empty + if parent == nil { + if isObj { + return rt.GoEface { + Type: rt.MapEfaceType, + Value: rootM, + }.Pack() + } else { + ctx.efacePool.ConvTSlice(rootS, rt.SliceEfaceType, (unsafe.Pointer)(&root)) + return root + } + } + + // continue to parse parent + if isObj { + mp = parent + goto _object_key; + } else { + val = rt.PtrAdd(parent, rt.AnyType.Size) + goto _arr_val; + } + +_arr_val: + // interface{} slot in slice + vt = &(*rt.GoEface)(val).Type + vp = &(*rt.GoEface)(val).Value + + // parse value node + node = iter.Next() + switch node.Type() { + case KObject: + newSize := node.Object().Len() + newMp := ctx.efacePool.GetMap(newSize) + *vt = rt.MapEfaceType + *vp = newMp + remain := size - 1 + if newSize != 0 { + // push next array elem into stack + if remain > 0 { + ctx.Stack.Push(val, remain, false) + } + mp = newMp + size = newSize + isObj = true + goto _object_key; + } + case KArray: + newSize := node.Array().Len() + if newSize == 0 { + ctx.efacePool.ConvTSlice(rt.EmptySlice, rt.SliceEfaceType, val) + break; + } + + newSp := ctx.efacePool.GetSlice(newSize) + // pack to []interface{} + ctx.efacePool.ConvTSlice(rt.GoSlice { + Ptr: newSp, + Len: newSize, + Cap: newSize, + }, rt.SliceEfaceType, val) + + remain := size - 1 + if remain > 0 { + ctx.Stack.Push(val, remain, false) + } + + val = newSp + isObj = false + size = newSize + goto _arr_val; + case KStringCommon: + ctx.efacePool.ConvTstring(node.StringRef(ctx), val) + case KStringEscaped: + ctx.efacePool.ConvTstring(node.StringCopyEsc(ctx), val) + case KTrue: + rt.ConvTBool(true, (*interface{})(val)) + case KFalse: + rt.ConvTBool(false, (*interface{})(val)) + case KNull: /* skip */ + case KUint: + ctx.efacePool.ConvF64(float64(node.U64()), val) + case KSint: + ctx.efacePool.ConvF64(float64(node.I64()), val) + case KReal: + ctx.efacePool.ConvF64(node.F64(), val) + case KRawNumber: + ctx.efacePool.ConvTnum(node.Number(ctx), val) + default: panic("unreachable for as eface") + } + + // check size + size -= 1 + if size != 0 { + val = rt.PtrAdd(val, rt.AnyType.Size) + goto _arr_val; + } + + + parent, size, isObj = ctx.Stack.Pop() + + // parent is empty + if parent == nil { + if isObj { + return rt.GoEface { + Type: rt.MapEfaceType, + Value: rootM, + }.Pack() + } else { + ctx.efacePool.ConvTSlice(rootS, rt.SliceEfaceType, unsafe.Pointer(&root)) + return root + } + } + + // continue to parse parent + if isObj { + mp = parent + goto _object_key; + } else { + val = rt.PtrAdd(parent, rt.AnyType.Size) + goto _arr_val; + } +} + +func (node *Node) AsEfaceFallback(ctx *Context) (interface{}, error) { + switch node.Type() { + case KObject: + obj := node.Object() + size := obj.Len() + m := make(map[string]interface{}, size) + *node = NewNode(obj.Children()) + var gerr, err error + for i := 0; i < size; i++ { + key, _ := node.AsStr(ctx) + *node = NewNode(PtrOffset(node.cptr, 1)) + m[key], err = node.AsEfaceFallback(ctx) + if gerr == nil && err != nil { + gerr = err + } + } + return m, gerr + case KArray: + arr := node.Array() + size := arr.Len() + a := make([]interface{}, size) + *node = NewNode(arr.Children()) + var gerr, err error + for i := 0; i < size; i++ { + a[i], err = node.AsEfaceFallback(ctx) + if gerr == nil && err != nil { + gerr = err + } + } + return a, gerr + case KStringCommon: + str, _ := node.AsStr(ctx) + *node = NewNode(PtrOffset(node.cptr, 1)) + return str, nil + case KStringEscaped: + str := node.StringCopyEsc(ctx) + *node = NewNode(PtrOffset(node.cptr, 1)) + return str, nil + case KTrue: + *node = NewNode(PtrOffset(node.cptr, 1)) + return true, nil + case KFalse: + *node = NewNode(PtrOffset(node.cptr, 1)) + return false, nil + case KNull: + *node = NewNode(PtrOffset(node.cptr, 1)) + return nil, nil + default: + // use float64 + if ctx.Parser.options & (1 << _F_use_number) != 0 { + num, ok := node.AsNumber(ctx) + if !ok { + // skip the unmacthed type + *node = NewNode(node.Next()) + return nil, newUnmatched(node.Position(), rt.JsonNumberType) + } else { + *node = NewNode(PtrOffset(node.cptr, 1)) + return num, nil + } + } else if ctx.Parser.options & (1 << _F_use_int64) != 0 { + // first try int64 + i, ok := node.AsI64(ctx) + if ok { + *node = NewNode(PtrOffset(node.cptr, 1)) + return i, nil + } + + // is not integer, then use float64 + f, ok := node.AsF64(ctx) + if ok { + *node = NewNode(PtrOffset(node.cptr, 1)) + return f, nil + } + + // skip the unmacthed type + *node = NewNode(node.Next()) + return nil, newUnmatched(node.Position(), rt.Int64Type) + } else { + num, ok := node.AsF64(ctx) + if !ok { + // skip the unmacthed type + *node = NewNode(node.Next()) + return nil, newUnmatched(node.Position(), rt.Float64Type) + } else { + *node = NewNode(PtrOffset(node.cptr, 1)) + return num, nil + } + } + } +} + +//go:nosplit +func PtrOffset(ptr uintptr, off int64) uintptr { + return uintptr(int64(ptr) + off * int64(unsafe.Sizeof(node{}))) +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/slice.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/slice.go new file mode 100644 index 00000000..a94e422b --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/slice.go @@ -0,0 +1,224 @@ +package optdec + +import ( + "reflect" + "unsafe" + + "github.com/bytedance/sonic/internal/rt" +) + +type sliceDecoder struct { + elemType *rt.GoType + elemDec decFunc + typ reflect.Type +} + +var ( + emptyPtr = &struct{}{} +) + +func (d *sliceDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*rt.GoSlice)(vp) = rt.GoSlice{} + return nil + } + + arr, ok := node.AsArr() + if !ok { + return error_mismatch(node, ctx, d.typ) + } + + slice := rt.MakeSlice(vp, d.elemType, arr.Len()) + elems := slice.Ptr + next := arr.Children() + + var gerr error + for i := 0; i < arr.Len(); i++ { + val := NewNode(next) + elem := unsafe.Pointer(uintptr(elems) + uintptr(i)*d.elemType.Size) + err := d.elemDec.FromDom(elem, val, ctx) + if gerr == nil && err != nil { + gerr = err + } + next = val.Next() + } + + *(*rt.GoSlice)(vp) = *slice + return gerr +} + +type arrayDecoder struct { + len int + elemType *rt.GoType + elemDec decFunc + typ reflect.Type +} + +//go:nocheckptr +func (d *arrayDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + arr, ok := node.AsArr() + if !ok { + return error_mismatch(node, ctx, d.typ) + } + + next := arr.Children() + i := 0 + + var gerr error + for ; i < d.len && i < arr.Len(); i++ { + elem := unsafe.Pointer(uintptr(vp) + uintptr(i)*d.elemType.Size) + val := NewNode(next) + err := d.elemDec.FromDom(elem, val, ctx) + if gerr == nil && err != nil { + gerr = err + } + next = val.Next() + } + + /* zero rest of array */ + ptr := unsafe.Pointer(uintptr(vp) + uintptr(i)*d.elemType.Size) + n := uintptr(d.len-i) * d.elemType.Size + rt.ClearMemory(d.elemType, ptr, n) + return gerr +} + +type sliceEfaceDecoder struct { +} + +func (d *sliceEfaceDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*rt.GoSlice)(vp) = rt.GoSlice{} + return nil + } + + return node.AsSliceEface(ctx, vp) +} + +type sliceI32Decoder struct { +} + +func (d *sliceI32Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*rt.GoSlice)(vp) = rt.GoSlice{} + return nil + } + + return node.AsSliceI32(ctx, vp) +} + +type sliceI64Decoder struct { +} + +func (d *sliceI64Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*rt.GoSlice)(vp) = rt.GoSlice{} + return nil + } + + return node.AsSliceI64(ctx, vp) +} + +type sliceU32Decoder struct { +} + +func (d *sliceU32Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*rt.GoSlice)(vp) = rt.GoSlice{} + return nil + } + + return node.AsSliceU32(ctx, vp) +} + +type sliceU64Decoder struct { +} + +func (d *sliceU64Decoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*rt.GoSlice)(vp) = rt.GoSlice{} + return nil + } + + return node.AsSliceU64(ctx, vp) +} + +type sliceStringDecoder struct { +} + +func (d *sliceStringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*rt.GoSlice)(vp) = rt.GoSlice{} + return nil + } + + return node.AsSliceString(ctx, vp) +} + +type sliceBytesDecoder struct { +} + +func (d *sliceBytesDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*rt.GoSlice)(vp) = rt.GoSlice{} + return nil + } + + s, err := node.AsSliceBytes(ctx) + if err != nil { + return err + } + + *(*[]byte)(vp) = s + return nil +} + +type sliceBytesUnmarshalerDecoder struct { + elemType *rt.GoType + elemDec decFunc + typ reflect.Type +} + +func (d *sliceBytesUnmarshalerDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*rt.GoSlice)(vp) = rt.GoSlice{} + return nil + } + + /* parse JSON string into `[]byte` */ + if node.IsStr() { + slice, err := node.AsSliceBytes(ctx) + if err != nil { + return err + } + *(*[]byte)(vp) = slice + return nil + } + + /* parse JSON array into `[]byte` */ + arr, ok := node.AsArr() + if !ok { + return error_mismatch(node, ctx, d.typ) + } + + slice := rt.MakeSlice(vp, d.elemType, arr.Len()) + elems := slice.Ptr + + var gerr error + next := arr.Children() + for i := 0; i < arr.Len(); i++ { + child := NewNode(next) + elem := unsafe.Pointer(uintptr(elems) + uintptr(i)*d.elemType.Size) + err := d.elemDec.FromDom(elem, child, ctx) + if gerr == nil && err != nil { + gerr = err + } + next = child.Next() + } + + *(*rt.GoSlice)(vp) = *slice + return gerr +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/stringopts.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/stringopts.go new file mode 100644 index 00000000..627b5ebe --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/stringopts.go @@ -0,0 +1,360 @@ +package optdec + +import ( + "encoding/json" + "math" + "unsafe" + + "github.com/bytedance/sonic/internal/rt" +) + +type ptrStrDecoder struct { + typ *rt.GoType + deref decFunc +} + +// Pointer Value is allocated in the Caller +func (d *ptrStrDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + s, ok := node.AsStrRef(ctx) + if !ok { + return error_mismatch(node, ctx, stringType) + } + + if s == "null" { + *(*unsafe.Pointer)(vp) = nil + return nil + } + + if *(*unsafe.Pointer)(vp) == nil { + *(*unsafe.Pointer)(vp) = rt.Mallocgc(d.typ.Size, d.typ, true) + } + + return d.deref.FromDom(*(*unsafe.Pointer)(vp), node, ctx) +} + +type boolStringDecoder struct { +} + +func (d *boolStringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + s, ok := node.AsStrRef(ctx) + if !ok { + return error_mismatch(node, ctx, stringType) + } + + if s == "null" { + return nil + } + + b, err := ParseBool(s) + if err != nil { + return error_mismatch(node, ctx, boolType) + } + + *(*bool)(vp) = b + return nil +} + +func parseI64(node Node, ctx *context) (int64, error, bool) { + if node.IsNull() { + return 0, nil, true + } + + s, ok := node.AsStrRef(ctx) + if !ok { + return 0, error_mismatch(node, ctx, stringType), false + } + + if s == "null" { + return 0, nil, true + } + + ret, err := ParseI64(s) + return ret, err, false +} + +type i8StringDecoder struct{} + +func (d *i8StringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + ret, err, null := parseI64(node, ctx) + if null { + return nil + } + + if err != nil { + return err + } + + if ret > math.MaxInt8 || ret < math.MinInt8 { + return error_mismatch(node, ctx, int8Type) + } + + *(*int8)(vp) = int8(ret) + return nil +} + +type i16StringDecoder struct{} + +func (d *i16StringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + ret, err, null := parseI64(node, ctx) + if null { + return nil + } + + if err != nil { + return err + } + + if ret > math.MaxInt16 || ret < math.MinInt16 { + return error_mismatch(node, ctx, int16Type) + } + + *(*int16)(vp) = int16(ret) + return nil +} + +type i32StringDecoder struct{} + +func (d *i32StringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + ret, err, null := parseI64(node, ctx) + if null { + return nil + } + + if err != nil { + return err + } + + if ret > math.MaxInt32 || ret < math.MinInt32 { + return error_mismatch(node, ctx, int32Type) + } + + *(*int32)(vp) = int32(ret) + return nil +} + +type i64StringDecoder struct{} + +func (d *i64StringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + ret, err, null := parseI64(node, ctx) + if null { + return nil + } + + if err != nil { + return err + } + + *(*int64)(vp) = int64(ret) + return nil +} + +func parseU64(node Node, ctx *context) (uint64, error, bool) { + if node.IsNull() { + return 0, nil, true + } + + s, ok := node.AsStrRef(ctx) + if !ok { + return 0, error_mismatch(node, ctx, stringType), false + } + + if s == "null" { + return 0, nil, true + } + + ret, err := ParseU64(s) + return ret, err, false +} + +type u8StringDecoder struct{} + +func (d *u8StringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + ret, err, null := parseU64(node, ctx) + if null { + return nil + } + + if err != nil { + return err + } + + if ret > math.MaxUint8 { + return error_mismatch(node, ctx, uint8Type) + } + + *(*uint8)(vp) = uint8(ret) + return nil +} + +type u16StringDecoder struct{} + +func (d *u16StringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + ret, err, null := parseU64(node, ctx) + if null { + return nil + } + + if err != nil { + return err + } + + if ret > math.MaxUint16 { + return error_mismatch(node, ctx, uint16Type) + } + + *(*uint16)(vp) = uint16(ret) + return nil +} + +type u32StringDecoder struct{} + +func (d *u32StringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + ret, err, null := parseU64(node, ctx) + if null { + return nil + } + + if err != nil { + return err + } + + if ret > math.MaxUint32 { + return error_mismatch(node, ctx, uint32Type) + } + + *(*uint32)(vp) = uint32(ret) + return nil +} + + +type u64StringDecoder struct{} + +func (d *u64StringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + ret, err, null := parseU64(node, ctx) + if null { + return nil + } + + if err != nil { + return err + } + + *(*uint64)(vp) = uint64(ret) + return nil +} + +type f32StringDecoder struct{} + +func (d *f32StringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + s, ok := node.AsStrRef(ctx) + if !ok { + return error_mismatch(node, ctx, stringType) + } + + if s == "null" { + return nil + } + + ret, err := ParseF64(s) + if err != nil || ret > math.MaxFloat32 || ret < -math.MaxFloat32 { + return error_mismatch(node, ctx, float32Type) + } + + *(*float32)(vp) = float32(ret) + return nil +} + +type f64StringDecoder struct{} + +func (d *f64StringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + s, ok := node.AsStrRef(ctx) + if !ok { + return error_mismatch(node, ctx, stringType) + } + + if s == "null" { + return nil + } + + ret, err := ParseF64(s) + if err != nil { + return error_mismatch(node, ctx, float64Type) + } + + *(*float64)(vp) = float64(ret) + return nil +} + +/* parse string field with string options */ +type strStringDecoder struct{} + +func (d *strStringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + s, ok := node.AsStrRef(ctx) + if !ok { + return error_mismatch(node, ctx, stringType) + } + + if s == "null" { + return nil + } + + s, err := Unquote(s) + if err != nil { + return error_mismatch(node, ctx, stringType) + } + + *(*string)(vp) = s + return nil +} + +type numberStringDecoder struct{} + +func (d *numberStringDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + s, ok := node.AsStrRef(ctx) + if !ok { + return error_mismatch(node, ctx, stringType) + } + + if s == "null" { + return nil + } + + num, ok := node.ParseNumber(ctx) + if !ok { + return error_mismatch(node, ctx, jsonNumberType) + } + + end, err := SkipNumberFast(s, 0) + // has error or trailing chars + if err != nil || end != len(s) { + return error_mismatch(node, ctx, jsonNumberType) + } + + *(*json.Number)(vp) = json.Number(num) + return nil +} diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/structs.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/structs.go new file mode 100644 index 00000000..bce2758f --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/structs.go @@ -0,0 +1,61 @@ +package optdec + +import ( + "reflect" + "unsafe" + + caching "github.com/bytedance/sonic/internal/optcaching" + "github.com/bytedance/sonic/internal/resolver" +) + +type fieldEntry struct { + resolver.FieldMeta + fieldDec decFunc +} + +type structDecoder struct { + fieldMap caching.FieldLookup + fields []fieldEntry + structName string + typ reflect.Type +} + +func (d *structDecoder) FromDom(vp unsafe.Pointer, node Node, ctx *context) error { + if node.IsNull() { + return nil + } + + var gerr error + obj, ok := node.AsObj() + if !ok { + return error_mismatch(node, ctx, d.typ) + } + + next := obj.Children() + for i := 0; i < obj.Len(); i++ { + key, _ := NewNode(next).AsStrRef(ctx) + val := NewNode(PtrOffset(next, 1)) + next = val.Next() + + // find field idx + idx := d.fieldMap.Get(key) + if idx == -1 { + if Options(ctx.Options())&OptionDisableUnknown != 0 { + return error_field(key) + } + continue + } + + offset := d.fields[idx].Path[0].Size + elem := unsafe.Pointer(uintptr(vp) + offset) + err := d.fields[idx].fieldDec.FromDom(elem, val, ctx) + + // deal with mismatch type errors + if gerr == nil && err != nil { + // TODO: better error info + gerr = err + } + } + return gerr +} + diff --git a/vendor/github.com/bytedance/sonic/internal/decoder/optdec/types.go b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/types.go new file mode 100644 index 00000000..fe1433ee --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/decoder/optdec/types.go @@ -0,0 +1,60 @@ +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package optdec + +import ( + "encoding" + "encoding/base64" + "encoding/json" + "reflect" + "unsafe" + + "github.com/bytedance/sonic/internal/rt" +) + +var ( + boolType = reflect.TypeOf(bool(false)) + byteType = reflect.TypeOf(byte(0)) + intType = reflect.TypeOf(int(0)) + int8Type = reflect.TypeOf(int8(0)) + int16Type = reflect.TypeOf(int16(0)) + int32Type = reflect.TypeOf(int32(0)) + int64Type = reflect.TypeOf(int64(0)) + uintType = reflect.TypeOf(uint(0)) + uint8Type = reflect.TypeOf(uint8(0)) + uint16Type = reflect.TypeOf(uint16(0)) + uint32Type = reflect.TypeOf(uint32(0)) + uint64Type = reflect.TypeOf(uint64(0)) + float32Type = reflect.TypeOf(float32(0)) + float64Type = reflect.TypeOf(float64(0)) + stringType = reflect.TypeOf("") + bytesType = reflect.TypeOf([]byte(nil)) + jsonNumberType = reflect.TypeOf(json.Number("")) + base64CorruptInputError = reflect.TypeOf(base64.CorruptInputError(0)) + anyType = rt.UnpackType(reflect.TypeOf((*interface{})(nil)).Elem()) +) + +var ( + errorType = reflect.TypeOf((*error)(nil)).Elem() + jsonUnmarshalerType = reflect.TypeOf((*json.Unmarshaler)(nil)).Elem() + encodingTextUnmarshalerType = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem() +) + +func rtype(t reflect.Type) (*rt.GoItab, *rt.GoType) { + p := (*rt.GoIface)(unsafe.Pointer(&t)) + return p.Itab, (*rt.GoType)(p.Value) +} diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/mapiter.go b/vendor/github.com/bytedance/sonic/internal/encoder/alg/mapiter.go similarity index 52% rename from vendor/github.com/bytedance/sonic/internal/encoder/mapiter.go rename to vendor/github.com/bytedance/sonic/internal/encoder/alg/mapiter.go index 8a322b3a..5d9956a9 100644 --- a/vendor/github.com/bytedance/sonic/internal/encoder/mapiter.go +++ b/vendor/github.com/bytedance/sonic/internal/encoder/alg/mapiter.go @@ -14,15 +14,16 @@ * limitations under the License. */ -package encoder +package alg import ( "encoding" "reflect" + "strconv" "sync" "unsafe" - "github.com/bytedance/sonic/internal/native" + "github.com/bytedance/sonic/internal/encoder/vars" "github.com/bytedance/sonic/internal/rt" ) @@ -32,8 +33,8 @@ type _MapPair struct { m [32]byte } -type _MapIterator struct { - it rt.GoMapIterator // must be the first field +type MapIterator struct { + It rt.GoMapIterator // must be the first field kv rt.GoSlice // slice of _MapPair ki int } @@ -44,43 +45,43 @@ var ( ) func init() { - if unsafe.Offsetof(_MapIterator{}.it) != 0 { + if unsafe.Offsetof(MapIterator{}.It) != 0 { panic("_MapIterator.it is not the first field") } } -func newIterator() *_MapIterator { +func newIterator() *MapIterator { if v := iteratorPool.Get(); v == nil { - return new(_MapIterator) + return new(MapIterator) } else { - return resetIterator(v.(*_MapIterator)) + return resetIterator(v.(*MapIterator)) } } -func resetIterator(p *_MapIterator) *_MapIterator { +func resetIterator(p *MapIterator) *MapIterator { p.ki = 0 - p.it = rt.GoMapIterator{} + p.It = rt.GoMapIterator{} p.kv.Len = 0 return p } -func (self *_MapIterator) at(i int) *_MapPair { +func (self *MapIterator) at(i int) *_MapPair { return (*_MapPair)(unsafe.Pointer(uintptr(self.kv.Ptr) + uintptr(i) * unsafe.Sizeof(_MapPair{}))) } -func (self *_MapIterator) add() (p *_MapPair) { +func (self *MapIterator) add() (p *_MapPair) { p = self.at(self.kv.Len) self.kv.Len++ return } -func (self *_MapIterator) data() (p []_MapPair) { +func (self *MapIterator) data() (p []_MapPair) { *(*rt.GoSlice)(unsafe.Pointer(&p)) = self.kv return } -func (self *_MapIterator) append(t *rt.GoType, k unsafe.Pointer, v unsafe.Pointer) (err error) { +func (self *MapIterator) append(t *rt.GoType, k unsafe.Pointer, v unsafe.Pointer) (err error) { p := self.add() p.v = v @@ -94,26 +95,26 @@ func (self *_MapIterator) append(t *rt.GoType, k unsafe.Pointer, v unsafe.Pointe return nil } -func (self *_MapIterator) appendGeneric(p *_MapPair, t *rt.GoType, v reflect.Kind, k unsafe.Pointer) error { +func (self *MapIterator) appendGeneric(p *_MapPair, t *rt.GoType, v reflect.Kind, k unsafe.Pointer) error { switch v { - case reflect.Int : p.k = rt.Mem2Str(p.m[:native.I64toa(&p.m[0], int64(*(*int)(k)))]) ; return nil - case reflect.Int8 : p.k = rt.Mem2Str(p.m[:native.I64toa(&p.m[0], int64(*(*int8)(k)))]) ; return nil - case reflect.Int16 : p.k = rt.Mem2Str(p.m[:native.I64toa(&p.m[0], int64(*(*int16)(k)))]) ; return nil - case reflect.Int32 : p.k = rt.Mem2Str(p.m[:native.I64toa(&p.m[0], int64(*(*int32)(k)))]) ; return nil - case reflect.Int64 : p.k = rt.Mem2Str(p.m[:native.I64toa(&p.m[0], *(*int64)(k))]) ; return nil - case reflect.Uint : p.k = rt.Mem2Str(p.m[:native.U64toa(&p.m[0], uint64(*(*uint)(k)))]) ; return nil - case reflect.Uint8 : p.k = rt.Mem2Str(p.m[:native.U64toa(&p.m[0], uint64(*(*uint8)(k)))]) ; return nil - case reflect.Uint16 : p.k = rt.Mem2Str(p.m[:native.U64toa(&p.m[0], uint64(*(*uint16)(k)))]) ; return nil - case reflect.Uint32 : p.k = rt.Mem2Str(p.m[:native.U64toa(&p.m[0], uint64(*(*uint32)(k)))]) ; return nil - case reflect.Uint64 : p.k = rt.Mem2Str(p.m[:native.U64toa(&p.m[0], *(*uint64)(k))]) ; return nil - case reflect.Uintptr : p.k = rt.Mem2Str(p.m[:native.U64toa(&p.m[0], uint64(*(*uintptr)(k)))]) ; return nil + case reflect.Int : p.k = rt.Mem2Str(strconv.AppendInt(p.m[:0], int64(*(*int)(k)), 10)) ; return nil + case reflect.Int8 : p.k = rt.Mem2Str(strconv.AppendInt(p.m[:0], int64(*(*int8)(k)), 10)) ; return nil + case reflect.Int16 : p.k = rt.Mem2Str(strconv.AppendInt(p.m[:0], int64(*(*int16)(k)), 10)) ; return nil + case reflect.Int32 : p.k = rt.Mem2Str(strconv.AppendInt(p.m[:0], int64(*(*int32)(k)), 10)) ; return nil + case reflect.Int64 : p.k = rt.Mem2Str(strconv.AppendInt(p.m[:0], int64(*(*int64)(k)), 10)) ; return nil + case reflect.Uint : p.k = rt.Mem2Str(strconv.AppendUint(p.m[:0], uint64(*(*uint)(k)), 10)) ; return nil + case reflect.Uint8 : p.k = rt.Mem2Str(strconv.AppendUint(p.m[:0], uint64(*(*uint8)(k)), 10)) ; return nil + case reflect.Uint16 : p.k = rt.Mem2Str(strconv.AppendUint(p.m[:0], uint64(*(*uint16)(k)), 10)) ; return nil + case reflect.Uint32 : p.k = rt.Mem2Str(strconv.AppendUint(p.m[:0], uint64(*(*uint32)(k)), 10)) ; return nil + case reflect.Uint64 : p.k = rt.Mem2Str(strconv.AppendUint(p.m[:0], uint64(*(*uint64)(k)), 10)) ; return nil + case reflect.Uintptr : p.k = rt.Mem2Str(strconv.AppendUint(p.m[:0], uint64(*(*uintptr)(k)), 10)) ; return nil case reflect.Interface : return self.appendInterface(p, t, k) case reflect.Struct, reflect.Ptr : return self.appendConcrete(p, t, k) default : panic("unexpected map key type") } } -func (self *_MapIterator) appendConcrete(p *_MapPair, t *rt.GoType, k unsafe.Pointer) (err error) { +func (self *MapIterator) appendConcrete(p *_MapPair, t *rt.GoType, k unsafe.Pointer) (err error) { // compiler has already checked that the type implements the encoding.MarshalText interface if !t.Indirect() { k = *(*unsafe.Pointer)(k) @@ -127,7 +128,7 @@ func (self *_MapIterator) appendConcrete(p *_MapPair, t *rt.GoType, k unsafe.Poi return } -func (self *_MapIterator) appendInterface(p *_MapPair, t *rt.GoType, k unsafe.Pointer) (err error) { +func (self *MapIterator) appendInterface(p *_MapPair, t *rt.GoType, k unsafe.Pointer) (err error) { if len(rt.IfaceType(t).Methods) == 0 { panic("unexpected map key type") } else if p.k, err = asText(k); err == nil { @@ -137,17 +138,17 @@ func (self *_MapIterator) appendInterface(p *_MapPair, t *rt.GoType, k unsafe.Po } } -func iteratorStop(p *_MapIterator) { +func IteratorStop(p *MapIterator) { iteratorPool.Put(p) } -func iteratorNext(p *_MapIterator) { +func IteratorNext(p *MapIterator) { i := p.ki - t := &p.it + t := &p.It /* check for unordered iteration */ if i < 0 { - mapiternext(t) + rt.Mapiternext(t) return } @@ -164,25 +165,25 @@ func iteratorNext(p *_MapIterator) { p.ki++ } -func iteratorStart(t *rt.GoMapType, m *rt.GoMap, fv uint64) (*_MapIterator, error) { +func IteratorStart(t *rt.GoMapType, m *rt.GoMap, fv uint64) (*MapIterator, error) { it := newIterator() - mapiterinit(t, m, &it.it) + rt.Mapiterinit(t, m, &it.It) /* check for key-sorting, empty map don't need sorting */ - if m.Count == 0 || (fv & uint64(SortMapKeys)) == 0 { + if m.Count == 0 || (fv & (1< it.kv.Cap { - it.kv = growslice(iteratorPair, it.kv, m.Count) + it.kv = rt.GrowSlice(iteratorPair, it.kv, m.Count) } /* dump all the key-value pairs */ - for ; it.it.K != nil; mapiternext(&it.it) { - if err := it.append(t.Key, it.it.K, it.it.V); err != nil { - iteratorStop(it) + for ; it.It.K != nil; rt.Mapiternext(&it.It) { + if err := it.append(t.Key, it.It.K, it.It.V); err != nil { + IteratorStop(it) return nil, err } } @@ -193,7 +194,13 @@ func iteratorStart(t *rt.GoMapType, m *rt.GoMap, fv uint64) (*_MapIterator, erro } /* load the first pair into iterator */ - it.it.V = it.at(0).v - it.it.K = unsafe.Pointer(&it.at(0).k) + it.It.V = it.at(0).v + it.It.K = unsafe.Pointer(&it.at(0).k) return it, nil } + +func asText(v unsafe.Pointer) (string, error) { + text := rt.AssertI2I(rt.UnpackType(vars.EncodingTextMarshalerType), *(*rt.GoIface)(v)) + r, e := (*(*encoding.TextMarshaler)(unsafe.Pointer(&text))).MarshalText() + return rt.Mem2Str(r), e +} diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/alg/opts.go b/vendor/github.com/bytedance/sonic/internal/encoder/alg/opts.go new file mode 100644 index 00000000..c19e2de4 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/encoder/alg/opts.go @@ -0,0 +1,31 @@ +/** + * Copyright 2024 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package alg + +const ( + BitSortMapKeys = iota + BitEscapeHTML + BitCompactMarshaler + BitNoQuoteTextMarshaler + BitNoNullSliceOrMap + BitValidateString + BitNoValidateJSONMarshaler + BitNoEncoderNewline + BitEncodeNullForInfOrNan + + BitPointerValue = 63 +) diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/alg/primitives.go b/vendor/github.com/bytedance/sonic/internal/encoder/alg/primitives.go new file mode 100644 index 00000000..63fa0189 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/encoder/alg/primitives.go @@ -0,0 +1,95 @@ +/** + * Copyright 2024 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package alg + +import ( + "encoding" + "encoding/json" + + "github.com/bytedance/sonic/internal/encoder/vars" + "github.com/bytedance/sonic/internal/rt" +) + +func Compact(p *[]byte, v []byte) error { + buf := vars.NewBuffer() + err := json.Compact(buf, v) + + /* check for errors */ + if err != nil { + return err + } + + /* add to result */ + v = buf.Bytes() + *p = append(*p, v...) + + /* return the buffer into pool */ + vars.FreeBuffer(buf) + return nil +} + +func EncodeNil(rb *[]byte) error { + *rb = append(*rb, 'n', 'u', 'l', 'l') + return nil +} + +// func Make_EncodeTypedPointer(computor func(*rt.GoType, ...interface{}) (interface{}, error)) func(*[]byte, *rt.GoType, *unsafe.Pointer, *vars.Stack, uint64) error { +// return func(buf *[]byte, vt *rt.GoType, vp *unsafe.Pointer, sb *vars.Stack, fv uint64) error { +// if vt == nil { +// return EncodeNil(buf) +// } else if fn, err := vars.FindOrCompile(vt, (fv&(1< 0 { + // output buffer + dp := unsafe.Pointer(uintptr(b.Ptr) + uintptr(b.Len)) + dn := b.Cap - b.Len + // call native.Quote, dn is byte count it outputs + opts := uint64(0) + if double { + opts = types.F_DOUBLE_UNQUOTE + } + ret := native.Quote(sp, nb, dp, &dn, opts) + // update *buf length + b.Len += dn + + // no need more output + if ret >= 0 { + break + } + + // double buf size + *b = rt.GrowSlice(typeByte, *b, b.Cap*2) + // ret is the complement of consumed input + ret = ^ret + // update input buffer + nb -= ret + sp = unsafe.Pointer(uintptr(sp) + uintptr(ret)) + } + + runtime.KeepAlive(buf) + runtime.KeepAlive(sp) + if double { + buf = append(buf, `\""`...) + } else { + buf = append(buf, `"`...) + } + + return buf +} + +func HtmlEscape(dst []byte, src []byte) []byte { + var sidx int + + dst = append(dst, src[:0]...) // avoid check nil dst + sbuf := (*rt.GoSlice)(unsafe.Pointer(&src)) + dbuf := (*rt.GoSlice)(unsafe.Pointer(&dst)) + + /* grow dst if it is shorter */ + if cap(dst)-len(dst) < len(src)+types.BufPaddingSize { + cap := len(src)*3/2 + types.BufPaddingSize + *dbuf = rt.GrowSlice(typeByte, *dbuf, cap) + } + + for sidx < sbuf.Len { + sp := rt.Add(sbuf.Ptr, uintptr(sidx)) + dp := rt.Add(dbuf.Ptr, uintptr(dbuf.Len)) + + sn := sbuf.Len - sidx + dn := dbuf.Cap - dbuf.Len + nb := native.HTMLEscape(sp, sn, dp, &dn) + + /* check for errors */ + if dbuf.Len += dn; nb >= 0 { + break + } + + /* not enough space, grow the slice and try again */ + sidx += ^nb + *dbuf = rt.GrowSlice(typeByte, *dbuf, dbuf.Cap*2) + } + return dst +} + +func F64toa(buf []byte, v float64) ([]byte) { + if v == 0 { + return append(buf, '0') + } + buf = rt.GuardSlice2(buf, 64) + ret := native.F64toa((*byte)(rt.IndexByte(buf, len(buf))), v) + if ret > 0 { + return buf[:len(buf)+ret] + } else { + return buf + } +} + +func F32toa(buf []byte, v float32) ([]byte) { + if v == 0 { + return append(buf, '0') + } + buf = rt.GuardSlice2(buf, 64) + ret := native.F32toa((*byte)(rt.IndexByte(buf, len(buf))), v) + if ret > 0 { + return buf[:len(buf)+ret] + } else { + return buf + } +} + +func I64toa(buf []byte, v int64) ([]byte) { + buf = rt.GuardSlice2(buf, 32) + ret := native.I64toa((*byte)(rt.IndexByte(buf, len(buf))), v) + if ret > 0 { + return buf[:len(buf)+ret] + } else { + return buf + } +} + +func U64toa(buf []byte, v uint64) ([]byte) { + buf = rt.GuardSlice2(buf, 32) + ret := native.U64toa((*byte)(rt.IndexByte(buf, len(buf))), v) + if ret > 0 { + return buf[:len(buf)+ret] + } else { + return buf + } +} + diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/alg/spec_compat.go b/vendor/github.com/bytedance/sonic/internal/encoder/alg/spec_compat.go new file mode 100644 index 00000000..c15cbf7d --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/encoder/alg/spec_compat.go @@ -0,0 +1,148 @@ +// +build !amd64,!arm64 go1.24 !go1.16 arm64,!go1.20 + +/** + * Copyright 2024 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package alg + +import ( + _ "unsafe" + "unicode/utf8" + "strconv" + "bytes" + "encoding/json" + + "github.com/bytedance/sonic/internal/rt" +) + +// Valid validates json and returns first non-blank character position, +// if it is only one valid json value. +// Otherwise returns invalid character position using start. +// +// Note: it does not check for the invalid UTF-8 characters. +func Valid(data []byte) (ok bool, start int) { + ok = json.Valid(data) + return ok, 0 +} + +var typeByte = rt.UnpackEface(byte(0)).Type + +func Quote(e []byte, s string, double bool) []byte { + if len(s) == 0 { + if double { + return append(e, `"\"\""`...) + } + return append(e, `""`...) + } + + b := e + ss := len(e) + e = append(e, '"') + start := 0 + + for i := 0; i < len(s); { + if b := s[i]; b < utf8.RuneSelf { + if rt.SafeSet[b] { + i++ + continue + } + if start < i { + e = append(e, s[start:i]...) + } + e = append(e, '\\') + switch b { + case '\\', '"': + e = append(e, b) + case '\n': + e = append(e, 'n') + case '\r': + e = append(e, 'r') + case '\t': + e = append(e, 't') + default: + // This encodes bytes < 0x20 except for \t, \n and \r. + // If escapeHTML is set, it also escapes <, >, and & + // because they can lead to security holes when + // user-controlled strings are rendered into JSON + // and served to some browsers. + e = append(e, `u00`...) + e = append(e, rt.Hex[b>>4]) + e = append(e, rt.Hex[b&0xF]) + } + i++ + start = i + continue + } + c, size := utf8.DecodeRuneInString(s[i:]) + // if correct && c == utf8.RuneError && size == 1 { + // if start < i { + // e = append(e, s[start:i]...) + // } + // e = append(e, `\ufffd`...) + // i += size + // start = i + // continue + // } + if c == '\u2028' || c == '\u2029' { + if start < i { + e = append(e, s[start:i]...) + } + e = append(e, `\u202`...) + e = append(e, rt.Hex[c&0xF]) + i += size + start = i + continue + } + i += size + } + + if start < len(s) { + e = append(e, s[start:]...) + } + e = append(e, '"') + + if double { + return strconv.AppendQuote(b, string(e[ss:])) + } else { + return e + } +} + +func HtmlEscape(dst []byte, src []byte) []byte { + buf := bytes.NewBuffer(dst) + json.HTMLEscape(buf, src) + return buf.Bytes() +} + +func F64toa(buf []byte, v float64) ([]byte) { + bs := bytes.NewBuffer(buf) + _ = json.NewEncoder(bs).Encode(v) + return bs.Bytes() +} + +func F32toa(buf []byte, v float32) ([]byte) { + bs := bytes.NewBuffer(buf) + _ = json.NewEncoder(bs).Encode(v) + return bs.Bytes() +} + +func I64toa(buf []byte, v int64) ([]byte) { + return strconv.AppendInt(buf, int64(v), 10) +} + +func U64toa(buf []byte, v uint64) ([]byte) { + return strconv.AppendUint(buf, v, 10) +} diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/asm_stubs_amd64_go116.go b/vendor/github.com/bytedance/sonic/internal/encoder/asm_stubs_amd64_go116.go deleted file mode 100644 index 0a99f30a..00000000 --- a/vendor/github.com/bytedance/sonic/internal/encoder/asm_stubs_amd64_go116.go +++ /dev/null @@ -1,51 +0,0 @@ -// +build go1.16,!go1.17 - -// Copyright 2023 CloudWeGo Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package encoder - -import ( - `strconv` - - `github.com/bytedance/sonic/internal/jit` - `github.com/twitchyliquid64/golang-asm/obj` - `github.com/twitchyliquid64/golang-asm/obj/x86` -) - -var ( - _V_writeBarrier = jit.Imm(int64(_runtime_writeBarrier)) - - _F_gcWriteBarrierAX = jit.Func(gcWriteBarrierAX) -) - -func (self *_Assembler) WritePtr(i int, ptr obj.Addr, rec obj.Addr) { - if rec.Reg == x86.REG_AX || rec.Index == x86.REG_AX { - panic("rec contains AX!") - } - self.Emit("MOVQ", _V_writeBarrier, _R10) - self.Emit("CMPL", jit.Ptr(_R10, 0), jit.Imm(0)) - self.Sjmp("JE", "_no_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Emit("MOVQ", ptr, _AX) - self.xsave(_DI) - self.Emit("LEAQ", rec, _DI) - self.Emit("MOVQ", _F_gcWriteBarrierAX, _R10) // MOVQ ${fn}, AX - self.Rjmp("CALL", _R10) - self.xload(_DI) - self.Sjmp("JMP", "_end_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Link("_no_writeBarrier" + strconv.Itoa(i) + "_{n}") - self.Emit("MOVQ", ptr, rec) - self.Link("_end_writeBarrier" + strconv.Itoa(i) + "_{n}") -} - diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/assembler_regabi_amd64.go b/vendor/github.com/bytedance/sonic/internal/encoder/assembler_regabi_amd64.go deleted file mode 100644 index a89364b1..00000000 --- a/vendor/github.com/bytedance/sonic/internal/encoder/assembler_regabi_amd64.go +++ /dev/null @@ -1,1177 +0,0 @@ -//go:build go1.17 && !go1.22 -// +build go1.17,!go1.22 - -/* - * Copyright 2021 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package encoder - -import ( - `fmt` - `reflect` - `strconv` - `unsafe` - - `github.com/bytedance/sonic/internal/cpu` - `github.com/bytedance/sonic/internal/jit` - `github.com/bytedance/sonic/internal/native/types` - `github.com/twitchyliquid64/golang-asm/obj` - `github.com/twitchyliquid64/golang-asm/obj/x86` - - `github.com/bytedance/sonic/internal/native` - `github.com/bytedance/sonic/internal/rt` -) - -/** Register Allocations - * - * State Registers: - * - * %rbx : stack base - * %rdi : result pointer - * %rsi : result length - * %rdx : result capacity - * %r12 : sp->p - * %r13 : sp->q - * %r14 : sp->x - * %r15 : sp->f - * - * Error Registers: - * - * %r10 : error type register - * %r11 : error pointer register - */ - -/** Function Prototype & Stack Map - * - * func (buf *[]byte, p unsafe.Pointer, sb *_Stack, fv uint64) (err error) - * - * buf : (FP) - * p : 8(FP) - * sb : 16(FP) - * fv : 24(FP) - * err.vt : 32(FP) - * err.vp : 40(FP) - */ - -const ( - _S_cond = iota - _S_init -) - -const ( - _FP_args = 32 // 32 bytes for spill registers of arguments - _FP_fargs = 40 // 40 bytes for passing arguments to other Go functions - _FP_saves = 64 // 64 bytes for saving the registers before CALL instructions - _FP_locals = 24 // 24 bytes for local variables -) - -const ( - _FP_loffs = _FP_fargs + _FP_saves - _FP_offs = _FP_loffs + _FP_locals - // _FP_offs = _FP_loffs + _FP_locals + _FP_debug - _FP_size = _FP_offs + 8 // 8 bytes for the parent frame pointer - _FP_base = _FP_size + 8 // 8 bytes for the return address -) - -const ( - _FM_exp32 = 0x7f800000 - _FM_exp64 = 0x7ff0000000000000 -) - -const ( - _IM_null = 0x6c6c756e // 'null' - _IM_true = 0x65757274 // 'true' - _IM_fals = 0x736c6166 // 'fals' ('false' without the 'e') - _IM_open = 0x00225c22 // '"\"∅' - _IM_array = 0x5d5b // '[]' - _IM_object = 0x7d7b // '{}' - _IM_mulv = -0x5555555555555555 -) - -const ( - _LB_more_space = "_more_space" - _LB_more_space_return = "_more_space_return_" -) - -const ( - _LB_error = "_error" - _LB_error_too_deep = "_error_too_deep" - _LB_error_invalid_number = "_error_invalid_number" - _LB_error_nan_or_infinite = "_error_nan_or_infinite" - _LB_panic = "_panic" -) - -var ( - _AX = jit.Reg("AX") - _BX = jit.Reg("BX") - _CX = jit.Reg("CX") - _DX = jit.Reg("DX") - _DI = jit.Reg("DI") - _SI = jit.Reg("SI") - _BP = jit.Reg("BP") - _SP = jit.Reg("SP") - _R8 = jit.Reg("R8") - _R9 = jit.Reg("R9") -) - -var ( - _X0 = jit.Reg("X0") - _Y0 = jit.Reg("Y0") -) - -var ( - _ST = jit.Reg("R15") // can't use R14 since it's always scratched by Go... - _RP = jit.Reg("DI") - _RL = jit.Reg("SI") - _RC = jit.Reg("DX") -) - -var ( - _LR = jit.Reg("R9") - _ET = jit.Reg("AX") - _EP = jit.Reg("BX") -) - -var ( - _SP_p = jit.Reg("R10") // saved on BX when call_c - _SP_q = jit.Reg("R11") // saved on BP when call_c - _SP_x = jit.Reg("R12") - _SP_f = jit.Reg("R13") -) - -var ( - _ARG_rb = jit.Ptr(_SP, _FP_base) - _ARG_vp = jit.Ptr(_SP, _FP_base + 8) - _ARG_sb = jit.Ptr(_SP, _FP_base + 16) - _ARG_fv = jit.Ptr(_SP, _FP_base + 24) -) - -var ( - _RET_et = _ET - _RET_ep = _EP -) - -var ( - _VAR_sp = jit.Ptr(_SP, _FP_fargs + _FP_saves) - _VAR_dn = jit.Ptr(_SP, _FP_fargs + _FP_saves + 8) - _VAR_vp = jit.Ptr(_SP, _FP_fargs + _FP_saves + 16) -) - -var ( - _REG_ffi = []obj.Addr{ _RP, _RL, _RC} - _REG_b64 = []obj.Addr{_SP_p, _SP_q} - - _REG_all = []obj.Addr{_ST, _SP_x, _SP_f, _SP_p, _SP_q, _RP, _RL, _RC} - _REG_ms = []obj.Addr{_ST, _SP_x, _SP_f, _SP_p, _SP_q, _LR} - _REG_enc = []obj.Addr{_ST, _SP_x, _SP_f, _SP_p, _SP_q, _RL} -) - -type _Assembler struct { - jit.BaseAssembler - p _Program - x int - name string -} - -func newAssembler(p _Program) *_Assembler { - return new(_Assembler).Init(p) -} - -/** Assembler Interface **/ - -func (self *_Assembler) Load() _Encoder { - return ptoenc(self.BaseAssembler.Load("encode_"+self.name, _FP_size, _FP_args, argPtrs, localPtrs)) -} - -func (self *_Assembler) Init(p _Program) *_Assembler { - self.p = p - self.BaseAssembler.Init(self.compile) - return self -} - -func (self *_Assembler) compile() { - self.prologue() - self.instrs() - self.epilogue() - self.builtins() -} - -/** Assembler Stages **/ - -var _OpFuncTab = [256]func(*_Assembler, *_Instr) { - _OP_null : (*_Assembler)._asm_OP_null, - _OP_empty_arr : (*_Assembler)._asm_OP_empty_arr, - _OP_empty_obj : (*_Assembler)._asm_OP_empty_obj, - _OP_bool : (*_Assembler)._asm_OP_bool, - _OP_i8 : (*_Assembler)._asm_OP_i8, - _OP_i16 : (*_Assembler)._asm_OP_i16, - _OP_i32 : (*_Assembler)._asm_OP_i32, - _OP_i64 : (*_Assembler)._asm_OP_i64, - _OP_u8 : (*_Assembler)._asm_OP_u8, - _OP_u16 : (*_Assembler)._asm_OP_u16, - _OP_u32 : (*_Assembler)._asm_OP_u32, - _OP_u64 : (*_Assembler)._asm_OP_u64, - _OP_f32 : (*_Assembler)._asm_OP_f32, - _OP_f64 : (*_Assembler)._asm_OP_f64, - _OP_str : (*_Assembler)._asm_OP_str, - _OP_bin : (*_Assembler)._asm_OP_bin, - _OP_quote : (*_Assembler)._asm_OP_quote, - _OP_number : (*_Assembler)._asm_OP_number, - _OP_eface : (*_Assembler)._asm_OP_eface, - _OP_iface : (*_Assembler)._asm_OP_iface, - _OP_byte : (*_Assembler)._asm_OP_byte, - _OP_text : (*_Assembler)._asm_OP_text, - _OP_deref : (*_Assembler)._asm_OP_deref, - _OP_index : (*_Assembler)._asm_OP_index, - _OP_load : (*_Assembler)._asm_OP_load, - _OP_save : (*_Assembler)._asm_OP_save, - _OP_drop : (*_Assembler)._asm_OP_drop, - _OP_drop_2 : (*_Assembler)._asm_OP_drop_2, - _OP_recurse : (*_Assembler)._asm_OP_recurse, - _OP_is_nil : (*_Assembler)._asm_OP_is_nil, - _OP_is_nil_p1 : (*_Assembler)._asm_OP_is_nil_p1, - _OP_is_zero_1 : (*_Assembler)._asm_OP_is_zero_1, - _OP_is_zero_2 : (*_Assembler)._asm_OP_is_zero_2, - _OP_is_zero_4 : (*_Assembler)._asm_OP_is_zero_4, - _OP_is_zero_8 : (*_Assembler)._asm_OP_is_zero_8, - _OP_is_zero_map : (*_Assembler)._asm_OP_is_zero_map, - _OP_goto : (*_Assembler)._asm_OP_goto, - _OP_map_iter : (*_Assembler)._asm_OP_map_iter, - _OP_map_stop : (*_Assembler)._asm_OP_map_stop, - _OP_map_check_key : (*_Assembler)._asm_OP_map_check_key, - _OP_map_write_key : (*_Assembler)._asm_OP_map_write_key, - _OP_map_value_next : (*_Assembler)._asm_OP_map_value_next, - _OP_slice_len : (*_Assembler)._asm_OP_slice_len, - _OP_slice_next : (*_Assembler)._asm_OP_slice_next, - _OP_marshal : (*_Assembler)._asm_OP_marshal, - _OP_marshal_p : (*_Assembler)._asm_OP_marshal_p, - _OP_marshal_text : (*_Assembler)._asm_OP_marshal_text, - _OP_marshal_text_p : (*_Assembler)._asm_OP_marshal_text_p, - _OP_cond_set : (*_Assembler)._asm_OP_cond_set, - _OP_cond_testc : (*_Assembler)._asm_OP_cond_testc, -} - -func (self *_Assembler) instr(v *_Instr) { - if fn := _OpFuncTab[v.op()]; fn != nil { - fn(self, v) - } else { - panic(fmt.Sprintf("invalid opcode: %d", v.op())) - } -} - -func (self *_Assembler) instrs() { - for i, v := range self.p { - self.Mark(i) - self.instr(&v) - self.debug_instr(i, &v) - } -} - -func (self *_Assembler) builtins() { - self.more_space() - self.error_too_deep() - self.error_invalid_number() - self.error_nan_or_infinite() - self.go_panic() -} - -func (self *_Assembler) epilogue() { - self.Mark(len(self.p)) - self.Emit("XORL", _ET, _ET) - self.Emit("XORL", _EP, _EP) - self.Link(_LB_error) - self.Emit("MOVQ", _ARG_rb, _CX) // MOVQ rb<>+0(FP), CX - self.Emit("MOVQ", _RL, jit.Ptr(_CX, 8)) // MOVQ RL, 8(CX) - self.Emit("MOVQ", jit.Imm(0), _ARG_rb) // MOVQ AX, rb<>+0(FP) - self.Emit("MOVQ", jit.Imm(0), _ARG_vp) // MOVQ BX, vp<>+8(FP) - self.Emit("MOVQ", jit.Imm(0), _ARG_sb) // MOVQ CX, sb<>+16(FP) - self.Emit("MOVQ", jit.Ptr(_SP, _FP_offs), _BP) // MOVQ _FP_offs(SP), BP - self.Emit("ADDQ", jit.Imm(_FP_size), _SP) // ADDQ $_FP_size, SP - self.Emit("RET") // RET -} - -func (self *_Assembler) prologue() { - self.Emit("SUBQ", jit.Imm(_FP_size), _SP) // SUBQ $_FP_size, SP - self.Emit("MOVQ", _BP, jit.Ptr(_SP, _FP_offs)) // MOVQ BP, _FP_offs(SP) - self.Emit("LEAQ", jit.Ptr(_SP, _FP_offs), _BP) // LEAQ _FP_offs(SP), BP - self.Emit("MOVQ", _AX, _ARG_rb) // MOVQ AX, rb<>+0(FP) - self.Emit("MOVQ", _BX, _ARG_vp) // MOVQ BX, vp<>+8(FP) - self.Emit("MOVQ", _CX, _ARG_sb) // MOVQ CX, sb<>+16(FP) - self.Emit("MOVQ", _DI, _ARG_fv) // MOVQ DI, rb<>+24(FP) - self.Emit("MOVQ", jit.Ptr(_AX, 0), _RP) // MOVQ (AX) , DI - self.Emit("MOVQ", jit.Ptr(_AX, 8), _RL) // MOVQ 8(AX) , SI - self.Emit("MOVQ", jit.Ptr(_AX, 16), _RC) // MOVQ 16(AX), DX - self.Emit("MOVQ", _BX, _SP_p) // MOVQ BX, R10 - self.Emit("MOVQ", _CX, _ST) // MOVQ CX, R8 - self.Emit("XORL", _SP_x, _SP_x) // XORL R10, R12 - self.Emit("XORL", _SP_f, _SP_f) // XORL R11, R13 - self.Emit("XORL", _SP_q, _SP_q) // XORL R13, R11 -} - -/** Assembler Inline Functions **/ - -func (self *_Assembler) xsave(reg ...obj.Addr) { - for i, v := range reg { - if i > _FP_saves / 8 - 1 { - panic("too many registers to save") - } else { - self.Emit("MOVQ", v, jit.Ptr(_SP, _FP_fargs + int64(i) * 8)) - } - } -} - -func (self *_Assembler) xload(reg ...obj.Addr) { - for i, v := range reg { - if i > _FP_saves / 8 - 1 { - panic("too many registers to load") - } else { - self.Emit("MOVQ", jit.Ptr(_SP, _FP_fargs + int64(i) * 8), v) - } - } -} - -func (self *_Assembler) rbuf_di() { - if _RP.Reg != x86.REG_DI { - panic("register allocation messed up: RP != DI") - } else { - self.Emit("ADDQ", _RL, _RP) - } -} - -func (self *_Assembler) store_int(nd int, fn obj.Addr, ins string) { - self.check_size(nd) - self.save_c() // SAVE $C_regs - self.rbuf_di() // MOVQ RP, DI - self.Emit(ins, jit.Ptr(_SP_p, 0), _SI) // $ins (SP.p), SI - self.call_c(fn) // CALL_C $fn - self.Emit("ADDQ", _AX, _RL) // ADDQ AX, RL -} - -func (self *_Assembler) store_str(s string) { - i := 0 - m := rt.Str2Mem(s) - - /* 8-byte stores */ - for i <= len(m) - 8 { - self.Emit("MOVQ", jit.Imm(rt.Get64(m[i:])), _AX) // MOVQ $s[i:], AX - self.Emit("MOVQ", _AX, jit.Sib(_RP, _RL, 1, int64(i))) // MOVQ AX, i(RP)(RL) - i += 8 - } - - /* 4-byte stores */ - if i <= len(m) - 4 { - self.Emit("MOVL", jit.Imm(int64(rt.Get32(m[i:]))), jit.Sib(_RP, _RL, 1, int64(i))) // MOVL $s[i:], i(RP)(RL) - i += 4 - } - - /* 2-byte stores */ - if i <= len(m) - 2 { - self.Emit("MOVW", jit.Imm(int64(rt.Get16(m[i:]))), jit.Sib(_RP, _RL, 1, int64(i))) // MOVW $s[i:], i(RP)(RL) - i += 2 - } - - /* last byte */ - if i < len(m) { - self.Emit("MOVB", jit.Imm(int64(m[i])), jit.Sib(_RP, _RL, 1, int64(i))) // MOVB $s[i:], i(RP)(RL) - } -} - -func (self *_Assembler) check_size(n int) { - self.check_size_rl(jit.Ptr(_RL, int64(n))) -} - -func (self *_Assembler) check_size_r(r obj.Addr, d int) { - self.check_size_rl(jit.Sib(_RL, r, 1, int64(d))) -} - -func (self *_Assembler) check_size_rl(v obj.Addr) { - idx := self.x - key := _LB_more_space_return + strconv.Itoa(idx) - - /* the following code relies on LR == R9 to work */ - if _LR.Reg != x86.REG_R9 { - panic("register allocation messed up: LR != R9") - } - - /* check for buffer capacity */ - self.x++ - self.Emit("LEAQ", v, _AX) // LEAQ $v, AX - self.Emit("CMPQ", _AX, _RC) // CMPQ AX, RC - self.Sjmp("JBE" , key) // JBE _more_space_return_{n} - self.slice_grow_ax(key) // GROW $key - self.Link(key) // _more_space_return_{n}: -} - -func (self *_Assembler) slice_grow_ax(ret string) { - self.Byte(0x4c, 0x8d, 0x0d) // LEAQ ?(PC), R9 - self.Sref(ret, 4) // .... &ret - self.Sjmp("JMP" , _LB_more_space) // JMP _more_space -} - -/** State Stack Helpers **/ - -const ( - _StateSize = int64(unsafe.Sizeof(_State{})) - _StackLimit = _MaxStack * _StateSize -) - -func (self *_Assembler) save_state() { - self.Emit("MOVQ", jit.Ptr(_ST, 0), _CX) // MOVQ (ST), CX - self.Emit("LEAQ", jit.Ptr(_CX, _StateSize), _R9) // LEAQ _StateSize(CX), R9 - self.Emit("CMPQ", _R9, jit.Imm(_StackLimit)) // CMPQ R9, $_StackLimit - self.Sjmp("JAE" , _LB_error_too_deep) // JA _error_too_deep - self.Emit("MOVQ", _SP_x, jit.Sib(_ST, _CX, 1, 8)) // MOVQ SP.x, 8(ST)(CX) - self.Emit("MOVQ", _SP_f, jit.Sib(_ST, _CX, 1, 16)) // MOVQ SP.f, 16(ST)(CX) - self.WritePtr(0, _SP_p, jit.Sib(_ST, _CX, 1, 24)) // MOVQ SP.p, 24(ST)(CX) - self.WritePtr(1, _SP_q, jit.Sib(_ST, _CX, 1, 32)) // MOVQ SP.q, 32(ST)(CX) - self.Emit("MOVQ", _R9, jit.Ptr(_ST, 0)) // MOVQ R9, (ST) -} - -func (self *_Assembler) drop_state(decr int64) { - self.Emit("MOVQ" , jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("SUBQ" , jit.Imm(decr), _AX) // SUBQ $decr, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_ST, 0)) // MOVQ AX, (ST) - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 8), _SP_x) // MOVQ 8(ST)(AX), SP.x - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 16), _SP_f) // MOVQ 16(ST)(AX), SP.f - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 24), _SP_p) // MOVQ 24(ST)(AX), SP.p - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 32), _SP_q) // MOVQ 32(ST)(AX), SP.q - self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 - self.Emit("MOVOU", _X0, jit.Sib(_ST, _AX, 1, 8)) // MOVOU X0, 8(ST)(AX) - self.Emit("MOVOU", _X0, jit.Sib(_ST, _AX, 1, 24)) // MOVOU X0, 24(ST)(AX) -} - -/** Buffer Helpers **/ - -func (self *_Assembler) add_char(ch byte) { - self.Emit("MOVB", jit.Imm(int64(ch)), jit.Sib(_RP, _RL, 1, 0)) // MOVB $ch, (RP)(RL) - self.Emit("ADDQ", jit.Imm(1), _RL) // ADDQ $1, RL -} - -func (self *_Assembler) add_long(ch uint32, n int64) { - self.Emit("MOVL", jit.Imm(int64(ch)), jit.Sib(_RP, _RL, 1, 0)) // MOVL $ch, (RP)(RL) - self.Emit("ADDQ", jit.Imm(n), _RL) // ADDQ $n, RL -} - -func (self *_Assembler) add_text(ss string) { - self.store_str(ss) // TEXT $ss - self.Emit("ADDQ", jit.Imm(int64(len(ss))), _RL) // ADDQ ${len(ss)}, RL -} - -// get *buf at AX -func (self *_Assembler) prep_buffer_AX() { - self.Emit("MOVQ", _ARG_rb, _AX) // MOVQ rb<>+0(FP), AX - self.Emit("MOVQ", _RL, jit.Ptr(_AX, 8)) // MOVQ RL, 8(AX) -} - -func (self *_Assembler) save_buffer() { - self.Emit("MOVQ", _ARG_rb, _CX) // MOVQ rb<>+0(FP), CX - self.Emit("MOVQ", _RP, jit.Ptr(_CX, 0)) // MOVQ RP, (CX) - self.Emit("MOVQ", _RL, jit.Ptr(_CX, 8)) // MOVQ RL, 8(CX) - self.Emit("MOVQ", _RC, jit.Ptr(_CX, 16)) // MOVQ RC, 16(CX) -} - -// get *buf at AX -func (self *_Assembler) load_buffer_AX() { - self.Emit("MOVQ", _ARG_rb, _AX) // MOVQ rb<>+0(FP), AX - self.Emit("MOVQ", jit.Ptr(_AX, 0), _RP) // MOVQ (AX), RP - self.Emit("MOVQ", jit.Ptr(_AX, 8), _RL) // MOVQ 8(AX), RL - self.Emit("MOVQ", jit.Ptr(_AX, 16), _RC) // MOVQ 16(AX), RC -} - -/** Function Interface Helpers **/ - -func (self *_Assembler) call(pc obj.Addr) { - self.Emit("MOVQ", pc, _LR) // MOVQ $pc, AX - self.Rjmp("CALL", _LR) // CALL AX -} - -func (self *_Assembler) save_c() { - self.xsave(_REG_ffi...) // SAVE $REG_ffi -} - -func (self *_Assembler) call_b64(pc obj.Addr) { - self.xsave(_REG_b64...) // SAVE $REG_all - self.call(pc) // CALL $pc - self.xload(_REG_b64...) // LOAD $REG_ffi -} - -func (self *_Assembler) call_c(pc obj.Addr) { - self.Emit("XCHGQ", _SP_p, _BX) - self.Emit("XCHGQ", _SP_q, _BP) - self.call(pc) // CALL $pc - self.xload(_REG_ffi...) // LOAD $REG_ffi - self.Emit("XCHGQ", _SP_p, _BX) - self.Emit("XCHGQ", _SP_q, _BP) -} - -func (self *_Assembler) call_go(pc obj.Addr) { - self.xsave(_REG_all...) // SAVE $REG_all - self.call(pc) // CALL $pc - self.xload(_REG_all...) // LOAD $REG_all -} - -func (self *_Assembler) call_more_space(pc obj.Addr) { - self.xsave(_REG_ms...) // SAVE $REG_all - self.call(pc) // CALL $pc - self.xload(_REG_ms...) // LOAD $REG_all -} - -func (self *_Assembler) call_encoder(pc obj.Addr) { - self.xsave(_REG_enc...) // SAVE $REG_all - self.call(pc) // CALL $pc - self.xload(_REG_enc...) // LOAD $REG_all -} - -func (self *_Assembler) call_marshaler(fn obj.Addr, it *rt.GoType, vt reflect.Type) { - switch vt.Kind() { - case reflect.Interface : self.call_marshaler_i(fn, it) - case reflect.Ptr, reflect.Map : self.call_marshaler_v(fn, it, vt, true) - // struct/array of 1 direct iface type can be direct - default : self.call_marshaler_v(fn, it, vt, !rt.UnpackType(vt).Indirect()) - } -} - -func (self *_Assembler) call_marshaler_i(fn obj.Addr, it *rt.GoType) { - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JZ" , "_null_{n}") // JZ _null_{n} - self.Emit("MOVQ" , _AX, _BX) // MOVQ AX, BX - self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _CX) // MOVQ 8(SP.p), CX - self.Emit("MOVQ" , jit.Gtype(it), _AX) // MOVQ $it, AX - self.call_go(_F_assertI2I) // CALL_GO assertI2I - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JZ" , "_null_{n}") // JZ _null_{n} - self.Emit("MOVQ", _BX, _CX) // MOVQ BX, CX - self.Emit("MOVQ", _AX, _BX) // MOVQ AX, BX - self.prep_buffer_AX() - self.Emit("MOVQ", _ARG_fv, _DI) // MOVQ ARG.fv, DI - self.call_go(fn) // CALL $fn - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error - self.load_buffer_AX() - self.Sjmp("JMP" , "_done_{n}") // JMP _done_{n} - self.Link("_null_{n}") // _null_{n}: - self.check_size(4) // SIZE $4 - self.Emit("MOVL", jit.Imm(_IM_null), jit.Sib(_RP, _RL, 1, 0)) // MOVL $'null', (RP)(RL*1) - self.Emit("ADDQ", jit.Imm(4), _RL) // ADDQ $4, RL - self.Link("_done_{n}") // _done_{n}: -} - -func (self *_Assembler) call_marshaler_v(fn obj.Addr, it *rt.GoType, vt reflect.Type, deref bool) { - self.prep_buffer_AX() // MOVE {buf}, (SP) - self.Emit("MOVQ", jit.Itab(it, vt), _BX) // MOVQ $(itab(it, vt)), BX - - /* dereference the pointer if needed */ - if !deref { - self.Emit("MOVQ", _SP_p, _CX) // MOVQ SP.p, CX - } else { - self.Emit("MOVQ", jit.Ptr(_SP_p, 0), _CX) // MOVQ 0(SP.p), CX - } - - /* call the encoder, and perform error checks */ - self.Emit("MOVQ", _ARG_fv, _DI) // MOVQ ARG.fv, DI - self.call_go(fn) // CALL $fn - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error - self.load_buffer_AX() -} - -/** Builtin: _more_space **/ - -var ( - _T_byte = jit.Type(byteType) - _F_growslice = jit.Func(growslice) -) - -// AX must saving n -func (self *_Assembler) more_space() { - self.Link(_LB_more_space) - self.Emit("MOVQ", _RP, _BX) // MOVQ DI, BX - self.Emit("MOVQ", _RL, _CX) // MOVQ SI, CX - self.Emit("MOVQ", _RC, _DI) // MOVQ DX, DI - self.Emit("MOVQ", _AX, _SI) // MOVQ AX, SI - self.Emit("MOVQ", _T_byte, _AX) // MOVQ $_T_byte, AX - self.call_more_space(_F_growslice) // CALL $pc - self.Emit("MOVQ", _AX, _RP) // MOVQ AX, DI - self.Emit("MOVQ", _BX, _RL) // MOVQ BX, SI - self.Emit("MOVQ", _CX, _RC) // MOVQ CX, DX - self.save_buffer() // SAVE {buf} - self.Rjmp("JMP" , _LR) // JMP LR -} - -/** Builtin Errors **/ - -var ( - _V_ERR_too_deep = jit.Imm(int64(uintptr(unsafe.Pointer(_ERR_too_deep)))) - _V_ERR_nan_or_infinite = jit.Imm(int64(uintptr(unsafe.Pointer(_ERR_nan_or_infinite)))) - _I_json_UnsupportedValueError = jit.Itab(rt.UnpackType(errorType), jsonUnsupportedValueType) -) - -func (self *_Assembler) error_too_deep() { - self.Link(_LB_error_too_deep) - self.Emit("MOVQ", _V_ERR_too_deep, _EP) // MOVQ $_V_ERR_too_deep, EP - self.Emit("MOVQ", _I_json_UnsupportedValueError, _ET) // MOVQ $_I_json_UnsupportedValuError, ET - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -func (self *_Assembler) error_invalid_number() { - self.Link(_LB_error_invalid_number) - self.Emit("MOVQ", jit.Ptr(_SP_p, 0), _AX) // MOVQ 0(SP), AX - self.Emit("MOVQ", jit.Ptr(_SP_p, 8), _BX) // MOVQ 8(SP), BX - self.call_go(_F_error_number) // CALL_GO error_number - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -func (self *_Assembler) error_nan_or_infinite() { - self.Link(_LB_error_nan_or_infinite) - self.Emit("MOVQ", _V_ERR_nan_or_infinite, _EP) // MOVQ $_V_ERR_nan_or_infinite, EP - self.Emit("MOVQ", _I_json_UnsupportedValueError, _ET) // MOVQ $_I_json_UnsupportedValuError, ET - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -/** String Encoding Routine **/ - -var ( - _F_quote = jit.Imm(int64(native.S_quote)) - _F_panic = jit.Func(goPanic) -) - -func (self *_Assembler) go_panic() { - self.Link(_LB_panic) - self.Emit("MOVQ", _SP_p, _BX) - self.call_go(_F_panic) -} - -func (self *_Assembler) encode_string(doubleQuote bool) { - self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _AX) // MOVQ 8(SP.p), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JZ" , "_str_empty_{n}") // JZ _str_empty_{n} - self.Emit("CMPQ", jit.Ptr(_SP_p, 0), jit.Imm(0)) - self.Sjmp("JNE" , "_str_next_{n}") - self.Emit("MOVQ", jit.Imm(int64(panicNilPointerOfNonEmptyString)), _AX) - self.Sjmp("JMP", _LB_panic) - self.Link("_str_next_{n}") - - /* openning quote, check for double quote */ - if !doubleQuote { - self.check_size_r(_AX, 2) // SIZE $2 - self.add_char('"') // CHAR $'"' - } else { - self.check_size_r(_AX, 6) // SIZE $6 - self.add_long(_IM_open, 3) // TEXT $`"\"` - } - - /* quoting loop */ - self.Emit("XORL", _AX, _AX) // XORL AX, AX - self.Emit("MOVQ", _AX, _VAR_sp) // MOVQ AX, sp - self.Link("_str_loop_{n}") // _str_loop_{n}: - self.save_c() // SAVE $REG_ffi - - /* load the output buffer first, and then input buffer, - * because the parameter registers collide with RP / RL / RC */ - self.Emit("MOVQ", _RC, _CX) // MOVQ RC, CX - self.Emit("SUBQ", _RL, _CX) // SUBQ RL, CX - self.Emit("MOVQ", _CX, _VAR_dn) // MOVQ CX, dn - self.Emit("LEAQ", jit.Sib(_RP, _RL, 1, 0), _DX) // LEAQ (RP)(RL), DX - self.Emit("LEAQ", _VAR_dn, _CX) // LEAQ dn, CX - self.Emit("MOVQ", _VAR_sp, _AX) // MOVQ sp, AX - self.Emit("MOVQ", jit.Ptr(_SP_p, 0), _DI) // MOVQ (SP.p), DI - self.Emit("MOVQ", jit.Ptr(_SP_p, 8), _SI) // MOVQ 8(SP.p), SI - self.Emit("ADDQ", _AX, _DI) // ADDQ AX, DI - self.Emit("SUBQ", _AX, _SI) // SUBQ AX, SI - - /* set the flags based on `doubleQuote` */ - if !doubleQuote { - self.Emit("XORL", _R8, _R8) // XORL R8, R8 - } else { - self.Emit("MOVL", jit.Imm(types.F_DOUBLE_UNQUOTE), _R8) // MOVL ${types.F_DOUBLE_UNQUOTE}, R8 - } - - /* call the native quoter */ - self.call_c(_F_quote) // CALL quote - self.Emit("ADDQ" , _VAR_dn, _RL) // ADDQ dn, RL - - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , "_str_space_{n}") // JS _str_space_{n} - - /* close the string, check for double quote */ - if !doubleQuote { - self.check_size(1) // SIZE $1 - self.add_char('"') // CHAR $'"' - self.Sjmp("JMP", "_str_end_{n}") // JMP _str_end_{n} - } else { - self.check_size(3) // SIZE $3 - self.add_text("\\\"\"") // TEXT $'\""' - self.Sjmp("JMP", "_str_end_{n}") // JMP _str_end_{n} - } - - /* not enough space to contain the quoted string */ - self.Link("_str_space_{n}") // _str_space_{n}: - self.Emit("NOTQ", _AX) // NOTQ AX - self.Emit("ADDQ", _AX, _VAR_sp) // ADDQ AX, sp - self.Emit("LEAQ", jit.Sib(_RC, _RC, 1, 0), _AX) // LEAQ (RC)(RC), AX - self.slice_grow_ax("_str_loop_{n}") // GROW _str_loop_{n} - - /* empty string, check for double quote */ - if !doubleQuote { - self.Link("_str_empty_{n}") // _str_empty_{n}: - self.check_size(2) // SIZE $2 - self.add_text("\"\"") // TEXT $'""' - self.Link("_str_end_{n}") // _str_end_{n}: - } else { - self.Link("_str_empty_{n}") // _str_empty_{n}: - self.check_size(6) // SIZE $6 - self.add_text("\"\\\"\\\"\"") // TEXT $'"\"\""' - self.Link("_str_end_{n}") // _str_end_{n}: - } -} - -/** OpCode Assembler Functions **/ - -var ( - _T_json_Marshaler = rt.UnpackType(jsonMarshalerType) - _T_encoding_TextMarshaler = rt.UnpackType(encodingTextMarshalerType) -) - -var ( - _F_f64toa = jit.Imm(int64(native.S_f64toa)) - _F_f32toa = jit.Imm(int64(native.S_f32toa)) - _F_i64toa = jit.Imm(int64(native.S_i64toa)) - _F_u64toa = jit.Imm(int64(native.S_u64toa)) - _F_b64encode = jit.Imm(int64(_subr__b64encode)) -) - -var ( - _F_memmove = jit.Func(memmove) - _F_error_number = jit.Func(error_number) - _F_isValidNumber = jit.Func(isValidNumber) -) - -var ( - _F_iteratorStop = jit.Func(iteratorStop) - _F_iteratorNext = jit.Func(iteratorNext) - _F_iteratorStart = jit.Func(iteratorStart) -) - -var ( - _F_encodeTypedPointer obj.Addr - _F_encodeJsonMarshaler obj.Addr - _F_encodeTextMarshaler obj.Addr -) - -const ( - _MODE_AVX2 = 1 << 2 -) - -func init() { - _F_encodeTypedPointer = jit.Func(encodeTypedPointer) - _F_encodeJsonMarshaler = jit.Func(encodeJsonMarshaler) - _F_encodeTextMarshaler = jit.Func(encodeTextMarshaler) -} - -func (self *_Assembler) _asm_OP_null(_ *_Instr) { - self.check_size(4) - self.Emit("MOVL", jit.Imm(_IM_null), jit.Sib(_RP, _RL, 1, 0)) // MOVL $'null', (RP)(RL*1) - self.Emit("ADDQ", jit.Imm(4), _RL) // ADDQ $4, RL -} - -func (self *_Assembler) _asm_OP_empty_arr(_ *_Instr) { - self.Emit("BTQ", jit.Imm(int64(bitNoNullSliceOrMap)), _ARG_fv) - self.Sjmp("JC", "_empty_arr_{n}") - self._asm_OP_null(nil) - self.Sjmp("JMP", "_empty_arr_end_{n}") - self.Link("_empty_arr_{n}") - self.check_size(2) - self.Emit("MOVW", jit.Imm(_IM_array), jit.Sib(_RP, _RL, 1, 0)) - self.Emit("ADDQ", jit.Imm(2), _RL) - self.Link("_empty_arr_end_{n}") -} - -func (self *_Assembler) _asm_OP_empty_obj(_ *_Instr) { - self.Emit("BTQ", jit.Imm(int64(bitNoNullSliceOrMap)), _ARG_fv) - self.Sjmp("JC", "_empty_obj_{n}") - self._asm_OP_null(nil) - self.Sjmp("JMP", "_empty_obj_end_{n}") - self.Link("_empty_obj_{n}") - self.check_size(2) - self.Emit("MOVW", jit.Imm(_IM_object), jit.Sib(_RP, _RL, 1, 0)) - self.Emit("ADDQ", jit.Imm(2), _RL) - self.Link("_empty_obj_end_{n}") -} - -func (self *_Assembler) _asm_OP_bool(_ *_Instr) { - self.Emit("CMPB", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPB (SP.p), $0 - self.Sjmp("JE" , "_false_{n}") // JE _false_{n} - self.check_size(4) // SIZE $4 - self.Emit("MOVL", jit.Imm(_IM_true), jit.Sib(_RP, _RL, 1, 0)) // MOVL $'true', (RP)(RL*1) - self.Emit("ADDQ", jit.Imm(4), _RL) // ADDQ $4, RL - self.Sjmp("JMP" , "_end_{n}") // JMP _end_{n} - self.Link("_false_{n}") // _false_{n}: - self.check_size(5) // SIZE $5 - self.Emit("MOVL", jit.Imm(_IM_fals), jit.Sib(_RP, _RL, 1, 0)) // MOVL $'fals', (RP)(RL*1) - self.Emit("MOVB", jit.Imm('e'), jit.Sib(_RP, _RL, 1, 4)) // MOVB $'e', 4(RP)(RL*1) - self.Emit("ADDQ", jit.Imm(5), _RL) // ADDQ $5, RL - self.Link("_end_{n}") // _end_{n}: -} - -func (self *_Assembler) _asm_OP_i8(_ *_Instr) { - self.store_int(4, _F_i64toa, "MOVBQSX") -} - -func (self *_Assembler) _asm_OP_i16(_ *_Instr) { - self.store_int(6, _F_i64toa, "MOVWQSX") -} - -func (self *_Assembler) _asm_OP_i32(_ *_Instr) { - self.store_int(17, _F_i64toa, "MOVLQSX") -} - -func (self *_Assembler) _asm_OP_i64(_ *_Instr) { - self.store_int(21, _F_i64toa, "MOVQ") -} - -func (self *_Assembler) _asm_OP_u8(_ *_Instr) { - self.store_int(3, _F_u64toa, "MOVBQZX") -} - -func (self *_Assembler) _asm_OP_u16(_ *_Instr) { - self.store_int(5, _F_u64toa, "MOVWQZX") -} - -func (self *_Assembler) _asm_OP_u32(_ *_Instr) { - self.store_int(16, _F_u64toa, "MOVLQZX") -} - -func (self *_Assembler) _asm_OP_u64(_ *_Instr) { - self.store_int(20, _F_u64toa, "MOVQ") -} - -func (self *_Assembler) _asm_OP_f32(_ *_Instr) { - self.check_size(32) - self.Emit("MOVL" , jit.Ptr(_SP_p, 0), _AX) // MOVL (SP.p), AX - self.Emit("ANDL" , jit.Imm(_FM_exp32), _AX) // ANDL $_FM_exp32, AX - self.Emit("XORL" , jit.Imm(_FM_exp32), _AX) // XORL $_FM_exp32, AX - self.Sjmp("JZ" , _LB_error_nan_or_infinite) // JZ _error_nan_or_infinite - self.save_c() // SAVE $C_regs - self.rbuf_di() // MOVQ RP, DI - self.Emit("MOVSS" , jit.Ptr(_SP_p, 0), _X0) // MOVSS (SP.p), X0 - self.call_c(_F_f32toa) // CALL_C f64toa - self.Emit("ADDQ" , _AX, _RL) // ADDQ AX, RL -} - -func (self *_Assembler) _asm_OP_f64(_ *_Instr) { - self.check_size(32) - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("MOVQ" , jit.Imm(_FM_exp64), _CX) // MOVQ $_FM_exp64, CX - self.Emit("ANDQ" , _CX, _AX) // ANDQ CX, AX - self.Emit("XORQ" , _CX, _AX) // XORQ CX, AX - self.Sjmp("JZ" , _LB_error_nan_or_infinite) // JZ _error_nan_or_infinite - self.save_c() // SAVE $C_regs - self.rbuf_di() // MOVQ RP, DI - self.Emit("MOVSD" , jit.Ptr(_SP_p, 0), _X0) // MOVSD (SP.p), X0 - self.call_c(_F_f64toa) // CALL_C f64toa - self.Emit("ADDQ" , _AX, _RL) // ADDQ AX, RL -} - -func (self *_Assembler) _asm_OP_str(_ *_Instr) { - self.encode_string(false) -} - -func (self *_Assembler) _asm_OP_bin(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_SP_p, 8), _AX) // MOVQ 8(SP.p), AX - self.Emit("ADDQ", jit.Imm(2), _AX) // ADDQ $2, AX - self.Emit("MOVQ", jit.Imm(_IM_mulv), _CX) // MOVQ $_MF_mulv, CX - self.Emit("MOVQ", _DX, _BX) // MOVQ DX, BX - self.From("MULQ", _CX) // MULQ CX - self.Emit("LEAQ", jit.Sib(_DX, _DX, 1, 1), _AX) // LEAQ 1(DX)(DX), AX - self.Emit("ORQ" , jit.Imm(2), _AX) // ORQ $2, AX - self.Emit("MOVQ", _BX, _DX) // MOVQ BX, DX - self.check_size_r(_AX, 0) // SIZE AX - self.add_char('"') // CHAR $'"' - self.Emit("MOVQ", _ARG_rb, _DI) // MOVQ rb<>+0(FP), DI - self.Emit("MOVQ", _RL, jit.Ptr(_DI, 8)) // MOVQ SI, 8(DI) - self.Emit("MOVQ", _SP_p, _SI) // MOVQ SP.p, SI - - /* check for AVX2 support */ - if !cpu.HasAVX2 { - self.Emit("XORL", _DX, _DX) // XORL DX, DX - } else { - self.Emit("MOVL", jit.Imm(_MODE_AVX2), _DX) // MOVL $_MODE_AVX2, DX - } - - /* call the encoder */ - self.call_b64(_F_b64encode) // CALL b64encode - self.load_buffer_AX() // LOAD {buf} - self.add_char('"') // CHAR $'"' -} - -func (self *_Assembler) _asm_OP_quote(_ *_Instr) { - self.encode_string(true) -} - -func (self *_Assembler) _asm_OP_number(_ *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _BX) // MOVQ (SP.p), BX - self.Emit("TESTQ", _BX, _BX) // TESTQ BX, BX - self.Sjmp("JZ" , "_empty_{n}") - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JNZ" , "_number_next_{n}") - self.Emit("MOVQ", jit.Imm(int64(panicNilPointerOfNonEmptyString)), _AX) - self.Sjmp("JMP", _LB_panic) - self.Link("_number_next_{n}") - self.call_go(_F_isValidNumber) // CALL_GO isValidNumber - self.Emit("CMPB" , _AX, jit.Imm(0)) // CMPB AX, $0 - self.Sjmp("JE" , _LB_error_invalid_number) // JE _error_invalid_number - self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _BX) // MOVQ (SP.p), BX - self.check_size_r(_BX, 0) // SIZE BX - self.Emit("LEAQ" , jit.Sib(_RP, _RL, 1, 0), _AX) // LEAQ (RP)(RL), AX - self.Emit("ADDQ" , jit.Ptr(_SP_p, 8), _RL) // ADDQ 8(SP.p), RL - self.Emit("MOVQ", jit.Ptr(_SP_p, 0), _BX) // MOVOU (SP.p), BX - self.Emit("MOVQ", jit.Ptr(_SP_p, 8), _CX) // MOVOU X0, 8(SP) - self.call_go(_F_memmove) // CALL_GO memmove - self.Emit("MOVQ", _ARG_rb, _AX) // MOVQ rb<>+0(FP), AX - self.Emit("MOVQ", _RL, jit.Ptr(_AX, 8)) // MOVQ RL, 8(AX) - self.Sjmp("JMP" , "_done_{n}") // JMP _done_{n} - self.Link("_empty_{n}") // _empty_{n} - self.check_size(1) // SIZE $1 - self.add_char('0') // CHAR $'0' - self.Link("_done_{n}") // _done_{n}: -} - -func (self *_Assembler) _asm_OP_eface(_ *_Instr) { - self.prep_buffer_AX() // MOVE {buf}, AX - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _BX) // MOVQ (SP.p), BX - self.Emit("LEAQ" , jit.Ptr(_SP_p, 8), _CX) // LEAQ 8(SP.p), CX - self.Emit("MOVQ" , _ST, _DI) // MOVQ ST, DI - self.Emit("MOVQ" , _ARG_fv, _SI) // MOVQ fv, AX - self.call_encoder(_F_encodeTypedPointer) // CALL encodeTypedPointer - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error - self.load_buffer_AX() -} - -func (self *_Assembler) _asm_OP_iface(_ *_Instr) { - self.prep_buffer_AX() // MOVE {buf}, AX - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _CX) // MOVQ (SP.p), CX - self.Emit("MOVQ" , jit.Ptr(_CX, 8), _BX) // MOVQ 8(CX), BX - self.Emit("LEAQ" , jit.Ptr(_SP_p, 8), _CX) // LEAQ 8(SP.p), CX - self.Emit("MOVQ" , _ST, _DI) // MOVQ ST, DI - self.Emit("MOVQ" , _ARG_fv, _SI) // MOVQ fv, AX - self.call_encoder(_F_encodeTypedPointer) // CALL encodeTypedPointer - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error - self.load_buffer_AX() -} - -func (self *_Assembler) _asm_OP_byte(p *_Instr) { - self.check_size(1) - self.Emit("MOVB", jit.Imm(p.i64()), jit.Sib(_RP, _RL, 1, 0)) // MOVL p.vi(), (RP)(RL*1) - self.Emit("ADDQ", jit.Imm(1), _RL) // ADDQ $1, RL -} - -func (self *_Assembler) _asm_OP_text(p *_Instr) { - self.check_size(len(p.vs())) // SIZE ${len(p.vs())} - self.add_text(p.vs()) // TEXT ${p.vs()} -} - -func (self *_Assembler) _asm_OP_deref(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_SP_p, 0), _SP_p) // MOVQ (SP.p), SP.p -} - -func (self *_Assembler) _asm_OP_index(p *_Instr) { - self.Emit("MOVQ", jit.Imm(p.i64()), _AX) // MOVQ $p.vi(), AX - self.Emit("ADDQ", _AX, _SP_p) // ADDQ AX, SP.p -} - -func (self *_Assembler) _asm_OP_load(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("MOVQ", jit.Sib(_ST, _AX, 1, -24), _SP_x) // MOVQ -24(ST)(AX), SP.x - self.Emit("MOVQ", jit.Sib(_ST, _AX, 1, -8), _SP_p) // MOVQ -8(ST)(AX), SP.p - self.Emit("MOVQ", jit.Sib(_ST, _AX, 1, 0), _SP_q) // MOVQ (ST)(AX), SP.q -} - -func (self *_Assembler) _asm_OP_save(_ *_Instr) { - self.save_state() -} - -func (self *_Assembler) _asm_OP_drop(_ *_Instr) { - self.drop_state(_StateSize) -} - -func (self *_Assembler) _asm_OP_drop_2(_ *_Instr) { - self.drop_state(_StateSize * 2) // DROP $(_StateSize * 2) - self.Emit("MOVOU", _X0, jit.Sib(_ST, _AX, 1, 56)) // MOVOU X0, 56(ST)(AX) -} - -func (self *_Assembler) _asm_OP_recurse(p *_Instr) { - self.prep_buffer_AX() // MOVE {buf}, (SP) - vt, pv := p.vp() - self.Emit("MOVQ", jit.Type(vt), _BX) // MOVQ $(type(p.vt())), BX - - /* check for indirection */ - if !rt.UnpackType(vt).Indirect() { - self.Emit("MOVQ", _SP_p, _CX) // MOVQ SP.p, CX - } else { - self.Emit("MOVQ", _SP_p, _VAR_vp) // MOVQ SP.p, VAR.vp - self.Emit("LEAQ", _VAR_vp, _CX) // LEAQ VAR.vp, CX - } - - /* call the encoder */ - self.Emit("MOVQ" , _ST, _DI) // MOVQ ST, DI - self.Emit("MOVQ" , _ARG_fv, _SI) // MOVQ $fv, SI - if pv { - self.Emit("BTCQ", jit.Imm(bitPointerValue), _SI) // BTCQ $1, SI - } - self.call_encoder(_F_encodeTypedPointer) // CALL encodeTypedPointer - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error - self.load_buffer_AX() -} - -func (self *_Assembler) _asm_OP_is_nil(p *_Instr) { - self.Emit("CMPQ", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPQ (SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_nil_p1(p *_Instr) { - self.Emit("CMPQ", jit.Ptr(_SP_p, 8), jit.Imm(0)) // CMPQ 8(SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_zero_1(p *_Instr) { - self.Emit("CMPB", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPB (SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_zero_2(p *_Instr) { - self.Emit("CMPW", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPW (SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_zero_4(p *_Instr) { - self.Emit("CMPL", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPL (SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_zero_8(p *_Instr) { - self.Emit("CMPQ", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPQ (SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_zero_map(p *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Xjmp("JZ" , p.vi()) // JZ p.vi() - self.Emit("CMPQ" , jit.Ptr(_AX, 0), jit.Imm(0)) // CMPQ (AX), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_goto(p *_Instr) { - self.Xjmp("JMP", p.vi()) -} - -func (self *_Assembler) _asm_OP_map_iter(p *_Instr) { - self.Emit("MOVQ" , jit.Type(p.vt()), _AX) // MOVQ $p.vt(), AX - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _BX) // MOVQ (SP.p), BX - self.Emit("MOVQ" , _ARG_fv, _CX) // MOVQ fv, CX - self.call_go(_F_iteratorStart) // CALL_GO iteratorStart - self.Emit("MOVQ" , _AX, _SP_q) // MOVQ AX, SP.q - self.Emit("MOVQ" , _BX, _ET) // MOVQ 32(SP), ET - self.Emit("MOVQ" , _CX, _EP) // MOVQ 40(SP), EP - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error -} - -func (self *_Assembler) _asm_OP_map_stop(_ *_Instr) { - self.Emit("MOVQ", _SP_q, _AX) // MOVQ SP.q, AX - self.call_go(_F_iteratorStop) // CALL_GO iteratorStop - self.Emit("XORL", _SP_q, _SP_q) // XORL SP.q, SP.q -} - -func (self *_Assembler) _asm_OP_map_check_key(p *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_SP_q, 0), _SP_p) // MOVQ (SP.q), SP.p - self.Emit("TESTQ", _SP_p, _SP_p) // TESTQ SP.p, SP.p - self.Xjmp("JZ" , p.vi()) // JNZ p.vi() -} - -func (self *_Assembler) _asm_OP_map_write_key(p *_Instr) { - self.Emit("BTQ", jit.Imm(bitSortMapKeys), _ARG_fv) // BTQ ${SortMapKeys}, fv - self.Sjmp("JNC", "_unordered_key_{n}") // JNC _unordered_key_{n} - self.encode_string(false) // STR $false - self.Xjmp("JMP", p.vi()) // JMP ${p.vi()} - self.Link("_unordered_key_{n}") // _unordered_key_{n}: -} - -func (self *_Assembler) _asm_OP_map_value_next(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_SP_q, 8), _SP_p) // MOVQ 8(SP.q), SP.p - self.Emit("MOVQ", _SP_q, _AX) // MOVQ SP.q, AX - self.call_go(_F_iteratorNext) // CALL_GO iteratorNext -} - -func (self *_Assembler) _asm_OP_slice_len(_ *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _SP_x) // MOVQ 8(SP.p), SP.x - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _SP_p) // MOVQ (SP.p), SP.p - self.Emit("ORQ" , jit.Imm(1 << _S_init), _SP_f) // ORQ $(1<<_S_init), SP.f -} - -func (self *_Assembler) _asm_OP_slice_next(p *_Instr) { - self.Emit("TESTQ" , _SP_x, _SP_x) // TESTQ SP.x, SP.x - self.Xjmp("JZ" , p.vi()) // JZ p.vi() - self.Emit("SUBQ" , jit.Imm(1), _SP_x) // SUBQ $1, SP.x - self.Emit("BTRQ" , jit.Imm(_S_init), _SP_f) // BTRQ $_S_init, SP.f - self.Emit("LEAQ" , jit.Ptr(_SP_p, int64(p.vlen())), _AX) // LEAQ $(p.vlen())(SP.p), AX - self.Emit("CMOVQCC", _AX, _SP_p) // CMOVQNC AX, SP.p -} - -func (self *_Assembler) _asm_OP_marshal(p *_Instr) { - self.call_marshaler(_F_encodeJsonMarshaler, _T_json_Marshaler, p.vt()) -} - -func (self *_Assembler) _asm_OP_marshal_p(p *_Instr) { - if p.vk() != reflect.Ptr { - panic("marshal_p: invalid type") - } else { - self.call_marshaler_v(_F_encodeJsonMarshaler, _T_json_Marshaler, p.vt(), false) - } -} - -func (self *_Assembler) _asm_OP_marshal_text(p *_Instr) { - self.call_marshaler(_F_encodeTextMarshaler, _T_encoding_TextMarshaler, p.vt()) -} - -func (self *_Assembler) _asm_OP_marshal_text_p(p *_Instr) { - if p.vk() != reflect.Ptr { - panic("marshal_text_p: invalid type") - } else { - self.call_marshaler_v(_F_encodeTextMarshaler, _T_encoding_TextMarshaler, p.vt(), false) - } -} - -func (self *_Assembler) _asm_OP_cond_set(_ *_Instr) { - self.Emit("ORQ", jit.Imm(1 << _S_cond), _SP_f) // ORQ $(1<<_S_cond), SP.f -} - -func (self *_Assembler) _asm_OP_cond_testc(p *_Instr) { - self.Emit("BTRQ", jit.Imm(_S_cond), _SP_f) // BTRQ $_S_cond, SP.f - self.Xjmp("JC" , p.vi()) -} - -func (self *_Assembler) print_gc(i int, p1 *_Instr, p2 *_Instr) { - self.Emit("MOVQ", jit.Imm(int64(p2.op())), _CX) // MOVQ $(p2.op()), AX - self.Emit("MOVQ", jit.Imm(int64(p1.op())), _BX) // MOVQ $(p1.op()), BX - self.Emit("MOVQ", jit.Imm(int64(i)), _AX) // MOVQ $(i), CX - self.call_go(_F_println) -} diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/assembler_stkabi_amd64.go b/vendor/github.com/bytedance/sonic/internal/encoder/assembler_stkabi_amd64.go deleted file mode 100644 index 89dafc84..00000000 --- a/vendor/github.com/bytedance/sonic/internal/encoder/assembler_stkabi_amd64.go +++ /dev/null @@ -1,1175 +0,0 @@ -// +build go1.16,!go1.17 - -/* - * Copyright 2021 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package encoder - -import ( - `fmt` - `reflect` - `strconv` - `unsafe` - - `github.com/bytedance/sonic/internal/cpu` - `github.com/bytedance/sonic/internal/jit` - `github.com/bytedance/sonic/internal/native/types` - `github.com/twitchyliquid64/golang-asm/obj` - `github.com/twitchyliquid64/golang-asm/obj/x86` - - `github.com/bytedance/sonic/internal/native` - `github.com/bytedance/sonic/internal/rt` -) - -/** Register Allocations - * - * State Registers: - * - * %rbx : stack base - * %rdi : result pointer - * %rsi : result length - * %rdx : result capacity - * %r12 : sp->p - * %r13 : sp->q - * %r14 : sp->x - * %r15 : sp->f - * - * Error Registers: - * - * %r10 : error type register - * %r11 : error pointer register - */ - -/** Function Prototype & Stack Map - * - * func (buf *[]byte, p unsafe.Pointer, sb *_Stack, fv uint64) (err error) - * - * buf : (FP) - * p : 8(FP) - * sb : 16(FP) - * fv : 24(FP) - * err.vt : 32(FP) - * err.vp : 40(FP) - */ - -const ( - _S_cond = iota - _S_init -) - -const ( - _FP_args = 48 // 48 bytes for passing arguments to this function - _FP_fargs = 64 // 64 bytes for passing arguments to other Go functions - _FP_saves = 64 // 64 bytes for saving the registers before CALL instructions - _FP_locals = 24 // 24 bytes for local variables -) - -const ( - _FP_offs = _FP_fargs + _FP_saves + _FP_locals - _FP_size = _FP_offs + 8 // 8 bytes for the parent frame pointer - _FP_base = _FP_size + 8 // 8 bytes for the return address -) - -const ( - _FM_exp32 = 0x7f800000 - _FM_exp64 = 0x7ff0000000000000 -) - -const ( - _IM_null = 0x6c6c756e // 'null' - _IM_true = 0x65757274 // 'true' - _IM_fals = 0x736c6166 // 'fals' ('false' without the 'e') - _IM_open = 0x00225c22 // '"\"∅' - _IM_array = 0x5d5b // '[]' - _IM_object = 0x7d7b // '{}' - _IM_mulv = -0x5555555555555555 -) - -const ( - _LB_more_space = "_more_space" - _LB_more_space_return = "_more_space_return_" -) - -const ( - _LB_error = "_error" - _LB_error_too_deep = "_error_too_deep" - _LB_error_invalid_number = "_error_invalid_number" - _LB_error_nan_or_infinite = "_error_nan_or_infinite" - _LB_panic = "_panic" -) - -var ( - _AX = jit.Reg("AX") - _CX = jit.Reg("CX") - _DX = jit.Reg("DX") - _DI = jit.Reg("DI") - _SI = jit.Reg("SI") - _BP = jit.Reg("BP") - _SP = jit.Reg("SP") - _R8 = jit.Reg("R8") -) - -var ( - _X0 = jit.Reg("X0") - _Y0 = jit.Reg("Y0") -) - -var ( - _ST = jit.Reg("BX") - _RP = jit.Reg("DI") - _RL = jit.Reg("SI") - _RC = jit.Reg("DX") -) - -var ( - _LR = jit.Reg("R9") - _R10 = jit.Reg("R10") // used for gcWriterBarrier - _ET = jit.Reg("R10") - _EP = jit.Reg("R11") -) - -var ( - _SP_p = jit.Reg("R12") - _SP_q = jit.Reg("R13") - _SP_x = jit.Reg("R14") - _SP_f = jit.Reg("R15") -) - -var ( - _ARG_rb = jit.Ptr(_SP, _FP_base) - _ARG_vp = jit.Ptr(_SP, _FP_base + 8) - _ARG_sb = jit.Ptr(_SP, _FP_base + 16) - _ARG_fv = jit.Ptr(_SP, _FP_base + 24) -) - -var ( - _RET_et = jit.Ptr(_SP, _FP_base + 32) - _RET_ep = jit.Ptr(_SP, _FP_base + 40) -) - -var ( - _VAR_sp = jit.Ptr(_SP, _FP_fargs + _FP_saves) - _VAR_dn = jit.Ptr(_SP, _FP_fargs + _FP_saves + 8) - _VAR_vp = jit.Ptr(_SP, _FP_fargs + _FP_saves + 16) -) - -var ( - _REG_ffi = []obj.Addr{_RP, _RL, _RC} - _REG_enc = []obj.Addr{_ST, _SP_x, _SP_f, _SP_p, _SP_q, _RL} - _REG_jsr = []obj.Addr{_ST, _SP_x, _SP_f, _SP_p, _SP_q, _LR} - _REG_all = []obj.Addr{_ST, _SP_x, _SP_f, _SP_p, _SP_q, _RP, _RL, _RC} -) - -type _Assembler struct { - jit.BaseAssembler - p _Program - x int - name string -} - -func newAssembler(p _Program) *_Assembler { - return new(_Assembler).Init(p) -} - -/** Assembler Interface **/ -func (self *_Assembler) Load() _Encoder { - return ptoenc(self.BaseAssembler.Load("encode_"+self.name, _FP_size, _FP_args, argPtrs, localPtrs)) -} - -func (self *_Assembler) Init(p _Program) *_Assembler { - self.p = p - self.BaseAssembler.Init(self.compile) - return self -} - -func (self *_Assembler) compile() { - self.prologue() - self.instrs() - self.epilogue() - self.builtins() -} - -/** Assembler Stages **/ - -var _OpFuncTab = [256]func(*_Assembler, *_Instr) { - _OP_null : (*_Assembler)._asm_OP_null, - _OP_empty_arr : (*_Assembler)._asm_OP_empty_arr, - _OP_empty_obj : (*_Assembler)._asm_OP_empty_obj, - _OP_bool : (*_Assembler)._asm_OP_bool, - _OP_i8 : (*_Assembler)._asm_OP_i8, - _OP_i16 : (*_Assembler)._asm_OP_i16, - _OP_i32 : (*_Assembler)._asm_OP_i32, - _OP_i64 : (*_Assembler)._asm_OP_i64, - _OP_u8 : (*_Assembler)._asm_OP_u8, - _OP_u16 : (*_Assembler)._asm_OP_u16, - _OP_u32 : (*_Assembler)._asm_OP_u32, - _OP_u64 : (*_Assembler)._asm_OP_u64, - _OP_f32 : (*_Assembler)._asm_OP_f32, - _OP_f64 : (*_Assembler)._asm_OP_f64, - _OP_str : (*_Assembler)._asm_OP_str, - _OP_bin : (*_Assembler)._asm_OP_bin, - _OP_quote : (*_Assembler)._asm_OP_quote, - _OP_number : (*_Assembler)._asm_OP_number, - _OP_eface : (*_Assembler)._asm_OP_eface, - _OP_iface : (*_Assembler)._asm_OP_iface, - _OP_byte : (*_Assembler)._asm_OP_byte, - _OP_text : (*_Assembler)._asm_OP_text, - _OP_deref : (*_Assembler)._asm_OP_deref, - _OP_index : (*_Assembler)._asm_OP_index, - _OP_load : (*_Assembler)._asm_OP_load, - _OP_save : (*_Assembler)._asm_OP_save, - _OP_drop : (*_Assembler)._asm_OP_drop, - _OP_drop_2 : (*_Assembler)._asm_OP_drop_2, - _OP_recurse : (*_Assembler)._asm_OP_recurse, - _OP_is_nil : (*_Assembler)._asm_OP_is_nil, - _OP_is_nil_p1 : (*_Assembler)._asm_OP_is_nil_p1, - _OP_is_zero_1 : (*_Assembler)._asm_OP_is_zero_1, - _OP_is_zero_2 : (*_Assembler)._asm_OP_is_zero_2, - _OP_is_zero_4 : (*_Assembler)._asm_OP_is_zero_4, - _OP_is_zero_8 : (*_Assembler)._asm_OP_is_zero_8, - _OP_is_zero_map : (*_Assembler)._asm_OP_is_zero_map, - _OP_goto : (*_Assembler)._asm_OP_goto, - _OP_map_iter : (*_Assembler)._asm_OP_map_iter, - _OP_map_stop : (*_Assembler)._asm_OP_map_stop, - _OP_map_check_key : (*_Assembler)._asm_OP_map_check_key, - _OP_map_write_key : (*_Assembler)._asm_OP_map_write_key, - _OP_map_value_next : (*_Assembler)._asm_OP_map_value_next, - _OP_slice_len : (*_Assembler)._asm_OP_slice_len, - _OP_slice_next : (*_Assembler)._asm_OP_slice_next, - _OP_marshal : (*_Assembler)._asm_OP_marshal, - _OP_marshal_p : (*_Assembler)._asm_OP_marshal_p, - _OP_marshal_text : (*_Assembler)._asm_OP_marshal_text, - _OP_marshal_text_p : (*_Assembler)._asm_OP_marshal_text_p, - _OP_cond_set : (*_Assembler)._asm_OP_cond_set, - _OP_cond_testc : (*_Assembler)._asm_OP_cond_testc, -} - -func (self *_Assembler) instr(v *_Instr) { - if fn := _OpFuncTab[v.op()]; fn != nil { - fn(self, v) - } else { - panic(fmt.Sprintf("invalid opcode: %d", v.op())) - } -} - -func (self *_Assembler) instrs() { - for i, v := range self.p { - self.Mark(i) - self.instr(&v) - self.debug_instr(i, &v) - } -} - -func (self *_Assembler) builtins() { - self.more_space() - self.error_too_deep() - self.error_invalid_number() - self.error_nan_or_infinite() - self.go_panic() -} - -func (self *_Assembler) epilogue() { - self.Mark(len(self.p)) - self.Emit("XORL", _ET, _ET) - self.Emit("XORL", _EP, _EP) - self.Link(_LB_error) - self.Emit("MOVQ", _ARG_rb, _AX) // MOVQ rb<>+0(FP), AX - self.Emit("MOVQ", _RL, jit.Ptr(_AX, 8)) // MOVQ RL, 8(AX) - self.Emit("MOVQ", _ET, _RET_et) // MOVQ ET, et<>+24(FP) - self.Emit("MOVQ", _EP, _RET_ep) // MOVQ EP, ep<>+32(FP) - self.Emit("MOVQ", jit.Ptr(_SP, _FP_offs), _BP) // MOVQ _FP_offs(SP), BP - self.Emit("ADDQ", jit.Imm(_FP_size), _SP) // ADDQ $_FP_size, SP - self.Emit("RET") // RET -} - -func (self *_Assembler) prologue() { - self.Emit("SUBQ", jit.Imm(_FP_size), _SP) // SUBQ $_FP_size, SP - self.Emit("MOVQ", _BP, jit.Ptr(_SP, _FP_offs)) // MOVQ BP, _FP_offs(SP) - self.Emit("LEAQ", jit.Ptr(_SP, _FP_offs), _BP) // LEAQ _FP_offs(SP), BP - self.load_buffer() // LOAD {buf} - self.Emit("MOVQ", _ARG_vp, _SP_p) // MOVQ vp<>+8(FP), SP.p - self.Emit("MOVQ", _ARG_sb, _ST) // MOVQ sb<>+16(FP), ST - self.Emit("XORL", _SP_x, _SP_x) // XORL SP.x, SP.x - self.Emit("XORL", _SP_f, _SP_f) // XORL SP.f, SP.f - self.Emit("XORL", _SP_q, _SP_q) // XORL SP.q, SP.q -} - -/** Assembler Inline Functions **/ - -func (self *_Assembler) xsave(reg ...obj.Addr) { - for i, v := range reg { - if i > _FP_saves / 8 - 1 { - panic("too many registers to save") - } else { - self.Emit("MOVQ", v, jit.Ptr(_SP, _FP_fargs + int64(i) * 8)) - } - } -} - -func (self *_Assembler) xload(reg ...obj.Addr) { - for i, v := range reg { - if i > _FP_saves / 8 - 1 { - panic("too many registers to load") - } else { - self.Emit("MOVQ", jit.Ptr(_SP, _FP_fargs + int64(i) * 8), v) - } - } -} - -func (self *_Assembler) rbuf_di() { - if _RP.Reg != x86.REG_DI { - panic("register allocation messed up: RP != DI") - } else { - self.Emit("ADDQ", _RL, _RP) - } -} - -func (self *_Assembler) store_int(nd int, fn obj.Addr, ins string) { - self.check_size(nd) - self.save_c() // SAVE $C_regs - self.rbuf_di() // MOVQ RP, DI - self.Emit(ins, jit.Ptr(_SP_p, 0), _SI) // $ins (SP.p), SI - self.call_c(fn) // CALL_C $fn - self.Emit("ADDQ", _AX, _RL) // ADDQ AX, RL -} - -func (self *_Assembler) store_str(s string) { - i := 0 - m := rt.Str2Mem(s) - - /* 8-byte stores */ - for i <= len(m) - 8 { - self.Emit("MOVQ", jit.Imm(rt.Get64(m[i:])), _AX) // MOVQ $s[i:], AX - self.Emit("MOVQ", _AX, jit.Sib(_RP, _RL, 1, int64(i))) // MOVQ AX, i(RP)(RL) - i += 8 - } - - /* 4-byte stores */ - if i <= len(m) - 4 { - self.Emit("MOVL", jit.Imm(int64(rt.Get32(m[i:]))), jit.Sib(_RP, _RL, 1, int64(i))) // MOVL $s[i:], i(RP)(RL) - i += 4 - } - - /* 2-byte stores */ - if i <= len(m) - 2 { - self.Emit("MOVW", jit.Imm(int64(rt.Get16(m[i:]))), jit.Sib(_RP, _RL, 1, int64(i))) // MOVW $s[i:], i(RP)(RL) - i += 2 - } - - /* last byte */ - if i < len(m) { - self.Emit("MOVB", jit.Imm(int64(m[i])), jit.Sib(_RP, _RL, 1, int64(i))) // MOVB $s[i:], i(RP)(RL) - } -} - -func (self *_Assembler) check_size(n int) { - self.check_size_rl(jit.Ptr(_RL, int64(n))) -} - -func (self *_Assembler) check_size_r(r obj.Addr, d int) { - self.check_size_rl(jit.Sib(_RL, r, 1, int64(d))) -} - -func (self *_Assembler) check_size_rl(v obj.Addr) { - idx := self.x - key := _LB_more_space_return + strconv.Itoa(idx) - - /* the following code relies on LR == R9 to work */ - if _LR.Reg != x86.REG_R9 { - panic("register allocation messed up: LR != R9") - } - - /* check for buffer capacity */ - self.x++ - self.Emit("LEAQ", v, _AX) // LEAQ $v, AX - self.Emit("CMPQ", _AX, _RC) // CMPQ AX, RC - self.Sjmp("JBE" , key) // JBE _more_space_return_{n} - self.slice_grow_ax(key) // GROW $key - self.Link(key) // _more_space_return_{n}: -} - -func (self *_Assembler) slice_grow_ax(ret string) { - self.Byte(0x4c, 0x8d, 0x0d) // LEAQ ?(PC), R9 - self.Sref(ret, 4) // .... &ret - self.Sjmp("JMP" , _LB_more_space) // JMP _more_space -} - -/** State Stack Helpers **/ - -const ( - _StateSize = int64(unsafe.Sizeof(_State{})) - _StackLimit = _MaxStack * _StateSize -) - -func (self *_Assembler) save_state() { - self.Emit("MOVQ", jit.Ptr(_ST, 0), _CX) // MOVQ (ST), CX - self.Emit("LEAQ", jit.Ptr(_CX, _StateSize), _R8) // LEAQ _StateSize(CX), R8 - self.Emit("CMPQ", _R8, jit.Imm(_StackLimit)) // CMPQ R8, $_StackLimit - self.Sjmp("JAE" , _LB_error_too_deep) // JA _error_too_deep - self.Emit("MOVQ", _SP_x, jit.Sib(_ST, _CX, 1, 8)) // MOVQ SP.x, 8(ST)(CX) - self.Emit("MOVQ", _SP_f, jit.Sib(_ST, _CX, 1, 16)) // MOVQ SP.f, 16(ST)(CX) - self.WritePtr(0, _SP_p, jit.Sib(_ST, _CX, 1, 24)) // MOVQ SP.p, 24(ST)(CX) - self.WritePtr(1, _SP_q, jit.Sib(_ST, _CX, 1, 32)) // MOVQ SP.q, 32(ST)(CX) - self.Emit("MOVQ", _R8, jit.Ptr(_ST, 0)) // MOVQ R8, (ST) -} - -func (self *_Assembler) drop_state(decr int64) { - self.Emit("MOVQ" , jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("SUBQ" , jit.Imm(decr), _AX) // SUBQ $decr, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_ST, 0)) // MOVQ AX, (ST) - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 8), _SP_x) // MOVQ 8(ST)(AX), SP.x - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 16), _SP_f) // MOVQ 16(ST)(AX), SP.f - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 24), _SP_p) // MOVQ 24(ST)(AX), SP.p - self.Emit("MOVQ" , jit.Sib(_ST, _AX, 1, 32), _SP_q) // MOVQ 32(ST)(AX), SP.q - self.Emit("PXOR" , _X0, _X0) // PXOR X0, X0 - self.Emit("MOVOU", _X0, jit.Sib(_ST, _AX, 1, 8)) // MOVOU X0, 8(ST)(AX) - self.Emit("MOVOU", _X0, jit.Sib(_ST, _AX, 1, 24)) // MOVOU X0, 24(ST)(AX) -} - -/** Buffer Helpers **/ - -func (self *_Assembler) add_char(ch byte) { - self.Emit("MOVB", jit.Imm(int64(ch)), jit.Sib(_RP, _RL, 1, 0)) // MOVB $ch, (RP)(RL) - self.Emit("ADDQ", jit.Imm(1), _RL) // ADDQ $1, RL -} - -func (self *_Assembler) add_long(ch uint32, n int64) { - self.Emit("MOVL", jit.Imm(int64(ch)), jit.Sib(_RP, _RL, 1, 0)) // MOVL $ch, (RP)(RL) - self.Emit("ADDQ", jit.Imm(n), _RL) // ADDQ $n, RL -} - -func (self *_Assembler) add_text(ss string) { - self.store_str(ss) // TEXT $ss - self.Emit("ADDQ", jit.Imm(int64(len(ss))), _RL) // ADDQ ${len(ss)}, RL -} - -func (self *_Assembler) prep_buffer() { - self.Emit("MOVQ", _ARG_rb, _AX) // MOVQ rb<>+0(FP), AX - self.Emit("MOVQ", _RL, jit.Ptr(_AX, 8)) // MOVQ RL, 8(AX) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) -} - -func (self *_Assembler) prep_buffer_c() { - self.Emit("MOVQ", _ARG_rb, _DI) // MOVQ rb<>+0(FP), DI - self.Emit("MOVQ", _RL, jit.Ptr(_DI, 8)) // MOVQ RL, 8(DI) -} - -func (self *_Assembler) save_buffer() { - self.Emit("MOVQ", _ARG_rb, _CX) // MOVQ rb<>+0(FP), CX - self.Emit("MOVQ", _RP, jit.Ptr(_CX, 0)) // MOVQ RP, (CX) - self.Emit("MOVQ", _RL, jit.Ptr(_CX, 8)) // MOVQ RL, 8(CX) - self.Emit("MOVQ", _RC, jit.Ptr(_CX, 16)) // MOVQ RC, 16(CX) -} - -func (self *_Assembler) load_buffer() { - self.Emit("MOVQ", _ARG_rb, _AX) // MOVQ rb<>+0(FP), AX - self.Emit("MOVQ", jit.Ptr(_AX, 0), _RP) // MOVQ (AX), RP - self.Emit("MOVQ", jit.Ptr(_AX, 8), _RL) // MOVQ 8(AX), RL - self.Emit("MOVQ", jit.Ptr(_AX, 16), _RC) // MOVQ 16(AX), RC -} - -/** Function Interface Helpers **/ - -func (self *_Assembler) call(pc obj.Addr) { - self.Emit("MOVQ", pc, _AX) // MOVQ $pc, AX - self.Rjmp("CALL", _AX) // CALL AX -} - -func (self *_Assembler) save_c() { - self.xsave(_REG_ffi...) // SAVE $REG_ffi -} - -func (self *_Assembler) call_c(pc obj.Addr) { - self.call(pc) // CALL $pc - self.xload(_REG_ffi...) // LOAD $REG_ffi -} - -func (self *_Assembler) call_go(pc obj.Addr) { - self.xsave(_REG_all...) // SAVE $REG_all - self.call(pc) // CALL $pc - self.xload(_REG_all...) // LOAD $REG_all -} - -func (self *_Assembler) call_encoder(pc obj.Addr) { - self.xsave(_REG_enc...) // SAVE $REG_enc - self.call(pc) // CALL $pc - self.xload(_REG_enc...) // LOAD $REG_enc - self.load_buffer() // LOAD {buf} -} - -func (self *_Assembler) call_marshaler(fn obj.Addr, it *rt.GoType, vt reflect.Type) { - switch vt.Kind() { - case reflect.Interface : self.call_marshaler_i(fn, it) - case reflect.Ptr, reflect.Map: self.call_marshaler_v(fn, it, vt, true) - // struct/array of 1 direct iface type can be direct - default : self.call_marshaler_v(fn, it, vt, !rt.UnpackType(vt).Indirect()) - } -} - -func (self *_Assembler) call_marshaler_i(fn obj.Addr, it *rt.GoType) { - self.Emit("MOVQ" , jit.Gtype(it), _AX) // MOVQ $it, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _CX) // MOVQ 8(SP.p), CX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JZ" , "_null_{n}") // JZ _null_{n} - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - self.Emit("MOVQ" , _CX, jit.Ptr(_SP, 16)) // MOVQ CX, 16(SP) - self.call_go(_F_assertI2I) // CALL_GO assertI2I - self.prep_buffer() // MOVE {buf}, (SP) - self.Emit("MOVOU", jit.Ptr(_SP, 24), _X0) // MOVOU 24(SP), X0 - self.Emit("MOVOU", _X0, jit.Ptr(_SP, 8)) // MOVOU X0, 8(SP) - self.Emit("MOVQ", _ARG_fv, _CX) // MOVQ ARG.fv, CX - self.Emit("MOVQ", _CX, jit.Ptr(_SP, 24)) // MOVQ CX, 24(SP) - self.call_encoder(fn) // CALL $fn - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error - self.Sjmp("JMP" , "_done_{n}") // JMP _done_{n} - self.Link("_null_{n}") // _null_{n}: - self.check_size(4) // SIZE $4 - self.Emit("MOVL", jit.Imm(_IM_null), jit.Sib(_RP, _RL, 1, 0)) // MOVL $'null', (RP)(RL*1) - self.Emit("ADDQ", jit.Imm(4), _RL) // ADDQ $4, RL - self.Link("_done_{n}") // _done_{n}: -} - -func (self *_Assembler) call_marshaler_v(fn obj.Addr, it *rt.GoType, vt reflect.Type, deref bool) { - self.prep_buffer() // MOVE {buf}, (SP) - self.Emit("MOVQ", jit.Itab(it, vt), _AX) // MOVQ $(itab(it, vt)), AX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - - /* dereference the pointer if needed */ - if !deref { - self.Emit("MOVQ", _SP_p, jit.Ptr(_SP, 16)) // MOVQ SP.p, 16(SP) - } else { - self.Emit("MOVQ", jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 16)) // MOVQ AX, 16(SP) - } - - /* call the encoder, and perform error checks */ - self.Emit("MOVQ", _ARG_fv, _CX) // MOVQ ARG.fv, CX - self.Emit("MOVQ", _CX, jit.Ptr(_SP, 24)) // MOVQ CX, 24(SP) - self.call_encoder(fn) // CALL $fn - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error -} - -/** Builtin: _more_space **/ - -var ( - _T_byte = jit.Type(byteType) - _F_growslice = jit.Func(growslice) -) - -func (self *_Assembler) more_space() { - self.Link(_LB_more_space) - self.Emit("MOVQ", _T_byte, _AX) // MOVQ $_T_byte, _AX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 0)) // MOVQ _AX, (SP) - self.Emit("MOVQ", _RP, jit.Ptr(_SP, 8)) // MOVQ RP, 8(SP) - self.Emit("MOVQ", _RL, jit.Ptr(_SP, 16)) // MOVQ RL, 16(SP) - self.Emit("MOVQ", _RC, jit.Ptr(_SP, 24)) // MOVQ RC, 24(SP) - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 32)) // MOVQ AX, 32(SP) - self.xsave(_REG_jsr...) // SAVE $REG_jsr - self.call(_F_growslice) // CALL $pc - self.xload(_REG_jsr...) // LOAD $REG_jsr - self.Emit("MOVQ", jit.Ptr(_SP, 40), _RP) // MOVQ 40(SP), RP - self.Emit("MOVQ", jit.Ptr(_SP, 48), _RL) // MOVQ 48(SP), RL - self.Emit("MOVQ", jit.Ptr(_SP, 56), _RC) // MOVQ 56(SP), RC - self.save_buffer() // SAVE {buf} - self.Rjmp("JMP" , _LR) // JMP LR -} - -/** Builtin Errors **/ - -var ( - _V_ERR_too_deep = jit.Imm(int64(uintptr(unsafe.Pointer(_ERR_too_deep)))) - _V_ERR_nan_or_infinite = jit.Imm(int64(uintptr(unsafe.Pointer(_ERR_nan_or_infinite)))) - _I_json_UnsupportedValueError = jit.Itab(rt.UnpackType(errorType), jsonUnsupportedValueType) -) - -func (self *_Assembler) error_too_deep() { - self.Link(_LB_error_too_deep) - self.Emit("MOVQ", _V_ERR_too_deep, _EP) // MOVQ $_V_ERR_too_deep, EP - self.Emit("MOVQ", _I_json_UnsupportedValueError, _ET) // MOVQ $_I_json_UnsupportedValuError, ET - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -func (self *_Assembler) error_invalid_number() { - self.Link(_LB_error_invalid_number) - self.call_go(_F_error_number) // CALL_GO error_number - self.Emit("MOVQ", jit.Ptr(_SP, 16), _ET) // MOVQ 16(SP), ET - self.Emit("MOVQ", jit.Ptr(_SP, 24), _EP) // MOVQ 24(SP), EP - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -func (self *_Assembler) error_nan_or_infinite() { - self.Link(_LB_error_nan_or_infinite) - self.Emit("MOVQ", _V_ERR_nan_or_infinite, _EP) // MOVQ $_V_ERR_nan_or_infinite, EP - self.Emit("MOVQ", _I_json_UnsupportedValueError, _ET) // MOVQ $_I_json_UnsupportedValuError, ET - self.Sjmp("JMP" , _LB_error) // JMP _error -} - -/** String Encoding Routine **/ - -var ( - _F_quote = jit.Imm(int64(native.S_quote)) - _F_panic = jit.Func(goPanic) -) - -func (self *_Assembler) go_panic() { - self.Link(_LB_panic) - self.Emit("MOVQ", _SP_p, jit.Ptr(_SP, 8)) - self.call_go(_F_panic) -} - -func (self *_Assembler) encode_string(doubleQuote bool) { - self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _AX) // MOVQ 8(SP.p), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JZ" , "_str_empty_{n}") // JZ _str_empty_{n} - self.Emit("CMPQ", jit.Ptr(_SP_p, 0), jit.Imm(0)) - self.Sjmp("JNE" , "_str_next_{n}") - self.Emit("MOVQ", jit.Imm(int64(panicNilPointerOfNonEmptyString)), jit.Ptr(_SP, 0)) - self.Sjmp("JMP", _LB_panic) - self.Link("_str_next_{n}") - - /* openning quote, check for double quote */ - if !doubleQuote { - self.check_size_r(_AX, 2) // SIZE $2 - self.add_char('"') // CHAR $'"' - } else { - self.check_size_r(_AX, 6) // SIZE $6 - self.add_long(_IM_open, 3) // TEXT $`"\"` - } - - /* quoting loop */ - self.Emit("XORL", _AX, _AX) // XORL AX, AX - self.Emit("MOVQ", _AX, _VAR_sp) // MOVQ AX, sp - self.Link("_str_loop_{n}") // _str_loop_{n}: - self.save_c() // SAVE $REG_ffi - - /* load the output buffer first, and then input buffer, - * because the parameter registers collide with RP / RL / RC */ - self.Emit("MOVQ", _RC, _CX) // MOVQ RC, CX - self.Emit("SUBQ", _RL, _CX) // SUBQ RL, CX - self.Emit("MOVQ", _CX, _VAR_dn) // MOVQ CX, dn - self.Emit("LEAQ", jit.Sib(_RP, _RL, 1, 0), _DX) // LEAQ (RP)(RL), DX - self.Emit("LEAQ", _VAR_dn, _CX) // LEAQ dn, CX - self.Emit("MOVQ", _VAR_sp, _AX) // MOVQ sp, AX - self.Emit("MOVQ", jit.Ptr(_SP_p, 0), _DI) // MOVQ (SP.p), DI - self.Emit("MOVQ", jit.Ptr(_SP_p, 8), _SI) // MOVQ 8(SP.p), SI - self.Emit("ADDQ", _AX, _DI) // ADDQ AX, DI - self.Emit("SUBQ", _AX, _SI) // SUBQ AX, SI - - /* set the flags based on `doubleQuote` */ - if !doubleQuote { - self.Emit("XORL", _R8, _R8) // XORL R8, R8 - } else { - self.Emit("MOVL", jit.Imm(types.F_DOUBLE_UNQUOTE), _R8) // MOVL ${types.F_DOUBLE_UNQUOTE}, R8 - } - - /* call the native quoter */ - self.call_c(_F_quote) // CALL quote - self.Emit("ADDQ" , _VAR_dn, _RL) // ADDQ dn, RL - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JS" , "_str_space_{n}") // JS _str_space_{n} - - /* close the string, check for double quote */ - if !doubleQuote { - self.check_size(1) // SIZE $1 - self.add_char('"') // CHAR $'"' - self.Sjmp("JMP", "_str_end_{n}") // JMP _str_end_{n} - } else { - self.check_size(3) // SIZE $3 - self.add_text("\\\"\"") // TEXT $'\""' - self.Sjmp("JMP", "_str_end_{n}") // JMP _str_end_{n} - } - - /* not enough space to contain the quoted string */ - self.Link("_str_space_{n}") // _str_space_{n}: - self.Emit("NOTQ", _AX) // NOTQ AX - self.Emit("ADDQ", _AX, _VAR_sp) // ADDQ AX, sp - self.Emit("LEAQ", jit.Sib(_RC, _RC, 1, 0), _AX) // LEAQ (RC)(RC), AX - self.slice_grow_ax("_str_loop_{n}") // GROW _str_loop_{n} - - /* empty string, check for double quote */ - if !doubleQuote { - self.Link("_str_empty_{n}") // _str_empty_{n}: - self.check_size(2) // SIZE $2 - self.add_text("\"\"") // TEXT $'""' - self.Link("_str_end_{n}") // _str_end_{n}: - } else { - self.Link("_str_empty_{n}") // _str_empty_{n}: - self.check_size(6) // SIZE $6 - self.add_text("\"\\\"\\\"\"") // TEXT $'"\"\""' - self.Link("_str_end_{n}") // _str_end_{n}: - } -} - -/** OpCode Assembler Functions **/ - -var ( - _T_json_Marshaler = rt.UnpackType(jsonMarshalerType) - _T_encoding_TextMarshaler = rt.UnpackType(encodingTextMarshalerType) -) - -var ( - _F_f64toa = jit.Imm(int64(native.S_f64toa)) - _F_f32toa = jit.Imm(int64(native.S_f32toa)) - _F_i64toa = jit.Imm(int64(native.S_i64toa)) - _F_u64toa = jit.Imm(int64(native.S_u64toa)) - _F_b64encode = jit.Imm(int64(_subr__b64encode)) -) - -var ( - _F_memmove = jit.Func(memmove) - _F_error_number = jit.Func(error_number) - _F_isValidNumber = jit.Func(isValidNumber) -) - -var ( - _F_iteratorStop = jit.Func(iteratorStop) - _F_iteratorNext = jit.Func(iteratorNext) - _F_iteratorStart = jit.Func(iteratorStart) -) - -var ( - _F_encodeTypedPointer obj.Addr - _F_encodeJsonMarshaler obj.Addr - _F_encodeTextMarshaler obj.Addr -) - -const ( - _MODE_AVX2 = 1 << 2 -) - -func init() { - _F_encodeTypedPointer = jit.Func(encodeTypedPointer) - _F_encodeJsonMarshaler = jit.Func(encodeJsonMarshaler) - _F_encodeTextMarshaler = jit.Func(encodeTextMarshaler) -} - -func (self *_Assembler) _asm_OP_null(_ *_Instr) { - self.check_size(4) - self.Emit("MOVL", jit.Imm(_IM_null), jit.Sib(_RP, _RL, 1, 0)) // MOVL $'null', (RP)(RL*1) - self.Emit("ADDQ", jit.Imm(4), _RL) // ADDQ $4, RL -} - -func (self *_Assembler) _asm_OP_empty_arr(_ *_Instr) { - self.Emit("BTQ", jit.Imm(int64(bitNoNullSliceOrMap)), _ARG_fv) - self.Sjmp("JC", "_empty_arr_{n}") - self._asm_OP_null(nil) - self.Sjmp("JMP", "_empty_arr_end_{n}") - self.Link("_empty_arr_{n}") - self.check_size(2) - self.Emit("MOVW", jit.Imm(_IM_array), jit.Sib(_RP, _RL, 1, 0)) - self.Emit("ADDQ", jit.Imm(2), _RL) - self.Link("_empty_arr_end_{n}") -} - -func (self *_Assembler) _asm_OP_empty_obj(_ *_Instr) { - self.Emit("BTQ", jit.Imm(int64(bitNoNullSliceOrMap)), _ARG_fv) - self.Sjmp("JC", "_empty_obj_{n}") - self._asm_OP_null(nil) - self.Sjmp("JMP", "_empty_obj_end_{n}") - self.Link("_empty_obj_{n}") - self.check_size(2) - self.Emit("MOVW", jit.Imm(_IM_object), jit.Sib(_RP, _RL, 1, 0)) - self.Emit("ADDQ", jit.Imm(2), _RL) - self.Link("_empty_obj_end_{n}") -} - -func (self *_Assembler) _asm_OP_bool(_ *_Instr) { - self.Emit("CMPB", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPB (SP.p), $0 - self.Sjmp("JE" , "_false_{n}") // JE _false_{n} - self.check_size(4) // SIZE $4 - self.Emit("MOVL", jit.Imm(_IM_true), jit.Sib(_RP, _RL, 1, 0)) // MOVL $'true', (RP)(RL*1) - self.Emit("ADDQ", jit.Imm(4), _RL) // ADDQ $4, RL - self.Sjmp("JMP" , "_end_{n}") // JMP _end_{n} - self.Link("_false_{n}") // _false_{n}: - self.check_size(5) // SIZE $5 - self.Emit("MOVL", jit.Imm(_IM_fals), jit.Sib(_RP, _RL, 1, 0)) // MOVL $'fals', (RP)(RL*1) - self.Emit("MOVB", jit.Imm('e'), jit.Sib(_RP, _RL, 1, 4)) // MOVB $'e', 4(RP)(RL*1) - self.Emit("ADDQ", jit.Imm(5), _RL) // ADDQ $5, RL - self.Link("_end_{n}") // _end_{n}: -} - -func (self *_Assembler) _asm_OP_i8(_ *_Instr) { - self.store_int(4, _F_i64toa, "MOVBQSX") -} - -func (self *_Assembler) _asm_OP_i16(_ *_Instr) { - self.store_int(6, _F_i64toa, "MOVWQSX") -} - -func (self *_Assembler) _asm_OP_i32(_ *_Instr) { - self.store_int(17, _F_i64toa, "MOVLQSX") -} - -func (self *_Assembler) _asm_OP_i64(_ *_Instr) { - self.store_int(21, _F_i64toa, "MOVQ") -} - -func (self *_Assembler) _asm_OP_u8(_ *_Instr) { - self.store_int(3, _F_u64toa, "MOVBQZX") -} - -func (self *_Assembler) _asm_OP_u16(_ *_Instr) { - self.store_int(5, _F_u64toa, "MOVWQZX") -} - -func (self *_Assembler) _asm_OP_u32(_ *_Instr) { - self.store_int(16, _F_u64toa, "MOVLQZX") -} - -func (self *_Assembler) _asm_OP_u64(_ *_Instr) { - self.store_int(20, _F_u64toa, "MOVQ") -} - -func (self *_Assembler) _asm_OP_f32(_ *_Instr) { - self.check_size(32) - self.Emit("MOVL" , jit.Ptr(_SP_p, 0), _AX) // MOVL (SP.p), AX - self.Emit("ANDL" , jit.Imm(_FM_exp32), _AX) // ANDL $_FM_exp32, AX - self.Emit("XORL" , jit.Imm(_FM_exp32), _AX) // XORL $_FM_exp32, AX - self.Sjmp("JZ" , _LB_error_nan_or_infinite) // JZ _error_nan_or_infinite - self.save_c() // SAVE $C_regs - self.rbuf_di() // MOVQ RP, DI - self.Emit("MOVSS" , jit.Ptr(_SP_p, 0), _X0) // MOVSS (SP.p), X0 - self.call_c(_F_f32toa) // CALL_C f64toa - self.Emit("ADDQ" , _AX, _RL) // ADDQ AX, RL -} - -func (self *_Assembler) _asm_OP_f64(_ *_Instr) { - self.check_size(32) - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("MOVQ" , jit.Imm(_FM_exp64), _CX) // MOVQ $_FM_exp64, CX - self.Emit("ANDQ" , _CX, _AX) // ANDQ CX, AX - self.Emit("XORQ" , _CX, _AX) // XORQ CX, AX - self.Sjmp("JZ" , _LB_error_nan_or_infinite) // JZ _error_nan_or_infinite - self.save_c() // SAVE $C_regs - self.rbuf_di() // MOVQ RP, DI - self.Emit("MOVSD" , jit.Ptr(_SP_p, 0), _X0) // MOVSD (SP.p), X0 - self.call_c(_F_f64toa) // CALL_C f64toa - self.Emit("ADDQ" , _AX, _RL) // ADDQ AX, RL -} - -func (self *_Assembler) _asm_OP_str(_ *_Instr) { - self.encode_string(false) -} - -func (self *_Assembler) _asm_OP_bin(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_SP_p, 8), _AX) // MOVQ 8(SP.p), AX - self.Emit("ADDQ", jit.Imm(2), _AX) // ADDQ $2, AX - self.Emit("MOVQ", jit.Imm(_IM_mulv), _CX) // MOVQ $_MF_mulv, CX - self.Emit("MOVQ", _DX, _R8) // MOVQ DX, R8 - self.From("MULQ", _CX) // MULQ CX - self.Emit("LEAQ", jit.Sib(_DX, _DX, 1, 1), _AX) // LEAQ 1(DX)(DX), AX - self.Emit("ORQ" , jit.Imm(2), _AX) // ORQ $2, AX - self.Emit("MOVQ", _R8, _DX) // MOVQ R8, DX - self.check_size_r(_AX, 0) // SIZE AX - self.add_char('"') // CHAR $'"' - self.save_c() // SAVE $REG_ffi - self.prep_buffer_c() // MOVE {buf}, DI - self.Emit("MOVQ", _SP_p, _SI) // MOVQ SP.p, SI - - /* check for AVX2 support */ - if !cpu.HasAVX2 { - self.Emit("XORL", _DX, _DX) // XORL DX, DX - } else { - self.Emit("MOVL", jit.Imm(_MODE_AVX2), _DX) // MOVL $_MODE_AVX2, DX - } - - /* call the encoder */ - self.call_c(_F_b64encode) // CALL b64encode - self.load_buffer() // LOAD {buf} - self.add_char('"') // CHAR $'"' -} - -func (self *_Assembler) _asm_OP_quote(_ *_Instr) { - self.encode_string(true) -} - -func (self *_Assembler) _asm_OP_number(_ *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _CX) // MOVQ (SP.p), CX - self.Emit("TESTQ", _CX, _CX) // TESTQ CX, CX - self.Sjmp("JZ" , "_empty_{n}") // JZ _empty_{n} - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Sjmp("JNZ" , "_number_next_{n}") - self.Emit("MOVQ", jit.Imm(int64(panicNilPointerOfNonEmptyString)), jit.Ptr(_SP, 0)) - self.Sjmp("JMP", _LB_panic) - self.Link("_number_next_{n}") - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ" , _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) - self.call_go(_F_isValidNumber) // CALL_GO isValidNumber - self.Emit("CMPB" , jit.Ptr(_SP, 16), jit.Imm(0)) // CMPB 16(SP), $0 - self.Sjmp("JE" , _LB_error_invalid_number) // JE _error_invalid_number - self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _AX) // MOVQ 8(SP.p), AX - self.check_size_r(_AX, 0) // SIZE AX - self.Emit("LEAQ" , jit.Sib(_RP, _RL, 1, 0), _AX) // LEAQ (RP)(RL), AX - self.Emit("ADDQ" , jit.Ptr(_SP_p, 8), _RL) // ADDQ 8(SP.p), RL - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVOU", jit.Ptr(_SP_p, 0), _X0) // MOVOU (SP.p), X0 - self.Emit("MOVOU", _X0, jit.Ptr(_SP, 8)) // MOVOU X0, 8(SP) - self.call_go(_F_memmove) // CALL_GO memmove - self.Sjmp("JMP" , "_done_{n}") // JMP _done_{n} - self.Link("_empty_{n}") // _empty_{n}: - self.check_size(1) // SIZE $1 - self.add_char('0') // CHAR $'0' - self.Link("_done_{n}") // _done_{n}: -} - -func (self *_Assembler) _asm_OP_eface(_ *_Instr) { - self.prep_buffer() // MOVE {buf}, (SP)s - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - self.Emit("LEAQ" , jit.Ptr(_SP_p, 8), _AX) // LEAQ 8(SP.p), AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 16)) // MOVQ AX, 16(SP) - self.Emit("MOVQ" , _ST, jit.Ptr(_SP, 24)) // MOVQ ST, 24(SP) - self.Emit("MOVQ" , _ARG_fv, _AX) // MOVQ fv, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 32)) // MOVQ AX, 32(SP) - self.call_encoder(_F_encodeTypedPointer) // CALL encodeTypedPointer - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _ET) // MOVQ 40(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 48), _EP) // MOVQ 48(SP), EP - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error -} - -func (self *_Assembler) _asm_OP_iface(_ *_Instr) { - self.prep_buffer() // MOVE {buf}, (SP) - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("MOVQ" , jit.Ptr(_AX, 8), _AX) // MOVQ 8(AX), AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - self.Emit("LEAQ" , jit.Ptr(_SP_p, 8), _AX) // LEAQ 8(SP.p), AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 16)) // MOVQ AX, 16(SP) - self.Emit("MOVQ" , _ST, jit.Ptr(_SP, 24)) // MOVQ ST, 24(SP) - self.Emit("MOVQ" , _ARG_fv, _AX) // MOVQ fv, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 32)) // MOVQ AX, 32(SP) - self.call_encoder(_F_encodeTypedPointer) // CALL encodeTypedPointer - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _ET) // MOVQ 40(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 48), _EP) // MOVQ 48(SP), EP - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error -} - -func (self *_Assembler) _asm_OP_byte(p *_Instr) { - self.check_size(1) - self.Emit("MOVB", jit.Imm(p.i64()), jit.Sib(_RP, _RL, 1, 0)) // MOVL p.vi(), (RP)(RL*1) - self.Emit("ADDQ", jit.Imm(1), _RL) // ADDQ $1, RL -} - -func (self *_Assembler) _asm_OP_text(p *_Instr) { - self.check_size(len(p.vs())) // SIZE ${len(p.vs())} - self.add_text(p.vs()) // TEXT ${p.vs()} -} - -func (self *_Assembler) _asm_OP_deref(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_SP_p, 0), _SP_p) // MOVQ (SP.p), SP.p -} - -func (self *_Assembler) _asm_OP_index(p *_Instr) { - self.Emit("MOVQ", jit.Imm(p.i64()), _AX) // MOVQ $p.vi(), AX - self.Emit("ADDQ", _AX, _SP_p) // ADDQ AX, SP.p -} - -func (self *_Assembler) _asm_OP_load(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_ST, 0), _AX) // MOVQ (ST), AX - self.Emit("MOVQ", jit.Sib(_ST, _AX, 1, -24), _SP_x) // MOVQ -24(ST)(AX), SP.x - self.Emit("MOVQ", jit.Sib(_ST, _AX, 1, -8), _SP_p) // MOVQ -8(ST)(AX), SP.p - self.Emit("MOVQ", jit.Sib(_ST, _AX, 1, 0), _SP_q) // MOVQ (ST)(AX), SP.q -} - -func (self *_Assembler) _asm_OP_save(_ *_Instr) { - self.save_state() -} - -func (self *_Assembler) _asm_OP_drop(_ *_Instr) { - self.drop_state(_StateSize) -} - -func (self *_Assembler) _asm_OP_drop_2(_ *_Instr) { - self.drop_state(_StateSize * 2) // DROP $(_StateSize * 2) - self.Emit("MOVOU", _X0, jit.Sib(_ST, _AX, 1, 56)) // MOVOU X0, 56(ST)(AX) -} - -func (self *_Assembler) _asm_OP_recurse(p *_Instr) { - self.prep_buffer() // MOVE {buf}, (SP) - vt, pv := p.vp() - self.Emit("MOVQ", jit.Type(vt), _AX) // MOVQ $(type(p.vt())), AX - self.Emit("MOVQ", _AX, jit.Ptr(_SP, 8)) // MOVQ AX, 8(SP) - - /* check for indirection */ - if !rt.UnpackType(vt).Indirect() { - self.Emit("MOVQ", _SP_p, _AX) // MOVQ SP.p, AX - } else { - self.Emit("MOVQ", _SP_p, _VAR_vp) // MOVQ SP.p, 48(SP) - self.Emit("LEAQ", _VAR_vp, _AX) // LEAQ 48(SP), AX - } - - /* call the encoder */ - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 16)) // MOVQ AX, 16(SP) - self.Emit("MOVQ" , _ST, jit.Ptr(_SP, 24)) // MOVQ ST, 24(SP) - self.Emit("MOVQ" , _ARG_fv, _AX) // MOVQ fv, AX - if pv { - self.Emit("BTCQ", jit.Imm(bitPointerValue), _AX) // BTCQ $1, AX - } - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 32)) // MOVQ AX, 32(SP) - self.call_encoder(_F_encodeTypedPointer) // CALL encodeTypedPointer - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _ET) // MOVQ 40(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 48), _EP) // MOVQ 48(SP), EP - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error -} - -func (self *_Assembler) _asm_OP_is_nil(p *_Instr) { - self.Emit("CMPQ", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPQ (SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_nil_p1(p *_Instr) { - self.Emit("CMPQ", jit.Ptr(_SP_p, 8), jit.Imm(0)) // CMPQ 8(SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_zero_1(p *_Instr) { - self.Emit("CMPB", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPB (SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_zero_2(p *_Instr) { - self.Emit("CMPW", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPW (SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_zero_4(p *_Instr) { - self.Emit("CMPL", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPL (SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_zero_8(p *_Instr) { - self.Emit("CMPQ", jit.Ptr(_SP_p, 0), jit.Imm(0)) // CMPQ (SP.p), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_is_zero_map(p *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _AX) // MOVQ (SP.p), AX - self.Emit("TESTQ", _AX, _AX) // TESTQ AX, AX - self.Xjmp("JZ" , p.vi()) // JZ p.vi() - self.Emit("CMPQ" , jit.Ptr(_AX, 0), jit.Imm(0)) // CMPQ (AX), $0 - self.Xjmp("JE" , p.vi()) // JE p.vi() -} - -func (self *_Assembler) _asm_OP_goto(p *_Instr) { - self.Xjmp("JMP", p.vi()) -} - -func (self *_Assembler) _asm_OP_map_iter(p *_Instr) { - self.Emit("MOVQ" , jit.Type(p.vt()), _AX) // MOVQ $p.vt(), AX - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _CX) // MOVQ (SP.p), CX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 0)) // MOVQ AX, (SP) - self.Emit("MOVQ" , _CX, jit.Ptr(_SP, 8)) // MOVQ CX, 8(SP) - self.Emit("MOVQ" , _ARG_fv, _AX) // MOVQ fv, AX - self.Emit("MOVQ" , _AX, jit.Ptr(_SP, 16)) // MOVQ AX, 16(SP) - self.call_go(_F_iteratorStart) // CALL_GO iteratorStart - self.Emit("MOVQ" , jit.Ptr(_SP, 24), _SP_q) // MOVQ 24(SP), SP.q - self.Emit("MOVQ" , jit.Ptr(_SP, 32), _ET) // MOVQ 32(SP), ET - self.Emit("MOVQ" , jit.Ptr(_SP, 40), _EP) // MOVQ 40(SP), EP - self.Emit("TESTQ", _ET, _ET) // TESTQ ET, ET - self.Sjmp("JNZ" , _LB_error) // JNZ _error -} - -func (self *_Assembler) _asm_OP_map_stop(_ *_Instr) { - self.Emit("MOVQ", _SP_q, jit.Ptr(_SP, 0)) // MOVQ SP.q, 0(SP) - self.call_go(_F_iteratorStop) // CALL_GO iteratorStop - self.Emit("XORL", _SP_q, _SP_q) // XORL SP.q, SP.q -} - -func (self *_Assembler) _asm_OP_map_check_key(p *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_SP_q, 0), _SP_p) // MOVQ (SP.q), SP.p - self.Emit("TESTQ", _SP_p, _SP_p) // TESTQ SP.p, SP.p - self.Xjmp("JZ" , p.vi()) // JNZ p.vi() -} - -func (self *_Assembler) _asm_OP_map_write_key(p *_Instr) { - self.Emit("BTQ", jit.Imm(bitSortMapKeys), _ARG_fv) // BTQ ${SortMapKeys}, fv - self.Sjmp("JNC", "_unordered_key_{n}") // JNC _unordered_key_{n} - self.encode_string(false) // STR $false - self.Xjmp("JMP", p.vi()) // JMP ${p.vi()} - self.Link("_unordered_key_{n}") // _unordered_key_{n}: -} - -func (self *_Assembler) _asm_OP_map_value_next(_ *_Instr) { - self.Emit("MOVQ", jit.Ptr(_SP_q, 8), _SP_p) // MOVQ 8(SP.q), SP.p - self.Emit("MOVQ", _SP_q, jit.Ptr(_SP, 0)) // MOVQ SP.q, (SP) - self.call_go(_F_iteratorNext) // CALL_GO iteratorNext -} - -func (self *_Assembler) _asm_OP_slice_len(_ *_Instr) { - self.Emit("MOVQ" , jit.Ptr(_SP_p, 8), _SP_x) // MOVQ 8(SP.p), SP.x - self.Emit("MOVQ" , jit.Ptr(_SP_p, 0), _SP_p) // MOVQ (SP.p), SP.p - self.Emit("ORQ" , jit.Imm(1 << _S_init), _SP_f) // ORQ $(1<<_S_init), SP.f -} - -func (self *_Assembler) _asm_OP_slice_next(p *_Instr) { - self.Emit("TESTQ" , _SP_x, _SP_x) // TESTQ SP.x, SP.x - self.Xjmp("JZ" , p.vi()) // JZ p.vi() - self.Emit("SUBQ" , jit.Imm(1), _SP_x) // SUBQ $1, SP.x - self.Emit("BTRQ" , jit.Imm(_S_init), _SP_f) // BTRQ $_S_init, SP.f - self.Emit("LEAQ" , jit.Ptr(_SP_p, int64(p.vlen())), _AX) // LEAQ $(p.vlen())(SP.p), AX - self.Emit("CMOVQCC", _AX, _SP_p) // CMOVQNC AX, SP.p -} - -func (self *_Assembler) _asm_OP_marshal(p *_Instr) { - self.call_marshaler(_F_encodeJsonMarshaler, _T_json_Marshaler, p.vt()) -} - -func (self *_Assembler) _asm_OP_marshal_p(p *_Instr) { - if p.vk() != reflect.Ptr { - panic("marshal_p: invalid type") - } else { - self.call_marshaler_v(_F_encodeJsonMarshaler, _T_json_Marshaler, p.vt(), false) - } -} - -func (self *_Assembler) _asm_OP_marshal_text(p *_Instr) { - self.call_marshaler(_F_encodeTextMarshaler, _T_encoding_TextMarshaler, p.vt()) -} - -func (self *_Assembler) _asm_OP_marshal_text_p(p *_Instr) { - if p.vk() != reflect.Ptr { - panic("marshal_text_p: invalid type") - } else { - self.call_marshaler_v(_F_encodeTextMarshaler, _T_encoding_TextMarshaler, p.vt(), false) - } -} - -func (self *_Assembler) _asm_OP_cond_set(_ *_Instr) { - self.Emit("ORQ", jit.Imm(1 << _S_cond), _SP_f) // ORQ $(1<<_S_cond), SP.f -} - -func (self *_Assembler) _asm_OP_cond_testc(p *_Instr) { - self.Emit("BTRQ", jit.Imm(_S_cond), _SP_f) // BTRQ $_S_cond, SP.f - self.Xjmp("JC" , p.vi()) -} - -func (self *_Assembler) print_gc(i int, p1 *_Instr, p2 *_Instr) { - self.Emit("MOVQ", jit.Imm(int64(p2.op())), jit.Ptr(_SP, 16))// MOVQ $(p2.op()), 16(SP) - self.Emit("MOVQ", jit.Imm(int64(p1.op())), jit.Ptr(_SP, 8)) // MOVQ $(p1.op()), 8(SP) - self.Emit("MOVQ", jit.Imm(int64(i)), jit.Ptr(_SP, 0)) // MOVQ $(i), (SP) - self.call_go(_F_println) -} diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/compiler.go b/vendor/github.com/bytedance/sonic/internal/encoder/compiler.go index a949c90f..902fbc98 100644 --- a/vendor/github.com/bytedance/sonic/internal/encoder/compiler.go +++ b/vendor/github.com/bytedance/sonic/internal/encoder/compiler.go @@ -17,869 +17,660 @@ package encoder import ( - `fmt` - `reflect` - `strconv` - `strings` - `unsafe` - - `github.com/bytedance/sonic/internal/resolver` - `github.com/bytedance/sonic/internal/rt` - `github.com/bytedance/sonic/option` + "reflect" + "unsafe" + + "github.com/bytedance/sonic/internal/encoder/ir" + "github.com/bytedance/sonic/internal/encoder/vars" + "github.com/bytedance/sonic/internal/encoder/vm" + "github.com/bytedance/sonic/internal/resolver" + "github.com/bytedance/sonic/internal/rt" + "github.com/bytedance/sonic/option" ) -type _Op uint8 - -const ( - _OP_null _Op = iota + 1 - _OP_empty_arr - _OP_empty_obj - _OP_bool - _OP_i8 - _OP_i16 - _OP_i32 - _OP_i64 - _OP_u8 - _OP_u16 - _OP_u32 - _OP_u64 - _OP_f32 - _OP_f64 - _OP_str - _OP_bin - _OP_quote - _OP_number - _OP_eface - _OP_iface - _OP_byte - _OP_text - _OP_deref - _OP_index - _OP_load - _OP_save - _OP_drop - _OP_drop_2 - _OP_recurse - _OP_is_nil - _OP_is_nil_p1 - _OP_is_zero_1 - _OP_is_zero_2 - _OP_is_zero_4 - _OP_is_zero_8 - _OP_is_zero_map - _OP_goto - _OP_map_iter - _OP_map_stop - _OP_map_check_key - _OP_map_write_key - _OP_map_value_next - _OP_slice_len - _OP_slice_next - _OP_marshal - _OP_marshal_p - _OP_marshal_text - _OP_marshal_text_p - _OP_cond_set - _OP_cond_testc -) - -const ( - _INT_SIZE = 32 << (^uint(0) >> 63) - _PTR_SIZE = 32 << (^uintptr(0) >> 63) - _PTR_BYTE = unsafe.Sizeof(uintptr(0)) -) - -const ( - _MAX_ILBUF = 100000 // cutoff at 100k of IL instructions - _MAX_FIELDS = 50 // cutoff at 50 fields struct -) - -var _OpNames = [256]string { - _OP_null : "null", - _OP_empty_arr : "empty_arr", - _OP_empty_obj : "empty_obj", - _OP_bool : "bool", - _OP_i8 : "i8", - _OP_i16 : "i16", - _OP_i32 : "i32", - _OP_i64 : "i64", - _OP_u8 : "u8", - _OP_u16 : "u16", - _OP_u32 : "u32", - _OP_u64 : "u64", - _OP_f32 : "f32", - _OP_f64 : "f64", - _OP_str : "str", - _OP_bin : "bin", - _OP_quote : "quote", - _OP_number : "number", - _OP_eface : "eface", - _OP_iface : "iface", - _OP_byte : "byte", - _OP_text : "text", - _OP_deref : "deref", - _OP_index : "index", - _OP_load : "load", - _OP_save : "save", - _OP_drop : "drop", - _OP_drop_2 : "drop_2", - _OP_recurse : "recurse", - _OP_is_nil : "is_nil", - _OP_is_nil_p1 : "is_nil_p1", - _OP_is_zero_1 : "is_zero_1", - _OP_is_zero_2 : "is_zero_2", - _OP_is_zero_4 : "is_zero_4", - _OP_is_zero_8 : "is_zero_8", - _OP_is_zero_map : "is_zero_map", - _OP_goto : "goto", - _OP_map_iter : "map_iter", - _OP_map_stop : "map_stop", - _OP_map_check_key : "map_check_key", - _OP_map_write_key : "map_write_key", - _OP_map_value_next : "map_value_next", - _OP_slice_len : "slice_len", - _OP_slice_next : "slice_next", - _OP_marshal : "marshal", - _OP_marshal_p : "marshal_p", - _OP_marshal_text : "marshal_text", - _OP_marshal_text_p : "marshal_text_p", - _OP_cond_set : "cond_set", - _OP_cond_testc : "cond_testc", -} - -func (self _Op) String() string { - if ret := _OpNames[self]; ret != "" { - return ret - } else { - return "" - } -} - -func _OP_int() _Op { - switch _INT_SIZE { - case 32: return _OP_i32 - case 64: return _OP_i64 - default: panic("unsupported int size") - } -} - -func _OP_uint() _Op { - switch _INT_SIZE { - case 32: return _OP_u32 - case 64: return _OP_u64 - default: panic("unsupported uint size") - } -} - -func _OP_uintptr() _Op { - switch _PTR_SIZE { - case 32: return _OP_u32 - case 64: return _OP_u64 - default: panic("unsupported pointer size") - } -} - -func _OP_is_zero_ints() _Op { - switch _INT_SIZE { - case 32: return _OP_is_zero_4 - case 64: return _OP_is_zero_8 - default: panic("unsupported integer size") - } -} - -type _Instr struct { - u uint64 // union {op: 8, _: 8, vi: 48}, vi maybe int or len(str) - p unsafe.Pointer // maybe GoString.Ptr, or *GoType -} - -func packOp(op _Op) uint64 { - return uint64(op) << 56 -} - -func newInsOp(op _Op) _Instr { - return _Instr{u: packOp(op)} -} - -func newInsVi(op _Op, vi int) _Instr { - return _Instr{u: packOp(op) | rt.PackInt(vi)} -} - -func newInsVs(op _Op, vs string) _Instr { - return _Instr { - u: packOp(op) | rt.PackInt(len(vs)), - p: (*rt.GoString)(unsafe.Pointer(&vs)).Ptr, - } -} - -func newInsVt(op _Op, vt reflect.Type) _Instr { - return _Instr { - u: packOp(op), - p: unsafe.Pointer(rt.UnpackType(vt)), - } -} - -func newInsVp(op _Op, vt reflect.Type, pv bool) _Instr { - i := 0 - if pv { - i = 1 - } - return _Instr { - u: packOp(op) | rt.PackInt(i), - p: unsafe.Pointer(rt.UnpackType(vt)), - } -} - -func (self _Instr) op() _Op { - return _Op(self.u >> 56) -} - -func (self _Instr) vi() int { - return rt.UnpackInt(self.u) -} - -func (self _Instr) vf() uint8 { - return (*rt.GoType)(self.p).KindFlags -} - -func (self _Instr) vs() (v string) { - (*rt.GoString)(unsafe.Pointer(&v)).Ptr = self.p - (*rt.GoString)(unsafe.Pointer(&v)).Len = self.vi() - return -} - -func (self _Instr) vk() reflect.Kind { - return (*rt.GoType)(self.p).Kind() -} - -func (self _Instr) vt() reflect.Type { - return (*rt.GoType)(self.p).Pack() -} - -func (self _Instr) vp() (vt reflect.Type, pv bool) { - return (*rt.GoType)(self.p).Pack(), rt.UnpackInt(self.u) == 1 -} - -func (self _Instr) i64() int64 { - return int64(self.vi()) -} - -func (self _Instr) vlen() int { - return int((*rt.GoType)(self.p).Size) -} - -func (self _Instr) isBranch() bool { - switch self.op() { - case _OP_goto : fallthrough - case _OP_is_nil : fallthrough - case _OP_is_nil_p1 : fallthrough - case _OP_is_zero_1 : fallthrough - case _OP_is_zero_2 : fallthrough - case _OP_is_zero_4 : fallthrough - case _OP_is_zero_8 : fallthrough - case _OP_map_check_key : fallthrough - case _OP_map_write_key : fallthrough - case _OP_slice_next : fallthrough - case _OP_cond_testc : return true - default : return false - } -} - -func (self _Instr) disassemble() string { - switch self.op() { - case _OP_byte : return fmt.Sprintf("%-18s%s", self.op().String(), strconv.QuoteRune(rune(self.vi()))) - case _OP_text : return fmt.Sprintf("%-18s%s", self.op().String(), strconv.Quote(self.vs())) - case _OP_index : return fmt.Sprintf("%-18s%d", self.op().String(), self.vi()) - case _OP_recurse : fallthrough - case _OP_map_iter : fallthrough - case _OP_marshal : fallthrough - case _OP_marshal_p : fallthrough - case _OP_marshal_text : fallthrough - case _OP_marshal_text_p : return fmt.Sprintf("%-18s%s", self.op().String(), self.vt()) - case _OP_goto : fallthrough - case _OP_is_nil : fallthrough - case _OP_is_nil_p1 : fallthrough - case _OP_is_zero_1 : fallthrough - case _OP_is_zero_2 : fallthrough - case _OP_is_zero_4 : fallthrough - case _OP_is_zero_8 : fallthrough - case _OP_is_zero_map : fallthrough - case _OP_cond_testc : fallthrough - case _OP_map_check_key : fallthrough - case _OP_map_write_key : return fmt.Sprintf("%-18sL_%d", self.op().String(), self.vi()) - case _OP_slice_next : return fmt.Sprintf("%-18sL_%d, %s", self.op().String(), self.vi(), self.vt()) - default : return self.op().String() - } -} - -type ( - _Program []_Instr -) - -func (self _Program) pc() int { - return len(self) -} - -func (self _Program) tag(n int) { - if n >= _MaxStack { - panic("type nesting too deep") - } -} - -func (self _Program) pin(i int) { - v := &self[i] - v.u &= 0xffff000000000000 - v.u |= rt.PackInt(self.pc()) -} - -func (self _Program) rel(v []int) { - for _, i := range v { - self.pin(i) - } -} - -func (self *_Program) add(op _Op) { - *self = append(*self, newInsOp(op)) -} - -func (self *_Program) key(op _Op) { - *self = append(*self, - newInsVi(_OP_byte, '"'), - newInsOp(op), - newInsVi(_OP_byte, '"'), - ) -} - -func (self *_Program) int(op _Op, vi int) { - *self = append(*self, newInsVi(op, vi)) -} - -func (self *_Program) str(op _Op, vs string) { - *self = append(*self, newInsVs(op, vs)) -} - -func (self *_Program) rtt(op _Op, vt reflect.Type) { - *self = append(*self, newInsVt(op, vt)) -} - -func (self *_Program) vp(op _Op, vt reflect.Type, pv bool) { - *self = append(*self, newInsVp(op, vt, pv)) -} - -func (self _Program) disassemble() string { - nb := len(self) - tab := make([]bool, nb + 1) - ret := make([]string, 0, nb + 1) - - /* prescan to get all the labels */ - for _, ins := range self { - if ins.isBranch() { - tab[ins.vi()] = true - } - } - - /* disassemble each instruction */ - for i, ins := range self { - if !tab[i] { - ret = append(ret, "\t" + ins.disassemble()) - } else { - ret = append(ret, fmt.Sprintf("L_%d:\n\t%s", i, ins.disassemble())) - } - } - - /* add the last label, if needed */ - if tab[nb] { - ret = append(ret, fmt.Sprintf("L_%d:", nb)) - } - - /* add an "end" indicator, and join all the strings */ - return strings.Join(append(ret, "\tend"), "\n") -} - -type _Compiler struct { - opts option.CompileOptions - pv bool - tab map[reflect.Type]bool - rec map[reflect.Type]uint8 -} - -func newCompiler() *_Compiler { - return &_Compiler { - opts: option.DefaultCompileOptions(), - tab: map[reflect.Type]bool{}, - rec: map[reflect.Type]uint8{}, - } -} - -func (self *_Compiler) apply(opts option.CompileOptions) *_Compiler { - self.opts = opts - if self.opts.RecursiveDepth > 0 { - self.rec = map[reflect.Type]uint8{} - } - return self -} - -func (self *_Compiler) rescue(ep *error) { - if val := recover(); val != nil { - if err, ok := val.(error); ok { - *ep = err - } else { - panic(val) - } - } -} - -func (self *_Compiler) compile(vt reflect.Type, pv bool) (ret _Program, err error) { - defer self.rescue(&err) - self.compileOne(&ret, 0, vt, pv) - return -} - -func (self *_Compiler) compileOne(p *_Program, sp int, vt reflect.Type, pv bool) { - if self.tab[vt] { - p.vp(_OP_recurse, vt, pv) - } else { - self.compileRec(p, sp, vt, pv) - } -} - -func (self *_Compiler) compileRec(p *_Program, sp int, vt reflect.Type, pv bool) { - pr := self.pv - pt := reflect.PtrTo(vt) - - /* check for addressable `json.Marshaler` with pointer receiver */ - if pv && pt.Implements(jsonMarshalerType) { - p.rtt(_OP_marshal_p, pt) - return - } - - /* check for `json.Marshaler` */ - if vt.Implements(jsonMarshalerType) { - self.compileMarshaler(p, _OP_marshal, vt, jsonMarshalerType) - return - } - - /* check for addressable `encoding.TextMarshaler` with pointer receiver */ - if pv && pt.Implements(encodingTextMarshalerType) { - p.rtt(_OP_marshal_text_p, pt) - return - } - - /* check for `encoding.TextMarshaler` */ - if vt.Implements(encodingTextMarshalerType) { - self.compileMarshaler(p, _OP_marshal_text, vt, encodingTextMarshalerType) - return - } - - /* enter the recursion, and compile the type */ - self.pv = pv - self.tab[vt] = true - self.compileOps(p, sp, vt) - - /* exit the recursion */ - self.pv = pr - delete(self.tab, vt) -} - -func (self *_Compiler) compileOps(p *_Program, sp int, vt reflect.Type) { - switch vt.Kind() { - case reflect.Bool : p.add(_OP_bool) - case reflect.Int : p.add(_OP_int()) - case reflect.Int8 : p.add(_OP_i8) - case reflect.Int16 : p.add(_OP_i16) - case reflect.Int32 : p.add(_OP_i32) - case reflect.Int64 : p.add(_OP_i64) - case reflect.Uint : p.add(_OP_uint()) - case reflect.Uint8 : p.add(_OP_u8) - case reflect.Uint16 : p.add(_OP_u16) - case reflect.Uint32 : p.add(_OP_u32) - case reflect.Uint64 : p.add(_OP_u64) - case reflect.Uintptr : p.add(_OP_uintptr()) - case reflect.Float32 : p.add(_OP_f32) - case reflect.Float64 : p.add(_OP_f64) - case reflect.String : self.compileString (p, vt) - case reflect.Array : self.compileArray (p, sp, vt.Elem(), vt.Len()) - case reflect.Interface : self.compileInterface (p, vt) - case reflect.Map : self.compileMap (p, sp, vt) - case reflect.Ptr : self.compilePtr (p, sp, vt.Elem()) - case reflect.Slice : self.compileSlice (p, sp, vt.Elem()) - case reflect.Struct : self.compileStruct (p, sp, vt) - default : panic (error_type(vt)) - } -} - -func (self *_Compiler) compileNil(p *_Program, sp int, vt reflect.Type, nil_op _Op, fn func(*_Program, int, reflect.Type)) { - x := p.pc() - p.add(_OP_is_nil) - fn(p, sp, vt) - e := p.pc() - p.add(_OP_goto) - p.pin(x) - p.add(nil_op) - p.pin(e) -} - -func (self *_Compiler) compilePtr(p *_Program, sp int, vt reflect.Type) { - self.compileNil(p, sp, vt, _OP_null, self.compilePtrBody) -} - -func (self *_Compiler) compilePtrBody(p *_Program, sp int, vt reflect.Type) { - p.tag(sp) - p.add(_OP_save) - p.add(_OP_deref) - self.compileOne(p, sp + 1, vt, true) - p.add(_OP_drop) -} - -func (self *_Compiler) compileMap(p *_Program, sp int, vt reflect.Type) { - self.compileNil(p, sp, vt, _OP_empty_obj, self.compileMapBody) -} - -func (self *_Compiler) compileMapBody(p *_Program, sp int, vt reflect.Type) { - p.tag(sp + 1) - p.int(_OP_byte, '{') - p.add(_OP_save) - p.rtt(_OP_map_iter, vt) - p.add(_OP_save) - i := p.pc() - p.add(_OP_map_check_key) - u := p.pc() - p.add(_OP_map_write_key) - self.compileMapBodyKey(p, vt.Key()) - p.pin(u) - p.int(_OP_byte, ':') - p.add(_OP_map_value_next) - self.compileOne(p, sp + 2, vt.Elem(), false) - j := p.pc() - p.add(_OP_map_check_key) - p.int(_OP_byte, ',') - v := p.pc() - p.add(_OP_map_write_key) - self.compileMapBodyKey(p, vt.Key()) - p.pin(v) - p.int(_OP_byte, ':') - p.add(_OP_map_value_next) - self.compileOne(p, sp + 2, vt.Elem(), false) - p.int(_OP_goto, j) - p.pin(i) - p.pin(j) - p.add(_OP_map_stop) - p.add(_OP_drop_2) - p.int(_OP_byte, '}') -} - -func (self *_Compiler) compileMapBodyKey(p *_Program, vk reflect.Type) { - if !vk.Implements(encodingTextMarshalerType) { - self.compileMapBodyTextKey(p, vk) - } else { - self.compileMapBodyUtextKey(p, vk) - } -} - -func (self *_Compiler) compileMapBodyTextKey(p *_Program, vk reflect.Type) { - switch vk.Kind() { - case reflect.Invalid : panic("map key is nil") - case reflect.Bool : p.key(_OP_bool) - case reflect.Int : p.key(_OP_int()) - case reflect.Int8 : p.key(_OP_i8) - case reflect.Int16 : p.key(_OP_i16) - case reflect.Int32 : p.key(_OP_i32) - case reflect.Int64 : p.key(_OP_i64) - case reflect.Uint : p.key(_OP_uint()) - case reflect.Uint8 : p.key(_OP_u8) - case reflect.Uint16 : p.key(_OP_u16) - case reflect.Uint32 : p.key(_OP_u32) - case reflect.Uint64 : p.key(_OP_u64) - case reflect.Uintptr : p.key(_OP_uintptr()) - case reflect.Float32 : p.key(_OP_f32) - case reflect.Float64 : p.key(_OP_f64) - case reflect.String : self.compileString(p, vk) - default : panic(error_type(vk)) - } -} - -func (self *_Compiler) compileMapBodyUtextKey(p *_Program, vk reflect.Type) { - if vk.Kind() != reflect.Ptr { - p.rtt(_OP_marshal_text, vk) - } else { - self.compileMapBodyUtextPtr(p, vk) - } -} - -func (self *_Compiler) compileMapBodyUtextPtr(p *_Program, vk reflect.Type) { - i := p.pc() - p.add(_OP_is_nil) - p.rtt(_OP_marshal_text, vk) - j := p.pc() - p.add(_OP_goto) - p.pin(i) - p.str(_OP_text, "\"\"") - p.pin(j) -} - -func (self *_Compiler) compileSlice(p *_Program, sp int, vt reflect.Type) { - self.compileNil(p, sp, vt, _OP_empty_arr, self.compileSliceBody) -} - -func (self *_Compiler) compileSliceBody(p *_Program, sp int, vt reflect.Type) { - if isSimpleByte(vt) { - p.add(_OP_bin) - } else { - self.compileSliceArray(p, sp, vt) - } -} - -func (self *_Compiler) compileSliceArray(p *_Program, sp int, vt reflect.Type) { - p.tag(sp) - p.int(_OP_byte, '[') - p.add(_OP_save) - p.add(_OP_slice_len) - i := p.pc() - p.rtt(_OP_slice_next, vt) - self.compileOne(p, sp + 1, vt, true) - j := p.pc() - p.rtt(_OP_slice_next, vt) - p.int(_OP_byte, ',') - self.compileOne(p, sp + 1, vt, true) - p.int(_OP_goto, j) - p.pin(i) - p.pin(j) - p.add(_OP_drop) - p.int(_OP_byte, ']') -} - -func (self *_Compiler) compileArray(p *_Program, sp int, vt reflect.Type, nb int) { - p.tag(sp) - p.int(_OP_byte, '[') - p.add(_OP_save) - - /* first item */ - if nb != 0 { - self.compileOne(p, sp + 1, vt, self.pv) - p.add(_OP_load) - } - - /* remaining items */ - for i := 1; i < nb; i++ { - p.int(_OP_byte, ',') - p.int(_OP_index, i * int(vt.Size())) - self.compileOne(p, sp + 1, vt, self.pv) - p.add(_OP_load) - } - - /* end of array */ - p.add(_OP_drop) - p.int(_OP_byte, ']') -} - -func (self *_Compiler) compileString(p *_Program, vt reflect.Type) { - if vt != jsonNumberType { - p.add(_OP_str) - } else { - p.add(_OP_number) - } -} - -func (self *_Compiler) compileStruct(p *_Program, sp int, vt reflect.Type) { - if sp >= self.opts.MaxInlineDepth || p.pc() >= _MAX_ILBUF || (sp > 0 && vt.NumField() >= _MAX_FIELDS) { - p.vp(_OP_recurse, vt, self.pv) - if self.opts.RecursiveDepth > 0 { - if self.pv { - self.rec[vt] = 1 - } else { - self.rec[vt] = 0 - } - } - } else { - self.compileStructBody(p, sp, vt) - } -} - -func (self *_Compiler) compileStructBody(p *_Program, sp int, vt reflect.Type) { - p.tag(sp) - p.int(_OP_byte, '{') - p.add(_OP_save) - p.add(_OP_cond_set) - - /* compile each field */ - for _, fv := range resolver.ResolveStruct(vt) { - var s []int - var o resolver.Offset - - /* "omitempty" for arrays */ - if fv.Type.Kind() == reflect.Array { - if fv.Type.Len() == 0 && (fv.Opts & resolver.F_omitempty) != 0 { - continue - } - } - - /* index to the field */ - for _, o = range fv.Path { - if p.int(_OP_index, int(o.Size)); o.Kind == resolver.F_deref { - s = append(s, p.pc()) - p.add(_OP_is_nil) - p.add(_OP_deref) - } - } - - /* check for "omitempty" option */ - if fv.Type.Kind() != reflect.Struct && fv.Type.Kind() != reflect.Array && (fv.Opts & resolver.F_omitempty) != 0 { - s = append(s, p.pc()) - self.compileStructFieldZero(p, fv.Type) - } - - /* add the comma if not the first element */ - i := p.pc() - p.add(_OP_cond_testc) - p.int(_OP_byte, ',') - p.pin(i) - - /* compile the key and value */ - ft := fv.Type - p.str(_OP_text, Quote(fv.Name) + ":") - - /* check for "stringnize" option */ - if (fv.Opts & resolver.F_stringize) == 0 { - self.compileOne(p, sp + 1, ft, self.pv) - } else { - self.compileStructFieldStr(p, sp + 1, ft) - } - - /* patch the skipping jumps and reload the struct pointer */ - p.rel(s) - p.add(_OP_load) - } - - /* end of object */ - p.add(_OP_drop) - p.int(_OP_byte, '}') -} - -func (self *_Compiler) compileStructFieldStr(p *_Program, sp int, vt reflect.Type) { - pc := -1 - ft := vt - sv := false - - /* dereference the pointer if needed */ - if ft.Kind() == reflect.Ptr { - ft = ft.Elem() - } - - /* check if it can be stringized */ - switch ft.Kind() { - case reflect.Bool : sv = true - case reflect.Int : sv = true - case reflect.Int8 : sv = true - case reflect.Int16 : sv = true - case reflect.Int32 : sv = true - case reflect.Int64 : sv = true - case reflect.Uint : sv = true - case reflect.Uint8 : sv = true - case reflect.Uint16 : sv = true - case reflect.Uint32 : sv = true - case reflect.Uint64 : sv = true - case reflect.Uintptr : sv = true - case reflect.Float32 : sv = true - case reflect.Float64 : sv = true - case reflect.String : sv = true - } - - /* if it's not, ignore the "string" and follow the regular path */ - if !sv { - self.compileOne(p, sp, vt, self.pv) - return - } - - /* dereference the pointer */ - if vt.Kind() == reflect.Ptr { - pc = p.pc() - vt = vt.Elem() - p.add(_OP_is_nil) - p.add(_OP_deref) - } - - /* special case of a double-quoted string */ - if ft != jsonNumberType && ft.Kind() == reflect.String { - p.add(_OP_quote) - } else { - self.compileStructFieldQuoted(p, sp, vt) - } - - /* the "null" case of the pointer */ - if pc != -1 { - e := p.pc() - p.add(_OP_goto) - p.pin(pc) - p.add(_OP_null) - p.pin(e) - } -} - -func (self *_Compiler) compileStructFieldZero(p *_Program, vt reflect.Type) { - switch vt.Kind() { - case reflect.Bool : p.add(_OP_is_zero_1) - case reflect.Int : p.add(_OP_is_zero_ints()) - case reflect.Int8 : p.add(_OP_is_zero_1) - case reflect.Int16 : p.add(_OP_is_zero_2) - case reflect.Int32 : p.add(_OP_is_zero_4) - case reflect.Int64 : p.add(_OP_is_zero_8) - case reflect.Uint : p.add(_OP_is_zero_ints()) - case reflect.Uint8 : p.add(_OP_is_zero_1) - case reflect.Uint16 : p.add(_OP_is_zero_2) - case reflect.Uint32 : p.add(_OP_is_zero_4) - case reflect.Uint64 : p.add(_OP_is_zero_8) - case reflect.Uintptr : p.add(_OP_is_nil) - case reflect.Float32 : p.add(_OP_is_zero_4) - case reflect.Float64 : p.add(_OP_is_zero_8) - case reflect.String : p.add(_OP_is_nil_p1) - case reflect.Interface : p.add(_OP_is_nil_p1) - case reflect.Map : p.add(_OP_is_zero_map) - case reflect.Ptr : p.add(_OP_is_nil) - case reflect.Slice : p.add(_OP_is_nil_p1) - default : panic(error_type(vt)) - } -} - -func (self *_Compiler) compileStructFieldQuoted(p *_Program, sp int, vt reflect.Type) { - p.int(_OP_byte, '"') - self.compileOne(p, sp, vt, self.pv) - p.int(_OP_byte, '"') -} - -func (self *_Compiler) compileInterface(p *_Program, vt reflect.Type) { - x := p.pc() - p.add(_OP_is_nil_p1) - - /* iface and efaces are different */ - if vt.NumMethod() == 0 { - p.add(_OP_eface) - } else { - p.add(_OP_iface) - } - - /* the "null" value */ - e := p.pc() - p.add(_OP_goto) - p.pin(x) - p.add(_OP_null) - p.pin(e) -} - -func (self *_Compiler) compileMarshaler(p *_Program, op _Op, vt reflect.Type, mt reflect.Type) { - pc := p.pc() - vk := vt.Kind() - - /* direct receiver */ - if vk != reflect.Ptr { - p.rtt(op, vt) - return - } - - /* value receiver with a pointer type, check for nil before calling the marshaler */ - p.add(_OP_is_nil) - p.rtt(op, vt) - i := p.pc() - p.add(_OP_goto) - p.pin(pc) - p.add(_OP_null) - p.pin(i) +func ForceUseVM() { + vm.SetCompiler(makeEncoderVM) + pretouchType = pretouchTypeVM + encodeTypedPointer = vm.EncodeTypedPointer + vars.UseVM = true +} + +var encodeTypedPointer func(buf *[]byte, vt *rt.GoType, vp *unsafe.Pointer, sb *vars.Stack, fv uint64) error + +func makeEncoderVM(vt *rt.GoType, ex ...interface{}) (interface{}, error) { + pp, err := NewCompiler().Compile(vt.Pack(), ex[0].(bool)) + if err != nil { + return nil, err + } + return &pp, nil +} + +var pretouchType func(_vt reflect.Type, opts option.CompileOptions, v uint8) (map[reflect.Type]uint8, error) + +func pretouchTypeVM(_vt reflect.Type, opts option.CompileOptions, v uint8) (map[reflect.Type]uint8, error) { + /* compile function */ + compiler := NewCompiler().apply(opts) + + /* find or compile */ + vt := rt.UnpackType(_vt) + if val := vars.GetProgram(vt); val != nil { + return nil, nil + } else if _, err := vars.ComputeProgram(vt, makeEncoderVM, v == 1); err == nil { + return compiler.rec, nil + } else { + return nil, err + } +} + +func pretouchRec(vtm map[reflect.Type]uint8, opts option.CompileOptions) error { + if opts.RecursiveDepth < 0 || len(vtm) == 0 { + return nil + } + next := make(map[reflect.Type]uint8) + for vt, v := range vtm { + sub, err := pretouchType(vt, opts, v) + if err != nil { + return err + } + for svt, v := range sub { + next[svt] = v + } + } + opts.RecursiveDepth -= 1 + return pretouchRec(next, opts) +} + +type Compiler struct { + opts option.CompileOptions + pv bool + tab map[reflect.Type]bool + rec map[reflect.Type]uint8 +} + +func NewCompiler() *Compiler { + return &Compiler{ + opts: option.DefaultCompileOptions(), + tab: map[reflect.Type]bool{}, + rec: map[reflect.Type]uint8{}, + } +} + +func (self *Compiler) apply(opts option.CompileOptions) *Compiler { + self.opts = opts + if self.opts.RecursiveDepth > 0 { + self.rec = map[reflect.Type]uint8{} + } + return self +} + +func (self *Compiler) rescue(ep *error) { + if val := recover(); val != nil { + if err, ok := val.(error); ok { + *ep = err + } else { + panic(val) + } + } +} + +func (self *Compiler) Compile(vt reflect.Type, pv bool) (ret ir.Program, err error) { + defer self.rescue(&err) + self.compileOne(&ret, 0, vt, pv) + return +} + +func (self *Compiler) compileOne(p *ir.Program, sp int, vt reflect.Type, pv bool) { + if self.tab[vt] { + p.Vp(ir.OP_recurse, vt, pv) + } else { + self.compileRec(p, sp, vt, pv) + } +} + +func (self *Compiler) tryCompileMarshaler(p *ir.Program, vt reflect.Type, pv bool) bool { + pt := reflect.PtrTo(vt) + + /* check for addressable `json.Marshaler` with pointer receiver */ + if pv && pt.Implements(vars.JsonMarshalerType) { + addMarshalerOp(p, ir.OP_marshal_p, pt, vars.JsonMarshalerType) + return true + } + + /* check for `json.Marshaler` */ + if vt.Implements(vars.JsonMarshalerType) { + self.compileMarshaler(p, ir.OP_marshal, vt, vars.JsonMarshalerType) + return true + } + + /* check for addressable `encoding.TextMarshaler` with pointer receiver */ + if pv && pt.Implements(vars.EncodingTextMarshalerType) { + addMarshalerOp(p, ir.OP_marshal_text_p, pt, vars.EncodingTextMarshalerType) + return true + } + + /* check for `encoding.TextMarshaler` */ + if vt.Implements(vars.EncodingTextMarshalerType) { + self.compileMarshaler(p, ir.OP_marshal_text, vt, vars.EncodingTextMarshalerType) + return true + } + + return false +} + +func (self *Compiler) compileRec(p *ir.Program, sp int, vt reflect.Type, pv bool) { + pr := self.pv + + if self.tryCompileMarshaler(p, vt, pv) { + return + } + + /* enter the recursion, and compile the type */ + self.pv = pv + self.tab[vt] = true + self.compileOps(p, sp, vt) + + /* exit the recursion */ + self.pv = pr + delete(self.tab, vt) +} + +func (self *Compiler) compileOps(p *ir.Program, sp int, vt reflect.Type) { + switch vt.Kind() { + case reflect.Bool: + p.Add(ir.OP_bool) + case reflect.Int: + p.Add(ir.OP_int()) + case reflect.Int8: + p.Add(ir.OP_i8) + case reflect.Int16: + p.Add(ir.OP_i16) + case reflect.Int32: + p.Add(ir.OP_i32) + case reflect.Int64: + p.Add(ir.OP_i64) + case reflect.Uint: + p.Add(ir.OP_uint()) + case reflect.Uint8: + p.Add(ir.OP_u8) + case reflect.Uint16: + p.Add(ir.OP_u16) + case reflect.Uint32: + p.Add(ir.OP_u32) + case reflect.Uint64: + p.Add(ir.OP_u64) + case reflect.Uintptr: + p.Add(ir.OP_uintptr()) + case reflect.Float32: + p.Add(ir.OP_f32) + case reflect.Float64: + p.Add(ir.OP_f64) + case reflect.String: + self.compileString(p, vt) + case reflect.Array: + self.compileArray(p, sp, vt.Elem(), vt.Len()) + case reflect.Interface: + self.compileInterface(p, vt) + case reflect.Map: + self.compileMap(p, sp, vt) + case reflect.Ptr: + self.compilePtr(p, sp, vt.Elem()) + case reflect.Slice: + self.compileSlice(p, sp, vt.Elem()) + case reflect.Struct: + self.compileStruct(p, sp, vt) + default: + panic(vars.Error_type(vt)) + } +} + +func (self *Compiler) compileNil(p *ir.Program, sp int, vt reflect.Type, nil_op ir.Op, fn func(*ir.Program, int, reflect.Type)) { + x := p.PC() + p.Add(ir.OP_is_nil) + fn(p, sp, vt) + e := p.PC() + p.Add(ir.OP_goto) + p.Pin(x) + p.Add(nil_op) + p.Pin(e) +} + +func (self *Compiler) compilePtr(p *ir.Program, sp int, vt reflect.Type) { + self.compileNil(p, sp, vt, ir.OP_null, self.compilePtrBody) +} + +func (self *Compiler) compilePtrBody(p *ir.Program, sp int, vt reflect.Type) { + p.Tag(sp) + p.Add(ir.OP_save) + p.Add(ir.OP_deref) + self.compileOne(p, sp+1, vt, true) + p.Add(ir.OP_drop) +} + +func (self *Compiler) compileMap(p *ir.Program, sp int, vt reflect.Type) { + self.compileNil(p, sp, vt, ir.OP_empty_obj, self.compileMapBody) +} + +func (self *Compiler) compileMapBody(p *ir.Program, sp int, vt reflect.Type) { + p.Tag(sp + 1) + p.Int(ir.OP_byte, '{') + e := p.PC() + p.Add(ir.OP_is_zero_map) + p.Add(ir.OP_save) + p.Rtt(ir.OP_map_iter, vt) + p.Add(ir.OP_save) + i := p.PC() + p.Add(ir.OP_map_check_key) + u := p.PC() + p.Add(ir.OP_map_write_key) + self.compileMapBodyKey(p, vt.Key()) + p.Pin(u) + p.Int(ir.OP_byte, ':') + p.Add(ir.OP_map_value_next) + self.compileOne(p, sp+2, vt.Elem(), false) + j := p.PC() + p.Add(ir.OP_map_check_key) + p.Int(ir.OP_byte, ',') + v := p.PC() + p.Add(ir.OP_map_write_key) + self.compileMapBodyKey(p, vt.Key()) + p.Pin(v) + p.Int(ir.OP_byte, ':') + p.Add(ir.OP_map_value_next) + self.compileOne(p, sp+2, vt.Elem(), false) + p.Int(ir.OP_goto, j) + p.Pin(i) + p.Pin(j) + p.Add(ir.OP_map_stop) + p.Add(ir.OP_drop_2) + p.Pin(e) + p.Int(ir.OP_byte, '}') +} + +func (self *Compiler) compileMapBodyKey(p *ir.Program, vk reflect.Type) { + if !vk.Implements(vars.EncodingTextMarshalerType) { + self.compileMapBodyTextKey(p, vk) + } else { + self.compileMapBodyUtextKey(p, vk) + } +} + +func (self *Compiler) compileMapBodyTextKey(p *ir.Program, vk reflect.Type) { + switch vk.Kind() { + case reflect.Invalid: + panic("map key is nil") + case reflect.Bool: + p.Key(ir.OP_bool) + case reflect.Int: + p.Key(ir.OP_int()) + case reflect.Int8: + p.Key(ir.OP_i8) + case reflect.Int16: + p.Key(ir.OP_i16) + case reflect.Int32: + p.Key(ir.OP_i32) + case reflect.Int64: + p.Key(ir.OP_i64) + case reflect.Uint: + p.Key(ir.OP_uint()) + case reflect.Uint8: + p.Key(ir.OP_u8) + case reflect.Uint16: + p.Key(ir.OP_u16) + case reflect.Uint32: + p.Key(ir.OP_u32) + case reflect.Uint64: + p.Key(ir.OP_u64) + case reflect.Uintptr: + p.Key(ir.OP_uintptr()) + case reflect.Float32: + p.Key(ir.OP_f32) + case reflect.Float64: + p.Key(ir.OP_f64) + case reflect.String: + self.compileString(p, vk) + default: + panic(vars.Error_type(vk)) + } +} + +func (self *Compiler) compileMapBodyUtextKey(p *ir.Program, vk reflect.Type) { + if vk.Kind() != reflect.Ptr { + addMarshalerOp(p, ir.OP_marshal_text, vk, vars.EncodingTextMarshalerType) + } else { + self.compileMapBodyUtextPtr(p, vk) + } +} + +func (self *Compiler) compileMapBodyUtextPtr(p *ir.Program, vk reflect.Type) { + i := p.PC() + p.Add(ir.OP_is_nil) + addMarshalerOp(p, ir.OP_marshal_text, vk, vars.EncodingTextMarshalerType) + j := p.PC() + p.Add(ir.OP_goto) + p.Pin(i) + p.Str(ir.OP_text, "\"\"") + p.Pin(j) +} + +func (self *Compiler) compileSlice(p *ir.Program, sp int, vt reflect.Type) { + self.compileNil(p, sp, vt, ir.OP_empty_arr, self.compileSliceBody) +} + +func (self *Compiler) compileSliceBody(p *ir.Program, sp int, vt reflect.Type) { + if vars.IsSimpleByte(vt) { + p.Add(ir.OP_bin) + } else { + self.compileSliceArray(p, sp, vt) + } +} + +func (self *Compiler) compileSliceArray(p *ir.Program, sp int, vt reflect.Type) { + p.Tag(sp) + p.Int(ir.OP_byte, '[') + e := p.PC() + p.Add(ir.OP_is_nil) + p.Add(ir.OP_save) + p.Add(ir.OP_slice_len) + i := p.PC() + p.Rtt(ir.OP_slice_next, vt) + self.compileOne(p, sp+1, vt, true) + j := p.PC() + p.Rtt(ir.OP_slice_next, vt) + p.Int(ir.OP_byte, ',') + self.compileOne(p, sp+1, vt, true) + p.Int(ir.OP_goto, j) + p.Pin(i) + p.Pin(j) + p.Add(ir.OP_drop) + p.Pin(e) + p.Int(ir.OP_byte, ']') +} + +func (self *Compiler) compileArray(p *ir.Program, sp int, vt reflect.Type, nb int) { + p.Tag(sp) + p.Int(ir.OP_byte, '[') + p.Add(ir.OP_save) + + /* first item */ + if nb != 0 { + self.compileOne(p, sp+1, vt, self.pv) + p.Add(ir.OP_load) + } + + /* remaining items */ + for i := 1; i < nb; i++ { + p.Int(ir.OP_byte, ',') + p.Int(ir.OP_index, i*int(vt.Size())) + self.compileOne(p, sp+1, vt, self.pv) + p.Add(ir.OP_load) + } + + /* end of array */ + p.Add(ir.OP_drop) + p.Int(ir.OP_byte, ']') +} + +func (self *Compiler) compileString(p *ir.Program, vt reflect.Type) { + if vt != vars.JsonNumberType { + p.Add(ir.OP_str) + } else { + p.Add(ir.OP_number) + } +} + +func (self *Compiler) compileStruct(p *ir.Program, sp int, vt reflect.Type) { + if sp >= self.opts.MaxInlineDepth || p.PC() >= vars.MAX_ILBUF || (sp > 0 && vt.NumField() >= vars.MAX_FIELDS) { + p.Vp(ir.OP_recurse, vt, self.pv) + if self.opts.RecursiveDepth > 0 { + if self.pv { + self.rec[vt] = 1 + } else { + self.rec[vt] = 0 + } + } + } else { + self.compileStructBody(p, sp, vt) + } +} + +func (self *Compiler) compileStructBody(p *ir.Program, sp int, vt reflect.Type) { + p.Tag(sp) + p.Int(ir.OP_byte, '{') + p.Add(ir.OP_save) + p.Add(ir.OP_cond_set) + + /* compile each field */ + for _, fv := range resolver.ResolveStruct(vt) { + var s []int + var o resolver.Offset + + /* "omitempty" for arrays */ + if fv.Type.Kind() == reflect.Array { + if fv.Type.Len() == 0 && (fv.Opts&resolver.F_omitempty) != 0 { + continue + } + } + + /* index to the field */ + for _, o = range fv.Path { + if p.Int(ir.OP_index, int(o.Size)); o.Kind == resolver.F_deref { + s = append(s, p.PC()) + p.Add(ir.OP_is_nil) + p.Add(ir.OP_deref) + } + } + + /* check for "omitempty" option */ + if fv.Type.Kind() != reflect.Struct && fv.Type.Kind() != reflect.Array && (fv.Opts&resolver.F_omitempty) != 0 { + s = append(s, p.PC()) + self.compileStructFieldZero(p, fv.Type) + } + + /* add the comma if not the first element */ + i := p.PC() + p.Add(ir.OP_cond_testc) + p.Int(ir.OP_byte, ',') + p.Pin(i) + + /* compile the key and value */ + ft := fv.Type + p.Str(ir.OP_text, Quote(fv.Name)+":") + + /* check for "stringnize" option */ + if (fv.Opts & resolver.F_stringize) == 0 { + self.compileOne(p, sp+1, ft, self.pv) + } else { + self.compileStructFieldStr(p, sp+1, ft) + } + + /* patch the skipping jumps and reload the struct pointer */ + p.Rel(s) + p.Add(ir.OP_load) + } + + /* end of object */ + p.Add(ir.OP_drop) + p.Int(ir.OP_byte, '}') +} + +func (self *Compiler) compileStructFieldStr(p *ir.Program, sp int, vt reflect.Type) { + // NOTICE: according to encoding/json, Marshaler type has higher priority than string option + // see issue: + if self.tryCompileMarshaler(p, vt, self.pv) { + return + } + + pc := -1 + ft := vt + sv := false + + /* dereference the pointer if needed */ + if ft.Kind() == reflect.Ptr { + ft = ft.Elem() + } + + /* check if it can be stringized */ + switch ft.Kind() { + case reflect.Bool: + sv = true + case reflect.Int: + sv = true + case reflect.Int8: + sv = true + case reflect.Int16: + sv = true + case reflect.Int32: + sv = true + case reflect.Int64: + sv = true + case reflect.Uint: + sv = true + case reflect.Uint8: + sv = true + case reflect.Uint16: + sv = true + case reflect.Uint32: + sv = true + case reflect.Uint64: + sv = true + case reflect.Uintptr: + sv = true + case reflect.Float32: + sv = true + case reflect.Float64: + sv = true + case reflect.String: + sv = true + } + + /* if it's not, ignore the "string" and follow the regular path */ + if !sv { + self.compileOne(p, sp, vt, self.pv) + return + } + + /* dereference the pointer */ + if vt.Kind() == reflect.Ptr { + pc = p.PC() + vt = vt.Elem() + p.Add(ir.OP_is_nil) + p.Add(ir.OP_deref) + } + + /* special case of a double-quoted string */ + if ft != vars.JsonNumberType && ft.Kind() == reflect.String { + p.Add(ir.OP_quote) + } else { + self.compileStructFieldQuoted(p, sp, vt) + } + + /* the "null" case of the pointer */ + if pc != -1 { + e := p.PC() + p.Add(ir.OP_goto) + p.Pin(pc) + p.Add(ir.OP_null) + p.Pin(e) + } +} + +func (self *Compiler) compileStructFieldZero(p *ir.Program, vt reflect.Type) { + switch vt.Kind() { + case reflect.Bool: + p.Add(ir.OP_is_zero_1) + case reflect.Int: + p.Add(ir.OP_is_zero_ints()) + case reflect.Int8: + p.Add(ir.OP_is_zero_1) + case reflect.Int16: + p.Add(ir.OP_is_zero_2) + case reflect.Int32: + p.Add(ir.OP_is_zero_4) + case reflect.Int64: + p.Add(ir.OP_is_zero_8) + case reflect.Uint: + p.Add(ir.OP_is_zero_ints()) + case reflect.Uint8: + p.Add(ir.OP_is_zero_1) + case reflect.Uint16: + p.Add(ir.OP_is_zero_2) + case reflect.Uint32: + p.Add(ir.OP_is_zero_4) + case reflect.Uint64: + p.Add(ir.OP_is_zero_8) + case reflect.Uintptr: + p.Add(ir.OP_is_nil) + case reflect.Float32: + p.Add(ir.OP_is_zero_4) + case reflect.Float64: + p.Add(ir.OP_is_zero_8) + case reflect.String: + p.Add(ir.OP_is_nil_p1) + case reflect.Interface: + p.Add(ir.OP_is_nil) + case reflect.Map: + p.Add(ir.OP_is_zero_map) + case reflect.Ptr: + p.Add(ir.OP_is_nil) + case reflect.Slice: + p.Add(ir.OP_is_nil_p1) + default: + panic(vars.Error_type(vt)) + } +} + +func (self *Compiler) compileStructFieldQuoted(p *ir.Program, sp int, vt reflect.Type) { + p.Int(ir.OP_byte, '"') + self.compileOne(p, sp, vt, self.pv) + p.Int(ir.OP_byte, '"') +} + +func (self *Compiler) compileInterface(p *ir.Program, vt reflect.Type) { + x := p.PC() + p.Add(ir.OP_is_nil_p1) + + /* iface and efaces are different */ + if vt.NumMethod() == 0 { + p.Add(ir.OP_eface) + } else { + p.Add(ir.OP_iface) + } + + /* the "null" value */ + e := p.PC() + p.Add(ir.OP_goto) + p.Pin(x) + p.Add(ir.OP_null) + p.Pin(e) +} + +func (self *Compiler) compileMarshaler(p *ir.Program, op ir.Op, vt reflect.Type, mt reflect.Type) { + pc := p.PC() + vk := vt.Kind() + + /* direct receiver */ + if vk != reflect.Ptr { + addMarshalerOp(p, op, vt, mt) + return + } + /* value receiver with a pointer type, check for nil before calling the marshaler */ + p.Add(ir.OP_is_nil) + + addMarshalerOp(p, op, vt, mt) + + i := p.PC() + p.Add(ir.OP_goto) + p.Pin(pc) + p.Add(ir.OP_null) + p.Pin(i) +} + +func addMarshalerOp(p *ir.Program, op ir.Op, vt reflect.Type, mt reflect.Type) { + if vars.UseVM { + itab := rt.GetItab(rt.IfaceType(rt.UnpackType(mt)), rt.UnpackType(vt), true) + p.Vtab(op, vt, itab) + } else { + // OPT: get itab here + p.Rtt(op, vt) + } } diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/debug_go117.go b/vendor/github.com/bytedance/sonic/internal/encoder/debug_go117.go deleted file mode 100644 index 56a6cbf5..00000000 --- a/vendor/github.com/bytedance/sonic/internal/encoder/debug_go117.go +++ /dev/null @@ -1,205 +0,0 @@ -// +build go1.17,!go1.22 - -/* - * Copyright 2021 ByteDance Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package encoder - -import ( - `fmt` - `os` - `runtime` - `strings` - `unsafe` - - `github.com/bytedance/sonic/internal/jit` - `github.com/twitchyliquid64/golang-asm/obj` -) - -const _FP_debug = 128 - -var ( - debugSyncGC = os.Getenv("SONIC_SYNC_GC") != "" - debugAsyncGC = os.Getenv("SONIC_NO_ASYNC_GC") == "" - debugCheckPtr = os.Getenv("SONIC_CHECK_POINTER") != "" -) - -var ( - _Instr_End = newInsOp(_OP_is_nil) - - _F_gc = jit.Func(gc) - _F_println = jit.Func(println_wrapper) - _F_print = jit.Func(print) -) - -func (self *_Assembler) dsave(r ...obj.Addr) { - for i, v := range r { - if i > _FP_debug / 8 - 1 { - panic("too many registers to save") - } else { - self.Emit("MOVQ", v, jit.Ptr(_SP, _FP_fargs + _FP_saves + _FP_locals + int64(i) * 8)) - } - } -} - -func (self *_Assembler) dload(r ...obj.Addr) { - for i, v := range r { - if i > _FP_debug / 8 - 1 { - panic("too many registers to load") - } else { - self.Emit("MOVQ", jit.Ptr(_SP, _FP_fargs + _FP_saves + _FP_locals + int64(i) * 8), v) - } - } -} - -func println_wrapper(i int, op1 int, op2 int){ - println(i, " Intrs ", op1, _OpNames[op1], "next: ", op2, _OpNames[op2]) -} - -func print(i int){ - println(i) -} - -func gc() { - if !debugSyncGC { - return - } - runtime.GC() - // debug.FreeOSMemory() -} - -func (self *_Assembler) dcall(fn obj.Addr) { - self.Emit("MOVQ", fn, _R10) // MOVQ ${fn}, R10 - self.Rjmp("CALL", _R10) // CALL R10 -} - -func (self *_Assembler) debug_gc() { - if !debugSyncGC { - return - } - self.dsave(_REG_debug...) - self.dcall(_F_gc) - self.dload(_REG_debug...) -} - -func (self *_Assembler) debug_instr(i int, v *_Instr) { - if debugSyncGC { - if i+1 == len(self.p) { - self.print_gc(i, v, &_Instr_End) - } else { - next := &(self.p[i+1]) - self.print_gc(i, v, next) - name := _OpNames[next.op()] - if strings.Contains(name, "save") { - return - } - } - // self.debug_gc() - } -} - -//go:noescape -//go:linkname checkptrBase runtime.checkptrBase -func checkptrBase(p unsafe.Pointer) uintptr - -//go:noescape -//go:linkname findObject runtime.findObject -func findObject(p, refBase, refOff uintptr) (base uintptr, s unsafe.Pointer, objIndex uintptr) - -var ( - _F_checkptr = jit.Func(checkptr) - _F_printptr = jit.Func(printptr) -) - -var ( - _R10 = jit.Reg("R10") -) -var _REG_debug = []obj.Addr { - jit.Reg("AX"), - jit.Reg("BX"), - jit.Reg("CX"), - jit.Reg("DX"), - jit.Reg("DI"), - jit.Reg("SI"), - jit.Reg("BP"), - jit.Reg("SP"), - jit.Reg("R8"), - jit.Reg("R9"), - jit.Reg("R10"), - jit.Reg("R11"), - jit.Reg("R12"), - jit.Reg("R13"), - jit.Reg("R14"), - jit.Reg("R15"), -} - -func checkptr(ptr uintptr) { - if ptr == 0 { - return - } - fmt.Printf("pointer: %x\n", ptr) - f := checkptrBase(unsafe.Pointer(uintptr(ptr))) - if f == 0 { - fmt.Printf("! unknown-based pointer: %x\n", ptr) - } else if f == 1 { - fmt.Printf("! stack pointer: %x\n", ptr) - } else { - fmt.Printf("base: %x\n", f) - } - findobj(ptr) -} - -func findobj(ptr uintptr) { - base, s, objIndex := findObject(ptr, 0, 0) - if s != nil && base == 0 { - fmt.Printf("! invalid pointer: %x\n", ptr) - } - fmt.Printf("objIndex: %d\n", objIndex) -} - -func (self *_Assembler) check_ptr(ptr obj.Addr, lea bool) { - if !debugCheckPtr { - return - } - - self.dsave(_REG_debug...) - if lea { - self.Emit("LEAQ", ptr, _R10) - } else { - self.Emit("MOVQ", ptr, _R10) - } - self.Emit("MOVQ", _R10, jit.Ptr(_SP, 0)) - self.dcall(_F_checkptr) - self.dload(_REG_debug...) -} - -func printptr(i int, ptr uintptr) { - fmt.Printf("[%d] ptr: %x\n", i, ptr) -} - -func (self *_Assembler) print_ptr(i int, ptr obj.Addr, lea bool) { - self.dsave(_REG_debug...) - if lea { - self.Emit("LEAQ", ptr, _R10) - } else { - self.Emit("MOVQ", ptr, _R10) - } - - self.Emit("MOVQ", jit.Imm(int64(i)), _AX) - self.Emit("MOVQ", _R10, _BX) - self.dcall(_F_printptr) - self.dload(_REG_debug...) -} diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/encode_norace.go b/vendor/github.com/bytedance/sonic/internal/encoder/encode_norace.go new file mode 100644 index 00000000..c5320643 --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/encoder/encode_norace.go @@ -0,0 +1,24 @@ +//go:build !race +// +build !race + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package encoder + +func encodeIntoCheckRace(buf *[]byte, val interface{}, opts Options) error { + return encodeInto(buf, val, opts) +} diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/encode_race.go b/vendor/github.com/bytedance/sonic/internal/encoder/encode_race.go new file mode 100644 index 00000000..c373c55f --- /dev/null +++ b/vendor/github.com/bytedance/sonic/internal/encoder/encode_race.go @@ -0,0 +1,54 @@ +//go:build race +// +build race + +/* + * Copyright 2021 ByteDance Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package encoder + +import ( + `encoding/json` + + `github.com/bytedance/sonic/internal/rt` +) + + +func helpDetectDataRace(val interface{}) { + var out []byte + defer func() { + if v := recover(); v != nil { + // NOTICE: help user to locate where panic occurs + println("panic when encoding on: ", truncate(out)) + panic(v) + } + }() + out, _ = json.Marshal(val) +} + +func encodeIntoCheckRace(buf *[]byte, val interface{}, opts Options) error { + err := encodeInto(buf, val, opts) + /* put last to make the panic from sonic will always be caught at first */ + helpDetectDataRace(val) + return err +} + +func truncate(json []byte) string { + if len(json) <= 256 { + return rt.Mem2Str(json) + } else { + return rt.Mem2Str(json[len(json)-256:]) + } +} diff --git a/vendor/github.com/bytedance/sonic/internal/encoder/encoder.go b/vendor/github.com/bytedance/sonic/internal/encoder/encoder.go index 757e73ff..4cba1a16 100644 --- a/vendor/github.com/bytedance/sonic/internal/encoder/encoder.go +++ b/vendor/github.com/bytedance/sonic/internal/encoder/encoder.go @@ -17,63 +17,62 @@ package encoder import ( - `bytes` - `encoding/json` - `reflect` - `runtime` - `unsafe` - - `github.com/bytedance/sonic/internal/native` - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` - `github.com/bytedance/sonic/utf8` - `github.com/bytedance/sonic/option` + "bytes" + "encoding/json" + "reflect" + "runtime" + "unsafe" + + "github.com/bytedance/sonic/utf8" + "github.com/bytedance/sonic/internal/encoder/alg" + "github.com/bytedance/sonic/internal/encoder/vars" + "github.com/bytedance/sonic/internal/rt" + "github.com/bytedance/sonic/option" ) // Options is a set of encoding options. type Options uint64 -const ( - bitSortMapKeys = iota - bitEscapeHTML - bitCompactMarshaler - bitNoQuoteTextMarshaler - bitNoNullSliceOrMap - bitValidateString - - // used for recursive compile - bitPointerValue = 63 -) - const ( // SortMapKeys indicates that the keys of a map needs to be sorted // before serializing into JSON. // WARNING: This hurts performance A LOT, USE WITH CARE. - SortMapKeys Options = 1 << bitSortMapKeys + SortMapKeys Options = 1 << alg.BitSortMapKeys // EscapeHTML indicates encoder to escape all HTML characters // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape). // WARNING: This hurts performance A LOT, USE WITH CARE. - EscapeHTML Options = 1 << bitEscapeHTML + EscapeHTML Options = 1 << alg.BitEscapeHTML // CompactMarshaler indicates that the output JSON from json.Marshaler // is always compact and needs no validation - CompactMarshaler Options = 1 << bitCompactMarshaler + CompactMarshaler Options = 1 << alg.BitCompactMarshaler // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler // is always escaped string and needs no quoting - NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler + NoQuoteTextMarshaler Options = 1 << alg.BitNoQuoteTextMarshaler // NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}', - // instead of 'null' - NoNullSliceOrMap Options = 1 << bitNoNullSliceOrMap + // instead of 'null'. + // NOTE: The priority of this option is lower than json tag `omitempty`. + NoNullSliceOrMap Options = 1 << alg.BitNoNullSliceOrMap // ValidateString indicates that encoder should validate the input string // before encoding it into JSON. - ValidateString Options = 1 << bitValidateString + ValidateString Options = 1 << alg.BitValidateString + + // NoValidateJSONMarshaler indicates that the encoder should not validate the output string + // after encoding the JSONMarshaler to JSON. + NoValidateJSONMarshaler Options = 1 << alg.BitNoValidateJSONMarshaler + + // NoEncoderNewline indicates that the encoder should not add a newline after every message + NoEncoderNewline Options = 1 << alg.BitNoEncoderNewline // CompatibleWithStd is used to be compatible with std encoder. CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler + + // Encode Infinity or Nan float into `null`, instead of returning an error. + EncodeNullForInfOrNan Options = 1 << alg.BitEncodeNullForInfOrNan ) // Encoder represents a specific set of encoder configurations. @@ -115,6 +114,25 @@ func (self *Encoder) SetValidateString(f bool) { } } +// SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens +func (self *Encoder) SetNoValidateJSONMarshaler(f bool) { + if f { + self.Opts |= NoValidateJSONMarshaler + } else { + self.Opts &= ^NoValidateJSONMarshaler + } +} + +// SetNoEncoderNewline specifies if option NoEncoderNewline opens +func (self *Encoder) SetNoEncoderNewline(f bool) { + if f { + self.Opts |= NoEncoderNewline + } else { + self.Opts &= ^NoEncoderNewline + } +} + + // SetCompactMarshaler specifies if option CompactMarshaler opens func (self *Encoder) SetCompactMarshaler(f bool) { if f { @@ -143,53 +161,45 @@ func (enc *Encoder) SetIndent(prefix, indent string) { // Quote returns the JSON-quoted version of s. func Quote(s string) string { - var n int - var p []byte - - /* check for empty string */ - if s == "" { - return `""` - } - - /* allocate space for result */ - n = len(s) + 2 - p = make([]byte, 0, n) - - /* call the encoder */ - _ = encodeString(&p, s) - return rt.Mem2Str(p) + buf := make([]byte, 0, len(s)+2) + buf = alg.Quote(buf, s, false) + return rt.Mem2Str(buf) } // Encode returns the JSON encoding of val, encoded with opts. func Encode(val interface{}, opts Options) ([]byte, error) { var ret []byte - buf := newBytes() - err := encodeInto(&buf, val, opts) + buf := vars.NewBytes() + err := encodeIntoCheckRace(buf, val, opts) /* check for errors */ if err != nil { - freeBytes(buf) + vars.FreeBytes(buf) return nil, err } /* htmlescape or correct UTF-8 if opts enable */ old := buf - buf = encodeFinish(old, opts) - pbuf := ((*rt.GoSlice)(unsafe.Pointer(&buf))).Ptr - pold := ((*rt.GoSlice)(unsafe.Pointer(&old))).Ptr + *buf = encodeFinish(*old, opts) + pbuf := ((*rt.GoSlice)(unsafe.Pointer(buf))).Ptr + pold := ((*rt.GoSlice)(unsafe.Pointer(old))).Ptr /* return when allocated a new buffer */ if pbuf != pold { - freeBytes(old) - return buf, nil + vars.FreeBytes(old) + return *buf, nil } /* make a copy of the result */ - ret = make([]byte, len(buf)) - copy(ret, buf) - - freeBytes(buf) + if rt.CanSizeResue(cap(*buf)) { + ret = make([]byte, len(*buf)) + copy(ret, *buf) + vars.FreeBytes(buf) + } else { + ret = *buf + } + /* return the buffer into pool */ return ret, nil } @@ -197,7 +207,7 @@ func Encode(val interface{}, opts Options) ([]byte, error) { // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating // a new one. func EncodeInto(buf *[]byte, val interface{}, opts Options) error { - err := encodeInto(buf, val, opts) + err := encodeIntoCheckRace(buf, val, opts) if err != nil { return err } @@ -206,15 +216,15 @@ func EncodeInto(buf *[]byte, val interface{}, opts Options) error { } func encodeInto(buf *[]byte, val interface{}, opts Options) error { - stk := newStack() + stk := vars.NewStack() efv := rt.UnpackEface(val) err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts)) /* return the stack into pool */ if err != nil { - resetStack(stk) + vars.ResetStack(stk) } - freeStack(stk) + vars.FreeStack(stk) /* avoid GC ahead */ runtime.KeepAlive(buf) @@ -226,13 +236,12 @@ func encodeFinish(buf []byte, opts Options) []byte { if opts & EscapeHTML != 0 { buf = HTMLEscape(nil, buf) } - if opts & ValidateString != 0 && !utf8.Validate(buf) { + if (opts & ValidateString != 0) && !utf8.Validate(buf) { buf = utf8.CorrectWith(nil, buf, `\ufffd`) } return buf } -var typeByte = rt.UnpackType(reflect.TypeOf(byte(0))) // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 @@ -241,7 +250,7 @@ var typeByte = rt.UnpackType(reflect.TypeOf(byte(0))) // escaping within -{{ end }} \ No newline at end of file +{{- end -}} + +{{- define "base_option" }} + {{- .JSONNotEscaped | safeJS }} +{{- end }}; + +{{- define "base" }} + {{- template "base_element" . }} + {{- template "base_script" . }} +{{- end }} diff --git a/vendor/github.com/go-echarts/go-echarts/v2/templates/base_element.tpl b/vendor/github.com/go-echarts/go-echarts/v2/templates/base_element.tpl new file mode 100644 index 00000000..678283b0 --- /dev/null +++ b/vendor/github.com/go-echarts/go-echarts/v2/templates/base_element.tpl @@ -0,0 +1,3 @@ +{{- template "base_element" .}} + + diff --git a/vendor/github.com/go-echarts/go-echarts/v2/templates/base_option.tpl b/vendor/github.com/go-echarts/go-echarts/v2/templates/base_option.tpl new file mode 100644 index 00000000..3fb5e84c --- /dev/null +++ b/vendor/github.com/go-echarts/go-echarts/v2/templates/base_option.tpl @@ -0,0 +1 @@ +{{- template "base_option" . }} diff --git a/vendor/github.com/go-echarts/go-echarts/v2/templates/base_script.tpl b/vendor/github.com/go-echarts/go-echarts/v2/templates/base_script.tpl new file mode 100644 index 00000000..1d320521 --- /dev/null +++ b/vendor/github.com/go-echarts/go-echarts/v2/templates/base_script.tpl @@ -0,0 +1 @@ +{{- template "base_script" . }} diff --git a/vendor/github.com/go-echarts/go-echarts/v2/templates/page.tpl b/vendor/github.com/go-echarts/go-echarts/v2/templates/page.tpl index 2ba490ae..9f9331fa 100644 --- a/vendor/github.com/go-echarts/go-echarts/v2/templates/page.tpl +++ b/vendor/github.com/go-echarts/go-echarts/v2/templates/page.tpl @@ -16,6 +16,12 @@
{{- range .Charts }} {{ template "base" . }} {{- end }}
{{ end }} + +{{ if eq .Layout "full" }} + + {{- range .Charts }} {{ template "base" . }} {{- end }} +{{ end }} + {{ end }} diff --git a/vendor/github.com/go-echarts/go-echarts/v2/templates/template.go b/vendor/github.com/go-echarts/go-echarts/v2/templates/template.go index 59bd108b..d5a97319 100644 --- a/vendor/github.com/go-echarts/go-echarts/v2/templates/template.go +++ b/vendor/github.com/go-echarts/go-echarts/v2/templates/template.go @@ -15,3 +15,12 @@ var HeaderTpl string //go:embed page.tpl var PageTpl string + +//go:embed base_element.tpl +var BaseElementTpl string + +//go:embed base_script.tpl +var BaseScriptTpl string + +//go:embed base_option.tpl +var BaseOptionTpl string diff --git a/vendor/github.com/go-echarts/go-echarts/v2/types/lang.go b/vendor/github.com/go-echarts/go-echarts/v2/types/lang.go deleted file mode 100644 index c1c1ba4c..00000000 --- a/vendor/github.com/go-echarts/go-echarts/v2/types/lang.go +++ /dev/null @@ -1,15 +0,0 @@ -package types - -// thoughts on those boxed type for default value solution... -type ( - Bool *bool - Integer *int -) - -func newBool(val bool) Bool { - return &val -} - -func newInteger(val int) Integer { - return &val -} diff --git a/vendor/github.com/go-echarts/go-echarts/v2/types/orderedset.go b/vendor/github.com/go-echarts/go-echarts/v2/types/orderedset.go index 085e79c5..eb282609 100644 --- a/vendor/github.com/go-echarts/go-echarts/v2/types/orderedset.go +++ b/vendor/github.com/go-echarts/go-echarts/v2/types/orderedset.go @@ -52,3 +52,9 @@ func (o *OrderedSet) Size() int { func (o *OrderedSet) Contains(item string) bool { return o.filter[item] != nil } + +func (o *OrderedSet) Clear() { + o.filter = make(map[string]*Index) + o.cur = 0 + o.Values = []string{} +} diff --git a/vendor/github.com/go-echarts/go-echarts/v2/types/primitivie.go b/vendor/github.com/go-echarts/go-echarts/v2/types/primitivie.go new file mode 100644 index 00000000..71689267 --- /dev/null +++ b/vendor/github.com/go-echarts/go-echarts/v2/types/primitivie.go @@ -0,0 +1,13 @@ +package types + +// Bool a wrapper type of *bool, use opts.Bool to simply convert it. +type ( + Bool *bool + Int *int + Float *float32 + String string +) + +// FuncStr a pure JavaScrip function string or special formatted string +// use opts.FuncOpts or opts.FuncStripCommentsOpts to embed JavaScript. +type FuncStr string diff --git a/vendor/github.com/go-echarts/go-echarts/v2/util/default_val_setter.go b/vendor/github.com/go-echarts/go-echarts/v2/util/default_val_setter.go new file mode 100644 index 00000000..1565be8e --- /dev/null +++ b/vendor/github.com/go-echarts/go-echarts/v2/util/default_val_setter.go @@ -0,0 +1,42 @@ +package util + +import ( + "reflect" + "strconv" +) + +// SetDefaultValue set default values for the struct field. +// inspired from: https://github.com/mcuadros/go-defaults +func SetDefaultValue(ptr interface{}) { + elem := reflect.ValueOf(ptr).Elem() + walkField(elem) +} + +func walkField(val reflect.Value) { + t := val.Type() + + for i := 0; i < t.NumField(); i++ { + f := val.Field(i) + if f.Kind() == reflect.Struct { + walkField(f) + } + + if defaultVal := t.Field(i).Tag.Get("default"); defaultVal != "" { + setField(val.Field(i), defaultVal) + } + } +} + +// setField handles String/Bool types only. +func setField(field reflect.Value, defaultVal string) { + switch field.Kind() { + case reflect.String: + if field.String() == "" { + field.Set(reflect.ValueOf(defaultVal).Convert(field.Type())) + } + case reflect.Bool: + if val, err := strconv.ParseBool(defaultVal); err == nil { + field.Set(reflect.ValueOf(val).Convert(field.Type())) + } + } +} diff --git a/vendor/github.com/go-echarts/go-echarts/v2/util/id_gennerator.go b/vendor/github.com/go-echarts/go-echarts/v2/util/id_gennerator.go new file mode 100644 index 00000000..3934ef40 --- /dev/null +++ b/vendor/github.com/go-echarts/go-echarts/v2/util/id_gennerator.go @@ -0,0 +1,31 @@ +package util + +import ( + "math/rand" + "time" +) + +func init() { + rand.Seed(time.Now().UnixNano()) +} + +const ( + chartIDSize = 12 +) + +// GenerateUniqueID generate the unique ID for each chart. +func GenerateUniqueID() string { + var b [chartIDSize]byte + for i := range b { + b[i] = randByte() + } + return string(b[:]) +} + +func randByte() byte { + c := 65 // A + if rand.Intn(10) > 5 { + c = 97 // a + } + return byte(c + rand.Intn(26)) +} diff --git a/vendor/github.com/go-playground/validator/v10/Makefile b/vendor/github.com/go-playground/validator/v10/Makefile index ec3455bd..e097dfaf 100644 --- a/vendor/github.com/go-playground/validator/v10/Makefile +++ b/vendor/github.com/go-playground/validator/v10/Makefile @@ -1,4 +1,4 @@ -GOCMD=GO111MODULE=on go +GOCMD=go linters-install: @golangci-lint --version >/dev/null 2>&1 || { \ @@ -13,6 +13,6 @@ test: $(GOCMD) test -cover -race ./... bench: - $(GOCMD) test -bench=. -benchmem ./... + $(GOCMD) test -run=NONE -bench=. -benchmem ./... -.PHONY: test lint linters-install \ No newline at end of file +.PHONY: test lint linters-install diff --git a/vendor/github.com/go-playground/validator/v10/README.md b/vendor/github.com/go-playground/validator/v10/README.md index b2e0e2d9..ddd65b07 100644 --- a/vendor/github.com/go-playground/validator/v10/README.md +++ b/vendor/github.com/go-playground/validator/v10/README.md @@ -1,7 +1,7 @@ Package validator ================= [![Join the chat at https://gitter.im/go-playground/validator](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/go-playground/validator?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -![Project status](https://img.shields.io/badge/version-10.15.0-green.svg) +![Project status](https://img.shields.io/badge/version-10.22.1-green.svg) [![Build Status](https://travis-ci.org/go-playground/validator.svg?branch=master)](https://travis-ci.org/go-playground/validator) [![Coverage Status](https://coveralls.io/repos/go-playground/validator/badge.svg?branch=master&service=github)](https://coveralls.io/github/go-playground/validator?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/go-playground/validator)](https://goreportcard.com/report/github.com/go-playground/validator) @@ -67,6 +67,12 @@ Please see https://pkg.go.dev/github.com/go-playground/validator/v10 for detaile Baked-in Validations ------ +### Special Notes: +- If new to using validator it is highly recommended to initialize it using the `WithRequiredStructEnabled` option which is opt-in to new behaviour that will become the default behaviour in v11+. See documentation for more details. +```go +validate := validator.New(validator.WithRequiredStructEnabled()) +``` + ### Fields: | Tag | Description | @@ -157,6 +163,7 @@ Baked-in Validations | btc_addr_bech32 | Bitcoin Bech32 Address (segwit) | | credit_card | Credit Card Number | | mongodb | MongoDB ObjectID | +| mongodb_connection_string | MongoDB Connection String | | cron | Cron | | spicedb | SpiceDb ObjectID/Permission/Type | | datetime | Datetime | @@ -172,6 +179,7 @@ Baked-in Validations | isbn | International Standard Book Number | | isbn10 | International Standard Book Number 10 | | isbn13 | International Standard Book Number 13 | +| issn | International Standard Serial Number | | iso3166_1_alpha2 | Two-letter country code (ISO 3166-1 alpha-2) | | iso3166_1_alpha3 | Three-letter country code (ISO 3166-1 alpha-3) | | iso3166_1_alpha_numeric | Numeric country code (ISO 3166-1 numeric) | @@ -260,71 +268,72 @@ Benchmarks ------ ###### Run on MacBook Pro (15-inch, 2017) go version go1.10.2 darwin/amd64 ```go +go version go1.21.0 darwin/arm64 goos: darwin -goarch: amd64 -pkg: github.com/go-playground/validator -BenchmarkFieldSuccess-8 20000000 83.6 ns/op 0 B/op 0 allocs/op -BenchmarkFieldSuccessParallel-8 50000000 26.8 ns/op 0 B/op 0 allocs/op -BenchmarkFieldFailure-8 5000000 291 ns/op 208 B/op 4 allocs/op -BenchmarkFieldFailureParallel-8 20000000 107 ns/op 208 B/op 4 allocs/op -BenchmarkFieldArrayDiveSuccess-8 2000000 623 ns/op 201 B/op 11 allocs/op -BenchmarkFieldArrayDiveSuccessParallel-8 10000000 237 ns/op 201 B/op 11 allocs/op -BenchmarkFieldArrayDiveFailure-8 2000000 859 ns/op 412 B/op 16 allocs/op -BenchmarkFieldArrayDiveFailureParallel-8 5000000 335 ns/op 413 B/op 16 allocs/op -BenchmarkFieldMapDiveSuccess-8 1000000 1292 ns/op 432 B/op 18 allocs/op -BenchmarkFieldMapDiveSuccessParallel-8 3000000 467 ns/op 432 B/op 18 allocs/op -BenchmarkFieldMapDiveFailure-8 1000000 1082 ns/op 512 B/op 16 allocs/op -BenchmarkFieldMapDiveFailureParallel-8 5000000 425 ns/op 512 B/op 16 allocs/op -BenchmarkFieldMapDiveWithKeysSuccess-8 1000000 1539 ns/op 480 B/op 21 allocs/op -BenchmarkFieldMapDiveWithKeysSuccessParallel-8 3000000 613 ns/op 480 B/op 21 allocs/op -BenchmarkFieldMapDiveWithKeysFailure-8 1000000 1413 ns/op 721 B/op 21 allocs/op -BenchmarkFieldMapDiveWithKeysFailureParallel-8 3000000 575 ns/op 721 B/op 21 allocs/op -BenchmarkFieldCustomTypeSuccess-8 10000000 216 ns/op 32 B/op 2 allocs/op -BenchmarkFieldCustomTypeSuccessParallel-8 20000000 82.2 ns/op 32 B/op 2 allocs/op -BenchmarkFieldCustomTypeFailure-8 5000000 274 ns/op 208 B/op 4 allocs/op -BenchmarkFieldCustomTypeFailureParallel-8 20000000 116 ns/op 208 B/op 4 allocs/op -BenchmarkFieldOrTagSuccess-8 2000000 740 ns/op 16 B/op 1 allocs/op -BenchmarkFieldOrTagSuccessParallel-8 3000000 474 ns/op 16 B/op 1 allocs/op -BenchmarkFieldOrTagFailure-8 3000000 471 ns/op 224 B/op 5 allocs/op -BenchmarkFieldOrTagFailureParallel-8 3000000 414 ns/op 224 B/op 5 allocs/op -BenchmarkStructLevelValidationSuccess-8 10000000 213 ns/op 32 B/op 2 allocs/op -BenchmarkStructLevelValidationSuccessParallel-8 20000000 91.8 ns/op 32 B/op 2 allocs/op -BenchmarkStructLevelValidationFailure-8 3000000 473 ns/op 304 B/op 8 allocs/op -BenchmarkStructLevelValidationFailureParallel-8 10000000 234 ns/op 304 B/op 8 allocs/op -BenchmarkStructSimpleCustomTypeSuccess-8 5000000 385 ns/op 32 B/op 2 allocs/op -BenchmarkStructSimpleCustomTypeSuccessParallel-8 10000000 161 ns/op 32 B/op 2 allocs/op -BenchmarkStructSimpleCustomTypeFailure-8 2000000 640 ns/op 424 B/op 9 allocs/op -BenchmarkStructSimpleCustomTypeFailureParallel-8 5000000 318 ns/op 440 B/op 10 allocs/op -BenchmarkStructFilteredSuccess-8 2000000 597 ns/op 288 B/op 9 allocs/op -BenchmarkStructFilteredSuccessParallel-8 10000000 266 ns/op 288 B/op 9 allocs/op -BenchmarkStructFilteredFailure-8 3000000 454 ns/op 256 B/op 7 allocs/op -BenchmarkStructFilteredFailureParallel-8 10000000 214 ns/op 256 B/op 7 allocs/op -BenchmarkStructPartialSuccess-8 3000000 502 ns/op 256 B/op 6 allocs/op -BenchmarkStructPartialSuccessParallel-8 10000000 225 ns/op 256 B/op 6 allocs/op -BenchmarkStructPartialFailure-8 2000000 702 ns/op 480 B/op 11 allocs/op -BenchmarkStructPartialFailureParallel-8 5000000 329 ns/op 480 B/op 11 allocs/op -BenchmarkStructExceptSuccess-8 2000000 793 ns/op 496 B/op 12 allocs/op -BenchmarkStructExceptSuccessParallel-8 10000000 193 ns/op 240 B/op 5 allocs/op -BenchmarkStructExceptFailure-8 2000000 639 ns/op 464 B/op 10 allocs/op -BenchmarkStructExceptFailureParallel-8 5000000 300 ns/op 464 B/op 10 allocs/op -BenchmarkStructSimpleCrossFieldSuccess-8 3000000 417 ns/op 72 B/op 3 allocs/op -BenchmarkStructSimpleCrossFieldSuccessParallel-8 10000000 163 ns/op 72 B/op 3 allocs/op -BenchmarkStructSimpleCrossFieldFailure-8 2000000 645 ns/op 304 B/op 8 allocs/op -BenchmarkStructSimpleCrossFieldFailureParallel-8 5000000 285 ns/op 304 B/op 8 allocs/op -BenchmarkStructSimpleCrossStructCrossFieldSuccess-8 3000000 588 ns/op 80 B/op 4 allocs/op -BenchmarkStructSimpleCrossStructCrossFieldSuccessParallel-8 10000000 221 ns/op 80 B/op 4 allocs/op -BenchmarkStructSimpleCrossStructCrossFieldFailure-8 2000000 868 ns/op 320 B/op 9 allocs/op -BenchmarkStructSimpleCrossStructCrossFieldFailureParallel-8 5000000 337 ns/op 320 B/op 9 allocs/op -BenchmarkStructSimpleSuccess-8 5000000 260 ns/op 0 B/op 0 allocs/op -BenchmarkStructSimpleSuccessParallel-8 20000000 90.6 ns/op 0 B/op 0 allocs/op -BenchmarkStructSimpleFailure-8 2000000 619 ns/op 424 B/op 9 allocs/op -BenchmarkStructSimpleFailureParallel-8 5000000 296 ns/op 424 B/op 9 allocs/op -BenchmarkStructComplexSuccess-8 1000000 1454 ns/op 128 B/op 8 allocs/op -BenchmarkStructComplexSuccessParallel-8 3000000 579 ns/op 128 B/op 8 allocs/op -BenchmarkStructComplexFailure-8 300000 4140 ns/op 3041 B/op 53 allocs/op -BenchmarkStructComplexFailureParallel-8 1000000 2127 ns/op 3041 B/op 53 allocs/op -BenchmarkOneof-8 10000000 140 ns/op 0 B/op 0 allocs/op -BenchmarkOneofParallel-8 20000000 70.1 ns/op 0 B/op 0 allocs/op +goarch: arm64 +pkg: github.com/go-playground/validator/v10 +BenchmarkFieldSuccess-8 33142266 35.94 ns/op 0 B/op 0 allocs/op +BenchmarkFieldSuccessParallel-8 200816191 6.568 ns/op 0 B/op 0 allocs/op +BenchmarkFieldFailure-8 6779707 175.1 ns/op 200 B/op 4 allocs/op +BenchmarkFieldFailureParallel-8 11044147 108.4 ns/op 200 B/op 4 allocs/op +BenchmarkFieldArrayDiveSuccess-8 6054232 194.4 ns/op 97 B/op 5 allocs/op +BenchmarkFieldArrayDiveSuccessParallel-8 12523388 94.07 ns/op 97 B/op 5 allocs/op +BenchmarkFieldArrayDiveFailure-8 3587043 334.3 ns/op 300 B/op 10 allocs/op +BenchmarkFieldArrayDiveFailureParallel-8 5816665 200.8 ns/op 300 B/op 10 allocs/op +BenchmarkFieldMapDiveSuccess-8 2217910 540.1 ns/op 288 B/op 14 allocs/op +BenchmarkFieldMapDiveSuccessParallel-8 4446698 258.7 ns/op 288 B/op 14 allocs/op +BenchmarkFieldMapDiveFailure-8 2392759 504.6 ns/op 376 B/op 13 allocs/op +BenchmarkFieldMapDiveFailureParallel-8 4244199 286.9 ns/op 376 B/op 13 allocs/op +BenchmarkFieldMapDiveWithKeysSuccess-8 2005857 592.1 ns/op 288 B/op 14 allocs/op +BenchmarkFieldMapDiveWithKeysSuccessParallel-8 4400850 296.9 ns/op 288 B/op 14 allocs/op +BenchmarkFieldMapDiveWithKeysFailure-8 1850227 643.8 ns/op 553 B/op 16 allocs/op +BenchmarkFieldMapDiveWithKeysFailureParallel-8 3293233 375.1 ns/op 553 B/op 16 allocs/op +BenchmarkFieldCustomTypeSuccess-8 12174412 98.25 ns/op 32 B/op 2 allocs/op +BenchmarkFieldCustomTypeSuccessParallel-8 34389907 35.49 ns/op 32 B/op 2 allocs/op +BenchmarkFieldCustomTypeFailure-8 7582524 156.6 ns/op 184 B/op 3 allocs/op +BenchmarkFieldCustomTypeFailureParallel-8 13019902 92.79 ns/op 184 B/op 3 allocs/op +BenchmarkFieldOrTagSuccess-8 3427260 349.4 ns/op 16 B/op 1 allocs/op +BenchmarkFieldOrTagSuccessParallel-8 15144128 81.25 ns/op 16 B/op 1 allocs/op +BenchmarkFieldOrTagFailure-8 5913546 201.9 ns/op 216 B/op 5 allocs/op +BenchmarkFieldOrTagFailureParallel-8 9810212 113.7 ns/op 216 B/op 5 allocs/op +BenchmarkStructLevelValidationSuccess-8 13456327 87.66 ns/op 16 B/op 1 allocs/op +BenchmarkStructLevelValidationSuccessParallel-8 41818888 27.77 ns/op 16 B/op 1 allocs/op +BenchmarkStructLevelValidationFailure-8 4166284 272.6 ns/op 264 B/op 7 allocs/op +BenchmarkStructLevelValidationFailureParallel-8 7594581 152.1 ns/op 264 B/op 7 allocs/op +BenchmarkStructSimpleCustomTypeSuccess-8 6508082 182.6 ns/op 32 B/op 2 allocs/op +BenchmarkStructSimpleCustomTypeSuccessParallel-8 23078605 54.78 ns/op 32 B/op 2 allocs/op +BenchmarkStructSimpleCustomTypeFailure-8 3118352 381.0 ns/op 416 B/op 9 allocs/op +BenchmarkStructSimpleCustomTypeFailureParallel-8 5300738 224.1 ns/op 432 B/op 10 allocs/op +BenchmarkStructFilteredSuccess-8 4761807 251.1 ns/op 216 B/op 5 allocs/op +BenchmarkStructFilteredSuccessParallel-8 8792598 128.6 ns/op 216 B/op 5 allocs/op +BenchmarkStructFilteredFailure-8 5202573 232.1 ns/op 216 B/op 5 allocs/op +BenchmarkStructFilteredFailureParallel-8 9591267 121.4 ns/op 216 B/op 5 allocs/op +BenchmarkStructPartialSuccess-8 5188512 231.6 ns/op 224 B/op 4 allocs/op +BenchmarkStructPartialSuccessParallel-8 9179776 123.1 ns/op 224 B/op 4 allocs/op +BenchmarkStructPartialFailure-8 3071212 392.5 ns/op 440 B/op 9 allocs/op +BenchmarkStructPartialFailureParallel-8 5344261 223.7 ns/op 440 B/op 9 allocs/op +BenchmarkStructExceptSuccess-8 3184230 375.0 ns/op 424 B/op 8 allocs/op +BenchmarkStructExceptSuccessParallel-8 10090130 108.9 ns/op 208 B/op 3 allocs/op +BenchmarkStructExceptFailure-8 3347226 357.7 ns/op 424 B/op 8 allocs/op +BenchmarkStructExceptFailureParallel-8 5654923 209.5 ns/op 424 B/op 8 allocs/op +BenchmarkStructSimpleCrossFieldSuccess-8 5232265 229.1 ns/op 56 B/op 3 allocs/op +BenchmarkStructSimpleCrossFieldSuccessParallel-8 17436674 64.75 ns/op 56 B/op 3 allocs/op +BenchmarkStructSimpleCrossFieldFailure-8 3128613 383.6 ns/op 272 B/op 8 allocs/op +BenchmarkStructSimpleCrossFieldFailureParallel-8 6994113 168.8 ns/op 272 B/op 8 allocs/op +BenchmarkStructSimpleCrossStructCrossFieldSuccess-8 3506487 340.9 ns/op 64 B/op 4 allocs/op +BenchmarkStructSimpleCrossStructCrossFieldSuccessParallel-8 13431300 91.77 ns/op 64 B/op 4 allocs/op +BenchmarkStructSimpleCrossStructCrossFieldFailure-8 2410566 500.9 ns/op 288 B/op 9 allocs/op +BenchmarkStructSimpleCrossStructCrossFieldFailureParallel-8 6344510 188.2 ns/op 288 B/op 9 allocs/op +BenchmarkStructSimpleSuccess-8 8922726 133.8 ns/op 0 B/op 0 allocs/op +BenchmarkStructSimpleSuccessParallel-8 55291153 23.63 ns/op 0 B/op 0 allocs/op +BenchmarkStructSimpleFailure-8 3171553 378.4 ns/op 416 B/op 9 allocs/op +BenchmarkStructSimpleFailureParallel-8 5571692 212.0 ns/op 416 B/op 9 allocs/op +BenchmarkStructComplexSuccess-8 1683750 714.5 ns/op 224 B/op 5 allocs/op +BenchmarkStructComplexSuccessParallel-8 4578046 257.0 ns/op 224 B/op 5 allocs/op +BenchmarkStructComplexFailure-8 481585 2547 ns/op 3041 B/op 48 allocs/op +BenchmarkStructComplexFailureParallel-8 965764 1577 ns/op 3040 B/op 48 allocs/op +BenchmarkOneof-8 17380881 68.50 ns/op 0 B/op 0 allocs/op +BenchmarkOneofParallel-8 8084733 153.5 ns/op 0 B/op 0 allocs/op ``` Complementary Software diff --git a/vendor/github.com/go-playground/validator/v10/baked_in.go b/vendor/github.com/go-playground/validator/v10/baked_in.go index ca9eeb1d..d1a3656a 100644 --- a/vendor/github.com/go-playground/validator/v10/baked_in.go +++ b/vendor/github.com/go-playground/validator/v10/baked_in.go @@ -23,7 +23,7 @@ import ( "golang.org/x/text/language" "github.com/gabriel-vasile/mimetype" - "github.com/leodido/go-urn" + urn "github.com/leodido/go-urn" ) // Func accepts a FieldLevel interface for all validation needs. The return @@ -51,6 +51,7 @@ var ( endKeysTag: {}, structOnlyTag: {}, omitempty: {}, + omitnil: {}, skipValidationTag: {}, utf8HexComma: {}, utf8Pipe: {}, @@ -63,8 +64,9 @@ var ( // defines a common or complex set of validation(s) to simplify // adding validation to structs. bakedInAliases = map[string]string{ - "iscolor": "hexcolor|rgb|rgba|hsl|hsla", - "country_code": "iso3166_1_alpha2|iso3166_1_alpha3|iso3166_1_alpha_numeric", + "iscolor": "hexcolor|rgb|rgba|hsl|hsla", + "country_code": "iso3166_1_alpha2|iso3166_1_alpha3|iso3166_1_alpha_numeric", + "eu_country_code": "iso3166_1_alpha2_eu|iso3166_1_alpha3_eu|iso3166_1_alpha_numeric_eu", } // bakedInValidators is the default map of ValidationFunc @@ -132,6 +134,7 @@ var ( "urn_rfc2141": isUrnRFC2141, // RFC 2141 "file": isFile, "filepath": isFilePath, + "base32": isBase32, "base64": isBase64, "base64url": isBase64URL, "base64rawurl": isBase64RawURL, @@ -149,6 +152,7 @@ var ( "isbn": isISBN, "isbn10": isISBN10, "isbn13": isISBN13, + "issn": isISSN, "eth_addr": isEthereumAddress, "eth_addr_checksum": isEthereumAddressChecksum, "btc_addr": isBitcoinAddress, @@ -214,8 +218,11 @@ var ( "datetime": isDatetime, "timezone": isTimeZone, "iso3166_1_alpha2": isIso3166Alpha2, + "iso3166_1_alpha2_eu": isIso3166Alpha2EU, "iso3166_1_alpha3": isIso3166Alpha3, + "iso3166_1_alpha3_eu": isIso3166Alpha3EU, "iso3166_1_alpha_numeric": isIso3166AlphaNumeric, + "iso3166_1_alpha_numeric_eu": isIso3166AlphaNumericEU, "iso3166_2": isIso31662, "iso4217": isIso4217, "iso4217_numeric": isIso4217Numeric, @@ -228,7 +235,8 @@ var ( "credit_card": isCreditCard, "cve": isCveFormat, "luhn_checksum": hasLuhnChecksum, - "mongodb": isMongoDB, + "mongodb": isMongoDBObjectId, + "mongodb_connection_string": isMongoDBConnectionString, "cron": isCron, "spicedb": isSpiceDB, } @@ -245,7 +253,7 @@ func parseOneOfParam2(s string) []string { oneofValsCacheRWLock.RUnlock() if !ok { oneofValsCacheRWLock.Lock() - vals = splitParamsRegex.FindAllString(s, -1) + vals = splitParamsRegex().FindAllString(s, -1) for i := 0; i < len(vals); i++ { vals[i] = strings.Replace(vals[i], "'", "", -1) } @@ -256,15 +264,15 @@ func parseOneOfParam2(s string) []string { } func isURLEncoded(fl FieldLevel) bool { - return uRLEncodedRegex.MatchString(fl.Field().String()) + return uRLEncodedRegex().MatchString(fl.Field().String()) } func isHTMLEncoded(fl FieldLevel) bool { - return hTMLEncodedRegex.MatchString(fl.Field().String()) + return hTMLEncodedRegex().MatchString(fl.Field().String()) } func isHTML(fl FieldLevel) bool { - return hTMLRegex.MatchString(fl.Field().String()) + return hTMLRegex().MatchString(fl.Field().String()) } func isOneOf(fl FieldLevel) bool { @@ -373,9 +381,9 @@ func isMAC(fl FieldLevel) bool { // isCIDRv4 is the validation function for validating if the field's value is a valid v4 CIDR address. func isCIDRv4(fl FieldLevel) bool { - ip, _, err := net.ParseCIDR(fl.Field().String()) + ip, net, err := net.ParseCIDR(fl.Field().String()) - return err == nil && ip.To4() != nil + return err == nil && ip.To4() != nil && net.IP.Equal(ip) } // isCIDRv6 is the validation function for validating if the field's value is a valid v6 CIDR address. @@ -421,7 +429,7 @@ func isSSN(fl FieldLevel) bool { return false } - return sSNRegex.MatchString(field.String()) + return sSNRegex().MatchString(field.String()) } // isLongitude is the validation function for validating if the field's value is a valid longitude coordinate. @@ -444,7 +452,7 @@ func isLongitude(fl FieldLevel) bool { panic(fmt.Sprintf("Bad field type %T", field.Interface())) } - return longitudeRegex.MatchString(v) + return longitudeRegex().MatchString(v) } // isLatitude is the validation function for validating if the field's value is a valid latitude coordinate. @@ -467,7 +475,7 @@ func isLatitude(fl FieldLevel) bool { panic(fmt.Sprintf("Bad field type %T", field.Interface())) } - return latitudeRegex.MatchString(v) + return latitudeRegex().MatchString(v) } // isDataURI is the validation function for validating if the field's value is a valid data URI. @@ -478,11 +486,11 @@ func isDataURI(fl FieldLevel) bool { return false } - if !dataURIRegex.MatchString(uri[0]) { + if !dataURIRegex().MatchString(uri[0]) { return false } - return base64Regex.MatchString(uri[1]) + return base64Regex().MatchString(uri[1]) } // hasMultiByteCharacter is the validation function for validating if the field's value has a multi byte character. @@ -493,112 +501,112 @@ func hasMultiByteCharacter(fl FieldLevel) bool { return true } - return multibyteRegex.MatchString(field.String()) + return multibyteRegex().MatchString(field.String()) } // isPrintableASCII is the validation function for validating if the field's value is a valid printable ASCII character. func isPrintableASCII(fl FieldLevel) bool { - return printableASCIIRegex.MatchString(fl.Field().String()) + return printableASCIIRegex().MatchString(fl.Field().String()) } // isASCII is the validation function for validating if the field's value is a valid ASCII character. func isASCII(fl FieldLevel) bool { - return aSCIIRegex.MatchString(fl.Field().String()) + return aSCIIRegex().MatchString(fl.Field().String()) } // isUUID5 is the validation function for validating if the field's value is a valid v5 UUID. func isUUID5(fl FieldLevel) bool { - return uUID5Regex.MatchString(fl.Field().String()) + return fieldMatchesRegexByStringerValOrString(uUID5Regex, fl) } // isUUID4 is the validation function for validating if the field's value is a valid v4 UUID. func isUUID4(fl FieldLevel) bool { - return uUID4Regex.MatchString(fl.Field().String()) + return fieldMatchesRegexByStringerValOrString(uUID4Regex, fl) } // isUUID3 is the validation function for validating if the field's value is a valid v3 UUID. func isUUID3(fl FieldLevel) bool { - return uUID3Regex.MatchString(fl.Field().String()) + return fieldMatchesRegexByStringerValOrString(uUID3Regex, fl) } // isUUID is the validation function for validating if the field's value is a valid UUID of any version. func isUUID(fl FieldLevel) bool { - return uUIDRegex.MatchString(fl.Field().String()) + return fieldMatchesRegexByStringerValOrString(uUIDRegex, fl) } // isUUID5RFC4122 is the validation function for validating if the field's value is a valid RFC4122 v5 UUID. func isUUID5RFC4122(fl FieldLevel) bool { - return uUID5RFC4122Regex.MatchString(fl.Field().String()) + return fieldMatchesRegexByStringerValOrString(uUID5RFC4122Regex, fl) } // isUUID4RFC4122 is the validation function for validating if the field's value is a valid RFC4122 v4 UUID. func isUUID4RFC4122(fl FieldLevel) bool { - return uUID4RFC4122Regex.MatchString(fl.Field().String()) + return fieldMatchesRegexByStringerValOrString(uUID4RFC4122Regex, fl) } // isUUID3RFC4122 is the validation function for validating if the field's value is a valid RFC4122 v3 UUID. func isUUID3RFC4122(fl FieldLevel) bool { - return uUID3RFC4122Regex.MatchString(fl.Field().String()) + return fieldMatchesRegexByStringerValOrString(uUID3RFC4122Regex, fl) } // isUUIDRFC4122 is the validation function for validating if the field's value is a valid RFC4122 UUID of any version. func isUUIDRFC4122(fl FieldLevel) bool { - return uUIDRFC4122Regex.MatchString(fl.Field().String()) + return fieldMatchesRegexByStringerValOrString(uUIDRFC4122Regex, fl) } // isULID is the validation function for validating if the field's value is a valid ULID. func isULID(fl FieldLevel) bool { - return uLIDRegex.MatchString(fl.Field().String()) + return fieldMatchesRegexByStringerValOrString(uLIDRegex, fl) } // isMD4 is the validation function for validating if the field's value is a valid MD4. func isMD4(fl FieldLevel) bool { - return md4Regex.MatchString(fl.Field().String()) + return md4Regex().MatchString(fl.Field().String()) } // isMD5 is the validation function for validating if the field's value is a valid MD5. func isMD5(fl FieldLevel) bool { - return md5Regex.MatchString(fl.Field().String()) + return md5Regex().MatchString(fl.Field().String()) } // isSHA256 is the validation function for validating if the field's value is a valid SHA256. func isSHA256(fl FieldLevel) bool { - return sha256Regex.MatchString(fl.Field().String()) + return sha256Regex().MatchString(fl.Field().String()) } // isSHA384 is the validation function for validating if the field's value is a valid SHA384. func isSHA384(fl FieldLevel) bool { - return sha384Regex.MatchString(fl.Field().String()) + return sha384Regex().MatchString(fl.Field().String()) } // isSHA512 is the validation function for validating if the field's value is a valid SHA512. func isSHA512(fl FieldLevel) bool { - return sha512Regex.MatchString(fl.Field().String()) + return sha512Regex().MatchString(fl.Field().String()) } // isRIPEMD128 is the validation function for validating if the field's value is a valid PIPEMD128. func isRIPEMD128(fl FieldLevel) bool { - return ripemd128Regex.MatchString(fl.Field().String()) + return ripemd128Regex().MatchString(fl.Field().String()) } // isRIPEMD160 is the validation function for validating if the field's value is a valid PIPEMD160. func isRIPEMD160(fl FieldLevel) bool { - return ripemd160Regex.MatchString(fl.Field().String()) + return ripemd160Regex().MatchString(fl.Field().String()) } // isTIGER128 is the validation function for validating if the field's value is a valid TIGER128. func isTIGER128(fl FieldLevel) bool { - return tiger128Regex.MatchString(fl.Field().String()) + return tiger128Regex().MatchString(fl.Field().String()) } // isTIGER160 is the validation function for validating if the field's value is a valid TIGER160. func isTIGER160(fl FieldLevel) bool { - return tiger160Regex.MatchString(fl.Field().String()) + return tiger160Regex().MatchString(fl.Field().String()) } // isTIGER192 is the validation function for validating if the field's value is a valid isTIGER192. func isTIGER192(fl FieldLevel) bool { - return tiger192Regex.MatchString(fl.Field().String()) + return tiger192Regex().MatchString(fl.Field().String()) } // isISBN is the validation function for validating if the field's value is a valid v10 or v13 ISBN. @@ -610,7 +618,7 @@ func isISBN(fl FieldLevel) bool { func isISBN13(fl FieldLevel) bool { s := strings.Replace(strings.Replace(fl.Field().String(), "-", "", 4), " ", "", 4) - if !iSBN13Regex.MatchString(s) { + if !iSBN13Regex().MatchString(s) { return false } @@ -630,7 +638,7 @@ func isISBN13(fl FieldLevel) bool { func isISBN10(fl FieldLevel) bool { s := strings.Replace(strings.Replace(fl.Field().String(), "-", "", 3), " ", "", 3) - if !iSBN10Regex.MatchString(s) { + if !iSBN10Regex().MatchString(s) { return false } @@ -650,18 +658,44 @@ func isISBN10(fl FieldLevel) bool { return checksum%11 == 0 } +// isISSN is the validation function for validating if the field's value is a valid ISSN. +func isISSN(fl FieldLevel) bool { + s := fl.Field().String() + + if !iSSNRegex().MatchString(s) { + return false + } + s = strings.ReplaceAll(s, "-", "") + + pos := 8 + checksum := 0 + + for i := 0; i < 7; i++ { + checksum += pos * int(s[i]-'0') + pos-- + } + + if s[7] == 'X' { + checksum += 10 + } else { + checksum += int(s[7] - '0') + } + + return checksum%11 == 0 +} + // isEthereumAddress is the validation function for validating if the field's value is a valid Ethereum address. func isEthereumAddress(fl FieldLevel) bool { address := fl.Field().String() - return ethAddressRegex.MatchString(address) + return ethAddressRegex().MatchString(address) } -// isEthereumAddressChecksum is the validation function for validating if the field's value is a valid checksumed Ethereum address. +// isEthereumAddressChecksum is the validation function for validating if the field's value is a valid checksummed Ethereum address. func isEthereumAddressChecksum(fl FieldLevel) bool { address := fl.Field().String() - if !ethAddressRegex.MatchString(address) { + if !ethAddressRegex().MatchString(address) { return false } // Checksum validation. Reference: https://github.com/ethereum/EIPs/blob/master/EIPS/eip-55.md @@ -687,7 +721,7 @@ func isEthereumAddressChecksum(fl FieldLevel) bool { func isBitcoinAddress(fl FieldLevel) bool { address := fl.Field().String() - if !btcAddressRegex.MatchString(address) { + if !btcAddressRegex().MatchString(address) { return false } @@ -724,7 +758,7 @@ func isBitcoinAddress(fl FieldLevel) bool { func isBitcoinBech32Address(fl FieldLevel) bool { address := fl.Field().String() - if !btcLowerAddressRegexBech32.MatchString(address) && !btcUpperAddressRegexBech32.MatchString(address) { + if !btcLowerAddressRegexBech32().MatchString(address) && !btcUpperAddressRegexBech32().MatchString(address) { return false } @@ -1336,6 +1370,7 @@ func isPostcodeByIso3166Alpha2(fl FieldLevel) bool { field := fl.Field() param := fl.Param() + postcodeRegexInit.Do(initPostcodes) reg, found := postCodeRegexDict[param] if !found { return false @@ -1371,19 +1406,24 @@ func isPostcodeByIso3166Alpha2Field(fl FieldLevel) bool { return reg.MatchString(field.String()) } +// isBase32 is the validation function for validating if the current field's value is a valid base 32. +func isBase32(fl FieldLevel) bool { + return base32Regex().MatchString(fl.Field().String()) +} + // isBase64 is the validation function for validating if the current field's value is a valid base 64. func isBase64(fl FieldLevel) bool { - return base64Regex.MatchString(fl.Field().String()) + return base64Regex().MatchString(fl.Field().String()) } // isBase64URL is the validation function for validating if the current field's value is a valid base64 URL safe string. func isBase64URL(fl FieldLevel) bool { - return base64URLRegex.MatchString(fl.Field().String()) + return base64URLRegex().MatchString(fl.Field().String()) } // isBase64RawURL is the validation function for validating if the current field's value is a valid base64 URL safe string without '=' padding. func isBase64RawURL(fl FieldLevel) bool { - return base64RawURLRegex.MatchString(fl.Field().String()) + return base64RawURLRegex().MatchString(fl.Field().String()) } // isURI is the validation function for validating if the current field's value is a valid URI. @@ -1413,6 +1453,15 @@ func isURI(fl FieldLevel) bool { panic(fmt.Sprintf("Bad field type %T", field.Interface())) } +// isFileURL is the helper function for validating if the `path` valid file URL as per RFC8089 +func isFileURL(path string) bool { + if !strings.HasPrefix(path, "file:/") { + return false + } + _, err := url.ParseRequestURI(path) + return err == nil +} + // isURL is the validation function for validating if the current field's value is a valid URL. func isURL(fl FieldLevel) bool { field := fl.Field() @@ -1420,12 +1469,16 @@ func isURL(fl FieldLevel) bool { switch field.Kind() { case reflect.String: - s := field.String() + s := strings.ToLower(field.String()) if len(s) == 0 { return false } + if isFileURL(s) { + return true + } + url, err := url.Parse(s) if err != nil || url.Scheme == "" { return false @@ -1616,42 +1669,42 @@ func isFilePath(fl FieldLevel) bool { // isE164 is the validation function for validating if the current field's value is a valid e.164 formatted phone number. func isE164(fl FieldLevel) bool { - return e164Regex.MatchString(fl.Field().String()) + return e164Regex().MatchString(fl.Field().String()) } // isEmail is the validation function for validating if the current field's value is a valid email address. func isEmail(fl FieldLevel) bool { - return emailRegex.MatchString(fl.Field().String()) + return emailRegex().MatchString(fl.Field().String()) } // isHSLA is the validation function for validating if the current field's value is a valid HSLA color. func isHSLA(fl FieldLevel) bool { - return hslaRegex.MatchString(fl.Field().String()) + return hslaRegex().MatchString(fl.Field().String()) } // isHSL is the validation function for validating if the current field's value is a valid HSL color. func isHSL(fl FieldLevel) bool { - return hslRegex.MatchString(fl.Field().String()) + return hslRegex().MatchString(fl.Field().String()) } // isRGBA is the validation function for validating if the current field's value is a valid RGBA color. func isRGBA(fl FieldLevel) bool { - return rgbaRegex.MatchString(fl.Field().String()) + return rgbaRegex().MatchString(fl.Field().String()) } // isRGB is the validation function for validating if the current field's value is a valid RGB color. func isRGB(fl FieldLevel) bool { - return rgbRegex.MatchString(fl.Field().String()) + return rgbRegex().MatchString(fl.Field().String()) } // isHEXColor is the validation function for validating if the current field's value is a valid HEX color. func isHEXColor(fl FieldLevel) bool { - return hexColorRegex.MatchString(fl.Field().String()) + return hexColorRegex().MatchString(fl.Field().String()) } // isHexadecimal is the validation function for validating if the current field's value is a valid hexadecimal. func isHexadecimal(fl FieldLevel) bool { - return hexadecimalRegex.MatchString(fl.Field().String()) + return hexadecimalRegex().MatchString(fl.Field().String()) } // isNumber is the validation function for validating if the current field's value is a valid number. @@ -1660,7 +1713,7 @@ func isNumber(fl FieldLevel) bool { case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64: return true default: - return numberRegex.MatchString(fl.Field().String()) + return numberRegex().MatchString(fl.Field().String()) } } @@ -1670,28 +1723,28 @@ func isNumeric(fl FieldLevel) bool { case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.Float32, reflect.Float64: return true default: - return numericRegex.MatchString(fl.Field().String()) + return numericRegex().MatchString(fl.Field().String()) } } // isAlphanum is the validation function for validating if the current field's value is a valid alphanumeric value. func isAlphanum(fl FieldLevel) bool { - return alphaNumericRegex.MatchString(fl.Field().String()) + return alphaNumericRegex().MatchString(fl.Field().String()) } // isAlpha is the validation function for validating if the current field's value is a valid alpha value. func isAlpha(fl FieldLevel) bool { - return alphaRegex.MatchString(fl.Field().String()) + return alphaRegex().MatchString(fl.Field().String()) } // isAlphanumUnicode is the validation function for validating if the current field's value is a valid alphanumeric unicode value. func isAlphanumUnicode(fl FieldLevel) bool { - return alphaUnicodeNumericRegex.MatchString(fl.Field().String()) + return alphaUnicodeNumericRegex().MatchString(fl.Field().String()) } // isAlphaUnicode is the validation function for validating if the current field's value is a valid alpha unicode value. func isAlphaUnicode(fl FieldLevel) bool { - return alphaUnicodeRegex.MatchString(fl.Field().String()) + return alphaUnicodeRegex().MatchString(fl.Field().String()) } // isBoolean is the validation function for validating if the current field's value is a valid boolean value or can be safely converted to a boolean value. @@ -1775,7 +1828,14 @@ func requireCheckFieldValue( return int64(field.Len()) == asInt(value) case reflect.Bool: - return field.Bool() == asBool(value) + return field.Bool() == (value == "true") + + case reflect.Ptr: + if field.IsNil() { + return value == "nil" + } + // Handle non-nil pointers + return requireCheckFieldValue(fl, param, value, defaultNotFoundValue) } // default reflect.String: @@ -2514,11 +2574,11 @@ func isIP6Addr(fl FieldLevel) bool { } func isHostnameRFC952(fl FieldLevel) bool { - return hostnameRegexRFC952.MatchString(fl.Field().String()) + return hostnameRegexRFC952().MatchString(fl.Field().String()) } func isHostnameRFC1123(fl FieldLevel) bool { - return hostnameRegexRFC1123.MatchString(fl.Field().String()) + return hostnameRegexRFC1123().MatchString(fl.Field().String()) } func isFQDN(fl FieldLevel) bool { @@ -2528,7 +2588,7 @@ func isFQDN(fl FieldLevel) bool { return false } - return fqdnRegexRFC1123.MatchString(val) + return fqdnRegexRFC1123().MatchString(val) } // isDir is the validation function for validating if the current field's value is a valid existing directory. @@ -2627,7 +2687,7 @@ func isJSON(fl FieldLevel) bool { // isJWT is the validation function for validating if the current field's value is a valid JWT string. func isJWT(fl FieldLevel) bool { - return jWTRegex.MatchString(fl.Field().String()) + return jWTRegex().MatchString(fl.Field().String()) } // isHostnamePort validates a : combination for fields typically used for socket address. @@ -2646,7 +2706,7 @@ func isHostnamePort(fl FieldLevel) bool { // If host is specified, it should match a DNS name if host != "" { - return hostnameRegexRFC1123.MatchString(host) + return hostnameRegexRFC1123().MatchString(host) } return true } @@ -2717,14 +2777,26 @@ func isTimeZone(fl FieldLevel) bool { // isIso3166Alpha2 is the validation function for validating if the current field's value is a valid iso3166-1 alpha-2 country code. func isIso3166Alpha2(fl FieldLevel) bool { - val := fl.Field().String() - return iso3166_1_alpha2[val] + _, ok := iso3166_1_alpha2[fl.Field().String()] + return ok +} + +// isIso3166Alpha2EU is the validation function for validating if the current field's value is a valid iso3166-1 alpha-2 European Union country code. +func isIso3166Alpha2EU(fl FieldLevel) bool { + _, ok := iso3166_1_alpha2_eu[fl.Field().String()] + return ok } // isIso3166Alpha3 is the validation function for validating if the current field's value is a valid iso3166-1 alpha-3 country code. func isIso3166Alpha3(fl FieldLevel) bool { - val := fl.Field().String() - return iso3166_1_alpha3[val] + _, ok := iso3166_1_alpha3[fl.Field().String()] + return ok +} + +// isIso3166Alpha3EU is the validation function for validating if the current field's value is a valid iso3166-1 alpha-3 European Union country code. +func isIso3166Alpha3EU(fl FieldLevel) bool { + _, ok := iso3166_1_alpha3_eu[fl.Field().String()] + return ok } // isIso3166AlphaNumeric is the validation function for validating if the current field's value is a valid iso3166-1 alpha-numeric country code. @@ -2746,19 +2818,45 @@ func isIso3166AlphaNumeric(fl FieldLevel) bool { default: panic(fmt.Sprintf("Bad field type %T", field.Interface())) } - return iso3166_1_alpha_numeric[code] + + _, ok := iso3166_1_alpha_numeric[code] + return ok +} + +// isIso3166AlphaNumericEU is the validation function for validating if the current field's value is a valid iso3166-1 alpha-numeric European Union country code. +func isIso3166AlphaNumericEU(fl FieldLevel) bool { + field := fl.Field() + + var code int + switch field.Kind() { + case reflect.String: + i, err := strconv.Atoi(field.String()) + if err != nil { + return false + } + code = i % 1000 + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + code = int(field.Int() % 1000) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + code = int(field.Uint() % 1000) + default: + panic(fmt.Sprintf("Bad field type %T", field.Interface())) + } + + _, ok := iso3166_1_alpha_numeric_eu[code] + return ok } // isIso31662 is the validation function for validating if the current field's value is a valid iso3166-2 code. func isIso31662(fl FieldLevel) bool { - val := fl.Field().String() - return iso3166_2[val] + _, ok := iso3166_2[fl.Field().String()] + return ok } // isIso4217 is the validation function for validating if the current field's value is a valid iso4217 currency code. func isIso4217(fl FieldLevel) bool { - val := fl.Field().String() - return iso4217[val] + _, ok := iso4217[fl.Field().String()] + return ok } // isIso4217Numeric is the validation function for validating if the current field's value is a valid iso4217 numeric currency code. @@ -2774,7 +2872,9 @@ func isIso4217Numeric(fl FieldLevel) bool { default: panic(fmt.Sprintf("Bad field type %T", field.Interface())) } - return iso4217_numeric[code] + + _, ok := iso4217_numeric[code] + return ok } // isBCP47LanguageTag is the validation function for validating if the current field's value is a valid BCP 47 language tag, as parsed by language.Parse @@ -2793,21 +2893,21 @@ func isBCP47LanguageTag(fl FieldLevel) bool { func isIsoBicFormat(fl FieldLevel) bool { bicString := fl.Field().String() - return bicRegex.MatchString(bicString) + return bicRegex().MatchString(bicString) } // isSemverFormat is the validation function for validating if the current field's value is a valid semver version, defined in Semantic Versioning 2.0.0 func isSemverFormat(fl FieldLevel) bool { semverString := fl.Field().String() - return semverRegex.MatchString(semverString) + return semverRegex().MatchString(semverString) } // isCveFormat is the validation function for validating if the current field's value is a valid cve id, defined in CVE mitre org func isCveFormat(fl FieldLevel) bool { cveString := fl.Field().String() - return cveRegex.MatchString(cveString) + return cveRegex().MatchString(cveString) } // isDnsRFC1035LabelFormat is the validation function @@ -2815,7 +2915,7 @@ func isCveFormat(fl FieldLevel) bool { // a valid dns RFC 1035 label, defined in RFC 1035. func isDnsRFC1035LabelFormat(fl FieldLevel) bool { val := fl.Field().String() - return dnsRegexRFC1035Label.MatchString(val) + return dnsRegexRFC1035Label().MatchString(val) } // digitsHaveLuhnChecksum returns true if and only if the last element of the given digits slice is the Luhn checksum of the previous elements @@ -2841,10 +2941,16 @@ func digitsHaveLuhnChecksum(digits []string) bool { return (sum % 10) == 0 } -// isMongoDB is the validation function for validating if the current field's value is valid mongoDB objectID -func isMongoDB(fl FieldLevel) bool { +// isMongoDBObjectId is the validation function for validating if the current field's value is valid MongoDB ObjectID +func isMongoDBObjectId(fl FieldLevel) bool { + val := fl.Field().String() + return mongodbIdRegex().MatchString(val) +} + +// isMongoDBConnectionString is the validation function for validating if the current field's value is valid MongoDB Connection String +func isMongoDBConnectionString(fl FieldLevel) bool { val := fl.Field().String() - return mongodbRegex.MatchString(val) + return mongodbConnectionRegex().MatchString(val) } // isSpiceDB is the validation function for validating if the current field's value is valid for use with Authzed SpiceDB in the indicated way @@ -2854,11 +2960,11 @@ func isSpiceDB(fl FieldLevel) bool { switch param { case "permission": - return spicedbPermissionRegex.MatchString(val) + return spicedbPermissionRegex().MatchString(val) case "type": - return spicedbTypeRegex.MatchString(val) + return spicedbTypeRegex().MatchString(val) case "id", "": - return spicedbIDRegex.MatchString(val) + return spicedbIDRegex().MatchString(val) } panic("Unrecognized parameter: " + param) @@ -2910,5 +3016,5 @@ func hasLuhnChecksum(fl FieldLevel) bool { // isCron is the validation function for validating if the current field's value is a valid cron expression func isCron(fl FieldLevel) bool { cronString := fl.Field().String() - return cronRegex.MatchString(cronString) + return cronRegex().MatchString(cronString) } diff --git a/vendor/github.com/go-playground/validator/v10/cache.go b/vendor/github.com/go-playground/validator/v10/cache.go index ddd37b83..2063e1b7 100644 --- a/vendor/github.com/go-playground/validator/v10/cache.go +++ b/vendor/github.com/go-playground/validator/v10/cache.go @@ -20,7 +20,7 @@ const ( typeOr typeKeys typeEndKeys - typeNestedStructLevel + typeOmitNil ) const ( @@ -126,7 +126,7 @@ func (v *Validate) extractStructCache(current reflect.Value, sName string) *cStr fld = typ.Field(i) - if !fld.Anonymous && len(fld.PkgPath) > 0 { + if !v.privateFieldValidation && !fld.Anonymous && len(fld.PkgPath) > 0 { continue } @@ -153,7 +153,7 @@ func (v *Validate) extractStructCache(current reflect.Value, sName string) *cStr // and so only struct level caching can be used instead of combined with Field tag caching if len(tag) > 0 { - ctag, _ = v.parseFieldTagsRecursive(tag, fld, "", false) + ctag, _ = v.parseFieldTagsRecursive(tag, fld.Name, "", false) } else { // even if field doesn't have validations need cTag for traversing to potential inner/nested // elements of the field. @@ -172,7 +172,7 @@ func (v *Validate) extractStructCache(current reflect.Value, sName string) *cStr return cs } -func (v *Validate) parseFieldTagsRecursive(tag string, field reflect.StructField, alias string, hasAlias bool) (firstCtag *cTag, current *cTag) { +func (v *Validate) parseFieldTagsRecursive(tag string, fieldName string, alias string, hasAlias bool) (firstCtag *cTag, current *cTag) { var t string noAlias := len(alias) == 0 tags := strings.Split(tag, tagSeparator) @@ -186,9 +186,9 @@ func (v *Validate) parseFieldTagsRecursive(tag string, field reflect.StructField // check map for alias and process new tags, otherwise process as usual if tagsVal, found := v.aliases[t]; found { if i == 0 { - firstCtag, current = v.parseFieldTagsRecursive(tagsVal, field, t, true) + firstCtag, current = v.parseFieldTagsRecursive(tagsVal, fieldName, t, true) } else { - next, curr := v.parseFieldTagsRecursive(tagsVal, field, t, true) + next, curr := v.parseFieldTagsRecursive(tagsVal, fieldName, t, true) current.next, current = next, curr } @@ -236,7 +236,7 @@ func (v *Validate) parseFieldTagsRecursive(tag string, field reflect.StructField } } - current.keys, _ = v.parseFieldTagsRecursive(string(b[:len(b)-1]), field, "", false) + current.keys, _ = v.parseFieldTagsRecursive(string(b[:len(b)-1]), fieldName, "", false) continue case endKeysTag: @@ -253,6 +253,10 @@ func (v *Validate) parseFieldTagsRecursive(tag string, field reflect.StructField current.typeof = typeOmitEmpty continue + case omitnil: + current.typeof = typeOmitNil + continue + case structOnlyTag: current.typeof = typeStructOnly continue @@ -285,18 +289,14 @@ func (v *Validate) parseFieldTagsRecursive(tag string, field reflect.StructField current.tag = vals[0] if len(current.tag) == 0 { - panic(strings.TrimSpace(fmt.Sprintf(invalidValidation, field.Name))) + panic(strings.TrimSpace(fmt.Sprintf(invalidValidation, fieldName))) } if wrapper, ok := v.validations[current.tag]; ok { current.fn = wrapper.fn - current.runValidationWhenNil = wrapper.runValidatinOnNil + current.runValidationWhenNil = wrapper.runValidationOnNil } else { - panic(strings.TrimSpace(fmt.Sprintf(undefinedValidation, current.tag, field.Name))) - } - - if current.typeof == typeDefault && isNestedStructOrStructPtr(field) { - current.typeof = typeNestedStructLevel + panic(strings.TrimSpace(fmt.Sprintf(undefinedValidation, current.tag, fieldName))) } if len(orVals) > 1 { @@ -324,7 +324,7 @@ func (v *Validate) fetchCacheTag(tag string) *cTag { // isn't parsed again. ctag, found = v.tagCache.Get(tag) if !found { - ctag, _ = v.parseFieldTagsRecursive(tag, reflect.StructField{}, "", false) + ctag, _ = v.parseFieldTagsRecursive(tag, "", "", false) v.tagCache.Set(tag, ctag) } } diff --git a/vendor/github.com/go-playground/validator/v10/country_codes.go b/vendor/github.com/go-playground/validator/v10/country_codes.go index 0119f057..b5f10d3c 100644 --- a/vendor/github.com/go-playground/validator/v10/country_codes.go +++ b/vendor/github.com/go-playground/validator/v10/country_codes.go @@ -1,1150 +1,1177 @@ package validator -var iso3166_1_alpha2 = map[string]bool{ +var iso3166_1_alpha2 = map[string]struct{}{ // see: https://www.iso.org/iso-3166-country-codes.html - "AF": true, "AX": true, "AL": true, "DZ": true, "AS": true, - "AD": true, "AO": true, "AI": true, "AQ": true, "AG": true, - "AR": true, "AM": true, "AW": true, "AU": true, "AT": true, - "AZ": true, "BS": true, "BH": true, "BD": true, "BB": true, - "BY": true, "BE": true, "BZ": true, "BJ": true, "BM": true, - "BT": true, "BO": true, "BQ": true, "BA": true, "BW": true, - "BV": true, "BR": true, "IO": true, "BN": true, "BG": true, - "BF": true, "BI": true, "KH": true, "CM": true, "CA": true, - "CV": true, "KY": true, "CF": true, "TD": true, "CL": true, - "CN": true, "CX": true, "CC": true, "CO": true, "KM": true, - "CG": true, "CD": true, "CK": true, "CR": true, "CI": true, - "HR": true, "CU": true, "CW": true, "CY": true, "CZ": true, - "DK": true, "DJ": true, "DM": true, "DO": true, "EC": true, - "EG": true, "SV": true, "GQ": true, "ER": true, "EE": true, - "ET": true, "FK": true, "FO": true, "FJ": true, "FI": true, - "FR": true, "GF": true, "PF": true, "TF": true, "GA": true, - "GM": true, "GE": true, "DE": true, "GH": true, "GI": true, - "GR": true, "GL": true, "GD": true, "GP": true, "GU": true, - "GT": true, "GG": true, "GN": true, "GW": true, "GY": true, - "HT": true, "HM": true, "VA": true, "HN": true, "HK": true, - "HU": true, "IS": true, "IN": true, "ID": true, "IR": true, - "IQ": true, "IE": true, "IM": true, "IL": true, "IT": true, - "JM": true, "JP": true, "JE": true, "JO": true, "KZ": true, - "KE": true, "KI": true, "KP": true, "KR": true, "KW": true, - "KG": true, "LA": true, "LV": true, "LB": true, "LS": true, - "LR": true, "LY": true, "LI": true, "LT": true, "LU": true, - "MO": true, "MK": true, "MG": true, "MW": true, "MY": true, - "MV": true, "ML": true, "MT": true, "MH": true, "MQ": true, - "MR": true, "MU": true, "YT": true, "MX": true, "FM": true, - "MD": true, "MC": true, "MN": true, "ME": true, "MS": true, - "MA": true, "MZ": true, "MM": true, "NA": true, "NR": true, - "NP": true, "NL": true, "NC": true, "NZ": true, "NI": true, - "NE": true, "NG": true, "NU": true, "NF": true, "MP": true, - "NO": true, "OM": true, "PK": true, "PW": true, "PS": true, - "PA": true, "PG": true, "PY": true, "PE": true, "PH": true, - "PN": true, "PL": true, "PT": true, "PR": true, "QA": true, - "RE": true, "RO": true, "RU": true, "RW": true, "BL": true, - "SH": true, "KN": true, "LC": true, "MF": true, "PM": true, - "VC": true, "WS": true, "SM": true, "ST": true, "SA": true, - "SN": true, "RS": true, "SC": true, "SL": true, "SG": true, - "SX": true, "SK": true, "SI": true, "SB": true, "SO": true, - "ZA": true, "GS": true, "SS": true, "ES": true, "LK": true, - "SD": true, "SR": true, "SJ": true, "SZ": true, "SE": true, - "CH": true, "SY": true, "TW": true, "TJ": true, "TZ": true, - "TH": true, "TL": true, "TG": true, "TK": true, "TO": true, - "TT": true, "TN": true, "TR": true, "TM": true, "TC": true, - "TV": true, "UG": true, "UA": true, "AE": true, "GB": true, - "US": true, "UM": true, "UY": true, "UZ": true, "VU": true, - "VE": true, "VN": true, "VG": true, "VI": true, "WF": true, - "EH": true, "YE": true, "ZM": true, "ZW": true, "XK": true, + "AF": {}, "AX": {}, "AL": {}, "DZ": {}, "AS": {}, + "AD": {}, "AO": {}, "AI": {}, "AQ": {}, "AG": {}, + "AR": {}, "AM": {}, "AW": {}, "AU": {}, "AT": {}, + "AZ": {}, "BS": {}, "BH": {}, "BD": {}, "BB": {}, + "BY": {}, "BE": {}, "BZ": {}, "BJ": {}, "BM": {}, + "BT": {}, "BO": {}, "BQ": {}, "BA": {}, "BW": {}, + "BV": {}, "BR": {}, "IO": {}, "BN": {}, "BG": {}, + "BF": {}, "BI": {}, "KH": {}, "CM": {}, "CA": {}, + "CV": {}, "KY": {}, "CF": {}, "TD": {}, "CL": {}, + "CN": {}, "CX": {}, "CC": {}, "CO": {}, "KM": {}, + "CG": {}, "CD": {}, "CK": {}, "CR": {}, "CI": {}, + "HR": {}, "CU": {}, "CW": {}, "CY": {}, "CZ": {}, + "DK": {}, "DJ": {}, "DM": {}, "DO": {}, "EC": {}, + "EG": {}, "SV": {}, "GQ": {}, "ER": {}, "EE": {}, + "ET": {}, "FK": {}, "FO": {}, "FJ": {}, "FI": {}, + "FR": {}, "GF": {}, "PF": {}, "TF": {}, "GA": {}, + "GM": {}, "GE": {}, "DE": {}, "GH": {}, "GI": {}, + "GR": {}, "GL": {}, "GD": {}, "GP": {}, "GU": {}, + "GT": {}, "GG": {}, "GN": {}, "GW": {}, "GY": {}, + "HT": {}, "HM": {}, "VA": {}, "HN": {}, "HK": {}, + "HU": {}, "IS": {}, "IN": {}, "ID": {}, "IR": {}, + "IQ": {}, "IE": {}, "IM": {}, "IL": {}, "IT": {}, + "JM": {}, "JP": {}, "JE": {}, "JO": {}, "KZ": {}, + "KE": {}, "KI": {}, "KP": {}, "KR": {}, "KW": {}, + "KG": {}, "LA": {}, "LV": {}, "LB": {}, "LS": {}, + "LR": {}, "LY": {}, "LI": {}, "LT": {}, "LU": {}, + "MO": {}, "MK": {}, "MG": {}, "MW": {}, "MY": {}, + "MV": {}, "ML": {}, "MT": {}, "MH": {}, "MQ": {}, + "MR": {}, "MU": {}, "YT": {}, "MX": {}, "FM": {}, + "MD": {}, "MC": {}, "MN": {}, "ME": {}, "MS": {}, + "MA": {}, "MZ": {}, "MM": {}, "NA": {}, "NR": {}, + "NP": {}, "NL": {}, "NC": {}, "NZ": {}, "NI": {}, + "NE": {}, "NG": {}, "NU": {}, "NF": {}, "MP": {}, + "NO": {}, "OM": {}, "PK": {}, "PW": {}, "PS": {}, + "PA": {}, "PG": {}, "PY": {}, "PE": {}, "PH": {}, + "PN": {}, "PL": {}, "PT": {}, "PR": {}, "QA": {}, + "RE": {}, "RO": {}, "RU": {}, "RW": {}, "BL": {}, + "SH": {}, "KN": {}, "LC": {}, "MF": {}, "PM": {}, + "VC": {}, "WS": {}, "SM": {}, "ST": {}, "SA": {}, + "SN": {}, "RS": {}, "SC": {}, "SL": {}, "SG": {}, + "SX": {}, "SK": {}, "SI": {}, "SB": {}, "SO": {}, + "ZA": {}, "GS": {}, "SS": {}, "ES": {}, "LK": {}, + "SD": {}, "SR": {}, "SJ": {}, "SZ": {}, "SE": {}, + "CH": {}, "SY": {}, "TW": {}, "TJ": {}, "TZ": {}, + "TH": {}, "TL": {}, "TG": {}, "TK": {}, "TO": {}, + "TT": {}, "TN": {}, "TR": {}, "TM": {}, "TC": {}, + "TV": {}, "UG": {}, "UA": {}, "AE": {}, "GB": {}, + "US": {}, "UM": {}, "UY": {}, "UZ": {}, "VU": {}, + "VE": {}, "VN": {}, "VG": {}, "VI": {}, "WF": {}, + "EH": {}, "YE": {}, "ZM": {}, "ZW": {}, "XK": {}, } -var iso3166_1_alpha3 = map[string]bool{ +var iso3166_1_alpha2_eu = map[string]struct{}{ + "AT": {}, "BE": {}, "BG": {}, "HR": {}, "CY": {}, + "CZ": {}, "DK": {}, "EE": {}, "FI": {}, "FR": {}, + "DE": {}, "GR": {}, "HU": {}, "IE": {}, "IT": {}, + "LV": {}, "LT": {}, "LU": {}, "MT": {}, "NL": {}, + "PL": {}, "PT": {}, "RO": {}, "SK": {}, "SI": {}, + "ES": {}, "SE": {}, +} + +var iso3166_1_alpha3 = map[string]struct{}{ // see: https://www.iso.org/iso-3166-country-codes.html - "AFG": true, "ALB": true, "DZA": true, "ASM": true, "AND": true, - "AGO": true, "AIA": true, "ATA": true, "ATG": true, "ARG": true, - "ARM": true, "ABW": true, "AUS": true, "AUT": true, "AZE": true, - "BHS": true, "BHR": true, "BGD": true, "BRB": true, "BLR": true, - "BEL": true, "BLZ": true, "BEN": true, "BMU": true, "BTN": true, - "BOL": true, "BES": true, "BIH": true, "BWA": true, "BVT": true, - "BRA": true, "IOT": true, "BRN": true, "BGR": true, "BFA": true, - "BDI": true, "CPV": true, "KHM": true, "CMR": true, "CAN": true, - "CYM": true, "CAF": true, "TCD": true, "CHL": true, "CHN": true, - "CXR": true, "CCK": true, "COL": true, "COM": true, "COD": true, - "COG": true, "COK": true, "CRI": true, "HRV": true, "CUB": true, - "CUW": true, "CYP": true, "CZE": true, "CIV": true, "DNK": true, - "DJI": true, "DMA": true, "DOM": true, "ECU": true, "EGY": true, - "SLV": true, "GNQ": true, "ERI": true, "EST": true, "SWZ": true, - "ETH": true, "FLK": true, "FRO": true, "FJI": true, "FIN": true, - "FRA": true, "GUF": true, "PYF": true, "ATF": true, "GAB": true, - "GMB": true, "GEO": true, "DEU": true, "GHA": true, "GIB": true, - "GRC": true, "GRL": true, "GRD": true, "GLP": true, "GUM": true, - "GTM": true, "GGY": true, "GIN": true, "GNB": true, "GUY": true, - "HTI": true, "HMD": true, "VAT": true, "HND": true, "HKG": true, - "HUN": true, "ISL": true, "IND": true, "IDN": true, "IRN": true, - "IRQ": true, "IRL": true, "IMN": true, "ISR": true, "ITA": true, - "JAM": true, "JPN": true, "JEY": true, "JOR": true, "KAZ": true, - "KEN": true, "KIR": true, "PRK": true, "KOR": true, "KWT": true, - "KGZ": true, "LAO": true, "LVA": true, "LBN": true, "LSO": true, - "LBR": true, "LBY": true, "LIE": true, "LTU": true, "LUX": true, - "MAC": true, "MDG": true, "MWI": true, "MYS": true, "MDV": true, - "MLI": true, "MLT": true, "MHL": true, "MTQ": true, "MRT": true, - "MUS": true, "MYT": true, "MEX": true, "FSM": true, "MDA": true, - "MCO": true, "MNG": true, "MNE": true, "MSR": true, "MAR": true, - "MOZ": true, "MMR": true, "NAM": true, "NRU": true, "NPL": true, - "NLD": true, "NCL": true, "NZL": true, "NIC": true, "NER": true, - "NGA": true, "NIU": true, "NFK": true, "MKD": true, "MNP": true, - "NOR": true, "OMN": true, "PAK": true, "PLW": true, "PSE": true, - "PAN": true, "PNG": true, "PRY": true, "PER": true, "PHL": true, - "PCN": true, "POL": true, "PRT": true, "PRI": true, "QAT": true, - "ROU": true, "RUS": true, "RWA": true, "REU": true, "BLM": true, - "SHN": true, "KNA": true, "LCA": true, "MAF": true, "SPM": true, - "VCT": true, "WSM": true, "SMR": true, "STP": true, "SAU": true, - "SEN": true, "SRB": true, "SYC": true, "SLE": true, "SGP": true, - "SXM": true, "SVK": true, "SVN": true, "SLB": true, "SOM": true, - "ZAF": true, "SGS": true, "SSD": true, "ESP": true, "LKA": true, - "SDN": true, "SUR": true, "SJM": true, "SWE": true, "CHE": true, - "SYR": true, "TWN": true, "TJK": true, "TZA": true, "THA": true, - "TLS": true, "TGO": true, "TKL": true, "TON": true, "TTO": true, - "TUN": true, "TUR": true, "TKM": true, "TCA": true, "TUV": true, - "UGA": true, "UKR": true, "ARE": true, "GBR": true, "UMI": true, - "USA": true, "URY": true, "UZB": true, "VUT": true, "VEN": true, - "VNM": true, "VGB": true, "VIR": true, "WLF": true, "ESH": true, - "YEM": true, "ZMB": true, "ZWE": true, "ALA": true, "UNK": true, + "AFG": {}, "ALB": {}, "DZA": {}, "ASM": {}, "AND": {}, + "AGO": {}, "AIA": {}, "ATA": {}, "ATG": {}, "ARG": {}, + "ARM": {}, "ABW": {}, "AUS": {}, "AUT": {}, "AZE": {}, + "BHS": {}, "BHR": {}, "BGD": {}, "BRB": {}, "BLR": {}, + "BEL": {}, "BLZ": {}, "BEN": {}, "BMU": {}, "BTN": {}, + "BOL": {}, "BES": {}, "BIH": {}, "BWA": {}, "BVT": {}, + "BRA": {}, "IOT": {}, "BRN": {}, "BGR": {}, "BFA": {}, + "BDI": {}, "CPV": {}, "KHM": {}, "CMR": {}, "CAN": {}, + "CYM": {}, "CAF": {}, "TCD": {}, "CHL": {}, "CHN": {}, + "CXR": {}, "CCK": {}, "COL": {}, "COM": {}, "COD": {}, + "COG": {}, "COK": {}, "CRI": {}, "HRV": {}, "CUB": {}, + "CUW": {}, "CYP": {}, "CZE": {}, "CIV": {}, "DNK": {}, + "DJI": {}, "DMA": {}, "DOM": {}, "ECU": {}, "EGY": {}, + "SLV": {}, "GNQ": {}, "ERI": {}, "EST": {}, "SWZ": {}, + "ETH": {}, "FLK": {}, "FRO": {}, "FJI": {}, "FIN": {}, + "FRA": {}, "GUF": {}, "PYF": {}, "ATF": {}, "GAB": {}, + "GMB": {}, "GEO": {}, "DEU": {}, "GHA": {}, "GIB": {}, + "GRC": {}, "GRL": {}, "GRD": {}, "GLP": {}, "GUM": {}, + "GTM": {}, "GGY": {}, "GIN": {}, "GNB": {}, "GUY": {}, + "HTI": {}, "HMD": {}, "VAT": {}, "HND": {}, "HKG": {}, + "HUN": {}, "ISL": {}, "IND": {}, "IDN": {}, "IRN": {}, + "IRQ": {}, "IRL": {}, "IMN": {}, "ISR": {}, "ITA": {}, + "JAM": {}, "JPN": {}, "JEY": {}, "JOR": {}, "KAZ": {}, + "KEN": {}, "KIR": {}, "PRK": {}, "KOR": {}, "KWT": {}, + "KGZ": {}, "LAO": {}, "LVA": {}, "LBN": {}, "LSO": {}, + "LBR": {}, "LBY": {}, "LIE": {}, "LTU": {}, "LUX": {}, + "MAC": {}, "MDG": {}, "MWI": {}, "MYS": {}, "MDV": {}, + "MLI": {}, "MLT": {}, "MHL": {}, "MTQ": {}, "MRT": {}, + "MUS": {}, "MYT": {}, "MEX": {}, "FSM": {}, "MDA": {}, + "MCO": {}, "MNG": {}, "MNE": {}, "MSR": {}, "MAR": {}, + "MOZ": {}, "MMR": {}, "NAM": {}, "NRU": {}, "NPL": {}, + "NLD": {}, "NCL": {}, "NZL": {}, "NIC": {}, "NER": {}, + "NGA": {}, "NIU": {}, "NFK": {}, "MKD": {}, "MNP": {}, + "NOR": {}, "OMN": {}, "PAK": {}, "PLW": {}, "PSE": {}, + "PAN": {}, "PNG": {}, "PRY": {}, "PER": {}, "PHL": {}, + "PCN": {}, "POL": {}, "PRT": {}, "PRI": {}, "QAT": {}, + "ROU": {}, "RUS": {}, "RWA": {}, "REU": {}, "BLM": {}, + "SHN": {}, "KNA": {}, "LCA": {}, "MAF": {}, "SPM": {}, + "VCT": {}, "WSM": {}, "SMR": {}, "STP": {}, "SAU": {}, + "SEN": {}, "SRB": {}, "SYC": {}, "SLE": {}, "SGP": {}, + "SXM": {}, "SVK": {}, "SVN": {}, "SLB": {}, "SOM": {}, + "ZAF": {}, "SGS": {}, "SSD": {}, "ESP": {}, "LKA": {}, + "SDN": {}, "SUR": {}, "SJM": {}, "SWE": {}, "CHE": {}, + "SYR": {}, "TWN": {}, "TJK": {}, "TZA": {}, "THA": {}, + "TLS": {}, "TGO": {}, "TKL": {}, "TON": {}, "TTO": {}, + "TUN": {}, "TUR": {}, "TKM": {}, "TCA": {}, "TUV": {}, + "UGA": {}, "UKR": {}, "ARE": {}, "GBR": {}, "UMI": {}, + "USA": {}, "URY": {}, "UZB": {}, "VUT": {}, "VEN": {}, + "VNM": {}, "VGB": {}, "VIR": {}, "WLF": {}, "ESH": {}, + "YEM": {}, "ZMB": {}, "ZWE": {}, "ALA": {}, "UNK": {}, +} + +var iso3166_1_alpha3_eu = map[string]struct{}{ + "AUT": {}, "BEL": {}, "BGR": {}, "HRV": {}, "CYP": {}, + "CZE": {}, "DNK": {}, "EST": {}, "FIN": {}, "FRA": {}, + "DEU": {}, "GRC": {}, "HUN": {}, "IRL": {}, "ITA": {}, + "LVA": {}, "LTU": {}, "LUX": {}, "MLT": {}, "NLD": {}, + "POL": {}, "PRT": {}, "ROU": {}, "SVK": {}, "SVN": {}, + "ESP": {}, "SWE": {}, } -var iso3166_1_alpha_numeric = map[int]bool{ +var iso3166_1_alpha_numeric = map[int]struct{}{ // see: https://www.iso.org/iso-3166-country-codes.html - 4: true, 8: true, 12: true, 16: true, 20: true, - 24: true, 660: true, 10: true, 28: true, 32: true, - 51: true, 533: true, 36: true, 40: true, 31: true, - 44: true, 48: true, 50: true, 52: true, 112: true, - 56: true, 84: true, 204: true, 60: true, 64: true, - 68: true, 535: true, 70: true, 72: true, 74: true, - 76: true, 86: true, 96: true, 100: true, 854: true, - 108: true, 132: true, 116: true, 120: true, 124: true, - 136: true, 140: true, 148: true, 152: true, 156: true, - 162: true, 166: true, 170: true, 174: true, 180: true, - 178: true, 184: true, 188: true, 191: true, 192: true, - 531: true, 196: true, 203: true, 384: true, 208: true, - 262: true, 212: true, 214: true, 218: true, 818: true, - 222: true, 226: true, 232: true, 233: true, 748: true, - 231: true, 238: true, 234: true, 242: true, 246: true, - 250: true, 254: true, 258: true, 260: true, 266: true, - 270: true, 268: true, 276: true, 288: true, 292: true, - 300: true, 304: true, 308: true, 312: true, 316: true, - 320: true, 831: true, 324: true, 624: true, 328: true, - 332: true, 334: true, 336: true, 340: true, 344: true, - 348: true, 352: true, 356: true, 360: true, 364: true, - 368: true, 372: true, 833: true, 376: true, 380: true, - 388: true, 392: true, 832: true, 400: true, 398: true, - 404: true, 296: true, 408: true, 410: true, 414: true, - 417: true, 418: true, 428: true, 422: true, 426: true, - 430: true, 434: true, 438: true, 440: true, 442: true, - 446: true, 450: true, 454: true, 458: true, 462: true, - 466: true, 470: true, 584: true, 474: true, 478: true, - 480: true, 175: true, 484: true, 583: true, 498: true, - 492: true, 496: true, 499: true, 500: true, 504: true, - 508: true, 104: true, 516: true, 520: true, 524: true, - 528: true, 540: true, 554: true, 558: true, 562: true, - 566: true, 570: true, 574: true, 807: true, 580: true, - 578: true, 512: true, 586: true, 585: true, 275: true, - 591: true, 598: true, 600: true, 604: true, 608: true, - 612: true, 616: true, 620: true, 630: true, 634: true, - 642: true, 643: true, 646: true, 638: true, 652: true, - 654: true, 659: true, 662: true, 663: true, 666: true, - 670: true, 882: true, 674: true, 678: true, 682: true, - 686: true, 688: true, 690: true, 694: true, 702: true, - 534: true, 703: true, 705: true, 90: true, 706: true, - 710: true, 239: true, 728: true, 724: true, 144: true, - 729: true, 740: true, 744: true, 752: true, 756: true, - 760: true, 158: true, 762: true, 834: true, 764: true, - 626: true, 768: true, 772: true, 776: true, 780: true, - 788: true, 792: true, 795: true, 796: true, 798: true, - 800: true, 804: true, 784: true, 826: true, 581: true, - 840: true, 858: true, 860: true, 548: true, 862: true, - 704: true, 92: true, 850: true, 876: true, 732: true, - 887: true, 894: true, 716: true, 248: true, 153: true, + 4: {}, 8: {}, 12: {}, 16: {}, 20: {}, + 24: {}, 660: {}, 10: {}, 28: {}, 32: {}, + 51: {}, 533: {}, 36: {}, 40: {}, 31: {}, + 44: {}, 48: {}, 50: {}, 52: {}, 112: {}, + 56: {}, 84: {}, 204: {}, 60: {}, 64: {}, + 68: {}, 535: {}, 70: {}, 72: {}, 74: {}, + 76: {}, 86: {}, 96: {}, 100: {}, 854: {}, + 108: {}, 132: {}, 116: {}, 120: {}, 124: {}, + 136: {}, 140: {}, 148: {}, 152: {}, 156: {}, + 162: {}, 166: {}, 170: {}, 174: {}, 180: {}, + 178: {}, 184: {}, 188: {}, 191: {}, 192: {}, + 531: {}, 196: {}, 203: {}, 384: {}, 208: {}, + 262: {}, 212: {}, 214: {}, 218: {}, 818: {}, + 222: {}, 226: {}, 232: {}, 233: {}, 748: {}, + 231: {}, 238: {}, 234: {}, 242: {}, 246: {}, + 250: {}, 254: {}, 258: {}, 260: {}, 266: {}, + 270: {}, 268: {}, 276: {}, 288: {}, 292: {}, + 300: {}, 304: {}, 308: {}, 312: {}, 316: {}, + 320: {}, 831: {}, 324: {}, 624: {}, 328: {}, + 332: {}, 334: {}, 336: {}, 340: {}, 344: {}, + 348: {}, 352: {}, 356: {}, 360: {}, 364: {}, + 368: {}, 372: {}, 833: {}, 376: {}, 380: {}, + 388: {}, 392: {}, 832: {}, 400: {}, 398: {}, + 404: {}, 296: {}, 408: {}, 410: {}, 414: {}, + 417: {}, 418: {}, 428: {}, 422: {}, 426: {}, + 430: {}, 434: {}, 438: {}, 440: {}, 442: {}, + 446: {}, 450: {}, 454: {}, 458: {}, 462: {}, + 466: {}, 470: {}, 584: {}, 474: {}, 478: {}, + 480: {}, 175: {}, 484: {}, 583: {}, 498: {}, + 492: {}, 496: {}, 499: {}, 500: {}, 504: {}, + 508: {}, 104: {}, 516: {}, 520: {}, 524: {}, + 528: {}, 540: {}, 554: {}, 558: {}, 562: {}, + 566: {}, 570: {}, 574: {}, 807: {}, 580: {}, + 578: {}, 512: {}, 586: {}, 585: {}, 275: {}, + 591: {}, 598: {}, 600: {}, 604: {}, 608: {}, + 612: {}, 616: {}, 620: {}, 630: {}, 634: {}, + 642: {}, 643: {}, 646: {}, 638: {}, 652: {}, + 654: {}, 659: {}, 662: {}, 663: {}, 666: {}, + 670: {}, 882: {}, 674: {}, 678: {}, 682: {}, + 686: {}, 688: {}, 690: {}, 694: {}, 702: {}, + 534: {}, 703: {}, 705: {}, 90: {}, 706: {}, + 710: {}, 239: {}, 728: {}, 724: {}, 144: {}, + 729: {}, 740: {}, 744: {}, 752: {}, 756: {}, + 760: {}, 158: {}, 762: {}, 834: {}, 764: {}, + 626: {}, 768: {}, 772: {}, 776: {}, 780: {}, + 788: {}, 792: {}, 795: {}, 796: {}, 798: {}, + 800: {}, 804: {}, 784: {}, 826: {}, 581: {}, + 840: {}, 858: {}, 860: {}, 548: {}, 862: {}, + 704: {}, 92: {}, 850: {}, 876: {}, 732: {}, + 887: {}, 894: {}, 716: {}, 248: {}, 153: {}, +} + +var iso3166_1_alpha_numeric_eu = map[int]struct{}{ + 40: {}, 56: {}, 100: {}, 191: {}, 196: {}, + 200: {}, 208: {}, 233: {}, 246: {}, 250: {}, + 276: {}, 300: {}, 348: {}, 372: {}, 380: {}, + 428: {}, 440: {}, 442: {}, 470: {}, 528: {}, + 616: {}, 620: {}, 642: {}, 703: {}, 705: {}, + 724: {}, 752: {}, } -var iso3166_2 = map[string]bool{ - "AD-02": true, "AD-03": true, "AD-04": true, "AD-05": true, "AD-06": true, - "AD-07": true, "AD-08": true, "AE-AJ": true, "AE-AZ": true, "AE-DU": true, - "AE-FU": true, "AE-RK": true, "AE-SH": true, "AE-UQ": true, "AF-BAL": true, - "AF-BAM": true, "AF-BDG": true, "AF-BDS": true, "AF-BGL": true, "AF-DAY": true, - "AF-FRA": true, "AF-FYB": true, "AF-GHA": true, "AF-GHO": true, "AF-HEL": true, - "AF-HER": true, "AF-JOW": true, "AF-KAB": true, "AF-KAN": true, "AF-KAP": true, - "AF-KDZ": true, "AF-KHO": true, "AF-KNR": true, "AF-LAG": true, "AF-LOG": true, - "AF-NAN": true, "AF-NIM": true, "AF-NUR": true, "AF-PAN": true, "AF-PAR": true, - "AF-PIA": true, "AF-PKA": true, "AF-SAM": true, "AF-SAR": true, "AF-TAK": true, - "AF-URU": true, "AF-WAR": true, "AF-ZAB": true, "AG-03": true, "AG-04": true, - "AG-05": true, "AG-06": true, "AG-07": true, "AG-08": true, "AG-10": true, - "AG-11": true, "AL-01": true, "AL-02": true, "AL-03": true, "AL-04": true, - "AL-05": true, "AL-06": true, "AL-07": true, "AL-08": true, "AL-09": true, - "AL-10": true, "AL-11": true, "AL-12": true, "AL-BR": true, "AL-BU": true, - "AL-DI": true, "AL-DL": true, "AL-DR": true, "AL-DV": true, "AL-EL": true, - "AL-ER": true, "AL-FR": true, "AL-GJ": true, "AL-GR": true, "AL-HA": true, - "AL-KA": true, "AL-KB": true, "AL-KC": true, "AL-KO": true, "AL-KR": true, - "AL-KU": true, "AL-LB": true, "AL-LE": true, "AL-LU": true, "AL-MK": true, - "AL-MM": true, "AL-MR": true, "AL-MT": true, "AL-PG": true, "AL-PQ": true, - "AL-PR": true, "AL-PU": true, "AL-SH": true, "AL-SK": true, "AL-SR": true, - "AL-TE": true, "AL-TP": true, "AL-TR": true, "AL-VL": true, "AM-AG": true, - "AM-AR": true, "AM-AV": true, "AM-ER": true, "AM-GR": true, "AM-KT": true, - "AM-LO": true, "AM-SH": true, "AM-SU": true, "AM-TV": true, "AM-VD": true, - "AO-BGO": true, "AO-BGU": true, "AO-BIE": true, "AO-CAB": true, "AO-CCU": true, - "AO-CNN": true, "AO-CNO": true, "AO-CUS": true, "AO-HUA": true, "AO-HUI": true, - "AO-LNO": true, "AO-LSU": true, "AO-LUA": true, "AO-MAL": true, "AO-MOX": true, - "AO-NAM": true, "AO-UIG": true, "AO-ZAI": true, "AR-A": true, "AR-B": true, - "AR-C": true, "AR-D": true, "AR-E": true, "AR-F": true, "AR-G": true, "AR-H": true, - "AR-J": true, "AR-K": true, "AR-L": true, "AR-M": true, "AR-N": true, - "AR-P": true, "AR-Q": true, "AR-R": true, "AR-S": true, "AR-T": true, - "AR-U": true, "AR-V": true, "AR-W": true, "AR-X": true, "AR-Y": true, - "AR-Z": true, "AT-1": true, "AT-2": true, "AT-3": true, "AT-4": true, - "AT-5": true, "AT-6": true, "AT-7": true, "AT-8": true, "AT-9": true, - "AU-ACT": true, "AU-NSW": true, "AU-NT": true, "AU-QLD": true, "AU-SA": true, - "AU-TAS": true, "AU-VIC": true, "AU-WA": true, "AZ-ABS": true, "AZ-AGA": true, - "AZ-AGC": true, "AZ-AGM": true, "AZ-AGS": true, "AZ-AGU": true, "AZ-AST": true, - "AZ-BA": true, "AZ-BAB": true, "AZ-BAL": true, "AZ-BAR": true, "AZ-BEY": true, - "AZ-BIL": true, "AZ-CAB": true, "AZ-CAL": true, "AZ-CUL": true, "AZ-DAS": true, - "AZ-FUZ": true, "AZ-GA": true, "AZ-GAD": true, "AZ-GOR": true, "AZ-GOY": true, - "AZ-GYG": true, "AZ-HAC": true, "AZ-IMI": true, "AZ-ISM": true, "AZ-KAL": true, - "AZ-KAN": true, "AZ-KUR": true, "AZ-LA": true, "AZ-LAC": true, "AZ-LAN": true, - "AZ-LER": true, "AZ-MAS": true, "AZ-MI": true, "AZ-NA": true, "AZ-NEF": true, - "AZ-NV": true, "AZ-NX": true, "AZ-OGU": true, "AZ-ORD": true, "AZ-QAB": true, - "AZ-QAX": true, "AZ-QAZ": true, "AZ-QBA": true, "AZ-QBI": true, "AZ-QOB": true, - "AZ-QUS": true, "AZ-SA": true, "AZ-SAB": true, "AZ-SAD": true, "AZ-SAH": true, - "AZ-SAK": true, "AZ-SAL": true, "AZ-SAR": true, "AZ-SAT": true, "AZ-SBN": true, - "AZ-SIY": true, "AZ-SKR": true, "AZ-SM": true, "AZ-SMI": true, "AZ-SMX": true, - "AZ-SR": true, "AZ-SUS": true, "AZ-TAR": true, "AZ-TOV": true, "AZ-UCA": true, - "AZ-XA": true, "AZ-XAC": true, "AZ-XCI": true, "AZ-XIZ": true, "AZ-XVD": true, - "AZ-YAR": true, "AZ-YE": true, "AZ-YEV": true, "AZ-ZAN": true, "AZ-ZAQ": true, - "AZ-ZAR": true, "BA-01": true, "BA-02": true, "BA-03": true, "BA-04": true, - "BA-05": true, "BA-06": true, "BA-07": true, "BA-08": true, "BA-09": true, - "BA-10": true, "BA-BIH": true, "BA-BRC": true, "BA-SRP": true, "BB-01": true, - "BB-02": true, "BB-03": true, "BB-04": true, "BB-05": true, "BB-06": true, - "BB-07": true, "BB-08": true, "BB-09": true, "BB-10": true, "BB-11": true, - "BD-01": true, "BD-02": true, "BD-03": true, "BD-04": true, "BD-05": true, - "BD-06": true, "BD-07": true, "BD-08": true, "BD-09": true, "BD-10": true, - "BD-11": true, "BD-12": true, "BD-13": true, "BD-14": true, "BD-15": true, - "BD-16": true, "BD-17": true, "BD-18": true, "BD-19": true, "BD-20": true, - "BD-21": true, "BD-22": true, "BD-23": true, "BD-24": true, "BD-25": true, - "BD-26": true, "BD-27": true, "BD-28": true, "BD-29": true, "BD-30": true, - "BD-31": true, "BD-32": true, "BD-33": true, "BD-34": true, "BD-35": true, - "BD-36": true, "BD-37": true, "BD-38": true, "BD-39": true, "BD-40": true, - "BD-41": true, "BD-42": true, "BD-43": true, "BD-44": true, "BD-45": true, - "BD-46": true, "BD-47": true, "BD-48": true, "BD-49": true, "BD-50": true, - "BD-51": true, "BD-52": true, "BD-53": true, "BD-54": true, "BD-55": true, - "BD-56": true, "BD-57": true, "BD-58": true, "BD-59": true, "BD-60": true, - "BD-61": true, "BD-62": true, "BD-63": true, "BD-64": true, "BD-A": true, - "BD-B": true, "BD-C": true, "BD-D": true, "BD-E": true, "BD-F": true, - "BD-G": true, "BE-BRU": true, "BE-VAN": true, "BE-VBR": true, "BE-VLG": true, - "BE-VLI": true, "BE-VOV": true, "BE-VWV": true, "BE-WAL": true, "BE-WBR": true, - "BE-WHT": true, "BE-WLG": true, "BE-WLX": true, "BE-WNA": true, "BF-01": true, - "BF-02": true, "BF-03": true, "BF-04": true, "BF-05": true, "BF-06": true, - "BF-07": true, "BF-08": true, "BF-09": true, "BF-10": true, "BF-11": true, - "BF-12": true, "BF-13": true, "BF-BAL": true, "BF-BAM": true, "BF-BAN": true, - "BF-BAZ": true, "BF-BGR": true, "BF-BLG": true, "BF-BLK": true, "BF-COM": true, - "BF-GAN": true, "BF-GNA": true, "BF-GOU": true, "BF-HOU": true, "BF-IOB": true, - "BF-KAD": true, "BF-KEN": true, "BF-KMD": true, "BF-KMP": true, "BF-KOP": true, - "BF-KOS": true, "BF-KOT": true, "BF-KOW": true, "BF-LER": true, "BF-LOR": true, - "BF-MOU": true, "BF-NAM": true, "BF-NAO": true, "BF-NAY": true, "BF-NOU": true, - "BF-OUB": true, "BF-OUD": true, "BF-PAS": true, "BF-PON": true, "BF-SEN": true, - "BF-SIS": true, "BF-SMT": true, "BF-SNG": true, "BF-SOM": true, "BF-SOR": true, - "BF-TAP": true, "BF-TUI": true, "BF-YAG": true, "BF-YAT": true, "BF-ZIR": true, - "BF-ZON": true, "BF-ZOU": true, "BG-01": true, "BG-02": true, "BG-03": true, - "BG-04": true, "BG-05": true, "BG-06": true, "BG-07": true, "BG-08": true, - "BG-09": true, "BG-10": true, "BG-11": true, "BG-12": true, "BG-13": true, - "BG-14": true, "BG-15": true, "BG-16": true, "BG-17": true, "BG-18": true, - "BG-19": true, "BG-20": true, "BG-21": true, "BG-22": true, "BG-23": true, - "BG-24": true, "BG-25": true, "BG-26": true, "BG-27": true, "BG-28": true, - "BH-13": true, "BH-14": true, "BH-15": true, "BH-16": true, "BH-17": true, - "BI-BB": true, "BI-BL": true, "BI-BM": true, "BI-BR": true, "BI-CA": true, - "BI-CI": true, "BI-GI": true, "BI-KI": true, "BI-KR": true, "BI-KY": true, - "BI-MA": true, "BI-MU": true, "BI-MW": true, "BI-NG": true, "BI-RM": true, "BI-RT": true, - "BI-RY": true, "BJ-AK": true, "BJ-AL": true, "BJ-AQ": true, "BJ-BO": true, - "BJ-CO": true, "BJ-DO": true, "BJ-KO": true, "BJ-LI": true, "BJ-MO": true, - "BJ-OU": true, "BJ-PL": true, "BJ-ZO": true, "BN-BE": true, "BN-BM": true, - "BN-TE": true, "BN-TU": true, "BO-B": true, "BO-C": true, "BO-H": true, - "BO-L": true, "BO-N": true, "BO-O": true, "BO-P": true, "BO-S": true, - "BO-T": true, "BQ-BO": true, "BQ-SA": true, "BQ-SE": true, "BR-AC": true, - "BR-AL": true, "BR-AM": true, "BR-AP": true, "BR-BA": true, "BR-CE": true, - "BR-DF": true, "BR-ES": true, "BR-FN": true, "BR-GO": true, "BR-MA": true, - "BR-MG": true, "BR-MS": true, "BR-MT": true, "BR-PA": true, "BR-PB": true, - "BR-PE": true, "BR-PI": true, "BR-PR": true, "BR-RJ": true, "BR-RN": true, - "BR-RO": true, "BR-RR": true, "BR-RS": true, "BR-SC": true, "BR-SE": true, - "BR-SP": true, "BR-TO": true, "BS-AK": true, "BS-BI": true, "BS-BP": true, - "BS-BY": true, "BS-CE": true, "BS-CI": true, "BS-CK": true, "BS-CO": true, - "BS-CS": true, "BS-EG": true, "BS-EX": true, "BS-FP": true, "BS-GC": true, - "BS-HI": true, "BS-HT": true, "BS-IN": true, "BS-LI": true, "BS-MC": true, - "BS-MG": true, "BS-MI": true, "BS-NE": true, "BS-NO": true, "BS-NP": true, "BS-NS": true, - "BS-RC": true, "BS-RI": true, "BS-SA": true, "BS-SE": true, "BS-SO": true, - "BS-SS": true, "BS-SW": true, "BS-WG": true, "BT-11": true, "BT-12": true, - "BT-13": true, "BT-14": true, "BT-15": true, "BT-21": true, "BT-22": true, - "BT-23": true, "BT-24": true, "BT-31": true, "BT-32": true, "BT-33": true, - "BT-34": true, "BT-41": true, "BT-42": true, "BT-43": true, "BT-44": true, - "BT-45": true, "BT-GA": true, "BT-TY": true, "BW-CE": true, "BW-CH": true, "BW-GH": true, - "BW-KG": true, "BW-KL": true, "BW-KW": true, "BW-NE": true, "BW-NW": true, - "BW-SE": true, "BW-SO": true, "BY-BR": true, "BY-HM": true, "BY-HO": true, - "BY-HR": true, "BY-MA": true, "BY-MI": true, "BY-VI": true, "BZ-BZ": true, - "BZ-CY": true, "BZ-CZL": true, "BZ-OW": true, "BZ-SC": true, "BZ-TOL": true, - "CA-AB": true, "CA-BC": true, "CA-MB": true, "CA-NB": true, "CA-NL": true, - "CA-NS": true, "CA-NT": true, "CA-NU": true, "CA-ON": true, "CA-PE": true, - "CA-QC": true, "CA-SK": true, "CA-YT": true, "CD-BC": true, "CD-BN": true, - "CD-EQ": true, "CD-HK": true, "CD-IT": true, "CD-KA": true, "CD-KC": true, "CD-KE": true, "CD-KG": true, "CD-KN": true, - "CD-KW": true, "CD-KS": true, "CD-LU": true, "CD-MA": true, "CD-NK": true, "CD-OR": true, "CD-SA": true, "CD-SK": true, - "CD-TA": true, "CD-TO": true, "CF-AC": true, "CF-BB": true, "CF-BGF": true, "CF-BK": true, "CF-HK": true, "CF-HM": true, - "CF-HS": true, "CF-KB": true, "CF-KG": true, "CF-LB": true, "CF-MB": true, - "CF-MP": true, "CF-NM": true, "CF-OP": true, "CF-SE": true, "CF-UK": true, - "CF-VK": true, "CG-11": true, "CG-12": true, "CG-13": true, "CG-14": true, - "CG-15": true, "CG-16": true, "CG-2": true, "CG-5": true, "CG-7": true, "CG-8": true, - "CG-9": true, "CG-BZV": true, "CH-AG": true, "CH-AI": true, "CH-AR": true, - "CH-BE": true, "CH-BL": true, "CH-BS": true, "CH-FR": true, "CH-GE": true, - "CH-GL": true, "CH-GR": true, "CH-JU": true, "CH-LU": true, "CH-NE": true, - "CH-NW": true, "CH-OW": true, "CH-SG": true, "CH-SH": true, "CH-SO": true, - "CH-SZ": true, "CH-TG": true, "CH-TI": true, "CH-UR": true, "CH-VD": true, - "CH-VS": true, "CH-ZG": true, "CH-ZH": true, "CI-AB": true, "CI-BS": true, - "CI-CM": true, "CI-DN": true, "CI-GD": true, "CI-LC": true, "CI-LG": true, - "CI-MG": true, "CI-SM": true, "CI-SV": true, "CI-VB": true, "CI-WR": true, - "CI-YM": true, "CI-ZZ": true, "CL-AI": true, "CL-AN": true, "CL-AP": true, - "CL-AR": true, "CL-AT": true, "CL-BI": true, "CL-CO": true, "CL-LI": true, - "CL-LL": true, "CL-LR": true, "CL-MA": true, "CL-ML": true, "CL-NB": true, "CL-RM": true, - "CL-TA": true, "CL-VS": true, "CM-AD": true, "CM-CE": true, "CM-EN": true, - "CM-ES": true, "CM-LT": true, "CM-NO": true, "CM-NW": true, "CM-OU": true, - "CM-SU": true, "CM-SW": true, "CN-AH": true, "CN-BJ": true, "CN-CQ": true, - "CN-FJ": true, "CN-GS": true, "CN-GD": true, "CN-GX": true, "CN-GZ": true, - "CN-HI": true, "CN-HE": true, "CN-HL": true, "CN-HA": true, "CN-HB": true, - "CN-HN": true, "CN-JS": true, "CN-JX": true, "CN-JL": true, "CN-LN": true, - "CN-NM": true, "CN-NX": true, "CN-QH": true, "CN-SN": true, "CN-SD": true, "CN-SH": true, - "CN-SX": true, "CN-SC": true, "CN-TJ": true, "CN-XJ": true, "CN-XZ": true, "CN-YN": true, - "CN-ZJ": true, "CO-AMA": true, "CO-ANT": true, "CO-ARA": true, "CO-ATL": true, - "CO-BOL": true, "CO-BOY": true, "CO-CAL": true, "CO-CAQ": true, "CO-CAS": true, - "CO-CAU": true, "CO-CES": true, "CO-CHO": true, "CO-COR": true, "CO-CUN": true, - "CO-DC": true, "CO-GUA": true, "CO-GUV": true, "CO-HUI": true, "CO-LAG": true, - "CO-MAG": true, "CO-MET": true, "CO-NAR": true, "CO-NSA": true, "CO-PUT": true, - "CO-QUI": true, "CO-RIS": true, "CO-SAN": true, "CO-SAP": true, "CO-SUC": true, - "CO-TOL": true, "CO-VAC": true, "CO-VAU": true, "CO-VID": true, "CR-A": true, - "CR-C": true, "CR-G": true, "CR-H": true, "CR-L": true, "CR-P": true, - "CR-SJ": true, "CU-01": true, "CU-02": true, "CU-03": true, "CU-04": true, - "CU-05": true, "CU-06": true, "CU-07": true, "CU-08": true, "CU-09": true, - "CU-10": true, "CU-11": true, "CU-12": true, "CU-13": true, "CU-14": true, "CU-15": true, - "CU-16": true, "CU-99": true, "CV-B": true, "CV-BR": true, "CV-BV": true, "CV-CA": true, - "CV-CF": true, "CV-CR": true, "CV-MA": true, "CV-MO": true, "CV-PA": true, - "CV-PN": true, "CV-PR": true, "CV-RB": true, "CV-RG": true, "CV-RS": true, - "CV-S": true, "CV-SD": true, "CV-SF": true, "CV-SL": true, "CV-SM": true, - "CV-SO": true, "CV-SS": true, "CV-SV": true, "CV-TA": true, "CV-TS": true, - "CY-01": true, "CY-02": true, "CY-03": true, "CY-04": true, "CY-05": true, - "CY-06": true, "CZ-10": true, "CZ-101": true, "CZ-102": true, "CZ-103": true, - "CZ-104": true, "CZ-105": true, "CZ-106": true, "CZ-107": true, "CZ-108": true, - "CZ-109": true, "CZ-110": true, "CZ-111": true, "CZ-112": true, "CZ-113": true, - "CZ-114": true, "CZ-115": true, "CZ-116": true, "CZ-117": true, "CZ-118": true, - "CZ-119": true, "CZ-120": true, "CZ-121": true, "CZ-122": true, "CZ-20": true, - "CZ-201": true, "CZ-202": true, "CZ-203": true, "CZ-204": true, "CZ-205": true, - "CZ-206": true, "CZ-207": true, "CZ-208": true, "CZ-209": true, "CZ-20A": true, - "CZ-20B": true, "CZ-20C": true, "CZ-31": true, "CZ-311": true, "CZ-312": true, - "CZ-313": true, "CZ-314": true, "CZ-315": true, "CZ-316": true, "CZ-317": true, - "CZ-32": true, "CZ-321": true, "CZ-322": true, "CZ-323": true, "CZ-324": true, - "CZ-325": true, "CZ-326": true, "CZ-327": true, "CZ-41": true, "CZ-411": true, - "CZ-412": true, "CZ-413": true, "CZ-42": true, "CZ-421": true, "CZ-422": true, - "CZ-423": true, "CZ-424": true, "CZ-425": true, "CZ-426": true, "CZ-427": true, - "CZ-51": true, "CZ-511": true, "CZ-512": true, "CZ-513": true, "CZ-514": true, - "CZ-52": true, "CZ-521": true, "CZ-522": true, "CZ-523": true, "CZ-524": true, - "CZ-525": true, "CZ-53": true, "CZ-531": true, "CZ-532": true, "CZ-533": true, - "CZ-534": true, "CZ-63": true, "CZ-631": true, "CZ-632": true, "CZ-633": true, - "CZ-634": true, "CZ-635": true, "CZ-64": true, "CZ-641": true, "CZ-642": true, - "CZ-643": true, "CZ-644": true, "CZ-645": true, "CZ-646": true, "CZ-647": true, - "CZ-71": true, "CZ-711": true, "CZ-712": true, "CZ-713": true, "CZ-714": true, - "CZ-715": true, "CZ-72": true, "CZ-721": true, "CZ-722": true, "CZ-723": true, - "CZ-724": true, "CZ-80": true, "CZ-801": true, "CZ-802": true, "CZ-803": true, - "CZ-804": true, "CZ-805": true, "CZ-806": true, "DE-BB": true, "DE-BE": true, - "DE-BW": true, "DE-BY": true, "DE-HB": true, "DE-HE": true, "DE-HH": true, - "DE-MV": true, "DE-NI": true, "DE-NW": true, "DE-RP": true, "DE-SH": true, - "DE-SL": true, "DE-SN": true, "DE-ST": true, "DE-TH": true, "DJ-AR": true, - "DJ-AS": true, "DJ-DI": true, "DJ-DJ": true, "DJ-OB": true, "DJ-TA": true, - "DK-81": true, "DK-82": true, "DK-83": true, "DK-84": true, "DK-85": true, - "DM-01": true, "DM-02": true, "DM-03": true, "DM-04": true, "DM-05": true, - "DM-06": true, "DM-07": true, "DM-08": true, "DM-09": true, "DM-10": true, - "DO-01": true, "DO-02": true, "DO-03": true, "DO-04": true, "DO-05": true, - "DO-06": true, "DO-07": true, "DO-08": true, "DO-09": true, "DO-10": true, - "DO-11": true, "DO-12": true, "DO-13": true, "DO-14": true, "DO-15": true, - "DO-16": true, "DO-17": true, "DO-18": true, "DO-19": true, "DO-20": true, - "DO-21": true, "DO-22": true, "DO-23": true, "DO-24": true, "DO-25": true, - "DO-26": true, "DO-27": true, "DO-28": true, "DO-29": true, "DO-30": true, "DO-31": true, - "DZ-01": true, "DZ-02": true, "DZ-03": true, "DZ-04": true, "DZ-05": true, - "DZ-06": true, "DZ-07": true, "DZ-08": true, "DZ-09": true, "DZ-10": true, - "DZ-11": true, "DZ-12": true, "DZ-13": true, "DZ-14": true, "DZ-15": true, - "DZ-16": true, "DZ-17": true, "DZ-18": true, "DZ-19": true, "DZ-20": true, - "DZ-21": true, "DZ-22": true, "DZ-23": true, "DZ-24": true, "DZ-25": true, - "DZ-26": true, "DZ-27": true, "DZ-28": true, "DZ-29": true, "DZ-30": true, - "DZ-31": true, "DZ-32": true, "DZ-33": true, "DZ-34": true, "DZ-35": true, - "DZ-36": true, "DZ-37": true, "DZ-38": true, "DZ-39": true, "DZ-40": true, - "DZ-41": true, "DZ-42": true, "DZ-43": true, "DZ-44": true, "DZ-45": true, - "DZ-46": true, "DZ-47": true, "DZ-48": true, "DZ-49": true, "DZ-51": true, - "DZ-53": true, "DZ-55": true, "DZ-56": true, "DZ-57": true, "EC-A": true, "EC-B": true, - "EC-C": true, "EC-D": true, "EC-E": true, "EC-F": true, "EC-G": true, - "EC-H": true, "EC-I": true, "EC-L": true, "EC-M": true, "EC-N": true, - "EC-O": true, "EC-P": true, "EC-R": true, "EC-S": true, "EC-SD": true, - "EC-SE": true, "EC-T": true, "EC-U": true, "EC-W": true, "EC-X": true, - "EC-Y": true, "EC-Z": true, "EE-37": true, "EE-39": true, "EE-44": true, "EE-45": true, - "EE-49": true, "EE-50": true, "EE-51": true, "EE-52": true, "EE-56": true, "EE-57": true, - "EE-59": true, "EE-60": true, "EE-64": true, "EE-65": true, "EE-67": true, "EE-68": true, - "EE-70": true, "EE-71": true, "EE-74": true, "EE-78": true, "EE-79": true, "EE-81": true, "EE-82": true, - "EE-84": true, "EE-86": true, "EE-87": true, "EG-ALX": true, "EG-ASN": true, "EG-AST": true, - "EG-BA": true, "EG-BH": true, "EG-BNS": true, "EG-C": true, "EG-DK": true, - "EG-DT": true, "EG-FYM": true, "EG-GH": true, "EG-GZ": true, "EG-HU": true, - "EG-IS": true, "EG-JS": true, "EG-KB": true, "EG-KFS": true, "EG-KN": true, - "EG-LX": true, "EG-MN": true, "EG-MNF": true, "EG-MT": true, "EG-PTS": true, "EG-SHG": true, - "EG-SHR": true, "EG-SIN": true, "EG-SU": true, "EG-SUZ": true, "EG-WAD": true, - "ER-AN": true, "ER-DK": true, "ER-DU": true, "ER-GB": true, "ER-MA": true, - "ER-SK": true, "ES-A": true, "ES-AB": true, "ES-AL": true, "ES-AN": true, - "ES-AR": true, "ES-AS": true, "ES-AV": true, "ES-B": true, "ES-BA": true, - "ES-BI": true, "ES-BU": true, "ES-C": true, "ES-CA": true, "ES-CB": true, - "ES-CC": true, "ES-CE": true, "ES-CL": true, "ES-CM": true, "ES-CN": true, - "ES-CO": true, "ES-CR": true, "ES-CS": true, "ES-CT": true, "ES-CU": true, - "ES-EX": true, "ES-GA": true, "ES-GC": true, "ES-GI": true, "ES-GR": true, - "ES-GU": true, "ES-H": true, "ES-HU": true, "ES-IB": true, "ES-J": true, - "ES-L": true, "ES-LE": true, "ES-LO": true, "ES-LU": true, "ES-M": true, - "ES-MA": true, "ES-MC": true, "ES-MD": true, "ES-ML": true, "ES-MU": true, - "ES-NA": true, "ES-NC": true, "ES-O": true, "ES-OR": true, "ES-P": true, - "ES-PM": true, "ES-PO": true, "ES-PV": true, "ES-RI": true, "ES-S": true, - "ES-SA": true, "ES-SE": true, "ES-SG": true, "ES-SO": true, "ES-SS": true, - "ES-T": true, "ES-TE": true, "ES-TF": true, "ES-TO": true, "ES-V": true, - "ES-VA": true, "ES-VC": true, "ES-VI": true, "ES-Z": true, "ES-ZA": true, - "ET-AA": true, "ET-AF": true, "ET-AM": true, "ET-BE": true, "ET-DD": true, - "ET-GA": true, "ET-HA": true, "ET-OR": true, "ET-SN": true, "ET-SO": true, - "ET-TI": true, "FI-01": true, "FI-02": true, "FI-03": true, "FI-04": true, - "FI-05": true, "FI-06": true, "FI-07": true, "FI-08": true, "FI-09": true, - "FI-10": true, "FI-11": true, "FI-12": true, "FI-13": true, "FI-14": true, - "FI-15": true, "FI-16": true, "FI-17": true, "FI-18": true, "FI-19": true, - "FJ-C": true, "FJ-E": true, "FJ-N": true, "FJ-R": true, "FJ-W": true, - "FM-KSA": true, "FM-PNI": true, "FM-TRK": true, "FM-YAP": true, "FR-01": true, - "FR-02": true, "FR-03": true, "FR-04": true, "FR-05": true, "FR-06": true, - "FR-07": true, "FR-08": true, "FR-09": true, "FR-10": true, "FR-11": true, - "FR-12": true, "FR-13": true, "FR-14": true, "FR-15": true, "FR-16": true, - "FR-17": true, "FR-18": true, "FR-19": true, "FR-20R": true, "FR-21": true, "FR-22": true, - "FR-23": true, "FR-24": true, "FR-25": true, "FR-26": true, "FR-27": true, - "FR-28": true, "FR-29": true, "FR-2A": true, "FR-2B": true, "FR-30": true, - "FR-31": true, "FR-32": true, "FR-33": true, "FR-34": true, "FR-35": true, - "FR-36": true, "FR-37": true, "FR-38": true, "FR-39": true, "FR-40": true, - "FR-41": true, "FR-42": true, "FR-43": true, "FR-44": true, "FR-45": true, - "FR-46": true, "FR-47": true, "FR-48": true, "FR-49": true, "FR-50": true, - "FR-51": true, "FR-52": true, "FR-53": true, "FR-54": true, "FR-55": true, - "FR-56": true, "FR-57": true, "FR-58": true, "FR-59": true, "FR-60": true, - "FR-61": true, "FR-62": true, "FR-63": true, "FR-64": true, "FR-65": true, - "FR-66": true, "FR-67": true, "FR-68": true, "FR-69": true, "FR-70": true, - "FR-71": true, "FR-72": true, "FR-73": true, "FR-74": true, "FR-75": true, - "FR-76": true, "FR-77": true, "FR-78": true, "FR-79": true, "FR-80": true, - "FR-81": true, "FR-82": true, "FR-83": true, "FR-84": true, "FR-85": true, - "FR-86": true, "FR-87": true, "FR-88": true, "FR-89": true, "FR-90": true, - "FR-91": true, "FR-92": true, "FR-93": true, "FR-94": true, "FR-95": true, - "FR-ARA": true, "FR-BFC": true, "FR-BL": true, "FR-BRE": true, "FR-COR": true, - "FR-CP": true, "FR-CVL": true, "FR-GES": true, "FR-GF": true, "FR-GP": true, - "FR-GUA": true, "FR-HDF": true, "FR-IDF": true, "FR-LRE": true, "FR-MAY": true, - "FR-MF": true, "FR-MQ": true, "FR-NAQ": true, "FR-NC": true, "FR-NOR": true, - "FR-OCC": true, "FR-PAC": true, "FR-PDL": true, "FR-PF": true, "FR-PM": true, - "FR-RE": true, "FR-TF": true, "FR-WF": true, "FR-YT": true, "GA-1": true, - "GA-2": true, "GA-3": true, "GA-4": true, "GA-5": true, "GA-6": true, - "GA-7": true, "GA-8": true, "GA-9": true, "GB-ABC": true, "GB-ABD": true, - "GB-ABE": true, "GB-AGB": true, "GB-AGY": true, "GB-AND": true, "GB-ANN": true, - "GB-ANS": true, "GB-BAS": true, "GB-BBD": true, "GB-BDF": true, "GB-BDG": true, - "GB-BEN": true, "GB-BEX": true, "GB-BFS": true, "GB-BGE": true, "GB-BGW": true, - "GB-BIR": true, "GB-BKM": true, "GB-BMH": true, "GB-BNE": true, "GB-BNH": true, - "GB-BNS": true, "GB-BOL": true, "GB-BPL": true, "GB-BRC": true, "GB-BRD": true, - "GB-BRY": true, "GB-BST": true, "GB-BUR": true, "GB-CAM": true, "GB-CAY": true, - "GB-CBF": true, "GB-CCG": true, "GB-CGN": true, "GB-CHE": true, "GB-CHW": true, - "GB-CLD": true, "GB-CLK": true, "GB-CMA": true, "GB-CMD": true, "GB-CMN": true, - "GB-CON": true, "GB-COV": true, "GB-CRF": true, "GB-CRY": true, "GB-CWY": true, - "GB-DAL": true, "GB-DBY": true, "GB-DEN": true, "GB-DER": true, "GB-DEV": true, - "GB-DGY": true, "GB-DNC": true, "GB-DND": true, "GB-DOR": true, "GB-DRS": true, - "GB-DUD": true, "GB-DUR": true, "GB-EAL": true, "GB-EAW": true, "GB-EAY": true, - "GB-EDH": true, "GB-EDU": true, "GB-ELN": true, "GB-ELS": true, "GB-ENF": true, - "GB-ENG": true, "GB-ERW": true, "GB-ERY": true, "GB-ESS": true, "GB-ESX": true, - "GB-FAL": true, "GB-FIF": true, "GB-FLN": true, "GB-FMO": true, "GB-GAT": true, - "GB-GBN": true, "GB-GLG": true, "GB-GLS": true, "GB-GRE": true, "GB-GWN": true, - "GB-HAL": true, "GB-HAM": true, "GB-HAV": true, "GB-HCK": true, "GB-HEF": true, - "GB-HIL": true, "GB-HLD": true, "GB-HMF": true, "GB-HNS": true, "GB-HPL": true, - "GB-HRT": true, "GB-HRW": true, "GB-HRY": true, "GB-IOS": true, "GB-IOW": true, - "GB-ISL": true, "GB-IVC": true, "GB-KEC": true, "GB-KEN": true, "GB-KHL": true, - "GB-KIR": true, "GB-KTT": true, "GB-KWL": true, "GB-LAN": true, "GB-LBC": true, - "GB-LBH": true, "GB-LCE": true, "GB-LDS": true, "GB-LEC": true, "GB-LEW": true, - "GB-LIN": true, "GB-LIV": true, "GB-LND": true, "GB-LUT": true, "GB-MAN": true, - "GB-MDB": true, "GB-MDW": true, "GB-MEA": true, "GB-MIK": true, "GD-01": true, - "GB-MLN": true, "GB-MON": true, "GB-MRT": true, "GB-MRY": true, "GB-MTY": true, - "GB-MUL": true, "GB-NAY": true, "GB-NBL": true, "GB-NEL": true, "GB-NET": true, - "GB-NFK": true, "GB-NGM": true, "GB-NIR": true, "GB-NLK": true, "GB-NLN": true, - "GB-NMD": true, "GB-NSM": true, "GB-NTH": true, "GB-NTL": true, "GB-NTT": true, - "GB-NTY": true, "GB-NWM": true, "GB-NWP": true, "GB-NYK": true, "GB-OLD": true, - "GB-ORK": true, "GB-OXF": true, "GB-PEM": true, "GB-PKN": true, "GB-PLY": true, - "GB-POL": true, "GB-POR": true, "GB-POW": true, "GB-PTE": true, "GB-RCC": true, - "GB-RCH": true, "GB-RCT": true, "GB-RDB": true, "GB-RDG": true, "GB-RFW": true, - "GB-RIC": true, "GB-ROT": true, "GB-RUT": true, "GB-SAW": true, "GB-SAY": true, - "GB-SCB": true, "GB-SCT": true, "GB-SFK": true, "GB-SFT": true, "GB-SGC": true, - "GB-SHF": true, "GB-SHN": true, "GB-SHR": true, "GB-SKP": true, "GB-SLF": true, - "GB-SLG": true, "GB-SLK": true, "GB-SND": true, "GB-SOL": true, "GB-SOM": true, - "GB-SOS": true, "GB-SRY": true, "GB-STE": true, "GB-STG": true, "GB-STH": true, - "GB-STN": true, "GB-STS": true, "GB-STT": true, "GB-STY": true, "GB-SWA": true, - "GB-SWD": true, "GB-SWK": true, "GB-TAM": true, "GB-TFW": true, "GB-THR": true, - "GB-TOB": true, "GB-TOF": true, "GB-TRF": true, "GB-TWH": true, "GB-UKM": true, - "GB-VGL": true, "GB-WAR": true, "GB-WBK": true, "GB-WDU": true, "GB-WFT": true, - "GB-WGN": true, "GB-WIL": true, "GB-WKF": true, "GB-WLL": true, "GB-WLN": true, - "GB-WLS": true, "GB-WLV": true, "GB-WND": true, "GB-WNM": true, "GB-WOK": true, - "GB-WOR": true, "GB-WRL": true, "GB-WRT": true, "GB-WRX": true, "GB-WSM": true, - "GB-WSX": true, "GB-YOR": true, "GB-ZET": true, "GD-02": true, "GD-03": true, - "GD-04": true, "GD-05": true, "GD-06": true, "GD-10": true, "GE-AB": true, - "GE-AJ": true, "GE-GU": true, "GE-IM": true, "GE-KA": true, "GE-KK": true, - "GE-MM": true, "GE-RL": true, "GE-SJ": true, "GE-SK": true, "GE-SZ": true, - "GE-TB": true, "GH-AA": true, "GH-AH": true, "GH-AF": true, "GH-BA": true, "GH-BO": true, "GH-BE": true, "GH-CP": true, - "GH-EP": true, "GH-NP": true, "GH-TV": true, "GH-UE": true, "GH-UW": true, - "GH-WP": true, "GL-AV": true, "GL-KU": true, "GL-QA": true, "GL-QT": true, "GL-QE": true, "GL-SM": true, - "GM-B": true, "GM-L": true, "GM-M": true, "GM-N": true, "GM-U": true, - "GM-W": true, "GN-B": true, "GN-BE": true, "GN-BF": true, "GN-BK": true, - "GN-C": true, "GN-CO": true, "GN-D": true, "GN-DB": true, "GN-DI": true, - "GN-DL": true, "GN-DU": true, "GN-F": true, "GN-FA": true, "GN-FO": true, - "GN-FR": true, "GN-GA": true, "GN-GU": true, "GN-K": true, "GN-KA": true, - "GN-KB": true, "GN-KD": true, "GN-KE": true, "GN-KN": true, "GN-KO": true, - "GN-KS": true, "GN-L": true, "GN-LA": true, "GN-LE": true, "GN-LO": true, - "GN-M": true, "GN-MC": true, "GN-MD": true, "GN-ML": true, "GN-MM": true, - "GN-N": true, "GN-NZ": true, "GN-PI": true, "GN-SI": true, "GN-TE": true, - "GN-TO": true, "GN-YO": true, "GQ-AN": true, "GQ-BN": true, "GQ-BS": true, - "GQ-C": true, "GQ-CS": true, "GQ-I": true, "GQ-KN": true, "GQ-LI": true, - "GQ-WN": true, "GR-01": true, "GR-03": true, "GR-04": true, "GR-05": true, - "GR-06": true, "GR-07": true, "GR-11": true, "GR-12": true, "GR-13": true, - "GR-14": true, "GR-15": true, "GR-16": true, "GR-17": true, "GR-21": true, - "GR-22": true, "GR-23": true, "GR-24": true, "GR-31": true, "GR-32": true, - "GR-33": true, "GR-34": true, "GR-41": true, "GR-42": true, "GR-43": true, - "GR-44": true, "GR-51": true, "GR-52": true, "GR-53": true, "GR-54": true, - "GR-55": true, "GR-56": true, "GR-57": true, "GR-58": true, "GR-59": true, - "GR-61": true, "GR-62": true, "GR-63": true, "GR-64": true, "GR-69": true, - "GR-71": true, "GR-72": true, "GR-73": true, "GR-81": true, "GR-82": true, - "GR-83": true, "GR-84": true, "GR-85": true, "GR-91": true, "GR-92": true, - "GR-93": true, "GR-94": true, "GR-A": true, "GR-A1": true, "GR-B": true, - "GR-C": true, "GR-D": true, "GR-E": true, "GR-F": true, "GR-G": true, - "GR-H": true, "GR-I": true, "GR-J": true, "GR-K": true, "GR-L": true, - "GR-M": true, "GT-01": true, "GT-02": true, "GT-03": true, "GT-04": true, - "GT-05": true, "GT-06": true, "GT-07": true, "GT-08": true, "GT-09": true, - "GT-10": true, "GT-11": true, "GT-12": true, "GT-13": true, "GT-14": true, - "GT-15": true, "GT-16": true, "GT-17": true, "GT-18": true, "GT-19": true, - "GT-20": true, "GT-21": true, "GT-22": true, "GW-BA": true, "GW-BL": true, - "GW-BM": true, "GW-BS": true, "GW-CA": true, "GW-GA": true, "GW-L": true, - "GW-N": true, "GW-OI": true, "GW-QU": true, "GW-S": true, "GW-TO": true, - "GY-BA": true, "GY-CU": true, "GY-DE": true, "GY-EB": true, "GY-ES": true, - "GY-MA": true, "GY-PM": true, "GY-PT": true, "GY-UD": true, "GY-UT": true, - "HN-AT": true, "HN-CH": true, "HN-CL": true, "HN-CM": true, "HN-CP": true, - "HN-CR": true, "HN-EP": true, "HN-FM": true, "HN-GD": true, "HN-IB": true, - "HN-IN": true, "HN-LE": true, "HN-LP": true, "HN-OC": true, "HN-OL": true, - "HN-SB": true, "HN-VA": true, "HN-YO": true, "HR-01": true, "HR-02": true, - "HR-03": true, "HR-04": true, "HR-05": true, "HR-06": true, "HR-07": true, - "HR-08": true, "HR-09": true, "HR-10": true, "HR-11": true, "HR-12": true, - "HR-13": true, "HR-14": true, "HR-15": true, "HR-16": true, "HR-17": true, - "HR-18": true, "HR-19": true, "HR-20": true, "HR-21": true, "HT-AR": true, - "HT-CE": true, "HT-GA": true, "HT-ND": true, "HT-NE": true, "HT-NO": true, "HT-NI": true, - "HT-OU": true, "HT-SD": true, "HT-SE": true, "HU-BA": true, "HU-BC": true, - "HU-BE": true, "HU-BK": true, "HU-BU": true, "HU-BZ": true, "HU-CS": true, - "HU-DE": true, "HU-DU": true, "HU-EG": true, "HU-ER": true, "HU-FE": true, - "HU-GS": true, "HU-GY": true, "HU-HB": true, "HU-HE": true, "HU-HV": true, - "HU-JN": true, "HU-KE": true, "HU-KM": true, "HU-KV": true, "HU-MI": true, - "HU-NK": true, "HU-NO": true, "HU-NY": true, "HU-PE": true, "HU-PS": true, - "HU-SD": true, "HU-SF": true, "HU-SH": true, "HU-SK": true, "HU-SN": true, - "HU-SO": true, "HU-SS": true, "HU-ST": true, "HU-SZ": true, "HU-TB": true, - "HU-TO": true, "HU-VA": true, "HU-VE": true, "HU-VM": true, "HU-ZA": true, - "HU-ZE": true, "ID-AC": true, "ID-BA": true, "ID-BB": true, "ID-BE": true, - "ID-BT": true, "ID-GO": true, "ID-IJ": true, "ID-JA": true, "ID-JB": true, - "ID-JI": true, "ID-JK": true, "ID-JT": true, "ID-JW": true, "ID-KA": true, - "ID-KB": true, "ID-KI": true, "ID-KU": true, "ID-KR": true, "ID-KS": true, - "ID-KT": true, "ID-LA": true, "ID-MA": true, "ID-ML": true, "ID-MU": true, - "ID-NB": true, "ID-NT": true, "ID-NU": true, "ID-PA": true, "ID-PB": true, - "ID-PE": true, "ID-PP": true, "ID-PS": true, "ID-PT": true, "ID-RI": true, - "ID-SA": true, "ID-SB": true, "ID-SG": true, "ID-SL": true, "ID-SM": true, - "ID-SN": true, "ID-SR": true, "ID-SS": true, "ID-ST": true, "ID-SU": true, - "ID-YO": true, "IE-C": true, "IE-CE": true, "IE-CN": true, "IE-CO": true, - "IE-CW": true, "IE-D": true, "IE-DL": true, "IE-G": true, "IE-KE": true, - "IE-KK": true, "IE-KY": true, "IE-L": true, "IE-LD": true, "IE-LH": true, - "IE-LK": true, "IE-LM": true, "IE-LS": true, "IE-M": true, "IE-MH": true, - "IE-MN": true, "IE-MO": true, "IE-OY": true, "IE-RN": true, "IE-SO": true, - "IE-TA": true, "IE-U": true, "IE-WD": true, "IE-WH": true, "IE-WW": true, - "IE-WX": true, "IL-D": true, "IL-HA": true, "IL-JM": true, "IL-M": true, - "IL-TA": true, "IL-Z": true, "IN-AN": true, "IN-AP": true, "IN-AR": true, - "IN-AS": true, "IN-BR": true, "IN-CH": true, "IN-CT": true, "IN-DH": true, - "IN-DL": true, "IN-DN": true, "IN-GA": true, "IN-GJ": true, "IN-HP": true, - "IN-HR": true, "IN-JH": true, "IN-JK": true, "IN-KA": true, "IN-KL": true, - "IN-LD": true, "IN-MH": true, "IN-ML": true, "IN-MN": true, "IN-MP": true, - "IN-MZ": true, "IN-NL": true, "IN-TG": true, "IN-OR": true, "IN-PB": true, "IN-PY": true, - "IN-RJ": true, "IN-SK": true, "IN-TN": true, "IN-TR": true, "IN-UP": true, - "IN-UT": true, "IN-WB": true, "IQ-AN": true, "IQ-AR": true, "IQ-BA": true, - "IQ-BB": true, "IQ-BG": true, "IQ-DA": true, "IQ-DI": true, "IQ-DQ": true, - "IQ-KA": true, "IQ-KI": true, "IQ-MA": true, "IQ-MU": true, "IQ-NA": true, "IQ-NI": true, - "IQ-QA": true, "IQ-SD": true, "IQ-SW": true, "IQ-SU": true, "IQ-TS": true, "IQ-WA": true, - "IR-00": true, "IR-01": true, "IR-02": true, "IR-03": true, "IR-04": true, "IR-05": true, - "IR-06": true, "IR-07": true, "IR-08": true, "IR-09": true, "IR-10": true, "IR-11": true, - "IR-12": true, "IR-13": true, "IR-14": true, "IR-15": true, "IR-16": true, - "IR-17": true, "IR-18": true, "IR-19": true, "IR-20": true, "IR-21": true, - "IR-22": true, "IR-23": true, "IR-24": true, "IR-25": true, "IR-26": true, - "IR-27": true, "IR-28": true, "IR-29": true, "IR-30": true, "IR-31": true, - "IS-0": true, "IS-1": true, "IS-2": true, "IS-3": true, "IS-4": true, - "IS-5": true, "IS-6": true, "IS-7": true, "IS-8": true, "IT-21": true, - "IT-23": true, "IT-25": true, "IT-32": true, "IT-34": true, "IT-36": true, - "IT-42": true, "IT-45": true, "IT-52": true, "IT-55": true, "IT-57": true, - "IT-62": true, "IT-65": true, "IT-67": true, "IT-72": true, "IT-75": true, - "IT-77": true, "IT-78": true, "IT-82": true, "IT-88": true, "IT-AG": true, - "IT-AL": true, "IT-AN": true, "IT-AO": true, "IT-AP": true, "IT-AQ": true, - "IT-AR": true, "IT-AT": true, "IT-AV": true, "IT-BA": true, "IT-BG": true, - "IT-BI": true, "IT-BL": true, "IT-BN": true, "IT-BO": true, "IT-BR": true, - "IT-BS": true, "IT-BT": true, "IT-BZ": true, "IT-CA": true, "IT-CB": true, - "IT-CE": true, "IT-CH": true, "IT-CI": true, "IT-CL": true, "IT-CN": true, - "IT-CO": true, "IT-CR": true, "IT-CS": true, "IT-CT": true, "IT-CZ": true, - "IT-EN": true, "IT-FC": true, "IT-FE": true, "IT-FG": true, "IT-FI": true, - "IT-FM": true, "IT-FR": true, "IT-GE": true, "IT-GO": true, "IT-GR": true, - "IT-IM": true, "IT-IS": true, "IT-KR": true, "IT-LC": true, "IT-LE": true, - "IT-LI": true, "IT-LO": true, "IT-LT": true, "IT-LU": true, "IT-MB": true, - "IT-MC": true, "IT-ME": true, "IT-MI": true, "IT-MN": true, "IT-MO": true, - "IT-MS": true, "IT-MT": true, "IT-NA": true, "IT-NO": true, "IT-NU": true, - "IT-OG": true, "IT-OR": true, "IT-OT": true, "IT-PA": true, "IT-PC": true, - "IT-PD": true, "IT-PE": true, "IT-PG": true, "IT-PI": true, "IT-PN": true, - "IT-PO": true, "IT-PR": true, "IT-PT": true, "IT-PU": true, "IT-PV": true, - "IT-PZ": true, "IT-RA": true, "IT-RC": true, "IT-RE": true, "IT-RG": true, - "IT-RI": true, "IT-RM": true, "IT-RN": true, "IT-RO": true, "IT-SA": true, - "IT-SI": true, "IT-SO": true, "IT-SP": true, "IT-SR": true, "IT-SS": true, - "IT-SV": true, "IT-TA": true, "IT-TE": true, "IT-TN": true, "IT-TO": true, - "IT-TP": true, "IT-TR": true, "IT-TS": true, "IT-TV": true, "IT-UD": true, - "IT-VA": true, "IT-VB": true, "IT-VC": true, "IT-VE": true, "IT-VI": true, - "IT-VR": true, "IT-VS": true, "IT-VT": true, "IT-VV": true, "JM-01": true, - "JM-02": true, "JM-03": true, "JM-04": true, "JM-05": true, "JM-06": true, - "JM-07": true, "JM-08": true, "JM-09": true, "JM-10": true, "JM-11": true, - "JM-12": true, "JM-13": true, "JM-14": true, "JO-AJ": true, "JO-AM": true, - "JO-AQ": true, "JO-AT": true, "JO-AZ": true, "JO-BA": true, "JO-IR": true, - "JO-JA": true, "JO-KA": true, "JO-MA": true, "JO-MD": true, "JO-MN": true, - "JP-01": true, "JP-02": true, "JP-03": true, "JP-04": true, "JP-05": true, - "JP-06": true, "JP-07": true, "JP-08": true, "JP-09": true, "JP-10": true, - "JP-11": true, "JP-12": true, "JP-13": true, "JP-14": true, "JP-15": true, - "JP-16": true, "JP-17": true, "JP-18": true, "JP-19": true, "JP-20": true, - "JP-21": true, "JP-22": true, "JP-23": true, "JP-24": true, "JP-25": true, - "JP-26": true, "JP-27": true, "JP-28": true, "JP-29": true, "JP-30": true, - "JP-31": true, "JP-32": true, "JP-33": true, "JP-34": true, "JP-35": true, - "JP-36": true, "JP-37": true, "JP-38": true, "JP-39": true, "JP-40": true, - "JP-41": true, "JP-42": true, "JP-43": true, "JP-44": true, "JP-45": true, - "JP-46": true, "JP-47": true, "KE-01": true, "KE-02": true, "KE-03": true, - "KE-04": true, "KE-05": true, "KE-06": true, "KE-07": true, "KE-08": true, - "KE-09": true, "KE-10": true, "KE-11": true, "KE-12": true, "KE-13": true, - "KE-14": true, "KE-15": true, "KE-16": true, "KE-17": true, "KE-18": true, - "KE-19": true, "KE-20": true, "KE-21": true, "KE-22": true, "KE-23": true, - "KE-24": true, "KE-25": true, "KE-26": true, "KE-27": true, "KE-28": true, - "KE-29": true, "KE-30": true, "KE-31": true, "KE-32": true, "KE-33": true, - "KE-34": true, "KE-35": true, "KE-36": true, "KE-37": true, "KE-38": true, - "KE-39": true, "KE-40": true, "KE-41": true, "KE-42": true, "KE-43": true, - "KE-44": true, "KE-45": true, "KE-46": true, "KE-47": true, "KG-B": true, - "KG-C": true, "KG-GB": true, "KG-GO": true, "KG-J": true, "KG-N": true, "KG-O": true, - "KG-T": true, "KG-Y": true, "KH-1": true, "KH-10": true, "KH-11": true, - "KH-12": true, "KH-13": true, "KH-14": true, "KH-15": true, "KH-16": true, - "KH-17": true, "KH-18": true, "KH-19": true, "KH-2": true, "KH-20": true, - "KH-21": true, "KH-22": true, "KH-23": true, "KH-24": true, "KH-3": true, - "KH-4": true, "KH-5": true, "KH-6": true, "KH-7": true, "KH-8": true, - "KH-9": true, "KI-G": true, "KI-L": true, "KI-P": true, "KM-A": true, - "KM-G": true, "KM-M": true, "KN-01": true, "KN-02": true, "KN-03": true, - "KN-04": true, "KN-05": true, "KN-06": true, "KN-07": true, "KN-08": true, - "KN-09": true, "KN-10": true, "KN-11": true, "KN-12": true, "KN-13": true, - "KN-15": true, "KN-K": true, "KN-N": true, "KP-01": true, "KP-02": true, - "KP-03": true, "KP-04": true, "KP-05": true, "KP-06": true, "KP-07": true, - "KP-08": true, "KP-09": true, "KP-10": true, "KP-13": true, "KR-11": true, - "KR-26": true, "KR-27": true, "KR-28": true, "KR-29": true, "KR-30": true, - "KR-31": true, "KR-41": true, "KR-42": true, "KR-43": true, "KR-44": true, - "KR-45": true, "KR-46": true, "KR-47": true, "KR-48": true, "KR-49": true, - "KW-AH": true, "KW-FA": true, "KW-HA": true, "KW-JA": true, "KW-KU": true, - "KW-MU": true, "KZ-10": true, "KZ-75": true, "KZ-19": true, "KZ-11": true, - "KZ-15": true, "KZ-71": true, "KZ-23": true, "KZ-27": true, "KZ-47": true, - "KZ-55": true, "KZ-35": true, "KZ-39": true, "KZ-43": true, "KZ-63": true, - "KZ-79": true, "KZ-59": true, "KZ-61": true, "KZ-62": true, "KZ-31": true, - "KZ-33": true, "LA-AT": true, "LA-BK": true, "LA-BL": true, - "LA-CH": true, "LA-HO": true, "LA-KH": true, "LA-LM": true, "LA-LP": true, - "LA-OU": true, "LA-PH": true, "LA-SL": true, "LA-SV": true, "LA-VI": true, - "LA-VT": true, "LA-XA": true, "LA-XE": true, "LA-XI": true, "LA-XS": true, - "LB-AK": true, "LB-AS": true, "LB-BA": true, "LB-BH": true, "LB-BI": true, - "LB-JA": true, "LB-JL": true, "LB-NA": true, "LC-01": true, "LC-02": true, - "LC-03": true, "LC-05": true, "LC-06": true, "LC-07": true, "LC-08": true, - "LC-10": true, "LC-11": true, "LI-01": true, "LI-02": true, - "LI-03": true, "LI-04": true, "LI-05": true, "LI-06": true, "LI-07": true, - "LI-08": true, "LI-09": true, "LI-10": true, "LI-11": true, "LK-1": true, - "LK-11": true, "LK-12": true, "LK-13": true, "LK-2": true, "LK-21": true, - "LK-22": true, "LK-23": true, "LK-3": true, "LK-31": true, "LK-32": true, - "LK-33": true, "LK-4": true, "LK-41": true, "LK-42": true, "LK-43": true, - "LK-44": true, "LK-45": true, "LK-5": true, "LK-51": true, "LK-52": true, - "LK-53": true, "LK-6": true, "LK-61": true, "LK-62": true, "LK-7": true, - "LK-71": true, "LK-72": true, "LK-8": true, "LK-81": true, "LK-82": true, - "LK-9": true, "LK-91": true, "LK-92": true, "LR-BG": true, "LR-BM": true, - "LR-CM": true, "LR-GB": true, "LR-GG": true, "LR-GK": true, "LR-LO": true, - "LR-MG": true, "LR-MO": true, "LR-MY": true, "LR-NI": true, "LR-RI": true, - "LR-SI": true, "LS-A": true, "LS-B": true, "LS-C": true, "LS-D": true, - "LS-E": true, "LS-F": true, "LS-G": true, "LS-H": true, "LS-J": true, - "LS-K": true, "LT-AL": true, "LT-KL": true, "LT-KU": true, "LT-MR": true, - "LT-PN": true, "LT-SA": true, "LT-TA": true, "LT-TE": true, "LT-UT": true, - "LT-VL": true, "LU-CA": true, "LU-CL": true, "LU-DI": true, "LU-EC": true, - "LU-ES": true, "LU-GR": true, "LU-LU": true, "LU-ME": true, "LU-RD": true, - "LU-RM": true, "LU-VD": true, "LU-WI": true, "LU-D": true, "LU-G": true, "LU-L": true, - "LV-001": true, "LV-111": true, "LV-112": true, "LV-113": true, - "LV-002": true, "LV-003": true, "LV-004": true, "LV-005": true, "LV-006": true, - "LV-007": true, "LV-008": true, "LV-009": true, "LV-010": true, "LV-011": true, - "LV-012": true, "LV-013": true, "LV-014": true, "LV-015": true, "LV-016": true, - "LV-017": true, "LV-018": true, "LV-019": true, "LV-020": true, "LV-021": true, - "LV-022": true, "LV-023": true, "LV-024": true, "LV-025": true, "LV-026": true, - "LV-027": true, "LV-028": true, "LV-029": true, "LV-030": true, "LV-031": true, - "LV-032": true, "LV-033": true, "LV-034": true, "LV-035": true, "LV-036": true, - "LV-037": true, "LV-038": true, "LV-039": true, "LV-040": true, "LV-041": true, - "LV-042": true, "LV-043": true, "LV-044": true, "LV-045": true, "LV-046": true, - "LV-047": true, "LV-048": true, "LV-049": true, "LV-050": true, "LV-051": true, - "LV-052": true, "LV-053": true, "LV-054": true, "LV-055": true, "LV-056": true, - "LV-057": true, "LV-058": true, "LV-059": true, "LV-060": true, "LV-061": true, - "LV-062": true, "LV-063": true, "LV-064": true, "LV-065": true, "LV-066": true, - "LV-067": true, "LV-068": true, "LV-069": true, "LV-070": true, "LV-071": true, - "LV-072": true, "LV-073": true, "LV-074": true, "LV-075": true, "LV-076": true, - "LV-077": true, "LV-078": true, "LV-079": true, "LV-080": true, "LV-081": true, - "LV-082": true, "LV-083": true, "LV-084": true, "LV-085": true, "LV-086": true, - "LV-087": true, "LV-088": true, "LV-089": true, "LV-090": true, "LV-091": true, - "LV-092": true, "LV-093": true, "LV-094": true, "LV-095": true, "LV-096": true, - "LV-097": true, "LV-098": true, "LV-099": true, "LV-100": true, "LV-101": true, - "LV-102": true, "LV-103": true, "LV-104": true, "LV-105": true, "LV-106": true, - "LV-107": true, "LV-108": true, "LV-109": true, "LV-110": true, "LV-DGV": true, - "LV-JEL": true, "LV-JKB": true, "LV-JUR": true, "LV-LPX": true, "LV-REZ": true, - "LV-RIX": true, "LV-VEN": true, "LV-VMR": true, "LY-BA": true, "LY-BU": true, - "LY-DR": true, "LY-GT": true, "LY-JA": true, "LY-JB": true, "LY-JG": true, - "LY-JI": true, "LY-JU": true, "LY-KF": true, "LY-MB": true, "LY-MI": true, - "LY-MJ": true, "LY-MQ": true, "LY-NL": true, "LY-NQ": true, "LY-SB": true, - "LY-SR": true, "LY-TB": true, "LY-WA": true, "LY-WD": true, "LY-WS": true, - "LY-ZA": true, "MA-01": true, "MA-02": true, "MA-03": true, "MA-04": true, - "MA-05": true, "MA-06": true, "MA-07": true, "MA-08": true, "MA-09": true, - "MA-10": true, "MA-11": true, "MA-12": true, "MA-13": true, "MA-14": true, - "MA-15": true, "MA-16": true, "MA-AGD": true, "MA-AOU": true, "MA-ASZ": true, - "MA-AZI": true, "MA-BEM": true, "MA-BER": true, "MA-BES": true, "MA-BOD": true, - "MA-BOM": true, "MA-CAS": true, "MA-CHE": true, "MA-CHI": true, "MA-CHT": true, - "MA-ERR": true, "MA-ESI": true, "MA-ESM": true, "MA-FAH": true, "MA-FES": true, - "MA-FIG": true, "MA-GUE": true, "MA-HAJ": true, "MA-HAO": true, "MA-HOC": true, - "MA-IFR": true, "MA-INE": true, "MA-JDI": true, "MA-JRA": true, "MA-KEN": true, - "MA-KES": true, "MA-KHE": true, "MA-KHN": true, "MA-KHO": true, "MA-LAA": true, - "MA-LAR": true, "MA-MED": true, "MA-MEK": true, "MA-MMD": true, "MA-MMN": true, - "MA-MOH": true, "MA-MOU": true, "MA-NAD": true, "MA-NOU": true, "MA-OUA": true, - "MA-OUD": true, "MA-OUJ": true, "MA-RAB": true, "MA-SAF": true, "MA-SAL": true, - "MA-SEF": true, "MA-SET": true, "MA-SIK": true, "MA-SKH": true, "MA-SYB": true, - "MA-TAI": true, "MA-TAO": true, "MA-TAR": true, "MA-TAT": true, "MA-TAZ": true, - "MA-TET": true, "MA-TIZ": true, "MA-TNG": true, "MA-TNT": true, "MA-ZAG": true, - "MC-CL": true, "MC-CO": true, "MC-FO": true, "MC-GA": true, "MC-JE": true, - "MC-LA": true, "MC-MA": true, "MC-MC": true, "MC-MG": true, "MC-MO": true, - "MC-MU": true, "MC-PH": true, "MC-SD": true, "MC-SO": true, "MC-SP": true, - "MC-SR": true, "MC-VR": true, "MD-AN": true, "MD-BA": true, "MD-BD": true, - "MD-BR": true, "MD-BS": true, "MD-CA": true, "MD-CL": true, "MD-CM": true, - "MD-CR": true, "MD-CS": true, "MD-CT": true, "MD-CU": true, "MD-DO": true, - "MD-DR": true, "MD-DU": true, "MD-ED": true, "MD-FA": true, "MD-FL": true, - "MD-GA": true, "MD-GL": true, "MD-HI": true, "MD-IA": true, "MD-LE": true, - "MD-NI": true, "MD-OC": true, "MD-OR": true, "MD-RE": true, "MD-RI": true, - "MD-SD": true, "MD-SI": true, "MD-SN": true, "MD-SO": true, "MD-ST": true, - "MD-SV": true, "MD-TA": true, "MD-TE": true, "MD-UN": true, "ME-01": true, - "ME-02": true, "ME-03": true, "ME-04": true, "ME-05": true, "ME-06": true, - "ME-07": true, "ME-08": true, "ME-09": true, "ME-10": true, "ME-11": true, - "ME-12": true, "ME-13": true, "ME-14": true, "ME-15": true, "ME-16": true, - "ME-17": true, "ME-18": true, "ME-19": true, "ME-20": true, "ME-21": true, "ME-24": true, - "MG-A": true, "MG-D": true, "MG-F": true, "MG-M": true, "MG-T": true, - "MG-U": true, "MH-ALK": true, "MH-ALL": true, "MH-ARN": true, "MH-AUR": true, - "MH-EBO": true, "MH-ENI": true, "MH-JAB": true, "MH-JAL": true, "MH-KIL": true, - "MH-KWA": true, "MH-L": true, "MH-LAE": true, "MH-LIB": true, "MH-LIK": true, - "MH-MAJ": true, "MH-MAL": true, "MH-MEJ": true, "MH-MIL": true, "MH-NMK": true, - "MH-NMU": true, "MH-RON": true, "MH-T": true, "MH-UJA": true, "MH-UTI": true, - "MH-WTJ": true, "MH-WTN": true, "MK-101": true, "MK-102": true, "MK-103": true, - "MK-104": true, "MK-105": true, - "MK-106": true, "MK-107": true, "MK-108": true, "MK-109": true, "MK-201": true, - "MK-202": true, "MK-205": true, "MK-206": true, "MK-207": true, "MK-208": true, - "MK-209": true, "MK-210": true, "MK-211": true, "MK-301": true, "MK-303": true, - "MK-307": true, "MK-308": true, "MK-310": true, "MK-311": true, "MK-312": true, - "MK-401": true, "MK-402": true, "MK-403": true, "MK-404": true, "MK-405": true, - "MK-406": true, "MK-408": true, "MK-409": true, "MK-410": true, "MK-501": true, - "MK-502": true, "MK-503": true, "MK-505": true, "MK-506": true, "MK-507": true, - "MK-508": true, "MK-509": true, "MK-601": true, "MK-602": true, "MK-604": true, - "MK-605": true, "MK-606": true, "MK-607": true, "MK-608": true, "MK-609": true, - "MK-701": true, "MK-702": true, "MK-703": true, "MK-704": true, "MK-705": true, - "MK-803": true, "MK-804": true, "MK-806": true, "MK-807": true, "MK-809": true, - "MK-810": true, "MK-811": true, "MK-812": true, "MK-813": true, "MK-814": true, - "MK-816": true, "ML-1": true, "ML-2": true, "ML-3": true, "ML-4": true, - "ML-5": true, "ML-6": true, "ML-7": true, "ML-8": true, "ML-BKO": true, - "MM-01": true, "MM-02": true, "MM-03": true, "MM-04": true, "MM-05": true, - "MM-06": true, "MM-07": true, "MM-11": true, "MM-12": true, "MM-13": true, - "MM-14": true, "MM-15": true, "MM-16": true, "MM-17": true, "MM-18": true, "MN-035": true, - "MN-037": true, "MN-039": true, "MN-041": true, "MN-043": true, "MN-046": true, - "MN-047": true, "MN-049": true, "MN-051": true, "MN-053": true, "MN-055": true, - "MN-057": true, "MN-059": true, "MN-061": true, "MN-063": true, "MN-064": true, - "MN-065": true, "MN-067": true, "MN-069": true, "MN-071": true, "MN-073": true, - "MN-1": true, "MR-01": true, "MR-02": true, "MR-03": true, "MR-04": true, - "MR-05": true, "MR-06": true, "MR-07": true, "MR-08": true, "MR-09": true, - "MR-10": true, "MR-11": true, "MR-12": true, "MR-13": true, "MR-NKC": true, "MT-01": true, - "MT-02": true, "MT-03": true, "MT-04": true, "MT-05": true, "MT-06": true, - "MT-07": true, "MT-08": true, "MT-09": true, "MT-10": true, "MT-11": true, - "MT-12": true, "MT-13": true, "MT-14": true, "MT-15": true, "MT-16": true, - "MT-17": true, "MT-18": true, "MT-19": true, "MT-20": true, "MT-21": true, - "MT-22": true, "MT-23": true, "MT-24": true, "MT-25": true, "MT-26": true, - "MT-27": true, "MT-28": true, "MT-29": true, "MT-30": true, "MT-31": true, - "MT-32": true, "MT-33": true, "MT-34": true, "MT-35": true, "MT-36": true, - "MT-37": true, "MT-38": true, "MT-39": true, "MT-40": true, "MT-41": true, - "MT-42": true, "MT-43": true, "MT-44": true, "MT-45": true, "MT-46": true, - "MT-47": true, "MT-48": true, "MT-49": true, "MT-50": true, "MT-51": true, - "MT-52": true, "MT-53": true, "MT-54": true, "MT-55": true, "MT-56": true, - "MT-57": true, "MT-58": true, "MT-59": true, "MT-60": true, "MT-61": true, - "MT-62": true, "MT-63": true, "MT-64": true, "MT-65": true, "MT-66": true, - "MT-67": true, "MT-68": true, "MU-AG": true, "MU-BL": true, "MU-BR": true, - "MU-CC": true, "MU-CU": true, "MU-FL": true, "MU-GP": true, "MU-MO": true, - "MU-PA": true, "MU-PL": true, "MU-PU": true, "MU-PW": true, "MU-QB": true, - "MU-RO": true, "MU-RP": true, "MU-RR": true, "MU-SA": true, "MU-VP": true, "MV-00": true, - "MV-01": true, "MV-02": true, "MV-03": true, "MV-04": true, "MV-05": true, - "MV-07": true, "MV-08": true, "MV-12": true, "MV-13": true, "MV-14": true, - "MV-17": true, "MV-20": true, "MV-23": true, "MV-24": true, "MV-25": true, - "MV-26": true, "MV-27": true, "MV-28": true, "MV-29": true, "MV-CE": true, - "MV-MLE": true, "MV-NC": true, "MV-NO": true, "MV-SC": true, "MV-SU": true, - "MV-UN": true, "MV-US": true, "MW-BA": true, "MW-BL": true, "MW-C": true, - "MW-CK": true, "MW-CR": true, "MW-CT": true, "MW-DE": true, "MW-DO": true, - "MW-KR": true, "MW-KS": true, "MW-LI": true, "MW-LK": true, "MW-MC": true, - "MW-MG": true, "MW-MH": true, "MW-MU": true, "MW-MW": true, "MW-MZ": true, - "MW-N": true, "MW-NB": true, "MW-NE": true, "MW-NI": true, "MW-NK": true, - "MW-NS": true, "MW-NU": true, "MW-PH": true, "MW-RU": true, "MW-S": true, - "MW-SA": true, "MW-TH": true, "MW-ZO": true, "MX-AGU": true, "MX-BCN": true, - "MX-BCS": true, "MX-CAM": true, "MX-CHH": true, "MX-CHP": true, "MX-COA": true, - "MX-COL": true, "MX-CMX": true, "MX-DIF": true, "MX-DUR": true, "MX-GRO": true, "MX-GUA": true, - "MX-HID": true, "MX-JAL": true, "MX-MEX": true, "MX-MIC": true, "MX-MOR": true, - "MX-NAY": true, "MX-NLE": true, "MX-OAX": true, "MX-PUE": true, "MX-QUE": true, - "MX-ROO": true, "MX-SIN": true, "MX-SLP": true, "MX-SON": true, "MX-TAB": true, - "MX-TAM": true, "MX-TLA": true, "MX-VER": true, "MX-YUC": true, "MX-ZAC": true, - "MY-01": true, "MY-02": true, "MY-03": true, "MY-04": true, "MY-05": true, - "MY-06": true, "MY-07": true, "MY-08": true, "MY-09": true, "MY-10": true, - "MY-11": true, "MY-12": true, "MY-13": true, "MY-14": true, "MY-15": true, - "MY-16": true, "MZ-A": true, "MZ-B": true, "MZ-G": true, "MZ-I": true, - "MZ-L": true, "MZ-MPM": true, "MZ-N": true, "MZ-P": true, "MZ-Q": true, - "MZ-S": true, "MZ-T": true, "NA-CA": true, "NA-ER": true, "NA-HA": true, - "NA-KA": true, "NA-KE": true, "NA-KH": true, "NA-KU": true, "NA-KW": true, "NA-OD": true, "NA-OH": true, - "NA-OK": true, "NA-ON": true, "NA-OS": true, "NA-OT": true, "NA-OW": true, - "NE-1": true, "NE-2": true, "NE-3": true, "NE-4": true, "NE-5": true, - "NE-6": true, "NE-7": true, "NE-8": true, "NG-AB": true, "NG-AD": true, - "NG-AK": true, "NG-AN": true, "NG-BA": true, "NG-BE": true, "NG-BO": true, - "NG-BY": true, "NG-CR": true, "NG-DE": true, "NG-EB": true, "NG-ED": true, - "NG-EK": true, "NG-EN": true, "NG-FC": true, "NG-GO": true, "NG-IM": true, - "NG-JI": true, "NG-KD": true, "NG-KE": true, "NG-KN": true, "NG-KO": true, - "NG-KT": true, "NG-KW": true, "NG-LA": true, "NG-NA": true, "NG-NI": true, - "NG-OG": true, "NG-ON": true, "NG-OS": true, "NG-OY": true, "NG-PL": true, - "NG-RI": true, "NG-SO": true, "NG-TA": true, "NG-YO": true, "NG-ZA": true, - "NI-AN": true, "NI-AS": true, "NI-BO": true, "NI-CA": true, "NI-CI": true, - "NI-CO": true, "NI-ES": true, "NI-GR": true, "NI-JI": true, "NI-LE": true, - "NI-MD": true, "NI-MN": true, "NI-MS": true, "NI-MT": true, "NI-NS": true, - "NI-RI": true, "NI-SJ": true, "NL-AW": true, "NL-BQ1": true, "NL-BQ2": true, - "NL-BQ3": true, "NL-CW": true, "NL-DR": true, "NL-FL": true, "NL-FR": true, - "NL-GE": true, "NL-GR": true, "NL-LI": true, "NL-NB": true, "NL-NH": true, - "NL-OV": true, "NL-SX": true, "NL-UT": true, "NL-ZE": true, "NL-ZH": true, - "NO-03": true, "NO-11": true, "NO-15": true, "NO-16": true, "NO-17": true, - "NO-18": true, "NO-21": true, "NO-30": true, "NO-34": true, "NO-38": true, - "NO-42": true, "NO-46": true, "NO-50": true, "NO-54": true, - "NO-22": true, "NP-1": true, "NP-2": true, "NP-3": true, "NP-4": true, - "NP-5": true, "NP-BA": true, "NP-BH": true, "NP-DH": true, "NP-GA": true, - "NP-JA": true, "NP-KA": true, "NP-KO": true, "NP-LU": true, "NP-MA": true, - "NP-ME": true, "NP-NA": true, "NP-RA": true, "NP-SA": true, "NP-SE": true, - "NR-01": true, "NR-02": true, "NR-03": true, "NR-04": true, "NR-05": true, - "NR-06": true, "NR-07": true, "NR-08": true, "NR-09": true, "NR-10": true, - "NR-11": true, "NR-12": true, "NR-13": true, "NR-14": true, "NZ-AUK": true, - "NZ-BOP": true, "NZ-CAN": true, "NZ-CIT": true, "NZ-GIS": true, "NZ-HKB": true, - "NZ-MBH": true, "NZ-MWT": true, "NZ-N": true, "NZ-NSN": true, "NZ-NTL": true, - "NZ-OTA": true, "NZ-S": true, "NZ-STL": true, "NZ-TAS": true, "NZ-TKI": true, - "NZ-WGN": true, "NZ-WKO": true, "NZ-WTC": true, "OM-BA": true, "OM-BS": true, "OM-BU": true, "OM-BJ": true, - "OM-DA": true, "OM-MA": true, "OM-MU": true, "OM-SH": true, "OM-SJ": true, "OM-SS": true, "OM-WU": true, - "OM-ZA": true, "OM-ZU": true, "PA-1": true, "PA-2": true, "PA-3": true, - "PA-4": true, "PA-5": true, "PA-6": true, "PA-7": true, "PA-8": true, - "PA-9": true, "PA-EM": true, "PA-KY": true, "PA-NB": true, "PE-AMA": true, - "PE-ANC": true, "PE-APU": true, "PE-ARE": true, "PE-AYA": true, "PE-CAJ": true, - "PE-CAL": true, "PE-CUS": true, "PE-HUC": true, "PE-HUV": true, "PE-ICA": true, - "PE-JUN": true, "PE-LAL": true, "PE-LAM": true, "PE-LIM": true, "PE-LMA": true, - "PE-LOR": true, "PE-MDD": true, "PE-MOQ": true, "PE-PAS": true, "PE-PIU": true, - "PE-PUN": true, "PE-SAM": true, "PE-TAC": true, "PE-TUM": true, "PE-UCA": true, - "PG-CPK": true, "PG-CPM": true, "PG-EBR": true, "PG-EHG": true, "PG-EPW": true, - "PG-ESW": true, "PG-GPK": true, "PG-MBA": true, "PG-MPL": true, "PG-MPM": true, - "PG-MRL": true, "PG-NCD": true, "PG-NIK": true, "PG-NPP": true, "PG-NSB": true, - "PG-SAN": true, "PG-SHM": true, "PG-WBK": true, "PG-WHM": true, "PG-WPD": true, - "PH-00": true, "PH-01": true, "PH-02": true, "PH-03": true, "PH-05": true, - "PH-06": true, "PH-07": true, "PH-08": true, "PH-09": true, "PH-10": true, - "PH-11": true, "PH-12": true, "PH-13": true, "PH-14": true, "PH-15": true, - "PH-40": true, "PH-41": true, "PH-ABR": true, "PH-AGN": true, "PH-AGS": true, - "PH-AKL": true, "PH-ALB": true, "PH-ANT": true, "PH-APA": true, "PH-AUR": true, - "PH-BAN": true, "PH-BAS": true, "PH-BEN": true, "PH-BIL": true, "PH-BOH": true, - "PH-BTG": true, "PH-BTN": true, "PH-BUK": true, "PH-BUL": true, "PH-CAG": true, - "PH-CAM": true, "PH-CAN": true, "PH-CAP": true, "PH-CAS": true, "PH-CAT": true, - "PH-CAV": true, "PH-CEB": true, "PH-COM": true, "PH-DAO": true, "PH-DAS": true, - "PH-DAV": true, "PH-DIN": true, "PH-EAS": true, "PH-GUI": true, "PH-IFU": true, - "PH-ILI": true, "PH-ILN": true, "PH-ILS": true, "PH-ISA": true, "PH-KAL": true, - "PH-LAG": true, "PH-LAN": true, "PH-LAS": true, "PH-LEY": true, "PH-LUN": true, - "PH-MAD": true, "PH-MAG": true, "PH-MAS": true, "PH-MDC": true, "PH-MDR": true, - "PH-MOU": true, "PH-MSC": true, "PH-MSR": true, "PH-NCO": true, "PH-NEC": true, - "PH-NER": true, "PH-NSA": true, "PH-NUE": true, "PH-NUV": true, "PH-PAM": true, - "PH-PAN": true, "PH-PLW": true, "PH-QUE": true, "PH-QUI": true, "PH-RIZ": true, - "PH-ROM": true, "PH-SAR": true, "PH-SCO": true, "PH-SIG": true, "PH-SLE": true, - "PH-SLU": true, "PH-SOR": true, "PH-SUK": true, "PH-SUN": true, "PH-SUR": true, - "PH-TAR": true, "PH-TAW": true, "PH-WSA": true, "PH-ZAN": true, "PH-ZAS": true, - "PH-ZMB": true, "PH-ZSI": true, "PK-BA": true, "PK-GB": true, "PK-IS": true, - "PK-JK": true, "PK-KP": true, "PK-PB": true, "PK-SD": true, "PK-TA": true, - "PL-02": true, "PL-04": true, "PL-06": true, "PL-08": true, "PL-10": true, - "PL-12": true, "PL-14": true, "PL-16": true, "PL-18": true, "PL-20": true, - "PL-22": true, "PL-24": true, "PL-26": true, "PL-28": true, "PL-30": true, "PL-32": true, - "PS-BTH": true, "PS-DEB": true, "PS-GZA": true, "PS-HBN": true, - "PS-JEM": true, "PS-JEN": true, "PS-JRH": true, "PS-KYS": true, "PS-NBS": true, - "PS-NGZ": true, "PS-QQA": true, "PS-RBH": true, "PS-RFH": true, "PS-SLT": true, - "PS-TBS": true, "PS-TKM": true, "PT-01": true, "PT-02": true, "PT-03": true, - "PT-04": true, "PT-05": true, "PT-06": true, "PT-07": true, "PT-08": true, - "PT-09": true, "PT-10": true, "PT-11": true, "PT-12": true, "PT-13": true, - "PT-14": true, "PT-15": true, "PT-16": true, "PT-17": true, "PT-18": true, - "PT-20": true, "PT-30": true, "PW-002": true, "PW-004": true, "PW-010": true, - "PW-050": true, "PW-100": true, "PW-150": true, "PW-212": true, "PW-214": true, - "PW-218": true, "PW-222": true, "PW-224": true, "PW-226": true, "PW-227": true, - "PW-228": true, "PW-350": true, "PW-370": true, "PY-1": true, "PY-10": true, - "PY-11": true, "PY-12": true, "PY-13": true, "PY-14": true, "PY-15": true, - "PY-16": true, "PY-19": true, "PY-2": true, "PY-3": true, "PY-4": true, - "PY-5": true, "PY-6": true, "PY-7": true, "PY-8": true, "PY-9": true, - "PY-ASU": true, "QA-DA": true, "QA-KH": true, "QA-MS": true, "QA-RA": true, - "QA-US": true, "QA-WA": true, "QA-ZA": true, "RO-AB": true, "RO-AG": true, - "RO-AR": true, "RO-B": true, "RO-BC": true, "RO-BH": true, "RO-BN": true, - "RO-BR": true, "RO-BT": true, "RO-BV": true, "RO-BZ": true, "RO-CJ": true, - "RO-CL": true, "RO-CS": true, "RO-CT": true, "RO-CV": true, "RO-DB": true, - "RO-DJ": true, "RO-GJ": true, "RO-GL": true, "RO-GR": true, "RO-HD": true, - "RO-HR": true, "RO-IF": true, "RO-IL": true, "RO-IS": true, "RO-MH": true, - "RO-MM": true, "RO-MS": true, "RO-NT": true, "RO-OT": true, "RO-PH": true, - "RO-SB": true, "RO-SJ": true, "RO-SM": true, "RO-SV": true, "RO-TL": true, - "RO-TM": true, "RO-TR": true, "RO-VL": true, "RO-VN": true, "RO-VS": true, - "RS-00": true, "RS-01": true, "RS-02": true, "RS-03": true, "RS-04": true, - "RS-05": true, "RS-06": true, "RS-07": true, "RS-08": true, "RS-09": true, - "RS-10": true, "RS-11": true, "RS-12": true, "RS-13": true, "RS-14": true, - "RS-15": true, "RS-16": true, "RS-17": true, "RS-18": true, "RS-19": true, - "RS-20": true, "RS-21": true, "RS-22": true, "RS-23": true, "RS-24": true, - "RS-25": true, "RS-26": true, "RS-27": true, "RS-28": true, "RS-29": true, - "RS-KM": true, "RS-VO": true, "RU-AD": true, "RU-AL": true, "RU-ALT": true, - "RU-AMU": true, "RU-ARK": true, "RU-AST": true, "RU-BA": true, "RU-BEL": true, - "RU-BRY": true, "RU-BU": true, "RU-CE": true, "RU-CHE": true, "RU-CHU": true, - "RU-CU": true, "RU-DA": true, "RU-IN": true, "RU-IRK": true, "RU-IVA": true, - "RU-KAM": true, "RU-KB": true, "RU-KC": true, "RU-KDA": true, "RU-KEM": true, - "RU-KGD": true, "RU-KGN": true, "RU-KHA": true, "RU-KHM": true, "RU-KIR": true, - "RU-KK": true, "RU-KL": true, "RU-KLU": true, "RU-KO": true, "RU-KOS": true, - "RU-KR": true, "RU-KRS": true, "RU-KYA": true, "RU-LEN": true, "RU-LIP": true, - "RU-MAG": true, "RU-ME": true, "RU-MO": true, "RU-MOS": true, "RU-MOW": true, - "RU-MUR": true, "RU-NEN": true, "RU-NGR": true, "RU-NIZ": true, "RU-NVS": true, - "RU-OMS": true, "RU-ORE": true, "RU-ORL": true, "RU-PER": true, "RU-PNZ": true, - "RU-PRI": true, "RU-PSK": true, "RU-ROS": true, "RU-RYA": true, "RU-SA": true, - "RU-SAK": true, "RU-SAM": true, "RU-SAR": true, "RU-SE": true, "RU-SMO": true, - "RU-SPE": true, "RU-STA": true, "RU-SVE": true, "RU-TA": true, "RU-TAM": true, - "RU-TOM": true, "RU-TUL": true, "RU-TVE": true, "RU-TY": true, "RU-TYU": true, - "RU-UD": true, "RU-ULY": true, "RU-VGG": true, "RU-VLA": true, "RU-VLG": true, - "RU-VOR": true, "RU-YAN": true, "RU-YAR": true, "RU-YEV": true, "RU-ZAB": true, - "RW-01": true, "RW-02": true, "RW-03": true, "RW-04": true, "RW-05": true, - "SA-01": true, "SA-02": true, "SA-03": true, "SA-04": true, "SA-05": true, - "SA-06": true, "SA-07": true, "SA-08": true, "SA-09": true, "SA-10": true, - "SA-11": true, "SA-12": true, "SA-14": true, "SB-CE": true, "SB-CH": true, - "SB-CT": true, "SB-GU": true, "SB-IS": true, "SB-MK": true, "SB-ML": true, - "SB-RB": true, "SB-TE": true, "SB-WE": true, "SC-01": true, "SC-02": true, - "SC-03": true, "SC-04": true, "SC-05": true, "SC-06": true, "SC-07": true, - "SC-08": true, "SC-09": true, "SC-10": true, "SC-11": true, "SC-12": true, - "SC-13": true, "SC-14": true, "SC-15": true, "SC-16": true, "SC-17": true, - "SC-18": true, "SC-19": true, "SC-20": true, "SC-21": true, "SC-22": true, - "SC-23": true, "SC-24": true, "SC-25": true, "SD-DC": true, "SD-DE": true, - "SD-DN": true, "SD-DS": true, "SD-DW": true, "SD-GD": true, "SD-GK": true, "SD-GZ": true, - "SD-KA": true, "SD-KH": true, "SD-KN": true, "SD-KS": true, "SD-NB": true, - "SD-NO": true, "SD-NR": true, "SD-NW": true, "SD-RS": true, "SD-SI": true, - "SE-AB": true, "SE-AC": true, "SE-BD": true, "SE-C": true, "SE-D": true, - "SE-E": true, "SE-F": true, "SE-G": true, "SE-H": true, "SE-I": true, - "SE-K": true, "SE-M": true, "SE-N": true, "SE-O": true, "SE-S": true, - "SE-T": true, "SE-U": true, "SE-W": true, "SE-X": true, "SE-Y": true, - "SE-Z": true, "SG-01": true, "SG-02": true, "SG-03": true, "SG-04": true, - "SG-05": true, "SH-AC": true, "SH-HL": true, "SH-TA": true, "SI-001": true, - "SI-002": true, "SI-003": true, "SI-004": true, "SI-005": true, "SI-006": true, - "SI-007": true, "SI-008": true, "SI-009": true, "SI-010": true, "SI-011": true, - "SI-012": true, "SI-013": true, "SI-014": true, "SI-015": true, "SI-016": true, - "SI-017": true, "SI-018": true, "SI-019": true, "SI-020": true, "SI-021": true, - "SI-022": true, "SI-023": true, "SI-024": true, "SI-025": true, "SI-026": true, - "SI-027": true, "SI-028": true, "SI-029": true, "SI-030": true, "SI-031": true, - "SI-032": true, "SI-033": true, "SI-034": true, "SI-035": true, "SI-036": true, - "SI-037": true, "SI-038": true, "SI-039": true, "SI-040": true, "SI-041": true, - "SI-042": true, "SI-043": true, "SI-044": true, "SI-045": true, "SI-046": true, - "SI-047": true, "SI-048": true, "SI-049": true, "SI-050": true, "SI-051": true, - "SI-052": true, "SI-053": true, "SI-054": true, "SI-055": true, "SI-056": true, - "SI-057": true, "SI-058": true, "SI-059": true, "SI-060": true, "SI-061": true, - "SI-062": true, "SI-063": true, "SI-064": true, "SI-065": true, "SI-066": true, - "SI-067": true, "SI-068": true, "SI-069": true, "SI-070": true, "SI-071": true, - "SI-072": true, "SI-073": true, "SI-074": true, "SI-075": true, "SI-076": true, - "SI-077": true, "SI-078": true, "SI-079": true, "SI-080": true, "SI-081": true, - "SI-082": true, "SI-083": true, "SI-084": true, "SI-085": true, "SI-086": true, - "SI-087": true, "SI-088": true, "SI-089": true, "SI-090": true, "SI-091": true, - "SI-092": true, "SI-093": true, "SI-094": true, "SI-095": true, "SI-096": true, - "SI-097": true, "SI-098": true, "SI-099": true, "SI-100": true, "SI-101": true, - "SI-102": true, "SI-103": true, "SI-104": true, "SI-105": true, "SI-106": true, - "SI-107": true, "SI-108": true, "SI-109": true, "SI-110": true, "SI-111": true, - "SI-112": true, "SI-113": true, "SI-114": true, "SI-115": true, "SI-116": true, - "SI-117": true, "SI-118": true, "SI-119": true, "SI-120": true, "SI-121": true, - "SI-122": true, "SI-123": true, "SI-124": true, "SI-125": true, "SI-126": true, - "SI-127": true, "SI-128": true, "SI-129": true, "SI-130": true, "SI-131": true, - "SI-132": true, "SI-133": true, "SI-134": true, "SI-135": true, "SI-136": true, - "SI-137": true, "SI-138": true, "SI-139": true, "SI-140": true, "SI-141": true, - "SI-142": true, "SI-143": true, "SI-144": true, "SI-146": true, "SI-147": true, - "SI-148": true, "SI-149": true, "SI-150": true, "SI-151": true, "SI-152": true, - "SI-153": true, "SI-154": true, "SI-155": true, "SI-156": true, "SI-157": true, - "SI-158": true, "SI-159": true, "SI-160": true, "SI-161": true, "SI-162": true, - "SI-163": true, "SI-164": true, "SI-165": true, "SI-166": true, "SI-167": true, - "SI-168": true, "SI-169": true, "SI-170": true, "SI-171": true, "SI-172": true, - "SI-173": true, "SI-174": true, "SI-175": true, "SI-176": true, "SI-177": true, - "SI-178": true, "SI-179": true, "SI-180": true, "SI-181": true, "SI-182": true, - "SI-183": true, "SI-184": true, "SI-185": true, "SI-186": true, "SI-187": true, - "SI-188": true, "SI-189": true, "SI-190": true, "SI-191": true, "SI-192": true, - "SI-193": true, "SI-194": true, "SI-195": true, "SI-196": true, "SI-197": true, - "SI-198": true, "SI-199": true, "SI-200": true, "SI-201": true, "SI-202": true, - "SI-203": true, "SI-204": true, "SI-205": true, "SI-206": true, "SI-207": true, - "SI-208": true, "SI-209": true, "SI-210": true, "SI-211": true, "SI-212": true, "SI-213": true, "SK-BC": true, - "SK-BL": true, "SK-KI": true, "SK-NI": true, "SK-PV": true, "SK-TA": true, - "SK-TC": true, "SK-ZI": true, "SL-E": true, "SL-N": true, "SL-S": true, - "SL-W": true, "SM-01": true, "SM-02": true, "SM-03": true, "SM-04": true, - "SM-05": true, "SM-06": true, "SM-07": true, "SM-08": true, "SM-09": true, - "SN-DB": true, "SN-DK": true, "SN-FK": true, "SN-KA": true, "SN-KD": true, - "SN-KE": true, "SN-KL": true, "SN-LG": true, "SN-MT": true, "SN-SE": true, - "SN-SL": true, "SN-TC": true, "SN-TH": true, "SN-ZG": true, "SO-AW": true, - "SO-BK": true, "SO-BN": true, "SO-BR": true, "SO-BY": true, "SO-GA": true, - "SO-GE": true, "SO-HI": true, "SO-JD": true, "SO-JH": true, "SO-MU": true, - "SO-NU": true, "SO-SA": true, "SO-SD": true, "SO-SH": true, "SO-SO": true, - "SO-TO": true, "SO-WO": true, "SR-BR": true, "SR-CM": true, "SR-CR": true, - "SR-MA": true, "SR-NI": true, "SR-PM": true, "SR-PR": true, "SR-SA": true, - "SR-SI": true, "SR-WA": true, "SS-BN": true, "SS-BW": true, "SS-EC": true, - "SS-EE8": true, "SS-EE": true, "SS-EW": true, "SS-JG": true, "SS-LK": true, "SS-NU": true, - "SS-UY": true, "SS-WR": true, "ST-01": true, "ST-P": true, "ST-S": true, "SV-AH": true, - "SV-CA": true, "SV-CH": true, "SV-CU": true, "SV-LI": true, "SV-MO": true, - "SV-PA": true, "SV-SA": true, "SV-SM": true, "SV-SO": true, "SV-SS": true, - "SV-SV": true, "SV-UN": true, "SV-US": true, "SY-DI": true, "SY-DR": true, - "SY-DY": true, "SY-HA": true, "SY-HI": true, "SY-HL": true, "SY-HM": true, - "SY-ID": true, "SY-LA": true, "SY-QU": true, "SY-RA": true, "SY-RD": true, - "SY-SU": true, "SY-TA": true, "SZ-HH": true, "SZ-LU": true, "SZ-MA": true, - "SZ-SH": true, "TD-BA": true, "TD-BG": true, "TD-BO": true, "TD-CB": true, - "TD-EN": true, "TD-GR": true, "TD-HL": true, "TD-KA": true, "TD-LC": true, - "TD-LO": true, "TD-LR": true, "TD-MA": true, "TD-MC": true, "TD-ME": true, - "TD-MO": true, "TD-ND": true, "TD-OD": true, "TD-SA": true, "TD-SI": true, - "TD-TA": true, "TD-TI": true, "TD-WF": true, "TG-C": true, "TG-K": true, - "TG-M": true, "TG-P": true, "TG-S": true, "TH-10": true, "TH-11": true, - "TH-12": true, "TH-13": true, "TH-14": true, "TH-15": true, "TH-16": true, - "TH-17": true, "TH-18": true, "TH-19": true, "TH-20": true, "TH-21": true, - "TH-22": true, "TH-23": true, "TH-24": true, "TH-25": true, "TH-26": true, - "TH-27": true, "TH-30": true, "TH-31": true, "TH-32": true, "TH-33": true, - "TH-34": true, "TH-35": true, "TH-36": true, "TH-37": true, "TH-38": true, "TH-39": true, - "TH-40": true, "TH-41": true, "TH-42": true, "TH-43": true, "TH-44": true, - "TH-45": true, "TH-46": true, "TH-47": true, "TH-48": true, "TH-49": true, - "TH-50": true, "TH-51": true, "TH-52": true, "TH-53": true, "TH-54": true, - "TH-55": true, "TH-56": true, "TH-57": true, "TH-58": true, "TH-60": true, - "TH-61": true, "TH-62": true, "TH-63": true, "TH-64": true, "TH-65": true, - "TH-66": true, "TH-67": true, "TH-70": true, "TH-71": true, "TH-72": true, - "TH-73": true, "TH-74": true, "TH-75": true, "TH-76": true, "TH-77": true, - "TH-80": true, "TH-81": true, "TH-82": true, "TH-83": true, "TH-84": true, - "TH-85": true, "TH-86": true, "TH-90": true, "TH-91": true, "TH-92": true, - "TH-93": true, "TH-94": true, "TH-95": true, "TH-96": true, "TH-S": true, - "TJ-GB": true, "TJ-KT": true, "TJ-SU": true, "TJ-DU": true, "TJ-RA": true, "TL-AL": true, "TL-AN": true, - "TL-BA": true, "TL-BO": true, "TL-CO": true, "TL-DI": true, "TL-ER": true, - "TL-LA": true, "TL-LI": true, "TL-MF": true, "TL-MT": true, "TL-OE": true, - "TL-VI": true, "TM-A": true, "TM-B": true, "TM-D": true, "TM-L": true, - "TM-M": true, "TM-S": true, "TN-11": true, "TN-12": true, "TN-13": true, - "TN-14": true, "TN-21": true, "TN-22": true, "TN-23": true, "TN-31": true, - "TN-32": true, "TN-33": true, "TN-34": true, "TN-41": true, "TN-42": true, - "TN-43": true, "TN-51": true, "TN-52": true, "TN-53": true, "TN-61": true, - "TN-71": true, "TN-72": true, "TN-73": true, "TN-81": true, "TN-82": true, - "TN-83": true, "TO-01": true, "TO-02": true, "TO-03": true, "TO-04": true, - "TO-05": true, "TR-01": true, "TR-02": true, "TR-03": true, "TR-04": true, - "TR-05": true, "TR-06": true, "TR-07": true, "TR-08": true, "TR-09": true, - "TR-10": true, "TR-11": true, "TR-12": true, "TR-13": true, "TR-14": true, - "TR-15": true, "TR-16": true, "TR-17": true, "TR-18": true, "TR-19": true, - "TR-20": true, "TR-21": true, "TR-22": true, "TR-23": true, "TR-24": true, - "TR-25": true, "TR-26": true, "TR-27": true, "TR-28": true, "TR-29": true, - "TR-30": true, "TR-31": true, "TR-32": true, "TR-33": true, "TR-34": true, - "TR-35": true, "TR-36": true, "TR-37": true, "TR-38": true, "TR-39": true, - "TR-40": true, "TR-41": true, "TR-42": true, "TR-43": true, "TR-44": true, - "TR-45": true, "TR-46": true, "TR-47": true, "TR-48": true, "TR-49": true, - "TR-50": true, "TR-51": true, "TR-52": true, "TR-53": true, "TR-54": true, - "TR-55": true, "TR-56": true, "TR-57": true, "TR-58": true, "TR-59": true, - "TR-60": true, "TR-61": true, "TR-62": true, "TR-63": true, "TR-64": true, - "TR-65": true, "TR-66": true, "TR-67": true, "TR-68": true, "TR-69": true, - "TR-70": true, "TR-71": true, "TR-72": true, "TR-73": true, "TR-74": true, - "TR-75": true, "TR-76": true, "TR-77": true, "TR-78": true, "TR-79": true, - "TR-80": true, "TR-81": true, "TT-ARI": true, "TT-CHA": true, "TT-CTT": true, - "TT-DMN": true, "TT-ETO": true, "TT-MRC": true, "TT-TOB": true, "TT-PED": true, "TT-POS": true, "TT-PRT": true, - "TT-PTF": true, "TT-RCM": true, "TT-SFO": true, "TT-SGE": true, "TT-SIP": true, - "TT-SJL": true, "TT-TUP": true, "TT-WTO": true, "TV-FUN": true, "TV-NIT": true, - "TV-NKF": true, "TV-NKL": true, "TV-NMA": true, "TV-NMG": true, "TV-NUI": true, - "TV-VAI": true, "TW-CHA": true, "TW-CYI": true, "TW-CYQ": true, "TW-KIN": true, "TW-HSQ": true, - "TW-HSZ": true, "TW-HUA": true, "TW-LIE": true, "TW-ILA": true, "TW-KEE": true, "TW-KHH": true, - "TW-KHQ": true, "TW-MIA": true, "TW-NAN": true, "TW-NWT": true, "TW-PEN": true, "TW-PIF": true, - "TW-TAO": true, "TW-TNN": true, "TW-TNQ": true, "TW-TPE": true, "TW-TPQ": true, - "TW-TTT": true, "TW-TXG": true, "TW-TXQ": true, "TW-YUN": true, "TZ-01": true, - "TZ-02": true, "TZ-03": true, "TZ-04": true, "TZ-05": true, "TZ-06": true, - "TZ-07": true, "TZ-08": true, "TZ-09": true, "TZ-10": true, "TZ-11": true, - "TZ-12": true, "TZ-13": true, "TZ-14": true, "TZ-15": true, "TZ-16": true, - "TZ-17": true, "TZ-18": true, "TZ-19": true, "TZ-20": true, "TZ-21": true, - "TZ-22": true, "TZ-23": true, "TZ-24": true, "TZ-25": true, "TZ-26": true, "TZ-27": true, "TZ-28": true, "TZ-29": true, "TZ-30": true, "TZ-31": true, - "UA-05": true, "UA-07": true, "UA-09": true, "UA-12": true, "UA-14": true, - "UA-18": true, "UA-21": true, "UA-23": true, "UA-26": true, "UA-30": true, - "UA-32": true, "UA-35": true, "UA-40": true, "UA-43": true, "UA-46": true, - "UA-48": true, "UA-51": true, "UA-53": true, "UA-56": true, "UA-59": true, - "UA-61": true, "UA-63": true, "UA-65": true, "UA-68": true, "UA-71": true, - "UA-74": true, "UA-77": true, "UG-101": true, "UG-102": true, "UG-103": true, - "UG-104": true, "UG-105": true, "UG-106": true, "UG-107": true, "UG-108": true, - "UG-109": true, "UG-110": true, "UG-111": true, "UG-112": true, "UG-113": true, - "UG-114": true, "UG-115": true, "UG-116": true, "UG-201": true, "UG-202": true, - "UG-203": true, "UG-204": true, "UG-205": true, "UG-206": true, "UG-207": true, - "UG-208": true, "UG-209": true, "UG-210": true, "UG-211": true, "UG-212": true, - "UG-213": true, "UG-214": true, "UG-215": true, "UG-216": true, "UG-217": true, - "UG-218": true, "UG-219": true, "UG-220": true, "UG-221": true, "UG-222": true, - "UG-223": true, "UG-224": true, "UG-301": true, "UG-302": true, "UG-303": true, - "UG-304": true, "UG-305": true, "UG-306": true, "UG-307": true, "UG-308": true, - "UG-309": true, "UG-310": true, "UG-311": true, "UG-312": true, "UG-313": true, - "UG-314": true, "UG-315": true, "UG-316": true, "UG-317": true, "UG-318": true, - "UG-319": true, "UG-320": true, "UG-321": true, "UG-401": true, "UG-402": true, - "UG-403": true, "UG-404": true, "UG-405": true, "UG-406": true, "UG-407": true, - "UG-408": true, "UG-409": true, "UG-410": true, "UG-411": true, "UG-412": true, - "UG-413": true, "UG-414": true, "UG-415": true, "UG-416": true, "UG-417": true, - "UG-418": true, "UG-419": true, "UG-C": true, "UG-E": true, "UG-N": true, - "UG-W": true, "UG-322": true, "UG-323": true, "UG-420": true, "UG-117": true, - "UG-118": true, "UG-225": true, "UG-120": true, "UG-226": true, - "UG-121": true, "UG-122": true, "UG-227": true, "UG-421": true, - "UG-325": true, "UG-228": true, "UG-123": true, "UG-422": true, - "UG-326": true, "UG-229": true, "UG-124": true, "UG-423": true, - "UG-230": true, "UG-327": true, "UG-424": true, "UG-328": true, - "UG-425": true, "UG-426": true, "UG-330": true, - "UM-67": true, "UM-71": true, "UM-76": true, "UM-79": true, - "UM-81": true, "UM-84": true, "UM-86": true, "UM-89": true, "UM-95": true, - "US-AK": true, "US-AL": true, "US-AR": true, "US-AS": true, "US-AZ": true, - "US-CA": true, "US-CO": true, "US-CT": true, "US-DC": true, "US-DE": true, - "US-FL": true, "US-GA": true, "US-GU": true, "US-HI": true, "US-IA": true, - "US-ID": true, "US-IL": true, "US-IN": true, "US-KS": true, "US-KY": true, - "US-LA": true, "US-MA": true, "US-MD": true, "US-ME": true, "US-MI": true, - "US-MN": true, "US-MO": true, "US-MP": true, "US-MS": true, "US-MT": true, - "US-NC": true, "US-ND": true, "US-NE": true, "US-NH": true, "US-NJ": true, - "US-NM": true, "US-NV": true, "US-NY": true, "US-OH": true, "US-OK": true, - "US-OR": true, "US-PA": true, "US-PR": true, "US-RI": true, "US-SC": true, - "US-SD": true, "US-TN": true, "US-TX": true, "US-UM": true, "US-UT": true, - "US-VA": true, "US-VI": true, "US-VT": true, "US-WA": true, "US-WI": true, - "US-WV": true, "US-WY": true, "UY-AR": true, "UY-CA": true, "UY-CL": true, - "UY-CO": true, "UY-DU": true, "UY-FD": true, "UY-FS": true, "UY-LA": true, - "UY-MA": true, "UY-MO": true, "UY-PA": true, "UY-RN": true, "UY-RO": true, - "UY-RV": true, "UY-SA": true, "UY-SJ": true, "UY-SO": true, "UY-TA": true, - "UY-TT": true, "UZ-AN": true, "UZ-BU": true, "UZ-FA": true, "UZ-JI": true, - "UZ-NG": true, "UZ-NW": true, "UZ-QA": true, "UZ-QR": true, "UZ-SA": true, - "UZ-SI": true, "UZ-SU": true, "UZ-TK": true, "UZ-TO": true, "UZ-XO": true, - "VC-01": true, "VC-02": true, "VC-03": true, "VC-04": true, "VC-05": true, - "VC-06": true, "VE-A": true, "VE-B": true, "VE-C": true, "VE-D": true, - "VE-E": true, "VE-F": true, "VE-G": true, "VE-H": true, "VE-I": true, - "VE-J": true, "VE-K": true, "VE-L": true, "VE-M": true, "VE-N": true, - "VE-O": true, "VE-P": true, "VE-R": true, "VE-S": true, "VE-T": true, - "VE-U": true, "VE-V": true, "VE-W": true, "VE-X": true, "VE-Y": true, - "VE-Z": true, "VN-01": true, "VN-02": true, "VN-03": true, "VN-04": true, - "VN-05": true, "VN-06": true, "VN-07": true, "VN-09": true, "VN-13": true, - "VN-14": true, "VN-15": true, "VN-18": true, "VN-20": true, "VN-21": true, - "VN-22": true, "VN-23": true, "VN-24": true, "VN-25": true, "VN-26": true, - "VN-27": true, "VN-28": true, "VN-29": true, "VN-30": true, "VN-31": true, - "VN-32": true, "VN-33": true, "VN-34": true, "VN-35": true, "VN-36": true, - "VN-37": true, "VN-39": true, "VN-40": true, "VN-41": true, "VN-43": true, - "VN-44": true, "VN-45": true, "VN-46": true, "VN-47": true, "VN-49": true, - "VN-50": true, "VN-51": true, "VN-52": true, "VN-53": true, "VN-54": true, - "VN-55": true, "VN-56": true, "VN-57": true, "VN-58": true, "VN-59": true, - "VN-61": true, "VN-63": true, "VN-66": true, "VN-67": true, "VN-68": true, - "VN-69": true, "VN-70": true, "VN-71": true, "VN-72": true, "VN-73": true, - "VN-CT": true, "VN-DN": true, "VN-HN": true, "VN-HP": true, "VN-SG": true, - "VU-MAP": true, "VU-PAM": true, "VU-SAM": true, "VU-SEE": true, "VU-TAE": true, - "VU-TOB": true, "WF-SG": true, "WF-UV": true, "WS-AA": true, "WS-AL": true, "WS-AT": true, "WS-FA": true, - "WS-GE": true, "WS-GI": true, "WS-PA": true, "WS-SA": true, "WS-TU": true, - "WS-VF": true, "WS-VS": true, "YE-AB": true, "YE-AD": true, "YE-AM": true, - "YE-BA": true, "YE-DA": true, "YE-DH": true, "YE-HD": true, "YE-HJ": true, "YE-HU": true, - "YE-IB": true, "YE-JA": true, "YE-LA": true, "YE-MA": true, "YE-MR": true, - "YE-MU": true, "YE-MW": true, "YE-RA": true, "YE-SA": true, "YE-SD": true, "YE-SH": true, - "YE-SN": true, "YE-TA": true, "ZA-EC": true, "ZA-FS": true, "ZA-GP": true, - "ZA-LP": true, "ZA-MP": true, "ZA-NC": true, "ZA-NW": true, "ZA-WC": true, - "ZA-ZN": true, "ZA-KZN": true, "ZM-01": true, "ZM-02": true, "ZM-03": true, "ZM-04": true, - "ZM-05": true, "ZM-06": true, "ZM-07": true, "ZM-08": true, "ZM-09": true, "ZM-10": true, - "ZW-BU": true, "ZW-HA": true, "ZW-MA": true, "ZW-MC": true, "ZW-ME": true, - "ZW-MI": true, "ZW-MN": true, "ZW-MS": true, "ZW-MV": true, "ZW-MW": true, +var iso3166_2 = map[string]struct{}{ + "AD-02": {}, "AD-03": {}, "AD-04": {}, "AD-05": {}, "AD-06": {}, + "AD-07": {}, "AD-08": {}, "AE-AJ": {}, "AE-AZ": {}, "AE-DU": {}, + "AE-FU": {}, "AE-RK": {}, "AE-SH": {}, "AE-UQ": {}, "AF-BAL": {}, + "AF-BAM": {}, "AF-BDG": {}, "AF-BDS": {}, "AF-BGL": {}, "AF-DAY": {}, + "AF-FRA": {}, "AF-FYB": {}, "AF-GHA": {}, "AF-GHO": {}, "AF-HEL": {}, + "AF-HER": {}, "AF-JOW": {}, "AF-KAB": {}, "AF-KAN": {}, "AF-KAP": {}, + "AF-KDZ": {}, "AF-KHO": {}, "AF-KNR": {}, "AF-LAG": {}, "AF-LOG": {}, + "AF-NAN": {}, "AF-NIM": {}, "AF-NUR": {}, "AF-PAN": {}, "AF-PAR": {}, + "AF-PIA": {}, "AF-PKA": {}, "AF-SAM": {}, "AF-SAR": {}, "AF-TAK": {}, + "AF-URU": {}, "AF-WAR": {}, "AF-ZAB": {}, "AG-03": {}, "AG-04": {}, + "AG-05": {}, "AG-06": {}, "AG-07": {}, "AG-08": {}, "AG-10": {}, + "AG-11": {}, "AL-01": {}, "AL-02": {}, "AL-03": {}, "AL-04": {}, + "AL-05": {}, "AL-06": {}, "AL-07": {}, "AL-08": {}, "AL-09": {}, + "AL-10": {}, "AL-11": {}, "AL-12": {}, "AL-BR": {}, "AL-BU": {}, + "AL-DI": {}, "AL-DL": {}, "AL-DR": {}, "AL-DV": {}, "AL-EL": {}, + "AL-ER": {}, "AL-FR": {}, "AL-GJ": {}, "AL-GR": {}, "AL-HA": {}, + "AL-KA": {}, "AL-KB": {}, "AL-KC": {}, "AL-KO": {}, "AL-KR": {}, + "AL-KU": {}, "AL-LB": {}, "AL-LE": {}, "AL-LU": {}, "AL-MK": {}, + "AL-MM": {}, "AL-MR": {}, "AL-MT": {}, "AL-PG": {}, "AL-PQ": {}, + "AL-PR": {}, "AL-PU": {}, "AL-SH": {}, "AL-SK": {}, "AL-SR": {}, + "AL-TE": {}, "AL-TP": {}, "AL-TR": {}, "AL-VL": {}, "AM-AG": {}, + "AM-AR": {}, "AM-AV": {}, "AM-ER": {}, "AM-GR": {}, "AM-KT": {}, + "AM-LO": {}, "AM-SH": {}, "AM-SU": {}, "AM-TV": {}, "AM-VD": {}, + "AO-BGO": {}, "AO-BGU": {}, "AO-BIE": {}, "AO-CAB": {}, "AO-CCU": {}, + "AO-CNN": {}, "AO-CNO": {}, "AO-CUS": {}, "AO-HUA": {}, "AO-HUI": {}, + "AO-LNO": {}, "AO-LSU": {}, "AO-LUA": {}, "AO-MAL": {}, "AO-MOX": {}, + "AO-NAM": {}, "AO-UIG": {}, "AO-ZAI": {}, "AR-A": {}, "AR-B": {}, + "AR-C": {}, "AR-D": {}, "AR-E": {}, "AR-F": {}, "AR-G": {}, "AR-H": {}, + "AR-J": {}, "AR-K": {}, "AR-L": {}, "AR-M": {}, "AR-N": {}, + "AR-P": {}, "AR-Q": {}, "AR-R": {}, "AR-S": {}, "AR-T": {}, + "AR-U": {}, "AR-V": {}, "AR-W": {}, "AR-X": {}, "AR-Y": {}, + "AR-Z": {}, "AT-1": {}, "AT-2": {}, "AT-3": {}, "AT-4": {}, + "AT-5": {}, "AT-6": {}, "AT-7": {}, "AT-8": {}, "AT-9": {}, + "AU-ACT": {}, "AU-NSW": {}, "AU-NT": {}, "AU-QLD": {}, "AU-SA": {}, + "AU-TAS": {}, "AU-VIC": {}, "AU-WA": {}, "AZ-ABS": {}, "AZ-AGA": {}, + "AZ-AGC": {}, "AZ-AGM": {}, "AZ-AGS": {}, "AZ-AGU": {}, "AZ-AST": {}, + "AZ-BA": {}, "AZ-BAB": {}, "AZ-BAL": {}, "AZ-BAR": {}, "AZ-BEY": {}, + "AZ-BIL": {}, "AZ-CAB": {}, "AZ-CAL": {}, "AZ-CUL": {}, "AZ-DAS": {}, + "AZ-FUZ": {}, "AZ-GA": {}, "AZ-GAD": {}, "AZ-GOR": {}, "AZ-GOY": {}, + "AZ-GYG": {}, "AZ-HAC": {}, "AZ-IMI": {}, "AZ-ISM": {}, "AZ-KAL": {}, + "AZ-KAN": {}, "AZ-KUR": {}, "AZ-LA": {}, "AZ-LAC": {}, "AZ-LAN": {}, + "AZ-LER": {}, "AZ-MAS": {}, "AZ-MI": {}, "AZ-NA": {}, "AZ-NEF": {}, + "AZ-NV": {}, "AZ-NX": {}, "AZ-OGU": {}, "AZ-ORD": {}, "AZ-QAB": {}, + "AZ-QAX": {}, "AZ-QAZ": {}, "AZ-QBA": {}, "AZ-QBI": {}, "AZ-QOB": {}, + "AZ-QUS": {}, "AZ-SA": {}, "AZ-SAB": {}, "AZ-SAD": {}, "AZ-SAH": {}, + "AZ-SAK": {}, "AZ-SAL": {}, "AZ-SAR": {}, "AZ-SAT": {}, "AZ-SBN": {}, + "AZ-SIY": {}, "AZ-SKR": {}, "AZ-SM": {}, "AZ-SMI": {}, "AZ-SMX": {}, + "AZ-SR": {}, "AZ-SUS": {}, "AZ-TAR": {}, "AZ-TOV": {}, "AZ-UCA": {}, + "AZ-XA": {}, "AZ-XAC": {}, "AZ-XCI": {}, "AZ-XIZ": {}, "AZ-XVD": {}, + "AZ-YAR": {}, "AZ-YE": {}, "AZ-YEV": {}, "AZ-ZAN": {}, "AZ-ZAQ": {}, + "AZ-ZAR": {}, "BA-01": {}, "BA-02": {}, "BA-03": {}, "BA-04": {}, + "BA-05": {}, "BA-06": {}, "BA-07": {}, "BA-08": {}, "BA-09": {}, + "BA-10": {}, "BA-BIH": {}, "BA-BRC": {}, "BA-SRP": {}, "BB-01": {}, + "BB-02": {}, "BB-03": {}, "BB-04": {}, "BB-05": {}, "BB-06": {}, + "BB-07": {}, "BB-08": {}, "BB-09": {}, "BB-10": {}, "BB-11": {}, + "BD-01": {}, "BD-02": {}, "BD-03": {}, "BD-04": {}, "BD-05": {}, + "BD-06": {}, "BD-07": {}, "BD-08": {}, "BD-09": {}, "BD-10": {}, + "BD-11": {}, "BD-12": {}, "BD-13": {}, "BD-14": {}, "BD-15": {}, + "BD-16": {}, "BD-17": {}, "BD-18": {}, "BD-19": {}, "BD-20": {}, + "BD-21": {}, "BD-22": {}, "BD-23": {}, "BD-24": {}, "BD-25": {}, + "BD-26": {}, "BD-27": {}, "BD-28": {}, "BD-29": {}, "BD-30": {}, + "BD-31": {}, "BD-32": {}, "BD-33": {}, "BD-34": {}, "BD-35": {}, + "BD-36": {}, "BD-37": {}, "BD-38": {}, "BD-39": {}, "BD-40": {}, + "BD-41": {}, "BD-42": {}, "BD-43": {}, "BD-44": {}, "BD-45": {}, + "BD-46": {}, "BD-47": {}, "BD-48": {}, "BD-49": {}, "BD-50": {}, + "BD-51": {}, "BD-52": {}, "BD-53": {}, "BD-54": {}, "BD-55": {}, + "BD-56": {}, "BD-57": {}, "BD-58": {}, "BD-59": {}, "BD-60": {}, + "BD-61": {}, "BD-62": {}, "BD-63": {}, "BD-64": {}, "BD-A": {}, + "BD-B": {}, "BD-C": {}, "BD-D": {}, "BD-E": {}, "BD-F": {}, + "BD-G": {}, "BE-BRU": {}, "BE-VAN": {}, "BE-VBR": {}, "BE-VLG": {}, + "BE-VLI": {}, "BE-VOV": {}, "BE-VWV": {}, "BE-WAL": {}, "BE-WBR": {}, + "BE-WHT": {}, "BE-WLG": {}, "BE-WLX": {}, "BE-WNA": {}, "BF-01": {}, + "BF-02": {}, "BF-03": {}, "BF-04": {}, "BF-05": {}, "BF-06": {}, + "BF-07": {}, "BF-08": {}, "BF-09": {}, "BF-10": {}, "BF-11": {}, + "BF-12": {}, "BF-13": {}, "BF-BAL": {}, "BF-BAM": {}, "BF-BAN": {}, + "BF-BAZ": {}, "BF-BGR": {}, "BF-BLG": {}, "BF-BLK": {}, "BF-COM": {}, + "BF-GAN": {}, "BF-GNA": {}, "BF-GOU": {}, "BF-HOU": {}, "BF-IOB": {}, + "BF-KAD": {}, "BF-KEN": {}, "BF-KMD": {}, "BF-KMP": {}, "BF-KOP": {}, + "BF-KOS": {}, "BF-KOT": {}, "BF-KOW": {}, "BF-LER": {}, "BF-LOR": {}, + "BF-MOU": {}, "BF-NAM": {}, "BF-NAO": {}, "BF-NAY": {}, "BF-NOU": {}, + "BF-OUB": {}, "BF-OUD": {}, "BF-PAS": {}, "BF-PON": {}, "BF-SEN": {}, + "BF-SIS": {}, "BF-SMT": {}, "BF-SNG": {}, "BF-SOM": {}, "BF-SOR": {}, + "BF-TAP": {}, "BF-TUI": {}, "BF-YAG": {}, "BF-YAT": {}, "BF-ZIR": {}, + "BF-ZON": {}, "BF-ZOU": {}, "BG-01": {}, "BG-02": {}, "BG-03": {}, + "BG-04": {}, "BG-05": {}, "BG-06": {}, "BG-07": {}, "BG-08": {}, + "BG-09": {}, "BG-10": {}, "BG-11": {}, "BG-12": {}, "BG-13": {}, + "BG-14": {}, "BG-15": {}, "BG-16": {}, "BG-17": {}, "BG-18": {}, + "BG-19": {}, "BG-20": {}, "BG-21": {}, "BG-22": {}, "BG-23": {}, + "BG-24": {}, "BG-25": {}, "BG-26": {}, "BG-27": {}, "BG-28": {}, + "BH-13": {}, "BH-14": {}, "BH-15": {}, "BH-16": {}, "BH-17": {}, + "BI-BB": {}, "BI-BL": {}, "BI-BM": {}, "BI-BR": {}, "BI-CA": {}, + "BI-CI": {}, "BI-GI": {}, "BI-KI": {}, "BI-KR": {}, "BI-KY": {}, + "BI-MA": {}, "BI-MU": {}, "BI-MW": {}, "BI-NG": {}, "BI-RM": {}, "BI-RT": {}, + "BI-RY": {}, "BJ-AK": {}, "BJ-AL": {}, "BJ-AQ": {}, "BJ-BO": {}, + "BJ-CO": {}, "BJ-DO": {}, "BJ-KO": {}, "BJ-LI": {}, "BJ-MO": {}, + "BJ-OU": {}, "BJ-PL": {}, "BJ-ZO": {}, "BN-BE": {}, "BN-BM": {}, + "BN-TE": {}, "BN-TU": {}, "BO-B": {}, "BO-C": {}, "BO-H": {}, + "BO-L": {}, "BO-N": {}, "BO-O": {}, "BO-P": {}, "BO-S": {}, + "BO-T": {}, "BQ-BO": {}, "BQ-SA": {}, "BQ-SE": {}, "BR-AC": {}, + "BR-AL": {}, "BR-AM": {}, "BR-AP": {}, "BR-BA": {}, "BR-CE": {}, + "BR-DF": {}, "BR-ES": {}, "BR-FN": {}, "BR-GO": {}, "BR-MA": {}, + "BR-MG": {}, "BR-MS": {}, "BR-MT": {}, "BR-PA": {}, "BR-PB": {}, + "BR-PE": {}, "BR-PI": {}, "BR-PR": {}, "BR-RJ": {}, "BR-RN": {}, + "BR-RO": {}, "BR-RR": {}, "BR-RS": {}, "BR-SC": {}, "BR-SE": {}, + "BR-SP": {}, "BR-TO": {}, "BS-AK": {}, "BS-BI": {}, "BS-BP": {}, + "BS-BY": {}, "BS-CE": {}, "BS-CI": {}, "BS-CK": {}, "BS-CO": {}, + "BS-CS": {}, "BS-EG": {}, "BS-EX": {}, "BS-FP": {}, "BS-GC": {}, + "BS-HI": {}, "BS-HT": {}, "BS-IN": {}, "BS-LI": {}, "BS-MC": {}, + "BS-MG": {}, "BS-MI": {}, "BS-NE": {}, "BS-NO": {}, "BS-NP": {}, "BS-NS": {}, + "BS-RC": {}, "BS-RI": {}, "BS-SA": {}, "BS-SE": {}, "BS-SO": {}, + "BS-SS": {}, "BS-SW": {}, "BS-WG": {}, "BT-11": {}, "BT-12": {}, + "BT-13": {}, "BT-14": {}, "BT-15": {}, "BT-21": {}, "BT-22": {}, + "BT-23": {}, "BT-24": {}, "BT-31": {}, "BT-32": {}, "BT-33": {}, + "BT-34": {}, "BT-41": {}, "BT-42": {}, "BT-43": {}, "BT-44": {}, + "BT-45": {}, "BT-GA": {}, "BT-TY": {}, "BW-CE": {}, "BW-CH": {}, "BW-GH": {}, + "BW-KG": {}, "BW-KL": {}, "BW-KW": {}, "BW-NE": {}, "BW-NW": {}, + "BW-SE": {}, "BW-SO": {}, "BY-BR": {}, "BY-HM": {}, "BY-HO": {}, + "BY-HR": {}, "BY-MA": {}, "BY-MI": {}, "BY-VI": {}, "BZ-BZ": {}, + "BZ-CY": {}, "BZ-CZL": {}, "BZ-OW": {}, "BZ-SC": {}, "BZ-TOL": {}, + "CA-AB": {}, "CA-BC": {}, "CA-MB": {}, "CA-NB": {}, "CA-NL": {}, + "CA-NS": {}, "CA-NT": {}, "CA-NU": {}, "CA-ON": {}, "CA-PE": {}, + "CA-QC": {}, "CA-SK": {}, "CA-YT": {}, "CD-BC": {}, "CD-BN": {}, + "CD-EQ": {}, "CD-HK": {}, "CD-IT": {}, "CD-KA": {}, "CD-KC": {}, "CD-KE": {}, "CD-KG": {}, "CD-KN": {}, + "CD-KW": {}, "CD-KS": {}, "CD-LU": {}, "CD-MA": {}, "CD-NK": {}, "CD-OR": {}, "CD-SA": {}, "CD-SK": {}, + "CD-TA": {}, "CD-TO": {}, "CF-AC": {}, "CF-BB": {}, "CF-BGF": {}, "CF-BK": {}, "CF-HK": {}, "CF-HM": {}, + "CF-HS": {}, "CF-KB": {}, "CF-KG": {}, "CF-LB": {}, "CF-MB": {}, + "CF-MP": {}, "CF-NM": {}, "CF-OP": {}, "CF-SE": {}, "CF-UK": {}, + "CF-VK": {}, "CG-11": {}, "CG-12": {}, "CG-13": {}, "CG-14": {}, + "CG-15": {}, "CG-16": {}, "CG-2": {}, "CG-5": {}, "CG-7": {}, "CG-8": {}, + "CG-9": {}, "CG-BZV": {}, "CH-AG": {}, "CH-AI": {}, "CH-AR": {}, + "CH-BE": {}, "CH-BL": {}, "CH-BS": {}, "CH-FR": {}, "CH-GE": {}, + "CH-GL": {}, "CH-GR": {}, "CH-JU": {}, "CH-LU": {}, "CH-NE": {}, + "CH-NW": {}, "CH-OW": {}, "CH-SG": {}, "CH-SH": {}, "CH-SO": {}, + "CH-SZ": {}, "CH-TG": {}, "CH-TI": {}, "CH-UR": {}, "CH-VD": {}, + "CH-VS": {}, "CH-ZG": {}, "CH-ZH": {}, "CI-AB": {}, "CI-BS": {}, + "CI-CM": {}, "CI-DN": {}, "CI-GD": {}, "CI-LC": {}, "CI-LG": {}, + "CI-MG": {}, "CI-SM": {}, "CI-SV": {}, "CI-VB": {}, "CI-WR": {}, + "CI-YM": {}, "CI-ZZ": {}, "CL-AI": {}, "CL-AN": {}, "CL-AP": {}, + "CL-AR": {}, "CL-AT": {}, "CL-BI": {}, "CL-CO": {}, "CL-LI": {}, + "CL-LL": {}, "CL-LR": {}, "CL-MA": {}, "CL-ML": {}, "CL-NB": {}, "CL-RM": {}, + "CL-TA": {}, "CL-VS": {}, "CM-AD": {}, "CM-CE": {}, "CM-EN": {}, + "CM-ES": {}, "CM-LT": {}, "CM-NO": {}, "CM-NW": {}, "CM-OU": {}, + "CM-SU": {}, "CM-SW": {}, "CN-AH": {}, "CN-BJ": {}, "CN-CQ": {}, + "CN-FJ": {}, "CN-GS": {}, "CN-GD": {}, "CN-GX": {}, "CN-GZ": {}, + "CN-HI": {}, "CN-HE": {}, "CN-HL": {}, "CN-HA": {}, "CN-HB": {}, + "CN-HN": {}, "CN-JS": {}, "CN-JX": {}, "CN-JL": {}, "CN-LN": {}, + "CN-NM": {}, "CN-NX": {}, "CN-QH": {}, "CN-SN": {}, "CN-SD": {}, "CN-SH": {}, + "CN-SX": {}, "CN-SC": {}, "CN-TJ": {}, "CN-XJ": {}, "CN-XZ": {}, "CN-YN": {}, + "CN-ZJ": {}, "CO-AMA": {}, "CO-ANT": {}, "CO-ARA": {}, "CO-ATL": {}, + "CO-BOL": {}, "CO-BOY": {}, "CO-CAL": {}, "CO-CAQ": {}, "CO-CAS": {}, + "CO-CAU": {}, "CO-CES": {}, "CO-CHO": {}, "CO-COR": {}, "CO-CUN": {}, + "CO-DC": {}, "CO-GUA": {}, "CO-GUV": {}, "CO-HUI": {}, "CO-LAG": {}, + "CO-MAG": {}, "CO-MET": {}, "CO-NAR": {}, "CO-NSA": {}, "CO-PUT": {}, + "CO-QUI": {}, "CO-RIS": {}, "CO-SAN": {}, "CO-SAP": {}, "CO-SUC": {}, + "CO-TOL": {}, "CO-VAC": {}, "CO-VAU": {}, "CO-VID": {}, "CR-A": {}, + "CR-C": {}, "CR-G": {}, "CR-H": {}, "CR-L": {}, "CR-P": {}, + "CR-SJ": {}, "CU-01": {}, "CU-02": {}, "CU-03": {}, "CU-04": {}, + "CU-05": {}, "CU-06": {}, "CU-07": {}, "CU-08": {}, "CU-09": {}, + "CU-10": {}, "CU-11": {}, "CU-12": {}, "CU-13": {}, "CU-14": {}, "CU-15": {}, + "CU-16": {}, "CU-99": {}, "CV-B": {}, "CV-BR": {}, "CV-BV": {}, "CV-CA": {}, + "CV-CF": {}, "CV-CR": {}, "CV-MA": {}, "CV-MO": {}, "CV-PA": {}, + "CV-PN": {}, "CV-PR": {}, "CV-RB": {}, "CV-RG": {}, "CV-RS": {}, + "CV-S": {}, "CV-SD": {}, "CV-SF": {}, "CV-SL": {}, "CV-SM": {}, + "CV-SO": {}, "CV-SS": {}, "CV-SV": {}, "CV-TA": {}, "CV-TS": {}, + "CY-01": {}, "CY-02": {}, "CY-03": {}, "CY-04": {}, "CY-05": {}, + "CY-06": {}, "CZ-10": {}, "CZ-101": {}, "CZ-102": {}, "CZ-103": {}, + "CZ-104": {}, "CZ-105": {}, "CZ-106": {}, "CZ-107": {}, "CZ-108": {}, + "CZ-109": {}, "CZ-110": {}, "CZ-111": {}, "CZ-112": {}, "CZ-113": {}, + "CZ-114": {}, "CZ-115": {}, "CZ-116": {}, "CZ-117": {}, "CZ-118": {}, + "CZ-119": {}, "CZ-120": {}, "CZ-121": {}, "CZ-122": {}, "CZ-20": {}, + "CZ-201": {}, "CZ-202": {}, "CZ-203": {}, "CZ-204": {}, "CZ-205": {}, + "CZ-206": {}, "CZ-207": {}, "CZ-208": {}, "CZ-209": {}, "CZ-20A": {}, + "CZ-20B": {}, "CZ-20C": {}, "CZ-31": {}, "CZ-311": {}, "CZ-312": {}, + "CZ-313": {}, "CZ-314": {}, "CZ-315": {}, "CZ-316": {}, "CZ-317": {}, + "CZ-32": {}, "CZ-321": {}, "CZ-322": {}, "CZ-323": {}, "CZ-324": {}, + "CZ-325": {}, "CZ-326": {}, "CZ-327": {}, "CZ-41": {}, "CZ-411": {}, + "CZ-412": {}, "CZ-413": {}, "CZ-42": {}, "CZ-421": {}, "CZ-422": {}, + "CZ-423": {}, "CZ-424": {}, "CZ-425": {}, "CZ-426": {}, "CZ-427": {}, + "CZ-51": {}, "CZ-511": {}, "CZ-512": {}, "CZ-513": {}, "CZ-514": {}, + "CZ-52": {}, "CZ-521": {}, "CZ-522": {}, "CZ-523": {}, "CZ-524": {}, + "CZ-525": {}, "CZ-53": {}, "CZ-531": {}, "CZ-532": {}, "CZ-533": {}, + "CZ-534": {}, "CZ-63": {}, "CZ-631": {}, "CZ-632": {}, "CZ-633": {}, + "CZ-634": {}, "CZ-635": {}, "CZ-64": {}, "CZ-641": {}, "CZ-642": {}, + "CZ-643": {}, "CZ-644": {}, "CZ-645": {}, "CZ-646": {}, "CZ-647": {}, + "CZ-71": {}, "CZ-711": {}, "CZ-712": {}, "CZ-713": {}, "CZ-714": {}, + "CZ-715": {}, "CZ-72": {}, "CZ-721": {}, "CZ-722": {}, "CZ-723": {}, + "CZ-724": {}, "CZ-80": {}, "CZ-801": {}, "CZ-802": {}, "CZ-803": {}, + "CZ-804": {}, "CZ-805": {}, "CZ-806": {}, "DE-BB": {}, "DE-BE": {}, + "DE-BW": {}, "DE-BY": {}, "DE-HB": {}, "DE-HE": {}, "DE-HH": {}, + "DE-MV": {}, "DE-NI": {}, "DE-NW": {}, "DE-RP": {}, "DE-SH": {}, + "DE-SL": {}, "DE-SN": {}, "DE-ST": {}, "DE-TH": {}, "DJ-AR": {}, + "DJ-AS": {}, "DJ-DI": {}, "DJ-DJ": {}, "DJ-OB": {}, "DJ-TA": {}, + "DK-81": {}, "DK-82": {}, "DK-83": {}, "DK-84": {}, "DK-85": {}, + "DM-01": {}, "DM-02": {}, "DM-03": {}, "DM-04": {}, "DM-05": {}, + "DM-06": {}, "DM-07": {}, "DM-08": {}, "DM-09": {}, "DM-10": {}, + "DO-01": {}, "DO-02": {}, "DO-03": {}, "DO-04": {}, "DO-05": {}, + "DO-06": {}, "DO-07": {}, "DO-08": {}, "DO-09": {}, "DO-10": {}, + "DO-11": {}, "DO-12": {}, "DO-13": {}, "DO-14": {}, "DO-15": {}, + "DO-16": {}, "DO-17": {}, "DO-18": {}, "DO-19": {}, "DO-20": {}, + "DO-21": {}, "DO-22": {}, "DO-23": {}, "DO-24": {}, "DO-25": {}, + "DO-26": {}, "DO-27": {}, "DO-28": {}, "DO-29": {}, "DO-30": {}, "DO-31": {}, + "DZ-01": {}, "DZ-02": {}, "DZ-03": {}, "DZ-04": {}, "DZ-05": {}, + "DZ-06": {}, "DZ-07": {}, "DZ-08": {}, "DZ-09": {}, "DZ-10": {}, + "DZ-11": {}, "DZ-12": {}, "DZ-13": {}, "DZ-14": {}, "DZ-15": {}, + "DZ-16": {}, "DZ-17": {}, "DZ-18": {}, "DZ-19": {}, "DZ-20": {}, + "DZ-21": {}, "DZ-22": {}, "DZ-23": {}, "DZ-24": {}, "DZ-25": {}, + "DZ-26": {}, "DZ-27": {}, "DZ-28": {}, "DZ-29": {}, "DZ-30": {}, + "DZ-31": {}, "DZ-32": {}, "DZ-33": {}, "DZ-34": {}, "DZ-35": {}, + "DZ-36": {}, "DZ-37": {}, "DZ-38": {}, "DZ-39": {}, "DZ-40": {}, + "DZ-41": {}, "DZ-42": {}, "DZ-43": {}, "DZ-44": {}, "DZ-45": {}, + "DZ-46": {}, "DZ-47": {}, "DZ-48": {}, "DZ-49": {}, "DZ-51": {}, + "DZ-53": {}, "DZ-55": {}, "DZ-56": {}, "DZ-57": {}, "EC-A": {}, "EC-B": {}, + "EC-C": {}, "EC-D": {}, "EC-E": {}, "EC-F": {}, "EC-G": {}, + "EC-H": {}, "EC-I": {}, "EC-L": {}, "EC-M": {}, "EC-N": {}, + "EC-O": {}, "EC-P": {}, "EC-R": {}, "EC-S": {}, "EC-SD": {}, + "EC-SE": {}, "EC-T": {}, "EC-U": {}, "EC-W": {}, "EC-X": {}, + "EC-Y": {}, "EC-Z": {}, "EE-37": {}, "EE-39": {}, "EE-44": {}, "EE-45": {}, + "EE-49": {}, "EE-50": {}, "EE-51": {}, "EE-52": {}, "EE-56": {}, "EE-57": {}, + "EE-59": {}, "EE-60": {}, "EE-64": {}, "EE-65": {}, "EE-67": {}, "EE-68": {}, + "EE-70": {}, "EE-71": {}, "EE-74": {}, "EE-78": {}, "EE-79": {}, "EE-81": {}, "EE-82": {}, + "EE-84": {}, "EE-86": {}, "EE-87": {}, "EG-ALX": {}, "EG-ASN": {}, "EG-AST": {}, + "EG-BA": {}, "EG-BH": {}, "EG-BNS": {}, "EG-C": {}, "EG-DK": {}, + "EG-DT": {}, "EG-FYM": {}, "EG-GH": {}, "EG-GZ": {}, "EG-HU": {}, + "EG-IS": {}, "EG-JS": {}, "EG-KB": {}, "EG-KFS": {}, "EG-KN": {}, + "EG-LX": {}, "EG-MN": {}, "EG-MNF": {}, "EG-MT": {}, "EG-PTS": {}, "EG-SHG": {}, + "EG-SHR": {}, "EG-SIN": {}, "EG-SU": {}, "EG-SUZ": {}, "EG-WAD": {}, + "ER-AN": {}, "ER-DK": {}, "ER-DU": {}, "ER-GB": {}, "ER-MA": {}, + "ER-SK": {}, "ES-A": {}, "ES-AB": {}, "ES-AL": {}, "ES-AN": {}, + "ES-AR": {}, "ES-AS": {}, "ES-AV": {}, "ES-B": {}, "ES-BA": {}, + "ES-BI": {}, "ES-BU": {}, "ES-C": {}, "ES-CA": {}, "ES-CB": {}, + "ES-CC": {}, "ES-CE": {}, "ES-CL": {}, "ES-CM": {}, "ES-CN": {}, + "ES-CO": {}, "ES-CR": {}, "ES-CS": {}, "ES-CT": {}, "ES-CU": {}, + "ES-EX": {}, "ES-GA": {}, "ES-GC": {}, "ES-GI": {}, "ES-GR": {}, + "ES-GU": {}, "ES-H": {}, "ES-HU": {}, "ES-IB": {}, "ES-J": {}, + "ES-L": {}, "ES-LE": {}, "ES-LO": {}, "ES-LU": {}, "ES-M": {}, + "ES-MA": {}, "ES-MC": {}, "ES-MD": {}, "ES-ML": {}, "ES-MU": {}, + "ES-NA": {}, "ES-NC": {}, "ES-O": {}, "ES-OR": {}, "ES-P": {}, + "ES-PM": {}, "ES-PO": {}, "ES-PV": {}, "ES-RI": {}, "ES-S": {}, + "ES-SA": {}, "ES-SE": {}, "ES-SG": {}, "ES-SO": {}, "ES-SS": {}, + "ES-T": {}, "ES-TE": {}, "ES-TF": {}, "ES-TO": {}, "ES-V": {}, + "ES-VA": {}, "ES-VC": {}, "ES-VI": {}, "ES-Z": {}, "ES-ZA": {}, + "ET-AA": {}, "ET-AF": {}, "ET-AM": {}, "ET-BE": {}, "ET-DD": {}, + "ET-GA": {}, "ET-HA": {}, "ET-OR": {}, "ET-SN": {}, "ET-SO": {}, + "ET-TI": {}, "FI-01": {}, "FI-02": {}, "FI-03": {}, "FI-04": {}, + "FI-05": {}, "FI-06": {}, "FI-07": {}, "FI-08": {}, "FI-09": {}, + "FI-10": {}, "FI-11": {}, "FI-12": {}, "FI-13": {}, "FI-14": {}, + "FI-15": {}, "FI-16": {}, "FI-17": {}, "FI-18": {}, "FI-19": {}, + "FJ-C": {}, "FJ-E": {}, "FJ-N": {}, "FJ-R": {}, "FJ-W": {}, + "FM-KSA": {}, "FM-PNI": {}, "FM-TRK": {}, "FM-YAP": {}, "FR-01": {}, + "FR-02": {}, "FR-03": {}, "FR-04": {}, "FR-05": {}, "FR-06": {}, + "FR-07": {}, "FR-08": {}, "FR-09": {}, "FR-10": {}, "FR-11": {}, + "FR-12": {}, "FR-13": {}, "FR-14": {}, "FR-15": {}, "FR-16": {}, + "FR-17": {}, "FR-18": {}, "FR-19": {}, "FR-20R": {}, "FR-21": {}, "FR-22": {}, + "FR-23": {}, "FR-24": {}, "FR-25": {}, "FR-26": {}, "FR-27": {}, + "FR-28": {}, "FR-29": {}, "FR-2A": {}, "FR-2B": {}, "FR-30": {}, + "FR-31": {}, "FR-32": {}, "FR-33": {}, "FR-34": {}, "FR-35": {}, + "FR-36": {}, "FR-37": {}, "FR-38": {}, "FR-39": {}, "FR-40": {}, + "FR-41": {}, "FR-42": {}, "FR-43": {}, "FR-44": {}, "FR-45": {}, + "FR-46": {}, "FR-47": {}, "FR-48": {}, "FR-49": {}, "FR-50": {}, + "FR-51": {}, "FR-52": {}, "FR-53": {}, "FR-54": {}, "FR-55": {}, + "FR-56": {}, "FR-57": {}, "FR-58": {}, "FR-59": {}, "FR-60": {}, + "FR-61": {}, "FR-62": {}, "FR-63": {}, "FR-64": {}, "FR-65": {}, + "FR-66": {}, "FR-67": {}, "FR-68": {}, "FR-69": {}, "FR-70": {}, + "FR-71": {}, "FR-72": {}, "FR-73": {}, "FR-74": {}, "FR-75": {}, + "FR-76": {}, "FR-77": {}, "FR-78": {}, "FR-79": {}, "FR-80": {}, + "FR-81": {}, "FR-82": {}, "FR-83": {}, "FR-84": {}, "FR-85": {}, + "FR-86": {}, "FR-87": {}, "FR-88": {}, "FR-89": {}, "FR-90": {}, + "FR-91": {}, "FR-92": {}, "FR-93": {}, "FR-94": {}, "FR-95": {}, + "FR-ARA": {}, "FR-BFC": {}, "FR-BL": {}, "FR-BRE": {}, "FR-COR": {}, + "FR-CP": {}, "FR-CVL": {}, "FR-GES": {}, "FR-GF": {}, "FR-GP": {}, + "FR-GUA": {}, "FR-HDF": {}, "FR-IDF": {}, "FR-LRE": {}, "FR-MAY": {}, + "FR-MF": {}, "FR-MQ": {}, "FR-NAQ": {}, "FR-NC": {}, "FR-NOR": {}, + "FR-OCC": {}, "FR-PAC": {}, "FR-PDL": {}, "FR-PF": {}, "FR-PM": {}, + "FR-RE": {}, "FR-TF": {}, "FR-WF": {}, "FR-YT": {}, "GA-1": {}, + "GA-2": {}, "GA-3": {}, "GA-4": {}, "GA-5": {}, "GA-6": {}, + "GA-7": {}, "GA-8": {}, "GA-9": {}, "GB-ABC": {}, "GB-ABD": {}, + "GB-ABE": {}, "GB-AGB": {}, "GB-AGY": {}, "GB-AND": {}, "GB-ANN": {}, + "GB-ANS": {}, "GB-BAS": {}, "GB-BBD": {}, "GB-BDF": {}, "GB-BDG": {}, + "GB-BEN": {}, "GB-BEX": {}, "GB-BFS": {}, "GB-BGE": {}, "GB-BGW": {}, + "GB-BIR": {}, "GB-BKM": {}, "GB-BMH": {}, "GB-BNE": {}, "GB-BNH": {}, + "GB-BNS": {}, "GB-BOL": {}, "GB-BPL": {}, "GB-BRC": {}, "GB-BRD": {}, + "GB-BRY": {}, "GB-BST": {}, "GB-BUR": {}, "GB-CAM": {}, "GB-CAY": {}, + "GB-CBF": {}, "GB-CCG": {}, "GB-CGN": {}, "GB-CHE": {}, "GB-CHW": {}, + "GB-CLD": {}, "GB-CLK": {}, "GB-CMA": {}, "GB-CMD": {}, "GB-CMN": {}, + "GB-CON": {}, "GB-COV": {}, "GB-CRF": {}, "GB-CRY": {}, "GB-CWY": {}, + "GB-DAL": {}, "GB-DBY": {}, "GB-DEN": {}, "GB-DER": {}, "GB-DEV": {}, + "GB-DGY": {}, "GB-DNC": {}, "GB-DND": {}, "GB-DOR": {}, "GB-DRS": {}, + "GB-DUD": {}, "GB-DUR": {}, "GB-EAL": {}, "GB-EAW": {}, "GB-EAY": {}, + "GB-EDH": {}, "GB-EDU": {}, "GB-ELN": {}, "GB-ELS": {}, "GB-ENF": {}, + "GB-ENG": {}, "GB-ERW": {}, "GB-ERY": {}, "GB-ESS": {}, "GB-ESX": {}, + "GB-FAL": {}, "GB-FIF": {}, "GB-FLN": {}, "GB-FMO": {}, "GB-GAT": {}, + "GB-GBN": {}, "GB-GLG": {}, "GB-GLS": {}, "GB-GRE": {}, "GB-GWN": {}, + "GB-HAL": {}, "GB-HAM": {}, "GB-HAV": {}, "GB-HCK": {}, "GB-HEF": {}, + "GB-HIL": {}, "GB-HLD": {}, "GB-HMF": {}, "GB-HNS": {}, "GB-HPL": {}, + "GB-HRT": {}, "GB-HRW": {}, "GB-HRY": {}, "GB-IOS": {}, "GB-IOW": {}, + "GB-ISL": {}, "GB-IVC": {}, "GB-KEC": {}, "GB-KEN": {}, "GB-KHL": {}, + "GB-KIR": {}, "GB-KTT": {}, "GB-KWL": {}, "GB-LAN": {}, "GB-LBC": {}, + "GB-LBH": {}, "GB-LCE": {}, "GB-LDS": {}, "GB-LEC": {}, "GB-LEW": {}, + "GB-LIN": {}, "GB-LIV": {}, "GB-LND": {}, "GB-LUT": {}, "GB-MAN": {}, + "GB-MDB": {}, "GB-MDW": {}, "GB-MEA": {}, "GB-MIK": {}, "GD-01": {}, + "GB-MLN": {}, "GB-MON": {}, "GB-MRT": {}, "GB-MRY": {}, "GB-MTY": {}, + "GB-MUL": {}, "GB-NAY": {}, "GB-NBL": {}, "GB-NEL": {}, "GB-NET": {}, + "GB-NFK": {}, "GB-NGM": {}, "GB-NIR": {}, "GB-NLK": {}, "GB-NLN": {}, + "GB-NMD": {}, "GB-NSM": {}, "GB-NTH": {}, "GB-NTL": {}, "GB-NTT": {}, + "GB-NTY": {}, "GB-NWM": {}, "GB-NWP": {}, "GB-NYK": {}, "GB-OLD": {}, + "GB-ORK": {}, "GB-OXF": {}, "GB-PEM": {}, "GB-PKN": {}, "GB-PLY": {}, + "GB-POL": {}, "GB-POR": {}, "GB-POW": {}, "GB-PTE": {}, "GB-RCC": {}, + "GB-RCH": {}, "GB-RCT": {}, "GB-RDB": {}, "GB-RDG": {}, "GB-RFW": {}, + "GB-RIC": {}, "GB-ROT": {}, "GB-RUT": {}, "GB-SAW": {}, "GB-SAY": {}, + "GB-SCB": {}, "GB-SCT": {}, "GB-SFK": {}, "GB-SFT": {}, "GB-SGC": {}, + "GB-SHF": {}, "GB-SHN": {}, "GB-SHR": {}, "GB-SKP": {}, "GB-SLF": {}, + "GB-SLG": {}, "GB-SLK": {}, "GB-SND": {}, "GB-SOL": {}, "GB-SOM": {}, + "GB-SOS": {}, "GB-SRY": {}, "GB-STE": {}, "GB-STG": {}, "GB-STH": {}, + "GB-STN": {}, "GB-STS": {}, "GB-STT": {}, "GB-STY": {}, "GB-SWA": {}, + "GB-SWD": {}, "GB-SWK": {}, "GB-TAM": {}, "GB-TFW": {}, "GB-THR": {}, + "GB-TOB": {}, "GB-TOF": {}, "GB-TRF": {}, "GB-TWH": {}, "GB-UKM": {}, + "GB-VGL": {}, "GB-WAR": {}, "GB-WBK": {}, "GB-WDU": {}, "GB-WFT": {}, + "GB-WGN": {}, "GB-WIL": {}, "GB-WKF": {}, "GB-WLL": {}, "GB-WLN": {}, + "GB-WLS": {}, "GB-WLV": {}, "GB-WND": {}, "GB-WNM": {}, "GB-WOK": {}, + "GB-WOR": {}, "GB-WRL": {}, "GB-WRT": {}, "GB-WRX": {}, "GB-WSM": {}, + "GB-WSX": {}, "GB-YOR": {}, "GB-ZET": {}, "GD-02": {}, "GD-03": {}, + "GD-04": {}, "GD-05": {}, "GD-06": {}, "GD-10": {}, "GE-AB": {}, + "GE-AJ": {}, "GE-GU": {}, "GE-IM": {}, "GE-KA": {}, "GE-KK": {}, + "GE-MM": {}, "GE-RL": {}, "GE-SJ": {}, "GE-SK": {}, "GE-SZ": {}, + "GE-TB": {}, "GH-AA": {}, "GH-AH": {}, "GH-AF": {}, "GH-BA": {}, "GH-BO": {}, "GH-BE": {}, "GH-CP": {}, + "GH-EP": {}, "GH-NP": {}, "GH-TV": {}, "GH-UE": {}, "GH-UW": {}, + "GH-WP": {}, "GL-AV": {}, "GL-KU": {}, "GL-QA": {}, "GL-QT": {}, "GL-QE": {}, "GL-SM": {}, + "GM-B": {}, "GM-L": {}, "GM-M": {}, "GM-N": {}, "GM-U": {}, + "GM-W": {}, "GN-B": {}, "GN-BE": {}, "GN-BF": {}, "GN-BK": {}, + "GN-C": {}, "GN-CO": {}, "GN-D": {}, "GN-DB": {}, "GN-DI": {}, + "GN-DL": {}, "GN-DU": {}, "GN-F": {}, "GN-FA": {}, "GN-FO": {}, + "GN-FR": {}, "GN-GA": {}, "GN-GU": {}, "GN-K": {}, "GN-KA": {}, + "GN-KB": {}, "GN-KD": {}, "GN-KE": {}, "GN-KN": {}, "GN-KO": {}, + "GN-KS": {}, "GN-L": {}, "GN-LA": {}, "GN-LE": {}, "GN-LO": {}, + "GN-M": {}, "GN-MC": {}, "GN-MD": {}, "GN-ML": {}, "GN-MM": {}, + "GN-N": {}, "GN-NZ": {}, "GN-PI": {}, "GN-SI": {}, "GN-TE": {}, + "GN-TO": {}, "GN-YO": {}, "GQ-AN": {}, "GQ-BN": {}, "GQ-BS": {}, + "GQ-C": {}, "GQ-CS": {}, "GQ-I": {}, "GQ-KN": {}, "GQ-LI": {}, + "GQ-WN": {}, "GR-01": {}, "GR-03": {}, "GR-04": {}, "GR-05": {}, + "GR-06": {}, "GR-07": {}, "GR-11": {}, "GR-12": {}, "GR-13": {}, + "GR-14": {}, "GR-15": {}, "GR-16": {}, "GR-17": {}, "GR-21": {}, + "GR-22": {}, "GR-23": {}, "GR-24": {}, "GR-31": {}, "GR-32": {}, + "GR-33": {}, "GR-34": {}, "GR-41": {}, "GR-42": {}, "GR-43": {}, + "GR-44": {}, "GR-51": {}, "GR-52": {}, "GR-53": {}, "GR-54": {}, + "GR-55": {}, "GR-56": {}, "GR-57": {}, "GR-58": {}, "GR-59": {}, + "GR-61": {}, "GR-62": {}, "GR-63": {}, "GR-64": {}, "GR-69": {}, + "GR-71": {}, "GR-72": {}, "GR-73": {}, "GR-81": {}, "GR-82": {}, + "GR-83": {}, "GR-84": {}, "GR-85": {}, "GR-91": {}, "GR-92": {}, + "GR-93": {}, "GR-94": {}, "GR-A": {}, "GR-A1": {}, "GR-B": {}, + "GR-C": {}, "GR-D": {}, "GR-E": {}, "GR-F": {}, "GR-G": {}, + "GR-H": {}, "GR-I": {}, "GR-J": {}, "GR-K": {}, "GR-L": {}, + "GR-M": {}, "GT-01": {}, "GT-02": {}, "GT-03": {}, "GT-04": {}, + "GT-05": {}, "GT-06": {}, "GT-07": {}, "GT-08": {}, "GT-09": {}, + "GT-10": {}, "GT-11": {}, "GT-12": {}, "GT-13": {}, "GT-14": {}, + "GT-15": {}, "GT-16": {}, "GT-17": {}, "GT-18": {}, "GT-19": {}, + "GT-20": {}, "GT-21": {}, "GT-22": {}, "GW-BA": {}, "GW-BL": {}, + "GW-BM": {}, "GW-BS": {}, "GW-CA": {}, "GW-GA": {}, "GW-L": {}, + "GW-N": {}, "GW-OI": {}, "GW-QU": {}, "GW-S": {}, "GW-TO": {}, + "GY-BA": {}, "GY-CU": {}, "GY-DE": {}, "GY-EB": {}, "GY-ES": {}, + "GY-MA": {}, "GY-PM": {}, "GY-PT": {}, "GY-UD": {}, "GY-UT": {}, + "HN-AT": {}, "HN-CH": {}, "HN-CL": {}, "HN-CM": {}, "HN-CP": {}, + "HN-CR": {}, "HN-EP": {}, "HN-FM": {}, "HN-GD": {}, "HN-IB": {}, + "HN-IN": {}, "HN-LE": {}, "HN-LP": {}, "HN-OC": {}, "HN-OL": {}, + "HN-SB": {}, "HN-VA": {}, "HN-YO": {}, "HR-01": {}, "HR-02": {}, + "HR-03": {}, "HR-04": {}, "HR-05": {}, "HR-06": {}, "HR-07": {}, + "HR-08": {}, "HR-09": {}, "HR-10": {}, "HR-11": {}, "HR-12": {}, + "HR-13": {}, "HR-14": {}, "HR-15": {}, "HR-16": {}, "HR-17": {}, + "HR-18": {}, "HR-19": {}, "HR-20": {}, "HR-21": {}, "HT-AR": {}, + "HT-CE": {}, "HT-GA": {}, "HT-ND": {}, "HT-NE": {}, "HT-NO": {}, "HT-NI": {}, + "HT-OU": {}, "HT-SD": {}, "HT-SE": {}, "HU-BA": {}, "HU-BC": {}, + "HU-BE": {}, "HU-BK": {}, "HU-BU": {}, "HU-BZ": {}, "HU-CS": {}, + "HU-DE": {}, "HU-DU": {}, "HU-EG": {}, "HU-ER": {}, "HU-FE": {}, + "HU-GS": {}, "HU-GY": {}, "HU-HB": {}, "HU-HE": {}, "HU-HV": {}, + "HU-JN": {}, "HU-KE": {}, "HU-KM": {}, "HU-KV": {}, "HU-MI": {}, + "HU-NK": {}, "HU-NO": {}, "HU-NY": {}, "HU-PE": {}, "HU-PS": {}, + "HU-SD": {}, "HU-SF": {}, "HU-SH": {}, "HU-SK": {}, "HU-SN": {}, + "HU-SO": {}, "HU-SS": {}, "HU-ST": {}, "HU-SZ": {}, "HU-TB": {}, + "HU-TO": {}, "HU-VA": {}, "HU-VE": {}, "HU-VM": {}, "HU-ZA": {}, + "HU-ZE": {}, "ID-AC": {}, "ID-BA": {}, "ID-BB": {}, "ID-BE": {}, + "ID-BT": {}, "ID-GO": {}, "ID-IJ": {}, "ID-JA": {}, "ID-JB": {}, + "ID-JI": {}, "ID-JK": {}, "ID-JT": {}, "ID-JW": {}, "ID-KA": {}, + "ID-KB": {}, "ID-KI": {}, "ID-KU": {}, "ID-KR": {}, "ID-KS": {}, + "ID-KT": {}, "ID-LA": {}, "ID-MA": {}, "ID-ML": {}, "ID-MU": {}, + "ID-NB": {}, "ID-NT": {}, "ID-NU": {}, "ID-PA": {}, "ID-PB": {}, + "ID-PE": {}, "ID-PP": {}, "ID-PS": {}, "ID-PT": {}, "ID-RI": {}, + "ID-SA": {}, "ID-SB": {}, "ID-SG": {}, "ID-SL": {}, "ID-SM": {}, + "ID-SN": {}, "ID-SR": {}, "ID-SS": {}, "ID-ST": {}, "ID-SU": {}, + "ID-YO": {}, "IE-C": {}, "IE-CE": {}, "IE-CN": {}, "IE-CO": {}, + "IE-CW": {}, "IE-D": {}, "IE-DL": {}, "IE-G": {}, "IE-KE": {}, + "IE-KK": {}, "IE-KY": {}, "IE-L": {}, "IE-LD": {}, "IE-LH": {}, + "IE-LK": {}, "IE-LM": {}, "IE-LS": {}, "IE-M": {}, "IE-MH": {}, + "IE-MN": {}, "IE-MO": {}, "IE-OY": {}, "IE-RN": {}, "IE-SO": {}, + "IE-TA": {}, "IE-U": {}, "IE-WD": {}, "IE-WH": {}, "IE-WW": {}, + "IE-WX": {}, "IL-D": {}, "IL-HA": {}, "IL-JM": {}, "IL-M": {}, + "IL-TA": {}, "IL-Z": {}, "IN-AN": {}, "IN-AP": {}, "IN-AR": {}, + "IN-AS": {}, "IN-BR": {}, "IN-CH": {}, "IN-CT": {}, "IN-DH": {}, + "IN-DL": {}, "IN-DN": {}, "IN-GA": {}, "IN-GJ": {}, "IN-HP": {}, + "IN-HR": {}, "IN-JH": {}, "IN-JK": {}, "IN-KA": {}, "IN-KL": {}, + "IN-LD": {}, "IN-MH": {}, "IN-ML": {}, "IN-MN": {}, "IN-MP": {}, + "IN-MZ": {}, "IN-NL": {}, "IN-TG": {}, "IN-OR": {}, "IN-PB": {}, "IN-PY": {}, + "IN-RJ": {}, "IN-SK": {}, "IN-TN": {}, "IN-TR": {}, "IN-UP": {}, + "IN-UT": {}, "IN-WB": {}, "IQ-AN": {}, "IQ-AR": {}, "IQ-BA": {}, + "IQ-BB": {}, "IQ-BG": {}, "IQ-DA": {}, "IQ-DI": {}, "IQ-DQ": {}, + "IQ-KA": {}, "IQ-KI": {}, "IQ-MA": {}, "IQ-MU": {}, "IQ-NA": {}, "IQ-NI": {}, + "IQ-QA": {}, "IQ-SD": {}, "IQ-SW": {}, "IQ-SU": {}, "IQ-TS": {}, "IQ-WA": {}, + "IR-00": {}, "IR-01": {}, "IR-02": {}, "IR-03": {}, "IR-04": {}, "IR-05": {}, + "IR-06": {}, "IR-07": {}, "IR-08": {}, "IR-09": {}, "IR-10": {}, "IR-11": {}, + "IR-12": {}, "IR-13": {}, "IR-14": {}, "IR-15": {}, "IR-16": {}, + "IR-17": {}, "IR-18": {}, "IR-19": {}, "IR-20": {}, "IR-21": {}, + "IR-22": {}, "IR-23": {}, "IR-24": {}, "IR-25": {}, "IR-26": {}, + "IR-27": {}, "IR-28": {}, "IR-29": {}, "IR-30": {}, "IR-31": {}, + "IS-0": {}, "IS-1": {}, "IS-2": {}, "IS-3": {}, "IS-4": {}, + "IS-5": {}, "IS-6": {}, "IS-7": {}, "IS-8": {}, "IT-21": {}, + "IT-23": {}, "IT-25": {}, "IT-32": {}, "IT-34": {}, "IT-36": {}, + "IT-42": {}, "IT-45": {}, "IT-52": {}, "IT-55": {}, "IT-57": {}, + "IT-62": {}, "IT-65": {}, "IT-67": {}, "IT-72": {}, "IT-75": {}, + "IT-77": {}, "IT-78": {}, "IT-82": {}, "IT-88": {}, "IT-AG": {}, + "IT-AL": {}, "IT-AN": {}, "IT-AO": {}, "IT-AP": {}, "IT-AQ": {}, + "IT-AR": {}, "IT-AT": {}, "IT-AV": {}, "IT-BA": {}, "IT-BG": {}, + "IT-BI": {}, "IT-BL": {}, "IT-BN": {}, "IT-BO": {}, "IT-BR": {}, + "IT-BS": {}, "IT-BT": {}, "IT-BZ": {}, "IT-CA": {}, "IT-CB": {}, + "IT-CE": {}, "IT-CH": {}, "IT-CI": {}, "IT-CL": {}, "IT-CN": {}, + "IT-CO": {}, "IT-CR": {}, "IT-CS": {}, "IT-CT": {}, "IT-CZ": {}, + "IT-EN": {}, "IT-FC": {}, "IT-FE": {}, "IT-FG": {}, "IT-FI": {}, + "IT-FM": {}, "IT-FR": {}, "IT-GE": {}, "IT-GO": {}, "IT-GR": {}, + "IT-IM": {}, "IT-IS": {}, "IT-KR": {}, "IT-LC": {}, "IT-LE": {}, + "IT-LI": {}, "IT-LO": {}, "IT-LT": {}, "IT-LU": {}, "IT-MB": {}, + "IT-MC": {}, "IT-ME": {}, "IT-MI": {}, "IT-MN": {}, "IT-MO": {}, + "IT-MS": {}, "IT-MT": {}, "IT-NA": {}, "IT-NO": {}, "IT-NU": {}, + "IT-OG": {}, "IT-OR": {}, "IT-OT": {}, "IT-PA": {}, "IT-PC": {}, + "IT-PD": {}, "IT-PE": {}, "IT-PG": {}, "IT-PI": {}, "IT-PN": {}, + "IT-PO": {}, "IT-PR": {}, "IT-PT": {}, "IT-PU": {}, "IT-PV": {}, + "IT-PZ": {}, "IT-RA": {}, "IT-RC": {}, "IT-RE": {}, "IT-RG": {}, + "IT-RI": {}, "IT-RM": {}, "IT-RN": {}, "IT-RO": {}, "IT-SA": {}, + "IT-SI": {}, "IT-SO": {}, "IT-SP": {}, "IT-SR": {}, "IT-SS": {}, + "IT-SV": {}, "IT-TA": {}, "IT-TE": {}, "IT-TN": {}, "IT-TO": {}, + "IT-TP": {}, "IT-TR": {}, "IT-TS": {}, "IT-TV": {}, "IT-UD": {}, + "IT-VA": {}, "IT-VB": {}, "IT-VC": {}, "IT-VE": {}, "IT-VI": {}, + "IT-VR": {}, "IT-VS": {}, "IT-VT": {}, "IT-VV": {}, "JM-01": {}, + "JM-02": {}, "JM-03": {}, "JM-04": {}, "JM-05": {}, "JM-06": {}, + "JM-07": {}, "JM-08": {}, "JM-09": {}, "JM-10": {}, "JM-11": {}, + "JM-12": {}, "JM-13": {}, "JM-14": {}, "JO-AJ": {}, "JO-AM": {}, + "JO-AQ": {}, "JO-AT": {}, "JO-AZ": {}, "JO-BA": {}, "JO-IR": {}, + "JO-JA": {}, "JO-KA": {}, "JO-MA": {}, "JO-MD": {}, "JO-MN": {}, + "JP-01": {}, "JP-02": {}, "JP-03": {}, "JP-04": {}, "JP-05": {}, + "JP-06": {}, "JP-07": {}, "JP-08": {}, "JP-09": {}, "JP-10": {}, + "JP-11": {}, "JP-12": {}, "JP-13": {}, "JP-14": {}, "JP-15": {}, + "JP-16": {}, "JP-17": {}, "JP-18": {}, "JP-19": {}, "JP-20": {}, + "JP-21": {}, "JP-22": {}, "JP-23": {}, "JP-24": {}, "JP-25": {}, + "JP-26": {}, "JP-27": {}, "JP-28": {}, "JP-29": {}, "JP-30": {}, + "JP-31": {}, "JP-32": {}, "JP-33": {}, "JP-34": {}, "JP-35": {}, + "JP-36": {}, "JP-37": {}, "JP-38": {}, "JP-39": {}, "JP-40": {}, + "JP-41": {}, "JP-42": {}, "JP-43": {}, "JP-44": {}, "JP-45": {}, + "JP-46": {}, "JP-47": {}, "KE-01": {}, "KE-02": {}, "KE-03": {}, + "KE-04": {}, "KE-05": {}, "KE-06": {}, "KE-07": {}, "KE-08": {}, + "KE-09": {}, "KE-10": {}, "KE-11": {}, "KE-12": {}, "KE-13": {}, + "KE-14": {}, "KE-15": {}, "KE-16": {}, "KE-17": {}, "KE-18": {}, + "KE-19": {}, "KE-20": {}, "KE-21": {}, "KE-22": {}, "KE-23": {}, + "KE-24": {}, "KE-25": {}, "KE-26": {}, "KE-27": {}, "KE-28": {}, + "KE-29": {}, "KE-30": {}, "KE-31": {}, "KE-32": {}, "KE-33": {}, + "KE-34": {}, "KE-35": {}, "KE-36": {}, "KE-37": {}, "KE-38": {}, + "KE-39": {}, "KE-40": {}, "KE-41": {}, "KE-42": {}, "KE-43": {}, + "KE-44": {}, "KE-45": {}, "KE-46": {}, "KE-47": {}, "KG-B": {}, + "KG-C": {}, "KG-GB": {}, "KG-GO": {}, "KG-J": {}, "KG-N": {}, "KG-O": {}, + "KG-T": {}, "KG-Y": {}, "KH-1": {}, "KH-10": {}, "KH-11": {}, + "KH-12": {}, "KH-13": {}, "KH-14": {}, "KH-15": {}, "KH-16": {}, + "KH-17": {}, "KH-18": {}, "KH-19": {}, "KH-2": {}, "KH-20": {}, + "KH-21": {}, "KH-22": {}, "KH-23": {}, "KH-24": {}, "KH-3": {}, + "KH-4": {}, "KH-5": {}, "KH-6": {}, "KH-7": {}, "KH-8": {}, + "KH-9": {}, "KI-G": {}, "KI-L": {}, "KI-P": {}, "KM-A": {}, + "KM-G": {}, "KM-M": {}, "KN-01": {}, "KN-02": {}, "KN-03": {}, + "KN-04": {}, "KN-05": {}, "KN-06": {}, "KN-07": {}, "KN-08": {}, + "KN-09": {}, "KN-10": {}, "KN-11": {}, "KN-12": {}, "KN-13": {}, + "KN-15": {}, "KN-K": {}, "KN-N": {}, "KP-01": {}, "KP-02": {}, + "KP-03": {}, "KP-04": {}, "KP-05": {}, "KP-06": {}, "KP-07": {}, + "KP-08": {}, "KP-09": {}, "KP-10": {}, "KP-13": {}, "KR-11": {}, + "KR-26": {}, "KR-27": {}, "KR-28": {}, "KR-29": {}, "KR-30": {}, + "KR-31": {}, "KR-41": {}, "KR-42": {}, "KR-43": {}, "KR-44": {}, + "KR-45": {}, "KR-46": {}, "KR-47": {}, "KR-48": {}, "KR-49": {}, + "KW-AH": {}, "KW-FA": {}, "KW-HA": {}, "KW-JA": {}, "KW-KU": {}, + "KW-MU": {}, "KZ-10": {}, "KZ-75": {}, "KZ-19": {}, "KZ-11": {}, + "KZ-15": {}, "KZ-71": {}, "KZ-23": {}, "KZ-27": {}, "KZ-47": {}, + "KZ-55": {}, "KZ-35": {}, "KZ-39": {}, "KZ-43": {}, "KZ-63": {}, + "KZ-79": {}, "KZ-59": {}, "KZ-61": {}, "KZ-62": {}, "KZ-31": {}, + "KZ-33": {}, "LA-AT": {}, "LA-BK": {}, "LA-BL": {}, + "LA-CH": {}, "LA-HO": {}, "LA-KH": {}, "LA-LM": {}, "LA-LP": {}, + "LA-OU": {}, "LA-PH": {}, "LA-SL": {}, "LA-SV": {}, "LA-VI": {}, + "LA-VT": {}, "LA-XA": {}, "LA-XE": {}, "LA-XI": {}, "LA-XS": {}, + "LB-AK": {}, "LB-AS": {}, "LB-BA": {}, "LB-BH": {}, "LB-BI": {}, + "LB-JA": {}, "LB-JL": {}, "LB-NA": {}, "LC-01": {}, "LC-02": {}, + "LC-03": {}, "LC-05": {}, "LC-06": {}, "LC-07": {}, "LC-08": {}, + "LC-10": {}, "LC-11": {}, "LI-01": {}, "LI-02": {}, + "LI-03": {}, "LI-04": {}, "LI-05": {}, "LI-06": {}, "LI-07": {}, + "LI-08": {}, "LI-09": {}, "LI-10": {}, "LI-11": {}, "LK-1": {}, + "LK-11": {}, "LK-12": {}, "LK-13": {}, "LK-2": {}, "LK-21": {}, + "LK-22": {}, "LK-23": {}, "LK-3": {}, "LK-31": {}, "LK-32": {}, + "LK-33": {}, "LK-4": {}, "LK-41": {}, "LK-42": {}, "LK-43": {}, + "LK-44": {}, "LK-45": {}, "LK-5": {}, "LK-51": {}, "LK-52": {}, + "LK-53": {}, "LK-6": {}, "LK-61": {}, "LK-62": {}, "LK-7": {}, + "LK-71": {}, "LK-72": {}, "LK-8": {}, "LK-81": {}, "LK-82": {}, + "LK-9": {}, "LK-91": {}, "LK-92": {}, "LR-BG": {}, "LR-BM": {}, + "LR-CM": {}, "LR-GB": {}, "LR-GG": {}, "LR-GK": {}, "LR-LO": {}, + "LR-MG": {}, "LR-MO": {}, "LR-MY": {}, "LR-NI": {}, "LR-RI": {}, + "LR-SI": {}, "LS-A": {}, "LS-B": {}, "LS-C": {}, "LS-D": {}, + "LS-E": {}, "LS-F": {}, "LS-G": {}, "LS-H": {}, "LS-J": {}, + "LS-K": {}, "LT-AL": {}, "LT-KL": {}, "LT-KU": {}, "LT-MR": {}, + "LT-PN": {}, "LT-SA": {}, "LT-TA": {}, "LT-TE": {}, "LT-UT": {}, + "LT-VL": {}, "LU-CA": {}, "LU-CL": {}, "LU-DI": {}, "LU-EC": {}, + "LU-ES": {}, "LU-GR": {}, "LU-LU": {}, "LU-ME": {}, "LU-RD": {}, + "LU-RM": {}, "LU-VD": {}, "LU-WI": {}, "LU-D": {}, "LU-G": {}, "LU-L": {}, + "LV-001": {}, "LV-111": {}, "LV-112": {}, "LV-113": {}, + "LV-002": {}, "LV-003": {}, "LV-004": {}, "LV-005": {}, "LV-006": {}, + "LV-007": {}, "LV-008": {}, "LV-009": {}, "LV-010": {}, "LV-011": {}, + "LV-012": {}, "LV-013": {}, "LV-014": {}, "LV-015": {}, "LV-016": {}, + "LV-017": {}, "LV-018": {}, "LV-019": {}, "LV-020": {}, "LV-021": {}, + "LV-022": {}, "LV-023": {}, "LV-024": {}, "LV-025": {}, "LV-026": {}, + "LV-027": {}, "LV-028": {}, "LV-029": {}, "LV-030": {}, "LV-031": {}, + "LV-032": {}, "LV-033": {}, "LV-034": {}, "LV-035": {}, "LV-036": {}, + "LV-037": {}, "LV-038": {}, "LV-039": {}, "LV-040": {}, "LV-041": {}, + "LV-042": {}, "LV-043": {}, "LV-044": {}, "LV-045": {}, "LV-046": {}, + "LV-047": {}, "LV-048": {}, "LV-049": {}, "LV-050": {}, "LV-051": {}, + "LV-052": {}, "LV-053": {}, "LV-054": {}, "LV-055": {}, "LV-056": {}, + "LV-057": {}, "LV-058": {}, "LV-059": {}, "LV-060": {}, "LV-061": {}, + "LV-062": {}, "LV-063": {}, "LV-064": {}, "LV-065": {}, "LV-066": {}, + "LV-067": {}, "LV-068": {}, "LV-069": {}, "LV-070": {}, "LV-071": {}, + "LV-072": {}, "LV-073": {}, "LV-074": {}, "LV-075": {}, "LV-076": {}, + "LV-077": {}, "LV-078": {}, "LV-079": {}, "LV-080": {}, "LV-081": {}, + "LV-082": {}, "LV-083": {}, "LV-084": {}, "LV-085": {}, "LV-086": {}, + "LV-087": {}, "LV-088": {}, "LV-089": {}, "LV-090": {}, "LV-091": {}, + "LV-092": {}, "LV-093": {}, "LV-094": {}, "LV-095": {}, "LV-096": {}, + "LV-097": {}, "LV-098": {}, "LV-099": {}, "LV-100": {}, "LV-101": {}, + "LV-102": {}, "LV-103": {}, "LV-104": {}, "LV-105": {}, "LV-106": {}, + "LV-107": {}, "LV-108": {}, "LV-109": {}, "LV-110": {}, "LV-DGV": {}, + "LV-JEL": {}, "LV-JKB": {}, "LV-JUR": {}, "LV-LPX": {}, "LV-REZ": {}, + "LV-RIX": {}, "LV-VEN": {}, "LV-VMR": {}, "LY-BA": {}, "LY-BU": {}, + "LY-DR": {}, "LY-GT": {}, "LY-JA": {}, "LY-JB": {}, "LY-JG": {}, + "LY-JI": {}, "LY-JU": {}, "LY-KF": {}, "LY-MB": {}, "LY-MI": {}, + "LY-MJ": {}, "LY-MQ": {}, "LY-NL": {}, "LY-NQ": {}, "LY-SB": {}, + "LY-SR": {}, "LY-TB": {}, "LY-WA": {}, "LY-WD": {}, "LY-WS": {}, + "LY-ZA": {}, "MA-01": {}, "MA-02": {}, "MA-03": {}, "MA-04": {}, + "MA-05": {}, "MA-06": {}, "MA-07": {}, "MA-08": {}, "MA-09": {}, + "MA-10": {}, "MA-11": {}, "MA-12": {}, "MA-13": {}, "MA-14": {}, + "MA-15": {}, "MA-16": {}, "MA-AGD": {}, "MA-AOU": {}, "MA-ASZ": {}, + "MA-AZI": {}, "MA-BEM": {}, "MA-BER": {}, "MA-BES": {}, "MA-BOD": {}, + "MA-BOM": {}, "MA-CAS": {}, "MA-CHE": {}, "MA-CHI": {}, "MA-CHT": {}, + "MA-ERR": {}, "MA-ESI": {}, "MA-ESM": {}, "MA-FAH": {}, "MA-FES": {}, + "MA-FIG": {}, "MA-GUE": {}, "MA-HAJ": {}, "MA-HAO": {}, "MA-HOC": {}, + "MA-IFR": {}, "MA-INE": {}, "MA-JDI": {}, "MA-JRA": {}, "MA-KEN": {}, + "MA-KES": {}, "MA-KHE": {}, "MA-KHN": {}, "MA-KHO": {}, "MA-LAA": {}, + "MA-LAR": {}, "MA-MED": {}, "MA-MEK": {}, "MA-MMD": {}, "MA-MMN": {}, + "MA-MOH": {}, "MA-MOU": {}, "MA-NAD": {}, "MA-NOU": {}, "MA-OUA": {}, + "MA-OUD": {}, "MA-OUJ": {}, "MA-RAB": {}, "MA-SAF": {}, "MA-SAL": {}, + "MA-SEF": {}, "MA-SET": {}, "MA-SIK": {}, "MA-SKH": {}, "MA-SYB": {}, + "MA-TAI": {}, "MA-TAO": {}, "MA-TAR": {}, "MA-TAT": {}, "MA-TAZ": {}, + "MA-TET": {}, "MA-TIZ": {}, "MA-TNG": {}, "MA-TNT": {}, "MA-ZAG": {}, + "MC-CL": {}, "MC-CO": {}, "MC-FO": {}, "MC-GA": {}, "MC-JE": {}, + "MC-LA": {}, "MC-MA": {}, "MC-MC": {}, "MC-MG": {}, "MC-MO": {}, + "MC-MU": {}, "MC-PH": {}, "MC-SD": {}, "MC-SO": {}, "MC-SP": {}, + "MC-SR": {}, "MC-VR": {}, "MD-AN": {}, "MD-BA": {}, "MD-BD": {}, + "MD-BR": {}, "MD-BS": {}, "MD-CA": {}, "MD-CL": {}, "MD-CM": {}, + "MD-CR": {}, "MD-CS": {}, "MD-CT": {}, "MD-CU": {}, "MD-DO": {}, + "MD-DR": {}, "MD-DU": {}, "MD-ED": {}, "MD-FA": {}, "MD-FL": {}, + "MD-GA": {}, "MD-GL": {}, "MD-HI": {}, "MD-IA": {}, "MD-LE": {}, + "MD-NI": {}, "MD-OC": {}, "MD-OR": {}, "MD-RE": {}, "MD-RI": {}, + "MD-SD": {}, "MD-SI": {}, "MD-SN": {}, "MD-SO": {}, "MD-ST": {}, + "MD-SV": {}, "MD-TA": {}, "MD-TE": {}, "MD-UN": {}, "ME-01": {}, + "ME-02": {}, "ME-03": {}, "ME-04": {}, "ME-05": {}, "ME-06": {}, + "ME-07": {}, "ME-08": {}, "ME-09": {}, "ME-10": {}, "ME-11": {}, + "ME-12": {}, "ME-13": {}, "ME-14": {}, "ME-15": {}, "ME-16": {}, + "ME-17": {}, "ME-18": {}, "ME-19": {}, "ME-20": {}, "ME-21": {}, "ME-24": {}, + "MG-A": {}, "MG-D": {}, "MG-F": {}, "MG-M": {}, "MG-T": {}, + "MG-U": {}, "MH-ALK": {}, "MH-ALL": {}, "MH-ARN": {}, "MH-AUR": {}, + "MH-EBO": {}, "MH-ENI": {}, "MH-JAB": {}, "MH-JAL": {}, "MH-KIL": {}, + "MH-KWA": {}, "MH-L": {}, "MH-LAE": {}, "MH-LIB": {}, "MH-LIK": {}, + "MH-MAJ": {}, "MH-MAL": {}, "MH-MEJ": {}, "MH-MIL": {}, "MH-NMK": {}, + "MH-NMU": {}, "MH-RON": {}, "MH-T": {}, "MH-UJA": {}, "MH-UTI": {}, + "MH-WTJ": {}, "MH-WTN": {}, "MK-101": {}, "MK-102": {}, "MK-103": {}, + "MK-104": {}, "MK-105": {}, + "MK-106": {}, "MK-107": {}, "MK-108": {}, "MK-109": {}, "MK-201": {}, + "MK-202": {}, "MK-205": {}, "MK-206": {}, "MK-207": {}, "MK-208": {}, + "MK-209": {}, "MK-210": {}, "MK-211": {}, "MK-301": {}, "MK-303": {}, + "MK-307": {}, "MK-308": {}, "MK-310": {}, "MK-311": {}, "MK-312": {}, + "MK-401": {}, "MK-402": {}, "MK-403": {}, "MK-404": {}, "MK-405": {}, + "MK-406": {}, "MK-408": {}, "MK-409": {}, "MK-410": {}, "MK-501": {}, + "MK-502": {}, "MK-503": {}, "MK-505": {}, "MK-506": {}, "MK-507": {}, + "MK-508": {}, "MK-509": {}, "MK-601": {}, "MK-602": {}, "MK-604": {}, + "MK-605": {}, "MK-606": {}, "MK-607": {}, "MK-608": {}, "MK-609": {}, + "MK-701": {}, "MK-702": {}, "MK-703": {}, "MK-704": {}, "MK-705": {}, + "MK-803": {}, "MK-804": {}, "MK-806": {}, "MK-807": {}, "MK-809": {}, + "MK-810": {}, "MK-811": {}, "MK-812": {}, "MK-813": {}, "MK-814": {}, + "MK-816": {}, "ML-1": {}, "ML-2": {}, "ML-3": {}, "ML-4": {}, + "ML-5": {}, "ML-6": {}, "ML-7": {}, "ML-8": {}, "ML-BKO": {}, + "MM-01": {}, "MM-02": {}, "MM-03": {}, "MM-04": {}, "MM-05": {}, + "MM-06": {}, "MM-07": {}, "MM-11": {}, "MM-12": {}, "MM-13": {}, + "MM-14": {}, "MM-15": {}, "MM-16": {}, "MM-17": {}, "MM-18": {}, "MN-035": {}, + "MN-037": {}, "MN-039": {}, "MN-041": {}, "MN-043": {}, "MN-046": {}, + "MN-047": {}, "MN-049": {}, "MN-051": {}, "MN-053": {}, "MN-055": {}, + "MN-057": {}, "MN-059": {}, "MN-061": {}, "MN-063": {}, "MN-064": {}, + "MN-065": {}, "MN-067": {}, "MN-069": {}, "MN-071": {}, "MN-073": {}, + "MN-1": {}, "MR-01": {}, "MR-02": {}, "MR-03": {}, "MR-04": {}, + "MR-05": {}, "MR-06": {}, "MR-07": {}, "MR-08": {}, "MR-09": {}, + "MR-10": {}, "MR-11": {}, "MR-12": {}, "MR-13": {}, "MR-NKC": {}, "MT-01": {}, + "MT-02": {}, "MT-03": {}, "MT-04": {}, "MT-05": {}, "MT-06": {}, + "MT-07": {}, "MT-08": {}, "MT-09": {}, "MT-10": {}, "MT-11": {}, + "MT-12": {}, "MT-13": {}, "MT-14": {}, "MT-15": {}, "MT-16": {}, + "MT-17": {}, "MT-18": {}, "MT-19": {}, "MT-20": {}, "MT-21": {}, + "MT-22": {}, "MT-23": {}, "MT-24": {}, "MT-25": {}, "MT-26": {}, + "MT-27": {}, "MT-28": {}, "MT-29": {}, "MT-30": {}, "MT-31": {}, + "MT-32": {}, "MT-33": {}, "MT-34": {}, "MT-35": {}, "MT-36": {}, + "MT-37": {}, "MT-38": {}, "MT-39": {}, "MT-40": {}, "MT-41": {}, + "MT-42": {}, "MT-43": {}, "MT-44": {}, "MT-45": {}, "MT-46": {}, + "MT-47": {}, "MT-48": {}, "MT-49": {}, "MT-50": {}, "MT-51": {}, + "MT-52": {}, "MT-53": {}, "MT-54": {}, "MT-55": {}, "MT-56": {}, + "MT-57": {}, "MT-58": {}, "MT-59": {}, "MT-60": {}, "MT-61": {}, + "MT-62": {}, "MT-63": {}, "MT-64": {}, "MT-65": {}, "MT-66": {}, + "MT-67": {}, "MT-68": {}, "MU-AG": {}, "MU-BL": {}, "MU-BR": {}, + "MU-CC": {}, "MU-CU": {}, "MU-FL": {}, "MU-GP": {}, "MU-MO": {}, + "MU-PA": {}, "MU-PL": {}, "MU-PU": {}, "MU-PW": {}, "MU-QB": {}, + "MU-RO": {}, "MU-RP": {}, "MU-RR": {}, "MU-SA": {}, "MU-VP": {}, "MV-00": {}, + "MV-01": {}, "MV-02": {}, "MV-03": {}, "MV-04": {}, "MV-05": {}, + "MV-07": {}, "MV-08": {}, "MV-12": {}, "MV-13": {}, "MV-14": {}, + "MV-17": {}, "MV-20": {}, "MV-23": {}, "MV-24": {}, "MV-25": {}, + "MV-26": {}, "MV-27": {}, "MV-28": {}, "MV-29": {}, "MV-CE": {}, + "MV-MLE": {}, "MV-NC": {}, "MV-NO": {}, "MV-SC": {}, "MV-SU": {}, + "MV-UN": {}, "MV-US": {}, "MW-BA": {}, "MW-BL": {}, "MW-C": {}, + "MW-CK": {}, "MW-CR": {}, "MW-CT": {}, "MW-DE": {}, "MW-DO": {}, + "MW-KR": {}, "MW-KS": {}, "MW-LI": {}, "MW-LK": {}, "MW-MC": {}, + "MW-MG": {}, "MW-MH": {}, "MW-MU": {}, "MW-MW": {}, "MW-MZ": {}, + "MW-N": {}, "MW-NB": {}, "MW-NE": {}, "MW-NI": {}, "MW-NK": {}, + "MW-NS": {}, "MW-NU": {}, "MW-PH": {}, "MW-RU": {}, "MW-S": {}, + "MW-SA": {}, "MW-TH": {}, "MW-ZO": {}, "MX-AGU": {}, "MX-BCN": {}, + "MX-BCS": {}, "MX-CAM": {}, "MX-CHH": {}, "MX-CHP": {}, "MX-COA": {}, + "MX-COL": {}, "MX-CMX": {}, "MX-DIF": {}, "MX-DUR": {}, "MX-GRO": {}, "MX-GUA": {}, + "MX-HID": {}, "MX-JAL": {}, "MX-MEX": {}, "MX-MIC": {}, "MX-MOR": {}, + "MX-NAY": {}, "MX-NLE": {}, "MX-OAX": {}, "MX-PUE": {}, "MX-QUE": {}, + "MX-ROO": {}, "MX-SIN": {}, "MX-SLP": {}, "MX-SON": {}, "MX-TAB": {}, + "MX-TAM": {}, "MX-TLA": {}, "MX-VER": {}, "MX-YUC": {}, "MX-ZAC": {}, + "MY-01": {}, "MY-02": {}, "MY-03": {}, "MY-04": {}, "MY-05": {}, + "MY-06": {}, "MY-07": {}, "MY-08": {}, "MY-09": {}, "MY-10": {}, + "MY-11": {}, "MY-12": {}, "MY-13": {}, "MY-14": {}, "MY-15": {}, + "MY-16": {}, "MZ-A": {}, "MZ-B": {}, "MZ-G": {}, "MZ-I": {}, + "MZ-L": {}, "MZ-MPM": {}, "MZ-N": {}, "MZ-P": {}, "MZ-Q": {}, + "MZ-S": {}, "MZ-T": {}, "NA-CA": {}, "NA-ER": {}, "NA-HA": {}, + "NA-KA": {}, "NA-KE": {}, "NA-KH": {}, "NA-KU": {}, "NA-KW": {}, "NA-OD": {}, "NA-OH": {}, + "NA-OK": {}, "NA-ON": {}, "NA-OS": {}, "NA-OT": {}, "NA-OW": {}, + "NE-1": {}, "NE-2": {}, "NE-3": {}, "NE-4": {}, "NE-5": {}, + "NE-6": {}, "NE-7": {}, "NE-8": {}, "NG-AB": {}, "NG-AD": {}, + "NG-AK": {}, "NG-AN": {}, "NG-BA": {}, "NG-BE": {}, "NG-BO": {}, + "NG-BY": {}, "NG-CR": {}, "NG-DE": {}, "NG-EB": {}, "NG-ED": {}, + "NG-EK": {}, "NG-EN": {}, "NG-FC": {}, "NG-GO": {}, "NG-IM": {}, + "NG-JI": {}, "NG-KD": {}, "NG-KE": {}, "NG-KN": {}, "NG-KO": {}, + "NG-KT": {}, "NG-KW": {}, "NG-LA": {}, "NG-NA": {}, "NG-NI": {}, + "NG-OG": {}, "NG-ON": {}, "NG-OS": {}, "NG-OY": {}, "NG-PL": {}, + "NG-RI": {}, "NG-SO": {}, "NG-TA": {}, "NG-YO": {}, "NG-ZA": {}, + "NI-AN": {}, "NI-AS": {}, "NI-BO": {}, "NI-CA": {}, "NI-CI": {}, + "NI-CO": {}, "NI-ES": {}, "NI-GR": {}, "NI-JI": {}, "NI-LE": {}, + "NI-MD": {}, "NI-MN": {}, "NI-MS": {}, "NI-MT": {}, "NI-NS": {}, + "NI-RI": {}, "NI-SJ": {}, "NL-AW": {}, "NL-BQ1": {}, "NL-BQ2": {}, + "NL-BQ3": {}, "NL-CW": {}, "NL-DR": {}, "NL-FL": {}, "NL-FR": {}, + "NL-GE": {}, "NL-GR": {}, "NL-LI": {}, "NL-NB": {}, "NL-NH": {}, + "NL-OV": {}, "NL-SX": {}, "NL-UT": {}, "NL-ZE": {}, "NL-ZH": {}, + "NO-03": {}, "NO-11": {}, "NO-15": {}, "NO-16": {}, "NO-17": {}, + "NO-18": {}, "NO-21": {}, "NO-30": {}, "NO-34": {}, "NO-38": {}, + "NO-42": {}, "NO-46": {}, "NO-50": {}, "NO-54": {}, + "NO-22": {}, "NP-1": {}, "NP-2": {}, "NP-3": {}, "NP-4": {}, + "NP-5": {}, "NP-BA": {}, "NP-BH": {}, "NP-DH": {}, "NP-GA": {}, + "NP-JA": {}, "NP-KA": {}, "NP-KO": {}, "NP-LU": {}, "NP-MA": {}, + "NP-ME": {}, "NP-NA": {}, "NP-RA": {}, "NP-SA": {}, "NP-SE": {}, + "NR-01": {}, "NR-02": {}, "NR-03": {}, "NR-04": {}, "NR-05": {}, + "NR-06": {}, "NR-07": {}, "NR-08": {}, "NR-09": {}, "NR-10": {}, + "NR-11": {}, "NR-12": {}, "NR-13": {}, "NR-14": {}, "NZ-AUK": {}, + "NZ-BOP": {}, "NZ-CAN": {}, "NZ-CIT": {}, "NZ-GIS": {}, "NZ-HKB": {}, + "NZ-MBH": {}, "NZ-MWT": {}, "NZ-N": {}, "NZ-NSN": {}, "NZ-NTL": {}, + "NZ-OTA": {}, "NZ-S": {}, "NZ-STL": {}, "NZ-TAS": {}, "NZ-TKI": {}, + "NZ-WGN": {}, "NZ-WKO": {}, "NZ-WTC": {}, "OM-BA": {}, "OM-BS": {}, "OM-BU": {}, "OM-BJ": {}, + "OM-DA": {}, "OM-MA": {}, "OM-MU": {}, "OM-SH": {}, "OM-SJ": {}, "OM-SS": {}, "OM-WU": {}, + "OM-ZA": {}, "OM-ZU": {}, "PA-1": {}, "PA-2": {}, "PA-3": {}, + "PA-4": {}, "PA-5": {}, "PA-6": {}, "PA-7": {}, "PA-8": {}, + "PA-9": {}, "PA-EM": {}, "PA-KY": {}, "PA-NB": {}, "PE-AMA": {}, + "PE-ANC": {}, "PE-APU": {}, "PE-ARE": {}, "PE-AYA": {}, "PE-CAJ": {}, + "PE-CAL": {}, "PE-CUS": {}, "PE-HUC": {}, "PE-HUV": {}, "PE-ICA": {}, + "PE-JUN": {}, "PE-LAL": {}, "PE-LAM": {}, "PE-LIM": {}, "PE-LMA": {}, + "PE-LOR": {}, "PE-MDD": {}, "PE-MOQ": {}, "PE-PAS": {}, "PE-PIU": {}, + "PE-PUN": {}, "PE-SAM": {}, "PE-TAC": {}, "PE-TUM": {}, "PE-UCA": {}, + "PG-CPK": {}, "PG-CPM": {}, "PG-EBR": {}, "PG-EHG": {}, "PG-EPW": {}, + "PG-ESW": {}, "PG-GPK": {}, "PG-MBA": {}, "PG-MPL": {}, "PG-MPM": {}, + "PG-MRL": {}, "PG-NCD": {}, "PG-NIK": {}, "PG-NPP": {}, "PG-NSB": {}, + "PG-SAN": {}, "PG-SHM": {}, "PG-WBK": {}, "PG-WHM": {}, "PG-WPD": {}, + "PH-00": {}, "PH-01": {}, "PH-02": {}, "PH-03": {}, "PH-05": {}, + "PH-06": {}, "PH-07": {}, "PH-08": {}, "PH-09": {}, "PH-10": {}, + "PH-11": {}, "PH-12": {}, "PH-13": {}, "PH-14": {}, "PH-15": {}, + "PH-40": {}, "PH-41": {}, "PH-ABR": {}, "PH-AGN": {}, "PH-AGS": {}, + "PH-AKL": {}, "PH-ALB": {}, "PH-ANT": {}, "PH-APA": {}, "PH-AUR": {}, + "PH-BAN": {}, "PH-BAS": {}, "PH-BEN": {}, "PH-BIL": {}, "PH-BOH": {}, + "PH-BTG": {}, "PH-BTN": {}, "PH-BUK": {}, "PH-BUL": {}, "PH-CAG": {}, + "PH-CAM": {}, "PH-CAN": {}, "PH-CAP": {}, "PH-CAS": {}, "PH-CAT": {}, + "PH-CAV": {}, "PH-CEB": {}, "PH-COM": {}, "PH-DAO": {}, "PH-DAS": {}, + "PH-DAV": {}, "PH-DIN": {}, "PH-EAS": {}, "PH-GUI": {}, "PH-IFU": {}, + "PH-ILI": {}, "PH-ILN": {}, "PH-ILS": {}, "PH-ISA": {}, "PH-KAL": {}, + "PH-LAG": {}, "PH-LAN": {}, "PH-LAS": {}, "PH-LEY": {}, "PH-LUN": {}, + "PH-MAD": {}, "PH-MAG": {}, "PH-MAS": {}, "PH-MDC": {}, "PH-MDR": {}, + "PH-MOU": {}, "PH-MSC": {}, "PH-MSR": {}, "PH-NCO": {}, "PH-NEC": {}, + "PH-NER": {}, "PH-NSA": {}, "PH-NUE": {}, "PH-NUV": {}, "PH-PAM": {}, + "PH-PAN": {}, "PH-PLW": {}, "PH-QUE": {}, "PH-QUI": {}, "PH-RIZ": {}, + "PH-ROM": {}, "PH-SAR": {}, "PH-SCO": {}, "PH-SIG": {}, "PH-SLE": {}, + "PH-SLU": {}, "PH-SOR": {}, "PH-SUK": {}, "PH-SUN": {}, "PH-SUR": {}, + "PH-TAR": {}, "PH-TAW": {}, "PH-WSA": {}, "PH-ZAN": {}, "PH-ZAS": {}, + "PH-ZMB": {}, "PH-ZSI": {}, "PK-BA": {}, "PK-GB": {}, "PK-IS": {}, + "PK-JK": {}, "PK-KP": {}, "PK-PB": {}, "PK-SD": {}, "PK-TA": {}, + "PL-02": {}, "PL-04": {}, "PL-06": {}, "PL-08": {}, "PL-10": {}, + "PL-12": {}, "PL-14": {}, "PL-16": {}, "PL-18": {}, "PL-20": {}, + "PL-22": {}, "PL-24": {}, "PL-26": {}, "PL-28": {}, "PL-30": {}, "PL-32": {}, + "PS-BTH": {}, "PS-DEB": {}, "PS-GZA": {}, "PS-HBN": {}, + "PS-JEM": {}, "PS-JEN": {}, "PS-JRH": {}, "PS-KYS": {}, "PS-NBS": {}, + "PS-NGZ": {}, "PS-QQA": {}, "PS-RBH": {}, "PS-RFH": {}, "PS-SLT": {}, + "PS-TBS": {}, "PS-TKM": {}, "PT-01": {}, "PT-02": {}, "PT-03": {}, + "PT-04": {}, "PT-05": {}, "PT-06": {}, "PT-07": {}, "PT-08": {}, + "PT-09": {}, "PT-10": {}, "PT-11": {}, "PT-12": {}, "PT-13": {}, + "PT-14": {}, "PT-15": {}, "PT-16": {}, "PT-17": {}, "PT-18": {}, + "PT-20": {}, "PT-30": {}, "PW-002": {}, "PW-004": {}, "PW-010": {}, + "PW-050": {}, "PW-100": {}, "PW-150": {}, "PW-212": {}, "PW-214": {}, + "PW-218": {}, "PW-222": {}, "PW-224": {}, "PW-226": {}, "PW-227": {}, + "PW-228": {}, "PW-350": {}, "PW-370": {}, "PY-1": {}, "PY-10": {}, + "PY-11": {}, "PY-12": {}, "PY-13": {}, "PY-14": {}, "PY-15": {}, + "PY-16": {}, "PY-19": {}, "PY-2": {}, "PY-3": {}, "PY-4": {}, + "PY-5": {}, "PY-6": {}, "PY-7": {}, "PY-8": {}, "PY-9": {}, + "PY-ASU": {}, "QA-DA": {}, "QA-KH": {}, "QA-MS": {}, "QA-RA": {}, + "QA-US": {}, "QA-WA": {}, "QA-ZA": {}, "RO-AB": {}, "RO-AG": {}, + "RO-AR": {}, "RO-B": {}, "RO-BC": {}, "RO-BH": {}, "RO-BN": {}, + "RO-BR": {}, "RO-BT": {}, "RO-BV": {}, "RO-BZ": {}, "RO-CJ": {}, + "RO-CL": {}, "RO-CS": {}, "RO-CT": {}, "RO-CV": {}, "RO-DB": {}, + "RO-DJ": {}, "RO-GJ": {}, "RO-GL": {}, "RO-GR": {}, "RO-HD": {}, + "RO-HR": {}, "RO-IF": {}, "RO-IL": {}, "RO-IS": {}, "RO-MH": {}, + "RO-MM": {}, "RO-MS": {}, "RO-NT": {}, "RO-OT": {}, "RO-PH": {}, + "RO-SB": {}, "RO-SJ": {}, "RO-SM": {}, "RO-SV": {}, "RO-TL": {}, + "RO-TM": {}, "RO-TR": {}, "RO-VL": {}, "RO-VN": {}, "RO-VS": {}, + "RS-00": {}, "RS-01": {}, "RS-02": {}, "RS-03": {}, "RS-04": {}, + "RS-05": {}, "RS-06": {}, "RS-07": {}, "RS-08": {}, "RS-09": {}, + "RS-10": {}, "RS-11": {}, "RS-12": {}, "RS-13": {}, "RS-14": {}, + "RS-15": {}, "RS-16": {}, "RS-17": {}, "RS-18": {}, "RS-19": {}, + "RS-20": {}, "RS-21": {}, "RS-22": {}, "RS-23": {}, "RS-24": {}, + "RS-25": {}, "RS-26": {}, "RS-27": {}, "RS-28": {}, "RS-29": {}, + "RS-KM": {}, "RS-VO": {}, "RU-AD": {}, "RU-AL": {}, "RU-ALT": {}, + "RU-AMU": {}, "RU-ARK": {}, "RU-AST": {}, "RU-BA": {}, "RU-BEL": {}, + "RU-BRY": {}, "RU-BU": {}, "RU-CE": {}, "RU-CHE": {}, "RU-CHU": {}, + "RU-CU": {}, "RU-DA": {}, "RU-IN": {}, "RU-IRK": {}, "RU-IVA": {}, + "RU-KAM": {}, "RU-KB": {}, "RU-KC": {}, "RU-KDA": {}, "RU-KEM": {}, + "RU-KGD": {}, "RU-KGN": {}, "RU-KHA": {}, "RU-KHM": {}, "RU-KIR": {}, + "RU-KK": {}, "RU-KL": {}, "RU-KLU": {}, "RU-KO": {}, "RU-KOS": {}, + "RU-KR": {}, "RU-KRS": {}, "RU-KYA": {}, "RU-LEN": {}, "RU-LIP": {}, + "RU-MAG": {}, "RU-ME": {}, "RU-MO": {}, "RU-MOS": {}, "RU-MOW": {}, + "RU-MUR": {}, "RU-NEN": {}, "RU-NGR": {}, "RU-NIZ": {}, "RU-NVS": {}, + "RU-OMS": {}, "RU-ORE": {}, "RU-ORL": {}, "RU-PER": {}, "RU-PNZ": {}, + "RU-PRI": {}, "RU-PSK": {}, "RU-ROS": {}, "RU-RYA": {}, "RU-SA": {}, + "RU-SAK": {}, "RU-SAM": {}, "RU-SAR": {}, "RU-SE": {}, "RU-SMO": {}, + "RU-SPE": {}, "RU-STA": {}, "RU-SVE": {}, "RU-TA": {}, "RU-TAM": {}, + "RU-TOM": {}, "RU-TUL": {}, "RU-TVE": {}, "RU-TY": {}, "RU-TYU": {}, + "RU-UD": {}, "RU-ULY": {}, "RU-VGG": {}, "RU-VLA": {}, "RU-VLG": {}, + "RU-VOR": {}, "RU-YAN": {}, "RU-YAR": {}, "RU-YEV": {}, "RU-ZAB": {}, + "RW-01": {}, "RW-02": {}, "RW-03": {}, "RW-04": {}, "RW-05": {}, + "SA-01": {}, "SA-02": {}, "SA-03": {}, "SA-04": {}, "SA-05": {}, + "SA-06": {}, "SA-07": {}, "SA-08": {}, "SA-09": {}, "SA-10": {}, + "SA-11": {}, "SA-12": {}, "SA-14": {}, "SB-CE": {}, "SB-CH": {}, + "SB-CT": {}, "SB-GU": {}, "SB-IS": {}, "SB-MK": {}, "SB-ML": {}, + "SB-RB": {}, "SB-TE": {}, "SB-WE": {}, "SC-01": {}, "SC-02": {}, + "SC-03": {}, "SC-04": {}, "SC-05": {}, "SC-06": {}, "SC-07": {}, + "SC-08": {}, "SC-09": {}, "SC-10": {}, "SC-11": {}, "SC-12": {}, + "SC-13": {}, "SC-14": {}, "SC-15": {}, "SC-16": {}, "SC-17": {}, + "SC-18": {}, "SC-19": {}, "SC-20": {}, "SC-21": {}, "SC-22": {}, + "SC-23": {}, "SC-24": {}, "SC-25": {}, "SD-DC": {}, "SD-DE": {}, + "SD-DN": {}, "SD-DS": {}, "SD-DW": {}, "SD-GD": {}, "SD-GK": {}, "SD-GZ": {}, + "SD-KA": {}, "SD-KH": {}, "SD-KN": {}, "SD-KS": {}, "SD-NB": {}, + "SD-NO": {}, "SD-NR": {}, "SD-NW": {}, "SD-RS": {}, "SD-SI": {}, + "SE-AB": {}, "SE-AC": {}, "SE-BD": {}, "SE-C": {}, "SE-D": {}, + "SE-E": {}, "SE-F": {}, "SE-G": {}, "SE-H": {}, "SE-I": {}, + "SE-K": {}, "SE-M": {}, "SE-N": {}, "SE-O": {}, "SE-S": {}, + "SE-T": {}, "SE-U": {}, "SE-W": {}, "SE-X": {}, "SE-Y": {}, + "SE-Z": {}, "SG-01": {}, "SG-02": {}, "SG-03": {}, "SG-04": {}, + "SG-05": {}, "SH-AC": {}, "SH-HL": {}, "SH-TA": {}, "SI-001": {}, + "SI-002": {}, "SI-003": {}, "SI-004": {}, "SI-005": {}, "SI-006": {}, + "SI-007": {}, "SI-008": {}, "SI-009": {}, "SI-010": {}, "SI-011": {}, + "SI-012": {}, "SI-013": {}, "SI-014": {}, "SI-015": {}, "SI-016": {}, + "SI-017": {}, "SI-018": {}, "SI-019": {}, "SI-020": {}, "SI-021": {}, + "SI-022": {}, "SI-023": {}, "SI-024": {}, "SI-025": {}, "SI-026": {}, + "SI-027": {}, "SI-028": {}, "SI-029": {}, "SI-030": {}, "SI-031": {}, + "SI-032": {}, "SI-033": {}, "SI-034": {}, "SI-035": {}, "SI-036": {}, + "SI-037": {}, "SI-038": {}, "SI-039": {}, "SI-040": {}, "SI-041": {}, + "SI-042": {}, "SI-043": {}, "SI-044": {}, "SI-045": {}, "SI-046": {}, + "SI-047": {}, "SI-048": {}, "SI-049": {}, "SI-050": {}, "SI-051": {}, + "SI-052": {}, "SI-053": {}, "SI-054": {}, "SI-055": {}, "SI-056": {}, + "SI-057": {}, "SI-058": {}, "SI-059": {}, "SI-060": {}, "SI-061": {}, + "SI-062": {}, "SI-063": {}, "SI-064": {}, "SI-065": {}, "SI-066": {}, + "SI-067": {}, "SI-068": {}, "SI-069": {}, "SI-070": {}, "SI-071": {}, + "SI-072": {}, "SI-073": {}, "SI-074": {}, "SI-075": {}, "SI-076": {}, + "SI-077": {}, "SI-078": {}, "SI-079": {}, "SI-080": {}, "SI-081": {}, + "SI-082": {}, "SI-083": {}, "SI-084": {}, "SI-085": {}, "SI-086": {}, + "SI-087": {}, "SI-088": {}, "SI-089": {}, "SI-090": {}, "SI-091": {}, + "SI-092": {}, "SI-093": {}, "SI-094": {}, "SI-095": {}, "SI-096": {}, + "SI-097": {}, "SI-098": {}, "SI-099": {}, "SI-100": {}, "SI-101": {}, + "SI-102": {}, "SI-103": {}, "SI-104": {}, "SI-105": {}, "SI-106": {}, + "SI-107": {}, "SI-108": {}, "SI-109": {}, "SI-110": {}, "SI-111": {}, + "SI-112": {}, "SI-113": {}, "SI-114": {}, "SI-115": {}, "SI-116": {}, + "SI-117": {}, "SI-118": {}, "SI-119": {}, "SI-120": {}, "SI-121": {}, + "SI-122": {}, "SI-123": {}, "SI-124": {}, "SI-125": {}, "SI-126": {}, + "SI-127": {}, "SI-128": {}, "SI-129": {}, "SI-130": {}, "SI-131": {}, + "SI-132": {}, "SI-133": {}, "SI-134": {}, "SI-135": {}, "SI-136": {}, + "SI-137": {}, "SI-138": {}, "SI-139": {}, "SI-140": {}, "SI-141": {}, + "SI-142": {}, "SI-143": {}, "SI-144": {}, "SI-146": {}, "SI-147": {}, + "SI-148": {}, "SI-149": {}, "SI-150": {}, "SI-151": {}, "SI-152": {}, + "SI-153": {}, "SI-154": {}, "SI-155": {}, "SI-156": {}, "SI-157": {}, + "SI-158": {}, "SI-159": {}, "SI-160": {}, "SI-161": {}, "SI-162": {}, + "SI-163": {}, "SI-164": {}, "SI-165": {}, "SI-166": {}, "SI-167": {}, + "SI-168": {}, "SI-169": {}, "SI-170": {}, "SI-171": {}, "SI-172": {}, + "SI-173": {}, "SI-174": {}, "SI-175": {}, "SI-176": {}, "SI-177": {}, + "SI-178": {}, "SI-179": {}, "SI-180": {}, "SI-181": {}, "SI-182": {}, + "SI-183": {}, "SI-184": {}, "SI-185": {}, "SI-186": {}, "SI-187": {}, + "SI-188": {}, "SI-189": {}, "SI-190": {}, "SI-191": {}, "SI-192": {}, + "SI-193": {}, "SI-194": {}, "SI-195": {}, "SI-196": {}, "SI-197": {}, + "SI-198": {}, "SI-199": {}, "SI-200": {}, "SI-201": {}, "SI-202": {}, + "SI-203": {}, "SI-204": {}, "SI-205": {}, "SI-206": {}, "SI-207": {}, + "SI-208": {}, "SI-209": {}, "SI-210": {}, "SI-211": {}, "SI-212": {}, "SI-213": {}, "SK-BC": {}, + "SK-BL": {}, "SK-KI": {}, "SK-NI": {}, "SK-PV": {}, "SK-TA": {}, + "SK-TC": {}, "SK-ZI": {}, "SL-E": {}, "SL-N": {}, "SL-S": {}, + "SL-W": {}, "SM-01": {}, "SM-02": {}, "SM-03": {}, "SM-04": {}, + "SM-05": {}, "SM-06": {}, "SM-07": {}, "SM-08": {}, "SM-09": {}, + "SN-DB": {}, "SN-DK": {}, "SN-FK": {}, "SN-KA": {}, "SN-KD": {}, + "SN-KE": {}, "SN-KL": {}, "SN-LG": {}, "SN-MT": {}, "SN-SE": {}, + "SN-SL": {}, "SN-TC": {}, "SN-TH": {}, "SN-ZG": {}, "SO-AW": {}, + "SO-BK": {}, "SO-BN": {}, "SO-BR": {}, "SO-BY": {}, "SO-GA": {}, + "SO-GE": {}, "SO-HI": {}, "SO-JD": {}, "SO-JH": {}, "SO-MU": {}, + "SO-NU": {}, "SO-SA": {}, "SO-SD": {}, "SO-SH": {}, "SO-SO": {}, + "SO-TO": {}, "SO-WO": {}, "SR-BR": {}, "SR-CM": {}, "SR-CR": {}, + "SR-MA": {}, "SR-NI": {}, "SR-PM": {}, "SR-PR": {}, "SR-SA": {}, + "SR-SI": {}, "SR-WA": {}, "SS-BN": {}, "SS-BW": {}, "SS-EC": {}, + "SS-EE8": {}, "SS-EE": {}, "SS-EW": {}, "SS-JG": {}, "SS-LK": {}, "SS-NU": {}, + "SS-UY": {}, "SS-WR": {}, "ST-01": {}, "ST-P": {}, "ST-S": {}, "SV-AH": {}, + "SV-CA": {}, "SV-CH": {}, "SV-CU": {}, "SV-LI": {}, "SV-MO": {}, + "SV-PA": {}, "SV-SA": {}, "SV-SM": {}, "SV-SO": {}, "SV-SS": {}, + "SV-SV": {}, "SV-UN": {}, "SV-US": {}, "SY-DI": {}, "SY-DR": {}, + "SY-DY": {}, "SY-HA": {}, "SY-HI": {}, "SY-HL": {}, "SY-HM": {}, + "SY-ID": {}, "SY-LA": {}, "SY-QU": {}, "SY-RA": {}, "SY-RD": {}, + "SY-SU": {}, "SY-TA": {}, "SZ-HH": {}, "SZ-LU": {}, "SZ-MA": {}, + "SZ-SH": {}, "TD-BA": {}, "TD-BG": {}, "TD-BO": {}, "TD-CB": {}, + "TD-EN": {}, "TD-GR": {}, "TD-HL": {}, "TD-KA": {}, "TD-LC": {}, + "TD-LO": {}, "TD-LR": {}, "TD-MA": {}, "TD-MC": {}, "TD-ME": {}, + "TD-MO": {}, "TD-ND": {}, "TD-OD": {}, "TD-SA": {}, "TD-SI": {}, + "TD-TA": {}, "TD-TI": {}, "TD-WF": {}, "TG-C": {}, "TG-K": {}, + "TG-M": {}, "TG-P": {}, "TG-S": {}, "TH-10": {}, "TH-11": {}, + "TH-12": {}, "TH-13": {}, "TH-14": {}, "TH-15": {}, "TH-16": {}, + "TH-17": {}, "TH-18": {}, "TH-19": {}, "TH-20": {}, "TH-21": {}, + "TH-22": {}, "TH-23": {}, "TH-24": {}, "TH-25": {}, "TH-26": {}, + "TH-27": {}, "TH-30": {}, "TH-31": {}, "TH-32": {}, "TH-33": {}, + "TH-34": {}, "TH-35": {}, "TH-36": {}, "TH-37": {}, "TH-38": {}, "TH-39": {}, + "TH-40": {}, "TH-41": {}, "TH-42": {}, "TH-43": {}, "TH-44": {}, + "TH-45": {}, "TH-46": {}, "TH-47": {}, "TH-48": {}, "TH-49": {}, + "TH-50": {}, "TH-51": {}, "TH-52": {}, "TH-53": {}, "TH-54": {}, + "TH-55": {}, "TH-56": {}, "TH-57": {}, "TH-58": {}, "TH-60": {}, + "TH-61": {}, "TH-62": {}, "TH-63": {}, "TH-64": {}, "TH-65": {}, + "TH-66": {}, "TH-67": {}, "TH-70": {}, "TH-71": {}, "TH-72": {}, + "TH-73": {}, "TH-74": {}, "TH-75": {}, "TH-76": {}, "TH-77": {}, + "TH-80": {}, "TH-81": {}, "TH-82": {}, "TH-83": {}, "TH-84": {}, + "TH-85": {}, "TH-86": {}, "TH-90": {}, "TH-91": {}, "TH-92": {}, + "TH-93": {}, "TH-94": {}, "TH-95": {}, "TH-96": {}, "TH-S": {}, + "TJ-GB": {}, "TJ-KT": {}, "TJ-SU": {}, "TJ-DU": {}, "TJ-RA": {}, "TL-AL": {}, "TL-AN": {}, + "TL-BA": {}, "TL-BO": {}, "TL-CO": {}, "TL-DI": {}, "TL-ER": {}, + "TL-LA": {}, "TL-LI": {}, "TL-MF": {}, "TL-MT": {}, "TL-OE": {}, + "TL-VI": {}, "TM-A": {}, "TM-B": {}, "TM-D": {}, "TM-L": {}, + "TM-M": {}, "TM-S": {}, "TN-11": {}, "TN-12": {}, "TN-13": {}, + "TN-14": {}, "TN-21": {}, "TN-22": {}, "TN-23": {}, "TN-31": {}, + "TN-32": {}, "TN-33": {}, "TN-34": {}, "TN-41": {}, "TN-42": {}, + "TN-43": {}, "TN-51": {}, "TN-52": {}, "TN-53": {}, "TN-61": {}, + "TN-71": {}, "TN-72": {}, "TN-73": {}, "TN-81": {}, "TN-82": {}, + "TN-83": {}, "TO-01": {}, "TO-02": {}, "TO-03": {}, "TO-04": {}, + "TO-05": {}, "TR-01": {}, "TR-02": {}, "TR-03": {}, "TR-04": {}, + "TR-05": {}, "TR-06": {}, "TR-07": {}, "TR-08": {}, "TR-09": {}, + "TR-10": {}, "TR-11": {}, "TR-12": {}, "TR-13": {}, "TR-14": {}, + "TR-15": {}, "TR-16": {}, "TR-17": {}, "TR-18": {}, "TR-19": {}, + "TR-20": {}, "TR-21": {}, "TR-22": {}, "TR-23": {}, "TR-24": {}, + "TR-25": {}, "TR-26": {}, "TR-27": {}, "TR-28": {}, "TR-29": {}, + "TR-30": {}, "TR-31": {}, "TR-32": {}, "TR-33": {}, "TR-34": {}, + "TR-35": {}, "TR-36": {}, "TR-37": {}, "TR-38": {}, "TR-39": {}, + "TR-40": {}, "TR-41": {}, "TR-42": {}, "TR-43": {}, "TR-44": {}, + "TR-45": {}, "TR-46": {}, "TR-47": {}, "TR-48": {}, "TR-49": {}, + "TR-50": {}, "TR-51": {}, "TR-52": {}, "TR-53": {}, "TR-54": {}, + "TR-55": {}, "TR-56": {}, "TR-57": {}, "TR-58": {}, "TR-59": {}, + "TR-60": {}, "TR-61": {}, "TR-62": {}, "TR-63": {}, "TR-64": {}, + "TR-65": {}, "TR-66": {}, "TR-67": {}, "TR-68": {}, "TR-69": {}, + "TR-70": {}, "TR-71": {}, "TR-72": {}, "TR-73": {}, "TR-74": {}, + "TR-75": {}, "TR-76": {}, "TR-77": {}, "TR-78": {}, "TR-79": {}, + "TR-80": {}, "TR-81": {}, "TT-ARI": {}, "TT-CHA": {}, "TT-CTT": {}, + "TT-DMN": {}, "TT-ETO": {}, "TT-MRC": {}, "TT-TOB": {}, "TT-PED": {}, "TT-POS": {}, "TT-PRT": {}, + "TT-PTF": {}, "TT-RCM": {}, "TT-SFO": {}, "TT-SGE": {}, "TT-SIP": {}, + "TT-SJL": {}, "TT-TUP": {}, "TT-WTO": {}, "TV-FUN": {}, "TV-NIT": {}, + "TV-NKF": {}, "TV-NKL": {}, "TV-NMA": {}, "TV-NMG": {}, "TV-NUI": {}, + "TV-VAI": {}, "TW-CHA": {}, "TW-CYI": {}, "TW-CYQ": {}, "TW-KIN": {}, "TW-HSQ": {}, + "TW-HSZ": {}, "TW-HUA": {}, "TW-LIE": {}, "TW-ILA": {}, "TW-KEE": {}, "TW-KHH": {}, + "TW-KHQ": {}, "TW-MIA": {}, "TW-NAN": {}, "TW-NWT": {}, "TW-PEN": {}, "TW-PIF": {}, + "TW-TAO": {}, "TW-TNN": {}, "TW-TNQ": {}, "TW-TPE": {}, "TW-TPQ": {}, + "TW-TTT": {}, "TW-TXG": {}, "TW-TXQ": {}, "TW-YUN": {}, "TZ-01": {}, + "TZ-02": {}, "TZ-03": {}, "TZ-04": {}, "TZ-05": {}, "TZ-06": {}, + "TZ-07": {}, "TZ-08": {}, "TZ-09": {}, "TZ-10": {}, "TZ-11": {}, + "TZ-12": {}, "TZ-13": {}, "TZ-14": {}, "TZ-15": {}, "TZ-16": {}, + "TZ-17": {}, "TZ-18": {}, "TZ-19": {}, "TZ-20": {}, "TZ-21": {}, + "TZ-22": {}, "TZ-23": {}, "TZ-24": {}, "TZ-25": {}, "TZ-26": {}, "TZ-27": {}, "TZ-28": {}, "TZ-29": {}, "TZ-30": {}, "TZ-31": {}, + "UA-05": {}, "UA-07": {}, "UA-09": {}, "UA-12": {}, "UA-14": {}, + "UA-18": {}, "UA-21": {}, "UA-23": {}, "UA-26": {}, "UA-30": {}, + "UA-32": {}, "UA-35": {}, "UA-40": {}, "UA-43": {}, "UA-46": {}, + "UA-48": {}, "UA-51": {}, "UA-53": {}, "UA-56": {}, "UA-59": {}, + "UA-61": {}, "UA-63": {}, "UA-65": {}, "UA-68": {}, "UA-71": {}, + "UA-74": {}, "UA-77": {}, "UG-101": {}, "UG-102": {}, "UG-103": {}, + "UG-104": {}, "UG-105": {}, "UG-106": {}, "UG-107": {}, "UG-108": {}, + "UG-109": {}, "UG-110": {}, "UG-111": {}, "UG-112": {}, "UG-113": {}, + "UG-114": {}, "UG-115": {}, "UG-116": {}, "UG-201": {}, "UG-202": {}, + "UG-203": {}, "UG-204": {}, "UG-205": {}, "UG-206": {}, "UG-207": {}, + "UG-208": {}, "UG-209": {}, "UG-210": {}, "UG-211": {}, "UG-212": {}, + "UG-213": {}, "UG-214": {}, "UG-215": {}, "UG-216": {}, "UG-217": {}, + "UG-218": {}, "UG-219": {}, "UG-220": {}, "UG-221": {}, "UG-222": {}, + "UG-223": {}, "UG-224": {}, "UG-301": {}, "UG-302": {}, "UG-303": {}, + "UG-304": {}, "UG-305": {}, "UG-306": {}, "UG-307": {}, "UG-308": {}, + "UG-309": {}, "UG-310": {}, "UG-311": {}, "UG-312": {}, "UG-313": {}, + "UG-314": {}, "UG-315": {}, "UG-316": {}, "UG-317": {}, "UG-318": {}, + "UG-319": {}, "UG-320": {}, "UG-321": {}, "UG-401": {}, "UG-402": {}, + "UG-403": {}, "UG-404": {}, "UG-405": {}, "UG-406": {}, "UG-407": {}, + "UG-408": {}, "UG-409": {}, "UG-410": {}, "UG-411": {}, "UG-412": {}, + "UG-413": {}, "UG-414": {}, "UG-415": {}, "UG-416": {}, "UG-417": {}, + "UG-418": {}, "UG-419": {}, "UG-C": {}, "UG-E": {}, "UG-N": {}, + "UG-W": {}, "UG-322": {}, "UG-323": {}, "UG-420": {}, "UG-117": {}, + "UG-118": {}, "UG-225": {}, "UG-120": {}, "UG-226": {}, + "UG-121": {}, "UG-122": {}, "UG-227": {}, "UG-421": {}, + "UG-325": {}, "UG-228": {}, "UG-123": {}, "UG-422": {}, + "UG-326": {}, "UG-229": {}, "UG-124": {}, "UG-423": {}, + "UG-230": {}, "UG-327": {}, "UG-424": {}, "UG-328": {}, + "UG-425": {}, "UG-426": {}, "UG-330": {}, + "UM-67": {}, "UM-71": {}, "UM-76": {}, "UM-79": {}, + "UM-81": {}, "UM-84": {}, "UM-86": {}, "UM-89": {}, "UM-95": {}, + "US-AK": {}, "US-AL": {}, "US-AR": {}, "US-AS": {}, "US-AZ": {}, + "US-CA": {}, "US-CO": {}, "US-CT": {}, "US-DC": {}, "US-DE": {}, + "US-FL": {}, "US-GA": {}, "US-GU": {}, "US-HI": {}, "US-IA": {}, + "US-ID": {}, "US-IL": {}, "US-IN": {}, "US-KS": {}, "US-KY": {}, + "US-LA": {}, "US-MA": {}, "US-MD": {}, "US-ME": {}, "US-MI": {}, + "US-MN": {}, "US-MO": {}, "US-MP": {}, "US-MS": {}, "US-MT": {}, + "US-NC": {}, "US-ND": {}, "US-NE": {}, "US-NH": {}, "US-NJ": {}, + "US-NM": {}, "US-NV": {}, "US-NY": {}, "US-OH": {}, "US-OK": {}, + "US-OR": {}, "US-PA": {}, "US-PR": {}, "US-RI": {}, "US-SC": {}, + "US-SD": {}, "US-TN": {}, "US-TX": {}, "US-UM": {}, "US-UT": {}, + "US-VA": {}, "US-VI": {}, "US-VT": {}, "US-WA": {}, "US-WI": {}, + "US-WV": {}, "US-WY": {}, "UY-AR": {}, "UY-CA": {}, "UY-CL": {}, + "UY-CO": {}, "UY-DU": {}, "UY-FD": {}, "UY-FS": {}, "UY-LA": {}, + "UY-MA": {}, "UY-MO": {}, "UY-PA": {}, "UY-RN": {}, "UY-RO": {}, + "UY-RV": {}, "UY-SA": {}, "UY-SJ": {}, "UY-SO": {}, "UY-TA": {}, + "UY-TT": {}, "UZ-AN": {}, "UZ-BU": {}, "UZ-FA": {}, "UZ-JI": {}, + "UZ-NG": {}, "UZ-NW": {}, "UZ-QA": {}, "UZ-QR": {}, "UZ-SA": {}, + "UZ-SI": {}, "UZ-SU": {}, "UZ-TK": {}, "UZ-TO": {}, "UZ-XO": {}, + "VC-01": {}, "VC-02": {}, "VC-03": {}, "VC-04": {}, "VC-05": {}, + "VC-06": {}, "VE-A": {}, "VE-B": {}, "VE-C": {}, "VE-D": {}, + "VE-E": {}, "VE-F": {}, "VE-G": {}, "VE-H": {}, "VE-I": {}, + "VE-J": {}, "VE-K": {}, "VE-L": {}, "VE-M": {}, "VE-N": {}, + "VE-O": {}, "VE-P": {}, "VE-R": {}, "VE-S": {}, "VE-T": {}, + "VE-U": {}, "VE-V": {}, "VE-W": {}, "VE-X": {}, "VE-Y": {}, + "VE-Z": {}, "VN-01": {}, "VN-02": {}, "VN-03": {}, "VN-04": {}, + "VN-05": {}, "VN-06": {}, "VN-07": {}, "VN-09": {}, "VN-13": {}, + "VN-14": {}, "VN-15": {}, "VN-18": {}, "VN-20": {}, "VN-21": {}, + "VN-22": {}, "VN-23": {}, "VN-24": {}, "VN-25": {}, "VN-26": {}, + "VN-27": {}, "VN-28": {}, "VN-29": {}, "VN-30": {}, "VN-31": {}, + "VN-32": {}, "VN-33": {}, "VN-34": {}, "VN-35": {}, "VN-36": {}, + "VN-37": {}, "VN-39": {}, "VN-40": {}, "VN-41": {}, "VN-43": {}, + "VN-44": {}, "VN-45": {}, "VN-46": {}, "VN-47": {}, "VN-49": {}, + "VN-50": {}, "VN-51": {}, "VN-52": {}, "VN-53": {}, "VN-54": {}, + "VN-55": {}, "VN-56": {}, "VN-57": {}, "VN-58": {}, "VN-59": {}, + "VN-61": {}, "VN-63": {}, "VN-66": {}, "VN-67": {}, "VN-68": {}, + "VN-69": {}, "VN-70": {}, "VN-71": {}, "VN-72": {}, "VN-73": {}, + "VN-CT": {}, "VN-DN": {}, "VN-HN": {}, "VN-HP": {}, "VN-SG": {}, + "VU-MAP": {}, "VU-PAM": {}, "VU-SAM": {}, "VU-SEE": {}, "VU-TAE": {}, + "VU-TOB": {}, "WF-SG": {}, "WF-UV": {}, "WS-AA": {}, "WS-AL": {}, "WS-AT": {}, "WS-FA": {}, + "WS-GE": {}, "WS-GI": {}, "WS-PA": {}, "WS-SA": {}, "WS-TU": {}, + "WS-VF": {}, "WS-VS": {}, "YE-AB": {}, "YE-AD": {}, "YE-AM": {}, + "YE-BA": {}, "YE-DA": {}, "YE-DH": {}, "YE-HD": {}, "YE-HJ": {}, "YE-HU": {}, + "YE-IB": {}, "YE-JA": {}, "YE-LA": {}, "YE-MA": {}, "YE-MR": {}, + "YE-MU": {}, "YE-MW": {}, "YE-RA": {}, "YE-SA": {}, "YE-SD": {}, "YE-SH": {}, + "YE-SN": {}, "YE-TA": {}, "ZA-EC": {}, "ZA-FS": {}, "ZA-GP": {}, + "ZA-LP": {}, "ZA-MP": {}, "ZA-NC": {}, "ZA-NW": {}, "ZA-WC": {}, + "ZA-ZN": {}, "ZA-KZN": {}, "ZM-01": {}, "ZM-02": {}, "ZM-03": {}, "ZM-04": {}, + "ZM-05": {}, "ZM-06": {}, "ZM-07": {}, "ZM-08": {}, "ZM-09": {}, "ZM-10": {}, + "ZW-BU": {}, "ZW-HA": {}, "ZW-MA": {}, "ZW-MC": {}, "ZW-ME": {}, + "ZW-MI": {}, "ZW-MN": {}, "ZW-MS": {}, "ZW-MV": {}, "ZW-MW": {}, } diff --git a/vendor/github.com/go-playground/validator/v10/currency_codes.go b/vendor/github.com/go-playground/validator/v10/currency_codes.go index a5cd9b18..d0317f89 100644 --- a/vendor/github.com/go-playground/validator/v10/currency_codes.go +++ b/vendor/github.com/go-playground/validator/v10/currency_codes.go @@ -1,79 +1,79 @@ package validator -var iso4217 = map[string]bool{ - "AFN": true, "EUR": true, "ALL": true, "DZD": true, "USD": true, - "AOA": true, "XCD": true, "ARS": true, "AMD": true, "AWG": true, - "AUD": true, "AZN": true, "BSD": true, "BHD": true, "BDT": true, - "BBD": true, "BYN": true, "BZD": true, "XOF": true, "BMD": true, - "INR": true, "BTN": true, "BOB": true, "BOV": true, "BAM": true, - "BWP": true, "NOK": true, "BRL": true, "BND": true, "BGN": true, - "BIF": true, "CVE": true, "KHR": true, "XAF": true, "CAD": true, - "KYD": true, "CLP": true, "CLF": true, "CNY": true, "COP": true, - "COU": true, "KMF": true, "CDF": true, "NZD": true, "CRC": true, - "HRK": true, "CUP": true, "CUC": true, "ANG": true, "CZK": true, - "DKK": true, "DJF": true, "DOP": true, "EGP": true, "SVC": true, - "ERN": true, "SZL": true, "ETB": true, "FKP": true, "FJD": true, - "XPF": true, "GMD": true, "GEL": true, "GHS": true, "GIP": true, - "GTQ": true, "GBP": true, "GNF": true, "GYD": true, "HTG": true, - "HNL": true, "HKD": true, "HUF": true, "ISK": true, "IDR": true, - "XDR": true, "IRR": true, "IQD": true, "ILS": true, "JMD": true, - "JPY": true, "JOD": true, "KZT": true, "KES": true, "KPW": true, - "KRW": true, "KWD": true, "KGS": true, "LAK": true, "LBP": true, - "LSL": true, "ZAR": true, "LRD": true, "LYD": true, "CHF": true, - "MOP": true, "MKD": true, "MGA": true, "MWK": true, "MYR": true, - "MVR": true, "MRU": true, "MUR": true, "XUA": true, "MXN": true, - "MXV": true, "MDL": true, "MNT": true, "MAD": true, "MZN": true, - "MMK": true, "NAD": true, "NPR": true, "NIO": true, "NGN": true, - "OMR": true, "PKR": true, "PAB": true, "PGK": true, "PYG": true, - "PEN": true, "PHP": true, "PLN": true, "QAR": true, "RON": true, - "RUB": true, "RWF": true, "SHP": true, "WST": true, "STN": true, - "SAR": true, "RSD": true, "SCR": true, "SLL": true, "SGD": true, - "XSU": true, "SBD": true, "SOS": true, "SSP": true, "LKR": true, - "SDG": true, "SRD": true, "SEK": true, "CHE": true, "CHW": true, - "SYP": true, "TWD": true, "TJS": true, "TZS": true, "THB": true, - "TOP": true, "TTD": true, "TND": true, "TRY": true, "TMT": true, - "UGX": true, "UAH": true, "AED": true, "USN": true, "UYU": true, - "UYI": true, "UYW": true, "UZS": true, "VUV": true, "VES": true, - "VND": true, "YER": true, "ZMW": true, "ZWL": true, "XBA": true, - "XBB": true, "XBC": true, "XBD": true, "XTS": true, "XXX": true, - "XAU": true, "XPD": true, "XPT": true, "XAG": true, +var iso4217 = map[string]struct{}{ + "AFN": {}, "EUR": {}, "ALL": {}, "DZD": {}, "USD": {}, + "AOA": {}, "XCD": {}, "ARS": {}, "AMD": {}, "AWG": {}, + "AUD": {}, "AZN": {}, "BSD": {}, "BHD": {}, "BDT": {}, + "BBD": {}, "BYN": {}, "BZD": {}, "XOF": {}, "BMD": {}, + "INR": {}, "BTN": {}, "BOB": {}, "BOV": {}, "BAM": {}, + "BWP": {}, "NOK": {}, "BRL": {}, "BND": {}, "BGN": {}, + "BIF": {}, "CVE": {}, "KHR": {}, "XAF": {}, "CAD": {}, + "KYD": {}, "CLP": {}, "CLF": {}, "CNY": {}, "COP": {}, + "COU": {}, "KMF": {}, "CDF": {}, "NZD": {}, "CRC": {}, + "HRK": {}, "CUP": {}, "CUC": {}, "ANG": {}, "CZK": {}, + "DKK": {}, "DJF": {}, "DOP": {}, "EGP": {}, "SVC": {}, + "ERN": {}, "SZL": {}, "ETB": {}, "FKP": {}, "FJD": {}, + "XPF": {}, "GMD": {}, "GEL": {}, "GHS": {}, "GIP": {}, + "GTQ": {}, "GBP": {}, "GNF": {}, "GYD": {}, "HTG": {}, + "HNL": {}, "HKD": {}, "HUF": {}, "ISK": {}, "IDR": {}, + "XDR": {}, "IRR": {}, "IQD": {}, "ILS": {}, "JMD": {}, + "JPY": {}, "JOD": {}, "KZT": {}, "KES": {}, "KPW": {}, + "KRW": {}, "KWD": {}, "KGS": {}, "LAK": {}, "LBP": {}, + "LSL": {}, "ZAR": {}, "LRD": {}, "LYD": {}, "CHF": {}, + "MOP": {}, "MKD": {}, "MGA": {}, "MWK": {}, "MYR": {}, + "MVR": {}, "MRU": {}, "MUR": {}, "XUA": {}, "MXN": {}, + "MXV": {}, "MDL": {}, "MNT": {}, "MAD": {}, "MZN": {}, + "MMK": {}, "NAD": {}, "NPR": {}, "NIO": {}, "NGN": {}, + "OMR": {}, "PKR": {}, "PAB": {}, "PGK": {}, "PYG": {}, + "PEN": {}, "PHP": {}, "PLN": {}, "QAR": {}, "RON": {}, + "RUB": {}, "RWF": {}, "SHP": {}, "WST": {}, "STN": {}, + "SAR": {}, "RSD": {}, "SCR": {}, "SLL": {}, "SGD": {}, + "XSU": {}, "SBD": {}, "SOS": {}, "SSP": {}, "LKR": {}, + "SDG": {}, "SRD": {}, "SEK": {}, "CHE": {}, "CHW": {}, + "SYP": {}, "TWD": {}, "TJS": {}, "TZS": {}, "THB": {}, + "TOP": {}, "TTD": {}, "TND": {}, "TRY": {}, "TMT": {}, + "UGX": {}, "UAH": {}, "AED": {}, "USN": {}, "UYU": {}, + "UYI": {}, "UYW": {}, "UZS": {}, "VUV": {}, "VES": {}, + "VND": {}, "YER": {}, "ZMW": {}, "ZWL": {}, "XBA": {}, + "XBB": {}, "XBC": {}, "XBD": {}, "XTS": {}, "XXX": {}, + "XAU": {}, "XPD": {}, "XPT": {}, "XAG": {}, } -var iso4217_numeric = map[int]bool{ - 8: true, 12: true, 32: true, 36: true, 44: true, - 48: true, 50: true, 51: true, 52: true, 60: true, - 64: true, 68: true, 72: true, 84: true, 90: true, - 96: true, 104: true, 108: true, 116: true, 124: true, - 132: true, 136: true, 144: true, 152: true, 156: true, - 170: true, 174: true, 188: true, 191: true, 192: true, - 203: true, 208: true, 214: true, 222: true, 230: true, - 232: true, 238: true, 242: true, 262: true, 270: true, - 292: true, 320: true, 324: true, 328: true, 332: true, - 340: true, 344: true, 348: true, 352: true, 356: true, - 360: true, 364: true, 368: true, 376: true, 388: true, - 392: true, 398: true, 400: true, 404: true, 408: true, - 410: true, 414: true, 417: true, 418: true, 422: true, - 426: true, 430: true, 434: true, 446: true, 454: true, - 458: true, 462: true, 480: true, 484: true, 496: true, - 498: true, 504: true, 512: true, 516: true, 524: true, - 532: true, 533: true, 548: true, 554: true, 558: true, - 566: true, 578: true, 586: true, 590: true, 598: true, - 600: true, 604: true, 608: true, 634: true, 643: true, - 646: true, 654: true, 682: true, 690: true, 694: true, - 702: true, 704: true, 706: true, 710: true, 728: true, - 748: true, 752: true, 756: true, 760: true, 764: true, - 776: true, 780: true, 784: true, 788: true, 800: true, - 807: true, 818: true, 826: true, 834: true, 840: true, - 858: true, 860: true, 882: true, 886: true, 901: true, - 927: true, 928: true, 929: true, 930: true, 931: true, - 932: true, 933: true, 934: true, 936: true, 938: true, - 940: true, 941: true, 943: true, 944: true, 946: true, - 947: true, 948: true, 949: true, 950: true, 951: true, - 952: true, 953: true, 955: true, 956: true, 957: true, - 958: true, 959: true, 960: true, 961: true, 962: true, - 963: true, 964: true, 965: true, 967: true, 968: true, - 969: true, 970: true, 971: true, 972: true, 973: true, - 975: true, 976: true, 977: true, 978: true, 979: true, - 980: true, 981: true, 984: true, 985: true, 986: true, - 990: true, 994: true, 997: true, 999: true, +var iso4217_numeric = map[int]struct{}{ + 8: {}, 12: {}, 32: {}, 36: {}, 44: {}, + 48: {}, 50: {}, 51: {}, 52: {}, 60: {}, + 64: {}, 68: {}, 72: {}, 84: {}, 90: {}, + 96: {}, 104: {}, 108: {}, 116: {}, 124: {}, + 132: {}, 136: {}, 144: {}, 152: {}, 156: {}, + 170: {}, 174: {}, 188: {}, 191: {}, 192: {}, + 203: {}, 208: {}, 214: {}, 222: {}, 230: {}, + 232: {}, 238: {}, 242: {}, 262: {}, 270: {}, + 292: {}, 320: {}, 324: {}, 328: {}, 332: {}, + 340: {}, 344: {}, 348: {}, 352: {}, 356: {}, + 360: {}, 364: {}, 368: {}, 376: {}, 388: {}, + 392: {}, 398: {}, 400: {}, 404: {}, 408: {}, + 410: {}, 414: {}, 417: {}, 418: {}, 422: {}, + 426: {}, 430: {}, 434: {}, 446: {}, 454: {}, + 458: {}, 462: {}, 480: {}, 484: {}, 496: {}, + 498: {}, 504: {}, 512: {}, 516: {}, 524: {}, + 532: {}, 533: {}, 548: {}, 554: {}, 558: {}, + 566: {}, 578: {}, 586: {}, 590: {}, 598: {}, + 600: {}, 604: {}, 608: {}, 634: {}, 643: {}, + 646: {}, 654: {}, 682: {}, 690: {}, 694: {}, + 702: {}, 704: {}, 706: {}, 710: {}, 728: {}, + 748: {}, 752: {}, 756: {}, 760: {}, 764: {}, + 776: {}, 780: {}, 784: {}, 788: {}, 800: {}, + 807: {}, 818: {}, 826: {}, 834: {}, 840: {}, + 858: {}, 860: {}, 882: {}, 886: {}, 901: {}, + 927: {}, 928: {}, 929: {}, 930: {}, 931: {}, + 932: {}, 933: {}, 934: {}, 936: {}, 938: {}, + 940: {}, 941: {}, 943: {}, 944: {}, 946: {}, + 947: {}, 948: {}, 949: {}, 950: {}, 951: {}, + 952: {}, 953: {}, 955: {}, 956: {}, 957: {}, + 958: {}, 959: {}, 960: {}, 961: {}, 962: {}, + 963: {}, 964: {}, 965: {}, 967: {}, 968: {}, + 969: {}, 970: {}, 971: {}, 972: {}, 973: {}, + 975: {}, 976: {}, 977: {}, 978: {}, 979: {}, + 980: {}, 981: {}, 984: {}, 985: {}, 986: {}, + 990: {}, 994: {}, 997: {}, 999: {}, } diff --git a/vendor/github.com/go-playground/validator/v10/doc.go b/vendor/github.com/go-playground/validator/v10/doc.go index d1eff50f..90a8ade6 100644 --- a/vendor/github.com/go-playground/validator/v10/doc.go +++ b/vendor/github.com/go-playground/validator/v10/doc.go @@ -194,6 +194,13 @@ such as min or max won't run, but if a value is set validation will run. Usage: omitempty +# Omit Nil + +Allows to skip the validation if the value is nil (same as omitempty, but +only for the nil-values). + + Usage: omitnil + # Dive This tells the validator to dive into a slice, array or map and validate that @@ -246,8 +253,8 @@ Example #2 This validates that the value is not the data types default zero value. For numbers ensures value is not zero. For strings ensures value is -not "". For slices, maps, pointers, interfaces, channels and functions -ensures the value is not nil. For structs ensures value is not the zero value. +not "". For booleans ensures value is not false. For slices, maps, pointers, interfaces, channels and functions +ensures the value is not nil. For structs ensures value is not the zero value when using WithRequiredStructEnabled. Usage: required @@ -904,11 +911,20 @@ This will accept any uri the golang request uri accepts # Urn RFC 2141 String -This validataes that a string value contains a valid URN +This validates that a string value contains a valid URN according to the RFC 2141 spec. Usage: urn_rfc2141 +# Base32 String + +This validates that a string value contains a valid bas324 value. +Although an empty string is valid base32 this will report an empty string +as an error, if you wish to accept an empty string as valid you can use +this with the omitempty tag. + + Usage: base32 + # Base64 String This validates that a string value contains a valid base64 value. @@ -950,7 +966,7 @@ Bitcoin Bech32 Address (segwit) This validates that a string value contains a valid bitcoin Bech32 address as defined by bip-0173 (https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki) -Special thanks to Pieter Wuille for providng reference implementations. +Special thanks to Pieter Wuille for providing reference implementations. Usage: btc_addr_bech32 @@ -1283,7 +1299,7 @@ may not exist at the time of validation. # HostPort This validates that a string value contains a valid DNS hostname and port that -can be used to valiate fields typically passed to sockets and connections. +can be used to validate fields typically passed to sockets and connections. Usage: hostname_port @@ -1370,11 +1386,19 @@ This validates that a string value contains a valid credit card number using Luh This validates that a string or (u)int value contains a valid checksum using the Luhn algorithm. -# MongoDb ObjectID +# MongoDB -This validates that a string is a valid 24 character hexadecimal string. +This validates that a string is a valid 24 character hexadecimal string or valid connection string. Usage: mongodb + mongodb_connection_string + +Example: + + type Test struct { + ObjectIdField string `validate:"mongodb"` + ConnectionStringField string `validate:"mongodb_connection_string"` + } # Cron diff --git a/vendor/github.com/go-playground/validator/v10/errors.go b/vendor/github.com/go-playground/validator/v10/errors.go index 5856d57c..be2676e9 100644 --- a/vendor/github.com/go-playground/validator/v10/errors.go +++ b/vendor/github.com/go-playground/validator/v10/errors.go @@ -257,15 +257,19 @@ func (fe *fieldError) Error() string { // NOTE: if no registered translation can be found, it returns the original // untranslated error message. func (fe *fieldError) Translate(ut ut.Translator) string { + var fn TranslationFunc m, ok := fe.v.transTagFunc[ut] if !ok { return fe.Error() } - fn, ok := m[fe.tag] + fn, ok = m[fe.tag] if !ok { - return fe.Error() + fn, ok = m[fe.actualTag] + if !ok { + return fe.Error() + } } return fn(ut, fe) diff --git a/vendor/github.com/go-playground/validator/v10/options.go b/vendor/github.com/go-playground/validator/v10/options.go new file mode 100644 index 00000000..86a0db21 --- /dev/null +++ b/vendor/github.com/go-playground/validator/v10/options.go @@ -0,0 +1,26 @@ +package validator + +// Option represents a configurations option to be applied to validator during initialization. +type Option func(*Validate) + +// WithRequiredStructEnabled enables required tag on non-pointer structs to be applied instead of ignored. +// +// This was made opt-in behaviour in order to maintain backward compatibility with the behaviour previous +// to being able to apply struct level validations on struct fields directly. +// +// It is recommended you enabled this as it will be the default behaviour in v11+ +func WithRequiredStructEnabled() Option { + return func(v *Validate) { + v.requiredStructEnabled = true + } +} + +// WithPrivateFieldValidation activates validation for unexported fields via the use of the `unsafe` package. +// +// By opting into this feature you are acknowledging that you are aware of the risks and accept any current or future +// consequences of using this feature. +func WithPrivateFieldValidation() Option { + return func(v *Validate) { + v.privateFieldValidation = true + } +} diff --git a/vendor/github.com/go-playground/validator/v10/postcode_regexes.go b/vendor/github.com/go-playground/validator/v10/postcode_regexes.go index e7e7b687..326b8f75 100644 --- a/vendor/github.com/go-playground/validator/v10/postcode_regexes.go +++ b/vendor/github.com/go-playground/validator/v10/postcode_regexes.go @@ -1,6 +1,9 @@ package validator -import "regexp" +import ( + "regexp" + "sync" +) var postCodePatternDict = map[string]string{ "GB": `^GIR[ ]?0AA|((AB|AL|B|BA|BB|BD|BH|BL|BN|BR|BS|BT|CA|CB|CF|CH|CM|CO|CR|CT|CV|CW|DA|DD|DE|DG|DH|DL|DN|DT|DY|E|EC|EH|EN|EX|FK|FY|G|GL|GY|GU|HA|HD|HG|HP|HR|HS|HU|HX|IG|IM|IP|IV|JE|KA|KT|KW|KY|L|LA|LD|LE|LL|LN|LS|LU|M|ME|MK|ML|N|NE|NG|NN|NP|NR|NW|OL|OX|PA|PE|PH|PL|PO|PR|RG|RH|RM|S|SA|SE|SG|SK|SL|SM|SN|SO|SP|SR|SS|ST|SW|SY|TA|TD|TF|TN|TQ|TR|TS|TW|UB|W|WA|WC|WD|WF|WN|WR|WS|WV|YO|ZE)(\d[\dA-Z]?[ ]?\d[ABD-HJLN-UW-Z]{2}))|BFPO[ ]?\d{1,4}$`, @@ -164,9 +167,12 @@ var postCodePatternDict = map[string]string{ "YT": `^976\d{2}$`, } -var postCodeRegexDict = map[string]*regexp.Regexp{} +var ( + postcodeRegexInit sync.Once + postCodeRegexDict = map[string]*regexp.Regexp{} +) -func init() { +func initPostcodes() { for countryCode, pattern := range postCodePatternDict { postCodeRegexDict[countryCode] = regexp.MustCompile(pattern) } diff --git a/vendor/github.com/go-playground/validator/v10/regexes.go b/vendor/github.com/go-playground/validator/v10/regexes.go index 6c8f9856..7e1dd5a0 100644 --- a/vendor/github.com/go-playground/validator/v10/regexes.go +++ b/vendor/github.com/go-playground/validator/v10/regexes.go @@ -1,6 +1,9 @@ package validator -import "regexp" +import ( + "regexp" + "sync" +) const ( alphaRegexString = "^[a-zA-Z]+$" @@ -17,11 +20,13 @@ const ( hslaRegexString = "^hsla\\(\\s*(?:0|[1-9]\\d?|[12]\\d\\d|3[0-5]\\d|360)\\s*,\\s*(?:(?:0|[1-9]\\d?|100)%)\\s*,\\s*(?:(?:0|[1-9]\\d?|100)%)\\s*,\\s*(?:(?:0.[1-9]*)|[01])\\s*\\)$" emailRegexString = "^(?:(?:(?:(?:[a-zA-Z]|\\d|[!#\\$%&'\\*\\+\\-\\/=\\?\\^_`{\\|}~]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])+(?:\\.([a-zA-Z]|\\d|[!#\\$%&'\\*\\+\\-\\/=\\?\\^_`{\\|}~]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])+)*)|(?:(?:\\x22)(?:(?:(?:(?:\\x20|\\x09)*(?:\\x0d\\x0a))?(?:\\x20|\\x09)+)?(?:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]|\\x21|[\\x23-\\x5b]|[\\x5d-\\x7e]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])|(?:(?:[\\x01-\\x09\\x0b\\x0c\\x0d-\\x7f]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}]))))*(?:(?:(?:\\x20|\\x09)*(?:\\x0d\\x0a))?(\\x20|\\x09)+)?(?:\\x22))))@(?:(?:(?:[a-zA-Z]|\\d|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])|(?:(?:[a-zA-Z]|\\d|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])(?:[a-zA-Z]|\\d|-|\\.|~|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])*(?:[a-zA-Z]|\\d|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])))\\.)+(?:(?:[a-zA-Z]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])|(?:(?:[a-zA-Z]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])(?:[a-zA-Z]|\\d|-|\\.|~|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])*(?:[a-zA-Z]|[\\x{00A0}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFEF}])))\\.?$" e164RegexString = "^\\+[1-9]?[0-9]{7,14}$" + base32RegexString = "^(?:[A-Z2-7]{8})*(?:[A-Z2-7]{2}={6}|[A-Z2-7]{4}={4}|[A-Z2-7]{5}={3}|[A-Z2-7]{7}=|[A-Z2-7]{8})$" base64RegexString = "^(?:[A-Za-z0-9+\\/]{4})*(?:[A-Za-z0-9+\\/]{2}==|[A-Za-z0-9+\\/]{3}=|[A-Za-z0-9+\\/]{4})$" base64URLRegexString = "^(?:[A-Za-z0-9-_]{4})*(?:[A-Za-z0-9-_]{2}==|[A-Za-z0-9-_]{3}=|[A-Za-z0-9-_]{4})$" base64RawURLRegexString = "^(?:[A-Za-z0-9-_]{4})*(?:[A-Za-z0-9-_]{2,4})$" iSBN10RegexString = "^(?:[0-9]{9}X|[0-9]{10})$" iSBN13RegexString = "^(?:(?:97(?:8|9))[0-9]{10})$" + iSSNRegexString = "^(?:[0-9]{4}-[0-9]{3}[0-9X])$" uUID3RegexString = "^[0-9a-f]{8}-[0-9a-f]{4}-3[0-9a-f]{3}-[0-9a-f]{4}-[0-9a-f]{12}$" uUID4RegexString = "^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" uUID5RegexString = "^[0-9a-f]{8}-[0-9a-f]{4}-5[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" @@ -30,7 +35,7 @@ const ( uUID4RFC4122RegexString = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-4[0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$" uUID5RFC4122RegexString = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-5[0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$" uUIDRFC4122RegexString = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$" - uLIDRegexString = "^[A-HJKMNP-TV-Z0-9]{26}$" + uLIDRegexString = "^(?i)[A-HJKMNP-TV-Z0-9]{26}$" md4RegexString = "^[0-9a-f]{32}$" md5RegexString = "^[0-9a-f]{32}$" sha256RegexString = "^[0-9a-f]{64}$" @@ -66,78 +71,93 @@ const ( semverRegexString = `^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$` // numbered capture groups https://semver.org/ dnsRegexStringRFC1035Label = "^[a-z]([-a-z0-9]*[a-z0-9]){0,62}$" cveRegexString = `^CVE-(1999|2\d{3})-(0[^0]\d{2}|0\d[^0]\d{1}|0\d{2}[^0]|[1-9]{1}\d{3,})$` // CVE Format Id https://cve.mitre.org/cve/identifiers/syntaxchange.html - mongodbRegexString = "^[a-f\\d]{24}$" + mongodbIdRegexString = "^[a-f\\d]{24}$" + mongodbConnStringRegexString = "^mongodb(\\+srv)?:\\/\\/(([a-zA-Z\\d]+):([a-zA-Z\\d$:\\/?#\\[\\]@]+)@)?(([a-z\\d.-]+)(:[\\d]+)?)((,(([a-z\\d.-]+)(:(\\d+))?))*)?(\\/[a-zA-Z-_]{1,64})?(\\?(([a-zA-Z]+)=([a-zA-Z\\d]+))(&(([a-zA-Z\\d]+)=([a-zA-Z\\d]+))?)*)?$" cronRegexString = `(@(annually|yearly|monthly|weekly|daily|hourly|reboot))|(@every (\d+(ns|us|µs|ms|s|m|h))+)|((((\d+,)+\d+|(\d+(\/|-)\d+)|\d+|\*) ?){5,7})` spicedbIDRegexString = `^(([a-zA-Z0-9/_|\-=+]{1,})|\*)$` spicedbPermissionRegexString = "^([a-z][a-z0-9_]{1,62}[a-z0-9])?$" spicedbTypeRegexString = "^([a-z][a-z0-9_]{1,61}[a-z0-9]/)?[a-z][a-z0-9_]{1,62}[a-z0-9]$" ) +func lazyRegexCompile(str string) func() *regexp.Regexp { + var regex *regexp.Regexp + var once sync.Once + return func() *regexp.Regexp { + once.Do(func() { + regex = regexp.MustCompile(str) + }) + return regex + } +} + var ( - alphaRegex = regexp.MustCompile(alphaRegexString) - alphaNumericRegex = regexp.MustCompile(alphaNumericRegexString) - alphaUnicodeRegex = regexp.MustCompile(alphaUnicodeRegexString) - alphaUnicodeNumericRegex = regexp.MustCompile(alphaUnicodeNumericRegexString) - numericRegex = regexp.MustCompile(numericRegexString) - numberRegex = regexp.MustCompile(numberRegexString) - hexadecimalRegex = regexp.MustCompile(hexadecimalRegexString) - hexColorRegex = regexp.MustCompile(hexColorRegexString) - rgbRegex = regexp.MustCompile(rgbRegexString) - rgbaRegex = regexp.MustCompile(rgbaRegexString) - hslRegex = regexp.MustCompile(hslRegexString) - hslaRegex = regexp.MustCompile(hslaRegexString) - e164Regex = regexp.MustCompile(e164RegexString) - emailRegex = regexp.MustCompile(emailRegexString) - base64Regex = regexp.MustCompile(base64RegexString) - base64URLRegex = regexp.MustCompile(base64URLRegexString) - base64RawURLRegex = regexp.MustCompile(base64RawURLRegexString) - iSBN10Regex = regexp.MustCompile(iSBN10RegexString) - iSBN13Regex = regexp.MustCompile(iSBN13RegexString) - uUID3Regex = regexp.MustCompile(uUID3RegexString) - uUID4Regex = regexp.MustCompile(uUID4RegexString) - uUID5Regex = regexp.MustCompile(uUID5RegexString) - uUIDRegex = regexp.MustCompile(uUIDRegexString) - uUID3RFC4122Regex = regexp.MustCompile(uUID3RFC4122RegexString) - uUID4RFC4122Regex = regexp.MustCompile(uUID4RFC4122RegexString) - uUID5RFC4122Regex = regexp.MustCompile(uUID5RFC4122RegexString) - uUIDRFC4122Regex = regexp.MustCompile(uUIDRFC4122RegexString) - uLIDRegex = regexp.MustCompile(uLIDRegexString) - md4Regex = regexp.MustCompile(md4RegexString) - md5Regex = regexp.MustCompile(md5RegexString) - sha256Regex = regexp.MustCompile(sha256RegexString) - sha384Regex = regexp.MustCompile(sha384RegexString) - sha512Regex = regexp.MustCompile(sha512RegexString) - ripemd128Regex = regexp.MustCompile(ripemd128RegexString) - ripemd160Regex = regexp.MustCompile(ripemd160RegexString) - tiger128Regex = regexp.MustCompile(tiger128RegexString) - tiger160Regex = regexp.MustCompile(tiger160RegexString) - tiger192Regex = regexp.MustCompile(tiger192RegexString) - aSCIIRegex = regexp.MustCompile(aSCIIRegexString) - printableASCIIRegex = regexp.MustCompile(printableASCIIRegexString) - multibyteRegex = regexp.MustCompile(multibyteRegexString) - dataURIRegex = regexp.MustCompile(dataURIRegexString) - latitudeRegex = regexp.MustCompile(latitudeRegexString) - longitudeRegex = regexp.MustCompile(longitudeRegexString) - sSNRegex = regexp.MustCompile(sSNRegexString) - hostnameRegexRFC952 = regexp.MustCompile(hostnameRegexStringRFC952) - hostnameRegexRFC1123 = regexp.MustCompile(hostnameRegexStringRFC1123) - fqdnRegexRFC1123 = regexp.MustCompile(fqdnRegexStringRFC1123) - btcAddressRegex = regexp.MustCompile(btcAddressRegexString) - btcUpperAddressRegexBech32 = regexp.MustCompile(btcAddressUpperRegexStringBech32) - btcLowerAddressRegexBech32 = regexp.MustCompile(btcAddressLowerRegexStringBech32) - ethAddressRegex = regexp.MustCompile(ethAddressRegexString) - uRLEncodedRegex = regexp.MustCompile(uRLEncodedRegexString) - hTMLEncodedRegex = regexp.MustCompile(hTMLEncodedRegexString) - hTMLRegex = regexp.MustCompile(hTMLRegexString) - jWTRegex = regexp.MustCompile(jWTRegexString) - splitParamsRegex = regexp.MustCompile(splitParamsRegexString) - bicRegex = regexp.MustCompile(bicRegexString) - semverRegex = regexp.MustCompile(semverRegexString) - dnsRegexRFC1035Label = regexp.MustCompile(dnsRegexStringRFC1035Label) - cveRegex = regexp.MustCompile(cveRegexString) - mongodbRegex = regexp.MustCompile(mongodbRegexString) - cronRegex = regexp.MustCompile(cronRegexString) - spicedbIDRegex = regexp.MustCompile(spicedbIDRegexString) - spicedbPermissionRegex = regexp.MustCompile(spicedbPermissionRegexString) - spicedbTypeRegex = regexp.MustCompile(spicedbTypeRegexString) + alphaRegex = lazyRegexCompile(alphaRegexString) + alphaNumericRegex = lazyRegexCompile(alphaNumericRegexString) + alphaUnicodeRegex = lazyRegexCompile(alphaUnicodeRegexString) + alphaUnicodeNumericRegex = lazyRegexCompile(alphaUnicodeNumericRegexString) + numericRegex = lazyRegexCompile(numericRegexString) + numberRegex = lazyRegexCompile(numberRegexString) + hexadecimalRegex = lazyRegexCompile(hexadecimalRegexString) + hexColorRegex = lazyRegexCompile(hexColorRegexString) + rgbRegex = lazyRegexCompile(rgbRegexString) + rgbaRegex = lazyRegexCompile(rgbaRegexString) + hslRegex = lazyRegexCompile(hslRegexString) + hslaRegex = lazyRegexCompile(hslaRegexString) + e164Regex = lazyRegexCompile(e164RegexString) + emailRegex = lazyRegexCompile(emailRegexString) + base32Regex = lazyRegexCompile(base32RegexString) + base64Regex = lazyRegexCompile(base64RegexString) + base64URLRegex = lazyRegexCompile(base64URLRegexString) + base64RawURLRegex = lazyRegexCompile(base64RawURLRegexString) + iSBN10Regex = lazyRegexCompile(iSBN10RegexString) + iSBN13Regex = lazyRegexCompile(iSBN13RegexString) + iSSNRegex = lazyRegexCompile(iSSNRegexString) + uUID3Regex = lazyRegexCompile(uUID3RegexString) + uUID4Regex = lazyRegexCompile(uUID4RegexString) + uUID5Regex = lazyRegexCompile(uUID5RegexString) + uUIDRegex = lazyRegexCompile(uUIDRegexString) + uUID3RFC4122Regex = lazyRegexCompile(uUID3RFC4122RegexString) + uUID4RFC4122Regex = lazyRegexCompile(uUID4RFC4122RegexString) + uUID5RFC4122Regex = lazyRegexCompile(uUID5RFC4122RegexString) + uUIDRFC4122Regex = lazyRegexCompile(uUIDRFC4122RegexString) + uLIDRegex = lazyRegexCompile(uLIDRegexString) + md4Regex = lazyRegexCompile(md4RegexString) + md5Regex = lazyRegexCompile(md5RegexString) + sha256Regex = lazyRegexCompile(sha256RegexString) + sha384Regex = lazyRegexCompile(sha384RegexString) + sha512Regex = lazyRegexCompile(sha512RegexString) + ripemd128Regex = lazyRegexCompile(ripemd128RegexString) + ripemd160Regex = lazyRegexCompile(ripemd160RegexString) + tiger128Regex = lazyRegexCompile(tiger128RegexString) + tiger160Regex = lazyRegexCompile(tiger160RegexString) + tiger192Regex = lazyRegexCompile(tiger192RegexString) + aSCIIRegex = lazyRegexCompile(aSCIIRegexString) + printableASCIIRegex = lazyRegexCompile(printableASCIIRegexString) + multibyteRegex = lazyRegexCompile(multibyteRegexString) + dataURIRegex = lazyRegexCompile(dataURIRegexString) + latitudeRegex = lazyRegexCompile(latitudeRegexString) + longitudeRegex = lazyRegexCompile(longitudeRegexString) + sSNRegex = lazyRegexCompile(sSNRegexString) + hostnameRegexRFC952 = lazyRegexCompile(hostnameRegexStringRFC952) + hostnameRegexRFC1123 = lazyRegexCompile(hostnameRegexStringRFC1123) + fqdnRegexRFC1123 = lazyRegexCompile(fqdnRegexStringRFC1123) + btcAddressRegex = lazyRegexCompile(btcAddressRegexString) + btcUpperAddressRegexBech32 = lazyRegexCompile(btcAddressUpperRegexStringBech32) + btcLowerAddressRegexBech32 = lazyRegexCompile(btcAddressLowerRegexStringBech32) + ethAddressRegex = lazyRegexCompile(ethAddressRegexString) + uRLEncodedRegex = lazyRegexCompile(uRLEncodedRegexString) + hTMLEncodedRegex = lazyRegexCompile(hTMLEncodedRegexString) + hTMLRegex = lazyRegexCompile(hTMLRegexString) + jWTRegex = lazyRegexCompile(jWTRegexString) + splitParamsRegex = lazyRegexCompile(splitParamsRegexString) + bicRegex = lazyRegexCompile(bicRegexString) + semverRegex = lazyRegexCompile(semverRegexString) + dnsRegexRFC1035Label = lazyRegexCompile(dnsRegexStringRFC1035Label) + cveRegex = lazyRegexCompile(cveRegexString) + mongodbIdRegex = lazyRegexCompile(mongodbIdRegexString) + mongodbConnectionRegex = lazyRegexCompile(mongodbConnStringRegexString) + cronRegex = lazyRegexCompile(cronRegexString) + spicedbIDRegex = lazyRegexCompile(spicedbIDRegexString) + spicedbPermissionRegex = lazyRegexCompile(spicedbPermissionRegexString) + spicedbTypeRegex = lazyRegexCompile(spicedbTypeRegexString) ) diff --git a/vendor/github.com/go-playground/validator/v10/util.go b/vendor/github.com/go-playground/validator/v10/util.go index 084d4617..9285223a 100644 --- a/vendor/github.com/go-playground/validator/v10/util.go +++ b/vendor/github.com/go-playground/validator/v10/util.go @@ -1,7 +1,9 @@ package validator import ( + "fmt" "reflect" + "regexp" "strconv" "strings" "time" @@ -269,7 +271,7 @@ func asFloat64(param string) float64 { return i } -// asFloat64 returns the parameter as a float64 +// asFloat32 returns the parameter as a float32 // or panics if it can't convert func asFloat32(param string) float64 { i, err := strconv.ParseFloat(param, 32) @@ -293,10 +295,18 @@ func panicIf(err error) { } } -func isNestedStructOrStructPtr(v reflect.StructField) bool { - if v.Type == nil { - return false +// Checks if field value matches regex. If fl.Field can be cast to Stringer, it uses the Stringer interfaces +// String() return value. Otherwise, it uses fl.Field's String() value. +func fieldMatchesRegexByStringerValOrString(regexFn func() *regexp.Regexp, fl FieldLevel) bool { + regex := regexFn() + switch fl.Field().Kind() { + case reflect.String: + return regex.MatchString(fl.Field().String()) + default: + if stringer, ok := fl.Field().Interface().(fmt.Stringer); ok { + return regex.MatchString(stringer.String()) + } else { + return regex.MatchString(fl.Field().String()) + } } - kind := v.Type.Kind() - return kind == reflect.Struct || kind == reflect.Ptr && v.Type.Elem().Kind() == reflect.Struct } diff --git a/vendor/github.com/go-playground/validator/v10/validator.go b/vendor/github.com/go-playground/validator/v10/validator.go index a6fa1f5d..901e7b50 100644 --- a/vendor/github.com/go-playground/validator/v10/validator.go +++ b/vendor/github.com/go-playground/validator/v10/validator.go @@ -5,6 +5,7 @@ import ( "fmt" "reflect" "strconv" + "unsafe" ) // per validate construct @@ -99,6 +100,8 @@ func (v *validate) traverseField(ctx context.Context, parent reflect.Value, curr current, kind, v.fldIsPointer = v.extractTypeInternal(current, false) + var isNestedStruct bool + switch kind { case reflect.Ptr, reflect.Interface, reflect.Invalid: @@ -110,6 +113,10 @@ func (v *validate) traverseField(ctx context.Context, parent reflect.Value, curr return } + if ct.typeof == typeOmitNil && (kind != reflect.Invalid && current.IsNil()) { + return + } + if ct.hasTag { if kind == reflect.Invalid { v.str1 = string(append(ns, cf.altName...)) @@ -150,7 +157,7 @@ func (v *validate) traverseField(ctx context.Context, parent reflect.Value, curr structNs: v.str2, fieldLen: uint8(len(cf.altName)), structfieldLen: uint8(len(cf.name)), - value: current.Interface(), + value: getValue(current), param: ct.param, kind: kind, typ: current.Type(), @@ -160,86 +167,61 @@ func (v *validate) traverseField(ctx context.Context, parent reflect.Value, curr } } - case reflect.Struct: - - typ = current.Type() - - if !typ.ConvertibleTo(timeType) { - - if ct != nil { - - if ct.typeof == typeStructOnly { - goto CONTINUE - } else if ct.typeof == typeIsDefault || ct.typeof == typeNestedStructLevel { - // set Field Level fields - v.slflParent = parent - v.flField = current - v.cf = cf - v.ct = ct - - if !ct.fn(ctx, v) { - v.str1 = string(append(ns, cf.altName...)) - - if v.v.hasTagNameFunc { - v.str2 = string(append(structNs, cf.name...)) - } else { - v.str2 = v.str1 - } - - v.errs = append(v.errs, - &fieldError{ - v: v.v, - tag: ct.aliasTag, - actualTag: ct.tag, - ns: v.str1, - structNs: v.str2, - fieldLen: uint8(len(cf.altName)), - structfieldLen: uint8(len(cf.name)), - value: current.Interface(), - param: ct.param, - kind: kind, - typ: typ, - }, - ) - return - } - } - - ct = ct.next - } - - if ct != nil && ct.typeof == typeNoStructLevel { - return - } - - CONTINUE: - // if len == 0 then validating using 'Var' or 'VarWithValue' - // Var - doesn't make much sense to do it that way, should call 'Struct', but no harm... - // VarWithField - this allows for validating against each field within the struct against a specific value - // pretty handy in certain situations - if len(cf.name) > 0 { - ns = append(append(ns, cf.altName...), '.') - structNs = append(append(structNs, cf.name...), '.') - } - - v.validateStruct(ctx, parent, current, typ, ns, structNs, ct) + if kind == reflect.Invalid { return } - } - if ct == nil || !ct.hasTag { - return + case reflect.Struct: + isNestedStruct = !current.Type().ConvertibleTo(timeType) + // For backward compatibility before struct level validation tags were supported + // as there were a number of projects relying on `required` not failing on non-pointer + // structs. Since it's basically nonsensical to use `required` with a non-pointer struct + // are explicitly skipping the required validation for it. This WILL be removed in the + // next major version. + if isNestedStruct && !v.v.requiredStructEnabled && ct != nil && ct.tag == requiredTag { + ct = ct.next + } } typ = current.Type() OUTER: for { - if ct == nil { + if ct == nil || !ct.hasTag || (isNestedStruct && len(cf.name) == 0) { + // isNestedStruct check here + if isNestedStruct { + // if len == 0 then validating using 'Var' or 'VarWithValue' + // Var - doesn't make much sense to do it that way, should call 'Struct', but no harm... + // VarWithField - this allows for validating against each field within the struct against a specific value + // pretty handy in certain situations + if len(cf.name) > 0 { + ns = append(append(ns, cf.altName...), '.') + structNs = append(append(structNs, cf.name...), '.') + } + + v.validateStruct(ctx, parent, current, typ, ns, structNs, ct) + } return } switch ct.typeof { + case typeNoStructLevel: + return + + case typeStructOnly: + if isNestedStruct { + // if len == 0 then validating using 'Var' or 'VarWithValue' + // Var - doesn't make much sense to do it that way, should call 'Struct', but no harm... + // VarWithField - this allows for validating against each field within the struct against a specific value + // pretty handy in certain situations + if len(cf.name) > 0 { + ns = append(append(ns, cf.altName...), '.') + structNs = append(append(structNs, cf.name...), '.') + } + + v.validateStruct(ctx, parent, current, typ, ns, structNs, ct) + } + return case typeOmitEmpty: @@ -256,6 +238,26 @@ OUTER: ct = ct.next continue + case typeOmitNil: + v.slflParent = parent + v.flField = current + v.cf = cf + v.ct = ct + + switch field := v.Field(); field.Kind() { + case reflect.Slice, reflect.Map, reflect.Ptr, reflect.Interface, reflect.Chan, reflect.Func: + if field.IsNil() { + return + } + default: + if v.fldIsPointer && field.Interface() == nil { + return + } + } + + ct = ct.next + continue + case typeEndKeys: return @@ -366,7 +368,7 @@ OUTER: ct = ct.next if ct == nil { - return + continue OUTER } if ct.typeof != typeOr { @@ -409,7 +411,7 @@ OUTER: structNs: v.str2, fieldLen: uint8(len(cf.altName)), structfieldLen: uint8(len(cf.name)), - value: current.Interface(), + value: getValue(current), param: ct.param, kind: kind, typ: typ, @@ -429,7 +431,7 @@ OUTER: structNs: v.str2, fieldLen: uint8(len(cf.altName)), structfieldLen: uint8(len(cf.name)), - value: current.Interface(), + value: getValue(current), param: ct.param, kind: kind, typ: typ, @@ -469,7 +471,7 @@ OUTER: structNs: v.str2, fieldLen: uint8(len(cf.altName)), structfieldLen: uint8(len(cf.name)), - value: current.Interface(), + value: getValue(current), param: ct.param, kind: kind, typ: typ, @@ -483,3 +485,26 @@ OUTER: } } + +func getValue(val reflect.Value) interface{} { + if val.CanInterface() { + return val.Interface() + } + + if val.CanAddr() { + return reflect.NewAt(val.Type(), unsafe.Pointer(val.UnsafeAddr())).Elem().Interface() + } + + switch val.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return val.Int() + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return val.Uint() + case reflect.Complex64, reflect.Complex128: + return val.Complex() + case reflect.Float32, reflect.Float64: + return val.Float() + default: + return val.String() + } +} diff --git a/vendor/github.com/go-playground/validator/v10/validator_instance.go b/vendor/github.com/go-playground/validator/v10/validator_instance.go index d9dbf0ce..d9f148db 100644 --- a/vendor/github.com/go-playground/validator/v10/validator_instance.go +++ b/vendor/github.com/go-playground/validator/v10/validator_instance.go @@ -22,6 +22,7 @@ const ( structOnlyTag = "structonly" noStructLevelTag = "nostructlevel" omitempty = "omitempty" + omitnil = "omitnil" isdefault = "isdefault" requiredWithoutAllTag = "required_without_all" requiredWithoutTag = "required_without" @@ -73,25 +74,27 @@ type CustomTypeFunc func(field reflect.Value) interface{} type TagNameFunc func(field reflect.StructField) string type internalValidationFuncWrapper struct { - fn FuncCtx - runValidatinOnNil bool + fn FuncCtx + runValidationOnNil bool } // Validate contains the validator settings and cache type Validate struct { - tagName string - pool *sync.Pool - hasCustomFuncs bool - hasTagNameFunc bool - tagNameFunc TagNameFunc - structLevelFuncs map[reflect.Type]StructLevelFuncCtx - customFuncs map[reflect.Type]CustomTypeFunc - aliases map[string]string - validations map[string]internalValidationFuncWrapper - transTagFunc map[ut.Translator]map[string]TranslationFunc // map[]map[]TranslationFunc - rules map[reflect.Type]map[string]string - tagCache *tagCache - structCache *structCache + tagName string + pool *sync.Pool + tagNameFunc TagNameFunc + structLevelFuncs map[reflect.Type]StructLevelFuncCtx + customFuncs map[reflect.Type]CustomTypeFunc + aliases map[string]string + validations map[string]internalValidationFuncWrapper + transTagFunc map[ut.Translator]map[string]TranslationFunc // map[]map[]TranslationFunc + rules map[reflect.Type]map[string]string + tagCache *tagCache + structCache *structCache + hasCustomFuncs bool + hasTagNameFunc bool + requiredStructEnabled bool + privateFieldValidation bool } // New returns a new instance of 'validate' with sane defaults. @@ -99,7 +102,7 @@ type Validate struct { // It caches information about your struct and validations, // in essence only parsing your validation tags once per struct type. // Using multiple instances neglects the benefit of caching. -func New() *Validate { +func New(options ...Option) *Validate { tc := new(tagCache) tc.m.Store(make(map[string]*cTag)) @@ -146,6 +149,9 @@ func New() *Validate { }, } + for _, o := range options { + o(v) + } return v } @@ -239,7 +245,7 @@ func (v *Validate) registerValidation(tag string, fn FuncCtx, bakedIn bool, nilC if !bakedIn && (ok || strings.ContainsAny(tag, restrictedTagChars)) { panic(fmt.Sprintf(restrictedTagErr, tag)) } - v.validations[tag] = internalValidationFuncWrapper{fn: fn, runValidatinOnNil: nilCheckable} + v.validations[tag] = internalValidationFuncWrapper{fn: fn, runValidationOnNil: nilCheckable} return nil } @@ -670,7 +676,7 @@ func (v *Validate) VarWithValue(field interface{}, other interface{}, tag string } // VarWithValueCtx validates a single variable, against another variable/field's value using tag style validation and -// allows passing of contextual validation validation information via context.Context. +// allows passing of contextual validation information via context.Context. // eg. // s1 := "abcd" // s2 := "abcd" diff --git a/vendor/github.com/go-task/slim-sprig/.editorconfig b/vendor/github.com/go-task/slim-sprig/v3/.editorconfig similarity index 100% rename from vendor/github.com/go-task/slim-sprig/.editorconfig rename to vendor/github.com/go-task/slim-sprig/v3/.editorconfig diff --git a/vendor/github.com/go-task/slim-sprig/.gitattributes b/vendor/github.com/go-task/slim-sprig/v3/.gitattributes similarity index 100% rename from vendor/github.com/go-task/slim-sprig/.gitattributes rename to vendor/github.com/go-task/slim-sprig/v3/.gitattributes diff --git a/vendor/github.com/go-task/slim-sprig/.gitignore b/vendor/github.com/go-task/slim-sprig/v3/.gitignore similarity index 100% rename from vendor/github.com/go-task/slim-sprig/.gitignore rename to vendor/github.com/go-task/slim-sprig/v3/.gitignore diff --git a/vendor/github.com/go-task/slim-sprig/CHANGELOG.md b/vendor/github.com/go-task/slim-sprig/v3/CHANGELOG.md similarity index 95% rename from vendor/github.com/go-task/slim-sprig/CHANGELOG.md rename to vendor/github.com/go-task/slim-sprig/v3/CHANGELOG.md index 61d8ebff..2ce45dd4 100644 --- a/vendor/github.com/go-task/slim-sprig/CHANGELOG.md +++ b/vendor/github.com/go-task/slim-sprig/v3/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +## Release 3.2.3 (2022-11-29) + +### Changed + +- Updated docs (thanks @book987 @aJetHorn @neelayu @pellizzetti @apricote @SaigyoujiYuyuko233 @AlekSi) +- #348: Updated huandu/xstrings which fixed a snake case bug (thanks @yxxhero) +- #353: Updated masterminds/semver which included bug fixes +- #354: Updated golang.org/x/crypto which included bug fixes + +## Release 3.2.2 (2021-02-04) + +This is a re-release of 3.2.1 to satisfy something with the Go module system. + +## Release 3.2.1 (2021-02-04) + +### Changed + +- Upgraded `Masterminds/goutils` to `v1.1.1`. see the [Security Advisory](https://github.com/Masterminds/goutils/security/advisories/GHSA-xg2h-wx96-xgxr) + ## Release 3.2.0 (2020-12-14) ### Added diff --git a/vendor/github.com/go-task/slim-sprig/LICENSE.txt b/vendor/github.com/go-task/slim-sprig/v3/LICENSE.txt similarity index 100% rename from vendor/github.com/go-task/slim-sprig/LICENSE.txt rename to vendor/github.com/go-task/slim-sprig/v3/LICENSE.txt diff --git a/vendor/github.com/go-task/slim-sprig/README.md b/vendor/github.com/go-task/slim-sprig/v3/README.md similarity index 88% rename from vendor/github.com/go-task/slim-sprig/README.md rename to vendor/github.com/go-task/slim-sprig/v3/README.md index 72579471..b5ab5642 100644 --- a/vendor/github.com/go-task/slim-sprig/README.md +++ b/vendor/github.com/go-task/slim-sprig/v3/README.md @@ -1,4 +1,4 @@ -# Slim-Sprig: Template functions for Go templates [![GoDoc](https://godoc.org/github.com/go-task/slim-sprig?status.svg)](https://godoc.org/github.com/go-task/slim-sprig) [![Go Report Card](https://goreportcard.com/badge/github.com/go-task/slim-sprig)](https://goreportcard.com/report/github.com/go-task/slim-sprig) +# Slim-Sprig: Template functions for Go templates [![Go Reference](https://pkg.go.dev/badge/github.com/go-task/slim-sprig/v3.svg)](https://pkg.go.dev/github.com/go-task/slim-sprig/v3) Slim-Sprig is a fork of [Sprig](https://github.com/Masterminds/sprig), but with all functions that depend on external (non standard library) or crypto packages diff --git a/vendor/github.com/go-task/slim-sprig/Taskfile.yml b/vendor/github.com/go-task/slim-sprig/v3/Taskfile.yml similarity index 89% rename from vendor/github.com/go-task/slim-sprig/Taskfile.yml rename to vendor/github.com/go-task/slim-sprig/v3/Taskfile.yml index cdcfd223..8e6346bb 100644 --- a/vendor/github.com/go-task/slim-sprig/Taskfile.yml +++ b/vendor/github.com/go-task/slim-sprig/v3/Taskfile.yml @@ -1,6 +1,6 @@ # https://taskfile.dev -version: '2' +version: '3' tasks: default: diff --git a/vendor/github.com/go-task/slim-sprig/crypto.go b/vendor/github.com/go-task/slim-sprig/v3/crypto.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/crypto.go rename to vendor/github.com/go-task/slim-sprig/v3/crypto.go diff --git a/vendor/github.com/go-task/slim-sprig/date.go b/vendor/github.com/go-task/slim-sprig/v3/date.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/date.go rename to vendor/github.com/go-task/slim-sprig/v3/date.go diff --git a/vendor/github.com/go-task/slim-sprig/defaults.go b/vendor/github.com/go-task/slim-sprig/v3/defaults.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/defaults.go rename to vendor/github.com/go-task/slim-sprig/v3/defaults.go diff --git a/vendor/github.com/go-task/slim-sprig/dict.go b/vendor/github.com/go-task/slim-sprig/v3/dict.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/dict.go rename to vendor/github.com/go-task/slim-sprig/v3/dict.go diff --git a/vendor/github.com/go-task/slim-sprig/doc.go b/vendor/github.com/go-task/slim-sprig/v3/doc.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/doc.go rename to vendor/github.com/go-task/slim-sprig/v3/doc.go diff --git a/vendor/github.com/go-task/slim-sprig/functions.go b/vendor/github.com/go-task/slim-sprig/v3/functions.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/functions.go rename to vendor/github.com/go-task/slim-sprig/v3/functions.go diff --git a/vendor/github.com/go-task/slim-sprig/list.go b/vendor/github.com/go-task/slim-sprig/v3/list.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/list.go rename to vendor/github.com/go-task/slim-sprig/v3/list.go diff --git a/vendor/github.com/go-task/slim-sprig/network.go b/vendor/github.com/go-task/slim-sprig/v3/network.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/network.go rename to vendor/github.com/go-task/slim-sprig/v3/network.go diff --git a/vendor/github.com/go-task/slim-sprig/numeric.go b/vendor/github.com/go-task/slim-sprig/v3/numeric.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/numeric.go rename to vendor/github.com/go-task/slim-sprig/v3/numeric.go diff --git a/vendor/github.com/go-task/slim-sprig/reflect.go b/vendor/github.com/go-task/slim-sprig/v3/reflect.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/reflect.go rename to vendor/github.com/go-task/slim-sprig/v3/reflect.go diff --git a/vendor/github.com/go-task/slim-sprig/regex.go b/vendor/github.com/go-task/slim-sprig/v3/regex.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/regex.go rename to vendor/github.com/go-task/slim-sprig/v3/regex.go diff --git a/vendor/github.com/go-task/slim-sprig/strings.go b/vendor/github.com/go-task/slim-sprig/v3/strings.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/strings.go rename to vendor/github.com/go-task/slim-sprig/v3/strings.go diff --git a/vendor/github.com/go-task/slim-sprig/url.go b/vendor/github.com/go-task/slim-sprig/v3/url.go similarity index 100% rename from vendor/github.com/go-task/slim-sprig/url.go rename to vendor/github.com/go-task/slim-sprig/v3/url.go diff --git a/vendor/github.com/gocarina/gocsv/decode.go b/vendor/github.com/gocarina/gocsv/decode.go index 24d49d09..e91c5953 100644 --- a/vendor/github.com/gocarina/gocsv/decode.go +++ b/vendor/github.com/gocarina/gocsv/decode.go @@ -203,22 +203,22 @@ func readToWithErrorHandler(decoder Decoder, errHandler ErrorHandler, out interf objectIface := reflect.New(outValue.Index(i).Type()).Interface() outInner := createNewOutInner(outInnerWasPointer, outInnerType) for j, csvColumnContent := range csvRow { - if fieldInfo, ok := csvHeadersLabels[j]; ok { // Position found accordingly to header name - - if outInner.CanInterface() { - fieldTypeUnmarshallerWithKeys, withFieldsOK = objectIface.(TypeUnmarshalCSVWithFields) - if withFieldsOK { - if err := fieldTypeUnmarshallerWithKeys.UnmarshalCSVWithFields(fieldInfo.getFirstKey(), csvColumnContent); err != nil { - parseError := csv.ParseError{ - Line: i + 2, //add 2 to account for the header & 0-indexing of arrays - Column: j + 1, - Err: err, - } - return &parseError + if outInner.CanInterface() { + fieldTypeUnmarshallerWithKeys, withFieldsOK = objectIface.(TypeUnmarshalCSVWithFields) + if withFieldsOK { + if err := fieldTypeUnmarshallerWithKeys.UnmarshalCSVWithFields(headers[j], csvColumnContent); err != nil { + parseError := csv.ParseError{ + Line: i + 2, //add 2 to account for the header & 0-indexing of arrays + Column: j + 1, + Err: err, } - continue + return &parseError } + continue } + } + + if fieldInfo, ok := csvHeadersLabels[j]; ok { // Position found accordingly to header name value := csvColumnContent if value == "" { value = fieldInfo.defaultValue @@ -289,8 +289,13 @@ func readEach(decoder SimpleDecoder, errHandler ErrorHandler, c interface{}) err return err } } + + var withFieldsOK bool + var fieldTypeUnmarshallerWithKeys TypeUnmarshalCSVWithFields + i := 0 for { + objectIface := reflect.New(outValue.Type().Elem()).Interface() line, err := decoder.GetCSVRow() if err == io.EOF { break @@ -299,8 +304,31 @@ func readEach(decoder SimpleDecoder, errHandler ErrorHandler, c interface{}) err } outInner := createNewOutInner(outInnerWasPointer, outInnerType) for j, csvColumnContent := range line { + + if outInner.CanInterface() { + fieldTypeUnmarshallerWithKeys, withFieldsOK = objectIface.(TypeUnmarshalCSVWithFields) + if withFieldsOK { + if err := fieldTypeUnmarshallerWithKeys.UnmarshalCSVWithFields(headers[j], csvColumnContent); err != nil { + parseError := csv.ParseError{ + Line: i + 2, //add 2 to account for the header & 0-indexing of arrays + Column: j + 1, + Err: err, + } + return &parseError + } + + continue + } + } + if fieldInfo, ok := csvHeadersLabels[j]; ok { // Position found accordingly to header name - if err := setInnerField(&outInner, outInnerWasPointer, fieldInfo.IndexChain, csvColumnContent, fieldInfo.omitEmpty); err != nil { // Set field of struct + + value := csvColumnContent + if value == "" { + value = fieldInfo.defaultValue + } + + if err := setInnerField(&outInner, outInnerWasPointer, fieldInfo.IndexChain, value, fieldInfo.omitEmpty); err != nil { // Set field of struct parseError := &csv.ParseError{ Line: i + 2, //add 2 to account for the header & 0-indexing of arrays Column: j + 1, @@ -313,6 +341,12 @@ func readEach(decoder SimpleDecoder, errHandler ErrorHandler, c interface{}) err } } } + + if withFieldsOK { + reflectedObject := reflect.ValueOf(objectIface) + outInner = reflectedObject.Elem() + } + outValue.Send(outInner) i++ } diff --git a/vendor/github.com/goccy/go-json/.golangci.yml b/vendor/github.com/goccy/go-json/.golangci.yml index 57ae5a52..977accaa 100644 --- a/vendor/github.com/goccy/go-json/.golangci.yml +++ b/vendor/github.com/goccy/go-json/.golangci.yml @@ -56,6 +56,9 @@ linters: - cyclop - containedctx - revive + - nosnakecase + - exhaustruct + - depguard issues: exclude-rules: diff --git a/vendor/github.com/goccy/go-json/Makefile b/vendor/github.com/goccy/go-json/Makefile index 5bbfc4c9..c030577d 100644 --- a/vendor/github.com/goccy/go-json/Makefile +++ b/vendor/github.com/goccy/go-json/Makefile @@ -30,7 +30,7 @@ golangci-lint: | $(BIN_DIR) GOLANGCI_LINT_TMP_DIR=$$(mktemp -d); \ cd $$GOLANGCI_LINT_TMP_DIR; \ go mod init tmp; \ - GOBIN=$(BIN_DIR) go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.48.0; \ + GOBIN=$(BIN_DIR) go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.54.2; \ rm -rf $$GOLANGCI_LINT_TMP_DIR; \ } diff --git a/vendor/github.com/goccy/go-json/encode.go b/vendor/github.com/goccy/go-json/encode.go index 4bd899f3..c5173825 100644 --- a/vendor/github.com/goccy/go-json/encode.go +++ b/vendor/github.com/goccy/go-json/encode.go @@ -52,7 +52,7 @@ func (e *Encoder) EncodeContext(ctx context.Context, v interface{}, optFuncs ... rctx.Option.Flag |= encoder.ContextOption rctx.Option.Context = ctx - err := e.encodeWithOption(rctx, v, optFuncs...) + err := e.encodeWithOption(rctx, v, optFuncs...) //nolint: contextcheck encoder.ReleaseRuntimeContext(rctx) return err @@ -120,7 +120,7 @@ func marshalContext(ctx context.Context, v interface{}, optFuncs ...EncodeOption optFunc(rctx.Option) } - buf, err := encode(rctx, v) + buf, err := encode(rctx, v) //nolint: contextcheck if err != nil { encoder.ReleaseRuntimeContext(rctx) return nil, err diff --git a/vendor/github.com/goccy/go-json/internal/decoder/ptr.go b/vendor/github.com/goccy/go-json/internal/decoder/ptr.go index de12e105..ae229946 100644 --- a/vendor/github.com/goccy/go-json/internal/decoder/ptr.go +++ b/vendor/github.com/goccy/go-json/internal/decoder/ptr.go @@ -85,6 +85,7 @@ func (d *ptrDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.P } c, err := d.dec.Decode(ctx, cursor, depth, newptr) if err != nil { + *(*unsafe.Pointer)(p) = nil return 0, err } cursor = c diff --git a/vendor/github.com/goccy/go-json/internal/decoder/unmarshal_text.go b/vendor/github.com/goccy/go-json/internal/decoder/unmarshal_text.go index 6d37993f..d711d0f8 100644 --- a/vendor/github.com/goccy/go-json/internal/decoder/unmarshal_text.go +++ b/vendor/github.com/goccy/go-json/internal/decoder/unmarshal_text.go @@ -147,7 +147,7 @@ func (d *unmarshalTextDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int return nil, 0, fmt.Errorf("json: unmarshal text decoder does not support decode path") } -func unquoteBytes(s []byte) (t []byte, ok bool) { +func unquoteBytes(s []byte) (t []byte, ok bool) { //nolint: nonamedreturns length := len(s) if length < 2 || s[0] != '"' || s[length-1] != '"' { return diff --git a/vendor/github.com/goccy/go-json/internal/encoder/compact.go b/vendor/github.com/goccy/go-json/internal/encoder/compact.go index 0eb9545d..e287a6c0 100644 --- a/vendor/github.com/goccy/go-json/internal/encoder/compact.go +++ b/vendor/github.com/goccy/go-json/internal/encoder/compact.go @@ -213,8 +213,8 @@ func compactString(dst, src []byte, cursor int64, escape bool) ([]byte, int64, e dst = append(dst, src[start:cursor]...) dst = append(dst, `\u202`...) dst = append(dst, hex[src[cursor+2]&0xF]) - cursor += 2 start = cursor + 3 + cursor += 2 } } switch c { diff --git a/vendor/github.com/goccy/go-json/internal/encoder/compiler.go b/vendor/github.com/goccy/go-json/internal/encoder/compiler.go index 3ae39ba8..37b7aa38 100644 --- a/vendor/github.com/goccy/go-json/internal/encoder/compiler.go +++ b/vendor/github.com/goccy/go-json/internal/encoder/compiler.go @@ -480,7 +480,7 @@ func (c *Compiler) mapCode(typ *runtime.Type) (*MapCode, error) { func (c *Compiler) listElemCode(typ *runtime.Type) (Code, error) { switch { - case c.isPtrMarshalJSONType(typ): + case c.implementsMarshalJSONType(typ) || c.implementsMarshalJSONType(runtime.PtrTo(typ)): return c.marshalJSONCode(typ) case !typ.Implements(marshalTextType) && runtime.PtrTo(typ).Implements(marshalTextType): return c.marshalTextCode(typ) diff --git a/vendor/github.com/goccy/go-json/internal/encoder/int.go b/vendor/github.com/goccy/go-json/internal/encoder/int.go index 85f07960..8b5febea 100644 --- a/vendor/github.com/goccy/go-json/internal/encoder/int.go +++ b/vendor/github.com/goccy/go-json/internal/encoder/int.go @@ -1,3 +1,27 @@ +// This files's processing codes are inspired by https://github.com/segmentio/encoding. +// The license notation is as follows. +// +// # MIT License +// +// Copyright (c) 2019 Segment.io, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. package encoder import ( diff --git a/vendor/github.com/goccy/go-json/internal/encoder/string.go b/vendor/github.com/goccy/go-json/internal/encoder/string.go index e4152b27..4abb8416 100644 --- a/vendor/github.com/goccy/go-json/internal/encoder/string.go +++ b/vendor/github.com/goccy/go-json/internal/encoder/string.go @@ -1,3 +1,27 @@ +// This files's string processing codes are inspired by https://github.com/segmentio/encoding. +// The license notation is as follows. +// +// # MIT License +// +// Copyright (c) 2019 Segment.io, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. package encoder import ( diff --git a/vendor/github.com/goccy/go-json/internal/runtime/rtype.go b/vendor/github.com/goccy/go-json/internal/runtime/rtype.go index 4db10deb..37cfe35a 100644 --- a/vendor/github.com/goccy/go-json/internal/runtime/rtype.go +++ b/vendor/github.com/goccy/go-json/internal/runtime/rtype.go @@ -252,7 +252,6 @@ func IfaceIndir(*Type) bool //go:noescape func RType2Type(t *Type) reflect.Type -//go:nolint structcheck type emptyInterface struct { _ *Type ptr unsafe.Pointer diff --git a/vendor/github.com/goccy/go-json/json.go b/vendor/github.com/goccy/go-json/json.go index 413cb20b..fb18065a 100644 --- a/vendor/github.com/goccy/go-json/json.go +++ b/vendor/github.com/goccy/go-json/json.go @@ -89,31 +89,31 @@ type UnmarshalerContext interface { // // Examples of struct field tags and their meanings: // -// // Field appears in JSON as key "myName". -// Field int `json:"myName"` +// // Field appears in JSON as key "myName". +// Field int `json:"myName"` // -// // Field appears in JSON as key "myName" and -// // the field is omitted from the object if its value is empty, -// // as defined above. -// Field int `json:"myName,omitempty"` +// // Field appears in JSON as key "myName" and +// // the field is omitted from the object if its value is empty, +// // as defined above. +// Field int `json:"myName,omitempty"` // -// // Field appears in JSON as key "Field" (the default), but -// // the field is skipped if empty. -// // Note the leading comma. -// Field int `json:",omitempty"` +// // Field appears in JSON as key "Field" (the default), but +// // the field is skipped if empty. +// // Note the leading comma. +// Field int `json:",omitempty"` // -// // Field is ignored by this package. -// Field int `json:"-"` +// // Field is ignored by this package. +// Field int `json:"-"` // -// // Field appears in JSON as key "-". -// Field int `json:"-,"` +// // Field appears in JSON as key "-". +// Field int `json:"-,"` // // The "string" option signals that a field is stored as JSON inside a // JSON-encoded string. It applies only to fields of string, floating point, // integer, or boolean types. This extra level of encoding is sometimes used // when communicating with JavaScript programs: // -// Int64String int64 `json:",string"` +// Int64String int64 `json:",string"` // // The key name will be used if it's a non-empty string consisting of // only Unicode letters, digits, and ASCII punctuation except quotation @@ -166,7 +166,6 @@ type UnmarshalerContext interface { // JSON cannot represent cyclic data structures and Marshal does not // handle them. Passing cyclic structures to Marshal will result in // an infinite recursion. -// func Marshal(v interface{}) ([]byte, error) { return MarshalWithOption(v) } @@ -264,14 +263,13 @@ func MarshalIndentWithOption(v interface{}, prefix, indent string, optFuncs ...E // // The JSON null value unmarshals into an interface, map, pointer, or slice // by setting that Go value to nil. Because null is often used in JSON to mean -// ``not present,'' unmarshaling a JSON null into any other Go type has no effect +// “not present,” unmarshaling a JSON null into any other Go type has no effect // on the value and produces no error. // // When unmarshaling quoted strings, invalid UTF-8 or // invalid UTF-16 surrogate pairs are not treated as an error. // Instead, they are replaced by the Unicode replacement // character U+FFFD. -// func Unmarshal(data []byte, v interface{}) error { return unmarshal(data, v) } @@ -299,7 +297,6 @@ func UnmarshalNoEscape(data []byte, v interface{}, optFuncs ...DecodeOptionFunc) // Number, for JSON numbers // string, for JSON string literals // nil, for JSON null -// type Token = json.Token // A Number represents a JSON number literal. diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS index 203e84eb..52ccb5a9 100644 --- a/vendor/github.com/golang/snappy/AUTHORS +++ b/vendor/github.com/golang/snappy/AUTHORS @@ -10,6 +10,7 @@ Amazon.com, Inc Damian Gryski +Eric Buth Google Inc. Jan Mercl <0xjnml@gmail.com> Klaus Post diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS index d9914732..ea6524dd 100644 --- a/vendor/github.com/golang/snappy/CONTRIBUTORS +++ b/vendor/github.com/golang/snappy/CONTRIBUTORS @@ -26,7 +26,9 @@ # Please keep the list sorted. +Alex Legg Damian Gryski +Eric Buth Jan Mercl <0xjnml@gmail.com> Jonathan Swinney Kai Backman diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go index f1e04b17..23c6e26c 100644 --- a/vendor/github.com/golang/snappy/decode.go +++ b/vendor/github.com/golang/snappy/decode.go @@ -118,32 +118,23 @@ func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { return true } -// Read satisfies the io.Reader interface. -func (r *Reader) Read(p []byte) (int, error) { - if r.err != nil { - return 0, r.err - } - for { - if r.i < r.j { - n := copy(p, r.decoded[r.i:r.j]) - r.i += n - return n, nil - } +func (r *Reader) fill() error { + for r.i >= r.j { if !r.readFull(r.buf[:4], true) { - return 0, r.err + return r.err } chunkType := r.buf[0] if !r.readHeader { if chunkType != chunkTypeStreamIdentifier { r.err = ErrCorrupt - return 0, r.err + return r.err } r.readHeader = true } chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 if chunkLen > len(r.buf) { r.err = ErrUnsupported - return 0, r.err + return r.err } // The chunk types are specified at @@ -153,11 +144,11 @@ func (r *Reader) Read(p []byte) (int, error) { // Section 4.2. Compressed data (chunk type 0x00). if chunkLen < checksumSize { r.err = ErrCorrupt - return 0, r.err + return r.err } buf := r.buf[:chunkLen] if !r.readFull(buf, false) { - return 0, r.err + return r.err } checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 buf = buf[checksumSize:] @@ -165,19 +156,19 @@ func (r *Reader) Read(p []byte) (int, error) { n, err := DecodedLen(buf) if err != nil { r.err = err - return 0, r.err + return r.err } if n > len(r.decoded) { r.err = ErrCorrupt - return 0, r.err + return r.err } if _, err := Decode(r.decoded, buf); err != nil { r.err = err - return 0, r.err + return r.err } if crc(r.decoded[:n]) != checksum { r.err = ErrCorrupt - return 0, r.err + return r.err } r.i, r.j = 0, n continue @@ -186,25 +177,25 @@ func (r *Reader) Read(p []byte) (int, error) { // Section 4.3. Uncompressed data (chunk type 0x01). if chunkLen < checksumSize { r.err = ErrCorrupt - return 0, r.err + return r.err } buf := r.buf[:checksumSize] if !r.readFull(buf, false) { - return 0, r.err + return r.err } checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 // Read directly into r.decoded instead of via r.buf. n := chunkLen - checksumSize if n > len(r.decoded) { r.err = ErrCorrupt - return 0, r.err + return r.err } if !r.readFull(r.decoded[:n], false) { - return 0, r.err + return r.err } if crc(r.decoded[:n]) != checksum { r.err = ErrCorrupt - return 0, r.err + return r.err } r.i, r.j = 0, n continue @@ -213,15 +204,15 @@ func (r *Reader) Read(p []byte) (int, error) { // Section 4.1. Stream identifier (chunk type 0xff). if chunkLen != len(magicBody) { r.err = ErrCorrupt - return 0, r.err + return r.err } if !r.readFull(r.buf[:len(magicBody)], false) { - return 0, r.err + return r.err } for i := 0; i < len(magicBody); i++ { if r.buf[i] != magicBody[i] { r.err = ErrCorrupt - return 0, r.err + return r.err } } continue @@ -230,12 +221,44 @@ func (r *Reader) Read(p []byte) (int, error) { if chunkType <= 0x7f { // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). r.err = ErrUnsupported - return 0, r.err + return r.err } // Section 4.4 Padding (chunk type 0xfe). // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). if !r.readFull(r.buf[:chunkLen], false) { - return 0, r.err + return r.err } } + + return nil +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + + if err := r.fill(); err != nil { + return 0, err + } + + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil +} + +// ReadByte satisfies the io.ByteReader interface. +func (r *Reader) ReadByte() (byte, error) { + if r.err != nil { + return 0, r.err + } + + if err := r.fill(); err != nil { + return 0, err + } + + c := r.decoded[r.i] + r.i++ + return c, nil } diff --git a/vendor/github.com/golang/snappy/encode_arm64.s b/vendor/github.com/golang/snappy/encode_arm64.s index bf83667d..f8d54adf 100644 --- a/vendor/github.com/golang/snappy/encode_arm64.s +++ b/vendor/github.com/golang/snappy/encode_arm64.s @@ -382,7 +382,7 @@ inner0: // if load32(src, s) != load32(src, candidate) { continue } break MOVW 0(R7), R3 - MOVW (R6)(R15*1), R4 + MOVW (R6)(R15), R4 CMPW R4, R3 BNE inner0 @@ -672,7 +672,7 @@ inlineEmitCopyEnd: MOVHU R3, 0(R17)(R11<<1) // if uint32(x>>8) == load32(src, candidate) { continue } - MOVW (R6)(R15*1), R4 + MOVW (R6)(R15), R4 CMPW R4, R14 BEQ inner1 diff --git a/vendor/github.com/google/flatbuffers/LICENSE.txt b/vendor/github.com/google/flatbuffers/LICENSE similarity index 100% rename from vendor/github.com/google/flatbuffers/LICENSE.txt rename to vendor/github.com/google/flatbuffers/LICENSE diff --git a/vendor/github.com/google/flatbuffers/go/builder.go b/vendor/github.com/google/flatbuffers/go/builder.go index 0e763d7a..5d90e8ef 100644 --- a/vendor/github.com/google/flatbuffers/go/builder.go +++ b/vendor/github.com/google/flatbuffers/go/builder.go @@ -1,5 +1,7 @@ package flatbuffers +import "sort" + // Builder is a state machine for creating FlatBuffer objects. // Use a Builder to construct object(s) starting from leaf nodes. // @@ -22,6 +24,7 @@ type Builder struct { } const fileIdentifierLength = 4 +const sizePrefixLength = 4 // NewBuilder initializes a Builder of size `initial_size`. // The internal buffer is grown as needed. @@ -53,6 +56,12 @@ func (b *Builder) Reset() { b.vtable = b.vtable[:0] } + if b.sharedStrings != nil { + for key := range b.sharedStrings { + delete(b.sharedStrings, key) + } + } + b.head = UOffsetT(len(b.Bytes)) b.minalign = 1 b.nested = false @@ -308,6 +317,25 @@ func (b *Builder) EndVector(vectorNumElems int) UOffsetT { return b.Offset() } +// CreateVectorOfTables serializes slice of table offsets into a vector. +func (b *Builder) CreateVectorOfTables(offsets []UOffsetT) UOffsetT { + b.assertNotNested() + b.StartVector(4, len(offsets), 4) + for i := len(offsets) - 1; i >= 0; i-- { + b.PrependUOffsetT(offsets[i]) + } + return b.EndVector(len(offsets)) +} + +type KeyCompare func(o1, o2 UOffsetT, buf []byte) bool + +func (b *Builder) CreateVectorOfSortedTables(offsets []UOffsetT, keyCompare KeyCompare) UOffsetT { + sort.Slice(offsets, func(i, j int) bool { + return keyCompare(offsets[i], offsets[j], b.Bytes) + }) + return b.CreateVectorOfTables(offsets) +} + // CreateSharedString Checks if the string is already written // to the buffer before calling CreateString func (b *Builder) CreateSharedString(s string) UOffsetT { @@ -574,11 +602,53 @@ func (b *Builder) FinishWithFileIdentifier(rootTable UOffsetT, fid []byte) { b.Finish(rootTable) } +// FinishSizePrefixed finalizes a buffer, pointing to the given `rootTable`. +// The buffer is prefixed with the size of the buffer, excluding the size +// of the prefix itself. +func (b *Builder) FinishSizePrefixed(rootTable UOffsetT) { + b.finish(rootTable, true) +} + +// FinishSizePrefixedWithFileIdentifier finalizes a buffer, pointing to the given `rootTable` +// and applies a file identifier. The buffer is prefixed with the size of the buffer, +// excluding the size of the prefix itself. +func (b *Builder) FinishSizePrefixedWithFileIdentifier(rootTable UOffsetT, fid []byte) { + if fid == nil || len(fid) != fileIdentifierLength { + panic("incorrect file identifier length") + } + // In order to add a file identifier and size prefix to the flatbuffer message, + // we need to prepare an alignment, a size prefix length, and file identifier length + b.Prep(b.minalign, SizeInt32+fileIdentifierLength+sizePrefixLength) + for i := fileIdentifierLength - 1; i >= 0; i-- { + // place the file identifier + b.PlaceByte(fid[i]) + } + // finish + b.finish(rootTable, true) +} + // Finish finalizes a buffer, pointing to the given `rootTable`. func (b *Builder) Finish(rootTable UOffsetT) { + b.finish(rootTable, false) +} + +// finish finalizes a buffer, pointing to the given `rootTable` +// with an optional size prefix. +func (b *Builder) finish(rootTable UOffsetT, sizePrefix bool) { b.assertNotNested() - b.Prep(b.minalign, SizeUOffsetT) + + if sizePrefix { + b.Prep(b.minalign, SizeUOffsetT+sizePrefixLength) + } else { + b.Prep(b.minalign, SizeUOffsetT) + } + b.PrependUOffsetT(rootTable) + + if sizePrefix { + b.PlaceUint32(uint32(b.Offset())) + } + b.finished = true } diff --git a/vendor/github.com/google/flatbuffers/go/encode.go b/vendor/github.com/google/flatbuffers/go/encode.go index 72d4f3a1..a2a57981 100644 --- a/vendor/github.com/google/flatbuffers/go/encode.go +++ b/vendor/github.com/google/flatbuffers/go/encode.go @@ -118,7 +118,7 @@ func GetFloat64(buf []byte) float64 { // GetUOffsetT decodes a little-endian UOffsetT from a byte slice. func GetUOffsetT(buf []byte) UOffsetT { - return UOffsetT(GetInt32(buf)) + return UOffsetT(GetUint32(buf)) } // GetSOffsetT decodes a little-endian SOffsetT from a byte slice. diff --git a/vendor/github.com/google/flatbuffers/go/lib.go b/vendor/github.com/google/flatbuffers/go/lib.go index adfce52e..a4e99de1 100644 --- a/vendor/github.com/google/flatbuffers/go/lib.go +++ b/vendor/github.com/google/flatbuffers/go/lib.go @@ -11,3 +11,40 @@ func GetRootAs(buf []byte, offset UOffsetT, fb FlatBuffer) { n := GetUOffsetT(buf[offset:]) fb.Init(buf, n+offset) } + +// GetSizePrefixedRootAs is a generic helper to initialize a FlatBuffer with the provided size-prefixed buffer +// bytes and its data offset +func GetSizePrefixedRootAs(buf []byte, offset UOffsetT, fb FlatBuffer) { + n := GetUOffsetT(buf[offset+sizePrefixLength:]) + fb.Init(buf, n+offset+sizePrefixLength) +} + +// GetSizePrefix reads the size from a size-prefixed flatbuffer +func GetSizePrefix(buf []byte, offset UOffsetT) uint32 { + return GetUint32(buf[offset:]) +} + +// GetIndirectOffset retrives the relative offset in the provided buffer stored at `offset`. +func GetIndirectOffset(buf []byte, offset UOffsetT) UOffsetT { + return offset + GetUOffsetT(buf[offset:]) +} + +// GetBufferIdentifier returns the file identifier as string +func GetBufferIdentifier(buf []byte) string { + return string(buf[SizeUOffsetT:][:fileIdentifierLength]) +} + +// GetBufferIdentifier returns the file identifier as string for a size-prefixed buffer +func GetSizePrefixedBufferIdentifier(buf []byte) string { + return string(buf[SizeUOffsetT+sizePrefixLength:][:fileIdentifierLength]) +} + +// BufferHasIdentifier checks if the identifier in a buffer has the expected value +func BufferHasIdentifier(buf []byte, identifier string) bool { + return GetBufferIdentifier(buf) == identifier +} + +// BufferHasIdentifier checks if the identifier in a buffer has the expected value for a size-prefixed buffer +func SizePrefixedBufferHasIdentifier(buf []byte, identifier string) bool { + return GetSizePrefixedBufferIdentifier(buf) == identifier +} diff --git a/vendor/github.com/google/pprof/profile/encode.go b/vendor/github.com/google/pprof/profile/encode.go index 182c926b..8ce9d3cf 100644 --- a/vendor/github.com/google/pprof/profile/encode.go +++ b/vendor/github.com/google/pprof/profile/encode.go @@ -122,6 +122,7 @@ func (p *Profile) preEncode() { } p.defaultSampleTypeX = addString(strings, p.DefaultSampleType) + p.docURLX = addString(strings, p.DocURL) p.stringTable = make([]string, len(strings)) for s, i := range strings { @@ -156,6 +157,7 @@ func (p *Profile) encode(b *buffer) { encodeInt64Opt(b, 12, p.Period) encodeInt64s(b, 13, p.commentX) encodeInt64(b, 14, p.defaultSampleTypeX) + encodeInt64Opt(b, 15, p.docURLX) } var profileDecoder = []decoder{ @@ -237,6 +239,8 @@ var profileDecoder = []decoder{ func(b *buffer, m message) error { return decodeInt64s(b, &m.(*Profile).commentX) }, // int64 defaultSampleType = 14 func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).defaultSampleTypeX) }, + // string doc_link = 15; + func(b *buffer, m message) error { return decodeInt64(b, &m.(*Profile).docURLX) }, } // postDecode takes the unexported fields populated by decode (with @@ -384,6 +388,7 @@ func (p *Profile) postDecode() error { p.commentX = nil p.DefaultSampleType, err = getString(p.stringTable, &p.defaultSampleTypeX, err) + p.DocURL, err = getString(p.stringTable, &p.docURLX, err) p.stringTable = nil return err } @@ -530,6 +535,7 @@ func (p *Line) decoder() []decoder { func (p *Line) encode(b *buffer) { encodeUint64Opt(b, 1, p.functionIDX) encodeInt64Opt(b, 2, p.Line) + encodeInt64Opt(b, 3, p.Column) } var lineDecoder = []decoder{ @@ -538,6 +544,8 @@ var lineDecoder = []decoder{ func(b *buffer, m message) error { return decodeUint64(b, &m.(*Line).functionIDX) }, // optional int64 line = 2 func(b *buffer, m message) error { return decodeInt64(b, &m.(*Line).Line) }, + // optional int64 column = 3 + func(b *buffer, m message) error { return decodeInt64(b, &m.(*Line).Column) }, } func (p *Function) decoder() []decoder { diff --git a/vendor/github.com/google/pprof/profile/legacy_java_profile.go b/vendor/github.com/google/pprof/profile/legacy_java_profile.go index 91f45e53..4580bab1 100644 --- a/vendor/github.com/google/pprof/profile/legacy_java_profile.go +++ b/vendor/github.com/google/pprof/profile/legacy_java_profile.go @@ -56,7 +56,7 @@ func javaCPUProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte } // Strip out addresses for better merge. - if err = p.Aggregate(true, true, true, true, false); err != nil { + if err = p.Aggregate(true, true, true, true, false, false); err != nil { return nil, err } @@ -99,7 +99,7 @@ func parseJavaProfile(b []byte) (*Profile, error) { } // Strip out addresses for better merge. - if err = p.Aggregate(true, true, true, true, false); err != nil { + if err = p.Aggregate(true, true, true, true, false, false); err != nil { return nil, err } diff --git a/vendor/github.com/google/pprof/profile/merge.go b/vendor/github.com/google/pprof/profile/merge.go index 4b66282c..ba4d7464 100644 --- a/vendor/github.com/google/pprof/profile/merge.go +++ b/vendor/github.com/google/pprof/profile/merge.go @@ -326,12 +326,13 @@ func (l *Location) key() locationKey { key.addr -= l.Mapping.Start key.mappingID = l.Mapping.ID } - lines := make([]string, len(l.Line)*2) + lines := make([]string, len(l.Line)*3) for i, line := range l.Line { if line.Function != nil { lines[i*2] = strconv.FormatUint(line.Function.ID, 16) } lines[i*2+1] = strconv.FormatInt(line.Line, 16) + lines[i*2+2] = strconv.FormatInt(line.Column, 16) } key.lines = strings.Join(lines, "|") return key @@ -418,6 +419,7 @@ func (pm *profileMerger) mapLine(src Line) Line { ln := Line{ Function: pm.mapFunction(src.Function), Line: src.Line, + Column: src.Column, } return ln } @@ -474,6 +476,7 @@ func combineHeaders(srcs []*Profile) (*Profile, error) { var timeNanos, durationNanos, period int64 var comments []string seenComments := map[string]bool{} + var docURL string var defaultSampleType string for _, s := range srcs { if timeNanos == 0 || s.TimeNanos < timeNanos { @@ -492,6 +495,9 @@ func combineHeaders(srcs []*Profile) (*Profile, error) { if defaultSampleType == "" { defaultSampleType = s.DefaultSampleType } + if docURL == "" { + docURL = s.DocURL + } } p := &Profile{ @@ -507,6 +513,7 @@ func combineHeaders(srcs []*Profile) (*Profile, error) { Comments: comments, DefaultSampleType: defaultSampleType, + DocURL: docURL, } copy(p.SampleType, srcs[0].SampleType) return p, nil diff --git a/vendor/github.com/google/pprof/profile/profile.go b/vendor/github.com/google/pprof/profile/profile.go index 60ef7e92..f47a2439 100644 --- a/vendor/github.com/google/pprof/profile/profile.go +++ b/vendor/github.com/google/pprof/profile/profile.go @@ -39,6 +39,7 @@ type Profile struct { Location []*Location Function []*Function Comments []string + DocURL string DropFrames string KeepFrames string @@ -53,6 +54,7 @@ type Profile struct { encodeMu sync.Mutex commentX []int64 + docURLX int64 dropFramesX int64 keepFramesX int64 stringTable []string @@ -145,6 +147,7 @@ type Location struct { type Line struct { Function *Function Line int64 + Column int64 functionIDX uint64 } @@ -436,7 +439,7 @@ func (p *Profile) CheckValid() error { // Aggregate merges the locations in the profile into equivalence // classes preserving the request attributes. It also updates the // samples to point to the merged locations. -func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, address bool) error { +func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, columnnumber, address bool) error { for _, m := range p.Mapping { m.HasInlineFrames = m.HasInlineFrames && inlineFrame m.HasFunctions = m.HasFunctions && function @@ -458,7 +461,7 @@ func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, address } // Aggregate locations - if !inlineFrame || !address || !linenumber { + if !inlineFrame || !address || !linenumber || !columnnumber { for _, l := range p.Location { if !inlineFrame && len(l.Line) > 1 { l.Line = l.Line[len(l.Line)-1:] @@ -466,6 +469,12 @@ func (p *Profile) Aggregate(inlineFrame, function, filename, linenumber, address if !linenumber { for i := range l.Line { l.Line[i].Line = 0 + l.Line[i].Column = 0 + } + } + if !columnnumber { + for i := range l.Line { + l.Line[i].Column = 0 } } if !address { @@ -548,6 +557,9 @@ func (p *Profile) String() string { for _, c := range p.Comments { ss = append(ss, "Comment: "+c) } + if url := p.DocURL; url != "" { + ss = append(ss, fmt.Sprintf("Doc: %s", url)) + } if pt := p.PeriodType; pt != nil { ss = append(ss, fmt.Sprintf("PeriodType: %s %s", pt.Type, pt.Unit)) } @@ -627,10 +639,11 @@ func (l *Location) string() string { for li := range l.Line { lnStr := "??" if fn := l.Line[li].Function; fn != nil { - lnStr = fmt.Sprintf("%s %s:%d s=%d", + lnStr = fmt.Sprintf("%s %s:%d:%d s=%d", fn.Name, fn.Filename, l.Line[li].Line, + l.Line[li].Column, fn.StartLine) if fn.Name != fn.SystemName { lnStr = lnStr + "(" + fn.SystemName + ")" @@ -836,10 +849,10 @@ func (p *Profile) HasFileLines() bool { // Unsymbolizable returns true if a mapping points to a binary for which // locations can't be symbolized in principle, at least now. Examples are -// "[vdso]", [vsyscall]" and some others, see the code. +// "[vdso]", "[vsyscall]" and some others, see the code. func (m *Mapping) Unsymbolizable() bool { name := filepath.Base(m.File) - return strings.HasPrefix(name, "[") || strings.HasPrefix(name, "linux-vdso") || strings.HasPrefix(m.File, "/dev/dri/") + return strings.HasPrefix(name, "[") || strings.HasPrefix(name, "linux-vdso") || strings.HasPrefix(m.File, "/dev/dri/") || m.File == "//anon" } // Copy makes a fully independent copy of a profile. diff --git a/vendor/github.com/google/uuid/CHANGELOG.md b/vendor/github.com/google/uuid/CHANGELOG.md index 2bd78667..7ec5ac7e 100644 --- a/vendor/github.com/google/uuid/CHANGELOG.md +++ b/vendor/github.com/google/uuid/CHANGELOG.md @@ -1,5 +1,36 @@ # Changelog +## [1.6.0](https://github.com/google/uuid/compare/v1.5.0...v1.6.0) (2024-01-16) + + +### Features + +* add Max UUID constant ([#149](https://github.com/google/uuid/issues/149)) ([c58770e](https://github.com/google/uuid/commit/c58770eb495f55fe2ced6284f93c5158a62e53e3)) + + +### Bug Fixes + +* fix typo in version 7 uuid documentation ([#153](https://github.com/google/uuid/issues/153)) ([016b199](https://github.com/google/uuid/commit/016b199544692f745ffc8867b914129ecb47ef06)) +* Monotonicity in UUIDv7 ([#150](https://github.com/google/uuid/issues/150)) ([a2b2b32](https://github.com/google/uuid/commit/a2b2b32373ff0b1a312b7fdf6d38a977099698a6)) + +## [1.5.0](https://github.com/google/uuid/compare/v1.4.0...v1.5.0) (2023-12-12) + + +### Features + +* Validate UUID without creating new UUID ([#141](https://github.com/google/uuid/issues/141)) ([9ee7366](https://github.com/google/uuid/commit/9ee7366e66c9ad96bab89139418a713dc584ae29)) + +## [1.4.0](https://github.com/google/uuid/compare/v1.3.1...v1.4.0) (2023-10-26) + + +### Features + +* UUIDs slice type with Strings() convenience method ([#133](https://github.com/google/uuid/issues/133)) ([cd5fbbd](https://github.com/google/uuid/commit/cd5fbbdd02f3e3467ac18940e07e062be1f864b4)) + +### Fixes + +* Clarify that Parse's job is to parse but not necessarily validate strings. (Documents current behavior) + ## [1.3.1](https://github.com/google/uuid/compare/v1.3.0...v1.3.1) (2023-08-18) diff --git a/vendor/github.com/google/uuid/CONTRIBUTING.md b/vendor/github.com/google/uuid/CONTRIBUTING.md index 55668887..a502fdc5 100644 --- a/vendor/github.com/google/uuid/CONTRIBUTING.md +++ b/vendor/github.com/google/uuid/CONTRIBUTING.md @@ -11,7 +11,7 @@ please explain why in the pull request description. ### Releasing -Commits that would precipitate a SemVer change, as desrcibed in the Conventional +Commits that would precipitate a SemVer change, as described in the Conventional Commits Specification, will trigger [`release-please`](https://github.com/google-github-actions/release-please-action) to create a release candidate pull request. Once submitted, `release-please` will create a release. diff --git a/vendor/github.com/google/uuid/hash.go b/vendor/github.com/google/uuid/hash.go index b404f4be..dc60082d 100644 --- a/vendor/github.com/google/uuid/hash.go +++ b/vendor/github.com/google/uuid/hash.go @@ -17,6 +17,12 @@ var ( NameSpaceOID = Must(Parse("6ba7b812-9dad-11d1-80b4-00c04fd430c8")) NameSpaceX500 = Must(Parse("6ba7b814-9dad-11d1-80b4-00c04fd430c8")) Nil UUID // empty UUID, all zeros + + // The Max UUID is special form of UUID that is specified to have all 128 bits set to 1. + Max = UUID{ + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + } ) // NewHash returns a new UUID derived from the hash of space concatenated with diff --git a/vendor/github.com/google/uuid/time.go b/vendor/github.com/google/uuid/time.go index e6ef06cd..c3511292 100644 --- a/vendor/github.com/google/uuid/time.go +++ b/vendor/github.com/google/uuid/time.go @@ -108,12 +108,23 @@ func setClockSequence(seq int) { } // Time returns the time in 100s of nanoseconds since 15 Oct 1582 encoded in -// uuid. The time is only defined for version 1 and 2 UUIDs. +// uuid. The time is only defined for version 1, 2, 6 and 7 UUIDs. func (uuid UUID) Time() Time { - time := int64(binary.BigEndian.Uint32(uuid[0:4])) - time |= int64(binary.BigEndian.Uint16(uuid[4:6])) << 32 - time |= int64(binary.BigEndian.Uint16(uuid[6:8])&0xfff) << 48 - return Time(time) + var t Time + switch uuid.Version() { + case 6: + time := binary.BigEndian.Uint64(uuid[:8]) // Ignore uuid[6] version b0110 + t = Time(time) + case 7: + time := binary.BigEndian.Uint64(uuid[:8]) + t = Time((time>>16)*10000 + g1582ns100) + default: // forward compatible + time := int64(binary.BigEndian.Uint32(uuid[0:4])) + time |= int64(binary.BigEndian.Uint16(uuid[4:6])) << 32 + time |= int64(binary.BigEndian.Uint16(uuid[6:8])&0xfff) << 48 + t = Time(time) + } + return t } // ClockSequence returns the clock sequence encoded in uuid. diff --git a/vendor/github.com/google/uuid/uuid.go b/vendor/github.com/google/uuid/uuid.go index a56138cc..5232b486 100644 --- a/vendor/github.com/google/uuid/uuid.go +++ b/vendor/github.com/google/uuid/uuid.go @@ -56,11 +56,15 @@ func IsInvalidLengthError(err error) bool { return ok } -// Parse decodes s into a UUID or returns an error. Both the standard UUID -// forms of xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx and -// urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx are decoded as well as the -// Microsoft encoding {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx} and the raw hex -// encoding: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx. +// Parse decodes s into a UUID or returns an error if it cannot be parsed. Both +// the standard UUID forms defined in RFC 4122 +// (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx and +// urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx) are decoded. In addition, +// Parse accepts non-standard strings such as the raw hex encoding +// xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx and 38 byte "Microsoft style" encodings, +// e.g. {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx}. Only the middle 36 bytes are +// examined in the latter case. Parse should not be used to validate strings as +// it parses non-standard encodings as indicated above. func Parse(s string) (UUID, error) { var uuid UUID switch len(s) { @@ -182,6 +186,59 @@ func Must(uuid UUID, err error) UUID { return uuid } +// Validate returns an error if s is not a properly formatted UUID in one of the following formats: +// xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx +// urn:uuid:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx +// xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +// {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx} +// It returns an error if the format is invalid, otherwise nil. +func Validate(s string) error { + switch len(s) { + // Standard UUID format + case 36: + + // UUID with "urn:uuid:" prefix + case 36 + 9: + if !strings.EqualFold(s[:9], "urn:uuid:") { + return fmt.Errorf("invalid urn prefix: %q", s[:9]) + } + s = s[9:] + + // UUID enclosed in braces + case 36 + 2: + if s[0] != '{' || s[len(s)-1] != '}' { + return fmt.Errorf("invalid bracketed UUID format") + } + s = s[1 : len(s)-1] + + // UUID without hyphens + case 32: + for i := 0; i < len(s); i += 2 { + _, ok := xtob(s[i], s[i+1]) + if !ok { + return errors.New("invalid UUID format") + } + } + + default: + return invalidLengthError{len(s)} + } + + // Check for standard UUID format + if len(s) == 36 { + if s[8] != '-' || s[13] != '-' || s[18] != '-' || s[23] != '-' { + return errors.New("invalid UUID format") + } + for _, x := range []int{0, 2, 4, 6, 9, 11, 14, 16, 19, 21, 24, 26, 28, 30, 32, 34} { + if _, ok := xtob(s[x], s[x+1]); !ok { + return errors.New("invalid UUID format") + } + } + } + + return nil +} + // String returns the string form of uuid, xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx // , or "" if uuid is invalid. func (uuid UUID) String() string { @@ -294,3 +351,15 @@ func DisableRandPool() { poolMu.Lock() poolPos = randPoolSize } + +// UUIDs is a slice of UUID types. +type UUIDs []UUID + +// Strings returns a string slice containing the string form of each UUID in uuids. +func (uuids UUIDs) Strings() []string { + var uuidStrs = make([]string, len(uuids)) + for i, uuid := range uuids { + uuidStrs[i] = uuid.String() + } + return uuidStrs +} diff --git a/vendor/github.com/google/uuid/version6.go b/vendor/github.com/google/uuid/version6.go new file mode 100644 index 00000000..339a959a --- /dev/null +++ b/vendor/github.com/google/uuid/version6.go @@ -0,0 +1,56 @@ +// Copyright 2023 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import "encoding/binary" + +// UUID version 6 is a field-compatible version of UUIDv1, reordered for improved DB locality. +// It is expected that UUIDv6 will primarily be used in contexts where there are existing v1 UUIDs. +// Systems that do not involve legacy UUIDv1 SHOULD consider using UUIDv7 instead. +// +// see https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format-03#uuidv6 +// +// NewV6 returns a Version 6 UUID based on the current NodeID and clock +// sequence, and the current time. If the NodeID has not been set by SetNodeID +// or SetNodeInterface then it will be set automatically. If the NodeID cannot +// be set NewV6 set NodeID is random bits automatically . If clock sequence has not been set by +// SetClockSequence then it will be set automatically. If GetTime fails to +// return the current NewV6 returns Nil and an error. +func NewV6() (UUID, error) { + var uuid UUID + now, seq, err := GetTime() + if err != nil { + return uuid, err + } + + /* + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | time_high | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | time_mid | time_low_and_version | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |clk_seq_hi_res | clk_seq_low | node (0-1) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | node (2-5) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + + binary.BigEndian.PutUint64(uuid[0:], uint64(now)) + binary.BigEndian.PutUint16(uuid[8:], seq) + + uuid[6] = 0x60 | (uuid[6] & 0x0F) + uuid[8] = 0x80 | (uuid[8] & 0x3F) + + nodeMu.Lock() + if nodeID == zeroID { + setNodeInterface("") + } + copy(uuid[10:], nodeID[:]) + nodeMu.Unlock() + + return uuid, nil +} diff --git a/vendor/github.com/google/uuid/version7.go b/vendor/github.com/google/uuid/version7.go new file mode 100644 index 00000000..3167b643 --- /dev/null +++ b/vendor/github.com/google/uuid/version7.go @@ -0,0 +1,104 @@ +// Copyright 2023 Google Inc. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package uuid + +import ( + "io" +) + +// UUID version 7 features a time-ordered value field derived from the widely +// implemented and well known Unix Epoch timestamp source, +// the number of milliseconds seconds since midnight 1 Jan 1970 UTC, leap seconds excluded. +// As well as improved entropy characteristics over versions 1 or 6. +// +// see https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format-03#name-uuid-version-7 +// +// Implementations SHOULD utilize UUID version 7 over UUID version 1 and 6 if possible. +// +// NewV7 returns a Version 7 UUID based on the current time(Unix Epoch). +// Uses the randomness pool if it was enabled with EnableRandPool. +// On error, NewV7 returns Nil and an error +func NewV7() (UUID, error) { + uuid, err := NewRandom() + if err != nil { + return uuid, err + } + makeV7(uuid[:]) + return uuid, nil +} + +// NewV7FromReader returns a Version 7 UUID based on the current time(Unix Epoch). +// it use NewRandomFromReader fill random bits. +// On error, NewV7FromReader returns Nil and an error. +func NewV7FromReader(r io.Reader) (UUID, error) { + uuid, err := NewRandomFromReader(r) + if err != nil { + return uuid, err + } + + makeV7(uuid[:]) + return uuid, nil +} + +// makeV7 fill 48 bits time (uuid[0] - uuid[5]), set version b0111 (uuid[6]) +// uuid[8] already has the right version number (Variant is 10) +// see function NewV7 and NewV7FromReader +func makeV7(uuid []byte) { + /* + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | unix_ts_ms | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | unix_ts_ms | ver | rand_a (12 bit seq) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |var| rand_b | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | rand_b | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + _ = uuid[15] // bounds check + + t, s := getV7Time() + + uuid[0] = byte(t >> 40) + uuid[1] = byte(t >> 32) + uuid[2] = byte(t >> 24) + uuid[3] = byte(t >> 16) + uuid[4] = byte(t >> 8) + uuid[5] = byte(t) + + uuid[6] = 0x70 | (0x0F & byte(s>>8)) + uuid[7] = byte(s) +} + +// lastV7time is the last time we returned stored as: +// +// 52 bits of time in milliseconds since epoch +// 12 bits of (fractional nanoseconds) >> 8 +var lastV7time int64 + +const nanoPerMilli = 1000000 + +// getV7Time returns the time in milliseconds and nanoseconds / 256. +// The returned (milli << 12 + seq) is guarenteed to be greater than +// (milli << 12 + seq) returned by any previous call to getV7Time. +func getV7Time() (milli, seq int64) { + timeMu.Lock() + defer timeMu.Unlock() + + nano := timeNow().UnixNano() + milli = nano / nanoPerMilli + // Sequence number is between 0 and 3906 (nanoPerMilli>>8) + seq = (nano - milli*nanoPerMilli) >> 8 + now := milli<<12 + seq + if now <= lastV7time { + now = lastV7time + 1 + milli = now >> 12 + seq = now & 0xfff + } + lastV7time = now + return milli, seq +} diff --git a/vendor/github.com/gorilla/securecookie/.editorconfig b/vendor/github.com/gorilla/securecookie/.editorconfig new file mode 100644 index 00000000..2940ec92 --- /dev/null +++ b/vendor/github.com/gorilla/securecookie/.editorconfig @@ -0,0 +1,20 @@ +; https://editorconfig.org/ + +root = true + +[*] +insert_final_newline = true +charset = utf-8 +trim_trailing_whitespace = true +indent_style = space +indent_size = 2 + +[{Makefile,go.mod,go.sum,*.go,.gitmodules}] +indent_style = tab +indent_size = 4 + +[*.md] +indent_size = 4 +trim_trailing_whitespace = false + +eclint_indent_style = unset diff --git a/vendor/github.com/gorilla/securecookie/.gitignore b/vendor/github.com/gorilla/securecookie/.gitignore new file mode 100644 index 00000000..84039fec --- /dev/null +++ b/vendor/github.com/gorilla/securecookie/.gitignore @@ -0,0 +1 @@ +coverage.coverprofile diff --git a/vendor/github.com/gorilla/securecookie/.travis.yml b/vendor/github.com/gorilla/securecookie/.travis.yml deleted file mode 100644 index 6f440f1e..00000000 --- a/vendor/github.com/gorilla/securecookie/.travis.yml +++ /dev/null @@ -1,19 +0,0 @@ -language: go -sudo: false - -matrix: - include: - - go: 1.3 - - go: 1.4 - - go: 1.5 - - go: 1.6 - - go: 1.7 - - go: tip - allow_failures: - - go: tip - -script: - - go get -t -v ./... - - diff -u <(echo -n) <(gofmt -d .) - - go vet $(go list ./... | grep -v /vendor/) - - go test -v -race ./... diff --git a/vendor/github.com/gorilla/securecookie/LICENSE b/vendor/github.com/gorilla/securecookie/LICENSE index 0e5fb872..bb9d80bc 100644 --- a/vendor/github.com/gorilla/securecookie/LICENSE +++ b/vendor/github.com/gorilla/securecookie/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2012 Rodrigo Moraes. All rights reserved. +Copyright (c) 2023 The Gorilla Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are diff --git a/vendor/github.com/gorilla/securecookie/Makefile b/vendor/github.com/gorilla/securecookie/Makefile new file mode 100644 index 00000000..2b9008a2 --- /dev/null +++ b/vendor/github.com/gorilla/securecookie/Makefile @@ -0,0 +1,39 @@ +GO_LINT=$(shell which golangci-lint 2> /dev/null || echo '') +GO_LINT_URI=github.com/golangci/golangci-lint/cmd/golangci-lint@latest + +GO_SEC=$(shell which gosec 2> /dev/null || echo '') +GO_SEC_URI=github.com/securego/gosec/v2/cmd/gosec@latest + +GO_VULNCHECK=$(shell which govulncheck 2> /dev/null || echo '') +GO_VULNCHECK_URI=golang.org/x/vuln/cmd/govulncheck@latest + +.PHONY: golangci-lint +golangci-lint: + $(if $(GO_LINT), ,go install $(GO_LINT_URI)) + @echo "##### Running golangci-lint" + golangci-lint run -v + +.PHONY: gosec +gosec: + $(if $(GO_SEC), ,go install $(GO_SEC_URI)) + @echo "##### Running gosec" + gosec ./... + +.PHONY: govulncheck +govulncheck: + $(if $(GO_VULNCHECK), ,go install $(GO_VULNCHECK_URI)) + @echo "##### Running govulncheck" + govulncheck ./... + +.PHONY: verify +verify: golangci-lint gosec govulncheck + +.PHONY: test +test: + @echo "##### Running tests" + go test -race -cover -coverprofile=coverage.coverprofile -covermode=atomic -v ./... + +.PHONY: fuzz +fuzz: + @echo "##### Running fuzz tests" + go test -v -fuzz FuzzEncodeDecode -fuzztime 60s diff --git a/vendor/github.com/gorilla/securecookie/README.md b/vendor/github.com/gorilla/securecookie/README.md index aa7bd1a5..c3b9815d 100644 --- a/vendor/github.com/gorilla/securecookie/README.md +++ b/vendor/github.com/gorilla/securecookie/README.md @@ -1,10 +1,13 @@ -securecookie -============ -[![GoDoc](https://godoc.org/github.com/gorilla/securecookie?status.svg)](https://godoc.org/github.com/gorilla/securecookie) [![Build Status](https://travis-ci.org/gorilla/securecookie.png?branch=master)](https://travis-ci.org/gorilla/securecookie) -[![Sourcegraph](https://sourcegraph.com/github.com/gorilla/securecookie/-/badge.svg)](https://sourcegraph.com/github.com/gorilla/securecookie?badge) +# gorilla/securecookie +![testing](https://github.com/gorilla/securecookie/actions/workflows/test.yml/badge.svg) +[![codecov](https://codecov.io/github/gorilla/securecookie/branch/main/graph/badge.svg)](https://codecov.io/github/gorilla/securecookie) +[![godoc](https://godoc.org/github.com/gorilla/securecookie?status.svg)](https://godoc.org/github.com/gorilla/securecookie) +[![sourcegraph](https://sourcegraph.com/github.com/gorilla/securecookie/-/badge.svg)](https://sourcegraph.com/github.com/gorilla/securecookie?badge) -securecookie encodes and decodes authenticated and optionally encrypted +![Gorilla Logo](https://github.com/gorilla/.github/assets/53367916/d92caabf-98e0-473e-bfbf-ab554ba435e5) + +securecookie encodes and decodes authenticated and optionally encrypted cookie values. Secure cookies can't be forged, because their values are validated using HMAC. @@ -33,7 +36,10 @@ to not use encryption. If set, the length must correspond to the block size of the encryption algorithm. For AES, used by default, valid lengths are 16, 24, or 32 bytes to select AES-128, AES-192, or AES-256. -Strong keys can be created using the convenience function GenerateRandomKey(). +Strong keys can be created using the convenience function +`GenerateRandomKey()`. Note that keys created using `GenerateRandomKey()` are not +automatically persisted. New keys will be created when the application is +restarted, and previously issued cookies will not be able to be decoded. Once a SecureCookie instance is set, use it to encode a cookie value: @@ -75,6 +81,64 @@ registered first using gob.Register(). For basic types this is not needed; it works out of the box. An optional JSON encoder that uses `encoding/json` is available for types compatible with JSON. +### Key Rotation +Rotating keys is an important part of any security strategy. The `EncodeMulti` and +`DecodeMulti` functions allow for multiple keys to be rotated in and out. +For example, let's take a system that stores keys in a map: + +```go +// keys stored in a map will not be persisted between restarts +// a more persistent storage should be considered for production applications. +var cookies = map[string]*securecookie.SecureCookie{ + "previous": securecookie.New( + securecookie.GenerateRandomKey(64), + securecookie.GenerateRandomKey(32), + ), + "current": securecookie.New( + securecookie.GenerateRandomKey(64), + securecookie.GenerateRandomKey(32), + ), +} +``` + +Using the current key to encode new cookies: +```go +func SetCookieHandler(w http.ResponseWriter, r *http.Request) { + value := map[string]string{ + "foo": "bar", + } + if encoded, err := securecookie.EncodeMulti("cookie-name", value, cookies["current"]); err == nil { + cookie := &http.Cookie{ + Name: "cookie-name", + Value: encoded, + Path: "/", + } + http.SetCookie(w, cookie) + } +} +``` + +Later, decode cookies. Check against all valid keys: +```go +func ReadCookieHandler(w http.ResponseWriter, r *http.Request) { + if cookie, err := r.Cookie("cookie-name"); err == nil { + value := make(map[string]string) + err = securecookie.DecodeMulti("cookie-name", cookie.Value, &value, cookies["current"], cookies["previous"]) + if err == nil { + fmt.Fprintf(w, "The value of foo is %q", value["foo"]) + } + } +} +``` + +Rotate the keys. This strategy allows previously issued cookies to be valid until the next rotation: +```go +func Rotate(newCookie *securecookie.SecureCookie) { + cookies["previous"] = cookies["current"] + cookies["current"] = newCookie +} +``` + ## License BSD licensed. See the LICENSE file for details. diff --git a/vendor/github.com/gorilla/securecookie/fuzz.go b/vendor/github.com/gorilla/securecookie/fuzz.go deleted file mode 100644 index e4d0534e..00000000 --- a/vendor/github.com/gorilla/securecookie/fuzz.go +++ /dev/null @@ -1,25 +0,0 @@ -// +build gofuzz - -package securecookie - -var hashKey = []byte("very-secret12345") -var blockKey = []byte("a-lot-secret1234") -var s = New(hashKey, blockKey) - -type Cookie struct { - B bool - I int - S string -} - -func Fuzz(data []byte) int { - datas := string(data) - var c Cookie - if err := s.Decode("fuzz", datas, &c); err != nil { - return 0 - } - if _, err := s.Encode("fuzz", c); err != nil { - panic(err) - } - return 1 -} diff --git a/vendor/github.com/gorilla/securecookie/securecookie.go b/vendor/github.com/gorilla/securecookie/securecookie.go index cd4e0976..4d5ea860 100644 --- a/vendor/github.com/gorilla/securecookie/securecookie.go +++ b/vendor/github.com/gorilla/securecookie/securecookie.go @@ -124,7 +124,7 @@ type Codec interface { // GenerateRandomKey(). It is recommended to use a key with 32 or 64 bytes. // // blockKey is optional, used to encrypt values. Create it using -// GenerateRandomKey(). The key length must correspond to the block size +// GenerateRandomKey(). The key length must correspond to the key size // of the encryption algorithm. For AES, used by default, valid lengths are // 16, 24, or 32 bytes to select AES-128, AES-192, or AES-256. // The default encoder used for cookie serialization is encoding/gob. @@ -141,7 +141,7 @@ func New(hashKey, blockKey []byte) *SecureCookie { maxLength: 4096, sz: GobEncoder{}, } - if hashKey == nil { + if len(hashKey) == 0 { s.err = errHashKeyNotSet } if blockKey != nil { @@ -286,7 +286,7 @@ func (s *SecureCookie) Encode(name string, value interface{}) (string, error) { b = encode(b) // 5. Check length. if s.maxLength != 0 && len(b) > s.maxLength { - return "", errEncodedValueTooLong + return "", fmt.Errorf("%s: %d", errEncodedValueTooLong, len(b)) } // Done. return string(b), nil @@ -310,7 +310,7 @@ func (s *SecureCookie) Decode(name, value string, dst interface{}) error { } // 1. Check length. if s.maxLength != 0 && len(value) > s.maxLength { - return errValueToDecodeTooLong + return fmt.Errorf("%s: %d", errValueToDecodeTooLong, len(value)) } // 2. Decode from base64. b, err := decode([]byte(value)) @@ -391,7 +391,7 @@ func verifyMac(h hash.Hash, value []byte, mac []byte) error { // encrypt encrypts a value using the given block in counter mode. // -// A random initialization vector (http://goo.gl/zF67k) with the length of the +// A random initialization vector ( https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Initialization_vector_(IV) ) with the length of the // block size is prepended to the resulting ciphertext. func encrypt(block cipher.Block, value []byte) ([]byte, error) { iv := GenerateRandomKey(block.BlockSize()) @@ -408,7 +408,7 @@ func encrypt(block cipher.Block, value []byte) ([]byte, error) { // decrypt decrypts a value using the given block in counter mode. // // The value to be decrypted must be prepended by a initialization vector -// (http://goo.gl/zF67k) with the length of the block size. +// ( https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Initialization_vector_(IV) ) with the length of the block size. func decrypt(block cipher.Block, value []byte) ([]byte, error) { size := block.BlockSize() if len(value) > size { @@ -506,6 +506,10 @@ func decode(value []byte) ([]byte, error) { // GenerateRandomKey creates a random key with the given length in bytes. // On failure, returns nil. // +// Note that keys created using `GenerateRandomKey()` are not automatically +// persisted. New keys will be created when the application is restarted, and +// previously issued cookies will not be able to be decoded. +// // Callers should explicitly check for the possibility of a nil return, treat // it as a failure of the system random number generator, and not continue. func GenerateRandomKey(length int) []byte { @@ -525,22 +529,21 @@ func GenerateRandomKey(length int) []byte { // // Example: // -// codecs := securecookie.CodecsFromPairs( -// []byte("new-hash-key"), -// []byte("new-block-key"), -// []byte("old-hash-key"), -// []byte("old-block-key"), -// ) -// -// // Modify each instance. -// for _, s := range codecs { -// if cookie, ok := s.(*securecookie.SecureCookie); ok { -// cookie.MaxAge(86400 * 7) -// cookie.SetSerializer(securecookie.JSONEncoder{}) -// cookie.HashFunc(sha512.New512_256) -// } -// } +// codecs := securecookie.CodecsFromPairs( +// []byte("new-hash-key"), +// []byte("new-block-key"), +// []byte("old-hash-key"), +// []byte("old-block-key"), +// ) // +// // Modify each instance. +// for _, s := range codecs { +// if cookie, ok := s.(*securecookie.SecureCookie); ok { +// cookie.MaxAge(86400 * 7) +// cookie.SetSerializer(securecookie.JSONEncoder{}) +// cookie.HashFunc(sha512.New512_256) +// } +// } func CodecsFromPairs(keyPairs ...[]byte) []Codec { codecs := make([]Codec, len(keyPairs)/2+len(keyPairs)%2) for i := 0; i < len(keyPairs); i += 2 { diff --git a/vendor/github.com/hashicorp/yamux/LICENSE b/vendor/github.com/hashicorp/yamux/LICENSE index f0e5c79e..b7df72ed 100644 --- a/vendor/github.com/hashicorp/yamux/LICENSE +++ b/vendor/github.com/hashicorp/yamux/LICENSE @@ -1,3 +1,5 @@ +Copyright (c) 2014 HashiCorp, Inc. + Mozilla Public License, version 2.0 1. Definitions diff --git a/vendor/github.com/hashicorp/yamux/mux.go b/vendor/github.com/hashicorp/yamux/mux.go index 0c3e67b0..d3d5b3fe 100644 --- a/vendor/github.com/hashicorp/yamux/mux.go +++ b/vendor/github.com/hashicorp/yamux/mux.go @@ -3,7 +3,6 @@ package yamux import ( "fmt" "io" - "log" "os" "time" ) @@ -51,7 +50,12 @@ type Config struct { // Logger is used to pass in the logger to be used. Either Logger or // LogOutput can be set, not both. - Logger *log.Logger + Logger Logger +} + +func (c *Config) Clone() *Config { + c2 := *c + return &c2 } // DefaultConfig is used to return a default configuration diff --git a/vendor/github.com/hashicorp/yamux/session.go b/vendor/github.com/hashicorp/yamux/session.go index 38fe3ed1..c08c4dac 100644 --- a/vendor/github.com/hashicorp/yamux/session.go +++ b/vendor/github.com/hashicorp/yamux/session.go @@ -3,6 +3,7 @@ package yamux import ( "bufio" "bytes" + "context" "fmt" "io" "io/ioutil" @@ -34,7 +35,7 @@ type Session struct { config *Config // logger is used for our logs - logger *log.Logger + logger Logger // conn is the underlying connection conn io.ReadWriteCloser @@ -250,6 +251,22 @@ func (s *Session) AcceptStream() (*Stream, error) { } } +// AcceptStream is used to block until the next available stream +// is ready to be accepted. +func (s *Session) AcceptStreamWithContext(ctx context.Context) (*Stream, error) { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case stream := <-s.acceptCh: + if err := stream.sendWindowUpdate(); err != nil { + return nil, err + } + return stream, nil + case <-s.shutdownCh: + return nil, s.shutdownErr + } +} + // Close is used to close the session and all streams. // Attempts to send a GoAway before closing the connection. func (s *Session) Close() error { @@ -339,7 +356,7 @@ func (s *Session) Ping() (time.Duration, error) { } // Compute the RTT - return time.Now().Sub(start), nil + return time.Since(start), nil } // keepalive is a long running goroutine that periodically does diff --git a/vendor/github.com/hashicorp/yamux/spec.md b/vendor/github.com/hashicorp/yamux/spec.md index 183d797b..cb98618b 100644 --- a/vendor/github.com/hashicorp/yamux/spec.md +++ b/vendor/github.com/hashicorp/yamux/spec.md @@ -100,7 +100,7 @@ fire a request without waiting for the RTT of the ACK. This does introduce the possibility of a connection being rejected after data has been sent already. This is a slight semantic difference -from TCP, where the conection cannot be refused after it is opened. +from TCP, where the connection cannot be refused after it is opened. Clients should be prepared to handle this by checking for an error that indicates a RST was received. diff --git a/vendor/github.com/hashicorp/yamux/stream.go b/vendor/github.com/hashicorp/yamux/stream.go index 23d08fcc..31168d92 100644 --- a/vendor/github.com/hashicorp/yamux/stream.go +++ b/vendor/github.com/hashicorp/yamux/stream.go @@ -95,10 +95,12 @@ func (s *Stream) StreamID() uint32 { func (s *Stream) Read(b []byte) (n int, err error) { defer asyncNotify(s.recvNotifyCh) START: + + // If the stream is closed and there's no data buffered, return EOF s.stateLock.Lock() switch s.state { case streamLocalClose: - fallthrough + // LocalClose only prohibits further local writes. Handle reads normally. case streamRemoteClose: fallthrough case streamClosed: @@ -138,19 +140,22 @@ WAIT: var timer *time.Timer readDeadline := s.readDeadline.Load().(time.Time) if !readDeadline.IsZero() { - delay := readDeadline.Sub(time.Now()) + delay := time.Until(readDeadline) timer = time.NewTimer(delay) timeout = timer.C } select { + case <-s.session.shutdownCh: case <-s.recvNotifyCh: - if timer != nil { - timer.Stop() - } - goto START case <-timeout: return 0, ErrTimeout } + if timer != nil { + if !timer.Stop() { + <-timeout + } + } + goto START } // Write is used to write to the stream @@ -219,18 +224,25 @@ START: WAIT: var timeout <-chan time.Time + var timer *time.Timer writeDeadline := s.writeDeadline.Load().(time.Time) if !writeDeadline.IsZero() { - delay := writeDeadline.Sub(time.Now()) - timeout = time.After(delay) + delay := time.Until(writeDeadline) + timer = time.NewTimer(delay) + timeout = timer.C } select { + case <-s.session.shutdownCh: case <-s.sendNotifyCh: - goto START case <-timeout: return 0, ErrTimeout } - return 0, nil + if timer != nil { + if !timer.Stop() { + <-timeout + } + } + goto START } // sendFlags determines any flags that are appropriate @@ -380,7 +392,7 @@ func (s *Stream) closeTimeout() { defer s.sendLock.Unlock() hdr := header(make([]byte, headerSize)) hdr.encode(typeWindowUpdate, flagRST, s.id, 0) - s.session.sendNoWait(hdr) + _ = s.session.sendNoWait(hdr) } // forceClose is used for when the session is exiting diff --git a/vendor/github.com/hashicorp/yamux/util.go b/vendor/github.com/hashicorp/yamux/util.go index 8a73e924..4462518a 100644 --- a/vendor/github.com/hashicorp/yamux/util.go +++ b/vendor/github.com/hashicorp/yamux/util.go @@ -5,6 +5,13 @@ import ( "time" ) +// Logger is a abstract of *log.Logger +type Logger interface { + Print(v ...interface{}) + Printf(format string, v ...interface{}) + Println(v ...interface{}) +} + var ( timerPool = &sync.Pool{ New: func() interface{} { diff --git a/vendor/github.com/itchyny/gojq/.dockerignore b/vendor/github.com/itchyny/gojq/.dockerignore index ac001636..54095155 100644 --- a/vendor/github.com/itchyny/gojq/.dockerignore +++ b/vendor/github.com/itchyny/gojq/.dockerignore @@ -1,7 +1,6 @@ /gojq /goxz /CREDITS -/._* /y.output *.exe *.test diff --git a/vendor/github.com/itchyny/gojq/.gitattributes b/vendor/github.com/itchyny/gojq/.gitattributes index 9c2075be..797f3959 100644 --- a/vendor/github.com/itchyny/gojq/.gitattributes +++ b/vendor/github.com/itchyny/gojq/.gitattributes @@ -1,2 +1,3 @@ **/testdata/** binary -/builtin.go eol=lf +/builtin.go eol=lf linguist-generated=true +/parser.go eol=lf linguist-generated=true diff --git a/vendor/github.com/itchyny/gojq/.gitignore b/vendor/github.com/itchyny/gojq/.gitignore index e350f930..bbeb991f 100644 --- a/vendor/github.com/itchyny/gojq/.gitignore +++ b/vendor/github.com/itchyny/gojq/.gitignore @@ -1,7 +1,6 @@ /gojq /goxz /CREDITS -/._* /y.output *.exe *.test diff --git a/vendor/github.com/itchyny/gojq/CHANGELOG.md b/vendor/github.com/itchyny/gojq/CHANGELOG.md index 8477cd38..9ae257a2 100644 --- a/vendor/github.com/itchyny/gojq/CHANGELOG.md +++ b/vendor/github.com/itchyny/gojq/CHANGELOG.md @@ -1,4 +1,41 @@ # Changelog +## [v0.12.16](https://github.com/itchyny/gojq/compare/v0.12.15..v0.12.16) (2024-06-01) +* fix offset of query parsing error on multi-byte characters +* fix tests of `exp10` and `atan2` failing on some platforms +* fix `debug/1` to be available only when `debug/0` is defined +* improve parser to allow binary operators as object values +* improve compiler to emit error if query is missing + +## [v0.12.15](https://github.com/itchyny/gojq/compare/v0.12.14..v0.12.15) (2024-04-01) +* implement `ltrim`, `rtrim`, and `trim` functions +* implement `gojq.ParseError` for getting the offset and token of query parsing error +* implement `gojq.HaltError` for detecting halt errors and stopping outer iteration +* fix object construction with duplicate keys (`{x:0,y:1} | {a:.x,a:.y}`) +* fix `halt` and `halt_error` functions to stop the command execution immediately +* fix variable scope of binding syntax (`"a" as $v | def f: $v; "b" as $v | f`) +* fix pre-defined variables to be available in initial modules (`$ARGS` in `~/.jq`) +* fix `ltrimstr` and `rtrimstr` functions to emit error on non-string input +* fix `nearbyint` and `rint` functions to round ties to even +* improve parser to allow `reduce`, `foreach`, `if`, `try`-`catch` syntax as object values +* remove `pow10` in favor of `exp10`, define `scalbn` and `scalbln` by `ldexp` + +## [v0.12.14](https://github.com/itchyny/gojq/compare/v0.12.13..v0.12.14) (2023-12-01) +* implement `abs`, `pick`, and `debug/1` functions +* implement `--raw-output0` option, and remove `--nul-output` (`-0`) option +* fix string multiplication by zero to emit an empty string +* fix zero divided by zero to emit an error, not `nan` +* fix modulo operator to emit `nan` if either side is `nan` +* fix `implode` function to emit replacement characters on invalid code points +* fix `stderr` function to output strings in raw format +* fix `error` function to throw an error even for `null` +* fix `walk` function on multiple outputs arguments +* fix `--from-file` option to work with `--args` and `--jsonargs` options +* fix the default module search path `../lib` relative to the executable +* improve query parser to support comment continuation with backslash +* improve `modulemeta` function to include defined function names in the module +* improve search path of `import` and `include` directives to support `$ORIGIN` expansion +* remove deprecated `leaf_paths` function + ## [v0.12.13](https://github.com/itchyny/gojq/compare/v0.12.12..v0.12.13) (2023-06-01) * implement `@urid` format string to decode URI values * fix functions returning arrays not to emit nil slices (`flatten`, `group_by`, diff --git a/vendor/github.com/itchyny/gojq/Dockerfile b/vendor/github.com/itchyny/gojq/Dockerfile index 51f8632e..d5e0dce6 100644 --- a/vendor/github.com/itchyny/gojq/Dockerfile +++ b/vendor/github.com/itchyny/gojq/Dockerfile @@ -1,6 +1,8 @@ -FROM golang:1.20 AS builder +FROM golang:1.22 AS builder WORKDIR /app +COPY go.* ./ +RUN go mod download COPY . . ENV CGO_ENABLED 0 RUN make build diff --git a/vendor/github.com/itchyny/gojq/LICENSE b/vendor/github.com/itchyny/gojq/LICENSE index 3f4fcb26..fe590040 100644 --- a/vendor/github.com/itchyny/gojq/LICENSE +++ b/vendor/github.com/itchyny/gojq/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2019-2023 itchyny +Copyright (c) 2019-2024 itchyny Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/vendor/github.com/itchyny/gojq/README.md b/vendor/github.com/itchyny/gojq/README.md index ca13b2f6..7b34f93c 100644 --- a/vendor/github.com/itchyny/gojq/README.md +++ b/vendor/github.com/itchyny/gojq/README.md @@ -1,5 +1,5 @@ # gojq -[![CI Status](https://github.com/itchyny/gojq/workflows/CI/badge.svg)](https://github.com/itchyny/gojq/actions) +[![CI Status](https://github.com/itchyny/gojq/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/itchyny/gojq/actions?query=branch:main) [![Go Report Card](https://goreportcard.com/badge/github.com/itchyny/gojq)](https://goreportcard.com/report/github.com/itchyny/gojq) [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/itchyny/gojq/blob/main/LICENSE) [![release](https://img.shields.io/github/release/itchyny/gojq/all.svg)](https://github.com/itchyny/gojq/releases) @@ -77,10 +77,9 @@ docker run -i --rm ghcr.io/itchyny/gojq - gojq implements nice error messages for invalid query and JSON input. The error message of jq is sometimes difficult to tell where to fix the query. - gojq does not keep the order of object keys. I understand this might cause problems for some scripts but basically, we should not rely on the order of object keys. Due to this limitation, gojq does not have `keys_unsorted` function and `--sort-keys` (`-S`) option. I would implement when ordered map is implemented in the standard library of Go but I'm less motivated. - gojq supports arbitrary-precision integer calculation while jq does not; jq loses the precision of large integers when calculation is involved. Note that even with gojq, all mathematical functions, including `floor` and `round`, convert integers to floating-point numbers; only addition, subtraction, multiplication, modulo, and division operators (when divisible) keep the integer precision. To calculate floor division of integers without losing the precision, use `def idivide($n): (. - . % $n) / $n;`. To round down floating-point numbers to integers, use `def ifloor: floor | tostring | tonumber;`, but note that this function does not work with large floating-point numbers and also loses the precision of large integers. -- gojq fixes various bugs of jq. gojq correctly deletes elements of arrays by `|= empty` ([jq#2051](https://github.com/jqlang/jq/issues/2051)). gojq fixes `try`/`catch` handling ([jq#1859](https://github.com/jqlang/jq/issues/1859), [jq#1885](https://github.com/jqlang/jq/issues/1885), [jq#2140](https://github.com/jqlang/jq/issues/2140)). gojq fixes `nth/2` to output nothing when the count is equal to or larger than the stream size ([jq#1867](https://github.com/jqlang/jq/issues/1867)). gojq consistently counts by characters (not by bytes) in `index`, `rindex`, and `indices` functions; `"12345" | .[index("3"):]` results in `"345"` ([jq#1430](https://github.com/jqlang/jq/issues/1430), [jq#1624](https://github.com/jqlang/jq/issues/1624)). gojq handles overlapping occurrence differently in `rindex` and `indices`; `"ababa" | [rindex("aba"), indices("aba")]` results in `[2,[0,2]]` ([jq#2433](https://github.com/jqlang/jq/issues/2433)). gojq supports string indexing; `"abcde"[2]` ([jq#1520](https://github.com/jqlang/jq/issues/1520)). gojq accepts indexing query `.e0` ([jq#1526](https://github.com/jqlang/jq/issues/1526), [jq#1651](https://github.com/jqlang/jq/issues/1651)), and allows `gsub` to handle patterns including `"^"` ([jq#2148](https://github.com/jqlang/jq/issues/2148)). gojq improves variable lexer to allow using keywords for variable names, especially in binding patterns, also disallows spaces after `$` ([jq#526](https://github.com/jqlang/jq/issues/526)). gojq fixes handling files with no newline characters at the end ([jq#2374](https://github.com/jqlang/jq/issues/2374)). -- gojq truncates down floating-point numbers on indexing (`[0] | .[0.5]` results in `0` not `null`), and slicing (`[0,1,2] | .[0.5:1.5]` results in `[0]` not `[0,1]`). gojq parses unary operators with higher precedence than variable binding (`[-1 as $x | 1,$x]` results in `[1,-1]` not `[-1,-1]`). gojq implements `@uri` to escape all the reserved characters defined in RFC 3986, Sec. 2.2 ([jq#1506](https://github.com/jqlang/jq/issues/1506)), and fixes `@base64d` to allow binary string as the decoded string ([jq#1931](https://github.com/jqlang/jq/issues/1931)). gojq improves time formatting and parsing; deals with `%f` in `strftime` and `strptime` ([jq#1409](https://github.com/jqlang/jq/issues/1409)), parses timezone offsets with `fromdate` and `fromdateiso8601` ([jq#1053](https://github.com/jqlang/jq/issues/1053)), supports timezone name/offset with `%Z`/`%z` in `strptime` ([jq#929](https://github.com/jqlang/jq/issues/929), [jq#2195](https://github.com/jqlang/jq/issues/2195)), and looks up correct timezone during daylight saving time on formatting with `%Z` ([jq#1912](https://github.com/jqlang/jq/issues/1912)). gojq supports nanoseconds in date and time functions. -- gojq does not support some functions intentionally; `get_jq_origin`, `get_prog_origin`, `get_search_list` (unstable, not listed in jq document), `input_line_number`, `$__loc__` (performance issue), `recurse_down` (deprecated in jq). gojq does not support some flags; `--ascii-output, -a` (performance issue), `--seq` (not used commonly), `--sort-keys, -S` (sorts by default because `map[string]any` does not keep the order), `--unbuffered` (unbuffered by default). gojq does not parse JSON extensions supported by jq; `NaN`, `Infinity`, and `[000]`. gojq normalizes floating-point numbers to fit to double-precision floating-point numbers. gojq does not support or behaves differently with some regular expression metacharacters and flags (regular expression engine differences). gojq does not support BOM (`encoding/json` does not support this). gojq disallows using keywords for function names (`def true: .; true` is a confusing query), and module name prefixes in function declarations (using module prefixes like `def m::f: .;` is undocumented). -- gojq supports reading from YAML input (`--yaml-input`) while jq does not. gojq also supports YAML output (`--yaml-output`). gojq supports a few filters missing in jq; `scan/2` ([jq#2207](https://github.com/jqlang/jq/pull/2207)), and `@urid` format string ([jq#2261](https://github.com/jqlang/jq/issues/2261)). +- gojq behaves differently than jq in some features, hoping that jq will fix the behaviors in the future. gojq consistently counts by characters (not by bytes) in `index`, `rindex`, and `indices` functions; `"12345" | .[index("3"):]` results in `"345"` ([jq#1430](https://github.com/jqlang/jq/issues/1430), [jq#1624](https://github.com/jqlang/jq/issues/1624)). gojq supports string indexing; `"abcde"[2]` ([jq#1520](https://github.com/jqlang/jq/issues/1520)). gojq fixes handling files with no newline characters at the end ([jq#2374](https://github.com/jqlang/jq/issues/2374)). gojq consistently truncates down floating-point number indices both in indexing (`[0] | .[0.5]` results in `0`), and slicing (`[0,1,2] | .[0.5:1.5]` results in `[0]`). gojq parses unary operators with higher precedence than variable binding (`[-1 as $x | 1,$x]` results in `[1,-1]` not `[-1,-1]`) ([jq#3053](https://github.com/jqlang/jq/pull/3053)). gojq fixes `@base64d` to allow binary string as the decoded string ([jq#1931](https://github.com/jqlang/jq/issues/1931)). gojq improves time formatting and parsing; deals with `%f` in `strftime` and `strptime` ([jq#1409](https://github.com/jqlang/jq/issues/1409)), parses timezone offsets with `fromdate` and `fromdateiso8601` ([jq#1053](https://github.com/jqlang/jq/issues/1053)), supports timezone name/offset with `%Z`/`%z` in `strptime` ([jq#929](https://github.com/jqlang/jq/issues/929), [jq#2195](https://github.com/jqlang/jq/issues/2195)), and looks up correct timezone during daylight saving time on formatting with `%Z` ([jq#1912](https://github.com/jqlang/jq/issues/1912)). gojq supports nanoseconds in date and time functions. +- gojq does not support some functions intentionally; `get_jq_origin`, `get_prog_origin`, `get_search_list` (unstable, not listed in jq document), `input_line_number`, `$__loc__` (performance issue). gojq does not support some flags; `--ascii-output, -a` (performance issue), `--seq` (not used commonly), `--sort-keys, -S` (sorts by default because `map[string]any` does not keep the order), `--unbuffered` (unbuffered by default). gojq does not parse JSON extensions supported by jq; `NaN`, `Infinity`, and `[000]`. gojq normalizes floating-point numbers to fit to double-precision floating-point numbers. gojq does not support some regular expression metacharacters, backreferences, look-around assertions, and some flags (regular expression engine differences). gojq does not support BOM (`encoding/json` does not support this). gojq disallows using keywords for function names (`def true: .; true` is a confusing query), and module name prefixes in function declarations (using module prefixes like `def m::f: .;` is undocumented). +- gojq supports reading from YAML input (`--yaml-input`) while jq does not. gojq also supports YAML output (`--yaml-output`). gojq supports `@urid` format string ([jq#798](https://github.com/jqlang/jq/issues/798), [jq#2261](https://github.com/jqlang/jq/issues/2261)). ### Color configuration The gojq command automatically disables coloring output when the output is not a tty. @@ -117,6 +116,9 @@ func main() { break } if err, ok := v.(error); ok { + if err, ok := err.(*gojq.HaltError); ok && err.Value() == nil { + break + } log.Fatalln(err) } fmt.Printf("%#v\n", v) @@ -125,12 +127,18 @@ func main() { ``` - Firstly, use [`gojq.Parse(string) (*Query, error)`](https://pkg.go.dev/github.com/itchyny/gojq#Parse) to get the query from a string. + - Use [`gojq.ParseError`](https://pkg.go.dev/github.com/itchyny/gojq#ParseError) to get the error position and token of the parsing error. - Secondly, get the result iterator - using [`query.Run`](https://pkg.go.dev/github.com/itchyny/gojq#Query.Run) or [`query.RunWithContext`](https://pkg.go.dev/github.com/itchyny/gojq#Query.RunWithContext) - or alternatively, compile the query using [`gojq.Compile`](https://pkg.go.dev/github.com/itchyny/gojq#Compile) and then [`code.Run`](https://pkg.go.dev/github.com/itchyny/gojq#Code.Run) or [`code.RunWithContext`](https://pkg.go.dev/github.com/itchyny/gojq#Code.RunWithContext). You can reuse the `*Code` against multiple inputs to avoid compilation of the same query. But for arguments of `code.Run`, do not give values sharing same data between multiple calls. - In either case, you cannot use custom type values as the query input. The type should be `[]any` for an array and `map[string]any` for a map (just like decoded to an `any` using the [encoding/json](https://golang.org/pkg/encoding/json/) package). You can't use `[]int` or `map[string]string`, for example. If you want to query your custom struct, marshal to JSON, unmarshal to `any` and use it as the query input. - Thirdly, iterate through the results using [`iter.Next() (any, bool)`](https://pkg.go.dev/github.com/itchyny/gojq#Iter). The iterator can emit an error so make sure to handle it. The method returns `true` with results, and `false` when the iterator terminates. - - The return type is not `(any, error)` because iterators can emit multiple errors and you can continue after an error. It is difficult for the iterator to tell the termination in this situation. + - The return type is not `(any, error)` because the iterator may emit multiple errors. The `jq` and `gojq` commands stop the iteration on the first error, but the library user can choose to stop the iteration on errors, or to continue until it terminates. + - In any case, it is recommended to stop the iteration on [`gojq.HaltError`](https://pkg.go.dev/github.com/itchyny/gojq#HaltError), which is emitted by `halt` and `halt_error` functions, although these functions are rarely used. + The error implements [`gojq.ValueError`](https://pkg.go.dev/github.com/itchyny/gojq#ValueError), and if the error value is `nil`, stop the iteration without handling the error. + Technically speaking, we can fix the iterator to terminate on the halting error, but it does not terminate at the moment. + The `halt` function in jq not only stops the iteration, but also terminates the command execution, even if there are still input values. + So, gojq leaves it up to the library user how to handle the halting error. - Note that the result iterator may emit infinite number of values; `repeat(0)` and `range(infinite)`. It may stuck with no output value; `def f: f; f`. Use `RunWithContext` when you want to limit the execution time. [`gojq.Compile`](https://pkg.go.dev/github.com/itchyny/gojq#Compile) allows to configure the following compiler options. @@ -146,7 +154,7 @@ func main() { Report bug at [Issues・itchyny/gojq - GitHub](https://github.com/itchyny/gojq/issues). ## Author -itchyny (https://github.com/itchyny) +itchyny () ## License This software is released under the MIT License, see LICENSE. diff --git a/vendor/github.com/itchyny/gojq/_gojq b/vendor/github.com/itchyny/gojq/_gojq index d403a314..01e4c4f7 100644 --- a/vendor/github.com/itchyny/gojq/_gojq +++ b/vendor/github.com/itchyny/gojq/_gojq @@ -3,9 +3,9 @@ _gojq() { _arguments -s -S \ - '(-r --raw-output -j --join-output -0 --nul-output)'{-r,--raw-output}'[output raw strings]' \ - '(-r --raw-output -j --join-output -0 --nul-output)'{-j,--join-output}'[output without newlines]' \ - '(-r --raw-output -j --join-output -0 --nul-output)'{-0,--nul-output}'[output with NUL character]' \ + '(-r --raw-output --raw-output0 -j --join-output)'{-r,--raw-output}'[output raw strings]' \ + '(-r --raw-output -j --join-output)--raw-output0[implies -r with NUL character delimiter]' \ + '(-r --raw-output --raw-output0 -j --join-output)'{-j,--join-output}'[implies -r with no newline delimiter]' \ '(-c --compact-output --indent --tab --yaml-output)'{-c,--compact-output}'[output without pretty-printing]' \ '(-c --compact-output --tab --yaml-output)--indent=[number of spaces for indentation]:indentation count:(2 4 8)' \ '(-c --compact-output --indent --yaml-output)--tab[use tabs for indentation]' \ @@ -17,7 +17,7 @@ _gojq() '(-R --raw-input --yaml-input)--stream[parse input in stream fashion]' \ '(-R --raw-input --stream )--yaml-input[read input as YAML format]' \ '(-s --slurp)'{-s,--slurp}'[read all inputs into an array]' \ - '(-f --from-file 1)'{-f,--from-file}='[load query from file]:filename of jq query:_files' \ + '(-f --from-file 1)'{-f,--from-file}'[load query from file]:filename of jq query:_files' \ '*-L=[directory to search modules from]:module directory:_directories' \ '*--arg[set a string value to a variable]:variable name: :string value' \ '*--argjson[set a JSON value to a variable]:variable name: :JSON value' \ diff --git a/vendor/github.com/itchyny/gojq/builtin.go b/vendor/github.com/itchyny/gojq/builtin.go index ccf31358..89b03dc7 100644 --- a/vendor/github.com/itchyny/gojq/builtin.go +++ b/vendor/github.com/itchyny/gojq/builtin.go @@ -4,65 +4,65 @@ package gojq func init() { builtinFuncDefs = map[string][]*FuncDef{ - "IN": []*FuncDef{&FuncDef{Name: "IN", Args: []string{"s"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{&Query{Left: &Query{Func: "s"}, Op: OpEq, Right: &Query{Func: "."}}, &Query{Func: "."}}}}}}, &FuncDef{Name: "IN", Args: []string{"src", "s"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{&Query{Left: &Query{Func: "src"}, Op: OpEq, Right: &Query{Func: "s"}}, &Query{Func: "."}}}}}}}, - "INDEX": []*FuncDef{&FuncDef{Name: "INDEX", Args: []string{"stream", "idx_expr"}, Body: &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "stream"}}, Pattern: &Pattern{Name: "$row"}, Start: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{}}}, Update: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Left: &Query{Func: "$row"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "idx_expr"}, Op: OpPipe, Right: &Query{Func: "tostring"}}}}}}, Op: OpAssign, Right: &Query{Func: "$row"}}}}}}, &FuncDef{Name: "INDEX", Args: []string{"idx_expr"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "INDEX", Args: []*Query{&Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, &Query{Func: "idx_expr"}}}}}}}, - "JOIN": []*FuncDef{&FuncDef{Name: "JOIN", Args: []string{"$idx", "idx_expr"}, Body: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$idx"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Func: "idx_expr"}}}}}}}}}}}}}}}, &FuncDef{Name: "JOIN", Args: []string{"$idx", "stream", "idx_expr"}, Body: &Query{Left: &Query{Func: "stream"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$idx"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Func: "idx_expr"}}}}}}}}}}}}, &FuncDef{Name: "JOIN", Args: []string{"$idx", "stream", "idx_expr", "join_expr"}, Body: &Query{Left: &Query{Func: "stream"}, Op: OpPipe, Right: &Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$idx"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Func: "idx_expr"}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "join_expr"}}}}}, - "_assign": []*FuncDef{}, - "_modify": []*FuncDef{}, - "all": []*FuncDef{&FuncDef{Name: "all", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "all", Args: []*Query{&Query{Func: "."}}}}}}, &FuncDef{Name: "all", Args: []string{"y"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "all", Args: []*Query{&Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, &Query{Func: "y"}}}}}}, &FuncDef{Name: "all", Args: []string{"g", "y"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "isempty", Args: []*Query{&Query{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "y"}, Op: OpPipe, Right: &Query{Func: "not"}}}}}}}}}}}}}, - "any": []*FuncDef{&FuncDef{Name: "any", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{&Query{Func: "."}}}}}}, &FuncDef{Name: "any", Args: []string{"y"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{&Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, &Query{Func: "y"}}}}}}, &FuncDef{Name: "any", Args: []string{"g", "y"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "isempty", Args: []*Query{&Query{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Func: "y"}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "not"}}}}, - "arrays": []*FuncDef{&FuncDef{Name: "arrays", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}}}}}}}, - "booleans": []*FuncDef{&FuncDef{Name: "booleans", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "boolean"}}}}}}}}}}, - "capture": []*FuncDef{&FuncDef{Name: "capture", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "capture", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "capture", Args: []string{"$re", "$flags"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "$flags"}}}}}, Op: OpPipe, Right: &Query{Func: "_capture"}}}}, - "combinations": []*FuncDef{&FuncDef{Name: "combinations", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}, Else: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, SuffixList: []*Suffix{&Suffix{Iter: true}, &Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$x"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "$x"}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}, IsSlice: true}}}, Op: OpPipe, Right: &Query{Func: "combinations"}}}}}}}}}}}}}}, &FuncDef{Name: "combinations", Args: []string{"n"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "limit", Args: []*Query{&Query{Func: "n"}, &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "repeat", Args: []*Query{&Query{Func: "."}}}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "combinations"}}}}, - "del": []*FuncDef{&FuncDef{Name: "del", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "delpaths", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{&Query{Func: "f"}}}}}}}}}}}}}}, - "finites": []*FuncDef{&FuncDef{Name: "finites", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Func: "isfinite"}}}}}}}, - "first": []*FuncDef{&FuncDef{Name: "first", Body: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}, &FuncDef{Name: "first", Args: []string{"g"}, Body: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}}}}}}}}, - "fromdate": []*FuncDef{&FuncDef{Name: "fromdate", Body: &Query{Func: "fromdateiso8601"}}}, - "fromdateiso8601": []*FuncDef{&FuncDef{Name: "fromdateiso8601", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "strptime", Args: []*Query{&Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "%Y-%m-%dT%H:%M:%S%z"}}}}}}}, Op: OpPipe, Right: &Query{Func: "mktime"}}}}, - "fromstream": []*FuncDef{&FuncDef{Name: "fromstream", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{KeyVals: []*ObjectKeyVal{&ObjectKeyVal{Key: "x", Val: &ObjectVal{Queries: []*Query{&Query{Func: "null"}}}}, &ObjectKeyVal{Key: "e", Val: &ObjectVal{Queries: []*Query{&Query{Func: "false"}}}}}}, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$init"}}, Body: &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "f"}}, Pattern: &Pattern{Name: "$i"}, Start: &Query{Func: "$init"}, Update: &Query{Left: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "e"}}}, Then: &Query{Func: "$init"}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "$i"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "2"}}}}, Then: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "setpath", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "e"}}}}}}, &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "setpath", Args: []*Query{&Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "x"}}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}}, &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}}}}}}, Else: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "setpath", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "e"}}}}}}, &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}}}}}}, Extract: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "e"}}}, Then: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "x"}}}, Else: &Query{Func: "empty"}}}}}}}}}}}}}}, - "group_by": []*FuncDef{&FuncDef{Name: "group_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_group_by", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}}, - "gsub": []*FuncDef{&FuncDef{Name: "gsub", Args: []string{"$re", "str"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "sub", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "str"}, &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "g"}}}}}}}}, &FuncDef{Name: "gsub", Args: []string{"$re", "str", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "sub", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "str"}, &Query{Left: &Query{Func: "$flags"}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "g"}}}}}}}}}}, - "in": []*FuncDef{&FuncDef{Name: "in", Args: []string{"xs"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$x"}}, Body: &Query{Left: &Query{Func: "xs"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "has", Args: []*Query{&Query{Func: "$x"}}}}}}}}}}}}}, - "inputs": []*FuncDef{&FuncDef{Name: "inputs", Body: &Query{Term: &Term{Type: TermTypeTry, Try: &Try{Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "repeat", Args: []*Query{&Query{Func: "input"}}}}}, Catch: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "break"}}}}, Then: &Query{Func: "empty"}, Else: &Query{Func: "error"}}}}}}}}}, - "inside": []*FuncDef{&FuncDef{Name: "inside", Args: []string{"xs"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$x"}}, Body: &Query{Left: &Query{Func: "xs"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "contains", Args: []*Query{&Query{Func: "$x"}}}}}}}}}}}}}, - "isempty": []*FuncDef{&FuncDef{Name: "isempty", Args: []string{"g"}, Body: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "false"}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}}}}}, Op: OpComma, Right: &Query{Func: "true"}}}}}}}, - "iterables": []*FuncDef{&FuncDef{Name: "iterables", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpPipe, Right: &Query{Left: &Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}, Op: OpOr, Right: &Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}}}}}}}}}, - "last": []*FuncDef{&FuncDef{Name: "last", Body: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeUnary, Unary: &Unary{Op: OpSub, Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}}, &FuncDef{Name: "last", Args: []string{"g"}, Body: &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "g"}}, Pattern: &Pattern{Name: "$item"}, Start: &Query{Func: "null"}, Update: &Query{Func: "$item"}}}}}}, - "leaf_paths": []*FuncDef{&FuncDef{Name: "leaf_paths", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "paths", Args: []*Query{&Query{Func: "scalars"}}}}}}}, - "limit": []*FuncDef{&FuncDef{Name: "limit", Args: []string{"$n", "g"}, Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "$n"}, Op: OpGt, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "g"}}, Pattern: &Pattern{Name: "$item"}, Start: &Query{Func: "$n"}, Update: &Query{Left: &Query{Func: "."}, Op: OpSub, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}, Extract: &Query{Left: &Query{Func: "$item"}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "."}, Op: OpLe, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}, Else: &Query{Func: "empty"}}}}}}}}}}}, Elif: []*IfElif{&IfElif{Cond: &Query{Left: &Query{Func: "$n"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Func: "empty"}}}, Else: &Query{Func: "g"}}}}}}, - "map": []*FuncDef{&FuncDef{Name: "map", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, Op: OpPipe, Right: &Query{Func: "f"}}}}}}}, - "map_values": []*FuncDef{&FuncDef{Name: "map_values", Args: []string{"f"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}}}}, Op: OpModify, Right: &Query{Func: "f"}}}}, - "match": []*FuncDef{&FuncDef{Name: "match", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "match", Args: []string{"$re", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_match", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "$flags"}, &Query{Func: "false"}}}, SuffixList: []*Suffix{&Suffix{Iter: true}}}}}}, - "max_by": []*FuncDef{&FuncDef{Name: "max_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_max_by", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}}, - "min_by": []*FuncDef{&FuncDef{Name: "min_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_min_by", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}}, - "normals": []*FuncDef{&FuncDef{Name: "normals", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Func: "isnormal"}}}}}}}, - "not": []*FuncDef{&FuncDef{Name: "not", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "."}, Then: &Query{Func: "false"}, Else: &Query{Func: "true"}}}}}}, - "nth": []*FuncDef{&FuncDef{Name: "nth", Args: []string{"$n"}, Body: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Func: "$n"}}}}}, &FuncDef{Name: "nth", Args: []string{"$n", "g"}, Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "$n"}, Op: OpLt, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "error", Args: []*Query{&Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "nth doesn't support negative indices"}}}}}}}, Else: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "g"}}, Pattern: &Pattern{Name: "$item"}, Start: &Query{Left: &Query{Func: "$n"}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}, Update: &Query{Left: &Query{Func: "."}, Op: OpSub, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}, Extract: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "."}, Op: OpLe, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Left: &Query{Func: "$item"}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}}, Else: &Query{Func: "empty"}}}}}}}}}}}}}}}, - "nulls": []*FuncDef{&FuncDef{Name: "nulls", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Func: "null"}}}}}}}}, - "numbers": []*FuncDef{&FuncDef{Name: "numbers", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "number"}}}}}}}}}}, - "objects": []*FuncDef{&FuncDef{Name: "objects", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}}}}}}}, - "paths": []*FuncDef{&FuncDef{Name: "paths", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{&Query{Func: ".."}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}}}}}}}}, &FuncDef{Name: "paths", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "paths"}, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$p"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "getpath", Args: []*Query{&Query{Func: "$p"}}}}}, Op: OpPipe, Right: &Query{Func: "f"}}}}}}, Op: OpPipe, Right: &Query{Func: "$p"}}}}}}}}}, - "range": []*FuncDef{&FuncDef{Name: "range", Args: []string{"$end"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_range", Args: []*Query{&Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}, &Query{Func: "$end"}, &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}, &FuncDef{Name: "range", Args: []string{"$start", "$end"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_range", Args: []*Query{&Query{Func: "$start"}, &Query{Func: "$end"}, &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}, &FuncDef{Name: "range", Args: []string{"$start", "$end", "$step"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_range", Args: []*Query{&Query{Func: "$start"}, &Query{Func: "$end"}, &Query{Func: "$step"}}}}}}}, - "recurse": []*FuncDef{&FuncDef{Name: "recurse", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "recurse", Args: []*Query{&Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}, &Suffix{Optional: true}}}}}}}}}, &FuncDef{Name: "recurse", Args: []string{"f"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "r", Body: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "f"}, Op: OpPipe, Right: &Query{Func: "r"}}}}}}}, Func: "r"}}, &FuncDef{Name: "recurse", Args: []string{"f", "cond"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "r", Body: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "f"}, Op: OpPipe, Right: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Func: "cond"}}}}}, Op: OpPipe, Right: &Query{Func: "r"}}}}}}}}, Func: "r"}}}, - "repeat": []*FuncDef{&FuncDef{Name: "repeat", Args: []string{"f"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "_repeat", Body: &Query{Left: &Query{Func: "f"}, Op: OpComma, Right: &Query{Func: "_repeat"}}}}, Func: "_repeat"}}}, - "scalars": []*FuncDef{&FuncDef{Name: "scalars", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpPipe, Right: &Query{Left: &Query{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}, Op: OpAnd, Right: &Query{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}}}}}}}}}, - "scan": []*FuncDef{&FuncDef{Name: "scan", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "scan", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "scan", Args: []string{"$re", "$flags"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{&Query{Func: "$re"}, &Query{Left: &Query{Func: "$flags"}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "g"}}}}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "captures"}}}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}}, Then: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "string"}}}, Else: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "captures"}, SuffixList: []*Suffix{&Suffix{Iter: true}, &Suffix{Index: &Index{Name: "string"}}}}}}}}}}}}}}, - "select": []*FuncDef{&FuncDef{Name: "select", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "f"}, Then: &Query{Func: "."}, Else: &Query{Func: "empty"}}}}}}, - "sort_by": []*FuncDef{&FuncDef{Name: "sort_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_sort_by", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}}, - "splits": []*FuncDef{&FuncDef{Name: "splits", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "splits", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "splits", Args: []string{"$re", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "split", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "$flags"}}}, SuffixList: []*Suffix{&Suffix{Iter: true}}}}}}, - "strings": []*FuncDef{&FuncDef{Name: "strings", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "string"}}}}}}}}}}, - "sub": []*FuncDef{&FuncDef{Name: "sub", Args: []string{"$re", "str"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "sub", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "str"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "sub", Args: []string{"$re", "str", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$str"}}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "_sub", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "matches"}}}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}}, Then: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$str"}, SuffixList: []*Suffix{&Suffix{Index: &Index{End: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "offset"}}}, IsSlice: true}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "string"}}}}, Else: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "matches"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeUnary, Unary: &Unary{Op: OpSub, Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}, &Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$r"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{KeyVals: []*ObjectKeyVal{&ObjectKeyVal{Key: "string", Val: &ObjectVal{Queries: []*Query{&Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Left: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "$r"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "_capture"}, Op: OpPipe, Right: &Query{Func: "str"}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$str"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Start: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$r"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Name: "offset"}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$r"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Name: "length"}}}}}}, End: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "offset"}}}, IsSlice: true}}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "string"}}}}}}}}}, &ObjectKeyVal{Key: "offset", Val: &ObjectVal{Queries: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$r"}, SuffixList: []*Suffix{&Suffix{Index: &Index{Name: "offset"}}}}}}}}, &ObjectKeyVal{Key: "matches", Val: &ObjectVal{Queries: []*Query{&Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "matches"}, SuffixList: []*Suffix{&Suffix{Index: &Index{End: &Query{Term: &Term{Type: TermTypeUnary, Unary: &Unary{Op: OpSub, Term: &Term{Type: TermTypeNumber, Number: "1"}}}}, IsSlice: true}}}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "_sub"}}}}}}}}}}}}, Left: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{KeyVals: []*ObjectKeyVal{&ObjectKeyVal{Key: "string", Val: &ObjectVal{Queries: []*Query{&Query{Term: &Term{Type: TermTypeString, Str: &String{}}}}}}, &ObjectKeyVal{Key: "matches", Val: &ObjectVal{Queries: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "$flags"}}}}}}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "_sub"}}}}}}}}}, - "test": []*FuncDef{&FuncDef{Name: "test", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "test", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "null"}}}}}}, &FuncDef{Name: "test", Args: []string{"$re", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_match", Args: []*Query{&Query{Func: "$re"}, &Query{Func: "$flags"}, &Query{Func: "true"}}}}}}}, - "todate": []*FuncDef{&FuncDef{Name: "todate", Body: &Query{Func: "todateiso8601"}}}, - "todateiso8601": []*FuncDef{&FuncDef{Name: "todateiso8601", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "strftime", Args: []*Query{&Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "%Y-%m-%dT%H:%M:%SZ"}}}}}}}}}, - "tostream": []*FuncDef{&FuncDef{Name: "tostream", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{&Query{FuncDefs: []*FuncDef{&FuncDef{Name: "r", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}, &Suffix{Optional: true}}}}, Op: OpPipe, Right: &Query{Func: "r"}}}}, Op: OpComma, Right: &Query{Func: "."}}}}, Func: "r"}}}, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$p"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "getpath", Args: []*Query{&Query{Func: "$p"}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{&Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Iter: true}, &Suffix{Optional: true}}}}}}}, Pattern: &Pattern{Name: "$q"}, Start: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "$p"}, Op: OpComma, Right: &Query{Func: "."}}}}}, Update: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "$p"}, Op: OpAdd, Right: &Query{Func: "$q"}}}}}}}}}}}}}}}}, - "truncate_stream": []*FuncDef{&FuncDef{Name: "truncate_stream", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{&Suffix{Bind: &Bind{Patterns: []*Pattern{&Pattern{Name: "$n"}}, Body: &Query{Left: &Query{Func: "null"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "f"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpGt, Right: &Query{Func: "$n"}}}, Then: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}, Op: OpModify, Right: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Func: "$n"}, IsSlice: true}}}}, Else: &Query{Func: "empty"}}}}}}}}}}}}}, - "unique_by": []*FuncDef{&FuncDef{Name: "unique_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_unique_by", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}}, - "until": []*FuncDef{&FuncDef{Name: "until", Args: []string{"cond", "next"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "_until", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "cond"}, Then: &Query{Func: "."}, Else: &Query{Left: &Query{Func: "next"}, Op: OpPipe, Right: &Query{Func: "_until"}}}}}}}, Func: "_until"}}}, - "values": []*FuncDef{&FuncDef{Name: "values", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{&Query{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Func: "null"}}}}}}}}, - "walk": []*FuncDef{&FuncDef{Name: "walk", Args: []string{"f"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "_walk", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}, Then: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Func: "_walk"}}}}}, Elif: []*IfElif{&IfElif{Cond: &Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}, Then: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map_values", Args: []*Query{&Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "last", Args: []*Query{&Query{Func: "_walk"}}}}}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "f"}}}}, Func: "_walk"}}}, - "while": []*FuncDef{&FuncDef{Name: "while", Args: []string{"cond", "update"}, Body: &Query{FuncDefs: []*FuncDef{&FuncDef{Name: "_while", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "cond"}, Then: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "update"}, Op: OpPipe, Right: &Query{Func: "_while"}}}}}, Else: &Query{Func: "empty"}}}}}}, Func: "_while"}}}, - "with_entries": []*FuncDef{&FuncDef{Name: "with_entries", Args: []string{"f"}, Body: &Query{Left: &Query{Func: "to_entries"}, Op: OpPipe, Right: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{&Query{Func: "f"}}}}}, Op: OpPipe, Right: &Query{Func: "from_entries"}}}}}, + "IN": {{Name: "IN", Args: []string{"s"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{{Left: &Query{Func: "s"}, Op: OpEq, Right: &Query{Func: "."}}, {Func: "."}}}}}}, {Name: "IN", Args: []string{"src", "s"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{{Left: &Query{Func: "src"}, Op: OpEq, Right: &Query{Func: "s"}}, {Func: "."}}}}}}}, + "INDEX": {{Name: "INDEX", Args: []string{"stream", "idx_expr"}, Body: &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{Query: &Query{Func: "stream"}, Pattern: &Pattern{Name: "$row"}, Start: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{}}}, Update: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Left: &Query{Func: "$row"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "idx_expr"}, Op: OpPipe, Right: &Query{Func: "tostring"}}}}}}, Op: OpAssign, Right: &Query{Func: "$row"}}}}}}, {Name: "INDEX", Args: []string{"idx_expr"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "INDEX", Args: []*Query{{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Iter: true}}}}, {Func: "idx_expr"}}}}}}}, + "JOIN": {{Name: "JOIN", Args: []string{"$idx", "idx_expr"}, Body: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Iter: true}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$idx"}, SuffixList: []*Suffix{{Index: &Index{Start: &Query{Func: "idx_expr"}}}}}}}}}}}}}}}, {Name: "JOIN", Args: []string{"$idx", "stream", "idx_expr"}, Body: &Query{Left: &Query{Func: "stream"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$idx"}, SuffixList: []*Suffix{{Index: &Index{Start: &Query{Func: "idx_expr"}}}}}}}}}}}}, {Name: "JOIN", Args: []string{"$idx", "stream", "idx_expr", "join_expr"}, Body: &Query{Left: &Query{Func: "stream"}, Op: OpPipe, Right: &Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$idx"}, SuffixList: []*Suffix{{Index: &Index{Start: &Query{Func: "idx_expr"}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "join_expr"}}}}}, + "_assign": {}, + "_modify": {}, + "all": {{Name: "all", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "all", Args: []*Query{{Func: "."}}}}}}, {Name: "all", Args: []string{"y"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "all", Args: []*Query{{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Iter: true}}}}, {Func: "y"}}}}}}, {Name: "all", Args: []string{"g", "y"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "isempty", Args: []*Query{{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "y"}, Op: OpPipe, Right: &Query{Func: "not"}}}}}}}}}}}}}, + "any": {{Name: "any", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{{Func: "."}}}}}}, {Name: "any", Args: []string{"y"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "any", Args: []*Query{{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Iter: true}}}}, {Func: "y"}}}}}}, {Name: "any", Args: []string{"g", "y"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "isempty", Args: []*Query{{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Func: "y"}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "not"}}}}, + "arrays": {{Name: "arrays", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}}}}}}}, + "booleans": {{Name: "booleans", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "boolean"}}}}}}}}}}, + "capture": {{Name: "capture", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "capture", Args: []*Query{{Func: "$re"}, {Func: "null"}}}}}}, {Name: "capture", Args: []string{"$re", "$flags"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{{Func: "$re"}, {Func: "$flags"}}}}}, Op: OpPipe, Right: &Query{Func: "_capture"}}}}, + "combinations": {{Name: "combinations", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}, Else: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, SuffixList: []*Suffix{{Iter: true}, {Bind: &Bind{Patterns: []*Pattern{{Name: "$x"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "$x"}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}, IsSlice: true}}}, Op: OpPipe, Right: &Query{Func: "combinations"}}}}}}}}}}}}}}, {Name: "combinations", Args: []string{"n"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "limit", Args: []*Query{{Func: "n"}, {Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "repeat", Args: []*Query{{Func: "."}}}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "combinations"}}}}, + "del": {{Name: "del", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "delpaths", Args: []*Query{{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{{Func: "f"}}}}}}}}}}}}}}, + "finites": {{Name: "finites", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Func: "isfinite"}}}}}}}, + "first": {{Name: "first", Body: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}, {Name: "first", Args: []string{"g"}, Body: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}}}}}}}}, + "fromdate": {{Name: "fromdate", Body: &Query{Func: "fromdateiso8601"}}}, + "fromdateiso8601": {{Name: "fromdateiso8601", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "strptime", Args: []*Query{{Term: &Term{Type: TermTypeString, Str: &String{Str: "%Y-%m-%dT%H:%M:%S%z"}}}}}}}, Op: OpPipe, Right: &Query{Func: "mktime"}}}}, + "fromstream": {{Name: "fromstream", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{KeyVals: []*ObjectKeyVal{{Key: "x", Val: &Query{Func: "null"}}, {Key: "e", Val: &Query{Func: "false"}}}}, SuffixList: []*Suffix{{Bind: &Bind{Patterns: []*Pattern{{Name: "$init"}}, Body: &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{Query: &Query{Func: "f"}, Pattern: &Pattern{Name: "$i"}, Start: &Query{Func: "$init"}, Update: &Query{Left: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "e"}}}, Then: &Query{Func: "$init"}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "$i"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "2"}}}}, Then: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "setpath", Args: []*Query{{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "e"}}}}}}, {Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "setpath", Args: []*Query{{Left: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "x"}}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}}, {Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}}}}}}, Else: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "setpath", Args: []*Query{{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "e"}}}}}}, {Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$i"}, SuffixList: []*Suffix{{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}}}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}}}}}}, Extract: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "e"}}}, Then: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "x"}}}, Else: &Query{Func: "empty"}}}}}}}}}}}}}}, + "group_by": {{Name: "group_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_group_by", Args: []*Query{{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}}, + "gsub": {{Name: "gsub", Args: []string{"$re", "str"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "sub", Args: []*Query{{Func: "$re"}, {Func: "str"}, {Term: &Term{Type: TermTypeString, Str: &String{Str: "g"}}}}}}}}, {Name: "gsub", Args: []string{"$re", "str", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "sub", Args: []*Query{{Func: "$re"}, {Func: "str"}, {Left: &Query{Func: "$flags"}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "g"}}}}}}}}}}, + "in": {{Name: "in", Args: []string{"xs"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Bind: &Bind{Patterns: []*Pattern{{Name: "$x"}}, Body: &Query{Left: &Query{Func: "xs"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "has", Args: []*Query{{Func: "$x"}}}}}}}}}}}}}, + "inputs": {{Name: "inputs", Body: &Query{Term: &Term{Type: TermTypeTry, Try: &Try{Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "repeat", Args: []*Query{{Func: "input"}}}}}, Catch: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "break"}}}}, Then: &Query{Func: "empty"}, Else: &Query{Func: "error"}}}}}}}}}, + "inside": {{Name: "inside", Args: []string{"xs"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Bind: &Bind{Patterns: []*Pattern{{Name: "$x"}}, Body: &Query{Left: &Query{Func: "xs"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "contains", Args: []*Query{{Func: "$x"}}}}}}}}}}}}}, + "isempty": {{Name: "isempty", Args: []string{"g"}, Body: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "g"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "false"}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}}}}}, Op: OpComma, Right: &Query{Func: "true"}}}}}}}, + "iterables": {{Name: "iterables", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "type"}, Op: OpPipe, Right: &Query{Left: &Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}, Op: OpOr, Right: &Query{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}}}}}}}}}, + "last": {{Name: "last", Body: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeUnary, Unary: &Unary{Op: OpSub, Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}}, {Name: "last", Args: []string{"g"}, Body: &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{Query: &Query{Func: "g"}, Pattern: &Pattern{Name: "$item"}, Start: &Query{Func: "null"}, Update: &Query{Func: "$item"}}}}}}, + "limit": {{Name: "limit", Args: []string{"$n", "g"}, Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "$n"}, Op: OpGt, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{Query: &Query{Func: "g"}, Pattern: &Pattern{Name: "$item"}, Start: &Query{Func: "$n"}, Update: &Query{Left: &Query{Func: "."}, Op: OpSub, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}, Extract: &Query{Left: &Query{Func: "$item"}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "."}, Op: OpLe, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}, Else: &Query{Func: "empty"}}}}}}}}}}}, Elif: []*IfElif{{Cond: &Query{Left: &Query{Func: "$n"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Func: "empty"}}}, Else: &Query{Func: "g"}}}}}}, + "map": {{Name: "map", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Iter: true}}}}, Op: OpPipe, Right: &Query{Func: "f"}}}}}}}, + "map_values": {{Name: "map_values", Args: []string{"f"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Iter: true}}}}, Op: OpModify, Right: &Query{Func: "f"}}}}, + "match": {{Name: "match", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{{Func: "$re"}, {Func: "null"}}}}}}, {Name: "match", Args: []string{"$re", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_match", Args: []*Query{{Func: "$re"}, {Func: "$flags"}, {Func: "false"}}}, SuffixList: []*Suffix{{Iter: true}}}}}}, + "max_by": {{Name: "max_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_max_by", Args: []*Query{{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}}, + "min_by": {{Name: "min_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_min_by", Args: []*Query{{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}}, + "normals": {{Name: "normals", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Func: "isnormal"}}}}}}}, + "not": {{Name: "not", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "."}, Then: &Query{Func: "false"}, Else: &Query{Func: "true"}}}}}}, + "nth": {{Name: "nth", Args: []string{"$n"}, Body: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Func: "$n"}}}}}, {Name: "nth", Args: []string{"$n", "g"}, Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "$n"}, Op: OpLt, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "error", Args: []*Query{{Term: &Term{Type: TermTypeString, Str: &String{Str: "nth doesn't support negative indices"}}}}}}}, Else: &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{Ident: "$out", Body: &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{Query: &Query{Func: "g"}, Pattern: &Pattern{Name: "$item"}, Start: &Query{Left: &Query{Func: "$n"}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}, Update: &Query{Left: &Query{Func: "."}, Op: OpSub, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "1"}}}, Extract: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "."}, Op: OpLe, Right: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}, Then: &Query{Left: &Query{Func: "$item"}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeBreak, Break: "$out"}}}, Else: &Query{Func: "empty"}}}}}}}}}}}}}}}, + "nulls": {{Name: "nulls", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "."}, Op: OpEq, Right: &Query{Func: "null"}}}}}}}}, + "numbers": {{Name: "numbers", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "number"}}}}}}}}}}, + "objects": {{Name: "objects", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}}}}}}}, + "paths": {{Name: "paths", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{{Func: ".."}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}}}}}}}}, {Name: "paths", Args: []string{"f"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{{Left: &Query{Func: ".."}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Func: "f"}}}}}}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}}}}}}}}}, + "pick": {{Name: "pick", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Bind: &Bind{Patterns: []*Pattern{{Name: "$v"}}, Body: &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{{Func: "f"}}}}}, Pattern: &Pattern{Name: "$p"}, Start: &Query{Func: "null"}, Update: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "setpath", Args: []*Query{{Func: "$p"}, {Left: &Query{Func: "$v"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "getpath", Args: []*Query{{Func: "$p"}}}}}}}}}}}}}}}}}}}}, + "range": {{Name: "range", Args: []string{"$end"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_range", Args: []*Query{{Term: &Term{Type: TermTypeNumber, Number: "0"}}, {Func: "$end"}, {Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}, {Name: "range", Args: []string{"$start", "$end"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_range", Args: []*Query{{Func: "$start"}, {Func: "$end"}, {Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}}, {Name: "range", Args: []string{"$start", "$end", "$step"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_range", Args: []*Query{{Func: "$start"}, {Func: "$end"}, {Func: "$step"}}}}}}}, + "recurse": {{Name: "recurse", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "recurse", Args: []*Query{{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Iter: true}, {Optional: true}}}}}}}}}, {Name: "recurse", Args: []string{"f"}, Body: &Query{FuncDefs: []*FuncDef{{Name: "r", Body: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "f"}, Op: OpPipe, Right: &Query{Func: "r"}}}}}}}, Func: "r"}}, {Name: "recurse", Args: []string{"f", "cond"}, Body: &Query{FuncDefs: []*FuncDef{{Name: "r", Body: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "f"}, Op: OpPipe, Right: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Func: "cond"}}}}}, Op: OpPipe, Right: &Query{Func: "r"}}}}}}}}, Func: "r"}}}, + "repeat": {{Name: "repeat", Args: []string{"f"}, Body: &Query{FuncDefs: []*FuncDef{{Name: "_repeat", Body: &Query{Left: &Query{Func: "f"}, Op: OpComma, Right: &Query{Func: "_repeat"}}}}, Func: "_repeat"}}}, + "scalars": {{Name: "scalars", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "type"}, Op: OpPipe, Right: &Query{Left: &Query{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}, Op: OpAnd, Right: &Query{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}}}}}}}}}, + "scan": {{Name: "scan", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "scan", Args: []*Query{{Func: "$re"}, {Func: "null"}}}}}}, {Name: "scan", Args: []string{"$re", "$flags"}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{{Func: "$re"}, {Left: &Query{Func: "$flags"}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "g"}}}}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "captures"}}}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}}, Then: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "string"}}}, Else: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "captures"}, SuffixList: []*Suffix{{Iter: true}, {Index: &Index{Name: "string"}}}}}}}}}}}}}}, + "select": {{Name: "select", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "f"}, Then: &Query{Func: "."}, Else: &Query{Func: "empty"}}}}}}, + "sort_by": {{Name: "sort_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_sort_by", Args: []*Query{{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}}, + "splits": {{Name: "splits", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "splits", Args: []*Query{{Func: "$re"}, {Func: "null"}}}}}}, {Name: "splits", Args: []string{"$re", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "split", Args: []*Query{{Func: "$re"}, {Func: "$flags"}}}, SuffixList: []*Suffix{{Iter: true}}}}}}, + "strings": {{Name: "strings", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "string"}}}}}}}}}}, + "sub": {{Name: "sub", Args: []string{"$re", "str"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "sub", Args: []*Query{{Func: "$re"}, {Func: "str"}, {Func: "null"}}}}}}, {Name: "sub", Args: []string{"$re", "str", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Bind: &Bind{Patterns: []*Pattern{{Name: "$str"}}, Body: &Query{FuncDefs: []*FuncDef{{Name: "_sub", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "matches"}}}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{}}}}, Then: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$str"}, SuffixList: []*Suffix{{Index: &Index{End: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "offset"}}}, IsSlice: true}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "string"}}}}, Else: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "matches"}, SuffixList: []*Suffix{{Index: &Index{Start: &Query{Term: &Term{Type: TermTypeUnary, Unary: &Unary{Op: OpSub, Term: &Term{Type: TermTypeNumber, Number: "1"}}}}}}, {Bind: &Bind{Patterns: []*Pattern{{Name: "$r"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{KeyVals: []*ObjectKeyVal{{Key: "string", Val: &Query{Left: &Query{Left: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "$r"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "_capture"}, Op: OpPipe, Right: &Query{Func: "str"}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$str"}, SuffixList: []*Suffix{{Index: &Index{Start: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$r"}, SuffixList: []*Suffix{{Index: &Index{Name: "offset"}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$r"}, SuffixList: []*Suffix{{Index: &Index{Name: "length"}}}}}}, End: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "offset"}}}, IsSlice: true}}}}}}, Op: OpAdd, Right: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "string"}}}}}, {Key: "offset", Val: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "$r"}, SuffixList: []*Suffix{{Index: &Index{Name: "offset"}}}}}}, {Key: "matches", Val: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Name: "matches"}, SuffixList: []*Suffix{{Index: &Index{End: &Query{Term: &Term{Type: TermTypeUnary, Unary: &Unary{Op: OpSub, Term: &Term{Type: TermTypeNumber, Number: "1"}}}}, IsSlice: true}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "_sub"}}}}}}}}}}}}, Left: &Query{Term: &Term{Type: TermTypeObject, Object: &Object{KeyVals: []*ObjectKeyVal{{Key: "string", Val: &Query{Term: &Term{Type: TermTypeString, Str: &String{}}}}, {Key: "matches", Val: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "match", Args: []*Query{{Func: "$re"}, {Func: "$flags"}}}}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "_sub"}}}}}}}}}, + "test": {{Name: "test", Args: []string{"$re"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "test", Args: []*Query{{Func: "$re"}, {Func: "null"}}}}}}, {Name: "test", Args: []string{"$re", "$flags"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_match", Args: []*Query{{Func: "$re"}, {Func: "$flags"}, {Func: "true"}}}}}}}, + "todate": {{Name: "todate", Body: &Query{Func: "todateiso8601"}}}, + "todateiso8601": {{Name: "todateiso8601", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "strftime", Args: []*Query{{Term: &Term{Type: TermTypeString, Str: &String{Str: "%Y-%m-%dT%H:%M:%SZ"}}}}}}}}}, + "tostream": {{Name: "tostream", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{{FuncDefs: []*FuncDef{{Name: "r", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Iter: true}, {Optional: true}}}}, Op: OpPipe, Right: &Query{Func: "r"}}}}, Op: OpComma, Right: &Query{Func: "."}}}}, Func: "r"}}}, SuffixList: []*Suffix{{Bind: &Bind{Patterns: []*Pattern{{Name: "$p"}}, Body: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "getpath", Args: []*Query{{Func: "$p"}}}}}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{Query: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "path", Args: []*Query{{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Iter: true}, {Optional: true}}}}}}}}, Pattern: &Pattern{Name: "$q"}, Start: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "$p"}, Op: OpComma, Right: &Query{Func: "."}}}}}, Update: &Query{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Left: &Query{Func: "$p"}, Op: OpAdd, Right: &Query{Func: "$q"}}}}}}}}}}}}}}}}, + "truncate_stream": {{Name: "truncate_stream", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{{Bind: &Bind{Patterns: []*Pattern{{Name: "$n"}}, Body: &Query{Left: &Query{Func: "null"}, Op: OpPipe, Right: &Query{Left: &Query{Func: "f"}, Op: OpPipe, Right: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}, Op: OpPipe, Right: &Query{Left: &Query{Func: "length"}, Op: OpGt, Right: &Query{Func: "$n"}}}, Then: &Query{Left: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Term: &Term{Type: TermTypeNumber, Number: "0"}}}}}, Op: OpModify, Right: &Query{Term: &Term{Type: TermTypeIndex, Index: &Index{Start: &Query{Func: "$n"}, IsSlice: true}}}}, Else: &Query{Func: "empty"}}}}}}}}}}}}}, + "unique_by": {{Name: "unique_by", Args: []string{"f"}, Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "_unique_by", Args: []*Query{{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{{Term: &Term{Type: TermTypeArray, Array: &Array{Query: &Query{Func: "f"}}}}}}}}}}}}}}, + "until": {{Name: "until", Args: []string{"cond", "next"}, Body: &Query{FuncDefs: []*FuncDef{{Name: "_until", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "cond"}, Then: &Query{Func: "."}, Else: &Query{Left: &Query{Func: "next"}, Op: OpPipe, Right: &Query{Func: "_until"}}}}}}}, Func: "_until"}}}, + "values": {{Name: "values", Body: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "select", Args: []*Query{{Left: &Query{Func: "."}, Op: OpNe, Right: &Query{Func: "null"}}}}}}}}, + "walk": {{Name: "walk", Args: []string{"f"}, Body: &Query{FuncDefs: []*FuncDef{{Name: "_walk", Body: &Query{Left: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "array"}}}}, Then: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{{Func: "_walk"}}}}}, Elif: []*IfElif{{Cond: &Query{Left: &Query{Func: "type"}, Op: OpEq, Right: &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: "object"}}}}, Then: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map_values", Args: []*Query{{Func: "_walk"}}}}}}}}}}, Op: OpPipe, Right: &Query{Func: "f"}}}}, Func: "_walk"}}}, + "while": {{Name: "while", Args: []string{"cond", "update"}, Body: &Query{FuncDefs: []*FuncDef{{Name: "_while", Body: &Query{Term: &Term{Type: TermTypeIf, If: &If{Cond: &Query{Func: "cond"}, Then: &Query{Left: &Query{Func: "."}, Op: OpComma, Right: &Query{Term: &Term{Type: TermTypeQuery, Query: &Query{Left: &Query{Func: "update"}, Op: OpPipe, Right: &Query{Func: "_while"}}}}}, Else: &Query{Func: "empty"}}}}}}, Func: "_while"}}}, + "with_entries": {{Name: "with_entries", Args: []string{"f"}, Body: &Query{Left: &Query{Func: "to_entries"}, Op: OpPipe, Right: &Query{Left: &Query{Term: &Term{Type: TermTypeFunc, Func: &Func{Name: "map", Args: []*Query{{Func: "f"}}}}}, Op: OpPipe, Right: &Query{Func: "from_entries"}}}}}, } } diff --git a/vendor/github.com/itchyny/gojq/builtin.jq b/vendor/github.com/itchyny/gojq/builtin.jq index 66d63073..ac6292ba 100644 --- a/vendor/github.com/itchyny/gojq/builtin.jq +++ b/vendor/github.com/itchyny/gojq/builtin.jq @@ -37,7 +37,6 @@ def strings: select(type == "string"); def nulls: select(. == null); def values: select(. != null); def scalars: select(type | . != "array" and . != "object"); -def leaf_paths: paths(scalars); def inside(xs): . as $x | xs | contains($x); def combinations: @@ -52,7 +51,7 @@ def walk(f): if type == "array" then map(_walk) elif type == "object" then - map_values(last(_walk)) + map_values(_walk) end | f; _walk; @@ -117,7 +116,9 @@ def tostream: def map_values(f): .[] |= f; def del(f): delpaths([path(f)]); def paths: path(..) | select(. != []); -def paths(f): paths as $p | select(getpath($p) | f) | $p; +def paths(f): path(.. | select(f)) | select(. != []); +def pick(f): . as $v | + reduce path(f) as $p (null; setpath($p; $v | getpath($p))); def fromdateiso8601: strptime("%Y-%m-%dT%H:%M:%S%z") | mktime; def todateiso8601: strftime("%Y-%m-%dT%H:%M:%SZ"); @@ -149,7 +150,7 @@ def sub($re; str; $flags): else .matches[-1] as $r | { - string: (($r | _capture | str) + $str[$r.offset+$r.length:.offset] + .string), + string: ($r | _capture | str) + $str[$r.offset+$r.length:.offset] + .string, offset: $r.offset, matches: .matches[:-1], } | diff --git a/vendor/github.com/itchyny/gojq/compare.go b/vendor/github.com/itchyny/gojq/compare.go index e70c1fbb..6ab22754 100644 --- a/vendor/github.com/itchyny/gojq/compare.go +++ b/vendor/github.com/itchyny/gojq/compare.go @@ -9,10 +9,6 @@ import ( // The result will be 0 if l == r, -1 if l < r, and +1 if l > r. // This comparison is used by built-in operators and functions. func Compare(l, r any) int { - return compare(l, r) -} - -func compare(l, r any) int { return binopTypeSwitch(l, r, compareInt, func(l, r float64) any { @@ -44,7 +40,7 @@ func compare(l, r any) int { n = len(r) } for i := 0; i < n; i++ { - if cmp := compare(l[i], r[i]); cmp != 0 { + if cmp := Compare(l[i], r[i]); cmp != 0 { return cmp } } @@ -52,11 +48,11 @@ func compare(l, r any) int { }, func(l, r map[string]any) any { lk, rk := funcKeys(l), funcKeys(r) - if cmp := compare(lk, rk); cmp != 0 { + if cmp := Compare(lk, rk); cmp != 0 { return cmp } for _, k := range lk.([]any) { - if cmp := compare(l[k.(string)], r[k.(string)]); cmp != 0 { + if cmp := Compare(l[k.(string)], r[k.(string)]); cmp != 0 { return cmp } } diff --git a/vendor/github.com/itchyny/gojq/compiler.go b/vendor/github.com/itchyny/gojq/compiler.go index de5f9a10..b42517f8 100644 --- a/vendor/github.com/itchyny/gojq/compiler.go +++ b/vendor/github.com/itchyny/gojq/compiler.go @@ -84,6 +84,13 @@ func Compile(q *Query, options ...CompilerOption) (*Code, error) { setscope := c.lazy(func() *code { return &code{op: opscope, v: [3]int{scope.id, scope.variablecnt, 0}} }) + for _, name := range c.variables { + if !newLexer(name).validVarName() { + return nil, &variableNameError{name} + } + c.appendCodeInfo(name) + c.append(&code{op: opstore, v: c.pushVariable(name)}) + } if c.moduleLoader != nil { if moduleLoader, ok := c.moduleLoader.(interface { LoadInitModules() ([]*Query, error) @@ -113,13 +120,6 @@ func Compile(q *Query, options ...CompilerOption) (*Code, error) { } func (c *compiler) compile(q *Query) error { - for _, name := range c.variables { - if !newLexer(name).validVarName() { - return &variableNameError{name} - } - c.appendCodeInfo(name) - c.append(&code{op: opstore, v: c.pushVariable(name)}) - } for _, i := range q.Imports { if err := c.compileImport(i); err != nil { return err @@ -398,6 +398,8 @@ func (c *compiler) compileQuery(e *Query) error { return c.compileTerm(e.Term) } switch e.Op { + case Operator(0): + return errors.New(`missing query (try ".")`) case OpPipe: if err := c.compileQuery(e.Left); err != nil { return err @@ -538,6 +540,7 @@ func (c *compiler) compileQueryUpdate(l, r *Query, op Operator) error { } func (c *compiler) compileBind(e *Term, b *Bind) error { + defer c.newScopeDepth()() c.append(&code{op: opdup}) c.append(&code{op: opexpbegin}) if err := c.compileTerm(e); err != nil { @@ -734,7 +737,7 @@ func (c *compiler) compileReduce(e *Reduce) error { } f() c.append(&code{op: opstore, v: v}) - if err := c.compileTerm(e.Term); err != nil { + if err := c.compileQuery(e.Query); err != nil { return err } if _, err := c.compilePattern(nil, e.Pattern); err != nil { @@ -765,7 +768,7 @@ func (c *compiler) compileForeach(e *Foreach) error { } f() c.append(&code{op: opstore, v: v}) - if err := c.compileTerm(e.Term); err != nil { + if err := c.compileQuery(e.Query); err != nil { return err } if _, err := c.compilePattern(nil, e.Pattern); err != nil { @@ -990,6 +993,22 @@ func (c *compiler) compileFunc(e *Func) error { true, -1, ) + case "debug": + setfork := c.lazy(func() *code { + return &code{op: opfork, v: len(c.codes)} + }) + if err := c.compileQuery(e.Args[0]); err != nil { + return err + } + if err := c.compileFunc(&Func{Name: "debug"}); err != nil { + if _, ok := err.(*funcNotFoundError); ok { + err = &funcNotFoundError{e} + } + return err + } + c.append(&code{op: opbacktrack}) + setfork() + return nil default: return c.compileCall(e.Name, e.Args) } @@ -1196,35 +1215,52 @@ func (c *compiler) funcModulemeta(v any, _ []any) any { if meta == nil { meta = make(map[string]any) } - deps := []any{} - for _, i := range q.Imports { + meta["defs"] = listModuleDefs(q) + meta["deps"] = listModuleDeps(q) + return meta +} + +func listModuleDefs(q *Query) []any { + type funcNameArity struct { + name string + arity int + } + var xs []*funcNameArity + for _, fd := range q.FuncDefs { + if fd.Name[0] != '_' { + xs = append(xs, &funcNameArity{fd.Name, len(fd.Args)}) + } + } + sort.Slice(xs, func(i, j int) bool { + return xs[i].name < xs[j].name || + xs[i].name == xs[j].name && xs[i].arity < xs[j].arity + }) + defs := make([]any, len(xs)) + for i, x := range xs { + defs[i] = x.name + "/" + strconv.Itoa(x.arity) + } + return defs +} + +func listModuleDeps(q *Query) []any { + deps := make([]any, len(q.Imports)) + for j, i := range q.Imports { v := i.Meta.ToValue() if v == nil { v = make(map[string]any) - } else { - for k := range v { - // dirty hack to remove the internal fields - if strings.HasPrefix(k, "$$") { - delete(v, k) - } - } } - if i.ImportPath == "" { - v["relpath"] = i.IncludePath - } else { - v["relpath"] = i.ImportPath - } - if err != nil { - return err + relpath := i.ImportPath + if relpath == "" { + relpath = i.IncludePath } + v["relpath"] = relpath if i.ImportAlias != "" { v["as"] = strings.TrimPrefix(i.ImportAlias, "$") } v["is_data"] = strings.HasPrefix(i.ImportAlias, "$") - deps = append(deps, v) + deps[j] = v } - meta["deps"] = deps - return meta + return deps } func (c *compiler) compileObject(e *Object) error { @@ -1311,10 +1347,8 @@ func (c *compiler) compileObjectKeyVal(v [2]int, kv *ObjectKeyVal) error { } if kv.Val != nil { c.append(&code{op: opload, v: v}) - for _, e := range kv.Val.Queries { - if err := c.compileQuery(e); err != nil { - return err - } + if err := c.compileQuery(kv.Val); err != nil { + return err } } return nil diff --git a/vendor/github.com/itchyny/gojq/debug.go b/vendor/github.com/itchyny/gojq/debug.go index ad3d7216..23698280 100644 --- a/vendor/github.com/itchyny/gojq/debug.go +++ b/vendor/github.com/itchyny/gojq/debug.go @@ -103,7 +103,7 @@ func (env *env) debugCodes() { s = "\t## " + name } } - fmt.Fprintf(debugOut, "\t%d\t%s%s%s\n", i, formatOp(c.op, false), debugOperand(c), s) + fmt.Fprintf(debugOut, "\t%d\t%-*s%s%s\n", i, 25, c.op, debugOperand(c), s) } fmt.Fprintln(debugOut, "\t"+strings.Repeat("-", 40)+"+") } @@ -114,7 +114,11 @@ func (env *env) debugState(pc int, backtrack bool) { } var sb strings.Builder c := env.codes[pc] - fmt.Fprintf(&sb, "\t%d\t%s%s\t|", pc, formatOp(c.op, backtrack), debugOperand(c)) + op := c.op.String() + if backtrack { + op += " " + } + fmt.Fprintf(&sb, "\t%d\t%-*s%s\t|", pc, 25, op, debugOperand(c)) var xs []int for i := env.stack.index; i >= 0; i = env.stack.data[i].next { xs = append(xs, i) @@ -149,13 +153,6 @@ func (env *env) debugState(pc int, backtrack bool) { fmt.Fprintln(debugOut, sb.String()) } -func formatOp(c opcode, backtrack bool) string { - if backtrack { - return c.String() + " " + strings.Repeat(" ", 13-len(c.String())) - } - return c.String() + strings.Repeat(" ", 25-len(c.String())) -} - func (env *env) debugForks(pc int, op string) { if !debug { return @@ -173,7 +170,7 @@ func (env *env) debugForks(pc int, op string) { sb.WriteByte('>') } } - fmt.Fprintf(debugOut, "\t-\t%s%s%d\t|\t%s\n", op, strings.Repeat(" ", 22), pc, sb.String()) + fmt.Fprintf(debugOut, "\t-\t%-*s%d\t|\t%s\n", 25, op, pc, sb.String()) } func debugOperand(c *code) string { diff --git a/vendor/github.com/itchyny/gojq/error.go b/vendor/github.com/itchyny/gojq/error.go index 16865877..18b06b1c 100644 --- a/vendor/github.com/itchyny/gojq/error.go +++ b/vendor/github.com/itchyny/gojq/error.go @@ -85,7 +85,7 @@ func (err *expectedStartEndError) Error() string { type lengthMismatchError struct{} -func (err *lengthMismatchError) Error() string { +func (*lengthMismatchError) Error() string { return "length mismatch" } @@ -163,7 +163,6 @@ func (err *func2WrapError) Error() string { type exitCodeError struct { value any code int - halt bool } func (err *exitCodeError) Error() string { @@ -173,10 +172,6 @@ func (err *exitCodeError) Error() string { return "error: " + jsonMarshal(err.value) } -func (err *exitCodeError) IsEmptyError() bool { - return err.value == nil -} - func (err *exitCodeError) Value() any { return err.value } @@ -185,8 +180,28 @@ func (err *exitCodeError) ExitCode() int { return err.code } -func (err *exitCodeError) IsHaltError() bool { - return err.halt +// HaltError is an error emitted by halt and halt_error functions. +// It implements [ValueError], and if the value is nil, discard the error +// and stop the iteration. Consider a query like "1, halt, 2"; +// the first value is 1, and the second value is a HaltError with nil value. +// You might think the iterator should not emit an error this case, but it +// should so that we can recognize the halt error to stop the outer loop +// of iterating input values; echo 1 2 3 | gojq "., halt". +type HaltError exitCodeError + +func (err *HaltError) Error() string { + return "halt " + (*exitCodeError)(err).Error() +} + +// Value returns the value of the error. This implements [ValueError], +// but halt error is not catchable by try-catch. +func (err *HaltError) Value() any { + return (*exitCodeError)(err).Value() +} + +// ExitCode returns the exit code of the error. +func (err *HaltError) ExitCode() int { + return (*exitCodeError)(err).ExitCode() } type flattenDepthError struct { @@ -207,7 +222,7 @@ func (err *joinTypeError) Error() string { type timeArrayError struct{} -func (err *timeArrayError) Error() string { +func (*timeArrayError) Error() string { return "expected an array of 8 numbers" } @@ -264,7 +279,7 @@ func (err *formatRowError) Error() string { type tooManyVariableValuesError struct{} -func (err *tooManyVariableValuesError) Error() string { +func (*tooManyVariableValuesError) Error() string { return "too many variable values provided" } @@ -301,7 +316,7 @@ func (err *breakError) Error() string { return "label not defined: " + err.n } -func (err *breakError) ExitCode() int { +func (*breakError) ExitCode() int { return 3 } diff --git a/vendor/github.com/itchyny/gojq/execute.go b/vendor/github.com/itchyny/gojq/execute.go index dcf9d984..344d8a3c 100644 --- a/vendor/github.com/itchyny/gojq/execute.go +++ b/vendor/github.com/itchyny/gojq/execute.go @@ -66,7 +66,9 @@ loop: err = &objectKeyNotStringError{k} break loop } - m[s] = v + if _, ok := m[s]; !ok { + m[s] = v + } } env.push(m) case opappend: @@ -86,23 +88,15 @@ loop: if err == nil { break loop } - switch er := err.(type) { + switch e := err.(type) { case *tryEndError: - err = er.err + err = e.err break loop - case *breakError: + case *breakError, *HaltError: break loop case ValueError: - if er, ok := er.(*exitCodeError); ok && er.halt { - break loop - } - if v := er.Value(); v != nil { - env.pop() - env.push(v) - } else { - err = nil - break loop - } + env.pop() + env.push(e.Value()) default: env.pop() env.push(err.Error()) @@ -191,9 +185,7 @@ loop: } w := v[0].(func(any, []any) any)(x, args) if e, ok := w.(error); ok { - if er, ok := e.(*exitCodeError); !ok || er.value != nil || er.halt { - err = e - } + err = e break loop } env.push(w) diff --git a/vendor/github.com/itchyny/gojq/func.go b/vendor/github.com/itchyny/gojq/func.go index 6e8d1500..e06b4ff7 100644 --- a/vendor/github.com/itchyny/gojq/func.go +++ b/vendor/github.com/itchyny/gojq/func.go @@ -15,6 +15,7 @@ import ( "strconv" "strings" "time" + "unicode" "unicode/utf8" "github.com/itchyny/timefmt-go" @@ -50,6 +51,8 @@ func init() { "builtins": argFunc0(nil), "input": argFunc0(nil), "modulemeta": argFunc0(nil), + "debug": argFunc1(nil), + "abs": argFunc0(funcAbs), "length": argFunc0(funcLength), "utf8bytelength": argFunc0(funcUtf8ByteLength), "keys": argFunc0(funcKeys), @@ -69,6 +72,9 @@ func init() { "endswith": argFunc1(funcEndsWith), "ltrimstr": argFunc1(funcLtrimstr), "rtrimstr": argFunc1(funcRtrimstr), + "ltrim": argFunc0(funcLtrim), + "rtrim": argFunc0(funcRtrim), + "trim": argFunc0(funcTrim), "explode": argFunc0(funcExplode), "implode": argFunc0(funcImplode), "split": {argcount1 | argcount2, false, funcSplit}, @@ -127,8 +133,8 @@ func init() { "atanh": mathFunc("atanh", math.Atanh), "floor": mathFunc("floor", math.Floor), "round": mathFunc("round", math.Round), - "nearbyint": mathFunc("nearbyint", math.Round), - "rint": mathFunc("rint", math.Round), + "nearbyint": mathFunc("nearbyint", math.RoundToEven), + "rint": mathFunc("rint", math.RoundToEven), "ceil": mathFunc("ceil", math.Ceil), "trunc": mathFunc("trunc", math.Trunc), "significand": mathFunc("significand", funcSignificand), @@ -164,15 +170,14 @@ func init() { "fmod": mathFunc2("fmod", math.Mod), "hypot": mathFunc2("hypot", math.Hypot), "jn": mathFunc2("jn", funcJn), - "ldexp": mathFunc2("ldexp", funcLdexp), "nextafter": mathFunc2("nextafter", math.Nextafter), "nexttoward": mathFunc2("nexttoward", math.Nextafter), "remainder": mathFunc2("remainder", math.Remainder), - "scalb": mathFunc2("scalb", funcScalb), - "scalbln": mathFunc2("scalbln", funcScalbln), + "ldexp": mathFunc2("ldexp", funcLdexp), + "scalb": mathFunc2("scalb", funcLdexp), + "scalbln": mathFunc2("scalbln", funcLdexp), "yn": mathFunc2("yn", funcYn), "pow": mathFunc2("pow", math.Pow), - "pow10": mathFunc("pow10", funcExp10), "fma": mathFunc3("fma", math.FMA), "infinite": argFunc0(funcInfinite), "isfinite": argFunc0(funcIsfinite), @@ -274,6 +279,25 @@ func mathFunc3(name string, f func(_, _, _ float64) float64) function { }) } +func funcAbs(v any) any { + switch v := v.(type) { + case int: + if v >= 0 { + return v + } + return -v + case float64: + return math.Abs(v) + case *big.Int: + if v.Sign() >= 0 { + return v + } + return new(big.Int).Abs(v) + default: + return &func0TypeError{"abs", v} + } +} + func funcLength(v any) any { switch v := v.(type) { case nil: @@ -579,7 +603,7 @@ func indices(vs, xs []any) any { return rs } for i := 0; i <= len(vs)-len(xs); i++ { - if compare(vs[i:i+len(xs)], xs) == 0 { + if Compare(vs[i:i+len(xs)], xs) == 0 { rs = append(rs, i) } } @@ -592,7 +616,7 @@ func funcIndex(v, x any) any { return nil } for i := 0; i <= len(vs)-len(xs); i++ { - if compare(vs[i:i+len(xs)], xs) == 0 { + if Compare(vs[i:i+len(xs)], xs) == 0 { return i } } @@ -606,7 +630,7 @@ func funcRindex(v, x any) any { return nil } for i := len(vs) - len(xs); i >= 0; i-- { - if compare(vs[i:i+len(xs)], xs) == 0 { + if Compare(vs[i:i+len(xs)], xs) == 0 { return i } } @@ -662,11 +686,11 @@ func funcEndsWith(v, x any) any { func funcLtrimstr(v, x any) any { s, ok := v.(string) if !ok { - return v + return &func1TypeError{"ltrimstr", v, x} } t, ok := x.(string) if !ok { - return v + return &func1TypeError{"ltrimstr", v, x} } return strings.TrimPrefix(s, t) } @@ -674,15 +698,39 @@ func funcLtrimstr(v, x any) any { func funcRtrimstr(v, x any) any { s, ok := v.(string) if !ok { - return v + return &func1TypeError{"rtrimstr", v, x} } t, ok := x.(string) if !ok { - return v + return &func1TypeError{"rtrimstr", v, x} } return strings.TrimSuffix(s, t) } +func funcLtrim(v any) any { + s, ok := v.(string) + if !ok { + return &func0TypeError{"ltrim", v} + } + return strings.TrimLeftFunc(s, unicode.IsSpace) +} + +func funcRtrim(v any) any { + s, ok := v.(string) + if !ok { + return &func0TypeError{"rtrim", v} + } + return strings.TrimRightFunc(s, unicode.IsSpace) +} + +func funcTrim(v any) any { + s, ok := v.(string) + if !ok { + return &func0TypeError{"trim", v} + } + return strings.TrimSpace(s) +} + func funcExplode(v any) any { s, ok := v.(string) if !ok { @@ -709,8 +757,12 @@ func funcImplode(v any) any { var sb strings.Builder sb.Grow(len(vs)) for _, v := range vs { - if r, ok := toInt(v); ok && 0 <= r && r <= utf8.MaxRune { - sb.WriteRune(rune(r)) + if r, ok := toInt(v); ok { + if 0 <= r && r <= utf8.MaxRune { + sb.WriteRune(rune(r)) + } else { + sb.WriteRune(utf8.RuneError) + } } else { return &func0TypeError{"implode", vs} } @@ -1137,7 +1189,7 @@ type rangeIter struct { } func (iter *rangeIter) Next() (any, bool) { - if compare(iter.step, 0)*compare(iter.value, iter.end) >= 0 { + if Compare(iter.step, 0)*Compare(iter.value, iter.end) >= 0 { return nil, false } v := iter.value @@ -1208,7 +1260,7 @@ func minMaxBy(vs, xs []any, isMin bool) any { } i, j, x := 0, 0, xs[0] for i++; i < len(xs); i++ { - if compare(x, xs[i]) > 0 == isMin { + if Compare(x, xs[i]) > 0 == isMin { j, x = i, xs[i] } } @@ -1239,7 +1291,7 @@ func sortItems(name string, v, x any) ([]*sortItem, error) { items[i] = &sortItem{v, xs[i]} } sort.SliceStable(items, func(i, j int) bool { - return compare(items[i].key, items[j].key) < 0 + return Compare(items[i].key, items[j].key) < 0 }) return items, nil } @@ -1272,7 +1324,7 @@ func funcGroupBy(v, x any) any { rs := []any{} var last any for i, r := range items { - if i == 0 || compare(last, r.key) != 0 { + if i == 0 || Compare(last, r.key) != 0 { rs, last = append(rs, []any{r.value}), r.key } else { rs[len(rs)-1] = append(rs[len(rs)-1].([]any), r.value) @@ -1297,7 +1349,7 @@ func uniqueBy(name string, v, x any) any { rs := []any{} var last any for i, r := range items { - if i == 0 || compare(last, r.key) != 0 { + if i == 0 || Compare(last, r.key) != 0 { rs, last = append(rs, r.value), r.key } } @@ -1387,14 +1439,6 @@ func funcLdexp(l, r float64) float64 { return math.Ldexp(l, int(r)) } -func funcScalb(l, r float64) float64 { - return l * math.Pow(2, r) -} - -func funcScalbln(l, r float64) float64 { - return l * math.Pow(2, r) -} - func funcYn(l, r float64) float64 { return math.Yn(int(l), r) } @@ -1783,9 +1827,9 @@ func funcBsearch(v, t any) any { return &func1TypeError{"bsearch", v, t} } i := sort.Search(len(vs), func(i int) bool { - return compare(vs[i], t) >= 0 + return Compare(vs[i], t) >= 0 }) - if i < len(vs) && compare(vs[i], t) == 0 { + if i < len(vs) && Compare(vs[i], t) == 0 { return i } return -i - 1 @@ -2052,15 +2096,11 @@ func funcError(v any, args []any) any { if len(args) > 0 { v = args[0] } - code := 5 - if v == nil { - code = 0 - } - return &exitCodeError{v, code, false} + return &exitCodeError{v, 5} } func funcHalt(any) any { - return &exitCodeError{nil, 0, true} + return &HaltError{nil, 0} } func funcHaltError(v any, args []any) any { @@ -2071,7 +2111,7 @@ func funcHaltError(v any, args []any) any { return &func0TypeError{"halt_error", args[0]} } } - return &exitCodeError{v, code, true} + return &HaltError{v, code} } func toInt(x any) (int, bool) { diff --git a/vendor/github.com/itchyny/gojq/go.dev.mod b/vendor/github.com/itchyny/gojq/go.dev.mod index 9a0579ca..1e831626 100644 --- a/vendor/github.com/itchyny/gojq/go.dev.mod +++ b/vendor/github.com/itchyny/gojq/go.dev.mod @@ -1,8 +1,8 @@ module github.com/itchyny/gojq -go 1.18 +go 1.20 require ( - github.com/itchyny/astgen-go v0.0.0-20210914105503-cc8fccf6f972 // indirect - github.com/itchyny/timefmt-go v0.1.5 // indirect + github.com/itchyny/astgen-go v0.0.0-20231113225122-e1c22b9aaf7b // indirect + github.com/itchyny/timefmt-go v0.1.6 // indirect ) diff --git a/vendor/github.com/itchyny/gojq/go.dev.sum b/vendor/github.com/itchyny/gojq/go.dev.sum index 66aee6c5..e8691b82 100644 --- a/vendor/github.com/itchyny/gojq/go.dev.sum +++ b/vendor/github.com/itchyny/gojq/go.dev.sum @@ -1,4 +1,4 @@ -github.com/itchyny/astgen-go v0.0.0-20210914105503-cc8fccf6f972 h1:XYWolmPDLTY9B1O5o/Ad811/mtVkaHWMiZdbPLm/nDA= -github.com/itchyny/astgen-go v0.0.0-20210914105503-cc8fccf6f972/go.mod h1:jTXcxGeQMJfFN3wWjtzb4aAaWDDN+QbezE0HjH1XfNk= -github.com/itchyny/timefmt-go v0.1.5 h1:G0INE2la8S6ru/ZI5JecgyzbbJNs5lG1RcBqa7Jm6GE= -github.com/itchyny/timefmt-go v0.1.5/go.mod h1:nEP7L+2YmAbT2kZ2HfSs1d8Xtw9LY8D2stDBckWakZ8= +github.com/itchyny/astgen-go v0.0.0-20231113225122-e1c22b9aaf7b h1:72fDU7wad+r3iQObaxhlXVIpAIMRUIUMrNa3go1vb8s= +github.com/itchyny/astgen-go v0.0.0-20231113225122-e1c22b9aaf7b/go.mod h1:Zp6xzEWVc2pQ/ObfLD6t/M6gDegsJWKdGKJSiT7qlu0= +github.com/itchyny/timefmt-go v0.1.6 h1:ia3s54iciXDdzWzwaVKXZPbiXzxxnv1SPGFfM/myJ5Q= +github.com/itchyny/timefmt-go v0.1.6/go.mod h1:RRDZYC5s9ErkjQvTvvU7keJjxUYzIISJGxm9/mAERQg= diff --git a/vendor/github.com/itchyny/gojq/lexer.go b/vendor/github.com/itchyny/gojq/lexer.go index 82bb2b6b..0c2efd12 100644 --- a/vendor/github.com/itchyny/gojq/lexer.go +++ b/vendor/github.com/itchyny/gojq/lexer.go @@ -235,7 +235,8 @@ func (l *lexer) Lex(lval *yySymType) (tokenType int) { default: if ch >= utf8.RuneSelf { r, size := utf8.DecodeRuneInString(l.source[l.offset-1:]) - l.offset += size + // -1 to adjust for first byte consumed by next() + l.offset += size - 1 l.token = string(r) } } @@ -247,15 +248,9 @@ func (l *lexer) next() (byte, bool) { ch := l.source[l.offset] l.offset++ if ch == '#' { - if len(l.source) == l.offset { + if l.skipComment() { return 0, true } - for !isNewLine(l.source[l.offset]) { - l.offset++ - if len(l.source) == l.offset { - return 0, true - } - } } else if !isWhite(ch) { return ch, false } else if len(l.source) == l.offset { @@ -264,6 +259,28 @@ func (l *lexer) next() (byte, bool) { } } +func (l *lexer) skipComment() bool { + for { + switch l.peek() { + case 0: + return true + case '\\': + switch l.offset++; l.peek() { + case '\\', '\n': + l.offset++ + case '\r': + if l.offset++; l.peek() == '\n' { + l.offset++ + } + } + case '\n', '\r': + return false + default: + l.offset++ + } + } +} + func (l *lexer) peek() byte { if len(l.source) == l.offset { return 0 @@ -505,37 +522,34 @@ func quoteAndEscape(src string, quote bool, controls int) []byte { return buf } -type parseError struct { - offset int - token string +// ParseError represents a description of a query parsing error. +type ParseError struct { + Offset int // the error occurred after reading Offset bytes + Token string // the Token that caused the error (may be empty) tokenType int } -func (err *parseError) Error() string { +func (err *ParseError) Error() string { switch err.tokenType { case eof: return "unexpected EOF" case tokInvalid: - return "invalid token " + jsonMarshal(err.token) + return "invalid token " + jsonMarshal(err.Token) case tokInvalidEscapeSequence: - return `invalid escape sequence "` + err.token + `" in string literal` + return `invalid escape sequence "` + err.Token + `" in string literal` case tokUnterminatedString: return "unterminated string literal" default: - return "unexpected token " + jsonMarshal(err.token) + return "unexpected token " + jsonMarshal(err.Token) } } -func (err *parseError) Token() (string, int) { - return err.token, err.offset -} - func (l *lexer) Error(string) { offset, token := l.offset, l.token if l.tokenType != eof && l.tokenType < utf8.RuneSelf { token = string(rune(l.tokenType)) } - l.err = &parseError{offset, token, l.tokenType} + l.err = &ParseError{offset, token, l.tokenType} } func isWhite(ch byte) bool { @@ -562,12 +576,3 @@ func isHex(ch byte) bool { func isNumber(ch byte) bool { return '0' <= ch && ch <= '9' } - -func isNewLine(ch byte) bool { - switch ch { - case '\n', '\r': - return true - default: - return false - } -} diff --git a/vendor/github.com/itchyny/gojq/module_loader.go b/vendor/github.com/itchyny/gojq/module_loader.go index 599e37bf..0a73ba05 100644 --- a/vendor/github.com/itchyny/gojq/module_loader.go +++ b/vendor/github.com/itchyny/gojq/module_loader.go @@ -13,16 +13,24 @@ import ( // // Implement following optional methods. Use [NewModuleLoader] to load local modules. // +// LoadInitModules() ([]*Query, error) // LoadModule(string) (*Query, error) // LoadModuleWithMeta(string, map[string]any) (*Query, error) -// LoadInitModules() ([]*Query, error) // LoadJSON(string) (any, error) // LoadJSONWithMeta(string, map[string]any) (any, error) type ModuleLoader any -// NewModuleLoader creates a new [ModuleLoader] reading local modules in the paths. +// NewModuleLoader creates a new [ModuleLoader] loading local modules in the paths. +// Note that user can load modules outside the paths using "search" path of metadata. +// Empty paths are ignored, so specify "." for the current working directory. func NewModuleLoader(paths []string) ModuleLoader { - return &moduleLoader{expandHomeDir(paths)} + ps := make([]string, 0, len(paths)) + for _, path := range paths { + if path = resolvePath(path, ""); path != "" { + ps = append(ps, path) + } + } + return &moduleLoader{ps} } type moduleLoader struct { @@ -49,7 +57,7 @@ func (l *moduleLoader) LoadInitModules() ([]*Query, error) { if err != nil { return nil, err } - q, err := parseModule(path, string(cnt)) + q, err := parseModule(string(cnt), filepath.Dir(path)) if err != nil { return nil, &queryParseError{path, string(cnt), err} } @@ -67,7 +75,7 @@ func (l *moduleLoader) LoadModuleWithMeta(name string, meta map[string]any) (*Qu if err != nil { return nil, err } - q, err := parseModule(path, string(cnt)) + q, err := parseModule(string(cnt), filepath.Dir(path)) if err != nil { return nil, &queryParseError{path, string(cnt), err} } @@ -109,15 +117,15 @@ func (l *moduleLoader) LoadJSONWithMeta(name string, meta map[string]any) (any, func (l *moduleLoader) lookupModule(name, extension string, meta map[string]any) (string, error) { paths := l.paths - if path := searchPath(meta); path != "" { + if path, ok := meta["search"].(string); ok { paths = append([]string{path}, paths...) } for _, base := range paths { - path := filepath.Clean(filepath.Join(base, name+extension)) + path := filepath.Join(base, name+extension) if _, err := os.Stat(path); err == nil { return path, err } - path = filepath.Clean(filepath.Join(base, name, filepath.Base(name)+extension)) + path = filepath.Join(base, name, filepath.Base(name)+extension) if _, err := os.Stat(path); err == nil { return path, err } @@ -125,66 +133,50 @@ func (l *moduleLoader) lookupModule(name, extension string, meta map[string]any) return "", fmt.Errorf("module not found: %q", name) } -// This is a dirty hack to implement the "search" field. -func parseModule(path, cnt string) (*Query, error) { +func parseModule(cnt, dir string) (*Query, error) { q, err := Parse(cnt) if err != nil { return nil, err } for _, i := range q.Imports { - if i.Meta == nil { - continue + if i.Meta != nil { + for _, e := range i.Meta.KeyVals { + if e.Key == "search" || e.KeyString == "search" { + if path, ok := e.Val.toString(); ok { + if path = resolvePath(path, dir); path != "" { + e.Val.Str = path + } else { + e.Val.Null = true + } + } + } + } } - i.Meta.KeyVals = append( - i.Meta.KeyVals, - &ConstObjectKeyVal{ - Key: "$$path", - Val: &ConstTerm{Str: path}, - }, - ) } return q, nil } -func searchPath(meta map[string]any) string { - x, ok := meta["search"] - if !ok { - return "" - } - s, ok := x.(string) - if !ok { - return "" - } - if filepath.IsAbs(s) { - return s - } - if strings.HasPrefix(s, "~") { - if homeDir, err := os.UserHomeDir(); err == nil { - return filepath.Join(homeDir, s[1:]) +func resolvePath(path, dir string) string { + switch { + case filepath.IsAbs(path): + return path + case strings.HasPrefix(path, "~/"): + dir, err := os.UserHomeDir() + if err != nil { + return "" } - } - var path string - if x, ok := meta["$$path"]; ok { - path, _ = x.(string) - } - if path == "" { - return s - } - return filepath.Join(filepath.Dir(path), s) -} - -func expandHomeDir(paths []string) []string { - var homeDir string - var err error - for i, path := range paths { - if strings.HasPrefix(path, "~") { - if homeDir == "" && err == nil { - homeDir, err = os.UserHomeDir() - } - if homeDir != "" { - paths[i] = filepath.Join(homeDir, path[1:]) - } + return filepath.Join(dir, path[2:]) + case strings.HasPrefix(path, "$ORIGIN/"): + exe, err := os.Executable() + if err != nil { + return "" + } + exe, err = filepath.EvalSymlinks(exe) + if err != nil { + return "" } + return filepath.Join(filepath.Dir(exe), path[8:]) + default: + return filepath.Join(dir, path) } - return paths } diff --git a/vendor/github.com/itchyny/gojq/operator.go b/vendor/github.com/itchyny/gojq/operator.go index 73a548e0..64b74b78 100644 --- a/vendor/github.com/itchyny/gojq/operator.go +++ b/vendor/github.com/itchyny/gojq/operator.go @@ -371,7 +371,7 @@ func funcOpSub(_, l, r any) any { L: for _, l := range l { for _, r := range r { - if compare(l, r) == 0 { + if Compare(l, r) == 0 { continue L } } @@ -433,11 +433,11 @@ func deepMergeObjects(l, r map[string]any) any { } func repeatString(s string, n float64) any { - if n <= 0.0 || len(s) > 0 && n > float64(0x10000000/len(s)) || math.IsNaN(n) { + if n < 0.0 || len(s) > 0 && n > float64(0x10000000/len(s)) || math.IsNaN(n) { return nil } - if int(n) < 1 { - return s + if s == "" { + return "" } return strings.Repeat(s, int(n)) } @@ -446,9 +446,6 @@ func funcOpDiv(_, l, r any) any { return binopTypeSwitch(l, r, func(l, r int) any { if r == 0 { - if l == 0 { - return math.NaN() - } return &zeroDivisionError{l, r} } if l%r == 0 { @@ -458,18 +455,12 @@ func funcOpDiv(_, l, r any) any { }, func(l, r float64) any { if r == 0.0 { - if l == 0.0 { - return math.NaN() - } return &zeroDivisionError{l, r} } return l / r }, func(l, r *big.Int) any { if r.Sign() == 0 { - if l.Sign() == 0 { - return math.NaN() - } return &zeroDivisionError{l, r} } d, m := new(big.Int).DivMod(l, r, new(big.Int)) @@ -508,6 +499,9 @@ func funcOpMod(_, l, r any) any { if ri == 0 { return &zeroModuloError{l, r} } + if math.IsNaN(l) || math.IsNaN(r) { + return math.NaN() + } return floatToInt(l) % ri }, func(l, r *big.Int) any { @@ -531,25 +525,25 @@ func funcOpAlt(_, l, r any) any { } func funcOpEq(_, l, r any) any { - return compare(l, r) == 0 + return Compare(l, r) == 0 } func funcOpNe(_, l, r any) any { - return compare(l, r) != 0 + return Compare(l, r) != 0 } func funcOpGt(_, l, r any) any { - return compare(l, r) > 0 + return Compare(l, r) > 0 } func funcOpLt(_, l, r any) any { - return compare(l, r) < 0 + return Compare(l, r) < 0 } func funcOpGe(_, l, r any) any { - return compare(l, r) >= 0 + return Compare(l, r) >= 0 } func funcOpLe(_, l, r any) any { - return compare(l, r) <= 0 + return Compare(l, r) <= 0 } diff --git a/vendor/github.com/itchyny/gojq/parser.go b/vendor/github.com/itchyny/gojq/parser.go index 1e5e50af..5a0dfdc7 100644 --- a/vendor/github.com/itchyny/gojq/parser.go +++ b/vendor/github.com/itchyny/gojq/parser.go @@ -5,21 +5,7 @@ package gojq import __yyfmt__ "fmt" -// Parse a query string, and returns the query struct. -// -// If parsing failed, the returned error has the method Token() (string, int), -// which reports the invalid token and the byte offset in the query string. The -// token is empty if the error occurred after scanning the entire query string. -// The byte offset is the scanned bytes when the error occurred. -// //line parser.go.y:2 -func Parse(src string) (*Query, error) { - l := newLexer(src) - if yyParse(l) > 0 { - return nil, l.err - } - return l.result, nil -} func reverseFuncDef(xs []*FuncDef) []*FuncDef { for i, j := 0, len(xs)-1; i < j; i, j = i+1, j-1 { @@ -35,7 +21,7 @@ func prependFuncDef(xs []*FuncDef, x *FuncDef) []*FuncDef { return xs } -//line parser.go.y:33 +//line parser.go.y:19 type yySymType struct { yys int value any @@ -46,9 +32,9 @@ type yySymType struct { const tokAltOp = 57346 const tokUpdateOp = 57347 const tokDestAltOp = 57348 -const tokOrOp = 57349 -const tokAndOp = 57350 -const tokCompareOp = 57351 +const tokCompareOp = 57349 +const tokOrOp = 57350 +const tokAndOp = 57351 const tokModule = 57352 const tokImport = 57353 const tokInclude = 57354 @@ -59,33 +45,34 @@ const tokBreak = 57358 const tokNull = 57359 const tokTrue = 57360 const tokFalse = 57361 -const tokIdent = 57362 -const tokVariable = 57363 -const tokModuleIdent = 57364 -const tokModuleVariable = 57365 -const tokIndex = 57366 -const tokNumber = 57367 -const tokFormat = 57368 -const tokString = 57369 -const tokStringStart = 57370 -const tokStringQuery = 57371 -const tokStringEnd = 57372 -const tokIf = 57373 -const tokThen = 57374 -const tokElif = 57375 -const tokElse = 57376 -const tokEnd = 57377 -const tokTry = 57378 -const tokCatch = 57379 -const tokReduce = 57380 -const tokForeach = 57381 -const tokRecurse = 57382 -const tokFuncDefPost = 57383 -const tokTermPost = 57384 -const tokEmptyCatch = 57385 -const tokInvalid = 57386 -const tokInvalidEscapeSequence = 57387 -const tokUnterminatedString = 57388 +const tokIf = 57362 +const tokThen = 57363 +const tokElif = 57364 +const tokElse = 57365 +const tokEnd = 57366 +const tokTry = 57367 +const tokCatch = 57368 +const tokReduce = 57369 +const tokForeach = 57370 +const tokIdent = 57371 +const tokVariable = 57372 +const tokModuleIdent = 57373 +const tokModuleVariable = 57374 +const tokRecurse = 57375 +const tokIndex = 57376 +const tokNumber = 57377 +const tokFormat = 57378 +const tokString = 57379 +const tokStringStart = 57380 +const tokStringQuery = 57381 +const tokStringEnd = 57382 +const tokInvalid = 57383 +const tokInvalidEscapeSequence = 57384 +const tokUnterminatedString = 57385 +const tokFuncDefQuery = 57386 +const tokExpr = 57387 +const tokTerm = 57388 +const tokEmptyCatch = 57389 var yyToknames = [...]string{ "$end", @@ -94,9 +81,9 @@ var yyToknames = [...]string{ "tokAltOp", "tokUpdateOp", "tokDestAltOp", + "tokCompareOp", "tokOrOp", "tokAndOp", - "tokCompareOp", "tokModule", "tokImport", "tokInclude", @@ -107,10 +94,20 @@ var yyToknames = [...]string{ "tokNull", "tokTrue", "tokFalse", + "tokIf", + "tokThen", + "tokElif", + "tokElse", + "tokEnd", + "tokTry", + "tokCatch", + "tokReduce", + "tokForeach", "tokIdent", "tokVariable", "tokModuleIdent", "tokModuleVariable", + "tokRecurse", "tokIndex", "tokNumber", "tokFormat", @@ -118,22 +115,12 @@ var yyToknames = [...]string{ "tokStringStart", "tokStringQuery", "tokStringEnd", - "tokIf", - "tokThen", - "tokElif", - "tokElse", - "tokEnd", - "tokTry", - "tokCatch", - "tokReduce", - "tokForeach", - "tokRecurse", - "tokFuncDefPost", - "tokTermPost", - "tokEmptyCatch", "tokInvalid", "tokInvalidEscapeSequence", "tokUnterminatedString", + "tokFuncDefQuery", + "tokExpr", + "tokTerm", "'|'", "','", "'+'", @@ -143,6 +130,7 @@ var yyToknames = [...]string{ "'%'", "'.'", "'?'", + "tokEmptyCatch", "'['", "';'", "':'", @@ -159,218 +147,181 @@ const yyEofCode = 1 const yyErrCode = 2 const yyInitialStackSize = 16 -//line parser.go.y:693 +//line parser.go.y:671 //line yacctab:1 var yyExca = [...]int16{ -1, 1, 1, -1, -2, 0, - -1, 97, - 55, 0, - -2, 104, - -1, 130, + -1, 145, 5, 0, - -2, 32, - -1, 133, - 9, 0, - -2, 35, - -1, 194, - 58, 114, - -2, 54, + -2, 27, + -1, 148, + 7, 0, + -2, 30, + -1, 199, + 59, 114, + -2, 49, } const yyPrivate = 57344 -const yyLast = 1127 +const yyLast = 782 var yyAct = [...]int16{ - 86, 214, 174, 112, 12, 203, 9, 175, 111, 31, - 190, 6, 156, 140, 117, 47, 95, 97, 93, 94, - 89, 141, 49, 7, 179, 180, 181, 240, 246, 264, - 239, 103, 177, 106, 178, 227, 164, 119, 107, 108, - 105, 245, 102, 75, 76, 113, 77, 78, 79, 123, - 226, 163, 211, 225, 259, 210, 142, 179, 180, 181, - 158, 159, 143, 182, 122, 177, 224, 178, 219, 7, - 235, 234, 104, 127, 243, 128, 129, 130, 131, 132, - 133, 134, 135, 136, 137, 138, 72, 74, 80, 81, - 82, 83, 84, 147, 73, 88, 182, 196, 73, 229, - 195, 145, 7, 150, 228, 161, 166, 165, 157, 126, - 125, 124, 144, 88, 258, 167, 80, 81, 82, 83, - 84, 206, 73, 44, 242, 91, 90, 92, 183, 184, - 82, 83, 84, 154, 73, 153, 267, 186, 49, 173, - 42, 43, 100, 91, 90, 92, 99, 191, 120, 197, - 256, 257, 200, 192, 201, 202, 188, 75, 76, 207, - 77, 78, 79, 198, 199, 209, 42, 43, 216, 92, - 215, 215, 218, 213, 113, 98, 75, 76, 185, 77, - 78, 79, 204, 205, 101, 221, 222, 170, 155, 171, - 169, 3, 28, 27, 230, 96, 220, 232, 176, 46, - 223, 11, 80, 81, 82, 83, 84, 11, 73, 78, - 79, 157, 241, 110, 8, 152, 237, 255, 236, 72, - 74, 80, 81, 82, 83, 84, 85, 73, 79, 278, - 160, 191, 277, 121, 189, 253, 254, 192, 248, 247, - 187, 139, 249, 250, 208, 262, 260, 261, 215, 263, - 80, 81, 82, 83, 84, 149, 73, 268, 269, 10, - 270, 5, 4, 2, 1, 88, 272, 273, 80, 81, - 82, 83, 84, 0, 73, 279, 0, 0, 271, 280, - 51, 52, 0, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 115, 116, 91, 90, 92, 0, 0, - 42, 43, 0, 87, 63, 64, 65, 66, 67, 68, - 69, 70, 71, 88, 0, 20, 0, 17, 37, 24, - 25, 26, 38, 40, 39, 41, 23, 29, 30, 42, - 43, 0, 114, 15, 0, 0, 212, 0, 16, 0, - 13, 14, 22, 91, 90, 92, 0, 0, 0, 0, - 0, 33, 34, 0, 0, 0, 21, 0, 36, 0, - 148, 32, 0, 146, 35, 51, 52, 0, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, 115, 116, - 0, 0, 0, 0, 0, 42, 43, 0, 0, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 18, 19, - 20, 0, 17, 37, 24, 25, 26, 38, 40, 39, - 41, 23, 29, 30, 42, 43, 0, 114, 15, 0, - 0, 109, 0, 16, 0, 13, 14, 22, 0, 0, - 0, 0, 0, 0, 0, 0, 33, 34, 0, 0, - 0, 21, 0, 36, 0, 0, 32, 0, 20, 35, - 17, 37, 24, 25, 26, 38, 40, 39, 41, 23, - 29, 30, 42, 43, 0, 0, 15, 0, 0, 0, - 0, 16, 0, 13, 14, 22, 0, 0, 0, 0, - 0, 0, 0, 0, 33, 34, 0, 0, 0, 21, - 0, 36, 0, 0, 32, 0, 231, 35, 20, 0, - 17, 37, 24, 25, 26, 38, 40, 39, 41, 23, - 29, 30, 42, 43, 0, 0, 15, 0, 0, 0, - 0, 16, 0, 13, 14, 22, 0, 0, 0, 0, - 0, 0, 0, 0, 33, 34, 0, 0, 0, 21, - 0, 36, 0, 0, 32, 0, 118, 35, 20, 0, - 17, 37, 24, 25, 26, 38, 40, 39, 41, 23, - 29, 30, 42, 43, 0, 0, 15, 0, 77, 78, - 79, 16, 0, 13, 14, 22, 0, 0, 0, 0, - 0, 0, 0, 0, 33, 34, 0, 0, 0, 21, - 0, 36, 0, 0, 32, 51, 52, 35, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, 48, 0, - 80, 81, 82, 83, 84, 50, 73, 0, 0, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 51, 52, - 0, 53, 54, 55, 56, 57, 58, 59, 60, 61, - 62, 48, 0, 0, 0, 0, 0, 0, 50, 0, - 0, 172, 63, 64, 65, 66, 67, 68, 69, 70, - 71, 51, 52, 0, 53, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 115, 194, 0, 0, 0, 0, - 0, 42, 43, 0, 45, 63, 64, 65, 66, 67, - 68, 69, 70, 71, 37, 24, 25, 26, 38, 40, - 39, 41, 23, 29, 30, 42, 43, 75, 76, 0, - 77, 78, 79, 193, 0, 0, 0, 0, 22, 0, - 0, 0, 0, 0, 0, 0, 0, 33, 34, 0, - 0, 0, 21, 0, 36, 0, 0, 32, 75, 76, - 35, 77, 78, 79, 0, 0, 0, 0, 0, 0, - 72, 74, 80, 81, 82, 83, 84, 0, 73, 0, - 0, 0, 75, 76, 252, 77, 78, 79, 0, 0, + 78, 134, 186, 102, 103, 10, 175, 195, 32, 211, + 48, 108, 81, 176, 131, 6, 229, 5, 50, 73, + 74, 159, 14, 180, 181, 182, 124, 98, 110, 135, + 280, 97, 228, 279, 115, 104, 16, 158, 265, 121, + 114, 178, 123, 179, 244, 73, 74, 180, 181, 182, + 73, 74, 112, 113, 154, 155, 136, 117, 117, 117, + 254, 243, 137, 183, 282, 178, 255, 179, 220, 6, + 247, 116, 118, 119, 128, 129, 73, 74, 99, 73, + 74, 227, 73, 74, 246, 141, 238, 183, 201, 237, + 132, 200, 139, 6, 235, 226, 138, 163, 208, 80, + 157, 207, 241, 231, 230, 161, 162, 73, 74, 117, + 117, 117, 117, 117, 117, 117, 117, 117, 117, 83, + 82, 278, 84, 144, 145, 146, 147, 148, 149, 150, + 151, 152, 153, 184, 185, 174, 50, 160, 193, 73, + 74, 127, 196, 202, 203, 126, 197, 73, 74, 125, + 73, 74, 248, 253, 189, 204, 45, 240, 206, 73, + 74, 245, 143, 210, 214, 215, 73, 74, 104, 217, + 218, 213, 79, 219, 86, 87, 76, 90, 88, 89, + 169, 43, 44, 117, 117, 73, 74, 75, 166, 117, + 222, 224, 80, 225, 73, 74, 273, 212, 212, 232, + 132, 223, 234, 216, 120, 271, 73, 74, 191, 239, + 43, 44, 83, 82, 85, 84, 274, 270, 96, 91, + 92, 93, 94, 95, 73, 74, 93, 94, 95, 249, + 84, 164, 251, 252, 196, 236, 267, 250, 197, 130, + 25, 256, 73, 74, 262, 263, 187, 188, 3, 190, + 257, 258, 260, 261, 264, 24, 266, 73, 74, 9, + 221, 268, 269, 117, 117, 111, 171, 272, 172, 170, + 13, 275, 276, 77, 90, 277, 89, 212, 212, 13, + 177, 281, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 106, 107, 91, 92, 93, 94, 95, + 47, 43, 44, 101, 165, 259, 91, 92, 93, 94, + 95, 242, 156, 122, 194, 17, 192, 15, 37, 21, + 22, 23, 33, 133, 105, 205, 7, 34, 209, 35, + 36, 39, 41, 40, 42, 19, 20, 28, 31, 43, + 44, 8, 4, 2, 86, 87, 1, 90, 88, 89, + 0, 29, 30, 0, 168, 90, 18, 0, 0, 27, + 0, 142, 38, 0, 140, 26, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 106, 107, 91, + 92, 93, 94, 95, 0, 43, 44, 91, 92, 93, + 94, 95, 0, 0, 0, 0, 0, 11, 12, 17, + 0, 15, 37, 21, 22, 23, 33, 0, 105, 0, + 0, 34, 100, 35, 36, 39, 41, 40, 42, 19, + 20, 28, 31, 43, 44, 0, 0, 0, 0, 86, + 87, 0, 90, 88, 89, 29, 30, 0, 0, 167, + 18, 0, 0, 27, 0, 0, 38, 0, 17, 26, + 15, 37, 21, 22, 23, 33, 0, 0, 0, 0, + 34, 0, 35, 36, 39, 41, 40, 42, 19, 20, + 28, 31, 43, 44, 91, 92, 93, 94, 95, 0, + 0, 0, 0, 0, 29, 30, 90, 88, 89, 18, + 0, 0, 27, 0, 0, 38, 0, 233, 26, 17, + 0, 15, 37, 21, 22, 23, 33, 0, 0, 0, + 0, 34, 0, 35, 36, 39, 41, 40, 42, 19, + 20, 28, 31, 43, 44, 0, 0, 0, 91, 92, + 93, 94, 95, 0, 0, 29, 30, 0, 0, 0, + 18, 0, 0, 27, 0, 0, 38, 0, 109, 26, + 17, 0, 15, 37, 21, 22, 23, 33, 0, 0, + 0, 0, 34, 0, 35, 36, 39, 41, 40, 42, + 19, 20, 28, 31, 43, 44, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 29, 30, 0, 0, + 0, 18, 0, 0, 27, 0, 0, 38, 0, 0, + 26, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 49, 0, 0, 0, 0, 0, 0, 0, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 49, 0, 0, 0, 0, 173, 0, 0, + 51, 37, 21, 22, 23, 33, 0, 0, 0, 0, + 34, 0, 35, 36, 39, 41, 40, 42, 19, 20, + 28, 31, 43, 44, 0, 0, 0, 46, 0, 0, + 0, 0, 0, 0, 29, 30, 0, 0, 0, 18, + 0, 0, 27, 0, 0, 38, 0, 0, 26, 52, + 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 106, 199, 0, 0, 0, 0, 0, 0, 43, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 72, 74, 80, 81, 82, 83, 84, 0, 73, - 0, 0, 0, 75, 76, 233, 77, 78, 79, 0, - 0, 0, 0, 0, 0, 72, 74, 80, 81, 82, - 83, 84, 0, 73, 0, 0, 0, 75, 76, 168, - 77, 78, 79, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 72, 74, 80, 81, - 82, 83, 84, 0, 73, 0, 0, 75, 76, 281, - 77, 78, 79, 0, 0, 0, 0, 0, 0, 0, - 72, 74, 80, 81, 82, 83, 84, 0, 73, 0, - 0, 75, 76, 276, 77, 78, 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 72, 74, 80, 81, 82, 83, 84, 0, 73, 0, - 0, 75, 76, 251, 77, 78, 79, 0, 0, 0, - 0, 0, 0, 0, 72, 74, 80, 81, 82, 83, - 84, 0, 73, 0, 0, 75, 76, 244, 77, 78, - 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 72, 74, 80, 81, 82, 83, - 84, 0, 73, 0, 0, 75, 76, 217, 77, 78, - 79, 0, 0, 0, 0, 0, 0, 0, 72, 74, - 80, 81, 82, 83, 84, 0, 73, 0, 0, 75, - 76, 162, 77, 78, 79, 0, 0, 0, 0, 0, - 75, 76, 0, 77, 78, 79, 0, 0, 72, 74, - 80, 81, 82, 83, 84, 0, 73, 0, 275, 75, - 76, 0, 77, 78, 79, 0, 0, 0, 0, 0, - 0, 0, 72, 74, 80, 81, 82, 83, 84, 0, - 73, 0, 266, 72, 74, 80, 81, 82, 83, 84, - 0, 73, 0, 265, 75, 76, 0, 77, 78, 79, - 0, 0, 72, 74, 80, 81, 82, 83, 84, 0, - 73, 0, 238, 0, 0, 0, 75, 76, 0, 77, - 78, 79, 274, 0, 0, 75, 76, 0, 77, 78, - 79, 0, 0, 0, 0, 0, 0, 72, 74, 80, - 81, 82, 83, 84, 151, 73, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, - 74, 80, 81, 82, 83, 84, 0, 73, 72, 74, - 80, 81, 82, 83, 84, 0, 73, + 0, 198, } var yyPact = [...]int16{ - 181, -1000, -1000, -39, -1000, 387, 66, 621, -1000, 1071, - -1000, 535, 289, 678, 678, 535, 535, 154, 119, 115, - 164, 113, -1000, -1000, -1000, -1000, -1000, 13, -1000, -1000, - 139, -1000, 535, 678, 678, 358, 485, 127, -1000, -1000, - -1000, -1000, -1000, -1000, -1000, -1000, 1, -1000, 53, 52, - 51, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, + 238, -1000, -1000, -48, 406, 98, 643, -1000, -1000, -1000, + 112, 150, 139, 557, 158, 184, 170, 189, 173, -1000, + -1000, -1000, -1000, -1000, 18, -1000, 368, 506, -1000, 665, + 665, 144, -1000, 557, 665, 665, 665, 174, 557, -1000, + -1000, -1000, -1000, -1000, -1000, -1000, -1000, -22, -1000, 90, + 86, 82, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, 535, -1000, 535, 535, 535, 535, 535, 535, - 535, 535, 535, 535, 535, -1000, 1071, 0, -1000, -1000, - -1000, 113, 302, 241, 89, 1062, 535, 98, 86, 174, - -39, 2, -1000, -1000, 535, -1000, 921, 71, 71, -1000, - -12, -1000, 49, 48, 535, -1000, -1000, -1000, -1000, 758, - -1000, 160, -1000, 588, 40, 40, 40, 1071, 153, 153, - 561, 201, 219, 67, 79, 79, 43, 43, 43, 131, - -1000, -1000, 0, 654, -1000, -1000, -1000, 39, 535, 0, - 0, 535, -1000, 535, 535, 162, 64, -1000, 535, 162, - -5, 1071, -1000, -1000, 273, 678, 678, 897, -1000, -1000, - -1000, 535, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, 7, -1000, -1000, 535, 0, 5, -1000, -13, - -1000, 46, 41, 535, -1000, -1000, 435, 734, 12, 11, - 1071, -1000, 1071, -39, -1000, -1000, -1000, 1005, -30, -1000, - -1000, 535, -1000, -1000, 77, 71, 77, 16, 867, -1000, - -20, -1000, 1071, -1000, -1000, 0, -1000, 654, 0, 0, - 843, -1000, 703, -1000, 535, 535, 117, 57, -1000, -4, - 162, 1071, 678, 678, -1000, -1000, 40, -1000, -1000, -1000, - -1000, -29, -1000, 986, 975, 101, 535, 535, -1000, 535, - -1000, 71, 77, -1000, 0, 535, 535, -1000, 1040, 1071, - 951, -1000, 813, 172, 535, -1000, -1000, -1000, 535, 1071, - 789, -1000, + -1000, -1000, -1000, 557, 557, 225, -48, -1000, 112, -1, + -1000, -1000, -1000, 173, 312, 115, 665, 665, 665, 665, + 665, 665, 665, 665, 665, 665, -5, -1000, -1000, 557, + -1000, -27, -1000, 78, 46, 557, -1000, -1000, -1000, -1000, + 35, 557, 65, 65, -1000, 210, 162, 65, 445, 350, + -1000, 119, 229, -1000, 613, 30, 30, 30, 112, -1000, + 217, 96, -1000, 202, -1000, -1000, -1, 721, -1000, -1000, + -1000, 29, 557, 557, 170, 499, 267, 358, 256, 175, + 175, -1000, -1000, -1000, 557, 217, 40, 112, -1000, 274, + 665, 665, 103, -1000, 557, -1000, 665, -1, -1, -1000, + -1000, -1000, 557, -1000, -1000, -1000, -1000, -1000, -1000, -1000, + -1000, -1000, -1000, 6, -1000, -1000, -48, -1000, -1000, -1000, + 557, -1, 33, -1000, -32, -1000, 45, 44, 557, -1000, + -1000, 455, 32, 112, 177, 28, -1000, -1000, 557, -1000, + -1000, 110, 170, 110, 43, 112, -1000, 1, -16, 100, + -1000, 22, -1000, 94, 112, -1000, -1000, -1, -1000, 721, + -1, -1, 92, -1000, -2, -1000, -1000, 7, 217, 112, + 665, 665, 230, 557, 557, -1000, -1000, 30, -1000, -1000, + -1000, -1000, -1000, -21, -1000, 557, -1000, 110, 110, 212, + 557, 557, 159, 147, -1000, -1, 138, -1000, 195, 112, + 557, 557, -1000, -1000, 557, 60, -28, 112, -1000, -1000, + 557, 3, -1000, } var yyPgo = [...]int16{ - 0, 264, 263, 262, 261, 259, 12, 214, 195, 244, - 0, 241, 13, 240, 234, 10, 4, 9, 233, 20, - 230, 218, 217, 215, 213, 8, 1, 2, 7, 199, - 15, 198, 196, 5, 193, 192, 14, 3, + 0, 356, 353, 352, 351, 14, 336, 259, 265, 335, + 0, 333, 1, 326, 324, 7, 36, 22, 8, 323, + 12, 322, 321, 315, 314, 313, 3, 9, 6, 13, + 310, 10, 280, 260, 2, 255, 240, 11, 4, } var yyR1 = [...]int8{ 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, - 6, 6, 7, 7, 8, 8, 9, 9, 33, 33, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 11, 11, 12, 12, 12, 13, 13, 14, - 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 17, 17, 18, 18, 18, 34, - 34, 35, 35, 19, 19, 19, 19, 19, 20, 20, - 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, - 25, 25, 25, 37, 37, 37, 26, 26, 27, 27, - 27, 27, 27, 27, 27, 28, 28, 28, 29, 29, - 30, 30, 30, 31, 31, 32, 32, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, + 6, 6, 7, 7, 8, 8, 9, 9, 34, 34, + 10, 10, 10, 10, 10, 10, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 11, 11, 12, + 12, 12, 13, 13, 14, 14, 15, 15, 15, 15, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 18, 18, 19, 19, 19, 35, + 35, 36, 36, 20, 20, 20, 20, 20, 21, 21, + 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, + 26, 26, 26, 38, 38, 38, 27, 27, 28, 28, + 28, 28, 28, 28, 28, 29, 29, 29, 30, 30, + 31, 31, 31, 32, 32, 33, 33, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, } var yyR2 = [...]int8{ - 0, 2, 0, 3, 2, 2, 0, 2, 6, 4, - 0, 1, 0, 2, 5, 8, 1, 3, 1, 1, - 2, 3, 5, 9, 9, 11, 7, 3, 4, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 1, 1, 3, 1, 3, 3, 1, 3, 1, - 3, 3, 3, 5, 1, 1, 1, 1, 2, 2, - 1, 1, 1, 1, 4, 1, 1, 1, 2, 1, - 3, 2, 2, 2, 3, 4, 2, 3, 2, 2, + 0, 3, 0, 3, 0, 2, 6, 4, 0, 1, + 1, 1, 0, 2, 5, 8, 1, 3, 1, 1, + 2, 3, 5, 4, 3, 1, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 1, 1, 3, 1, + 3, 3, 1, 3, 1, 3, 3, 3, 5, 1, + 1, 1, 1, 2, 2, 1, 1, 1, 1, 4, + 1, 2, 3, 4, 2, 3, 1, 2, 2, 1, + 2, 1, 7, 3, 9, 9, 11, 2, 3, 2, 2, 2, 3, 3, 1, 3, 0, 2, 4, 1, 1, 1, 1, 2, 3, 4, 4, 5, 1, 3, 0, 5, 0, 2, 0, 2, 1, 3, 3, 3, - 5, 1, 1, 1, 1, 1, 1, 3, 1, 1, + 5, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 1, 3, 3, 3, 3, 2, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -378,67 +329,67 @@ var yyR2 = [...]int8{ } var yyChk = [...]int16{ - -1000, -1, -2, 10, -3, -4, -28, 62, -7, -10, - -5, -8, -16, 38, 39, 31, 36, 15, 11, 12, - 13, 54, 40, 24, 17, 18, 19, -34, -35, 25, - 26, -17, 59, 49, 50, 62, 56, 16, 20, 22, - 21, 23, 27, 28, 57, 63, -29, -30, 20, -36, - 27, 7, 8, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 47, 55, 48, 4, 5, 7, 8, 9, - 49, 50, 51, 52, 53, -7, -10, 14, 24, -19, - 55, 54, 56, -16, -16, -10, -8, -10, 21, 27, - 27, 20, -19, -17, 59, -17, -10, -16, -16, 63, - -24, -25, -37, -17, 59, 20, 21, -36, 61, -10, - 21, -18, 63, 48, 58, 58, 58, -10, -10, -10, - -10, -10, -10, -10, -10, -10, -10, -10, -10, -11, - -12, 21, 56, 62, -19, -17, 61, -10, 58, 14, - 14, 32, -23, 37, 47, 14, -6, -28, 58, 59, - -20, -10, 60, 63, 48, 58, 58, -10, 61, 30, - 27, 29, 63, -30, -27, -28, -31, 25, 27, 17, - 18, 19, 56, -27, -27, 47, 6, -13, -12, -14, - -15, -37, -17, 59, 21, 61, 58, -10, -12, -12, - -10, -10, -10, -33, 20, 21, 57, -10, -9, -33, - 60, 57, 63, -25, -26, -16, -26, 60, -10, 61, - -32, -27, -10, -12, 61, 48, 63, 48, 58, 58, - -10, 61, -10, 61, 59, 59, -21, -6, 57, 60, - 57, -10, 47, 58, 60, 61, 48, -12, -15, -12, - -12, 60, 61, -10, -10, -22, 33, 34, 57, 58, - -33, -16, -26, -27, 58, 57, 57, 35, -10, -10, - -10, -12, -10, -10, 32, 57, 60, 60, 57, -10, - -10, 60, + -1000, -1, -2, 10, -3, -29, 63, -6, -4, -7, + -10, 11, 12, -8, -17, 15, -16, 13, 54, 33, + 34, 17, 18, 19, -35, -36, 63, 57, 35, 49, + 50, 36, -18, 20, 25, 27, 28, 16, 60, 29, + 31, 30, 32, 37, 38, 58, 64, -30, -31, 29, + -37, 37, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, 28, 47, 48, 37, 37, -7, -10, 14, + 34, -20, 55, 54, 57, 30, 4, 5, 8, 9, + 7, 49, 50, 51, 52, 53, 29, -20, -18, 60, + 64, -25, -26, -38, -18, 60, 29, 30, -37, 62, + -10, -8, -17, -17, -18, -10, -16, -17, -16, -16, + 30, -10, -19, 64, 48, 59, 59, 59, -10, -10, + 14, -5, -29, -11, -12, 30, 57, 63, -20, -18, + 62, -10, 59, 47, -16, -16, -16, -16, -16, -16, + -16, -16, -16, -16, 59, 60, -21, -10, 64, 48, + 59, 59, -10, 62, 21, -24, 26, 14, 14, 61, + 40, 37, 39, 64, -31, -28, -29, -32, 35, 37, + 17, 18, 19, 57, -28, -28, -34, 29, 30, 58, + 47, 6, -13, -12, -14, -15, -38, -18, 60, 30, + 62, 59, -10, -10, -10, -9, -34, 61, 58, 64, + -26, -27, -16, -27, 61, -10, -16, -12, -12, -10, + 62, -33, -28, -5, -10, -12, 62, 48, 64, 48, + 59, 59, -10, 62, -10, 62, 58, 61, 58, -10, + 47, 59, -22, 60, 60, 61, 62, 48, 58, -12, + -15, -12, -12, 61, 62, 59, -34, -27, -27, -23, + 22, 23, -10, -10, -28, 59, -10, 24, -10, -10, + 58, 58, -12, 58, 21, -10, -10, -10, 61, 61, + 58, -10, 61, } var yyDef = [...]int16{ - 2, -2, 6, 0, 1, 12, 0, 0, 4, 5, - 7, 12, 41, 0, 0, 0, 0, 0, 0, 0, - 0, 55, 56, 57, 60, 61, 62, 63, 65, 66, - 67, 69, 0, 0, 0, 0, 0, 0, 89, 90, - 91, 92, 84, 86, 3, 125, 0, 128, 0, 0, - 0, 137, 138, 139, 140, 141, 142, 143, 144, 145, - 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, - 156, 157, 0, 29, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 13, 20, 0, 79, 80, - 81, 0, 0, 0, 0, 0, 0, -2, 0, 0, - 10, 0, 58, 59, 0, 68, 0, 71, 72, 73, - 0, 106, 111, 112, 0, 113, 114, 115, 76, 0, - 78, 0, 126, 0, 0, 0, 0, 21, 30, 31, - -2, 33, 34, -2, 36, 37, 38, 39, 40, 0, - 42, 44, 0, 0, 82, 83, 93, 0, 0, 0, - 0, 0, 27, 0, 0, 0, 0, 11, 0, 0, - 0, 98, 70, 74, 0, 0, 0, 0, 77, 85, - 87, 0, 127, 129, 130, 118, 119, 120, 121, 122, - 123, 124, 0, 131, 132, 0, 0, 0, 47, 0, - 49, 0, 0, 0, -2, 94, 0, 0, 0, 0, - 100, 105, 28, 10, 18, 19, 9, 0, 0, 16, - 64, 0, 75, 107, 108, 116, 109, 0, 0, 133, - 0, 135, 22, 43, 45, 0, 46, 0, 0, 0, - 0, 95, 0, 96, 0, 0, 102, 0, 14, 0, - 0, 99, 0, 0, 88, 134, 0, 48, 50, 51, - 52, 0, 97, 0, 0, 0, 0, 0, 8, 0, - 17, 117, 110, 136, 0, 0, 0, 26, 0, 103, - 0, 53, 0, 0, 0, 15, 23, 24, 0, 101, - 0, 25, + 2, -2, 4, 0, 12, 0, 0, 1, 5, 10, + 11, 0, 0, 12, 36, 0, 25, 0, 50, 51, + 52, 55, 56, 57, 58, 60, 0, 0, 66, 0, + 0, 69, 71, 0, 0, 0, 0, 0, 0, 89, + 90, 91, 92, 84, 86, 3, 125, 0, 128, 0, + 0, 0, 137, 138, 139, 140, 141, 142, 143, 144, + 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, + 155, 156, 157, 0, 0, 0, 8, 13, 20, 0, + 79, 80, 81, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 53, 54, 0, + 61, 0, 106, 111, 112, 0, 113, 114, 115, 64, + 0, 0, 67, 68, 70, 0, 104, 36, 0, 0, + 77, 0, 0, 126, 0, 0, 0, 0, 21, 24, + 0, 0, 9, 0, 37, 39, 0, 0, 82, 83, + 93, 0, 0, 0, 26, -2, 28, 29, -2, 31, + 32, 33, 34, 35, 0, 0, 0, 98, 62, 0, + 0, 0, 0, 65, 0, 73, 0, 0, 0, 78, + 85, 87, 0, 127, 129, 130, 118, 119, 120, 121, + 122, 123, 124, 0, 131, 132, 8, 18, 19, 7, + 0, 0, 0, 42, 0, 44, 0, 0, 0, -2, + 94, 0, 0, 23, 0, 0, 16, 59, 0, 63, + 107, 108, 117, 109, 0, 100, 105, 0, 0, 0, + 133, 0, 135, 0, 22, 38, 40, 0, 41, 0, + 0, 0, 0, 95, 0, 96, 14, 0, 0, 99, + 0, 0, 102, 0, 0, 88, 134, 0, 6, 43, + 45, 46, 47, 0, 97, 0, 17, 116, 110, 0, + 0, 0, 0, 0, 136, 0, 0, 72, 0, 103, + 0, 0, 48, 15, 0, 0, 0, 101, 74, 75, + 0, 0, 76, } var yyTok1 = [...]int8{ @@ -446,15 +397,15 @@ var yyTok1 = [...]int8{ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 53, 3, 3, - 59, 60, 51, 49, 48, 50, 54, 52, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 58, 57, + 60, 61, 51, 49, 48, 50, 54, 52, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 59, 58, 3, 3, 3, 55, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 56, 3, 61, 3, 3, 3, 3, 3, 3, + 3, 57, 3, 62, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 62, 47, 63, + 3, 3, 3, 63, 47, 64, } var yyTok2 = [...]int8{ @@ -462,7 +413,7 @@ var yyTok2 = [...]int8{ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, - 42, 43, 44, 45, 46, + 42, 43, 44, 45, 46, 56, } var yyTok3 = [...]int8{ @@ -807,933 +758,756 @@ yydefault: switch yynt { case 1: - yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:73 + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.go.y:58 { - if yyDollar[1].value != nil { - yyDollar[2].value.(*Query).Meta = yyDollar[1].value.(*ConstObject) - } - yylex.(*lexer).result = yyDollar[2].value.(*Query) + query := yyDollar[3].value.(*Query) + query.Meta = yyDollar[1].value.(*ConstObject) + query.Imports = yyDollar[2].value.([]*Import) + yylex.(*lexer).result = query } case 2: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.go.y:80 +//line parser.go.y:67 { - yyVAL.value = nil + yyVAL.value = (*ConstObject)(nil) } case 3: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:84 +//line parser.go.y:71 { yyVAL.value = yyDollar[2].value } case 4: - yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:90 - { - yyVAL.value = &Query{Imports: yyDollar[1].value.([]*Import), FuncDefs: reverseFuncDef(yyDollar[2].value.([]*FuncDef)), Term: &Term{Type: TermTypeIdentity}} - } - case 5: - yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:94 - { - if yyDollar[1].value != nil { - yyDollar[2].value.(*Query).Imports = yyDollar[1].value.([]*Import) - } - yyVAL.value = yyDollar[2].value - } - case 6: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.go.y:101 +//line parser.go.y:77 { yyVAL.value = []*Import(nil) } - case 7: + case 5: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:105 +//line parser.go.y:81 { yyVAL.value = append(yyDollar[1].value.([]*Import), yyDollar[2].value.(*Import)) } - case 8: + case 6: yyDollar = yyS[yypt-6 : yypt+1] -//line parser.go.y:111 +//line parser.go.y:87 { yyVAL.value = &Import{ImportPath: yyDollar[2].token, ImportAlias: yyDollar[4].token, Meta: yyDollar[5].value.(*ConstObject)} } - case 9: + case 7: yyDollar = yyS[yypt-4 : yypt+1] -//line parser.go.y:115 +//line parser.go.y:91 { yyVAL.value = &Import{IncludePath: yyDollar[2].token, Meta: yyDollar[3].value.(*ConstObject)} } - case 10: + case 8: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.go.y:121 +//line parser.go.y:97 { yyVAL.value = (*ConstObject)(nil) } - case 11: + case 10: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:124 +//line parser.go.y:104 { + yyVAL.value = &Query{FuncDefs: reverseFuncDef(yyDollar[1].value.([]*FuncDef))} } case 12: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.go.y:128 +//line parser.go.y:111 { yyVAL.value = []*FuncDef(nil) } case 13: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:132 +//line parser.go.y:115 { yyVAL.value = append(yyDollar[2].value.([]*FuncDef), yyDollar[1].value.(*FuncDef)) } case 14: yyDollar = yyS[yypt-5 : yypt+1] -//line parser.go.y:138 +//line parser.go.y:121 { yyVAL.value = &FuncDef{Name: yyDollar[2].token, Body: yyDollar[4].value.(*Query)} } case 15: yyDollar = yyS[yypt-8 : yypt+1] -//line parser.go.y:142 +//line parser.go.y:125 { yyVAL.value = &FuncDef{yyDollar[2].token, yyDollar[4].value.([]string), yyDollar[7].value.(*Query)} } case 16: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:148 +//line parser.go.y:131 { yyVAL.value = []string{yyDollar[1].token} } case 17: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:152 +//line parser.go.y:135 { yyVAL.value = append(yyDollar[1].value.([]string), yyDollar[3].token) } - case 18: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:157 - { - } - case 19: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:158 - { - } case 20: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:162 +//line parser.go.y:145 { - yyDollar[2].value.(*Query).FuncDefs = prependFuncDef(yyDollar[2].value.(*Query).FuncDefs, yyDollar[1].value.(*FuncDef)) - yyVAL.value = yyDollar[2].value + query := yyDollar[2].value.(*Query) + query.FuncDefs = prependFuncDef(query.FuncDefs, yyDollar[1].value.(*FuncDef)) + yyVAL.value = query } case 21: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:167 +//line parser.go.y:151 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpPipe, Right: yyDollar[3].value.(*Query)} } case 22: yyDollar = yyS[yypt-5 : yypt+1] -//line parser.go.y:171 +//line parser.go.y:155 { - yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, &Suffix{Bind: &Bind{yyDollar[3].value.([]*Pattern), yyDollar[5].value.(*Query)}}) - yyVAL.value = &Query{Term: yyDollar[1].value.(*Term)} + term := yyDollar[1].value.(*Term) + term.SuffixList = append(term.SuffixList, &Suffix{Bind: &Bind{yyDollar[3].value.([]*Pattern), yyDollar[5].value.(*Query)}}) + yyVAL.value = &Query{Term: term} } case 23: - yyDollar = yyS[yypt-9 : yypt+1] -//line parser.go.y:176 - { - yyVAL.value = &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{yyDollar[2].value.(*Term), yyDollar[4].value.(*Pattern), yyDollar[6].value.(*Query), yyDollar[8].value.(*Query)}}} - } - case 24: - yyDollar = yyS[yypt-9 : yypt+1] -//line parser.go.y:180 - { - yyVAL.value = &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{yyDollar[2].value.(*Term), yyDollar[4].value.(*Pattern), yyDollar[6].value.(*Query), yyDollar[8].value.(*Query), nil}}} - } - case 25: - yyDollar = yyS[yypt-11 : yypt+1] -//line parser.go.y:184 - { - yyVAL.value = &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{yyDollar[2].value.(*Term), yyDollar[4].value.(*Pattern), yyDollar[6].value.(*Query), yyDollar[8].value.(*Query), yyDollar[10].value.(*Query)}}} - } - case 26: - yyDollar = yyS[yypt-7 : yypt+1] -//line parser.go.y:188 - { - yyVAL.value = &Query{Term: &Term{Type: TermTypeIf, If: &If{yyDollar[2].value.(*Query), yyDollar[4].value.(*Query), yyDollar[5].value.([]*IfElif), yyDollar[6].value.(*Query)}}} - } - case 27: - yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:192 - { - yyVAL.value = &Query{Term: &Term{Type: TermTypeTry, Try: &Try{yyDollar[2].value.(*Query), yyDollar[3].value.(*Query)}}} - } - case 28: yyDollar = yyS[yypt-4 : yypt+1] -//line parser.go.y:196 +//line parser.go.y:161 { yyVAL.value = &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{yyDollar[2].token, yyDollar[4].value.(*Query)}}} } - case 29: - yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:200 - { - if t := yyDollar[1].value.(*Query).Term; t != nil { - t.SuffixList = append(t.SuffixList, &Suffix{Optional: true}) - } else { - yyVAL.value = &Query{Term: &Term{Type: TermTypeQuery, Query: yyDollar[1].value.(*Query), SuffixList: []*Suffix{{Optional: true}}}} - } - } - case 30: + case 24: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:208 +//line parser.go.y:165 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpComma, Right: yyDollar[3].value.(*Query)} } - case 31: + case 26: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:212 +//line parser.go.y:172 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: yyDollar[2].operator, Right: yyDollar[3].value.(*Query)} } - case 32: + case 27: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:216 +//line parser.go.y:176 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: yyDollar[2].operator, Right: yyDollar[3].value.(*Query)} } - case 33: + case 28: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:220 +//line parser.go.y:180 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpOr, Right: yyDollar[3].value.(*Query)} } - case 34: + case 29: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:224 +//line parser.go.y:184 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpAnd, Right: yyDollar[3].value.(*Query)} } - case 35: + case 30: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:228 +//line parser.go.y:188 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: yyDollar[2].operator, Right: yyDollar[3].value.(*Query)} } - case 36: + case 31: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:232 +//line parser.go.y:192 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpAdd, Right: yyDollar[3].value.(*Query)} } - case 37: + case 32: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:236 +//line parser.go.y:196 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpSub, Right: yyDollar[3].value.(*Query)} } - case 38: + case 33: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:240 +//line parser.go.y:200 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpMul, Right: yyDollar[3].value.(*Query)} } - case 39: + case 34: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:244 +//line parser.go.y:204 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpDiv, Right: yyDollar[3].value.(*Query)} } - case 40: + case 35: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:248 +//line parser.go.y:208 { yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpMod, Right: yyDollar[3].value.(*Query)} } - case 41: + case 36: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:252 +//line parser.go.y:212 { yyVAL.value = &Query{Term: yyDollar[1].value.(*Term)} } - case 42: + case 37: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:258 +//line parser.go.y:218 { yyVAL.value = []*Pattern{yyDollar[1].value.(*Pattern)} } - case 43: + case 38: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:262 +//line parser.go.y:222 { yyVAL.value = append(yyDollar[1].value.([]*Pattern), yyDollar[3].value.(*Pattern)) } - case 44: + case 39: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:268 +//line parser.go.y:228 { yyVAL.value = &Pattern{Name: yyDollar[1].token} } - case 45: + case 40: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:272 +//line parser.go.y:232 { yyVAL.value = &Pattern{Array: yyDollar[2].value.([]*Pattern)} } - case 46: + case 41: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:276 +//line parser.go.y:236 { yyVAL.value = &Pattern{Object: yyDollar[2].value.([]*PatternObject)} } - case 47: + case 42: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:282 +//line parser.go.y:242 { yyVAL.value = []*Pattern{yyDollar[1].value.(*Pattern)} } - case 48: + case 43: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:286 +//line parser.go.y:246 { yyVAL.value = append(yyDollar[1].value.([]*Pattern), yyDollar[3].value.(*Pattern)) } - case 49: + case 44: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:292 +//line parser.go.y:252 { yyVAL.value = []*PatternObject{yyDollar[1].value.(*PatternObject)} } - case 50: + case 45: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:296 +//line parser.go.y:256 { yyVAL.value = append(yyDollar[1].value.([]*PatternObject), yyDollar[3].value.(*PatternObject)) } - case 51: + case 46: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:302 +//line parser.go.y:262 { yyVAL.value = &PatternObject{Key: yyDollar[1].token, Val: yyDollar[3].value.(*Pattern)} } - case 52: + case 47: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:306 +//line parser.go.y:266 { yyVAL.value = &PatternObject{KeyString: yyDollar[1].value.(*String), Val: yyDollar[3].value.(*Pattern)} } - case 53: + case 48: yyDollar = yyS[yypt-5 : yypt+1] -//line parser.go.y:310 +//line parser.go.y:270 { yyVAL.value = &PatternObject{KeyQuery: yyDollar[2].value.(*Query), Val: yyDollar[5].value.(*Pattern)} } - case 54: + case 49: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:314 +//line parser.go.y:274 { yyVAL.value = &PatternObject{Key: yyDollar[1].token} } - case 55: + case 50: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:320 +//line parser.go.y:280 { yyVAL.value = &Term{Type: TermTypeIdentity} } - case 56: + case 51: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:324 +//line parser.go.y:284 { yyVAL.value = &Term{Type: TermTypeRecurse} } - case 57: + case 52: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:328 +//line parser.go.y:288 { yyVAL.value = &Term{Type: TermTypeIndex, Index: &Index{Name: yyDollar[1].token}} } - case 58: + case 53: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:332 +//line parser.go.y:292 { - if yyDollar[2].value.(*Suffix).Iter { - yyVAL.value = &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{yyDollar[2].value.(*Suffix)}} + suffix := yyDollar[2].value.(*Suffix) + if suffix.Iter { + yyVAL.value = &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{suffix}} } else { - yyVAL.value = &Term{Type: TermTypeIndex, Index: yyDollar[2].value.(*Suffix).Index} + yyVAL.value = &Term{Type: TermTypeIndex, Index: suffix.Index} } } - case 59: + case 54: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:340 +//line parser.go.y:301 { yyVAL.value = &Term{Type: TermTypeIndex, Index: &Index{Str: yyDollar[2].value.(*String)}} } - case 60: + case 55: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:344 +//line parser.go.y:305 { yyVAL.value = &Term{Type: TermTypeNull} } - case 61: + case 56: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:348 +//line parser.go.y:309 { yyVAL.value = &Term{Type: TermTypeTrue} } - case 62: + case 57: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:352 +//line parser.go.y:313 { yyVAL.value = &Term{Type: TermTypeFalse} } - case 63: + case 58: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:356 +//line parser.go.y:317 { yyVAL.value = &Term{Type: TermTypeFunc, Func: &Func{Name: yyDollar[1].token}} } - case 64: + case 59: yyDollar = yyS[yypt-4 : yypt+1] -//line parser.go.y:360 +//line parser.go.y:321 { yyVAL.value = &Term{Type: TermTypeFunc, Func: &Func{Name: yyDollar[1].token, Args: yyDollar[3].value.([]*Query)}} } - case 65: + case 60: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:364 +//line parser.go.y:325 { yyVAL.value = &Term{Type: TermTypeFunc, Func: &Func{Name: yyDollar[1].token}} } + case 61: + yyDollar = yyS[yypt-2 : yypt+1] +//line parser.go.y:329 + { + yyVAL.value = &Term{Type: TermTypeObject, Object: &Object{}} + } + case 62: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.go.y:333 + { + yyVAL.value = &Term{Type: TermTypeObject, Object: &Object{yyDollar[2].value.([]*ObjectKeyVal)}} + } + case 63: + yyDollar = yyS[yypt-4 : yypt+1] +//line parser.go.y:337 + { + yyVAL.value = &Term{Type: TermTypeObject, Object: &Object{yyDollar[2].value.([]*ObjectKeyVal)}} + } + case 64: + yyDollar = yyS[yypt-2 : yypt+1] +//line parser.go.y:341 + { + yyVAL.value = &Term{Type: TermTypeArray, Array: &Array{}} + } + case 65: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.go.y:345 + { + yyVAL.value = &Term{Type: TermTypeArray, Array: &Array{yyDollar[2].value.(*Query)}} + } case 66: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:368 +//line parser.go.y:349 { yyVAL.value = &Term{Type: TermTypeNumber, Number: yyDollar[1].token} } case 67: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:372 + yyDollar = yyS[yypt-2 : yypt+1] +//line parser.go.y:353 { - yyVAL.value = &Term{Type: TermTypeFormat, Format: yyDollar[1].token} + yyVAL.value = &Term{Type: TermTypeUnary, Unary: &Unary{OpAdd, yyDollar[2].value.(*Term)}} } case 68: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:376 +//line parser.go.y:357 { - yyVAL.value = &Term{Type: TermTypeFormat, Format: yyDollar[1].token, Str: yyDollar[2].value.(*String)} + yyVAL.value = &Term{Type: TermTypeUnary, Unary: &Unary{OpSub, yyDollar[2].value.(*Term)}} } case 69: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:380 +//line parser.go.y:361 { - yyVAL.value = &Term{Type: TermTypeString, Str: yyDollar[1].value.(*String)} + yyVAL.value = &Term{Type: TermTypeFormat, Format: yyDollar[1].token} } case 70: - yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:384 + yyDollar = yyS[yypt-2 : yypt+1] +//line parser.go.y:365 { - yyVAL.value = &Term{Type: TermTypeQuery, Query: yyDollar[2].value.(*Query)} + yyVAL.value = &Term{Type: TermTypeFormat, Format: yyDollar[1].token, Str: yyDollar[2].value.(*String)} } case 71: - yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:388 + yyDollar = yyS[yypt-1 : yypt+1] +//line parser.go.y:369 { - yyVAL.value = &Term{Type: TermTypeUnary, Unary: &Unary{OpAdd, yyDollar[2].value.(*Term)}} + yyVAL.value = &Term{Type: TermTypeString, Str: yyDollar[1].value.(*String)} } case 72: - yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:392 + yyDollar = yyS[yypt-7 : yypt+1] +//line parser.go.y:373 { - yyVAL.value = &Term{Type: TermTypeUnary, Unary: &Unary{OpSub, yyDollar[2].value.(*Term)}} + yyVAL.value = &Term{Type: TermTypeIf, If: &If{yyDollar[2].value.(*Query), yyDollar[4].value.(*Query), yyDollar[5].value.([]*IfElif), yyDollar[6].value.(*Query)}} } case 73: - yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:396 + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.go.y:377 { - yyVAL.value = &Term{Type: TermTypeObject, Object: &Object{}} + yyVAL.value = &Term{Type: TermTypeTry, Try: &Try{yyDollar[2].value.(*Query), yyDollar[3].value.(*Query)}} } case 74: - yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:400 + yyDollar = yyS[yypt-9 : yypt+1] +//line parser.go.y:381 { - yyVAL.value = &Term{Type: TermTypeObject, Object: &Object{yyDollar[2].value.([]*ObjectKeyVal)}} + yyVAL.value = &Term{Type: TermTypeReduce, Reduce: &Reduce{yyDollar[2].value.(*Query), yyDollar[4].value.(*Pattern), yyDollar[6].value.(*Query), yyDollar[8].value.(*Query)}} } case 75: - yyDollar = yyS[yypt-4 : yypt+1] -//line parser.go.y:404 + yyDollar = yyS[yypt-9 : yypt+1] +//line parser.go.y:385 { - yyVAL.value = &Term{Type: TermTypeObject, Object: &Object{yyDollar[2].value.([]*ObjectKeyVal)}} + yyVAL.value = &Term{Type: TermTypeForeach, Foreach: &Foreach{yyDollar[2].value.(*Query), yyDollar[4].value.(*Pattern), yyDollar[6].value.(*Query), yyDollar[8].value.(*Query), nil}} } case 76: - yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:408 + yyDollar = yyS[yypt-11 : yypt+1] +//line parser.go.y:389 { - yyVAL.value = &Term{Type: TermTypeArray, Array: &Array{}} + yyVAL.value = &Term{Type: TermTypeForeach, Foreach: &Foreach{yyDollar[2].value.(*Query), yyDollar[4].value.(*Pattern), yyDollar[6].value.(*Query), yyDollar[8].value.(*Query), yyDollar[10].value.(*Query)}} } case 77: - yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:412 + yyDollar = yyS[yypt-2 : yypt+1] +//line parser.go.y:393 { - yyVAL.value = &Term{Type: TermTypeArray, Array: &Array{yyDollar[2].value.(*Query)}} + yyVAL.value = &Term{Type: TermTypeBreak, Break: yyDollar[2].token} } case 78: - yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:416 + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.go.y:397 { - yyVAL.value = &Term{Type: TermTypeBreak, Break: yyDollar[2].token} + yyVAL.value = &Term{Type: TermTypeQuery, Query: yyDollar[2].value.(*Query)} } case 79: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:420 +//line parser.go.y:401 { yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, &Suffix{Index: &Index{Name: yyDollar[2].token}}) } case 80: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:424 +//line parser.go.y:405 { yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, yyDollar[2].value.(*Suffix)) } case 81: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:428 +//line parser.go.y:409 { yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, &Suffix{Optional: true}) } case 82: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:432 +//line parser.go.y:413 { yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, yyDollar[3].value.(*Suffix)) } case 83: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:436 +//line parser.go.y:417 { yyDollar[1].value.(*Term).SuffixList = append(yyDollar[1].value.(*Term).SuffixList, &Suffix{Index: &Index{Str: yyDollar[3].value.(*String)}}) } case 84: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:442 +//line parser.go.y:423 { yyVAL.value = &String{Str: yyDollar[1].token} } case 85: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:446 +//line parser.go.y:427 { yyVAL.value = &String{Queries: yyDollar[2].value.([]*Query)} } case 86: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.go.y:452 +//line parser.go.y:433 { yyVAL.value = []*Query{} } case 87: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:456 +//line parser.go.y:437 { yyVAL.value = append(yyDollar[1].value.([]*Query), &Query{Term: &Term{Type: TermTypeString, Str: &String{Str: yyDollar[2].token}}}) } case 88: yyDollar = yyS[yypt-4 : yypt+1] -//line parser.go.y:460 +//line parser.go.y:441 { yylex.(*lexer).inString = true yyVAL.value = append(yyDollar[1].value.([]*Query), &Query{Term: &Term{Type: TermTypeQuery, Query: yyDollar[3].value.(*Query)}}) } - case 89: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:466 - { - } - case 90: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:467 - { - } - case 91: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:470 - { - } - case 92: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:471 - { - } case 93: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:475 +//line parser.go.y:456 { yyVAL.value = &Suffix{Iter: true} } case 94: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:479 +//line parser.go.y:460 { yyVAL.value = &Suffix{Index: &Index{Start: yyDollar[2].value.(*Query)}} } case 95: yyDollar = yyS[yypt-4 : yypt+1] -//line parser.go.y:483 +//line parser.go.y:464 { yyVAL.value = &Suffix{Index: &Index{Start: yyDollar[2].value.(*Query), IsSlice: true}} } case 96: yyDollar = yyS[yypt-4 : yypt+1] -//line parser.go.y:487 +//line parser.go.y:468 { yyVAL.value = &Suffix{Index: &Index{End: yyDollar[3].value.(*Query), IsSlice: true}} } case 97: yyDollar = yyS[yypt-5 : yypt+1] -//line parser.go.y:491 +//line parser.go.y:472 { yyVAL.value = &Suffix{Index: &Index{Start: yyDollar[2].value.(*Query), End: yyDollar[4].value.(*Query), IsSlice: true}} } case 98: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:497 +//line parser.go.y:478 { yyVAL.value = []*Query{yyDollar[1].value.(*Query)} } case 99: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:501 +//line parser.go.y:482 { yyVAL.value = append(yyDollar[1].value.([]*Query), yyDollar[3].value.(*Query)) } case 100: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.go.y:507 +//line parser.go.y:488 { yyVAL.value = []*IfElif(nil) } case 101: yyDollar = yyS[yypt-5 : yypt+1] -//line parser.go.y:511 +//line parser.go.y:492 { yyVAL.value = append(yyDollar[1].value.([]*IfElif), &IfElif{yyDollar[3].value.(*Query), yyDollar[5].value.(*Query)}) } case 102: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.go.y:517 +//line parser.go.y:498 { yyVAL.value = (*Query)(nil) } case 103: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:521 +//line parser.go.y:502 { yyVAL.value = yyDollar[2].value } case 104: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.go.y:527 +//line parser.go.y:508 { yyVAL.value = (*Query)(nil) } case 105: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:531 +//line parser.go.y:512 { yyVAL.value = yyDollar[2].value } case 106: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:537 +//line parser.go.y:518 { yyVAL.value = []*ObjectKeyVal{yyDollar[1].value.(*ObjectKeyVal)} } case 107: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:541 +//line parser.go.y:522 { yyVAL.value = append(yyDollar[1].value.([]*ObjectKeyVal), yyDollar[3].value.(*ObjectKeyVal)) } case 108: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:547 +//line parser.go.y:528 { - yyVAL.value = &ObjectKeyVal{Key: yyDollar[1].token, Val: yyDollar[3].value.(*ObjectVal)} + yyVAL.value = &ObjectKeyVal{Key: yyDollar[1].token, Val: yyDollar[3].value.(*Query)} } case 109: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:551 +//line parser.go.y:532 { - yyVAL.value = &ObjectKeyVal{KeyString: yyDollar[1].value.(*String), Val: yyDollar[3].value.(*ObjectVal)} + yyVAL.value = &ObjectKeyVal{KeyString: yyDollar[1].value.(*String), Val: yyDollar[3].value.(*Query)} } case 110: yyDollar = yyS[yypt-5 : yypt+1] -//line parser.go.y:555 +//line parser.go.y:536 { - yyVAL.value = &ObjectKeyVal{KeyQuery: yyDollar[2].value.(*Query), Val: yyDollar[5].value.(*ObjectVal)} + yyVAL.value = &ObjectKeyVal{KeyQuery: yyDollar[2].value.(*Query), Val: yyDollar[5].value.(*Query)} } case 111: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:559 +//line parser.go.y:540 { yyVAL.value = &ObjectKeyVal{Key: yyDollar[1].token} } case 112: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:563 +//line parser.go.y:544 { yyVAL.value = &ObjectKeyVal{KeyString: yyDollar[1].value.(*String)} } - case 113: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:568 - { - } - case 114: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:569 - { - } - case 115: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:570 - { - } case 116: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:574 - { - yyVAL.value = &ObjectVal{[]*Query{{Term: yyDollar[1].value.(*Term)}}} - } - case 117: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:578 +//line parser.go.y:555 { - yyVAL.value = &ObjectVal{append(yyDollar[1].value.(*ObjectVal).Queries, &Query{Term: yyDollar[3].value.(*Term)})} + yyVAL.value = &Query{Left: yyDollar[1].value.(*Query), Op: OpPipe, Right: yyDollar[3].value.(*Query)} } case 118: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:584 +//line parser.go.y:562 { yyVAL.value = &ConstTerm{Object: yyDollar[1].value.(*ConstObject)} } case 119: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:588 +//line parser.go.y:566 { yyVAL.value = &ConstTerm{Array: yyDollar[1].value.(*ConstArray)} } case 120: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:592 +//line parser.go.y:570 { yyVAL.value = &ConstTerm{Number: yyDollar[1].token} } case 121: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:596 +//line parser.go.y:574 { yyVAL.value = &ConstTerm{Str: yyDollar[1].token} } case 122: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:600 +//line parser.go.y:578 { yyVAL.value = &ConstTerm{Null: true} } case 123: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:604 +//line parser.go.y:582 { yyVAL.value = &ConstTerm{True: true} } case 124: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:608 +//line parser.go.y:586 { yyVAL.value = &ConstTerm{False: true} } case 125: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:614 +//line parser.go.y:592 { yyVAL.value = &ConstObject{} } case 126: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:618 +//line parser.go.y:596 { yyVAL.value = &ConstObject{yyDollar[2].value.([]*ConstObjectKeyVal)} } case 127: yyDollar = yyS[yypt-4 : yypt+1] -//line parser.go.y:622 +//line parser.go.y:600 { yyVAL.value = &ConstObject{yyDollar[2].value.([]*ConstObjectKeyVal)} } case 128: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:628 +//line parser.go.y:606 { yyVAL.value = []*ConstObjectKeyVal{yyDollar[1].value.(*ConstObjectKeyVal)} } case 129: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:632 +//line parser.go.y:610 { yyVAL.value = append(yyDollar[1].value.([]*ConstObjectKeyVal), yyDollar[3].value.(*ConstObjectKeyVal)) } case 130: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:638 +//line parser.go.y:616 { yyVAL.value = &ConstObjectKeyVal{Key: yyDollar[1].token, Val: yyDollar[3].value.(*ConstTerm)} } case 131: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:642 +//line parser.go.y:620 { yyVAL.value = &ConstObjectKeyVal{Key: yyDollar[1].token, Val: yyDollar[3].value.(*ConstTerm)} } case 132: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:646 +//line parser.go.y:624 { yyVAL.value = &ConstObjectKeyVal{KeyString: yyDollar[1].token, Val: yyDollar[3].value.(*ConstTerm)} } case 133: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.go.y:652 +//line parser.go.y:630 { yyVAL.value = &ConstArray{} } case 134: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:656 +//line parser.go.y:634 { yyVAL.value = &ConstArray{yyDollar[2].value.([]*ConstTerm)} } case 135: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:662 +//line parser.go.y:640 { yyVAL.value = []*ConstTerm{yyDollar[1].value.(*ConstTerm)} } case 136: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.go.y:666 +//line parser.go.y:644 { yyVAL.value = append(yyDollar[1].value.([]*ConstTerm), yyDollar[3].value.(*ConstTerm)) } - case 137: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:671 - { - } - case 138: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:672 - { - } - case 139: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:673 - { - } - case 140: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:674 - { - } - case 141: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:675 - { - } - case 142: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:676 - { - } - case 143: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:677 - { - } - case 144: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:678 - { - } - case 145: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:679 - { - } - case 146: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:680 - { - } - case 147: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:681 - { - } - case 148: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:682 - { - } - case 149: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:683 - { - } - case 150: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:684 - { - } - case 151: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:685 - { - } - case 152: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:686 - { - } - case 153: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:687 - { - } - case 154: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:688 - { - } - case 155: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:689 - { - } - case 156: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:690 - { - } - case 157: - yyDollar = yyS[yypt-1 : yypt+1] -//line parser.go.y:691 - { - } } goto yystack /* stack new state and value */ } diff --git a/vendor/github.com/itchyny/gojq/parser.go.y b/vendor/github.com/itchyny/gojq/parser.go.y index 380c3cf6..5481e211 100644 --- a/vendor/github.com/itchyny/gojq/parser.go.y +++ b/vendor/github.com/itchyny/gojq/parser.go.y @@ -1,20 +1,6 @@ %{ package gojq -// Parse a query string, and returns the query struct. -// -// If parsing failed, the returned error has the method Token() (string, int), -// which reports the invalid token and the byte offset in the query string. The -// token is empty if the error occurred after scanning the entire query string. -// The byte offset is the scanned bytes when the error occurred. -func Parse(src string) (*Query, error) { - l := newLexer(src) - if yyParse(l) > 0 { - return nil, l.err - } - return l.result, nil -} - func reverseFuncDef(xs []*FuncDef) []*FuncDef { for i, j := 0, len(xs)-1; i < j; i, j = i+1, j-1 { xs[i], xs[j] = xs[j], xs[i] @@ -36,24 +22,23 @@ func prependFuncDef(xs []*FuncDef, x *FuncDef) []*FuncDef { operator Operator } -%type program moduleheader programbody imports import metaopt funcdefs funcdef funcdefargs query +%type program header imports import meta body funcdefs funcdef funcargs query %type bindpatterns pattern arraypatterns objectpatterns objectpattern -%type term string stringparts suffix args ifelifs ifelse trycatch +%type expr term string stringparts suffix args ifelifs ifelse trycatch %type objectkeyvals objectkeyval objectval %type constterm constobject constobjectkeyvals constobjectkeyval constarray constarrayelems %type tokIdentVariable tokIdentModuleIdent tokVariableModuleVariable tokKeyword objectkey -%token tokAltOp tokUpdateOp tokDestAltOp tokOrOp tokAndOp tokCompareOp -%token tokModule tokImport tokInclude tokDef tokAs tokLabel tokBreak +%token tokAltOp tokUpdateOp tokDestAltOp tokCompareOp +%token tokOrOp tokAndOp tokModule tokImport tokInclude tokDef tokAs tokLabel tokBreak %token tokNull tokTrue tokFalse -%token tokIdent tokVariable tokModuleIdent tokModuleVariable -%token tokIndex tokNumber tokFormat -%token tokString tokStringStart tokStringQuery tokStringEnd %token tokIf tokThen tokElif tokElse tokEnd %token tokTry tokCatch tokReduce tokForeach -%token tokRecurse tokFuncDefPost tokTermPost tokEmptyCatch -%token tokInvalid tokInvalidEscapeSequence tokUnterminatedString +%token tokIdent tokVariable tokModuleIdent tokModuleVariable +%token tokRecurse tokIndex tokNumber tokFormat +%token tokString tokStringStart tokStringQuery tokStringEnd +%token tokInvalid tokInvalidEscapeSequence tokUnterminatedString -%nonassoc tokFuncDefPost tokTermPost +%nonassoc tokFuncDefQuery tokExpr tokTerm %right '|' %left ',' %right tokAltOp @@ -69,33 +54,24 @@ func prependFuncDef(xs []*FuncDef, x *FuncDef) []*FuncDef { %% program - : moduleheader programbody + : header imports body { - if $1 != nil { $2.(*Query).Meta = $1.(*ConstObject) } - yylex.(*lexer).result = $2.(*Query) + query := $3.(*Query) + query.Meta = $1.(*ConstObject) + query.Imports = $2.([]*Import) + yylex.(*lexer).result = query } -moduleheader +header : { - $$ = nil + $$ = (*ConstObject)(nil) } | tokModule constobject ';' { $$ = $2; } -programbody - : imports funcdefs - { - $$ = &Query{Imports: $1.([]*Import), FuncDefs: reverseFuncDef($2.([]*FuncDef)), Term: &Term{Type: TermTypeIdentity}} - } - | imports query - { - if $1 != nil { $2.(*Query).Imports = $1.([]*Import) } - $$ = $2 - } - imports : { @@ -107,21 +83,28 @@ imports } import - : tokImport tokString tokAs tokIdentVariable metaopt ';' + : tokImport tokString tokAs tokIdentVariable meta ';' { $$ = &Import{ImportPath: $2, ImportAlias: $4, Meta: $5.(*ConstObject)} } - | tokInclude tokString metaopt ';' + | tokInclude tokString meta ';' { $$ = &Import{IncludePath: $2, Meta: $3.(*ConstObject)} } -metaopt +meta : { $$ = (*ConstObject)(nil) } - | constobject {} + | constobject + +body + : funcdefs + { + $$ = &Query{FuncDefs: reverseFuncDef($1.([]*FuncDef))} + } + | query funcdefs : @@ -138,30 +121,31 @@ funcdef { $$ = &FuncDef{Name: $2, Body: $4.(*Query)} } - | tokDef tokIdent '(' funcdefargs ')' ':' query ';' + | tokDef tokIdent '(' funcargs ')' ':' query ';' { $$ = &FuncDef{$2, $4.([]string), $7.(*Query)} } -funcdefargs +funcargs : tokIdentVariable { $$ = []string{$1} } - | funcdefargs ';' tokIdentVariable + | funcargs ';' tokIdentVariable { $$ = append($1.([]string), $3) } tokIdentVariable - : tokIdent {} - | tokVariable {} + : tokIdent + | tokVariable query - : funcdef query %prec tokFuncDefPost + : funcdef query %prec tokFuncDefQuery { - $2.(*Query).FuncDefs = prependFuncDef($2.(*Query).FuncDefs, $1.(*FuncDef)) - $$ = $2 + query := $2.(*Query) + query.FuncDefs = prependFuncDef(query.FuncDefs, $1.(*FuncDef)) + $$ = query } | query '|' query { @@ -169,86 +153,62 @@ query } | term tokAs bindpatterns '|' query { - $1.(*Term).SuffixList = append($1.(*Term).SuffixList, &Suffix{Bind: &Bind{$3.([]*Pattern), $5.(*Query)}}) - $$ = &Query{Term: $1.(*Term)} - } - | tokReduce term tokAs pattern '(' query ';' query ')' - { - $$ = &Query{Term: &Term{Type: TermTypeReduce, Reduce: &Reduce{$2.(*Term), $4.(*Pattern), $6.(*Query), $8.(*Query)}}} - } - | tokForeach term tokAs pattern '(' query ';' query ')' - { - $$ = &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{$2.(*Term), $4.(*Pattern), $6.(*Query), $8.(*Query), nil}}} - } - | tokForeach term tokAs pattern '(' query ';' query ';' query ')' - { - $$ = &Query{Term: &Term{Type: TermTypeForeach, Foreach: &Foreach{$2.(*Term), $4.(*Pattern), $6.(*Query), $8.(*Query), $10.(*Query)}}} - } - | tokIf query tokThen query ifelifs ifelse tokEnd - { - $$ = &Query{Term: &Term{Type: TermTypeIf, If: &If{$2.(*Query), $4.(*Query), $5.([]*IfElif), $6.(*Query)}}} - } - | tokTry query trycatch - { - $$ = &Query{Term: &Term{Type: TermTypeTry, Try: &Try{$2.(*Query), $3.(*Query)}}} + term := $1.(*Term) + term.SuffixList = append(term.SuffixList, &Suffix{Bind: &Bind{$3.([]*Pattern), $5.(*Query)}}) + $$ = &Query{Term: term} } | tokLabel tokVariable '|' query { $$ = &Query{Term: &Term{Type: TermTypeLabel, Label: &Label{$2, $4.(*Query)}}} } - | query '?' - { - if t := $1.(*Query).Term; t != nil { - t.SuffixList = append(t.SuffixList, &Suffix{Optional: true}) - } else { - $$ = &Query{Term: &Term{Type: TermTypeQuery, Query: $1.(*Query), SuffixList: []*Suffix{{Optional: true}}}} - } - } | query ',' query { $$ = &Query{Left: $1.(*Query), Op: OpComma, Right: $3.(*Query)} } - | query tokAltOp query + | expr %prec tokExpr + +expr + : expr tokAltOp expr { $$ = &Query{Left: $1.(*Query), Op: $2, Right: $3.(*Query)} } - | query tokUpdateOp query + | expr tokUpdateOp expr { $$ = &Query{Left: $1.(*Query), Op: $2, Right: $3.(*Query)} } - | query tokOrOp query + | expr tokOrOp expr { $$ = &Query{Left: $1.(*Query), Op: OpOr, Right: $3.(*Query)} } - | query tokAndOp query + | expr tokAndOp expr { $$ = &Query{Left: $1.(*Query), Op: OpAnd, Right: $3.(*Query)} } - | query tokCompareOp query + | expr tokCompareOp expr { $$ = &Query{Left: $1.(*Query), Op: $2, Right: $3.(*Query)} } - | query '+' query + | expr '+' expr { $$ = &Query{Left: $1.(*Query), Op: OpAdd, Right: $3.(*Query)} } - | query '-' query + | expr '-' expr { $$ = &Query{Left: $1.(*Query), Op: OpSub, Right: $3.(*Query)} } - | query '*' query + | expr '*' expr { $$ = &Query{Left: $1.(*Query), Op: OpMul, Right: $3.(*Query)} } - | query '/' query + | expr '/' expr { $$ = &Query{Left: $1.(*Query), Op: OpDiv, Right: $3.(*Query)} } - | query '%' query + | expr '%' expr { $$ = &Query{Left: $1.(*Query), Op: OpMod, Right: $3.(*Query)} } - | term %prec tokTermPost + | term %prec tokTerm { $$ = &Query{Term: $1.(*Term)} } @@ -330,10 +290,11 @@ term } | '.' suffix { - if $2.(*Suffix).Iter { - $$ = &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{$2.(*Suffix)}} + suffix := $2.(*Suffix) + if suffix.Iter { + $$ = &Term{Type: TermTypeIdentity, SuffixList: []*Suffix{suffix}} } else { - $$ = &Term{Type: TermTypeIndex, Index: $2.(*Suffix).Index} + $$ = &Term{Type: TermTypeIndex, Index: suffix.Index} } } | '.' string @@ -364,25 +325,29 @@ term { $$ = &Term{Type: TermTypeFunc, Func: &Func{Name: $1}} } - | tokNumber + | '{' '}' { - $$ = &Term{Type: TermTypeNumber, Number: $1} + $$ = &Term{Type: TermTypeObject, Object: &Object{}} } - | tokFormat + | '{' objectkeyvals '}' { - $$ = &Term{Type: TermTypeFormat, Format: $1} + $$ = &Term{Type: TermTypeObject, Object: &Object{$2.([]*ObjectKeyVal)}} } - | tokFormat string + | '{' objectkeyvals ',' '}' { - $$ = &Term{Type: TermTypeFormat, Format: $1, Str: $2.(*String)} + $$ = &Term{Type: TermTypeObject, Object: &Object{$2.([]*ObjectKeyVal)}} } - | string + | '[' ']' { - $$ = &Term{Type: TermTypeString, Str: $1.(*String)} + $$ = &Term{Type: TermTypeArray, Array: &Array{}} } - | '(' query ')' + | '[' query ']' { - $$ = &Term{Type: TermTypeQuery, Query: $2.(*Query)} + $$ = &Term{Type: TermTypeArray, Array: &Array{$2.(*Query)}} + } + | tokNumber + { + $$ = &Term{Type: TermTypeNumber, Number: $1} } | '+' term { @@ -392,30 +357,46 @@ term { $$ = &Term{Type: TermTypeUnary, Unary: &Unary{OpSub, $2.(*Term)}} } - | '{' '}' + | tokFormat { - $$ = &Term{Type: TermTypeObject, Object: &Object{}} + $$ = &Term{Type: TermTypeFormat, Format: $1} } - | '{' objectkeyvals '}' + | tokFormat string { - $$ = &Term{Type: TermTypeObject, Object: &Object{$2.([]*ObjectKeyVal)}} + $$ = &Term{Type: TermTypeFormat, Format: $1, Str: $2.(*String)} } - | '{' objectkeyvals ',' '}' + | string { - $$ = &Term{Type: TermTypeObject, Object: &Object{$2.([]*ObjectKeyVal)}} + $$ = &Term{Type: TermTypeString, Str: $1.(*String)} } - | '[' ']' + | tokIf query tokThen query ifelifs ifelse tokEnd { - $$ = &Term{Type: TermTypeArray, Array: &Array{}} + $$ = &Term{Type: TermTypeIf, If: &If{$2.(*Query), $4.(*Query), $5.([]*IfElif), $6.(*Query)}} } - | '[' query ']' + | tokTry expr trycatch { - $$ = &Term{Type: TermTypeArray, Array: &Array{$2.(*Query)}} + $$ = &Term{Type: TermTypeTry, Try: &Try{$2.(*Query), $3.(*Query)}} + } + | tokReduce expr tokAs pattern '(' query ';' query ')' + { + $$ = &Term{Type: TermTypeReduce, Reduce: &Reduce{$2.(*Query), $4.(*Pattern), $6.(*Query), $8.(*Query)}} + } + | tokForeach expr tokAs pattern '(' query ';' query ')' + { + $$ = &Term{Type: TermTypeForeach, Foreach: &Foreach{$2.(*Query), $4.(*Pattern), $6.(*Query), $8.(*Query), nil}} + } + | tokForeach expr tokAs pattern '(' query ';' query ';' query ')' + { + $$ = &Term{Type: TermTypeForeach, Foreach: &Foreach{$2.(*Query), $4.(*Pattern), $6.(*Query), $8.(*Query), $10.(*Query)}} } | tokBreak tokVariable { $$ = &Term{Type: TermTypeBreak, Break: $2} } + | '(' query ')' + { + $$ = &Term{Type: TermTypeQuery, Query: $2.(*Query)} + } | term tokIndex { $1.(*Term).SuffixList = append($1.(*Term).SuffixList, &Suffix{Index: &Index{Name: $2}}) @@ -463,12 +444,12 @@ stringparts } tokIdentModuleIdent - : tokIdent {} - | tokModuleIdent {} + : tokIdent + | tokModuleIdent tokVariableModuleVariable - : tokVariable {} - | tokModuleVariable {} + : tokVariable + | tokModuleVariable suffix : '[' ']' @@ -527,7 +508,7 @@ trycatch { $$ = (*Query)(nil) } - | tokCatch query + | tokCatch expr { $$ = $2 } @@ -545,15 +526,15 @@ objectkeyvals objectkeyval : objectkey ':' objectval { - $$ = &ObjectKeyVal{Key: $1, Val: $3.(*ObjectVal)} + $$ = &ObjectKeyVal{Key: $1, Val: $3.(*Query)} } | string ':' objectval { - $$ = &ObjectKeyVal{KeyString: $1.(*String), Val: $3.(*ObjectVal)} + $$ = &ObjectKeyVal{KeyString: $1.(*String), Val: $3.(*Query)} } | '(' query ')' ':' objectval { - $$ = &ObjectKeyVal{KeyQuery: $2.(*Query), Val: $5.(*ObjectVal)} + $$ = &ObjectKeyVal{KeyQuery: $2.(*Query), Val: $5.(*Query)} } | objectkey { @@ -565,19 +546,16 @@ objectkeyval } objectkey - : tokIdent {} - | tokVariable {} - | tokKeyword {} + : tokIdent + | tokVariable + | tokKeyword objectval - : term + : objectval '|' objectval { - $$ = &ObjectVal{[]*Query{{Term: $1.(*Term)}}} - } - | objectval '|' term - { - $$ = &ObjectVal{append($1.(*ObjectVal).Queries, &Query{Term: $3.(*Term)})} + $$ = &Query{Left: $1.(*Query), Op: OpPipe, Right: $3.(*Query)} } + | expr constterm : constobject @@ -668,26 +646,26 @@ constarrayelems } tokKeyword - : tokOrOp {} - | tokAndOp {} - | tokModule {} - | tokImport {} - | tokInclude {} - | tokDef {} - | tokAs {} - | tokLabel {} - | tokBreak {} - | tokNull {} - | tokTrue {} - | tokFalse {} - | tokIf {} - | tokThen {} - | tokElif {} - | tokElse {} - | tokEnd {} - | tokTry {} - | tokCatch {} - | tokReduce {} - | tokForeach {} + : tokOrOp + | tokAndOp + | tokModule + | tokImport + | tokInclude + | tokDef + | tokAs + | tokLabel + | tokBreak + | tokNull + | tokTrue + | tokFalse + | tokIf + | tokThen + | tokElif + | tokElse + | tokEnd + | tokTry + | tokCatch + | tokReduce + | tokForeach %% diff --git a/vendor/github.com/itchyny/gojq/query.go b/vendor/github.com/itchyny/gojq/query.go index 5f20b4ff..e7cf7789 100644 --- a/vendor/github.com/itchyny/gojq/query.go +++ b/vendor/github.com/itchyny/gojq/query.go @@ -5,6 +5,20 @@ import ( "strings" ) +// Parse a query string, and returns the query struct. +// +// If parsing failed, it returns an error of type [*ParseError], which has +// the byte offset and the invalid token. The byte offset is the scanned bytes +// when the error occurred. The token is empty if the error occurred after +// scanning the entire query string. +func Parse(src string) (*Query, error) { + l := newLexer(src) + if yyParse(l) > 0 { + return nil, l.err + } + return l.result, nil +} + // Query represents the abstract syntax tree of a jq query. type Query struct { Meta *ConstObject @@ -49,13 +63,8 @@ func (e *Query) writeTo(s *strings.Builder) { for _, im := range e.Imports { im.writeTo(s) } - for i, fd := range e.FuncDefs { - if i > 0 { - s.WriteByte(' ') - } + for _, fd := range e.FuncDefs { fd.writeTo(s) - } - if len(e.FuncDefs) > 0 { s.WriteByte(' ') } if e.Func != "" { @@ -660,7 +669,7 @@ type ObjectKeyVal struct { Key string KeyString *String KeyQuery *Query - Val *ObjectVal + Val *Query } func (e *ObjectKeyVal) String() string { @@ -696,32 +705,6 @@ func (e *ObjectKeyVal) minify() { } } -// ObjectVal ... -type ObjectVal struct { - Queries []*Query -} - -func (e *ObjectVal) String() string { - var s strings.Builder - e.writeTo(&s) - return s.String() -} - -func (e *ObjectVal) writeTo(s *strings.Builder) { - for i, e := range e.Queries { - if i > 0 { - s.WriteString(" | ") - } - e.writeTo(s) - } -} - -func (e *ObjectVal) minify() { - for _, e := range e.Queries { - e.minify() - } -} - // Array ... type Array struct { Query *Query @@ -929,7 +912,7 @@ func (e *Try) minify() { // Reduce ... type Reduce struct { - Term *Term + Query *Query Pattern *Pattern Start *Query Update *Query @@ -943,7 +926,7 @@ func (e *Reduce) String() string { func (e *Reduce) writeTo(s *strings.Builder) { s.WriteString("reduce ") - e.Term.writeTo(s) + e.Query.writeTo(s) s.WriteString(" as ") e.Pattern.writeTo(s) s.WriteString(" (") @@ -954,14 +937,14 @@ func (e *Reduce) writeTo(s *strings.Builder) { } func (e *Reduce) minify() { - e.Term.minify() + e.Query.minify() e.Start.minify() e.Update.minify() } // Foreach ... type Foreach struct { - Term *Term + Query *Query Pattern *Pattern Start *Query Update *Query @@ -976,7 +959,7 @@ func (e *Foreach) String() string { func (e *Foreach) writeTo(s *strings.Builder) { s.WriteString("foreach ") - e.Term.writeTo(s) + e.Query.writeTo(s) s.WriteString(" as ") e.Pattern.writeTo(s) s.WriteString(" (") @@ -991,7 +974,7 @@ func (e *Foreach) writeTo(s *strings.Builder) { } func (e *Foreach) minify() { - e.Term.minify() + e.Query.minify() e.Start.minify() e.Update.minify() if e.Extract != nil { @@ -1075,6 +1058,14 @@ func (e *ConstTerm) toValue() any { } } +func (e *ConstTerm) toString() (string, bool) { + if e.Object != nil || e.Array != nil || + e.Number != "" || e.Null || e.True || e.False { + return "", false + } + return e.Str, true +} + // ConstObject ... type ConstObject struct { KeyVals []*ConstObjectKeyVal @@ -1134,7 +1125,7 @@ func (e *ConstObjectKeyVal) writeTo(s *strings.Builder) { if e.Key != "" { s.WriteString(e.Key) } else { - s.WriteString(e.KeyString) + jsonEncodeString(s, e.KeyString) } s.WriteString(": ") e.Val.writeTo(s) diff --git a/vendor/github.com/itchyny/gojq/release.go b/vendor/github.com/itchyny/gojq/release.go index c34dfb45..07fc7167 100644 --- a/vendor/github.com/itchyny/gojq/release.go +++ b/vendor/github.com/itchyny/gojq/release.go @@ -5,12 +5,12 @@ package gojq type codeinfo struct{} -func (c *compiler) appendCodeInfo(any) {} +func (*compiler) appendCodeInfo(any) {} -func (c *compiler) deleteCodeInfo(string) {} +func (*compiler) deleteCodeInfo(string) {} -func (env *env) debugCodes() {} +func (*env) debugCodes() {} -func (env *env) debugState(int, bool) {} +func (*env) debugState(int, bool) {} -func (env *env) debugForks(int, string) {} +func (*env) debugForks(int, string) {} diff --git a/vendor/github.com/itchyny/timefmt-go/CHANGELOG.md b/vendor/github.com/itchyny/timefmt-go/CHANGELOG.md index 61a4e9dc..d863ac3b 100644 --- a/vendor/github.com/itchyny/timefmt-go/CHANGELOG.md +++ b/vendor/github.com/itchyny/timefmt-go/CHANGELOG.md @@ -1,4 +1,9 @@ # Changelog +## [v0.1.6](https://github.com/itchyny/timefmt-go/compare/v0.1.5..v0.1.6) (2024-06-01) +* support parsing week directives (`%A`, `%a`, `%w`, `%u`, `%V`, `%U`, `%W`) +* validate range of values on parsing directives +* fix formatting `%l` to show `12` at midnight + ## [v0.1.5](https://github.com/itchyny/timefmt-go/compare/v0.1.4..v0.1.5) (2022-12-01) * support parsing time zone offset with name using both `%z` and `%Z` diff --git a/vendor/github.com/itchyny/timefmt-go/README.md b/vendor/github.com/itchyny/timefmt-go/README.md index f01af961..9c028c74 100644 --- a/vendor/github.com/itchyny/timefmt-go/README.md +++ b/vendor/github.com/itchyny/timefmt-go/README.md @@ -1,5 +1,5 @@ # timefmt-go -[![CI Status](https://github.com/itchyny/timefmt-go/workflows/CI/badge.svg)](https://github.com/itchyny/timefmt-go/actions) +[![CI Status](https://github.com/itchyny/timefmt-go/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/itchyny/timefmt-go/actions?query=branch:main) [![Go Report Card](https://goreportcard.com/badge/github.com/itchyny/timefmt-go)](https://goreportcard.com/report/github.com/itchyny/timefmt-go) [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/itchyny/timefmt-go/blob/main/LICENSE) [![release](https://img.shields.io/github/release/itchyny/timefmt-go/all.svg)](https://github.com/itchyny/timefmt-go/releases) @@ -54,7 +54,7 @@ Note that `E` and `O` modifier characters are not supported. - `Parse` (`strptime`) allows to parse - composed directives like `%F %T`, - century years like `%C %y`, - - week names like `%A` `%a` (parsed results are discarded). + - week directives like `%W %a` and `%G-W%V-%u`. - `ParseInLocation` is provided for configuring the default location. ![](https://user-images.githubusercontent.com/375258/88606920-de475c80-d0b8-11ea-8d40-cbfee9e35c2e.jpg) @@ -63,7 +63,7 @@ Note that `E` and `O` modifier characters are not supported. Report bug at [Issues・itchyny/timefmt-go - GitHub](https://github.com/itchyny/timefmt-go/issues). ## Author -itchyny (https://github.com/itchyny) +itchyny () ## License This software is released under the MIT License, see LICENSE. diff --git a/vendor/github.com/itchyny/timefmt-go/format.go b/vendor/github.com/itchyny/timefmt-go/format.go index eea976ee..b38202df 100644 --- a/vendor/github.com/itchyny/timefmt-go/format.go +++ b/vendor/github.com/itchyny/timefmt-go/format.go @@ -132,19 +132,13 @@ func AppendFormat(buf []byte, t time.Time, format string) []byte { case 'a': buf = appendString(buf, shortWeekNames[t.Weekday()], width, padding, upper, swap) case 'w': - for ; width > 1; width-- { - buf = append(buf, padding&paddingMask) - } - buf = append(buf, '0'+byte(t.Weekday())) + buf = appendInt(buf, int(t.Weekday()), width, padding) case 'u': w := int(t.Weekday()) if w == 0 { w = 7 } - for ; width > 1; width-- { - buf = append(buf, padding&paddingMask) - } - buf = append(buf, '0'+byte(w)) + buf = appendInt(buf, w, width, padding) case 'V': if width < 2 { width = 2 @@ -193,17 +187,10 @@ func AppendFormat(buf []byte, t time.Time, format string) []byte { } buf = appendInt(buf, hour, width, padding) case 'l': - if width < 2 { - width = 2 - } if padding < ^paddingMask { padding = ' ' } - h := hour - if h > 12 { - h -= 12 - } - buf = appendInt(buf, h, width, padding) + fallthrough case 'I': if width < 2 { width = 2 @@ -215,18 +202,15 @@ func AppendFormat(buf []byte, t time.Time, format string) []byte { h = 12 } buf = appendInt(buf, h, width, padding) + case 'P': + swap = !(upper || swap) + fallthrough case 'p': if hour < 12 { buf = appendString(buf, "AM", width, padding, upper, swap) } else { buf = appendString(buf, "PM", width, padding, upper, swap) } - case 'P': - if hour < 12 { - buf = appendString(buf, "am", width, padding, upper, swap) - } else { - buf = appendString(buf, "pm", width, padding, upper, swap) - } case 'M': if width < 2 { width = 2 @@ -271,14 +255,14 @@ func AppendFormat(buf []byte, t time.Time, format string) []byte { if buf[k] == ' ' { buf[k-1], buf[k] = buf[k], buf[k-1] } - if k = offset % 3600; colons <= 2 || k != 0 { + if offset %= 3600; colons <= 2 || offset != 0 { if colons != 0 { buf = append(buf, ':') } - buf = appendInt(buf, k/60, 2, '0') - if k %= 60; colons == 2 || colons == 3 && k != 0 { + buf = appendInt(buf, offset/60, 2, '0') + if offset %= 60; colons == 2 || colons == 3 && offset != 0 { buf = append(buf, ':') - buf = appendInt(buf, k, 2, '0') + buf = appendInt(buf, offset, 2, '0') } } colons = 0 @@ -294,9 +278,7 @@ func AppendFormat(buf []byte, t time.Time, format string) []byte { copy(buf[k:], buf[j:]) buf = buf[:l] if padding&paddingMask == '0' { - for ; k > i; k-- { - buf[k-1], buf[k] = buf[k], buf[k-1] - } + buf[i], buf[k] = buf[k], buf[i] } } case ':': @@ -444,7 +426,7 @@ func appendString(buf []byte, str string, width int, padding byte, upper, swap b } switch { case swap: - if str[len(str)-1] < 'a' { + if str[1] < 'a' { for _, b := range []byte(str) { buf = append(buf, b|0x20) } diff --git a/vendor/github.com/itchyny/timefmt-go/parse.go b/vendor/github.com/itchyny/timefmt-go/parse.go index 83b0df2c..26ae0f0c 100644 --- a/vendor/github.com/itchyny/timefmt-go/parse.go +++ b/vendor/github.com/itchyny/timefmt-go/parse.go @@ -3,6 +3,7 @@ package timefmt import ( "errors" "fmt" + "math" "time" ) @@ -18,31 +19,32 @@ func ParseInLocation(source, format string, loc *time.Location) (t time.Time, er } func parse(source, format string, loc, base *time.Location) (t time.Time, err error) { - year, month, day, hour, min, sec, nsec := 1900, 1, 1, 0, 0, 0, 0 + year, month, day, hour, min, sec, nsec := 1900, 1, 0, 0, 0, 0, 0 defer func() { if err != nil { err = fmt.Errorf("failed to parse %q with %q: %w", source, format, err) } }() - var j, century, yday, colons int - var pm, hasZoneName, hasZoneOffset bool + var j, week, weekday, yday, colons int + century, weekstart := -1, time.Weekday(-1) + var pm, hasISOYear, hasZoneName, hasZoneOffset bool var pending string for i, l := 0, len(source); i < len(format); i++ { if b := format[i]; b == '%' { i++ if i == len(format) { - err = errors.New("stray %") + err = errors.New(`stray "%"`) return } b = format[i] L: switch b { case 'Y': - if year, j, err = parseNumber(source, j, 4, 'Y'); err != nil { + if year, j, err = parseNumber(source, j, 4, 0, 9999, 'Y'); err != nil { return } case 'y': - if year, j, err = parseNumber(source, j, 2, 'y'); err != nil { + if year, j, err = parseNumber(source, j, 2, 0, 99, 'y'); err != nil { return } if year < 69 { @@ -51,65 +53,85 @@ func parse(source, format string, loc, base *time.Location) (t time.Time, err er year += 1900 } case 'C': - if century, j, err = parseNumber(source, j, 2, 'C'); err != nil { + if century, j, err = parseNumber(source, j, 2, 0, 99, 'C'); err != nil { return } case 'g': - if year, j, err = parseNumber(source, j, 2, b); err != nil { + if year, j, err = parseNumber(source, j, 2, 0, 99, b); err != nil { return } year += 2000 + hasISOYear = true case 'G': - if year, j, err = parseNumber(source, j, 4, b); err != nil { + if year, j, err = parseNumber(source, j, 4, 0, 9999, b); err != nil { return } + hasISOYear = true case 'm': - if month, j, err = parseNumber(source, j, 2, 'm'); err != nil { + if month, j, err = parseNumber(source, j, 2, 1, 12, 'm'); err != nil { return } case 'B': - if month, j, err = lookup(source, j, longMonthNames, 'B'); err != nil { + if month, j, err = parseAny(source, j, longMonthNames, 'B'); err != nil { return } case 'b', 'h': - if month, j, err = lookup(source, j, shortMonthNames, b); err != nil { + if month, j, err = parseAny(source, j, shortMonthNames, b); err != nil { return } case 'A': - if _, j, err = lookup(source, j, longWeekNames, 'A'); err != nil { + if weekday, j, err = parseAny(source, j, longWeekNames, 'A'); err != nil { return } case 'a': - if _, j, err = lookup(source, j, shortWeekNames, 'a'); err != nil { + if weekday, j, err = parseAny(source, j, shortWeekNames, 'a'); err != nil { return } case 'w': - if j >= l || source[j] < '0' || '6' < source[j] { - err = parseFormatError(b) + if weekday, j, err = parseNumber(source, j, 1, 0, 6, 'w'); err != nil { return } - j++ + weekday++ case 'u': - if j >= l || source[j] < '1' || '7' < source[j] { - err = parseFormatError(b) + if weekday, j, err = parseNumber(source, j, 1, 1, 7, 'u'); err != nil { return } - j++ - case 'V', 'U', 'W': - if _, j, err = parseNumber(source, j, 2, b); err != nil { + weekday = weekday%7 + 1 + case 'V': + if week, j, err = parseNumber(source, j, 2, 1, 53, b); err != nil { + return + } + weekstart = time.Thursday + if weekday == 0 { + weekday = 2 + } + case 'U': + if week, j, err = parseNumber(source, j, 2, 0, 53, b); err != nil { return } + weekstart = time.Sunday + if weekday == 0 { + weekday = 1 + } + case 'W': + if week, j, err = parseNumber(source, j, 2, 0, 53, b); err != nil { + return + } + weekstart = time.Monday + if weekday == 0 { + weekday = 2 + } case 'e': if j < l && source[j] == ' ' { j++ } fallthrough case 'd': - if day, j, err = parseNumber(source, j, 2, b); err != nil { + if day, j, err = parseNumber(source, j, 2, 1, 31, b); err != nil { return } case 'j': - if yday, j, err = parseNumber(source, j, 3, 'j'); err != nil { + if yday, j, err = parseNumber(source, j, 3, 1, 366, 'j'); err != nil { return } case 'k': @@ -118,7 +140,7 @@ func parse(source, format string, loc, base *time.Location) (t time.Time, err er } fallthrough case 'H': - if hour, j, err = parseNumber(source, j, 2, b); err != nil { + if hour, j, err = parseNumber(source, j, 2, 0, 23, b); err != nil { return } case 'l': @@ -127,29 +149,29 @@ func parse(source, format string, loc, base *time.Location) (t time.Time, err er } fallthrough case 'I': - if hour, j, err = parseNumber(source, j, 2, b); err != nil { + if hour, j, err = parseNumber(source, j, 2, 1, 12, b); err != nil { return } if hour == 12 { hour = 0 } - case 'p', 'P': + case 'P', 'p': var ampm int - if ampm, j, err = lookup(source, j, []string{"AM", "PM"}, 'p'); err != nil { + if ampm, j, err = parseAny(source, j, []string{"AM", "PM"}, b); err != nil { return } pm = ampm == 2 case 'M': - if min, j, err = parseNumber(source, j, 2, 'M'); err != nil { + if min, j, err = parseNumber(source, j, 2, 0, 59, 'M'); err != nil { return } case 'S': - if sec, j, err = parseNumber(source, j, 2, 'S'); err != nil { + if sec, j, err = parseNumber(source, j, 2, 0, 60, 'S'); err != nil { return } case 's': var unix int - if unix, j, err = parseNumber(source, j, 10, 's'); err != nil { + if unix, j, err = parseNumber(source, j, 10, 0, math.MaxInt, 's'); err != nil { return } t = time.Unix(int64(unix), 0).In(time.UTC) @@ -158,24 +180,24 @@ func parse(source, format string, loc, base *time.Location) (t time.Time, err er hour, min, sec = t.Clock() month = int(mon) case 'f': - var usec, k, d int - if usec, k, err = parseNumber(source, j, 6, 'f'); err != nil { + usec, i := 0, j + if usec, j, err = parseNumber(source, j, 6, 0, 999999, 'f'); err != nil { return } - for j, d = k, k-j; d < 6; d++ { + for i = j - i; i < 6; i++ { usec *= 10 } nsec = usec * 1000 case 'Z': - k := j - for ; k < l; k++ { - if c := source[k]; c < 'A' || 'Z' < c { + i := j + for ; j < l; j++ { + if c := source[j]; c < 'A' || 'Z' < c { break } } - t, err = time.ParseInLocation("MST", source[j:k], base) + t, err = time.ParseInLocation("MST", source[i:j], base) if err != nil { - err = fmt.Errorf(`cannot parse %q with "%%Z"`, source[j:k]) + err = fmt.Errorf(`cannot parse %q with "%%Z"`, source[i:j]) return } if hasZoneOffset { @@ -186,7 +208,6 @@ func parse(source, format string, loc, base *time.Location) (t time.Time, err er loc = t.Location() } hasZoneName = true - j = k case 'z': if j >= l { err = parseZFormatError(colons) @@ -198,44 +219,41 @@ func parse(source, format string, loc, base *time.Location) (t time.Time, err er sign = -1 fallthrough case '+': - var hour, min, sec, k int - if hour, k, _ = parseNumber(source, j+1, 2, 'z'); k != j+3 { + hour, min, sec, i := 0, 0, 0, j + if hour, j, _ = parseNumber(source, j+1, 2, 0, 23, 'z'); j != i+3 { err = parseZFormatError(colons) return } - if j = k; j >= l || source[j] != ':' { - switch colons { - case 1: - err = errors.New("expected ':' for %:z") - return - case 2: - err = errors.New("expected ':' for %::z") + if j >= l || source[j] != ':' { + if colons > 0 { + err = expectedColonForZFormatError(colons) return } } else if j++; colons == 0 { colons = 4 } - if min, k, _ = parseNumber(source, j, 2, 'z'); k != j+2 { - if colons == 0 { - k = j - } else { + i = j + if min, j, _ = parseNumber(source, j, 2, 0, 59, 'z'); j != i+2 { + if colons > 0 { err = parseZFormatError(colons & 3) return } - } - if j = k; colons > 1 { + j = i + } else if colons > 1 { if j >= l || source[j] != ':' { if colons == 2 { - err = errors.New("expected ':' for %::z") - return - } - } else if sec, k, _ = parseNumber(source, j+1, 2, 'z'); k != j+3 { - if colons == 2 { - err = parseZFormatError(colons) + err = expectedColonForZFormatError(colons) return } } else { - j = k + i = j + if sec, j, _ = parseNumber(source, j+1, 2, 0, 59, 'z'); j != i+3 { + if colons == 2 { + err = parseZFormatError(colons) + return + } + j = i + } } } var name string @@ -258,40 +276,32 @@ func parse(source, format string, loc, base *time.Location) (t time.Time, err er } j++ } else { - if i++; i == len(format) { - err = errors.New(`expected 'z' after "%:"`) - return - } else if b = format[i]; b == 'z' { - colons = 1 - } else if b != ':' { - err = errors.New(`expected 'z' after "%:"`) - return - } else if i++; i == len(format) { - err = errors.New(`expected 'z' after "%::"`) - return - } else if b = format[i]; b == 'z' { - colons = 2 - } else { - err = errors.New(`expected 'z' after "%::"`) - return + for colons = 1; colons <= 2; colons++ { + if i++; i == len(format) { + break + } else if b = format[i]; b == 'z' { + goto L + } else if b != ':' || colons == 2 { + break + } } - goto L + err = expectedZAfterColonError(colons) + return } case 't', 'n': - k := j + i := j K: - for ; k < l; k++ { - switch source[k] { + for ; j < l; j++ { + switch source[j] { case ' ', '\t', '\n', '\v', '\f', '\r': default: break K } } - if k == j { - err = fmt.Errorf("expected a space for %%%c", b) + if i == j { + err = fmt.Errorf(`expected a space for "%%%c"`, b) return } - j = k case '%': if j >= l || source[j] != b { err = expectedFormatError(b) @@ -304,7 +314,7 @@ func parse(source, format string, loc, base *time.Location) (t time.Time, err er if pending, ok = compositions[b]; ok { break } - err = fmt.Errorf(`unexpected format: "%%%c"`, b) + err = fmt.Errorf(`unexpected format "%%%c"`, b) return } if j >= l || source[j] != b { @@ -317,7 +327,7 @@ func parse(source, format string, loc, base *time.Location) (t time.Time, err er b, pending = pending[0], pending[1:] goto L } - } else if j >= len(source) || source[j] != b { + } else if j >= l || source[j] != b { err = expectedFormatError(b) return } else { @@ -325,17 +335,40 @@ func parse(source, format string, loc, base *time.Location) (t time.Time, err er } } if j < len(source) { - err = fmt.Errorf("unconverted string: %q", source[j:]) + err = fmt.Errorf("unparsed string %q", source[j:]) return } if pm { hour += 12 } - if century > 0 { + if century >= 0 { year = century*100 + year%100 } - if yday > 0 { - return time.Date(year, time.January, 1, hour, min, sec, nsec, loc).AddDate(0, 0, yday-1), nil + if day == 0 { + if yday > 0 { + if hasISOYear { + err = errors.New(`use "%Y" to parse non-ISO year for "%j"`) + return + } + return time.Date(year, time.January, yday, hour, min, sec, nsec, loc), nil + } + if weekstart >= time.Sunday { + if weekstart == time.Thursday { + if !hasISOYear { + err = errors.New(`use "%G" to parse ISO year for "%V"`) + return + } + } else if hasISOYear { + err = errors.New(`use "%Y" to parse non-ISO year for "%U" or "%W"`) + return + } + if weekstart > time.Sunday && weekday == 1 { + week++ + } + t := time.Date(year, time.January, -int(weekstart), hour, min, sec, nsec, loc) + return t.AddDate(0, 0, week*7-int(t.Weekday())+weekday-1), nil + } + day = 1 } return time.Date(year, time.Month(month), day, hour, min, sec, nsec, loc), nil } @@ -347,7 +380,7 @@ func locationZone(loc *time.Location) (name string, offset int) { type parseFormatError byte func (err parseFormatError) Error() string { - return fmt.Sprintf("cannot parse %%%c", byte(err)) + return fmt.Sprintf(`cannot parse "%%%c"`, byte(err)) } type expectedFormatError byte @@ -359,46 +392,51 @@ func (err expectedFormatError) Error() string { type parseZFormatError int func (err parseZFormatError) Error() string { - switch int(err) { - case 0: - return "cannot parse %z" - case 1: - return "cannot parse %:z" - default: - return "cannot parse %::z" - } + return `cannot parse "%` + `::z"`[2-err:] +} + +type expectedColonForZFormatError int + +func (err expectedColonForZFormatError) Error() string { + return `expected ':' for "%` + `::z"`[2-err:] +} + +type expectedZAfterColonError int + +func (err expectedZAfterColonError) Error() string { + return `expected 'z' after "%` + `::"`[2-err:] } -func parseNumber(source string, min, size int, format byte) (int, int, error) { - var val int - if l := len(source); min+size > l { +func parseNumber(source string, index, size, min, max int, format byte) (int, int, error) { + var value int + if l := len(source); index+size > l { size = l } else { - size += min + size += index } - i := min + i := index for ; i < size; i++ { if b := source[i]; '0' <= b && b <= '9' { - val = val*10 + int(b&0x0F) + value = value*10 + int(b&0x0F) } else { break } } - if i == min { + if i == index || value < min || max < value { return 0, 0, parseFormatError(format) } - return val, i, nil + return value, i, nil } -func lookup(source string, min int, candidates []string, format byte) (int, int, error) { +func parseAny(source string, index int, candidates []string, format byte) (int, int, error) { L: for i, xs := range candidates { - j := min + j := index for k := 0; k < len(xs); k, j = k+1, j+1 { if j >= len(source) { continue L } - if x, y := xs[k], source[j]; x != y && x|('a'-'A') != y|('a'-'A') { + if x, y := xs[k], source[j]; x != y && x|0x20 != y|0x20 { continue L } } diff --git a/vendor/github.com/jackc/chunkreader/v2/.travis.yml b/vendor/github.com/jackc/chunkreader/v2/.travis.yml deleted file mode 100644 index e176228e..00000000 --- a/vendor/github.com/jackc/chunkreader/v2/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: go - -go: - - 1.x - - tip - -matrix: - allow_failures: - - go: tip diff --git a/vendor/github.com/jackc/chunkreader/v2/README.md b/vendor/github.com/jackc/chunkreader/v2/README.md deleted file mode 100644 index 01209bfa..00000000 --- a/vendor/github.com/jackc/chunkreader/v2/README.md +++ /dev/null @@ -1,8 +0,0 @@ -[![](https://godoc.org/github.com/jackc/chunkreader?status.svg)](https://godoc.org/github.com/jackc/chunkreader) -[![Build Status](https://travis-ci.org/jackc/chunkreader.svg)](https://travis-ci.org/jackc/chunkreader) - -# chunkreader - -Package chunkreader provides an io.Reader wrapper that minimizes IO reads and memory allocations. - -Extracted from original implementation in https://github.com/jackc/pgx. diff --git a/vendor/github.com/jackc/chunkreader/v2/chunkreader.go b/vendor/github.com/jackc/chunkreader/v2/chunkreader.go deleted file mode 100644 index afea1c52..00000000 --- a/vendor/github.com/jackc/chunkreader/v2/chunkreader.go +++ /dev/null @@ -1,104 +0,0 @@ -// Package chunkreader provides an io.Reader wrapper that minimizes IO reads and memory allocations. -package chunkreader - -import ( - "io" -) - -// ChunkReader is a io.Reader wrapper that minimizes IO reads and memory allocations. It allocates memory in chunks and -// will read as much as will fit in the current buffer in a single call regardless of how large a read is actually -// requested. The memory returned via Next is owned by the caller. This avoids the need for an additional copy. -// -// The downside of this approach is that a large buffer can be pinned in memory even if only a small slice is -// referenced. For example, an entire 4096 byte block could be pinned in memory by even a 1 byte slice. In these rare -// cases it would be advantageous to copy the bytes to another slice. -type ChunkReader struct { - r io.Reader - - buf []byte - rp, wp int // buf read position and write position - - config Config -} - -// Config contains configuration parameters for ChunkReader. -type Config struct { - MinBufLen int // Minimum buffer length -} - -// New creates and returns a new ChunkReader for r with default configuration. -func New(r io.Reader) *ChunkReader { - cr, err := NewConfig(r, Config{}) - if err != nil { - panic("default config can't be bad") - } - - return cr -} - -// NewConfig creates and a new ChunkReader for r configured by config. -func NewConfig(r io.Reader, config Config) (*ChunkReader, error) { - if config.MinBufLen == 0 { - // By historical reasons Postgres currently has 8KB send buffer inside, - // so here we want to have at least the same size buffer. - // @see https://github.com/postgres/postgres/blob/249d64999615802752940e017ee5166e726bc7cd/src/backend/libpq/pqcomm.c#L134 - // @see https://www.postgresql.org/message-id/0cdc5485-cb3c-5e16-4a46-e3b2f7a41322%40ya.ru - config.MinBufLen = 8192 - } - - return &ChunkReader{ - r: r, - buf: make([]byte, config.MinBufLen), - config: config, - }, nil -} - -// Next returns buf filled with the next n bytes. The caller gains ownership of buf. It is not necessary to make a copy -// of buf. If an error occurs, buf will be nil. -func (r *ChunkReader) Next(n int) (buf []byte, err error) { - // n bytes already in buf - if (r.wp - r.rp) >= n { - buf = r.buf[r.rp : r.rp+n] - r.rp += n - return buf, err - } - - // available space in buf is less than n - if len(r.buf) < n { - r.copyBufContents(r.newBuf(n)) - } - - // buf is large enough, but need to shift filled area to start to make enough contiguous space - minReadCount := n - (r.wp - r.rp) - if (len(r.buf) - r.wp) < minReadCount { - newBuf := r.newBuf(n) - r.copyBufContents(newBuf) - } - - if err := r.appendAtLeast(minReadCount); err != nil { - return nil, err - } - - buf = r.buf[r.rp : r.rp+n] - r.rp += n - return buf, nil -} - -func (r *ChunkReader) appendAtLeast(fillLen int) error { - n, err := io.ReadAtLeast(r.r, r.buf[r.wp:], fillLen) - r.wp += n - return err -} - -func (r *ChunkReader) newBuf(size int) []byte { - if size < r.config.MinBufLen { - size = r.config.MinBufLen - } - return make([]byte, size) -} - -func (r *ChunkReader) copyBufContents(dest []byte) { - r.wp = copy(dest, r.buf[r.rp:r.wp]) - r.rp = 0 - r.buf = dest -} diff --git a/vendor/github.com/jackc/pgconn/.gitignore b/vendor/github.com/jackc/pgconn/.gitignore deleted file mode 100644 index e980f555..00000000 --- a/vendor/github.com/jackc/pgconn/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -.envrc -vendor/ -.vscode diff --git a/vendor/github.com/jackc/pgconn/CHANGELOG.md b/vendor/github.com/jackc/pgconn/CHANGELOG.md deleted file mode 100644 index 519996c8..00000000 --- a/vendor/github.com/jackc/pgconn/CHANGELOG.md +++ /dev/null @@ -1,177 +0,0 @@ -# 1.14.3 (March 4, 2024) - -* Update golang.org/x/crypto and golang.org/x/text - -# 1.14.2 (March 4, 2024) - -* Fix CVE-2024-27304. SQL injection can occur if an attacker can cause a single query or bind message to exceed 4 GB in -size. An integer overflow in the calculated message size can cause the one large message to be sent as multiple messages -under the attacker's control. - -# 1.14.1 (July 19, 2023) - -* Fix: Enable failover efforts when pg_hba.conf disallows non-ssl connections (Brandon Kauffman) -* Fix: connect_timeout is not obeyed for sslmode=allow|prefer (smaher-edb) -* Optimize redundant pgpass parsing in case password is explicitly set (Aleksandr Alekseev) - -# 1.14.0 (February 11, 2023) - -* Fix: each connection attempt to new node gets own timeout (Nathan Giardina) -* Set SNI for SSL connections (Stas Kelvich) -* Fix: CopyFrom I/O race (Tommy Reilly) -* Minor dependency upgrades - -# 1.13.0 (August 6, 2022) - -* Add sslpassword support (Eric McCormack and yun.xu) -* Add prefer-standby target_session_attrs support (sergey.bashilov) -* Fix GSS ErrorResponse handling (Oliver Tan) - -# 1.12.1 (May 7, 2022) - -* Fix: setting krbspn and krbsrvname in connection string (sireax) -* Add support for Unix sockets on Windows (Eno Compton) -* Stop ignoring ErrorResponse during SCRAM auth (Rafi Shamim) - -# 1.12.0 (April 21, 2022) - -* Add pluggable GSSAPI support (Oliver Tan) -* Fix: Consider any "0A000" error a possible cached plan changed error due to locale -* Better match psql fallback behavior with multiple hosts - -# 1.11.0 (February 7, 2022) - -* Support port in ip from LookupFunc to override config (James Hartig) -* Fix TLS connection timeout (Blake Embrey) -* Add support for read-only, primary, standby, prefer-standby target_session_attributes (Oscar) -* Fix connect when receiving NoticeResponse - -# 1.10.1 (November 20, 2021) - -* Close without waiting for response (Kei Kamikawa) -* Save waiting for network round-trip in CopyFrom (Rueian) -* Fix concurrency issue with ContextWatcher -* LRU.Get always checks context for cancellation / expiration (Georges Varouchas) - -# 1.10.0 (July 24, 2021) - -* net.Timeout errors are no longer returned when a query is canceled via context. A wrapped context error is returned. - -# 1.9.0 (July 10, 2021) - -* pgconn.Timeout only is true for errors originating in pgconn (Michael Darr) -* Add defaults for sslcert, sslkey, and sslrootcert (Joshua Brindle) -* Solve issue with 'sslmode=verify-full' when there are multiple hosts (mgoddard) -* Fix default host when parsing URL without host but with port -* Allow dbname query parameter in URL conn string -* Update underlying dependencies - -# 1.8.1 (March 25, 2021) - -* Better connection string sanitization (ip.novikov) -* Use proper pgpass location on Windows (Moshe Katz) -* Use errors instead of golang.org/x/xerrors -* Resume fallback on server error in Connect (Andrey Borodin) - -# 1.8.0 (December 3, 2020) - -* Add StatementErrored method to stmtcache.Cache. This allows the cache to purge invalidated prepared statements. (Ethan Pailes) - -# 1.7.2 (November 3, 2020) - -* Fix data value slices into work buffer with capacities larger than length. - -# 1.7.1 (October 31, 2020) - -* Do not asyncClose after receiving FATAL error from PostgreSQL server - -# 1.7.0 (September 26, 2020) - -* Exec(Params|Prepared) return ResultReader with FieldDescriptions loaded -* Add ReceiveResults (Sebastiaan Mannem) -* Fix parsing DSN connection with bad backslash -* Add PgConn.CleanupDone so connection pools can determine when async close is complete - -# 1.6.4 (July 29, 2020) - -* Fix deadlock on error after CommandComplete but before ReadyForQuery -* Fix panic on parsing DSN with trailing '=' - -# 1.6.3 (July 22, 2020) - -* Fix error message after AppendCertsFromPEM failure (vahid-sohrabloo) - -# 1.6.2 (July 14, 2020) - -* Update pgservicefile library - -# 1.6.1 (June 27, 2020) - -* Update golang.org/x/crypto to latest -* Update golang.org/x/text to 0.3.3 -* Fix error handling for bad PGSERVICE definition -* Redact passwords in ParseConfig errors (Lukas Vogel) - -# 1.6.0 (June 6, 2020) - -* Fix panic when closing conn during cancellable query -* Fix behavior of sslmode=require with sslrootcert present (Petr Jediný) -* Fix field descriptions available after command concluded (Tobias Salzmann) -* Support connect_timeout (georgysavva) -* Handle IPv6 in connection URLs (Lukas Vogel) -* Fix ValidateConnect with cancelable context -* Improve CopyFrom performance -* Add Config.Copy (georgysavva) - -# 1.5.0 (March 30, 2020) - -* Update golang.org/x/crypto for security fix -* Implement "verify-ca" SSL mode (Greg Curtis) - -# 1.4.0 (March 7, 2020) - -* Fix ExecParams and ExecPrepared handling of empty query. -* Support reading config from PostgreSQL service files. - -# 1.3.2 (February 14, 2020) - -* Update chunkreader to v2.0.1 for optimized default buffer size. - -# 1.3.1 (February 5, 2020) - -* Fix CopyFrom deadlock when multiple NoticeResponse received during copy - -# 1.3.0 (January 23, 2020) - -* Add Hijack and Construct. -* Update pgproto3 to v2.0.1. - -# 1.2.1 (January 13, 2020) - -* Fix data race in context cancellation introduced in v1.2.0. - -# 1.2.0 (January 11, 2020) - -## Features - -* Add Insert(), Update(), Delete(), and Select() statement type query methods to CommandTag. -* Add PgError.SQLState method. This could be used for compatibility with other drivers and databases. - -## Performance - -* Improve performance when context.Background() is used. (bakape) -* CommandTag.RowsAffected is faster and does not allocate. - -## Fixes - -* Try to cancel any in-progress query when a conn is closed by ctx cancel. -* Handle NoticeResponse during CopyFrom. -* Ignore errors sending Terminate message while closing connection. This mimics the behavior of libpq PGfinish. - -# 1.1.0 (October 12, 2019) - -* Add PgConn.IsBusy() method. - -# 1.0.1 (September 19, 2019) - -* Fix statement cache not properly cleaning discarded statements. diff --git a/vendor/github.com/jackc/pgconn/LICENSE b/vendor/github.com/jackc/pgconn/LICENSE deleted file mode 100644 index aebadd6c..00000000 --- a/vendor/github.com/jackc/pgconn/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2019-2021 Jack Christensen - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/jackc/pgconn/README.md b/vendor/github.com/jackc/pgconn/README.md deleted file mode 100644 index 9af04fe7..00000000 --- a/vendor/github.com/jackc/pgconn/README.md +++ /dev/null @@ -1,62 +0,0 @@ -[![](https://godoc.org/github.com/jackc/pgconn?status.svg)](https://godoc.org/github.com/jackc/pgconn) -![CI](https://github.com/jackc/pgconn/workflows/CI/badge.svg) - ---- - -This version is used with pgx `v4`. In pgx `v5` it is part of the https://github.com/jackc/pgx repository. - ---- - -# pgconn - -Package pgconn is a low-level PostgreSQL database driver. It operates at nearly the same level as the C library libpq. -It is primarily intended to serve as the foundation for higher level libraries such as https://github.com/jackc/pgx. -Applications should handle normal queries with a higher level library and only use pgconn directly when required for -low-level access to PostgreSQL functionality. - -## Example Usage - -```go -pgConn, err := pgconn.Connect(context.Background(), os.Getenv("DATABASE_URL")) -if err != nil { - log.Fatalln("pgconn failed to connect:", err) -} -defer pgConn.Close(context.Background()) - -result := pgConn.ExecParams(context.Background(), "SELECT email FROM users WHERE id=$1", [][]byte{[]byte("123")}, nil, nil, nil) -for result.NextRow() { - fmt.Println("User 123 has email:", string(result.Values()[0])) -} -_, err = result.Close() -if err != nil { - log.Fatalln("failed reading result:", err) -} -``` - -## Testing - -The pgconn tests require a PostgreSQL database. It will connect to the database specified in the `PGX_TEST_CONN_STRING` -environment variable. The `PGX_TEST_CONN_STRING` environment variable can be a URL or DSN. In addition, the standard `PG*` -environment variables will be respected. Consider using [direnv](https://github.com/direnv/direnv) to simplify -environment variable handling. - -### Example Test Environment - -Connect to your PostgreSQL server and run: - -``` -create database pgx_test; -``` - -Now you can run the tests: - -```bash -PGX_TEST_CONN_STRING="host=/var/run/postgresql dbname=pgx_test" go test ./... -``` - -### Connection and Authentication Tests - -Pgconn supports multiple connection types and means of authentication. These tests are optional. They -will only run if the appropriate environment variable is set. Run `go test -v | grep SKIP` to see if any tests are being -skipped. Most developers will not need to enable these tests. See `ci/setup_test.bash` for an example set up if you need change -authentication code. diff --git a/vendor/github.com/jackc/pgconn/doc.go b/vendor/github.com/jackc/pgconn/doc.go deleted file mode 100644 index cde58cd8..00000000 --- a/vendor/github.com/jackc/pgconn/doc.go +++ /dev/null @@ -1,29 +0,0 @@ -// Package pgconn is a low-level PostgreSQL database driver. -/* -pgconn provides lower level access to a PostgreSQL connection than a database/sql or pgx connection. It operates at -nearly the same level is the C library libpq. - -Establishing a Connection - -Use Connect to establish a connection. It accepts a connection string in URL or DSN and will read the environment for -libpq style environment variables. - -Executing a Query - -ExecParams and ExecPrepared execute a single query. They return readers that iterate over each row. The Read method -reads all rows into memory. - -Executing Multiple Queries in a Single Round Trip - -Exec and ExecBatch can execute multiple queries in a single round trip. They return readers that iterate over each query -result. The ReadAll method reads all query results into memory. - -Context Support - -All potentially blocking operations take a context.Context. If a context is canceled while the method is in progress the -method immediately returns. In most circumstances, this will close the underlying connection. - -The CancelRequest method may be used to request the PostgreSQL server cancel an in-progress query without forcing the -client to abort. -*/ -package pgconn diff --git a/vendor/github.com/jackc/pgconn/stmtcache/lru.go b/vendor/github.com/jackc/pgconn/stmtcache/lru.go deleted file mode 100644 index f0fb53b9..00000000 --- a/vendor/github.com/jackc/pgconn/stmtcache/lru.go +++ /dev/null @@ -1,169 +0,0 @@ -package stmtcache - -import ( - "container/list" - "context" - "fmt" - "sync/atomic" - - "github.com/jackc/pgconn" -) - -var lruCount uint64 - -// LRU implements Cache with a Least Recently Used (LRU) cache. -type LRU struct { - conn *pgconn.PgConn - mode int - cap int - prepareCount int - m map[string]*list.Element - l *list.List - psNamePrefix string - stmtsToClear []string -} - -// NewLRU creates a new LRU. mode is either ModePrepare or ModeDescribe. cap is the maximum size of the cache. -func NewLRU(conn *pgconn.PgConn, mode int, cap int) *LRU { - mustBeValidMode(mode) - mustBeValidCap(cap) - - n := atomic.AddUint64(&lruCount, 1) - - return &LRU{ - conn: conn, - mode: mode, - cap: cap, - m: make(map[string]*list.Element), - l: list.New(), - psNamePrefix: fmt.Sprintf("lrupsc_%d", n), - } -} - -// Get returns the prepared statement description for sql preparing or describing the sql on the server as needed. -func (c *LRU) Get(ctx context.Context, sql string) (*pgconn.StatementDescription, error) { - if ctx != context.Background() { - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - } - } - - // flush an outstanding bad statements - txStatus := c.conn.TxStatus() - if (txStatus == 'I' || txStatus == 'T') && len(c.stmtsToClear) > 0 { - for _, stmt := range c.stmtsToClear { - err := c.clearStmt(ctx, stmt) - if err != nil { - return nil, err - } - } - } - - if el, ok := c.m[sql]; ok { - c.l.MoveToFront(el) - return el.Value.(*pgconn.StatementDescription), nil - } - - if c.l.Len() == c.cap { - err := c.removeOldest(ctx) - if err != nil { - return nil, err - } - } - - psd, err := c.prepare(ctx, sql) - if err != nil { - return nil, err - } - - el := c.l.PushFront(psd) - c.m[sql] = el - - return psd, nil -} - -// Clear removes all entries in the cache. Any prepared statements will be deallocated from the PostgreSQL session. -func (c *LRU) Clear(ctx context.Context) error { - for c.l.Len() > 0 { - err := c.removeOldest(ctx) - if err != nil { - return err - } - } - - return nil -} - -func (c *LRU) StatementErrored(sql string, err error) { - pgErr, ok := err.(*pgconn.PgError) - if !ok { - return - } - - // https://github.com/jackc/pgx/issues/1162 - // - // We used to look for the message "cached plan must not change result type". However, that message can be localized. - // Unfortunately, error code "0A000" - "FEATURE NOT SUPPORTED" is used for many different errors and the only way to - // tell the difference is by the message. But all that happens is we clear a statement that we otherwise wouldn't - // have so it should be safe. - possibleInvalidCachedPlanError := pgErr.Code == "0A000" - if possibleInvalidCachedPlanError { - c.stmtsToClear = append(c.stmtsToClear, sql) - } -} - -func (c *LRU) clearStmt(ctx context.Context, sql string) error { - elem, inMap := c.m[sql] - if !inMap { - // The statement probably fell off the back of the list. In that case, we've - // ensured that it isn't in the cache, so we can declare victory. - return nil - } - - c.l.Remove(elem) - - psd := elem.Value.(*pgconn.StatementDescription) - delete(c.m, psd.SQL) - if c.mode == ModePrepare { - return c.conn.Exec(ctx, fmt.Sprintf("deallocate %s", psd.Name)).Close() - } - return nil -} - -// Len returns the number of cached prepared statement descriptions. -func (c *LRU) Len() int { - return c.l.Len() -} - -// Cap returns the maximum number of cached prepared statement descriptions. -func (c *LRU) Cap() int { - return c.cap -} - -// Mode returns the mode of the cache (ModePrepare or ModeDescribe) -func (c *LRU) Mode() int { - return c.mode -} - -func (c *LRU) prepare(ctx context.Context, sql string) (*pgconn.StatementDescription, error) { - var name string - if c.mode == ModePrepare { - name = fmt.Sprintf("%s_%d", c.psNamePrefix, c.prepareCount) - c.prepareCount += 1 - } - - return c.conn.Prepare(ctx, name, sql, nil) -} - -func (c *LRU) removeOldest(ctx context.Context) error { - oldest := c.l.Back() - c.l.Remove(oldest) - psd := oldest.Value.(*pgconn.StatementDescription) - delete(c.m, psd.SQL) - if c.mode == ModePrepare { - return c.conn.Exec(ctx, fmt.Sprintf("deallocate %s", psd.Name)).Close() - } - return nil -} diff --git a/vendor/github.com/jackc/pgconn/stmtcache/stmtcache.go b/vendor/github.com/jackc/pgconn/stmtcache/stmtcache.go deleted file mode 100644 index d083e1b4..00000000 --- a/vendor/github.com/jackc/pgconn/stmtcache/stmtcache.go +++ /dev/null @@ -1,58 +0,0 @@ -// Package stmtcache is a cache that can be used to implement lazy prepared statements. -package stmtcache - -import ( - "context" - - "github.com/jackc/pgconn" -) - -const ( - ModePrepare = iota // Cache should prepare named statements. - ModeDescribe // Cache should prepare the anonymous prepared statement to only fetch the description of the statement. -) - -// Cache prepares and caches prepared statement descriptions. -type Cache interface { - // Get returns the prepared statement description for sql preparing or describing the sql on the server as needed. - Get(ctx context.Context, sql string) (*pgconn.StatementDescription, error) - - // Clear removes all entries in the cache. Any prepared statements will be deallocated from the PostgreSQL session. - Clear(ctx context.Context) error - - // StatementErrored informs the cache that the given statement resulted in an error when it - // was last used against the database. In some cases, this will cause the cache to maer that - // statement as bad. The bad statement will instead be flushed during the next call to Get - // that occurs outside of a failed transaction. - StatementErrored(sql string, err error) - - // Len returns the number of cached prepared statement descriptions. - Len() int - - // Cap returns the maximum number of cached prepared statement descriptions. - Cap() int - - // Mode returns the mode of the cache (ModePrepare or ModeDescribe) - Mode() int -} - -// New returns the preferred cache implementation for mode and cap. mode is either ModePrepare or ModeDescribe. cap is -// the maximum size of the cache. -func New(conn *pgconn.PgConn, mode int, cap int) Cache { - mustBeValidMode(mode) - mustBeValidCap(cap) - - return NewLRU(conn, mode, cap) -} - -func mustBeValidMode(mode int) { - if mode != ModePrepare && mode != ModeDescribe { - panic("mode must be ModePrepare or ModeDescribe") - } -} - -func mustBeValidCap(cap int) { - if cap < 1 { - panic("cache must have cap of >= 1") - } -} diff --git a/vendor/github.com/jackc/pgio/.travis.yml b/vendor/github.com/jackc/pgio/.travis.yml deleted file mode 100644 index e176228e..00000000 --- a/vendor/github.com/jackc/pgio/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: go - -go: - - 1.x - - tip - -matrix: - allow_failures: - - go: tip diff --git a/vendor/github.com/jackc/pgio/LICENSE b/vendor/github.com/jackc/pgio/LICENSE deleted file mode 100644 index c1c4f50f..00000000 --- a/vendor/github.com/jackc/pgio/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2019 Jack Christensen - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/jackc/pgio/README.md b/vendor/github.com/jackc/pgio/README.md deleted file mode 100644 index 1952ed86..00000000 --- a/vendor/github.com/jackc/pgio/README.md +++ /dev/null @@ -1,11 +0,0 @@ -[![](https://godoc.org/github.com/jackc/pgio?status.svg)](https://godoc.org/github.com/jackc/pgio) -[![Build Status](https://travis-ci.org/jackc/pgio.svg)](https://travis-ci.org/jackc/pgio) - -# pgio - -Package pgio is a low-level toolkit building messages in the PostgreSQL wire protocol. - -pgio provides functions for appending integers to a []byte while doing byte -order conversion. - -Extracted from original implementation in https://github.com/jackc/pgx. diff --git a/vendor/github.com/jackc/pgproto3/v2/.travis.yml b/vendor/github.com/jackc/pgproto3/v2/.travis.yml deleted file mode 100644 index e176228e..00000000 --- a/vendor/github.com/jackc/pgproto3/v2/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: go - -go: - - 1.x - - tip - -matrix: - allow_failures: - - go: tip diff --git a/vendor/github.com/jackc/pgproto3/v2/LICENSE b/vendor/github.com/jackc/pgproto3/v2/LICENSE deleted file mode 100644 index c1c4f50f..00000000 --- a/vendor/github.com/jackc/pgproto3/v2/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2019 Jack Christensen - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/jackc/pgproto3/v2/README.md b/vendor/github.com/jackc/pgproto3/v2/README.md deleted file mode 100644 index 77a31700..00000000 --- a/vendor/github.com/jackc/pgproto3/v2/README.md +++ /dev/null @@ -1,18 +0,0 @@ -[![](https://godoc.org/github.com/jackc/pgproto3?status.svg)](https://godoc.org/github.com/jackc/pgproto3) -[![Build Status](https://travis-ci.org/jackc/pgproto3.svg)](https://travis-ci.org/jackc/pgproto3) - ---- - -This version is used with pgx `v4`. In pgx `v5` it is part of the https://github.com/jackc/pgx repository. - ---- - -# pgproto3 - -Package pgproto3 is a encoder and decoder of the PostgreSQL wire protocol version 3. - -pgproto3 can be used as a foundation for PostgreSQL drivers, proxies, mock servers, load balancers and more. - -See example/pgfortune for a playful example of a fake PostgreSQL server. - -Extracted from original implementation in https://github.com/jackc/pgx. diff --git a/vendor/github.com/jackc/pgproto3/v2/chunkreader.go b/vendor/github.com/jackc/pgproto3/v2/chunkreader.go deleted file mode 100644 index 92206f35..00000000 --- a/vendor/github.com/jackc/pgproto3/v2/chunkreader.go +++ /dev/null @@ -1,19 +0,0 @@ -package pgproto3 - -import ( - "io" - - "github.com/jackc/chunkreader/v2" -) - -// ChunkReader is an interface to decouple github.com/jackc/chunkreader from this package. -type ChunkReader interface { - // Next returns buf filled with the next n bytes. If an error (including a partial read) occurs, - // buf must be nil. Next must preserve any partially read data. Next must not reuse buf. - Next(n int) (buf []byte, err error) -} - -// NewChunkReader creates and returns a new default ChunkReader. -func NewChunkReader(r io.Reader) ChunkReader { - return chunkreader.New(r) -} diff --git a/vendor/github.com/jackc/pgproto3/v2/doc.go b/vendor/github.com/jackc/pgproto3/v2/doc.go deleted file mode 100644 index 8226dc98..00000000 --- a/vendor/github.com/jackc/pgproto3/v2/doc.go +++ /dev/null @@ -1,4 +0,0 @@ -// Package pgproto3 is a encoder and decoder of the PostgreSQL wire protocol version 3. -// -// See https://www.postgresql.org/docs/current/protocol-message-formats.html for meanings of the different messages. -package pgproto3 diff --git a/vendor/github.com/jackc/pgproto3/v2/frontend.go b/vendor/github.com/jackc/pgproto3/v2/frontend.go deleted file mode 100644 index 623b0a98..00000000 --- a/vendor/github.com/jackc/pgproto3/v2/frontend.go +++ /dev/null @@ -1,210 +0,0 @@ -package pgproto3 - -import ( - "encoding/binary" - "errors" - "fmt" - "io" -) - -// Frontend acts as a client for the PostgreSQL wire protocol version 3. -type Frontend struct { - cr ChunkReader - w io.Writer - - // Backend message flyweights - authenticationOk AuthenticationOk - authenticationCleartextPassword AuthenticationCleartextPassword - authenticationMD5Password AuthenticationMD5Password - authenticationGSS AuthenticationGSS - authenticationGSSContinue AuthenticationGSSContinue - authenticationSASL AuthenticationSASL - authenticationSASLContinue AuthenticationSASLContinue - authenticationSASLFinal AuthenticationSASLFinal - backendKeyData BackendKeyData - bindComplete BindComplete - closeComplete CloseComplete - commandComplete CommandComplete - copyBothResponse CopyBothResponse - copyData CopyData - copyInResponse CopyInResponse - copyOutResponse CopyOutResponse - copyDone CopyDone - dataRow DataRow - emptyQueryResponse EmptyQueryResponse - errorResponse ErrorResponse - functionCallResponse FunctionCallResponse - noData NoData - noticeResponse NoticeResponse - notificationResponse NotificationResponse - parameterDescription ParameterDescription - parameterStatus ParameterStatus - parseComplete ParseComplete - readyForQuery ReadyForQuery - rowDescription RowDescription - portalSuspended PortalSuspended - - bodyLen int - msgType byte - partialMsg bool - authType uint32 -} - -// NewFrontend creates a new Frontend. -func NewFrontend(cr ChunkReader, w io.Writer) *Frontend { - return &Frontend{cr: cr, w: w} -} - -// Send sends a message to the backend. -func (f *Frontend) Send(msg FrontendMessage) error { - buf, err := msg.Encode(nil) - if err != nil { - return err - } - _, err = f.w.Write(buf) - return err -} - -func translateEOFtoErrUnexpectedEOF(err error) error { - if err == io.EOF { - return io.ErrUnexpectedEOF - } - return err -} - -// Receive receives a message from the backend. The returned message is only valid until the next call to Receive. -func (f *Frontend) Receive() (BackendMessage, error) { - if !f.partialMsg { - header, err := f.cr.Next(5) - if err != nil { - return nil, translateEOFtoErrUnexpectedEOF(err) - } - - f.msgType = header[0] - f.bodyLen = int(binary.BigEndian.Uint32(header[1:])) - 4 - f.partialMsg = true - if f.bodyLen < 0 { - return nil, errors.New("invalid message with negative body length received") - } - } - - msgBody, err := f.cr.Next(f.bodyLen) - if err != nil { - return nil, translateEOFtoErrUnexpectedEOF(err) - } - - f.partialMsg = false - - var msg BackendMessage - switch f.msgType { - case '1': - msg = &f.parseComplete - case '2': - msg = &f.bindComplete - case '3': - msg = &f.closeComplete - case 'A': - msg = &f.notificationResponse - case 'c': - msg = &f.copyDone - case 'C': - msg = &f.commandComplete - case 'd': - msg = &f.copyData - case 'D': - msg = &f.dataRow - case 'E': - msg = &f.errorResponse - case 'G': - msg = &f.copyInResponse - case 'H': - msg = &f.copyOutResponse - case 'I': - msg = &f.emptyQueryResponse - case 'K': - msg = &f.backendKeyData - case 'n': - msg = &f.noData - case 'N': - msg = &f.noticeResponse - case 'R': - var err error - msg, err = f.findAuthenticationMessageType(msgBody) - if err != nil { - return nil, err - } - case 's': - msg = &f.portalSuspended - case 'S': - msg = &f.parameterStatus - case 't': - msg = &f.parameterDescription - case 'T': - msg = &f.rowDescription - case 'V': - msg = &f.functionCallResponse - case 'W': - msg = &f.copyBothResponse - case 'Z': - msg = &f.readyForQuery - default: - return nil, fmt.Errorf("unknown message type: %c", f.msgType) - } - - err = msg.Decode(msgBody) - return msg, err -} - -// Authentication message type constants. -// See src/include/libpq/pqcomm.h for all -// constants. -const ( - AuthTypeOk = 0 - AuthTypeCleartextPassword = 3 - AuthTypeMD5Password = 5 - AuthTypeSCMCreds = 6 - AuthTypeGSS = 7 - AuthTypeGSSCont = 8 - AuthTypeSSPI = 9 - AuthTypeSASL = 10 - AuthTypeSASLContinue = 11 - AuthTypeSASLFinal = 12 -) - -func (f *Frontend) findAuthenticationMessageType(src []byte) (BackendMessage, error) { - if len(src) < 4 { - return nil, errors.New("authentication message too short") - } - f.authType = binary.BigEndian.Uint32(src[:4]) - - switch f.authType { - case AuthTypeOk: - return &f.authenticationOk, nil - case AuthTypeCleartextPassword: - return &f.authenticationCleartextPassword, nil - case AuthTypeMD5Password: - return &f.authenticationMD5Password, nil - case AuthTypeSCMCreds: - return nil, errors.New("AuthTypeSCMCreds is unimplemented") - case AuthTypeGSS: - return &f.authenticationGSS, nil - case AuthTypeGSSCont: - return &f.authenticationGSSContinue, nil - case AuthTypeSSPI: - return nil, errors.New("AuthTypeSSPI is unimplemented") - case AuthTypeSASL: - return &f.authenticationSASL, nil - case AuthTypeSASLContinue: - return &f.authenticationSASLContinue, nil - case AuthTypeSASLFinal: - return &f.authenticationSASLFinal, nil - default: - return nil, fmt.Errorf("unknown authentication type: %d", f.authType) - } -} - -// GetAuthType returns the authType used in the current state of the frontend. -// See SetAuthType for more information. -func (f *Frontend) GetAuthType() uint32 { - return f.authType -} diff --git a/vendor/github.com/jackc/pgservicefile/.travis.yml b/vendor/github.com/jackc/pgservicefile/.travis.yml deleted file mode 100644 index e176228e..00000000 --- a/vendor/github.com/jackc/pgservicefile/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: go - -go: - - 1.x - - tip - -matrix: - allow_failures: - - go: tip diff --git a/vendor/github.com/jackc/pgservicefile/README.md b/vendor/github.com/jackc/pgservicefile/README.md index e50ca126..2fc7e012 100644 --- a/vendor/github.com/jackc/pgservicefile/README.md +++ b/vendor/github.com/jackc/pgservicefile/README.md @@ -1,5 +1,6 @@ -[![](https://godoc.org/github.com/jackc/pgservicefile?status.svg)](https://godoc.org/github.com/jackc/pgservicefile) -[![Build Status](https://travis-ci.org/jackc/pgservicefile.svg)](https://travis-ci.org/jackc/pgservicefile) +[![Go Reference](https://pkg.go.dev/badge/github.com/jackc/pgservicefile.svg)](https://pkg.go.dev/github.com/jackc/pgservicefile) +[![Build Status](https://github.com/jackc/pgservicefile/actions/workflows/ci.yml/badge.svg)](https://github.com/jackc/pgservicefile/actions/workflows/ci.yml) + # pgservicefile diff --git a/vendor/github.com/jackc/pgservicefile/pgservicefile.go b/vendor/github.com/jackc/pgservicefile/pgservicefile.go index 797bbab9..c62caa7f 100644 --- a/vendor/github.com/jackc/pgservicefile/pgservicefile.go +++ b/vendor/github.com/jackc/pgservicefile/pgservicefile.go @@ -57,7 +57,7 @@ func ParseServicefile(r io.Reader) (*Servicefile, error) { } else if strings.HasPrefix(line, "[") && strings.HasSuffix(line, "]") { service = &Service{Name: line[1 : len(line)-1], Settings: make(map[string]string)} servicefile.Services = append(servicefile.Services, service) - } else { + } else if service != nil { parts := strings.SplitN(line, "=", 2) if len(parts) != 2 { return nil, fmt.Errorf("unable to parse line %d", lineNum) @@ -67,6 +67,8 @@ func ParseServicefile(r io.Reader) (*Servicefile, error) { value := strings.TrimSpace(parts[1]) service.Settings[key] = value + } else { + return nil, fmt.Errorf("line %d is not in a section", lineNum) } } diff --git a/vendor/github.com/jackc/pgtype/CHANGELOG.md b/vendor/github.com/jackc/pgtype/CHANGELOG.md deleted file mode 100644 index a362a1df..00000000 --- a/vendor/github.com/jackc/pgtype/CHANGELOG.md +++ /dev/null @@ -1,164 +0,0 @@ -# 1.14.0 (February 11, 2023) - -* Fix: BC timestamp text format support (jozeflami) -* Add Scanner and Valuer interfaces to CIDR (Yurii Popivniak) -* Fix crash when nilifying pointer to sql.Scanner - -# 1.13.0 (December 1, 2022) - -* Fix: Reset jsonb before unmarshal (Tomas Odinas) -* Fix: return correct zero value when UUID conversion fails (ndrpnt) -* Fix: EncodeText for Lseg includes [ and ] -* Support sql Value and Scan for custom date type (Hubert Krauze) -* Support Ltree binary encoding (AmineChikhaoui) -* Fix: dates with "BC" (jozeflami) - -# 1.12.0 (August 6, 2022) - -* Add JSONArray (Jakob Ackermann) -* Support Inet from fmt.Stringer and encoding.TextMarshaler (Ville Skyttä) -* Support UUID from fmt.Stringer interface (Lasse Hyldahl Jensen) -* Fix: shopspring-numeric extension does not panic on NaN -* Numeric can be assigned to string -* Fix: Do not send IPv4 networks as IPv4-mapped IPv6 (William Storey) -* Fix: PlanScan for interface{}(nil) (James Hartig) -* Fix: *sql.Scanner for NULL handling (James Hartig) -* Timestamp[tz].Set() supports string (Harmen) -* Fix: Hstore AssignTo with map of *string (Diego Becciolini) - -# 1.11.0 (April 21, 2022) - -* Add multirange for numeric, int4, and int8 (Vu) -* JSONBArray now supports json.RawMessage (Jens Emil Schulz Østergaard) -* Add RecordArray (WGH) -* Add UnmarshalJSON to pgtype.Int2 -* Hstore.Set accepts map[string]Text - -# 1.10.0 (February 7, 2022) - -* Normalize UTC timestamps to comply with stdlib (Torkel Rogstad) -* Assign Numeric to *big.Rat (Oleg Lomaka) -* Fix typo in float8 error message (Pinank Solanki) -* Scan type aliases for floating point types (Collin Forsyth) - -# 1.9.1 (November 28, 2021) - -* Fix: binary timestamp is assumed to be in UTC (restored behavior changed in v1.9.0) - -# 1.9.0 (November 20, 2021) - -* Fix binary hstore null decoding -* Add shopspring/decimal.NullDecimal support to integration (Eli Treuherz) -* Inet.Set supports bare IP address (Carl Dunham) -* Add zeronull.Float8 -* Fix NULL being lost when scanning unknown OID into sql.Scanner -* Fix BPChar.AssignTo **rune -* Add support for fmt.Stringer and driver.Valuer in String fields encoding (Jan Dubsky) -* Fix really big timestamp(tz)s binary format parsing (e.g. year 294276) (Jim Tsao) -* Support `map[string]*string` as hstore (Adrian Sieger) -* Fix parsing text array with negative bounds -* Add infinity support for numeric (Jim Tsao) - -# 1.8.1 (July 24, 2021) - -* Cleaned up Go module dependency chain - -# 1.8.0 (July 10, 2021) - -* Maintain host bits for inet types (Cameron Daniel) -* Support pointers of wrapping structs (Ivan Daunis) -* Register JSONBArray at NewConnInfo() (Rueian) -* CompositeTextScanner handles backslash escapes - -# 1.7.0 (March 25, 2021) - -* Fix scanning int into **sql.Scanner implementor -* Add tsrange array type (Vasilii Novikov) -* Fix: escaped strings when they start or end with a newline char (Stephane Martin) -* Accept nil *time.Time in Time.Set -* Fix numeric NaN support -* Use Go 1.13 errors instead of xerrors - -# 1.6.2 (December 3, 2020) - -* Fix panic on assigning empty array to non-slice or array -* Fix text array parsing disambiguates NULL and "NULL" -* Fix Timestamptz.DecodeText with too short text - -# 1.6.1 (October 31, 2020) - -* Fix simple protocol empty array support - -# 1.6.0 (October 24, 2020) - -* Fix AssignTo pointer to pointer to slice and named types. -* Fix zero length array assignment (Simo Haasanen) -* Add float64, float32 convert to int2, int4, int8 (lqu3j) -* Support setting infinite timestamps (Erik Agsjö) -* Polygon improvements (duohedron) -* Fix Inet.Set with nil (Tomas Volf) - -# 1.5.0 (September 26, 2020) - -* Add slice of slice mapping to multi-dimensional arrays (Simo Haasanen) -* Fix JSONBArray -* Fix selecting empty array -* Text formatted values except bytea can be directly scanned to []byte -* Add JSON marshalling for UUID (bakmataliev) -* Improve point type conversions (bakmataliev) - -# 1.4.2 (July 22, 2020) - -* Fix encoding of a large composite data type (Yaz Saito) - -# 1.4.1 (July 14, 2020) - -* Fix ArrayType DecodeBinary empty array breaks future reads - -# 1.4.0 (June 27, 2020) - -* Add JSON support to ext/gofrs-uuid -* Performance improvements in Scan path -* Improved ext/shopspring-numeric binary decoding performance -* Add composite type support (Maxim Ivanov and Jack Christensen) -* Add better generic enum type support -* Add generic array type support -* Clarify and normalize Value semantics -* Fix hstore with empty string values -* Numeric supports NaN values (leighhopcroft) -* Add slice of pointer support to array types (megaturbo) -* Add jsonb array type (tserakhau) -* Allow converting intervals with months and days to duration - -# 1.3.0 (March 30, 2020) - -* Get implemented on T instead of *T -* Set will call Get on src if possible -* Range types Set method supports its own type, string, and nil -* Date.Set parses string -* Fix correct format verb for unknown type error (Robert Welin) -* Truncate nanoseconds in EncodeText for Timestamptz and Timestamp - -# 1.2.0 (February 5, 2020) - -* Add zeronull package for easier NULL <-> zero conversion -* Add JSON marshalling for shopspring-numeric extension -* Add JSON marshalling for Bool, Date, JSON/B, Timestamptz (Jeffrey Stiles) -* Fix null status in UnmarshalJSON for some types (Jeffrey Stiles) - -# 1.1.0 (January 11, 2020) - -* Add PostgreSQL time type support -* Add more automatic conversions of integer arrays of different types (Jean-Philippe Quéméner) - -# 1.0.3 (November 16, 2019) - -* Support initializing Array types from a slice of the value (Alex Gaynor) - -# 1.0.2 (October 22, 2019) - -* Fix scan into null into pointer to pointer implementing Decode* interface. (Jeremy Altavilla) - -# 1.0.1 (September 19, 2019) - -* Fix daterange OID diff --git a/vendor/github.com/jackc/pgtype/README.md b/vendor/github.com/jackc/pgtype/README.md deleted file mode 100644 index 72dadcfc..00000000 --- a/vendor/github.com/jackc/pgtype/README.md +++ /dev/null @@ -1,14 +0,0 @@ -[![](https://godoc.org/github.com/jackc/pgtype?status.svg)](https://godoc.org/github.com/jackc/pgtype) -![CI](https://github.com/jackc/pgtype/workflows/CI/badge.svg) - ---- - -This version is used with pgx `v4`. In pgx `v5` it is part of the https://github.com/jackc/pgx repository. - ---- - -# pgtype - -pgtype implements Go types for over 70 PostgreSQL types. pgtype is the type system underlying the -https://github.com/jackc/pgx PostgreSQL driver. These types support the binary format for enhanced performance with pgx. -They also support the database/sql `Scan` and `Value` interfaces and can be used with https://github.com/lib/pq. diff --git a/vendor/github.com/jackc/pgtype/aclitem.go b/vendor/github.com/jackc/pgtype/aclitem.go deleted file mode 100644 index 9f6587be..00000000 --- a/vendor/github.com/jackc/pgtype/aclitem.go +++ /dev/null @@ -1,138 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" -) - -// ACLItem is used for PostgreSQL's aclitem data type. A sample aclitem -// might look like this: -// -// postgres=arwdDxt/postgres -// -// Note, however, that because the user/role name part of an aclitem is -// an identifier, it follows all the usual formatting rules for SQL -// identifiers: if it contains spaces and other special characters, -// it should appear in double-quotes: -// -// postgres=arwdDxt/"role with spaces" -// -type ACLItem struct { - String string - Status Status -} - -func (dst *ACLItem) Set(src interface{}) error { - if src == nil { - *dst = ACLItem{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case string: - *dst = ACLItem{String: value, Status: Present} - case *string: - if value == nil { - *dst = ACLItem{Status: Null} - } else { - *dst = ACLItem{String: *value, Status: Present} - } - default: - if originalSrc, ok := underlyingStringType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to ACLItem", value) - } - - return nil -} - -func (dst ACLItem) Get() interface{} { - switch dst.Status { - case Present: - return dst.String - case Null: - return nil - default: - return dst.Status - } -} - -func (src *ACLItem) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *string: - *v = src.String - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (dst *ACLItem) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = ACLItem{Status: Null} - return nil - } - - *dst = ACLItem{String: string(src), Status: Present} - return nil -} - -func (src ACLItem) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, src.String...), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *ACLItem) Scan(src interface{}) error { - if src == nil { - *dst = ACLItem{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src ACLItem) Value() (driver.Value, error) { - switch src.Status { - case Present: - return src.String, nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} diff --git a/vendor/github.com/jackc/pgtype/aclitem_array.go b/vendor/github.com/jackc/pgtype/aclitem_array.go deleted file mode 100644 index 4e3be3bd..00000000 --- a/vendor/github.com/jackc/pgtype/aclitem_array.go +++ /dev/null @@ -1,428 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "fmt" - "reflect" -) - -type ACLItemArray struct { - Elements []ACLItem - Dimensions []ArrayDimension - Status Status -} - -func (dst *ACLItemArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = ACLItemArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []string: - if value == nil { - *dst = ACLItemArray{Status: Null} - } else if len(value) == 0 { - *dst = ACLItemArray{Status: Present} - } else { - elements := make([]ACLItem, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = ACLItemArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*string: - if value == nil { - *dst = ACLItemArray{Status: Null} - } else if len(value) == 0 { - *dst = ACLItemArray{Status: Present} - } else { - elements := make([]ACLItem, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = ACLItemArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []ACLItem: - if value == nil { - *dst = ACLItemArray{Status: Null} - } else if len(value) == 0 { - *dst = ACLItemArray{Status: Present} - } else { - *dst = ACLItemArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = ACLItemArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for ACLItemArray", src) - } - if elementsLength == 0 { - *dst = ACLItemArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to ACLItemArray", src) - } - - *dst = ACLItemArray{ - Elements: make([]ACLItem, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]ACLItem, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to ACLItemArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *ACLItemArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to ACLItemArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in ACLItemArray", err) - } - index++ - - return index, nil -} - -func (dst ACLItemArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *ACLItemArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]string: - *v = make([]string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*string: - *v = make([]*string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *ACLItemArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from ACLItemArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from ACLItemArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *ACLItemArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = ACLItemArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []ACLItem - - if len(uta.Elements) > 0 { - elements = make([]ACLItem, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem ACLItem - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = ACLItemArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (src ACLItemArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *ACLItemArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src ACLItemArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/array_type.go b/vendor/github.com/jackc/pgtype/array_type.go deleted file mode 100644 index 71466554..00000000 --- a/vendor/github.com/jackc/pgtype/array_type.go +++ /dev/null @@ -1,353 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -// ArrayType represents an array type. While it implements Value, this is only in service of its type conversion duties -// when registered as a data type in a ConnType. It should not be used directly as a Value. ArrayType is a convenience -// type for types that do not have a concrete array type. -type ArrayType struct { - elements []ValueTranscoder - dimensions []ArrayDimension - - typeName string - newElement func() ValueTranscoder - - elementOID uint32 - status Status -} - -func NewArrayType(typeName string, elementOID uint32, newElement func() ValueTranscoder) *ArrayType { - return &ArrayType{typeName: typeName, elementOID: elementOID, newElement: newElement} -} - -func (at *ArrayType) NewTypeValue() Value { - return &ArrayType{ - elements: at.elements, - dimensions: at.dimensions, - status: at.status, - - typeName: at.typeName, - elementOID: at.elementOID, - newElement: at.newElement, - } -} - -func (at *ArrayType) TypeName() string { - return at.typeName -} - -func (dst *ArrayType) setNil() { - dst.elements = nil - dst.dimensions = nil - dst.status = Null -} - -func (dst *ArrayType) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - dst.setNil() - return nil - } - - sliceVal := reflect.ValueOf(src) - if sliceVal.Kind() != reflect.Slice { - return fmt.Errorf("cannot set non-slice") - } - - if sliceVal.IsNil() { - dst.setNil() - return nil - } - - dst.elements = make([]ValueTranscoder, sliceVal.Len()) - for i := range dst.elements { - v := dst.newElement() - err := v.Set(sliceVal.Index(i).Interface()) - if err != nil { - return err - } - - dst.elements[i] = v - } - dst.dimensions = []ArrayDimension{{Length: int32(len(dst.elements)), LowerBound: 1}} - dst.status = Present - - return nil -} - -func (dst ArrayType) Get() interface{} { - switch dst.status { - case Present: - elementValues := make([]interface{}, len(dst.elements)) - for i := range dst.elements { - elementValues[i] = dst.elements[i].Get() - } - return elementValues - case Null: - return nil - default: - return dst.status - } -} - -func (src *ArrayType) AssignTo(dst interface{}) error { - ptrSlice := reflect.ValueOf(dst) - if ptrSlice.Kind() != reflect.Ptr { - return fmt.Errorf("cannot assign to non-pointer") - } - - sliceVal := ptrSlice.Elem() - sliceType := sliceVal.Type() - - if sliceType.Kind() != reflect.Slice { - return fmt.Errorf("cannot assign to pointer to non-slice") - } - - switch src.status { - case Present: - slice := reflect.MakeSlice(sliceType, len(src.elements), len(src.elements)) - elemType := sliceType.Elem() - - for i := range src.elements { - ptrElem := reflect.New(elemType) - err := src.elements[i].AssignTo(ptrElem.Interface()) - if err != nil { - return err - } - - slice.Index(i).Set(ptrElem.Elem()) - } - - sliceVal.Set(slice) - return nil - case Null: - sliceVal.Set(reflect.Zero(sliceType)) - return nil - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (dst *ArrayType) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - dst.setNil() - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []ValueTranscoder - - if len(uta.Elements) > 0 { - elements = make([]ValueTranscoder, len(uta.Elements)) - - for i, s := range uta.Elements { - elem := dst.newElement() - var elemSrc []byte - if s != "NULL" { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - dst.elements = elements - dst.dimensions = uta.Dimensions - dst.status = Present - - return nil -} - -func (dst *ArrayType) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - dst.setNil() - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - var elements []ValueTranscoder - - if len(arrayHeader.Dimensions) == 0 { - dst.elements = elements - dst.dimensions = arrayHeader.Dimensions - dst.status = Present - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements = make([]ValueTranscoder, elementCount) - - for i := range elements { - elem := dst.newElement() - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elem.DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - - dst.elements = elements - dst.dimensions = arrayHeader.Dimensions - dst.status = Present - - return nil -} - -func (src ArrayType) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.dimensions)) - dimElemCounts[len(src.dimensions)-1] = int(src.dimensions[len(src.dimensions)-1].Length) - for i := len(src.dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src ArrayType) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.dimensions, - ElementOID: int32(src.elementOID), - } - - for i := range src.elements { - if src.elements[i].Get() == nil { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *ArrayType) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src ArrayType) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/bit.go b/vendor/github.com/jackc/pgtype/bit.go deleted file mode 100644 index c1709e6b..00000000 --- a/vendor/github.com/jackc/pgtype/bit.go +++ /dev/null @@ -1,45 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" -) - -type Bit Varbit - -func (dst *Bit) Set(src interface{}) error { - return (*Varbit)(dst).Set(src) -} - -func (dst Bit) Get() interface{} { - return (Varbit)(dst).Get() -} - -func (src *Bit) AssignTo(dst interface{}) error { - return (*Varbit)(src).AssignTo(dst) -} - -func (dst *Bit) DecodeBinary(ci *ConnInfo, src []byte) error { - return (*Varbit)(dst).DecodeBinary(ci, src) -} - -func (src Bit) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Varbit)(src).EncodeBinary(ci, buf) -} - -func (dst *Bit) DecodeText(ci *ConnInfo, src []byte) error { - return (*Varbit)(dst).DecodeText(ci, src) -} - -func (src Bit) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Varbit)(src).EncodeText(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *Bit) Scan(src interface{}) error { - return (*Varbit)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Bit) Value() (driver.Value, error) { - return (Varbit)(src).Value() -} diff --git a/vendor/github.com/jackc/pgtype/bool.go b/vendor/github.com/jackc/pgtype/bool.go deleted file mode 100644 index 676c8e5d..00000000 --- a/vendor/github.com/jackc/pgtype/bool.go +++ /dev/null @@ -1,217 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/json" - "fmt" - "strconv" -) - -type Bool struct { - Bool bool - Status Status -} - -func (dst *Bool) Set(src interface{}) error { - if src == nil { - *dst = Bool{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case bool: - *dst = Bool{Bool: value, Status: Present} - case string: - bb, err := strconv.ParseBool(value) - if err != nil { - return err - } - *dst = Bool{Bool: bb, Status: Present} - case *bool: - if value == nil { - *dst = Bool{Status: Null} - } else { - return dst.Set(*value) - } - case *string: - if value == nil { - *dst = Bool{Status: Null} - } else { - return dst.Set(*value) - } - default: - if originalSrc, ok := underlyingBoolType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Bool", value) - } - - return nil -} - -func (dst Bool) Get() interface{} { - switch dst.Status { - case Present: - return dst.Bool - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Bool) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *bool: - *v = src.Bool - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (dst *Bool) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Bool{Status: Null} - return nil - } - - if len(src) != 1 { - return fmt.Errorf("invalid length for bool: %v", len(src)) - } - - *dst = Bool{Bool: src[0] == 't', Status: Present} - return nil -} - -func (dst *Bool) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Bool{Status: Null} - return nil - } - - if len(src) != 1 { - return fmt.Errorf("invalid length for bool: %v", len(src)) - } - - *dst = Bool{Bool: src[0] == 1, Status: Present} - return nil -} - -func (src Bool) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if src.Bool { - buf = append(buf, 't') - } else { - buf = append(buf, 'f') - } - - return buf, nil -} - -func (src Bool) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if src.Bool { - buf = append(buf, 1) - } else { - buf = append(buf, 0) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Bool) Scan(src interface{}) error { - if src == nil { - *dst = Bool{Status: Null} - return nil - } - - switch src := src.(type) { - case bool: - *dst = Bool{Bool: src, Status: Present} - return nil - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Bool) Value() (driver.Value, error) { - switch src.Status { - case Present: - return src.Bool, nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} - -func (src Bool) MarshalJSON() ([]byte, error) { - switch src.Status { - case Present: - if src.Bool { - return []byte("true"), nil - } else { - return []byte("false"), nil - } - case Null: - return []byte("null"), nil - case Undefined: - return nil, errUndefined - } - - return nil, errBadStatus -} - -func (dst *Bool) UnmarshalJSON(b []byte) error { - var v *bool - err := json.Unmarshal(b, &v) - if err != nil { - return err - } - - if v == nil { - *dst = Bool{Status: Null} - } else { - *dst = Bool{Bool: *v, Status: Present} - } - - return nil -} diff --git a/vendor/github.com/jackc/pgtype/bool_array.go b/vendor/github.com/jackc/pgtype/bool_array.go deleted file mode 100644 index 6558d971..00000000 --- a/vendor/github.com/jackc/pgtype/bool_array.go +++ /dev/null @@ -1,517 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type BoolArray struct { - Elements []Bool - Dimensions []ArrayDimension - Status Status -} - -func (dst *BoolArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = BoolArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []bool: - if value == nil { - *dst = BoolArray{Status: Null} - } else if len(value) == 0 { - *dst = BoolArray{Status: Present} - } else { - elements := make([]Bool, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = BoolArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*bool: - if value == nil { - *dst = BoolArray{Status: Null} - } else if len(value) == 0 { - *dst = BoolArray{Status: Present} - } else { - elements := make([]Bool, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = BoolArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Bool: - if value == nil { - *dst = BoolArray{Status: Null} - } else if len(value) == 0 { - *dst = BoolArray{Status: Present} - } else { - *dst = BoolArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = BoolArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for BoolArray", src) - } - if elementsLength == 0 { - *dst = BoolArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to BoolArray", src) - } - - *dst = BoolArray{ - Elements: make([]Bool, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Bool, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to BoolArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *BoolArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to BoolArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in BoolArray", err) - } - index++ - - return index, nil -} - -func (dst BoolArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *BoolArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]bool: - *v = make([]bool, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*bool: - *v = make([]*bool, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *BoolArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from BoolArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from BoolArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *BoolArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = BoolArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Bool - - if len(uta.Elements) > 0 { - elements = make([]Bool, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Bool - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = BoolArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *BoolArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = BoolArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = BoolArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Bool, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = BoolArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src BoolArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src BoolArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("bool"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "bool") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *BoolArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src BoolArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/box.go b/vendor/github.com/jackc/pgtype/box.go deleted file mode 100644 index 27fb829e..00000000 --- a/vendor/github.com/jackc/pgtype/box.go +++ /dev/null @@ -1,165 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "strconv" - "strings" - - "github.com/jackc/pgio" -) - -type Box struct { - P [2]Vec2 - Status Status -} - -func (dst *Box) Set(src interface{}) error { - return fmt.Errorf("cannot convert %v to Box", src) -} - -func (dst Box) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Box) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Box) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Box{Status: Null} - return nil - } - - if len(src) < 11 { - return fmt.Errorf("invalid length for Box: %v", len(src)) - } - - str := string(src[1:]) - - var end int - end = strings.IndexByte(str, ',') - - x1, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - str = str[end+1:] - end = strings.IndexByte(str, ')') - - y1, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - str = str[end+3:] - end = strings.IndexByte(str, ',') - - x2, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - str = str[end+1 : len(str)-1] - - y2, err := strconv.ParseFloat(str, 64) - if err != nil { - return err - } - - *dst = Box{P: [2]Vec2{{x1, y1}, {x2, y2}}, Status: Present} - return nil -} - -func (dst *Box) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Box{Status: Null} - return nil - } - - if len(src) != 32 { - return fmt.Errorf("invalid length for Box: %v", len(src)) - } - - x1 := binary.BigEndian.Uint64(src) - y1 := binary.BigEndian.Uint64(src[8:]) - x2 := binary.BigEndian.Uint64(src[16:]) - y2 := binary.BigEndian.Uint64(src[24:]) - - *dst = Box{ - P: [2]Vec2{ - {math.Float64frombits(x1), math.Float64frombits(y1)}, - {math.Float64frombits(x2), math.Float64frombits(y2)}, - }, - Status: Present, - } - return nil -} - -func (src Box) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, fmt.Sprintf(`(%s,%s),(%s,%s)`, - strconv.FormatFloat(src.P[0].X, 'f', -1, 64), - strconv.FormatFloat(src.P[0].Y, 'f', -1, 64), - strconv.FormatFloat(src.P[1].X, 'f', -1, 64), - strconv.FormatFloat(src.P[1].Y, 'f', -1, 64), - )...) - return buf, nil -} - -func (src Box) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendUint64(buf, math.Float64bits(src.P[0].X)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.P[0].Y)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.P[1].X)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.P[1].Y)) - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Box) Scan(src interface{}) error { - if src == nil { - *dst = Box{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Box) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/bpchar.go b/vendor/github.com/jackc/pgtype/bpchar.go deleted file mode 100644 index c5fa42ea..00000000 --- a/vendor/github.com/jackc/pgtype/bpchar.go +++ /dev/null @@ -1,93 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" -) - -// BPChar is fixed-length, blank padded char type -// character(n), char(n) -type BPChar Text - -// Set converts from src to dst. -func (dst *BPChar) Set(src interface{}) error { - return (*Text)(dst).Set(src) -} - -// Get returns underlying value -func (dst BPChar) Get() interface{} { - return (Text)(dst).Get() -} - -// AssignTo assigns from src to dst. -func (src *BPChar) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *rune: - runes := []rune(src.String) - if len(runes) == 1 { - *v = runes[0] - return nil - } - case *string: - *v = src.String - return nil - case *[]byte: - *v = make([]byte, len(src.String)) - copy(*v, src.String) - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (BPChar) PreferredResultFormat() int16 { - return TextFormatCode -} - -func (dst *BPChar) DecodeText(ci *ConnInfo, src []byte) error { - return (*Text)(dst).DecodeText(ci, src) -} - -func (dst *BPChar) DecodeBinary(ci *ConnInfo, src []byte) error { - return (*Text)(dst).DecodeBinary(ci, src) -} - -func (BPChar) PreferredParamFormat() int16 { - return TextFormatCode -} - -func (src BPChar) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Text)(src).EncodeText(ci, buf) -} - -func (src BPChar) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Text)(src).EncodeBinary(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *BPChar) Scan(src interface{}) error { - return (*Text)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src BPChar) Value() (driver.Value, error) { - return (Text)(src).Value() -} - -func (src BPChar) MarshalJSON() ([]byte, error) { - return (Text)(src).MarshalJSON() -} - -func (dst *BPChar) UnmarshalJSON(b []byte) error { - return (*Text)(dst).UnmarshalJSON(b) -} diff --git a/vendor/github.com/jackc/pgtype/bpchar_array.go b/vendor/github.com/jackc/pgtype/bpchar_array.go deleted file mode 100644 index 8e792214..00000000 --- a/vendor/github.com/jackc/pgtype/bpchar_array.go +++ /dev/null @@ -1,517 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type BPCharArray struct { - Elements []BPChar - Dimensions []ArrayDimension - Status Status -} - -func (dst *BPCharArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = BPCharArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []string: - if value == nil { - *dst = BPCharArray{Status: Null} - } else if len(value) == 0 { - *dst = BPCharArray{Status: Present} - } else { - elements := make([]BPChar, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = BPCharArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*string: - if value == nil { - *dst = BPCharArray{Status: Null} - } else if len(value) == 0 { - *dst = BPCharArray{Status: Present} - } else { - elements := make([]BPChar, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = BPCharArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []BPChar: - if value == nil { - *dst = BPCharArray{Status: Null} - } else if len(value) == 0 { - *dst = BPCharArray{Status: Present} - } else { - *dst = BPCharArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = BPCharArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for BPCharArray", src) - } - if elementsLength == 0 { - *dst = BPCharArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to BPCharArray", src) - } - - *dst = BPCharArray{ - Elements: make([]BPChar, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]BPChar, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to BPCharArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *BPCharArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to BPCharArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in BPCharArray", err) - } - index++ - - return index, nil -} - -func (dst BPCharArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *BPCharArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]string: - *v = make([]string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*string: - *v = make([]*string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *BPCharArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from BPCharArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from BPCharArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *BPCharArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = BPCharArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []BPChar - - if len(uta.Elements) > 0 { - elements = make([]BPChar, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem BPChar - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = BPCharArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *BPCharArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = BPCharArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = BPCharArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]BPChar, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = BPCharArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src BPCharArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src BPCharArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("bpchar"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "bpchar") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *BPCharArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src BPCharArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/bytea.go b/vendor/github.com/jackc/pgtype/bytea.go deleted file mode 100644 index 67eba350..00000000 --- a/vendor/github.com/jackc/pgtype/bytea.go +++ /dev/null @@ -1,163 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/hex" - "fmt" -) - -type Bytea struct { - Bytes []byte - Status Status -} - -func (dst *Bytea) Set(src interface{}) error { - if src == nil { - *dst = Bytea{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case []byte: - if value != nil { - *dst = Bytea{Bytes: value, Status: Present} - } else { - *dst = Bytea{Status: Null} - } - default: - if originalSrc, ok := underlyingBytesType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Bytea", value) - } - - return nil -} - -func (dst Bytea) Get() interface{} { - switch dst.Status { - case Present: - return dst.Bytes - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Bytea) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *[]byte: - buf := make([]byte, len(src.Bytes)) - copy(buf, src.Bytes) - *v = buf - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -// DecodeText only supports the hex format. This has been the default since -// PostgreSQL 9.0. -func (dst *Bytea) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Bytea{Status: Null} - return nil - } - - if len(src) < 2 || src[0] != '\\' || src[1] != 'x' { - return fmt.Errorf("invalid hex format") - } - - buf := make([]byte, (len(src)-2)/2) - _, err := hex.Decode(buf, src[2:]) - if err != nil { - return err - } - - *dst = Bytea{Bytes: buf, Status: Present} - return nil -} - -func (dst *Bytea) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Bytea{Status: Null} - return nil - } - - *dst = Bytea{Bytes: src, Status: Present} - return nil -} - -func (src Bytea) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, `\x`...) - buf = append(buf, hex.EncodeToString(src.Bytes)...) - return buf, nil -} - -func (src Bytea) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, src.Bytes...), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Bytea) Scan(src interface{}) error { - if src == nil { - *dst = Bytea{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - buf := make([]byte, len(src)) - copy(buf, src) - *dst = Bytea{Bytes: buf, Status: Present} - return nil - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Bytea) Value() (driver.Value, error) { - switch src.Status { - case Present: - return src.Bytes, nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} diff --git a/vendor/github.com/jackc/pgtype/bytea_array.go b/vendor/github.com/jackc/pgtype/bytea_array.go deleted file mode 100644 index 69d1ceb9..00000000 --- a/vendor/github.com/jackc/pgtype/bytea_array.go +++ /dev/null @@ -1,489 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type ByteaArray struct { - Elements []Bytea - Dimensions []ArrayDimension - Status Status -} - -func (dst *ByteaArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = ByteaArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case [][]byte: - if value == nil { - *dst = ByteaArray{Status: Null} - } else if len(value) == 0 { - *dst = ByteaArray{Status: Present} - } else { - elements := make([]Bytea, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = ByteaArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Bytea: - if value == nil { - *dst = ByteaArray{Status: Null} - } else if len(value) == 0 { - *dst = ByteaArray{Status: Present} - } else { - *dst = ByteaArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = ByteaArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for ByteaArray", src) - } - if elementsLength == 0 { - *dst = ByteaArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to ByteaArray", src) - } - - *dst = ByteaArray{ - Elements: make([]Bytea, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Bytea, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to ByteaArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *ByteaArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to ByteaArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in ByteaArray", err) - } - index++ - - return index, nil -} - -func (dst ByteaArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *ByteaArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[][]byte: - *v = make([][]byte, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *ByteaArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from ByteaArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from ByteaArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *ByteaArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = ByteaArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Bytea - - if len(uta.Elements) > 0 { - elements = make([]Bytea, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Bytea - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = ByteaArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *ByteaArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = ByteaArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = ByteaArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Bytea, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = ByteaArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src ByteaArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src ByteaArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("bytea"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "bytea") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *ByteaArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src ByteaArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/cid.go b/vendor/github.com/jackc/pgtype/cid.go deleted file mode 100644 index b944748c..00000000 --- a/vendor/github.com/jackc/pgtype/cid.go +++ /dev/null @@ -1,61 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" -) - -// CID is PostgreSQL's Command Identifier type. -// -// When one does -// -// select cmin, cmax, * from some_table; -// -// it is the data type of the cmin and cmax hidden system columns. -// -// It is currently implemented as an unsigned four byte integer. -// Its definition can be found in src/include/c.h as CommandId -// in the PostgreSQL sources. -type CID pguint32 - -// Set converts from src to dst. Note that as CID is not a general -// number type Set does not do automatic type conversion as other number -// types do. -func (dst *CID) Set(src interface{}) error { - return (*pguint32)(dst).Set(src) -} - -func (dst CID) Get() interface{} { - return (pguint32)(dst).Get() -} - -// AssignTo assigns from src to dst. Note that as CID is not a general number -// type AssignTo does not do automatic type conversion as other number types do. -func (src *CID) AssignTo(dst interface{}) error { - return (*pguint32)(src).AssignTo(dst) -} - -func (dst *CID) DecodeText(ci *ConnInfo, src []byte) error { - return (*pguint32)(dst).DecodeText(ci, src) -} - -func (dst *CID) DecodeBinary(ci *ConnInfo, src []byte) error { - return (*pguint32)(dst).DecodeBinary(ci, src) -} - -func (src CID) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (pguint32)(src).EncodeText(ci, buf) -} - -func (src CID) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return (pguint32)(src).EncodeBinary(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *CID) Scan(src interface{}) error { - return (*pguint32)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src CID) Value() (driver.Value, error) { - return (pguint32)(src).Value() -} diff --git a/vendor/github.com/jackc/pgtype/cidr.go b/vendor/github.com/jackc/pgtype/cidr.go deleted file mode 100644 index 7c562cf2..00000000 --- a/vendor/github.com/jackc/pgtype/cidr.go +++ /dev/null @@ -1,43 +0,0 @@ -package pgtype - -import "database/sql/driver" - -type CIDR Inet - -func (dst *CIDR) Set(src interface{}) error { - return (*Inet)(dst).Set(src) -} - -func (dst CIDR) Get() interface{} { - return (Inet)(dst).Get() -} - -func (src *CIDR) AssignTo(dst interface{}) error { - return (*Inet)(src).AssignTo(dst) -} - -func (dst *CIDR) DecodeText(ci *ConnInfo, src []byte) error { - return (*Inet)(dst).DecodeText(ci, src) -} - -func (dst *CIDR) DecodeBinary(ci *ConnInfo, src []byte) error { - return (*Inet)(dst).DecodeBinary(ci, src) -} - -func (src CIDR) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Inet)(src).EncodeText(ci, buf) -} - -func (src CIDR) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Inet)(src).EncodeBinary(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *CIDR) Scan(src interface{}) error { - return (*Inet)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src CIDR) Value() (driver.Value, error) { - return (Inet)(src).Value() -} diff --git a/vendor/github.com/jackc/pgtype/cidr_array.go b/vendor/github.com/jackc/pgtype/cidr_array.go deleted file mode 100644 index 783c599c..00000000 --- a/vendor/github.com/jackc/pgtype/cidr_array.go +++ /dev/null @@ -1,546 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "net" - "reflect" - - "github.com/jackc/pgio" -) - -type CIDRArray struct { - Elements []CIDR - Dimensions []ArrayDimension - Status Status -} - -func (dst *CIDRArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = CIDRArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []*net.IPNet: - if value == nil { - *dst = CIDRArray{Status: Null} - } else if len(value) == 0 { - *dst = CIDRArray{Status: Present} - } else { - elements := make([]CIDR, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = CIDRArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []net.IP: - if value == nil { - *dst = CIDRArray{Status: Null} - } else if len(value) == 0 { - *dst = CIDRArray{Status: Present} - } else { - elements := make([]CIDR, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = CIDRArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*net.IP: - if value == nil { - *dst = CIDRArray{Status: Null} - } else if len(value) == 0 { - *dst = CIDRArray{Status: Present} - } else { - elements := make([]CIDR, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = CIDRArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []CIDR: - if value == nil { - *dst = CIDRArray{Status: Null} - } else if len(value) == 0 { - *dst = CIDRArray{Status: Present} - } else { - *dst = CIDRArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = CIDRArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for CIDRArray", src) - } - if elementsLength == 0 { - *dst = CIDRArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to CIDRArray", src) - } - - *dst = CIDRArray{ - Elements: make([]CIDR, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]CIDR, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to CIDRArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *CIDRArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to CIDRArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in CIDRArray", err) - } - index++ - - return index, nil -} - -func (dst CIDRArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *CIDRArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]*net.IPNet: - *v = make([]*net.IPNet, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]net.IP: - *v = make([]net.IP, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*net.IP: - *v = make([]*net.IP, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *CIDRArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from CIDRArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from CIDRArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *CIDRArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = CIDRArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []CIDR - - if len(uta.Elements) > 0 { - elements = make([]CIDR, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem CIDR - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = CIDRArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *CIDRArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = CIDRArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = CIDRArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]CIDR, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = CIDRArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src CIDRArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src CIDRArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("cidr"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "cidr") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *CIDRArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src CIDRArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/circle.go b/vendor/github.com/jackc/pgtype/circle.go deleted file mode 100644 index 4279650e..00000000 --- a/vendor/github.com/jackc/pgtype/circle.go +++ /dev/null @@ -1,150 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "strconv" - "strings" - - "github.com/jackc/pgio" -) - -type Circle struct { - P Vec2 - R float64 - Status Status -} - -func (dst *Circle) Set(src interface{}) error { - return fmt.Errorf("cannot convert %v to Circle", src) -} - -func (dst Circle) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Circle) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Circle) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Circle{Status: Null} - return nil - } - - if len(src) < 9 { - return fmt.Errorf("invalid length for Circle: %v", len(src)) - } - - str := string(src[2:]) - end := strings.IndexByte(str, ',') - x, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - str = str[end+1:] - end = strings.IndexByte(str, ')') - - y, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - str = str[end+2 : len(str)-1] - - r, err := strconv.ParseFloat(str, 64) - if err != nil { - return err - } - - *dst = Circle{P: Vec2{x, y}, R: r, Status: Present} - return nil -} - -func (dst *Circle) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Circle{Status: Null} - return nil - } - - if len(src) != 24 { - return fmt.Errorf("invalid length for Circle: %v", len(src)) - } - - x := binary.BigEndian.Uint64(src) - y := binary.BigEndian.Uint64(src[8:]) - r := binary.BigEndian.Uint64(src[16:]) - - *dst = Circle{ - P: Vec2{math.Float64frombits(x), math.Float64frombits(y)}, - R: math.Float64frombits(r), - Status: Present, - } - return nil -} - -func (src Circle) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, fmt.Sprintf(`<(%s,%s),%s>`, - strconv.FormatFloat(src.P.X, 'f', -1, 64), - strconv.FormatFloat(src.P.Y, 'f', -1, 64), - strconv.FormatFloat(src.R, 'f', -1, 64), - )...) - - return buf, nil -} - -func (src Circle) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendUint64(buf, math.Float64bits(src.P.X)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.P.Y)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.R)) - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Circle) Scan(src interface{}) error { - if src == nil { - *dst = Circle{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Circle) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/composite_fields.go b/vendor/github.com/jackc/pgtype/composite_fields.go deleted file mode 100644 index b6d09fcf..00000000 --- a/vendor/github.com/jackc/pgtype/composite_fields.go +++ /dev/null @@ -1,107 +0,0 @@ -package pgtype - -import "fmt" - -// CompositeFields scans the fields of a composite type into the elements of the CompositeFields value. To scan a -// nullable value use a *CompositeFields. It will be set to nil in case of null. -// -// CompositeFields implements EncodeBinary and EncodeText. However, functionality is limited due to CompositeFields not -// knowing the PostgreSQL schema of the composite type. Prefer using a registered CompositeType. -type CompositeFields []interface{} - -func (cf CompositeFields) DecodeBinary(ci *ConnInfo, src []byte) error { - if len(cf) == 0 { - return fmt.Errorf("cannot decode into empty CompositeFields") - } - - if src == nil { - return fmt.Errorf("cannot decode unexpected null into CompositeFields") - } - - scanner := NewCompositeBinaryScanner(ci, src) - - for _, f := range cf { - scanner.ScanValue(f) - } - - if scanner.Err() != nil { - return scanner.Err() - } - - return nil -} - -func (cf CompositeFields) DecodeText(ci *ConnInfo, src []byte) error { - if len(cf) == 0 { - return fmt.Errorf("cannot decode into empty CompositeFields") - } - - if src == nil { - return fmt.Errorf("cannot decode unexpected null into CompositeFields") - } - - scanner := NewCompositeTextScanner(ci, src) - - for _, f := range cf { - scanner.ScanValue(f) - } - - if scanner.Err() != nil { - return scanner.Err() - } - - return nil -} - -// EncodeText encodes composite fields into the text format. Prefer registering a CompositeType to using -// CompositeFields to encode directly. -func (cf CompositeFields) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - b := NewCompositeTextBuilder(ci, buf) - - for _, f := range cf { - if textEncoder, ok := f.(TextEncoder); ok { - b.AppendEncoder(textEncoder) - } else { - b.AppendValue(f) - } - } - - return b.Finish() -} - -// EncodeBinary encodes composite fields into the binary format. Unlike CompositeType the schema of the destination is -// unknown. Prefer registering a CompositeType to using CompositeFields to encode directly. Because the binary -// composite format requires the OID of each field to be specified the only types that will work are those known to -// ConnInfo. -// -// In particular: -// -// * Nil cannot be used because there is no way to determine what type it. -// * Integer types must be exact matches. e.g. A Go int32 into a PostgreSQL bigint will fail. -// * No dereferencing will be done. e.g. *Text must be used instead of Text. -func (cf CompositeFields) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - b := NewCompositeBinaryBuilder(ci, buf) - - for _, f := range cf { - dt, ok := ci.DataTypeForValue(f) - if !ok { - return nil, fmt.Errorf("Unknown OID for %#v", f) - } - - if binaryEncoder, ok := f.(BinaryEncoder); ok { - b.AppendEncoder(dt.OID, binaryEncoder) - } else { - err := dt.Value.Set(f) - if err != nil { - return nil, err - } - if binaryEncoder, ok := dt.Value.(BinaryEncoder); ok { - b.AppendEncoder(dt.OID, binaryEncoder) - } else { - return nil, fmt.Errorf("Cannot encode binary format for %v", f) - } - } - } - - return b.Finish() -} diff --git a/vendor/github.com/jackc/pgtype/composite_type.go b/vendor/github.com/jackc/pgtype/composite_type.go deleted file mode 100644 index 32e0aa26..00000000 --- a/vendor/github.com/jackc/pgtype/composite_type.go +++ /dev/null @@ -1,682 +0,0 @@ -package pgtype - -import ( - "encoding/binary" - "errors" - "fmt" - "reflect" - "strings" - - "github.com/jackc/pgio" -) - -type CompositeTypeField struct { - Name string - OID uint32 -} - -type CompositeType struct { - status Status - - typeName string - - fields []CompositeTypeField - valueTranscoders []ValueTranscoder -} - -// NewCompositeType creates a CompositeType from fields and ci. ci is used to find the ValueTranscoders used -// for fields. All field OIDs must be previously registered in ci. -func NewCompositeType(typeName string, fields []CompositeTypeField, ci *ConnInfo) (*CompositeType, error) { - valueTranscoders := make([]ValueTranscoder, len(fields)) - - for i := range fields { - dt, ok := ci.DataTypeForOID(fields[i].OID) - if !ok { - return nil, fmt.Errorf("no data type registered for oid: %d", fields[i].OID) - } - - value := NewValue(dt.Value) - valueTranscoder, ok := value.(ValueTranscoder) - if !ok { - return nil, fmt.Errorf("data type for oid does not implement ValueTranscoder: %d", fields[i].OID) - } - - valueTranscoders[i] = valueTranscoder - } - - return &CompositeType{typeName: typeName, fields: fields, valueTranscoders: valueTranscoders}, nil -} - -// NewCompositeTypeValues creates a CompositeType from fields and values. fields and values must have the same length. -// Prefer NewCompositeType unless overriding the transcoding of fields is required. -func NewCompositeTypeValues(typeName string, fields []CompositeTypeField, values []ValueTranscoder) (*CompositeType, error) { - if len(fields) != len(values) { - return nil, errors.New("fields and valueTranscoders must have same length") - } - - return &CompositeType{typeName: typeName, fields: fields, valueTranscoders: values}, nil -} - -func (src CompositeType) Get() interface{} { - switch src.status { - case Present: - results := make(map[string]interface{}, len(src.valueTranscoders)) - for i := range src.valueTranscoders { - results[src.fields[i].Name] = src.valueTranscoders[i].Get() - } - return results - case Null: - return nil - default: - return src.status - } -} - -func (ct *CompositeType) NewTypeValue() Value { - a := &CompositeType{ - typeName: ct.typeName, - fields: ct.fields, - valueTranscoders: make([]ValueTranscoder, len(ct.valueTranscoders)), - } - - for i := range ct.valueTranscoders { - a.valueTranscoders[i] = NewValue(ct.valueTranscoders[i]).(ValueTranscoder) - } - - return a -} - -func (ct *CompositeType) TypeName() string { - return ct.typeName -} - -func (ct *CompositeType) Fields() []CompositeTypeField { - return ct.fields -} - -func (dst *CompositeType) Set(src interface{}) error { - if src == nil { - dst.status = Null - return nil - } - - switch value := src.(type) { - case []interface{}: - if len(value) != len(dst.valueTranscoders) { - return fmt.Errorf("Number of fields don't match. CompositeType has %d fields", len(dst.valueTranscoders)) - } - for i, v := range value { - if err := dst.valueTranscoders[i].Set(v); err != nil { - return err - } - } - dst.status = Present - case *[]interface{}: - if value == nil { - dst.status = Null - return nil - } - return dst.Set(*value) - default: - return fmt.Errorf("Can not convert %v to Composite", src) - } - - return nil -} - -// AssignTo should never be called on composite value directly -func (src CompositeType) AssignTo(dst interface{}) error { - switch src.status { - case Present: - switch v := dst.(type) { - case []interface{}: - if len(v) != len(src.valueTranscoders) { - return fmt.Errorf("Number of fields don't match. CompositeType has %d fields", len(src.valueTranscoders)) - } - for i := range src.valueTranscoders { - if v[i] == nil { - continue - } - - err := assignToOrSet(src.valueTranscoders[i], v[i]) - if err != nil { - return fmt.Errorf("unable to assign to dst[%d]: %v", i, err) - } - } - return nil - case *[]interface{}: - return src.AssignTo(*v) - default: - if isPtrStruct, err := src.assignToPtrStruct(dst); isPtrStruct { - return err - } - - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func assignToOrSet(src Value, dst interface{}) error { - assignToErr := src.AssignTo(dst) - if assignToErr != nil { - // Try to use get / set instead -- this avoids every type having to be able to AssignTo type of self. - setSucceeded := false - if setter, ok := dst.(Value); ok { - err := setter.Set(src.Get()) - setSucceeded = err == nil - } - if !setSucceeded { - return assignToErr - } - } - - return nil -} - -func (src CompositeType) assignToPtrStruct(dst interface{}) (bool, error) { - dstValue := reflect.ValueOf(dst) - if dstValue.Kind() != reflect.Ptr { - return false, nil - } - - if dstValue.IsNil() { - return false, nil - } - - dstElemValue := dstValue.Elem() - dstElemType := dstElemValue.Type() - - if dstElemType.Kind() != reflect.Struct { - return false, nil - } - - exportedFields := make([]int, 0, dstElemType.NumField()) - for i := 0; i < dstElemType.NumField(); i++ { - sf := dstElemType.Field(i) - if sf.PkgPath == "" { - exportedFields = append(exportedFields, i) - } - } - - if len(exportedFields) != len(src.valueTranscoders) { - return false, nil - } - - for i := range exportedFields { - err := assignToOrSet(src.valueTranscoders[i], dstElemValue.Field(exportedFields[i]).Addr().Interface()) - if err != nil { - return true, fmt.Errorf("unable to assign to field %s: %v", dstElemType.Field(exportedFields[i]).Name, err) - } - } - - return true, nil -} - -func (src CompositeType) EncodeBinary(ci *ConnInfo, buf []byte) (newBuf []byte, err error) { - switch src.status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - b := NewCompositeBinaryBuilder(ci, buf) - for i := range src.valueTranscoders { - b.AppendEncoder(src.fields[i].OID, src.valueTranscoders[i]) - } - - return b.Finish() -} - -// DecodeBinary implements BinaryDecoder interface. -// Opposite to Record, fields in a composite act as a "schema" -// and decoding fails if SQL value can't be assigned due to -// type mismatch -func (dst *CompositeType) DecodeBinary(ci *ConnInfo, buf []byte) error { - if buf == nil { - dst.status = Null - return nil - } - - scanner := NewCompositeBinaryScanner(ci, buf) - - for _, f := range dst.valueTranscoders { - scanner.ScanDecoder(f) - } - - if scanner.Err() != nil { - return scanner.Err() - } - - dst.status = Present - - return nil -} - -func (dst *CompositeType) DecodeText(ci *ConnInfo, buf []byte) error { - if buf == nil { - dst.status = Null - return nil - } - - scanner := NewCompositeTextScanner(ci, buf) - - for _, f := range dst.valueTranscoders { - scanner.ScanDecoder(f) - } - - if scanner.Err() != nil { - return scanner.Err() - } - - dst.status = Present - - return nil -} - -func (src CompositeType) EncodeText(ci *ConnInfo, buf []byte) (newBuf []byte, err error) { - switch src.status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - b := NewCompositeTextBuilder(ci, buf) - for _, f := range src.valueTranscoders { - b.AppendEncoder(f) - } - - return b.Finish() -} - -type CompositeBinaryScanner struct { - ci *ConnInfo - rp int - src []byte - - fieldCount int32 - fieldBytes []byte - fieldOID uint32 - err error -} - -// NewCompositeBinaryScanner a scanner over a binary encoded composite balue. -func NewCompositeBinaryScanner(ci *ConnInfo, src []byte) *CompositeBinaryScanner { - rp := 0 - if len(src[rp:]) < 4 { - return &CompositeBinaryScanner{err: fmt.Errorf("Record incomplete %v", src)} - } - - fieldCount := int32(binary.BigEndian.Uint32(src[rp:])) - rp += 4 - - return &CompositeBinaryScanner{ - ci: ci, - rp: rp, - src: src, - fieldCount: fieldCount, - } -} - -// ScanDecoder calls Next and decodes the result with d. -func (cfs *CompositeBinaryScanner) ScanDecoder(d BinaryDecoder) { - if cfs.err != nil { - return - } - - if cfs.Next() { - cfs.err = d.DecodeBinary(cfs.ci, cfs.fieldBytes) - } else { - cfs.err = errors.New("read past end of composite") - } -} - -// ScanDecoder calls Next and scans the result into d. -func (cfs *CompositeBinaryScanner) ScanValue(d interface{}) { - if cfs.err != nil { - return - } - - if cfs.Next() { - cfs.err = cfs.ci.Scan(cfs.OID(), BinaryFormatCode, cfs.Bytes(), d) - } else { - cfs.err = errors.New("read past end of composite") - } -} - -// Next advances the scanner to the next field. It returns false after the last field is read or an error occurs. After -// Next returns false, the Err method can be called to check if any errors occurred. -func (cfs *CompositeBinaryScanner) Next() bool { - if cfs.err != nil { - return false - } - - if cfs.rp == len(cfs.src) { - return false - } - - if len(cfs.src[cfs.rp:]) < 8 { - cfs.err = fmt.Errorf("Record incomplete %v", cfs.src) - return false - } - cfs.fieldOID = binary.BigEndian.Uint32(cfs.src[cfs.rp:]) - cfs.rp += 4 - - fieldLen := int(int32(binary.BigEndian.Uint32(cfs.src[cfs.rp:]))) - cfs.rp += 4 - - if fieldLen >= 0 { - if len(cfs.src[cfs.rp:]) < fieldLen { - cfs.err = fmt.Errorf("Record incomplete rp=%d src=%v", cfs.rp, cfs.src) - return false - } - cfs.fieldBytes = cfs.src[cfs.rp : cfs.rp+fieldLen] - cfs.rp += fieldLen - } else { - cfs.fieldBytes = nil - } - - return true -} - -func (cfs *CompositeBinaryScanner) FieldCount() int { - return int(cfs.fieldCount) -} - -// Bytes returns the bytes of the field most recently read by Scan(). -func (cfs *CompositeBinaryScanner) Bytes() []byte { - return cfs.fieldBytes -} - -// OID returns the OID of the field most recently read by Scan(). -func (cfs *CompositeBinaryScanner) OID() uint32 { - return cfs.fieldOID -} - -// Err returns any error encountered by the scanner. -func (cfs *CompositeBinaryScanner) Err() error { - return cfs.err -} - -type CompositeTextScanner struct { - ci *ConnInfo - rp int - src []byte - - fieldBytes []byte - err error -} - -// NewCompositeTextScanner a scanner over a text encoded composite value. -func NewCompositeTextScanner(ci *ConnInfo, src []byte) *CompositeTextScanner { - if len(src) < 2 { - return &CompositeTextScanner{err: fmt.Errorf("Record incomplete %v", src)} - } - - if src[0] != '(' { - return &CompositeTextScanner{err: fmt.Errorf("composite text format must start with '('")} - } - - if src[len(src)-1] != ')' { - return &CompositeTextScanner{err: fmt.Errorf("composite text format must end with ')'")} - } - - return &CompositeTextScanner{ - ci: ci, - rp: 1, - src: src, - } -} - -// ScanDecoder calls Next and decodes the result with d. -func (cfs *CompositeTextScanner) ScanDecoder(d TextDecoder) { - if cfs.err != nil { - return - } - - if cfs.Next() { - cfs.err = d.DecodeText(cfs.ci, cfs.fieldBytes) - } else { - cfs.err = errors.New("read past end of composite") - } -} - -// ScanDecoder calls Next and scans the result into d. -func (cfs *CompositeTextScanner) ScanValue(d interface{}) { - if cfs.err != nil { - return - } - - if cfs.Next() { - cfs.err = cfs.ci.Scan(0, TextFormatCode, cfs.Bytes(), d) - } else { - cfs.err = errors.New("read past end of composite") - } -} - -// Next advances the scanner to the next field. It returns false after the last field is read or an error occurs. After -// Next returns false, the Err method can be called to check if any errors occurred. -func (cfs *CompositeTextScanner) Next() bool { - if cfs.err != nil { - return false - } - - if cfs.rp == len(cfs.src) { - return false - } - - switch cfs.src[cfs.rp] { - case ',', ')': // null - cfs.rp++ - cfs.fieldBytes = nil - return true - case '"': // quoted value - cfs.rp++ - cfs.fieldBytes = make([]byte, 0, 16) - for { - ch := cfs.src[cfs.rp] - - if ch == '"' { - cfs.rp++ - if cfs.src[cfs.rp] == '"' { - cfs.fieldBytes = append(cfs.fieldBytes, '"') - cfs.rp++ - } else { - break - } - } else if ch == '\\' { - cfs.rp++ - cfs.fieldBytes = append(cfs.fieldBytes, cfs.src[cfs.rp]) - cfs.rp++ - } else { - cfs.fieldBytes = append(cfs.fieldBytes, ch) - cfs.rp++ - } - } - cfs.rp++ - return true - default: // unquoted value - start := cfs.rp - for { - ch := cfs.src[cfs.rp] - if ch == ',' || ch == ')' { - break - } - cfs.rp++ - } - cfs.fieldBytes = cfs.src[start:cfs.rp] - cfs.rp++ - return true - } -} - -// Bytes returns the bytes of the field most recently read by Scan(). -func (cfs *CompositeTextScanner) Bytes() []byte { - return cfs.fieldBytes -} - -// Err returns any error encountered by the scanner. -func (cfs *CompositeTextScanner) Err() error { - return cfs.err -} - -type CompositeBinaryBuilder struct { - ci *ConnInfo - buf []byte - startIdx int - fieldCount uint32 - err error -} - -func NewCompositeBinaryBuilder(ci *ConnInfo, buf []byte) *CompositeBinaryBuilder { - startIdx := len(buf) - buf = append(buf, 0, 0, 0, 0) // allocate room for number of fields - return &CompositeBinaryBuilder{ci: ci, buf: buf, startIdx: startIdx} -} - -func (b *CompositeBinaryBuilder) AppendValue(oid uint32, field interface{}) { - if b.err != nil { - return - } - - dt, ok := b.ci.DataTypeForOID(oid) - if !ok { - b.err = fmt.Errorf("unknown data type for OID: %d", oid) - return - } - - err := dt.Value.Set(field) - if err != nil { - b.err = err - return - } - - binaryEncoder, ok := dt.Value.(BinaryEncoder) - if !ok { - b.err = fmt.Errorf("unable to encode binary for OID: %d", oid) - return - } - - b.AppendEncoder(oid, binaryEncoder) -} - -func (b *CompositeBinaryBuilder) AppendEncoder(oid uint32, field BinaryEncoder) { - if b.err != nil { - return - } - - b.buf = pgio.AppendUint32(b.buf, oid) - lengthPos := len(b.buf) - b.buf = pgio.AppendInt32(b.buf, -1) - fieldBuf, err := field.EncodeBinary(b.ci, b.buf) - if err != nil { - b.err = err - return - } - if fieldBuf != nil { - binary.BigEndian.PutUint32(fieldBuf[lengthPos:], uint32(len(fieldBuf)-len(b.buf))) - b.buf = fieldBuf - } - - b.fieldCount++ -} - -func (b *CompositeBinaryBuilder) Finish() ([]byte, error) { - if b.err != nil { - return nil, b.err - } - - binary.BigEndian.PutUint32(b.buf[b.startIdx:], b.fieldCount) - return b.buf, nil -} - -type CompositeTextBuilder struct { - ci *ConnInfo - buf []byte - startIdx int - fieldCount uint32 - err error - fieldBuf [32]byte -} - -func NewCompositeTextBuilder(ci *ConnInfo, buf []byte) *CompositeTextBuilder { - buf = append(buf, '(') // allocate room for number of fields - return &CompositeTextBuilder{ci: ci, buf: buf} -} - -func (b *CompositeTextBuilder) AppendValue(field interface{}) { - if b.err != nil { - return - } - - if field == nil { - b.buf = append(b.buf, ',') - return - } - - dt, ok := b.ci.DataTypeForValue(field) - if !ok { - b.err = fmt.Errorf("unknown data type for field: %v", field) - return - } - - err := dt.Value.Set(field) - if err != nil { - b.err = err - return - } - - textEncoder, ok := dt.Value.(TextEncoder) - if !ok { - b.err = fmt.Errorf("unable to encode text for value: %v", field) - return - } - - b.AppendEncoder(textEncoder) -} - -func (b *CompositeTextBuilder) AppendEncoder(field TextEncoder) { - if b.err != nil { - return - } - - fieldBuf, err := field.EncodeText(b.ci, b.fieldBuf[0:0]) - if err != nil { - b.err = err - return - } - if fieldBuf != nil { - b.buf = append(b.buf, quoteCompositeFieldIfNeeded(string(fieldBuf))...) - } - - b.buf = append(b.buf, ',') -} - -func (b *CompositeTextBuilder) Finish() ([]byte, error) { - if b.err != nil { - return nil, b.err - } - - b.buf[len(b.buf)-1] = ')' - return b.buf, nil -} - -var quoteCompositeReplacer = strings.NewReplacer(`\`, `\\`, `"`, `\"`) - -func quoteCompositeField(src string) string { - return `"` + quoteCompositeReplacer.Replace(src) + `"` -} - -func quoteCompositeFieldIfNeeded(src string) string { - if src == "" || src[0] == ' ' || src[len(src)-1] == ' ' || strings.ContainsAny(src, `(),"\`) { - return quoteCompositeField(src) - } - return src -} diff --git a/vendor/github.com/jackc/pgtype/convert.go b/vendor/github.com/jackc/pgtype/convert.go deleted file mode 100644 index 377fe3ea..00000000 --- a/vendor/github.com/jackc/pgtype/convert.go +++ /dev/null @@ -1,476 +0,0 @@ -package pgtype - -import ( - "database/sql" - "fmt" - "math" - "reflect" - "time" -) - -const ( - maxUint = ^uint(0) - maxInt = int(maxUint >> 1) - minInt = -maxInt - 1 -) - -// underlyingNumberType gets the underlying type that can be converted to Int2, Int4, Int8, Float4, or Float8 -func underlyingNumberType(val interface{}) (interface{}, bool) { - refVal := reflect.ValueOf(val) - - switch refVal.Kind() { - case reflect.Ptr: - if refVal.IsNil() { - return nil, false - } - convVal := refVal.Elem().Interface() - return convVal, true - case reflect.Int: - convVal := int(refVal.Int()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Int8: - convVal := int8(refVal.Int()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Int16: - convVal := int16(refVal.Int()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Int32: - convVal := int32(refVal.Int()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Int64: - convVal := int64(refVal.Int()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Uint: - convVal := uint(refVal.Uint()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Uint8: - convVal := uint8(refVal.Uint()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Uint16: - convVal := uint16(refVal.Uint()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Uint32: - convVal := uint32(refVal.Uint()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Uint64: - convVal := uint64(refVal.Uint()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Float32: - convVal := float32(refVal.Float()) - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.Float64: - convVal := refVal.Float() - return convVal, reflect.TypeOf(convVal) != refVal.Type() - case reflect.String: - convVal := refVal.String() - return convVal, reflect.TypeOf(convVal) != refVal.Type() - } - - return nil, false -} - -// underlyingBoolType gets the underlying type that can be converted to Bool -func underlyingBoolType(val interface{}) (interface{}, bool) { - refVal := reflect.ValueOf(val) - - switch refVal.Kind() { - case reflect.Ptr: - if refVal.IsNil() { - return nil, false - } - convVal := refVal.Elem().Interface() - return convVal, true - case reflect.Bool: - convVal := refVal.Bool() - return convVal, reflect.TypeOf(convVal) != refVal.Type() - } - - return nil, false -} - -// underlyingBytesType gets the underlying type that can be converted to []byte -func underlyingBytesType(val interface{}) (interface{}, bool) { - refVal := reflect.ValueOf(val) - - switch refVal.Kind() { - case reflect.Ptr: - if refVal.IsNil() { - return nil, false - } - convVal := refVal.Elem().Interface() - return convVal, true - case reflect.Slice: - if refVal.Type().Elem().Kind() == reflect.Uint8 { - convVal := refVal.Bytes() - return convVal, reflect.TypeOf(convVal) != refVal.Type() - } - } - - return nil, false -} - -// underlyingStringType gets the underlying type that can be converted to String -func underlyingStringType(val interface{}) (interface{}, bool) { - refVal := reflect.ValueOf(val) - - switch refVal.Kind() { - case reflect.Ptr: - if refVal.IsNil() { - return nil, false - } - convVal := refVal.Elem().Interface() - return convVal, true - case reflect.String: - convVal := refVal.String() - return convVal, reflect.TypeOf(convVal) != refVal.Type() - } - - return nil, false -} - -// underlyingPtrType dereferences a pointer -func underlyingPtrType(val interface{}) (interface{}, bool) { - refVal := reflect.ValueOf(val) - - switch refVal.Kind() { - case reflect.Ptr: - if refVal.IsNil() { - return nil, false - } - convVal := refVal.Elem().Interface() - return convVal, true - } - - return nil, false -} - -// underlyingTimeType gets the underlying type that can be converted to time.Time -func underlyingTimeType(val interface{}) (interface{}, bool) { - refVal := reflect.ValueOf(val) - - switch refVal.Kind() { - case reflect.Ptr: - if refVal.IsNil() { - return nil, false - } - convVal := refVal.Elem().Interface() - return convVal, true - } - - timeType := reflect.TypeOf(time.Time{}) - if refVal.Type().ConvertibleTo(timeType) { - return refVal.Convert(timeType).Interface(), true - } - - return nil, false -} - -// underlyingUUIDType gets the underlying type that can be converted to [16]byte -func underlyingUUIDType(val interface{}) (interface{}, bool) { - refVal := reflect.ValueOf(val) - - switch refVal.Kind() { - case reflect.Ptr: - if refVal.IsNil() { - return nil, false - } - convVal := refVal.Elem().Interface() - return convVal, true - } - - uuidType := reflect.TypeOf([16]byte{}) - if refVal.Type().ConvertibleTo(uuidType) { - return refVal.Convert(uuidType).Interface(), true - } - - return nil, false -} - -// underlyingSliceType gets the underlying slice type -func underlyingSliceType(val interface{}) (interface{}, bool) { - refVal := reflect.ValueOf(val) - - switch refVal.Kind() { - case reflect.Ptr: - if refVal.IsNil() { - return nil, false - } - convVal := refVal.Elem().Interface() - return convVal, true - case reflect.Slice: - baseSliceType := reflect.SliceOf(refVal.Type().Elem()) - if refVal.Type().ConvertibleTo(baseSliceType) { - convVal := refVal.Convert(baseSliceType) - return convVal.Interface(), reflect.TypeOf(convVal.Interface()) != refVal.Type() - } - } - - return nil, false -} - -func int64AssignTo(srcVal int64, srcStatus Status, dst interface{}) error { - if srcStatus == Present { - switch v := dst.(type) { - case *int: - if srcVal < int64(minInt) { - return fmt.Errorf("%d is less than minimum value for int", srcVal) - } else if srcVal > int64(maxInt) { - return fmt.Errorf("%d is greater than maximum value for int", srcVal) - } - *v = int(srcVal) - case *int8: - if srcVal < math.MinInt8 { - return fmt.Errorf("%d is less than minimum value for int8", srcVal) - } else if srcVal > math.MaxInt8 { - return fmt.Errorf("%d is greater than maximum value for int8", srcVal) - } - *v = int8(srcVal) - case *int16: - if srcVal < math.MinInt16 { - return fmt.Errorf("%d is less than minimum value for int16", srcVal) - } else if srcVal > math.MaxInt16 { - return fmt.Errorf("%d is greater than maximum value for int16", srcVal) - } - *v = int16(srcVal) - case *int32: - if srcVal < math.MinInt32 { - return fmt.Errorf("%d is less than minimum value for int32", srcVal) - } else if srcVal > math.MaxInt32 { - return fmt.Errorf("%d is greater than maximum value for int32", srcVal) - } - *v = int32(srcVal) - case *int64: - if srcVal < math.MinInt64 { - return fmt.Errorf("%d is less than minimum value for int64", srcVal) - } else if srcVal > math.MaxInt64 { - return fmt.Errorf("%d is greater than maximum value for int64", srcVal) - } - *v = int64(srcVal) - case *uint: - if srcVal < 0 { - return fmt.Errorf("%d is less than zero for uint", srcVal) - } else if uint64(srcVal) > uint64(maxUint) { - return fmt.Errorf("%d is greater than maximum value for uint", srcVal) - } - *v = uint(srcVal) - case *uint8: - if srcVal < 0 { - return fmt.Errorf("%d is less than zero for uint8", srcVal) - } else if srcVal > math.MaxUint8 { - return fmt.Errorf("%d is greater than maximum value for uint8", srcVal) - } - *v = uint8(srcVal) - case *uint16: - if srcVal < 0 { - return fmt.Errorf("%d is less than zero for uint32", srcVal) - } else if srcVal > math.MaxUint16 { - return fmt.Errorf("%d is greater than maximum value for uint16", srcVal) - } - *v = uint16(srcVal) - case *uint32: - if srcVal < 0 { - return fmt.Errorf("%d is less than zero for uint32", srcVal) - } else if srcVal > math.MaxUint32 { - return fmt.Errorf("%d is greater than maximum value for uint32", srcVal) - } - *v = uint32(srcVal) - case *uint64: - if srcVal < 0 { - return fmt.Errorf("%d is less than zero for uint64", srcVal) - } - *v = uint64(srcVal) - case sql.Scanner: - return v.Scan(srcVal) - default: - if v := reflect.ValueOf(dst); v.Kind() == reflect.Ptr { - el := v.Elem() - switch el.Kind() { - // if dst is a pointer to pointer, strip the pointer and try again - case reflect.Ptr: - if el.IsNil() { - // allocate destination - el.Set(reflect.New(el.Type().Elem())) - } - return int64AssignTo(srcVal, srcStatus, el.Interface()) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - if el.OverflowInt(int64(srcVal)) { - return fmt.Errorf("cannot put %d into %T", srcVal, dst) - } - el.SetInt(int64(srcVal)) - return nil - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - if srcVal < 0 { - return fmt.Errorf("%d is less than zero for %T", srcVal, dst) - } - if el.OverflowUint(uint64(srcVal)) { - return fmt.Errorf("cannot put %d into %T", srcVal, dst) - } - el.SetUint(uint64(srcVal)) - return nil - } - } - return fmt.Errorf("cannot assign %v into %T", srcVal, dst) - } - return nil - } - - // if dst is a pointer to pointer and srcStatus is not Present, nil it out - if v := reflect.ValueOf(dst); v.Kind() == reflect.Ptr { - el := v.Elem() - if el.Kind() == reflect.Ptr { - el.Set(reflect.Zero(el.Type())) - return nil - } - } - - return fmt.Errorf("cannot assign %v %v into %T", srcVal, srcStatus, dst) -} - -func float64AssignTo(srcVal float64, srcStatus Status, dst interface{}) error { - if srcStatus == Present { - switch v := dst.(type) { - case *float32: - *v = float32(srcVal) - case *float64: - *v = srcVal - default: - if v := reflect.ValueOf(dst); v.Kind() == reflect.Ptr { - el := v.Elem() - switch el.Kind() { - // if dst is a type alias of a float32 or 64, set dst val - case reflect.Float32, reflect.Float64: - el.SetFloat(srcVal) - return nil - // if dst is a pointer to pointer, strip the pointer and try again - case reflect.Ptr: - if el.IsNil() { - // allocate destination - el.Set(reflect.New(el.Type().Elem())) - } - return float64AssignTo(srcVal, srcStatus, el.Interface()) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - i64 := int64(srcVal) - if float64(i64) == srcVal { - return int64AssignTo(i64, srcStatus, dst) - } - } - } - return fmt.Errorf("cannot assign %v into %T", srcVal, dst) - } - return nil - } - - // if dst is a pointer to pointer and srcStatus is not Present, nil it out - if v := reflect.ValueOf(dst); v.Kind() == reflect.Ptr { - el := v.Elem() - if el.Kind() == reflect.Ptr { - el.Set(reflect.Zero(el.Type())) - return nil - } - } - - return fmt.Errorf("cannot assign %v %v into %T", srcVal, srcStatus, dst) -} - -func NullAssignTo(dst interface{}) error { - dstPtr := reflect.ValueOf(dst) - - // AssignTo dst must always be a pointer - if dstPtr.Kind() != reflect.Ptr { - return &nullAssignmentError{dst: dst} - } - - dstVal := dstPtr.Elem() - - switch dstVal.Kind() { - case reflect.Ptr, reflect.Slice, reflect.Map: - dstVal.Set(reflect.Zero(dstVal.Type())) - return nil - } - - return &nullAssignmentError{dst: dst} -} - -var kindTypes map[reflect.Kind]reflect.Type - -func toInterface(dst reflect.Value, t reflect.Type) (interface{}, bool) { - nextDst := dst.Convert(t) - return nextDst.Interface(), dst.Type() != nextDst.Type() -} - -// GetAssignToDstType attempts to convert dst to something AssignTo can assign -// to. If dst is a pointer to pointer it allocates a value and returns the -// dereferences pointer. If dst is a named type such as *Foo where Foo is type -// Foo int16, it converts dst to *int16. -// -// GetAssignToDstType returns the converted dst and a bool representing if any -// change was made. -func GetAssignToDstType(dst interface{}) (interface{}, bool) { - dstPtr := reflect.ValueOf(dst) - - // AssignTo dst must always be a pointer - if dstPtr.Kind() != reflect.Ptr { - return nil, false - } - - dstVal := dstPtr.Elem() - - // if dst is a pointer to pointer, allocate space try again with the dereferenced pointer - if dstVal.Kind() == reflect.Ptr { - dstVal.Set(reflect.New(dstVal.Type().Elem())) - return dstVal.Interface(), true - } - - // if dst is pointer to a base type that has been renamed - if baseValType, ok := kindTypes[dstVal.Kind()]; ok { - return toInterface(dstPtr, reflect.PtrTo(baseValType)) - } - - if dstVal.Kind() == reflect.Slice { - if baseElemType, ok := kindTypes[dstVal.Type().Elem().Kind()]; ok { - return toInterface(dstPtr, reflect.PtrTo(reflect.SliceOf(baseElemType))) - } - } - - if dstVal.Kind() == reflect.Array { - if baseElemType, ok := kindTypes[dstVal.Type().Elem().Kind()]; ok { - return toInterface(dstPtr, reflect.PtrTo(reflect.ArrayOf(dstVal.Len(), baseElemType))) - } - } - - if dstVal.Kind() == reflect.Struct { - if dstVal.Type().NumField() == 1 && dstVal.Type().Field(0).Anonymous { - dstPtr = dstVal.Field(0).Addr() - nested := dstVal.Type().Field(0).Type - if nested.Kind() == reflect.Array { - if baseElemType, ok := kindTypes[nested.Elem().Kind()]; ok { - return toInterface(dstPtr, reflect.PtrTo(reflect.ArrayOf(nested.Len(), baseElemType))) - } - } - if _, ok := kindTypes[nested.Kind()]; ok && dstPtr.CanInterface() { - return dstPtr.Interface(), true - } - } - } - - return nil, false -} - -func init() { - kindTypes = map[reflect.Kind]reflect.Type{ - reflect.Bool: reflect.TypeOf(false), - reflect.Float32: reflect.TypeOf(float32(0)), - reflect.Float64: reflect.TypeOf(float64(0)), - reflect.Int: reflect.TypeOf(int(0)), - reflect.Int8: reflect.TypeOf(int8(0)), - reflect.Int16: reflect.TypeOf(int16(0)), - reflect.Int32: reflect.TypeOf(int32(0)), - reflect.Int64: reflect.TypeOf(int64(0)), - reflect.Uint: reflect.TypeOf(uint(0)), - reflect.Uint8: reflect.TypeOf(uint8(0)), - reflect.Uint16: reflect.TypeOf(uint16(0)), - reflect.Uint32: reflect.TypeOf(uint32(0)), - reflect.Uint64: reflect.TypeOf(uint64(0)), - reflect.String: reflect.TypeOf(""), - } -} diff --git a/vendor/github.com/jackc/pgtype/database_sql.go b/vendor/github.com/jackc/pgtype/database_sql.go deleted file mode 100644 index 9d1cf822..00000000 --- a/vendor/github.com/jackc/pgtype/database_sql.go +++ /dev/null @@ -1,41 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "errors" -) - -func DatabaseSQLValue(ci *ConnInfo, src Value) (interface{}, error) { - if valuer, ok := src.(driver.Valuer); ok { - return valuer.Value() - } - - if textEncoder, ok := src.(TextEncoder); ok { - buf, err := textEncoder.EncodeText(ci, nil) - if err != nil { - return nil, err - } - return string(buf), nil - } - - if binaryEncoder, ok := src.(BinaryEncoder); ok { - buf, err := binaryEncoder.EncodeBinary(ci, nil) - if err != nil { - return nil, err - } - return buf, nil - } - - return nil, errors.New("cannot convert to database/sql compatible value") -} - -func EncodeValueText(src TextEncoder) (interface{}, error) { - buf, err := src.EncodeText(nil, make([]byte, 0, 32)) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - return string(buf), err -} diff --git a/vendor/github.com/jackc/pgtype/date.go b/vendor/github.com/jackc/pgtype/date.go deleted file mode 100644 index e68abf01..00000000 --- a/vendor/github.com/jackc/pgtype/date.go +++ /dev/null @@ -1,324 +0,0 @@ -package pgtype - -import ( - "database/sql" - "database/sql/driver" - "encoding/binary" - "encoding/json" - "fmt" - "strings" - "time" - - "github.com/jackc/pgio" -) - -type Date struct { - Time time.Time - Status Status - InfinityModifier InfinityModifier -} - -const ( - negativeInfinityDayOffset = -2147483648 - infinityDayOffset = 2147483647 -) - -func (dst *Date) Set(src interface{}) error { - if src == nil { - *dst = Date{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - if value, ok := src.(interface{ Value() (driver.Value, error) }); ok { - v, err := value.Value() - if err != nil { - return fmt.Errorf("cannot get value %v for Date: %v", value, err) - } - return dst.Set(v) - } - - switch value := src.(type) { - case time.Time: - *dst = Date{Time: value, Status: Present} - case *time.Time: - if value == nil { - *dst = Date{Status: Null} - } else { - return dst.Set(*value) - } - case string: - return dst.DecodeText(nil, []byte(value)) - case *string: - if value == nil { - *dst = Date{Status: Null} - } else { - return dst.Set(*value) - } - default: - if originalSrc, ok := underlyingTimeType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Date", value) - } - - return nil -} - -func (dst Date) Get() interface{} { - switch dst.Status { - case Present: - if dst.InfinityModifier != None { - return dst.InfinityModifier - } - return dst.Time - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Date) AssignTo(dst interface{}) error { - if scanner, ok := dst.(sql.Scanner); ok { - var err error - switch src.Status { - case Present: - if src.InfinityModifier != None { - err = scanner.Scan(src.InfinityModifier.String()) - } else { - err = scanner.Scan(src.Time) - } - case Null: - err = scanner.Scan(nil) - } - if err != nil { - return fmt.Errorf("unable assign %v to %T: %s", src, dst, err) - } - return nil - } - - switch src.Status { - case Present: - switch v := dst.(type) { - case *time.Time: - if src.InfinityModifier != None { - return fmt.Errorf("cannot assign %v to %T", src, dst) - } - *v = src.Time - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (dst *Date) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Date{Status: Null} - return nil - } - - sbuf := string(src) - switch sbuf { - case "infinity": - *dst = Date{Status: Present, InfinityModifier: Infinity} - case "-infinity": - *dst = Date{Status: Present, InfinityModifier: -Infinity} - default: - if strings.HasSuffix(sbuf, " BC") { - t, err := time.ParseInLocation("2006-01-02", strings.TrimRight(sbuf, " BC"), time.UTC) - t2 := time.Date(1-t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), t.Location()) - if err != nil { - return err - } - *dst = Date{Time: t2, Status: Present} - return nil - } - t, err := time.ParseInLocation("2006-01-02", sbuf, time.UTC) - if err != nil { - return err - } - - *dst = Date{Time: t, Status: Present} - } - - return nil -} - -func (dst *Date) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Date{Status: Null} - return nil - } - - if len(src) != 4 { - return fmt.Errorf("invalid length for date: %v", len(src)) - } - - dayOffset := int32(binary.BigEndian.Uint32(src)) - - switch dayOffset { - case infinityDayOffset: - *dst = Date{Status: Present, InfinityModifier: Infinity} - case negativeInfinityDayOffset: - *dst = Date{Status: Present, InfinityModifier: -Infinity} - default: - t := time.Date(2000, 1, int(1+dayOffset), 0, 0, 0, 0, time.UTC) - *dst = Date{Time: t, Status: Present} - } - - return nil -} - -func (src Date) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var s string - - switch src.InfinityModifier { - case None: - s = src.Time.Format("2006-01-02") - case Infinity: - s = "infinity" - case NegativeInfinity: - s = "-infinity" - } - - return append(buf, s...), nil -} - -func (src Date) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var daysSinceDateEpoch int32 - switch src.InfinityModifier { - case None: - tUnix := time.Date(src.Time.Year(), src.Time.Month(), src.Time.Day(), 0, 0, 0, 0, time.UTC).Unix() - dateEpoch := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC).Unix() - - secSinceDateEpoch := tUnix - dateEpoch - daysSinceDateEpoch = int32(secSinceDateEpoch / 86400) - case Infinity: - daysSinceDateEpoch = infinityDayOffset - case NegativeInfinity: - daysSinceDateEpoch = negativeInfinityDayOffset - } - - return pgio.AppendInt32(buf, daysSinceDateEpoch), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Date) Scan(src interface{}) error { - if src == nil { - *dst = Date{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - case time.Time: - *dst = Date{Time: src, Status: Present} - return nil - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Date) Value() (driver.Value, error) { - switch src.Status { - case Present: - if src.InfinityModifier != None { - return src.InfinityModifier.String(), nil - } - return src.Time, nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} - -func (src Date) MarshalJSON() ([]byte, error) { - switch src.Status { - case Null: - return []byte("null"), nil - case Undefined: - return nil, errUndefined - } - - if src.Status != Present { - return nil, errBadStatus - } - - var s string - - switch src.InfinityModifier { - case None: - s = src.Time.Format("2006-01-02") - case Infinity: - s = "infinity" - case NegativeInfinity: - s = "-infinity" - } - - return json.Marshal(s) -} - -func (dst *Date) UnmarshalJSON(b []byte) error { - var s *string - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - if s == nil { - *dst = Date{Status: Null} - return nil - } - - switch *s { - case "infinity": - *dst = Date{Status: Present, InfinityModifier: Infinity} - case "-infinity": - *dst = Date{Status: Present, InfinityModifier: -Infinity} - default: - t, err := time.ParseInLocation("2006-01-02", *s, time.UTC) - if err != nil { - return err - } - - *dst = Date{Time: t, Status: Present} - } - - return nil -} diff --git a/vendor/github.com/jackc/pgtype/date_array.go b/vendor/github.com/jackc/pgtype/date_array.go deleted file mode 100644 index 24152fa0..00000000 --- a/vendor/github.com/jackc/pgtype/date_array.go +++ /dev/null @@ -1,518 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - "time" - - "github.com/jackc/pgio" -) - -type DateArray struct { - Elements []Date - Dimensions []ArrayDimension - Status Status -} - -func (dst *DateArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = DateArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []time.Time: - if value == nil { - *dst = DateArray{Status: Null} - } else if len(value) == 0 { - *dst = DateArray{Status: Present} - } else { - elements := make([]Date, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = DateArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*time.Time: - if value == nil { - *dst = DateArray{Status: Null} - } else if len(value) == 0 { - *dst = DateArray{Status: Present} - } else { - elements := make([]Date, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = DateArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Date: - if value == nil { - *dst = DateArray{Status: Null} - } else if len(value) == 0 { - *dst = DateArray{Status: Present} - } else { - *dst = DateArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = DateArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for DateArray", src) - } - if elementsLength == 0 { - *dst = DateArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to DateArray", src) - } - - *dst = DateArray{ - Elements: make([]Date, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Date, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to DateArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *DateArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to DateArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in DateArray", err) - } - index++ - - return index, nil -} - -func (dst DateArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *DateArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]time.Time: - *v = make([]time.Time, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*time.Time: - *v = make([]*time.Time, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *DateArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from DateArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from DateArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *DateArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = DateArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Date - - if len(uta.Elements) > 0 { - elements = make([]Date, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Date - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = DateArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *DateArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = DateArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = DateArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Date, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = DateArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src DateArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src DateArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("date"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "date") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *DateArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src DateArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/daterange.go b/vendor/github.com/jackc/pgtype/daterange.go deleted file mode 100644 index 63164a5a..00000000 --- a/vendor/github.com/jackc/pgtype/daterange.go +++ /dev/null @@ -1,267 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" - - "github.com/jackc/pgio" -) - -type Daterange struct { - Lower Date - Upper Date - LowerType BoundType - UpperType BoundType - Status Status -} - -func (dst *Daterange) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Daterange{Status: Null} - return nil - } - - switch value := src.(type) { - case Daterange: - *dst = value - case *Daterange: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - default: - return fmt.Errorf("cannot convert %v to Daterange", src) - } - - return nil -} - -func (dst Daterange) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Daterange) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Daterange) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Daterange{Status: Null} - return nil - } - - utr, err := ParseUntypedTextRange(string(src)) - if err != nil { - return err - } - - *dst = Daterange{Status: Present} - - dst.LowerType = utr.LowerType - dst.UpperType = utr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeText(ci, []byte(utr.Lower)); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeText(ci, []byte(utr.Upper)); err != nil { - return err - } - } - - return nil -} - -func (dst *Daterange) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Daterange{Status: Null} - return nil - } - - ubr, err := ParseUntypedBinaryRange(src) - if err != nil { - return err - } - - *dst = Daterange{Status: Present} - - dst.LowerType = ubr.LowerType - dst.UpperType = ubr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeBinary(ci, ubr.Lower); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeBinary(ci, ubr.Upper); err != nil { - return err - } - } - - return nil -} - -func (src Daterange) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - switch src.LowerType { - case Exclusive, Unbounded: - buf = append(buf, '(') - case Inclusive: - buf = append(buf, '[') - case Empty: - return append(buf, "empty"...), nil - default: - return nil, fmt.Errorf("unknown lower bound type %v", src.LowerType) - } - - var err error - - if src.LowerType != Unbounded { - buf, err = src.Lower.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - } - - buf = append(buf, ',') - - if src.UpperType != Unbounded { - buf, err = src.Upper.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - } - - switch src.UpperType { - case Exclusive, Unbounded: - buf = append(buf, ')') - case Inclusive: - buf = append(buf, ']') - default: - return nil, fmt.Errorf("unknown upper bound type %v", src.UpperType) - } - - return buf, nil -} - -func (src Daterange) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var rangeType byte - switch src.LowerType { - case Inclusive: - rangeType |= lowerInclusiveMask - case Unbounded: - rangeType |= lowerUnboundedMask - case Exclusive: - case Empty: - return append(buf, emptyMask), nil - default: - return nil, fmt.Errorf("unknown LowerType: %v", src.LowerType) - } - - switch src.UpperType { - case Inclusive: - rangeType |= upperInclusiveMask - case Unbounded: - rangeType |= upperUnboundedMask - case Exclusive: - default: - return nil, fmt.Errorf("unknown UpperType: %v", src.UpperType) - } - - buf = append(buf, rangeType) - - var err error - - if src.LowerType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Lower.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - if src.UpperType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Upper.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Daterange) Scan(src interface{}) error { - if src == nil { - *dst = Daterange{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Daterange) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/enum_array.go b/vendor/github.com/jackc/pgtype/enum_array.go deleted file mode 100644 index 59b5a3ed..00000000 --- a/vendor/github.com/jackc/pgtype/enum_array.go +++ /dev/null @@ -1,428 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "fmt" - "reflect" -) - -type EnumArray struct { - Elements []GenericText - Dimensions []ArrayDimension - Status Status -} - -func (dst *EnumArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = EnumArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []string: - if value == nil { - *dst = EnumArray{Status: Null} - } else if len(value) == 0 { - *dst = EnumArray{Status: Present} - } else { - elements := make([]GenericText, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = EnumArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*string: - if value == nil { - *dst = EnumArray{Status: Null} - } else if len(value) == 0 { - *dst = EnumArray{Status: Present} - } else { - elements := make([]GenericText, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = EnumArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []GenericText: - if value == nil { - *dst = EnumArray{Status: Null} - } else if len(value) == 0 { - *dst = EnumArray{Status: Present} - } else { - *dst = EnumArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = EnumArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for EnumArray", src) - } - if elementsLength == 0 { - *dst = EnumArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to EnumArray", src) - } - - *dst = EnumArray{ - Elements: make([]GenericText, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]GenericText, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to EnumArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *EnumArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to EnumArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in EnumArray", err) - } - index++ - - return index, nil -} - -func (dst EnumArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *EnumArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]string: - *v = make([]string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*string: - *v = make([]*string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *EnumArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from EnumArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from EnumArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *EnumArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = EnumArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []GenericText - - if len(uta.Elements) > 0 { - elements = make([]GenericText, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem GenericText - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = EnumArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (src EnumArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *EnumArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src EnumArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/enum_type.go b/vendor/github.com/jackc/pgtype/enum_type.go deleted file mode 100644 index 52657822..00000000 --- a/vendor/github.com/jackc/pgtype/enum_type.go +++ /dev/null @@ -1,168 +0,0 @@ -package pgtype - -import "fmt" - -// EnumType represents an enum type. While it implements Value, this is only in service of its type conversion duties -// when registered as a data type in a ConnType. It should not be used directly as a Value. -type EnumType struct { - value string - status Status - - typeName string // PostgreSQL type name - members []string // enum members - membersMap map[string]string // map to quickly lookup member and reuse string instead of allocating -} - -// NewEnumType initializes a new EnumType. It retains a read-only reference to members. members must not be changed. -func NewEnumType(typeName string, members []string) *EnumType { - et := &EnumType{typeName: typeName, members: members} - et.membersMap = make(map[string]string, len(members)) - for _, m := range members { - et.membersMap[m] = m - } - return et -} - -func (et *EnumType) NewTypeValue() Value { - return &EnumType{ - value: et.value, - status: et.status, - - typeName: et.typeName, - members: et.members, - membersMap: et.membersMap, - } -} - -func (et *EnumType) TypeName() string { - return et.typeName -} - -func (et *EnumType) Members() []string { - return et.members -} - -// Set assigns src to dst. Set purposely does not check that src is a member. This allows continued error free -// operation in the event the PostgreSQL enum type is modified during a connection. -func (dst *EnumType) Set(src interface{}) error { - if src == nil { - dst.status = Null - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case string: - dst.value = value - dst.status = Present - case *string: - if value == nil { - dst.status = Null - } else { - dst.value = *value - dst.status = Present - } - case []byte: - if value == nil { - dst.status = Null - } else { - dst.value = string(value) - dst.status = Present - } - default: - if originalSrc, ok := underlyingStringType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to enum %s", value, dst.typeName) - } - - return nil -} - -func (dst EnumType) Get() interface{} { - switch dst.status { - case Present: - return dst.value - case Null: - return nil - default: - return dst.status - } -} - -func (src *EnumType) AssignTo(dst interface{}) error { - switch src.status { - case Present: - switch v := dst.(type) { - case *string: - *v = src.value - return nil - case *[]byte: - *v = make([]byte, len(src.value)) - copy(*v, src.value) - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (EnumType) PreferredResultFormat() int16 { - return TextFormatCode -} - -func (dst *EnumType) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - dst.status = Null - return nil - } - - // Lookup the string in membersMap to avoid an allocation. - if s, found := dst.membersMap[string(src)]; found { - dst.value = s - } else { - // If an enum type is modified after the initial connection it is possible to receive an unexpected value. - // Gracefully handle this situation. Purposely NOT modifying members and membersMap to allow for sharing members - // and membersMap between connections. - dst.value = string(src) - } - dst.status = Present - - return nil -} - -func (dst *EnumType) DecodeBinary(ci *ConnInfo, src []byte) error { - return dst.DecodeText(ci, src) -} - -func (EnumType) PreferredParamFormat() int16 { - return TextFormatCode -} - -func (src EnumType) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, src.value...), nil -} - -func (src EnumType) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return src.EncodeText(ci, buf) -} diff --git a/vendor/github.com/jackc/pgtype/float4.go b/vendor/github.com/jackc/pgtype/float4.go deleted file mode 100644 index 89b9e8fa..00000000 --- a/vendor/github.com/jackc/pgtype/float4.go +++ /dev/null @@ -1,282 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "strconv" - - "github.com/jackc/pgio" -) - -type Float4 struct { - Float float32 - Status Status -} - -func (dst *Float4) Set(src interface{}) error { - if src == nil { - *dst = Float4{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case float32: - *dst = Float4{Float: value, Status: Present} - case float64: - *dst = Float4{Float: float32(value), Status: Present} - case int8: - *dst = Float4{Float: float32(value), Status: Present} - case uint8: - *dst = Float4{Float: float32(value), Status: Present} - case int16: - *dst = Float4{Float: float32(value), Status: Present} - case uint16: - *dst = Float4{Float: float32(value), Status: Present} - case int32: - f32 := float32(value) - if int32(f32) == value { - *dst = Float4{Float: f32, Status: Present} - } else { - return fmt.Errorf("%v cannot be exactly represented as float32", value) - } - case uint32: - f32 := float32(value) - if uint32(f32) == value { - *dst = Float4{Float: f32, Status: Present} - } else { - return fmt.Errorf("%v cannot be exactly represented as float32", value) - } - case int64: - f32 := float32(value) - if int64(f32) == value { - *dst = Float4{Float: f32, Status: Present} - } else { - return fmt.Errorf("%v cannot be exactly represented as float32", value) - } - case uint64: - f32 := float32(value) - if uint64(f32) == value { - *dst = Float4{Float: f32, Status: Present} - } else { - return fmt.Errorf("%v cannot be exactly represented as float32", value) - } - case int: - f32 := float32(value) - if int(f32) == value { - *dst = Float4{Float: f32, Status: Present} - } else { - return fmt.Errorf("%v cannot be exactly represented as float32", value) - } - case uint: - f32 := float32(value) - if uint(f32) == value { - *dst = Float4{Float: f32, Status: Present} - } else { - return fmt.Errorf("%v cannot be exactly represented as float32", value) - } - case string: - num, err := strconv.ParseFloat(value, 32) - if err != nil { - return err - } - *dst = Float4{Float: float32(num), Status: Present} - case *float64: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *float32: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *int8: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *uint8: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *int16: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *uint16: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *int32: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *uint32: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *int64: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *uint64: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *int: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *uint: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - case *string: - if value == nil { - *dst = Float4{Status: Null} - } else { - return dst.Set(*value) - } - default: - if originalSrc, ok := underlyingNumberType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Float8", value) - } - - return nil -} - -func (dst Float4) Get() interface{} { - switch dst.Status { - case Present: - return dst.Float - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Float4) AssignTo(dst interface{}) error { - return float64AssignTo(float64(src.Float), src.Status, dst) -} - -func (dst *Float4) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Float4{Status: Null} - return nil - } - - n, err := strconv.ParseFloat(string(src), 32) - if err != nil { - return err - } - - *dst = Float4{Float: float32(n), Status: Present} - return nil -} - -func (dst *Float4) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Float4{Status: Null} - return nil - } - - if len(src) != 4 { - return fmt.Errorf("invalid length for float4: %v", len(src)) - } - - n := int32(binary.BigEndian.Uint32(src)) - - *dst = Float4{Float: math.Float32frombits(uint32(n)), Status: Present} - return nil -} - -func (src Float4) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, strconv.FormatFloat(float64(src.Float), 'f', -1, 32)...) - return buf, nil -} - -func (src Float4) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendUint32(buf, math.Float32bits(src.Float)) - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Float4) Scan(src interface{}) error { - if src == nil { - *dst = Float4{Status: Null} - return nil - } - - switch src := src.(type) { - case float64: - *dst = Float4{Float: float32(src), Status: Present} - return nil - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Float4) Value() (driver.Value, error) { - switch src.Status { - case Present: - return float64(src.Float), nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} diff --git a/vendor/github.com/jackc/pgtype/float4_array.go b/vendor/github.com/jackc/pgtype/float4_array.go deleted file mode 100644 index 41f2ec8f..00000000 --- a/vendor/github.com/jackc/pgtype/float4_array.go +++ /dev/null @@ -1,517 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type Float4Array struct { - Elements []Float4 - Dimensions []ArrayDimension - Status Status -} - -func (dst *Float4Array) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Float4Array{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []float32: - if value == nil { - *dst = Float4Array{Status: Null} - } else if len(value) == 0 { - *dst = Float4Array{Status: Present} - } else { - elements := make([]Float4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Float4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*float32: - if value == nil { - *dst = Float4Array{Status: Null} - } else if len(value) == 0 { - *dst = Float4Array{Status: Present} - } else { - elements := make([]Float4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Float4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Float4: - if value == nil { - *dst = Float4Array{Status: Null} - } else if len(value) == 0 { - *dst = Float4Array{Status: Present} - } else { - *dst = Float4Array{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = Float4Array{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for Float4Array", src) - } - if elementsLength == 0 { - *dst = Float4Array{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Float4Array", src) - } - - *dst = Float4Array{ - Elements: make([]Float4, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Float4, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to Float4Array, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *Float4Array) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to Float4Array") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in Float4Array", err) - } - index++ - - return index, nil -} - -func (dst Float4Array) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Float4Array) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]float32: - *v = make([]float32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*float32: - *v = make([]*float32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *Float4Array) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from Float4Array") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from Float4Array") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *Float4Array) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Float4Array{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Float4 - - if len(uta.Elements) > 0 { - elements = make([]Float4, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Float4 - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = Float4Array{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *Float4Array) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Float4Array{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = Float4Array{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Float4, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = Float4Array{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src Float4Array) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src Float4Array) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("float4"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "float4") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Float4Array) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Float4Array) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/float8.go b/vendor/github.com/jackc/pgtype/float8.go deleted file mode 100644 index 6297ab5e..00000000 --- a/vendor/github.com/jackc/pgtype/float8.go +++ /dev/null @@ -1,272 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "strconv" - - "github.com/jackc/pgio" -) - -type Float8 struct { - Float float64 - Status Status -} - -func (dst *Float8) Set(src interface{}) error { - if src == nil { - *dst = Float8{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case float32: - *dst = Float8{Float: float64(value), Status: Present} - case float64: - *dst = Float8{Float: value, Status: Present} - case int8: - *dst = Float8{Float: float64(value), Status: Present} - case uint8: - *dst = Float8{Float: float64(value), Status: Present} - case int16: - *dst = Float8{Float: float64(value), Status: Present} - case uint16: - *dst = Float8{Float: float64(value), Status: Present} - case int32: - *dst = Float8{Float: float64(value), Status: Present} - case uint32: - *dst = Float8{Float: float64(value), Status: Present} - case int64: - f64 := float64(value) - if int64(f64) == value { - *dst = Float8{Float: f64, Status: Present} - } else { - return fmt.Errorf("%v cannot be exactly represented as float64", value) - } - case uint64: - f64 := float64(value) - if uint64(f64) == value { - *dst = Float8{Float: f64, Status: Present} - } else { - return fmt.Errorf("%v cannot be exactly represented as float64", value) - } - case int: - f64 := float64(value) - if int(f64) == value { - *dst = Float8{Float: f64, Status: Present} - } else { - return fmt.Errorf("%v cannot be exactly represented as float64", value) - } - case uint: - f64 := float64(value) - if uint(f64) == value { - *dst = Float8{Float: f64, Status: Present} - } else { - return fmt.Errorf("%v cannot be exactly represented as float64", value) - } - case string: - num, err := strconv.ParseFloat(value, 64) - if err != nil { - return err - } - *dst = Float8{Float: float64(num), Status: Present} - case *float64: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *float32: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *int8: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *uint8: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *int16: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *uint16: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *int32: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *uint32: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *int64: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *uint64: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *int: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *uint: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - case *string: - if value == nil { - *dst = Float8{Status: Null} - } else { - return dst.Set(*value) - } - default: - if originalSrc, ok := underlyingNumberType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Float8", value) - } - - return nil -} - -func (dst Float8) Get() interface{} { - switch dst.Status { - case Present: - return dst.Float - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Float8) AssignTo(dst interface{}) error { - return float64AssignTo(src.Float, src.Status, dst) -} - -func (dst *Float8) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Float8{Status: Null} - return nil - } - - n, err := strconv.ParseFloat(string(src), 64) - if err != nil { - return err - } - - *dst = Float8{Float: n, Status: Present} - return nil -} - -func (dst *Float8) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Float8{Status: Null} - return nil - } - - if len(src) != 8 { - return fmt.Errorf("invalid length for float8: %v", len(src)) - } - - n := int64(binary.BigEndian.Uint64(src)) - - *dst = Float8{Float: math.Float64frombits(uint64(n)), Status: Present} - return nil -} - -func (src Float8) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, strconv.FormatFloat(float64(src.Float), 'f', -1, 64)...) - return buf, nil -} - -func (src Float8) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendUint64(buf, math.Float64bits(src.Float)) - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Float8) Scan(src interface{}) error { - if src == nil { - *dst = Float8{Status: Null} - return nil - } - - switch src := src.(type) { - case float64: - *dst = Float8{Float: src, Status: Present} - return nil - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Float8) Value() (driver.Value, error) { - switch src.Status { - case Present: - return src.Float, nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} diff --git a/vendor/github.com/jackc/pgtype/float8_array.go b/vendor/github.com/jackc/pgtype/float8_array.go deleted file mode 100644 index 836ee19d..00000000 --- a/vendor/github.com/jackc/pgtype/float8_array.go +++ /dev/null @@ -1,517 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type Float8Array struct { - Elements []Float8 - Dimensions []ArrayDimension - Status Status -} - -func (dst *Float8Array) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Float8Array{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []float64: - if value == nil { - *dst = Float8Array{Status: Null} - } else if len(value) == 0 { - *dst = Float8Array{Status: Present} - } else { - elements := make([]Float8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Float8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*float64: - if value == nil { - *dst = Float8Array{Status: Null} - } else if len(value) == 0 { - *dst = Float8Array{Status: Present} - } else { - elements := make([]Float8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Float8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Float8: - if value == nil { - *dst = Float8Array{Status: Null} - } else if len(value) == 0 { - *dst = Float8Array{Status: Present} - } else { - *dst = Float8Array{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = Float8Array{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for Float8Array", src) - } - if elementsLength == 0 { - *dst = Float8Array{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Float8Array", src) - } - - *dst = Float8Array{ - Elements: make([]Float8, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Float8, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to Float8Array, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *Float8Array) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to Float8Array") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in Float8Array", err) - } - index++ - - return index, nil -} - -func (dst Float8Array) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Float8Array) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]float64: - *v = make([]float64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*float64: - *v = make([]*float64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *Float8Array) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from Float8Array") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from Float8Array") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *Float8Array) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Float8Array{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Float8 - - if len(uta.Elements) > 0 { - elements = make([]Float8, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Float8 - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = Float8Array{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *Float8Array) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Float8Array{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = Float8Array{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Float8, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = Float8Array{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src Float8Array) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src Float8Array) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("float8"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "float8") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Float8Array) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Float8Array) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/generic_binary.go b/vendor/github.com/jackc/pgtype/generic_binary.go deleted file mode 100644 index 76a1d351..00000000 --- a/vendor/github.com/jackc/pgtype/generic_binary.go +++ /dev/null @@ -1,39 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" -) - -// GenericBinary is a placeholder for binary format values that no other type exists -// to handle. -type GenericBinary Bytea - -func (dst *GenericBinary) Set(src interface{}) error { - return (*Bytea)(dst).Set(src) -} - -func (dst GenericBinary) Get() interface{} { - return (Bytea)(dst).Get() -} - -func (src *GenericBinary) AssignTo(dst interface{}) error { - return (*Bytea)(src).AssignTo(dst) -} - -func (dst *GenericBinary) DecodeBinary(ci *ConnInfo, src []byte) error { - return (*Bytea)(dst).DecodeBinary(ci, src) -} - -func (src GenericBinary) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Bytea)(src).EncodeBinary(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *GenericBinary) Scan(src interface{}) error { - return (*Bytea)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src GenericBinary) Value() (driver.Value, error) { - return (Bytea)(src).Value() -} diff --git a/vendor/github.com/jackc/pgtype/generic_text.go b/vendor/github.com/jackc/pgtype/generic_text.go deleted file mode 100644 index dbf5b47e..00000000 --- a/vendor/github.com/jackc/pgtype/generic_text.go +++ /dev/null @@ -1,39 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" -) - -// GenericText is a placeholder for text format values that no other type exists -// to handle. -type GenericText Text - -func (dst *GenericText) Set(src interface{}) error { - return (*Text)(dst).Set(src) -} - -func (dst GenericText) Get() interface{} { - return (Text)(dst).Get() -} - -func (src *GenericText) AssignTo(dst interface{}) error { - return (*Text)(src).AssignTo(dst) -} - -func (dst *GenericText) DecodeText(ci *ConnInfo, src []byte) error { - return (*Text)(dst).DecodeText(ci, src) -} - -func (src GenericText) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Text)(src).EncodeText(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *GenericText) Scan(src interface{}) error { - return (*Text)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src GenericText) Value() (driver.Value, error) { - return (Text)(src).Value() -} diff --git a/vendor/github.com/jackc/pgtype/hstore.go b/vendor/github.com/jackc/pgtype/hstore.go deleted file mode 100644 index e42b7551..00000000 --- a/vendor/github.com/jackc/pgtype/hstore.go +++ /dev/null @@ -1,465 +0,0 @@ -package pgtype - -import ( - "bytes" - "database/sql/driver" - "encoding/binary" - "errors" - "fmt" - "strings" - "unicode" - "unicode/utf8" - - "github.com/jackc/pgio" -) - -// Hstore represents an hstore column that can be null or have null values -// associated with its keys. -type Hstore struct { - Map map[string]Text - Status Status -} - -func (dst *Hstore) Set(src interface{}) error { - if src == nil { - *dst = Hstore{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case map[string]string: - m := make(map[string]Text, len(value)) - for k, v := range value { - m[k] = Text{String: v, Status: Present} - } - *dst = Hstore{Map: m, Status: Present} - case map[string]*string: - m := make(map[string]Text, len(value)) - for k, v := range value { - if v == nil { - m[k] = Text{Status: Null} - } else { - m[k] = Text{String: *v, Status: Present} - } - } - *dst = Hstore{Map: m, Status: Present} - case map[string]Text: - *dst = Hstore{Map: value, Status: Present} - default: - return fmt.Errorf("cannot convert %v to Hstore", src) - } - - return nil -} - -func (dst Hstore) Get() interface{} { - switch dst.Status { - case Present: - return dst.Map - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Hstore) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *map[string]string: - *v = make(map[string]string, len(src.Map)) - for k, val := range src.Map { - if val.Status != Present { - return fmt.Errorf("cannot decode %#v into %T", src, dst) - } - (*v)[k] = val.String - } - return nil - case *map[string]*string: - *v = make(map[string]*string, len(src.Map)) - for k, val := range src.Map { - switch val.Status { - case Null: - (*v)[k] = nil - case Present: - str := val.String - (*v)[k] = &str - default: - return fmt.Errorf("cannot decode %#v into %T", src, dst) - } - } - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (dst *Hstore) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Hstore{Status: Null} - return nil - } - - keys, values, err := parseHstore(string(src)) - if err != nil { - return err - } - - m := make(map[string]Text, len(keys)) - for i := range keys { - m[keys[i]] = values[i] - } - - *dst = Hstore{Map: m, Status: Present} - return nil -} - -func (dst *Hstore) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Hstore{Status: Null} - return nil - } - - rp := 0 - - if len(src[rp:]) < 4 { - return fmt.Errorf("hstore incomplete %v", src) - } - pairCount := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - - m := make(map[string]Text, pairCount) - - for i := 0; i < pairCount; i++ { - if len(src[rp:]) < 4 { - return fmt.Errorf("hstore incomplete %v", src) - } - keyLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - - if len(src[rp:]) < keyLen { - return fmt.Errorf("hstore incomplete %v", src) - } - key := string(src[rp : rp+keyLen]) - rp += keyLen - - if len(src[rp:]) < 4 { - return fmt.Errorf("hstore incomplete %v", src) - } - valueLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - - var valueBuf []byte - if valueLen >= 0 { - valueBuf = src[rp : rp+valueLen] - rp += valueLen - } - - var value Text - err := value.DecodeBinary(ci, valueBuf) - if err != nil { - return err - } - m[key] = value - } - - *dst = Hstore{Map: m, Status: Present} - - return nil -} - -func (src Hstore) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - firstPair := true - - inElemBuf := make([]byte, 0, 32) - for k, v := range src.Map { - if firstPair { - firstPair = false - } else { - buf = append(buf, ',') - } - - buf = append(buf, quoteHstoreElementIfNeeded(k)...) - buf = append(buf, "=>"...) - - elemBuf, err := v.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - - if elemBuf == nil { - buf = append(buf, "NULL"...) - } else { - buf = append(buf, quoteHstoreElementIfNeeded(string(elemBuf))...) - } - } - - return buf, nil -} - -func (src Hstore) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendInt32(buf, int32(len(src.Map))) - - var err error - for k, v := range src.Map { - buf = pgio.AppendInt32(buf, int32(len(k))) - buf = append(buf, k...) - - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := v.EncodeText(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, err -} - -var quoteHstoreReplacer = strings.NewReplacer(`\`, `\\`, `"`, `\"`) - -func quoteHstoreElement(src string) string { - return `"` + quoteArrayReplacer.Replace(src) + `"` -} - -func quoteHstoreElementIfNeeded(src string) string { - if src == "" || (len(src) == 4 && strings.ToLower(src) == "null") || strings.ContainsAny(src, ` {},"\=>`) { - return quoteArrayElement(src) - } - return src -} - -const ( - hsPre = iota - hsKey - hsSep - hsVal - hsNul - hsNext -) - -type hstoreParser struct { - str string - pos int -} - -func newHSP(in string) *hstoreParser { - return &hstoreParser{ - pos: 0, - str: in, - } -} - -func (p *hstoreParser) Consume() (r rune, end bool) { - if p.pos >= len(p.str) { - end = true - return - } - r, w := utf8.DecodeRuneInString(p.str[p.pos:]) - p.pos += w - return -} - -func (p *hstoreParser) Peek() (r rune, end bool) { - if p.pos >= len(p.str) { - end = true - return - } - r, _ = utf8.DecodeRuneInString(p.str[p.pos:]) - return -} - -// parseHstore parses the string representation of an hstore column (the same -// you would get from an ordinary SELECT) into two slices of keys and values. it -// is used internally in the default parsing of hstores. -func parseHstore(s string) (k []string, v []Text, err error) { - if s == "" { - return - } - - buf := bytes.Buffer{} - keys := []string{} - values := []Text{} - p := newHSP(s) - - r, end := p.Consume() - state := hsPre - - for !end { - switch state { - case hsPre: - if r == '"' { - state = hsKey - } else { - err = errors.New("String does not begin with \"") - } - case hsKey: - switch r { - case '"': //End of the key - keys = append(keys, buf.String()) - buf = bytes.Buffer{} - state = hsSep - case '\\': //Potential escaped character - n, end := p.Consume() - switch { - case end: - err = errors.New("Found EOS in key, expecting character or \"") - case n == '"', n == '\\': - buf.WriteRune(n) - default: - buf.WriteRune(r) - buf.WriteRune(n) - } - default: //Any other character - buf.WriteRune(r) - } - case hsSep: - if r == '=' { - r, end = p.Consume() - switch { - case end: - err = errors.New("Found EOS after '=', expecting '>'") - case r == '>': - r, end = p.Consume() - switch { - case end: - err = errors.New("Found EOS after '=>', expecting '\"' or 'NULL'") - case r == '"': - state = hsVal - case r == 'N': - state = hsNul - default: - err = fmt.Errorf("Invalid character '%c' after '=>', expecting '\"' or 'NULL'", r) - } - default: - err = fmt.Errorf("Invalid character after '=', expecting '>'") - } - } else { - err = fmt.Errorf("Invalid character '%c' after value, expecting '='", r) - } - case hsVal: - switch r { - case '"': //End of the value - values = append(values, Text{String: buf.String(), Status: Present}) - buf = bytes.Buffer{} - state = hsNext - case '\\': //Potential escaped character - n, end := p.Consume() - switch { - case end: - err = errors.New("Found EOS in key, expecting character or \"") - case n == '"', n == '\\': - buf.WriteRune(n) - default: - buf.WriteRune(r) - buf.WriteRune(n) - } - default: //Any other character - buf.WriteRune(r) - } - case hsNul: - nulBuf := make([]rune, 3) - nulBuf[0] = r - for i := 1; i < 3; i++ { - r, end = p.Consume() - if end { - err = errors.New("Found EOS in NULL value") - return - } - nulBuf[i] = r - } - if nulBuf[0] == 'U' && nulBuf[1] == 'L' && nulBuf[2] == 'L' { - values = append(values, Text{Status: Null}) - state = hsNext - } else { - err = fmt.Errorf("Invalid NULL value: 'N%s'", string(nulBuf)) - } - case hsNext: - if r == ',' { - r, end = p.Consume() - switch { - case end: - err = errors.New("Found EOS after ',', expecting space") - case (unicode.IsSpace(r)): - r, end = p.Consume() - state = hsKey - default: - err = fmt.Errorf("Invalid character '%c' after ', ', expecting \"", r) - } - } else { - err = fmt.Errorf("Invalid character '%c' after value, expecting ','", r) - } - } - - if err != nil { - return - } - r, end = p.Consume() - } - if state != hsNext { - err = errors.New("Improperly formatted hstore") - return - } - k = keys - v = values - return -} - -// Scan implements the database/sql Scanner interface. -func (dst *Hstore) Scan(src interface{}) error { - if src == nil { - *dst = Hstore{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Hstore) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/hstore_array.go b/vendor/github.com/jackc/pgtype/hstore_array.go deleted file mode 100644 index 47b4b3ff..00000000 --- a/vendor/github.com/jackc/pgtype/hstore_array.go +++ /dev/null @@ -1,489 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type HstoreArray struct { - Elements []Hstore - Dimensions []ArrayDimension - Status Status -} - -func (dst *HstoreArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = HstoreArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []map[string]string: - if value == nil { - *dst = HstoreArray{Status: Null} - } else if len(value) == 0 { - *dst = HstoreArray{Status: Present} - } else { - elements := make([]Hstore, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = HstoreArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Hstore: - if value == nil { - *dst = HstoreArray{Status: Null} - } else if len(value) == 0 { - *dst = HstoreArray{Status: Present} - } else { - *dst = HstoreArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = HstoreArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for HstoreArray", src) - } - if elementsLength == 0 { - *dst = HstoreArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to HstoreArray", src) - } - - *dst = HstoreArray{ - Elements: make([]Hstore, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Hstore, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to HstoreArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *HstoreArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to HstoreArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in HstoreArray", err) - } - index++ - - return index, nil -} - -func (dst HstoreArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *HstoreArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]map[string]string: - *v = make([]map[string]string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *HstoreArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from HstoreArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from HstoreArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *HstoreArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = HstoreArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Hstore - - if len(uta.Elements) > 0 { - elements = make([]Hstore, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Hstore - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = HstoreArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *HstoreArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = HstoreArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = HstoreArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Hstore, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = HstoreArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src HstoreArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src HstoreArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("hstore"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "hstore") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *HstoreArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src HstoreArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/inet.go b/vendor/github.com/jackc/pgtype/inet.go deleted file mode 100644 index 976f0d7b..00000000 --- a/vendor/github.com/jackc/pgtype/inet.go +++ /dev/null @@ -1,304 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding" - "fmt" - "net" - "strings" -) - -// Network address family is dependent on server socket.h value for AF_INET. -// In practice, all platforms appear to have the same value. See -// src/include/utils/inet.h for more information. -const ( - defaultAFInet = 2 - defaultAFInet6 = 3 -) - -// Inet represents both inet and cidr PostgreSQL types. -type Inet struct { - IPNet *net.IPNet - Status Status -} - -func (dst *Inet) Set(src interface{}) error { - if src == nil { - *dst = Inet{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case net.IPNet: - *dst = Inet{IPNet: &value, Status: Present} - case net.IP: - if len(value) == 0 { - *dst = Inet{Status: Null} - } else { - bitCount := len(value) * 8 - mask := net.CIDRMask(bitCount, bitCount) - *dst = Inet{IPNet: &net.IPNet{Mask: mask, IP: value}, Status: Present} - } - case string: - ip, ipnet, err := net.ParseCIDR(value) - if err != nil { - ip := net.ParseIP(value) - if ip == nil { - return fmt.Errorf("unable to parse inet address: %s", value) - } - - if ipv4 := maybeGetIPv4(value, ip); ipv4 != nil { - ipnet = &net.IPNet{IP: ipv4, Mask: net.CIDRMask(32, 32)} - } else { - ipnet = &net.IPNet{IP: ip, Mask: net.CIDRMask(128, 128)} - } - } else { - ipnet.IP = ip - if ipv4 := maybeGetIPv4(value, ipnet.IP); ipv4 != nil { - ipnet.IP = ipv4 - if len(ipnet.Mask) == 16 { - ipnet.Mask = ipnet.Mask[12:] // Not sure this is ever needed. - } - } - } - - *dst = Inet{IPNet: ipnet, Status: Present} - case *net.IPNet: - if value == nil { - *dst = Inet{Status: Null} - } else { - return dst.Set(*value) - } - case *net.IP: - if value == nil { - *dst = Inet{Status: Null} - } else { - return dst.Set(*value) - } - case *string: - if value == nil { - *dst = Inet{Status: Null} - } else { - return dst.Set(*value) - } - default: - if tv, ok := src.(encoding.TextMarshaler); ok { - text, err := tv.MarshalText() - if err != nil { - return fmt.Errorf("cannot marshal %v: %w", value, err) - } - return dst.Set(string(text)) - } - if sv, ok := src.(fmt.Stringer); ok { - return dst.Set(sv.String()) - } - if originalSrc, ok := underlyingPtrType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Inet", value) - } - - return nil -} - -// Convert the net.IP to IPv4, if appropriate. -// -// When parsing a string to a net.IP using net.ParseIP() and the like, we get a -// 16 byte slice for IPv4 addresses as well as IPv6 addresses. This function -// calls To4() to convert them to a 4 byte slice. This is useful as it allows -// users of the net.IP check for IPv4 addresses based on the length and makes -// it clear we are handling IPv4 as opposed to IPv6 or IPv4-mapped IPv6 -// addresses. -func maybeGetIPv4(input string, ip net.IP) net.IP { - // Do not do this if the provided input looks like IPv6. This is because - // To4() on IPv4-mapped IPv6 addresses converts them to IPv4, which behave - // different in some cases. - if strings.Contains(input, ":") { - return nil - } - - return ip.To4() -} - -func (dst Inet) Get() interface{} { - switch dst.Status { - case Present: - return dst.IPNet - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Inet) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *net.IPNet: - *v = net.IPNet{ - IP: make(net.IP, len(src.IPNet.IP)), - Mask: make(net.IPMask, len(src.IPNet.Mask)), - } - copy(v.IP, src.IPNet.IP) - copy(v.Mask, src.IPNet.Mask) - return nil - case *net.IP: - if oneCount, bitCount := src.IPNet.Mask.Size(); oneCount != bitCount { - return fmt.Errorf("cannot assign %v to %T", src, dst) - } - *v = make(net.IP, len(src.IPNet.IP)) - copy(*v, src.IPNet.IP) - return nil - default: - if tv, ok := dst.(encoding.TextUnmarshaler); ok { - if err := tv.UnmarshalText([]byte(src.IPNet.String())); err != nil { - return fmt.Errorf("cannot unmarshal %v to %T: %w", src, dst, err) - } - return nil - } - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (dst *Inet) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Inet{Status: Null} - return nil - } - - var ipnet *net.IPNet - var err error - - if ip := net.ParseIP(string(src)); ip != nil { - if ipv4 := ip.To4(); ipv4 != nil { - ip = ipv4 - } - bitCount := len(ip) * 8 - mask := net.CIDRMask(bitCount, bitCount) - ipnet = &net.IPNet{Mask: mask, IP: ip} - } else { - ip, ipnet, err = net.ParseCIDR(string(src)) - if err != nil { - return err - } - if ipv4 := ip.To4(); ipv4 != nil { - ip = ipv4 - } - ones, _ := ipnet.Mask.Size() - *ipnet = net.IPNet{IP: ip, Mask: net.CIDRMask(ones, len(ip)*8)} - } - - *dst = Inet{IPNet: ipnet, Status: Present} - return nil -} - -func (dst *Inet) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Inet{Status: Null} - return nil - } - - if len(src) != 8 && len(src) != 20 { - return fmt.Errorf("Received an invalid size for an inet: %d", len(src)) - } - - // ignore family - bits := src[1] - // ignore is_cidr - addressLength := src[3] - - var ipnet net.IPNet - ipnet.IP = make(net.IP, int(addressLength)) - copy(ipnet.IP, src[4:]) - if ipv4 := ipnet.IP.To4(); ipv4 != nil { - ipnet.IP = ipv4 - } - ipnet.Mask = net.CIDRMask(int(bits), len(ipnet.IP)*8) - - *dst = Inet{IPNet: &ipnet, Status: Present} - - return nil -} - -func (src Inet) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, src.IPNet.String()...), nil -} - -// EncodeBinary encodes src into w. -func (src Inet) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var family byte - switch len(src.IPNet.IP) { - case net.IPv4len: - family = defaultAFInet - case net.IPv6len: - family = defaultAFInet6 - default: - return nil, fmt.Errorf("Unexpected IP length: %v", len(src.IPNet.IP)) - } - - buf = append(buf, family) - - ones, _ := src.IPNet.Mask.Size() - buf = append(buf, byte(ones)) - - // is_cidr is ignored on server - buf = append(buf, 0) - - buf = append(buf, byte(len(src.IPNet.IP))) - - return append(buf, src.IPNet.IP...), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Inet) Scan(src interface{}) error { - if src == nil { - *dst = Inet{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Inet) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/inet_array.go b/vendor/github.com/jackc/pgtype/inet_array.go deleted file mode 100644 index 2460a1c4..00000000 --- a/vendor/github.com/jackc/pgtype/inet_array.go +++ /dev/null @@ -1,546 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "net" - "reflect" - - "github.com/jackc/pgio" -) - -type InetArray struct { - Elements []Inet - Dimensions []ArrayDimension - Status Status -} - -func (dst *InetArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = InetArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []*net.IPNet: - if value == nil { - *dst = InetArray{Status: Null} - } else if len(value) == 0 { - *dst = InetArray{Status: Present} - } else { - elements := make([]Inet, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = InetArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []net.IP: - if value == nil { - *dst = InetArray{Status: Null} - } else if len(value) == 0 { - *dst = InetArray{Status: Present} - } else { - elements := make([]Inet, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = InetArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*net.IP: - if value == nil { - *dst = InetArray{Status: Null} - } else if len(value) == 0 { - *dst = InetArray{Status: Present} - } else { - elements := make([]Inet, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = InetArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Inet: - if value == nil { - *dst = InetArray{Status: Null} - } else if len(value) == 0 { - *dst = InetArray{Status: Present} - } else { - *dst = InetArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = InetArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for InetArray", src) - } - if elementsLength == 0 { - *dst = InetArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to InetArray", src) - } - - *dst = InetArray{ - Elements: make([]Inet, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Inet, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to InetArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *InetArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to InetArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in InetArray", err) - } - index++ - - return index, nil -} - -func (dst InetArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *InetArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]*net.IPNet: - *v = make([]*net.IPNet, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]net.IP: - *v = make([]net.IP, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*net.IP: - *v = make([]*net.IP, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *InetArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from InetArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from InetArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *InetArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = InetArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Inet - - if len(uta.Elements) > 0 { - elements = make([]Inet, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Inet - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = InetArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *InetArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = InetArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = InetArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Inet, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = InetArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src InetArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src InetArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("inet"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "inet") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *InetArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src InetArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/int2.go b/vendor/github.com/jackc/pgtype/int2.go deleted file mode 100644 index 0775882a..00000000 --- a/vendor/github.com/jackc/pgtype/int2.go +++ /dev/null @@ -1,321 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "encoding/json" - "fmt" - "math" - "strconv" - - "github.com/jackc/pgio" -) - -type Int2 struct { - Int int16 - Status Status -} - -func (dst *Int2) Set(src interface{}) error { - if src == nil { - *dst = Int2{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case int8: - *dst = Int2{Int: int16(value), Status: Present} - case uint8: - *dst = Int2{Int: int16(value), Status: Present} - case int16: - *dst = Int2{Int: int16(value), Status: Present} - case uint16: - if value > math.MaxInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", value) - } - *dst = Int2{Int: int16(value), Status: Present} - case int32: - if value < math.MinInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", value) - } - if value > math.MaxInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", value) - } - *dst = Int2{Int: int16(value), Status: Present} - case uint32: - if value > math.MaxInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", value) - } - *dst = Int2{Int: int16(value), Status: Present} - case int64: - if value < math.MinInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", value) - } - if value > math.MaxInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", value) - } - *dst = Int2{Int: int16(value), Status: Present} - case uint64: - if value > math.MaxInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", value) - } - *dst = Int2{Int: int16(value), Status: Present} - case int: - if value < math.MinInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", value) - } - if value > math.MaxInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", value) - } - *dst = Int2{Int: int16(value), Status: Present} - case uint: - if value > math.MaxInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", value) - } - *dst = Int2{Int: int16(value), Status: Present} - case string: - num, err := strconv.ParseInt(value, 10, 16) - if err != nil { - return err - } - *dst = Int2{Int: int16(num), Status: Present} - case float32: - if value > math.MaxInt16 { - return fmt.Errorf("%f is greater than maximum value for Int2", value) - } - *dst = Int2{Int: int16(value), Status: Present} - case float64: - if value > math.MaxInt16 { - return fmt.Errorf("%f is greater than maximum value for Int2", value) - } - *dst = Int2{Int: int16(value), Status: Present} - case *int8: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *uint8: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *int16: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *uint16: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *int32: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *uint32: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *int64: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *uint64: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *int: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *uint: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *string: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *float32: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - case *float64: - if value == nil { - *dst = Int2{Status: Null} - } else { - return dst.Set(*value) - } - default: - if originalSrc, ok := underlyingNumberType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Int2", value) - } - - return nil -} - -func (dst Int2) Get() interface{} { - switch dst.Status { - case Present: - return dst.Int - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Int2) AssignTo(dst interface{}) error { - return int64AssignTo(int64(src.Int), src.Status, dst) -} - -func (dst *Int2) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int2{Status: Null} - return nil - } - - n, err := strconv.ParseInt(string(src), 10, 16) - if err != nil { - return err - } - - *dst = Int2{Int: int16(n), Status: Present} - return nil -} - -func (dst *Int2) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int2{Status: Null} - return nil - } - - if len(src) != 2 { - return fmt.Errorf("invalid length for int2: %v", len(src)) - } - - n := int16(binary.BigEndian.Uint16(src)) - *dst = Int2{Int: n, Status: Present} - return nil -} - -func (src Int2) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, strconv.FormatInt(int64(src.Int), 10)...), nil -} - -func (src Int2) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return pgio.AppendInt16(buf, src.Int), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Int2) Scan(src interface{}) error { - if src == nil { - *dst = Int2{Status: Null} - return nil - } - - switch src := src.(type) { - case int64: - if src < math.MinInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", src) - } - if src > math.MaxInt16 { - return fmt.Errorf("%d is greater than maximum value for Int2", src) - } - *dst = Int2{Int: int16(src), Status: Present} - return nil - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Int2) Value() (driver.Value, error) { - switch src.Status { - case Present: - return int64(src.Int), nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} - -func (src Int2) MarshalJSON() ([]byte, error) { - switch src.Status { - case Present: - return []byte(strconv.FormatInt(int64(src.Int), 10)), nil - case Null: - return []byte("null"), nil - case Undefined: - return nil, errUndefined - } - - return nil, errBadStatus -} - -func (dst *Int2) UnmarshalJSON(b []byte) error { - var n *int16 - err := json.Unmarshal(b, &n) - if err != nil { - return err - } - - if n == nil { - *dst = Int2{Status: Null} - } else { - *dst = Int2{Int: *n, Status: Present} - } - - return nil -} diff --git a/vendor/github.com/jackc/pgtype/int2_array.go b/vendor/github.com/jackc/pgtype/int2_array.go deleted file mode 100644 index a5133845..00000000 --- a/vendor/github.com/jackc/pgtype/int2_array.go +++ /dev/null @@ -1,909 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type Int2Array struct { - Elements []Int2 - Dimensions []ArrayDimension - Status Status -} - -func (dst *Int2Array) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Int2Array{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []int16: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int16: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint16: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint16: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []int32: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int32: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint32: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint32: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []int64: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int64: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint64: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint64: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []int: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - elements := make([]Int2, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int2Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Int2: - if value == nil { - *dst = Int2Array{Status: Null} - } else if len(value) == 0 { - *dst = Int2Array{Status: Present} - } else { - *dst = Int2Array{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = Int2Array{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for Int2Array", src) - } - if elementsLength == 0 { - *dst = Int2Array{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Int2Array", src) - } - - *dst = Int2Array{ - Elements: make([]Int2, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Int2, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to Int2Array, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *Int2Array) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to Int2Array") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in Int2Array", err) - } - index++ - - return index, nil -} - -func (dst Int2Array) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Int2Array) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]int16: - *v = make([]int16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int16: - *v = make([]*int16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint16: - *v = make([]uint16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint16: - *v = make([]*uint16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]int32: - *v = make([]int32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int32: - *v = make([]*int32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint32: - *v = make([]uint32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint32: - *v = make([]*uint32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]int64: - *v = make([]int64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int64: - *v = make([]*int64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint64: - *v = make([]uint64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint64: - *v = make([]*uint64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]int: - *v = make([]int, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int: - *v = make([]*int, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint: - *v = make([]uint, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint: - *v = make([]*uint, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *Int2Array) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from Int2Array") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from Int2Array") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *Int2Array) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int2Array{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Int2 - - if len(uta.Elements) > 0 { - elements = make([]Int2, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Int2 - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = Int2Array{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *Int2Array) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int2Array{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = Int2Array{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Int2, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = Int2Array{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src Int2Array) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src Int2Array) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("int2"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "int2") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Int2Array) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Int2Array) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/int4.go b/vendor/github.com/jackc/pgtype/int4.go deleted file mode 100644 index 22b48e5e..00000000 --- a/vendor/github.com/jackc/pgtype/int4.go +++ /dev/null @@ -1,312 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "encoding/json" - "fmt" - "math" - "strconv" - - "github.com/jackc/pgio" -) - -type Int4 struct { - Int int32 - Status Status -} - -func (dst *Int4) Set(src interface{}) error { - if src == nil { - *dst = Int4{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case int8: - *dst = Int4{Int: int32(value), Status: Present} - case uint8: - *dst = Int4{Int: int32(value), Status: Present} - case int16: - *dst = Int4{Int: int32(value), Status: Present} - case uint16: - *dst = Int4{Int: int32(value), Status: Present} - case int32: - *dst = Int4{Int: int32(value), Status: Present} - case uint32: - if value > math.MaxInt32 { - return fmt.Errorf("%d is greater than maximum value for Int4", value) - } - *dst = Int4{Int: int32(value), Status: Present} - case int64: - if value < math.MinInt32 { - return fmt.Errorf("%d is greater than maximum value for Int4", value) - } - if value > math.MaxInt32 { - return fmt.Errorf("%d is greater than maximum value for Int4", value) - } - *dst = Int4{Int: int32(value), Status: Present} - case uint64: - if value > math.MaxInt32 { - return fmt.Errorf("%d is greater than maximum value for Int4", value) - } - *dst = Int4{Int: int32(value), Status: Present} - case int: - if value < math.MinInt32 { - return fmt.Errorf("%d is greater than maximum value for Int4", value) - } - if value > math.MaxInt32 { - return fmt.Errorf("%d is greater than maximum value for Int4", value) - } - *dst = Int4{Int: int32(value), Status: Present} - case uint: - if value > math.MaxInt32 { - return fmt.Errorf("%d is greater than maximum value for Int4", value) - } - *dst = Int4{Int: int32(value), Status: Present} - case string: - num, err := strconv.ParseInt(value, 10, 32) - if err != nil { - return err - } - *dst = Int4{Int: int32(num), Status: Present} - case float32: - if value > math.MaxInt32 { - return fmt.Errorf("%f is greater than maximum value for Int4", value) - } - *dst = Int4{Int: int32(value), Status: Present} - case float64: - if value > math.MaxInt32 { - return fmt.Errorf("%f is greater than maximum value for Int4", value) - } - *dst = Int4{Int: int32(value), Status: Present} - case *int8: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *uint8: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *int16: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *uint16: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *int32: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *uint32: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *int64: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *uint64: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *int: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *uint: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *string: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *float32: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - case *float64: - if value == nil { - *dst = Int4{Status: Null} - } else { - return dst.Set(*value) - } - default: - if originalSrc, ok := underlyingNumberType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Int4", value) - } - - return nil -} - -func (dst Int4) Get() interface{} { - switch dst.Status { - case Present: - return dst.Int - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Int4) AssignTo(dst interface{}) error { - return int64AssignTo(int64(src.Int), src.Status, dst) -} - -func (dst *Int4) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int4{Status: Null} - return nil - } - - n, err := strconv.ParseInt(string(src), 10, 32) - if err != nil { - return err - } - - *dst = Int4{Int: int32(n), Status: Present} - return nil -} - -func (dst *Int4) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int4{Status: Null} - return nil - } - - if len(src) != 4 { - return fmt.Errorf("invalid length for int4: %v", len(src)) - } - - n := int32(binary.BigEndian.Uint32(src)) - *dst = Int4{Int: n, Status: Present} - return nil -} - -func (src Int4) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, strconv.FormatInt(int64(src.Int), 10)...), nil -} - -func (src Int4) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return pgio.AppendInt32(buf, src.Int), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Int4) Scan(src interface{}) error { - if src == nil { - *dst = Int4{Status: Null} - return nil - } - - switch src := src.(type) { - case int64: - if src < math.MinInt32 { - return fmt.Errorf("%d is greater than maximum value for Int4", src) - } - if src > math.MaxInt32 { - return fmt.Errorf("%d is greater than maximum value for Int4", src) - } - *dst = Int4{Int: int32(src), Status: Present} - return nil - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Int4) Value() (driver.Value, error) { - switch src.Status { - case Present: - return int64(src.Int), nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} - -func (src Int4) MarshalJSON() ([]byte, error) { - switch src.Status { - case Present: - return []byte(strconv.FormatInt(int64(src.Int), 10)), nil - case Null: - return []byte("null"), nil - case Undefined: - return nil, errUndefined - } - - return nil, errBadStatus -} - -func (dst *Int4) UnmarshalJSON(b []byte) error { - var n *int32 - err := json.Unmarshal(b, &n) - if err != nil { - return err - } - - if n == nil { - *dst = Int4{Status: Null} - } else { - *dst = Int4{Int: *n, Status: Present} - } - - return nil -} diff --git a/vendor/github.com/jackc/pgtype/int4_array.go b/vendor/github.com/jackc/pgtype/int4_array.go deleted file mode 100644 index de26236f..00000000 --- a/vendor/github.com/jackc/pgtype/int4_array.go +++ /dev/null @@ -1,909 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type Int4Array struct { - Elements []Int4 - Dimensions []ArrayDimension - Status Status -} - -func (dst *Int4Array) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Int4Array{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []int16: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int16: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint16: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint16: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []int32: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int32: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint32: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint32: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []int64: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int64: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint64: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint64: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []int: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - elements := make([]Int4, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Int4: - if value == nil { - *dst = Int4Array{Status: Null} - } else if len(value) == 0 { - *dst = Int4Array{Status: Present} - } else { - *dst = Int4Array{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = Int4Array{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for Int4Array", src) - } - if elementsLength == 0 { - *dst = Int4Array{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Int4Array", src) - } - - *dst = Int4Array{ - Elements: make([]Int4, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Int4, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to Int4Array, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *Int4Array) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to Int4Array") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in Int4Array", err) - } - index++ - - return index, nil -} - -func (dst Int4Array) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Int4Array) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]int16: - *v = make([]int16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int16: - *v = make([]*int16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint16: - *v = make([]uint16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint16: - *v = make([]*uint16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]int32: - *v = make([]int32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int32: - *v = make([]*int32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint32: - *v = make([]uint32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint32: - *v = make([]*uint32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]int64: - *v = make([]int64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int64: - *v = make([]*int64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint64: - *v = make([]uint64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint64: - *v = make([]*uint64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]int: - *v = make([]int, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int: - *v = make([]*int, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint: - *v = make([]uint, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint: - *v = make([]*uint, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *Int4Array) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from Int4Array") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from Int4Array") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *Int4Array) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int4Array{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Int4 - - if len(uta.Elements) > 0 { - elements = make([]Int4, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Int4 - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = Int4Array{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *Int4Array) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int4Array{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = Int4Array{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Int4, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = Int4Array{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src Int4Array) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src Int4Array) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("int4"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "int4") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Int4Array) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Int4Array) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/int4_multirange.go b/vendor/github.com/jackc/pgtype/int4_multirange.go deleted file mode 100644 index c3432ce6..00000000 --- a/vendor/github.com/jackc/pgtype/int4_multirange.go +++ /dev/null @@ -1,239 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - - "github.com/jackc/pgio" -) - -type Int4multirange struct { - Ranges []Int4range - Status Status -} - -func (dst *Int4multirange) Set(src interface{}) error { - //untyped nil and typed nil interfaces are different - if src == nil { - *dst = Int4multirange{Status: Null} - return nil - } - - switch value := src.(type) { - case Int4multirange: - *dst = value - case *Int4multirange: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - case []Int4range: - if value == nil { - *dst = Int4multirange{Status: Null} - } else if len(value) == 0 { - *dst = Int4multirange{Status: Present} - } else { - elements := make([]Int4range, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4multirange{ - Ranges: elements, - Status: Present, - } - } - case []*Int4range: - if value == nil { - *dst = Int4multirange{Status: Null} - } else if len(value) == 0 { - *dst = Int4multirange{Status: Present} - } else { - elements := make([]Int4range, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int4multirange{ - Ranges: elements, - Status: Present, - } - } - default: - return fmt.Errorf("cannot convert %v to Int4multirange", src) - } - - return nil - -} - -func (dst Int4multirange) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Int4multirange) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Int4multirange) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int4multirange{Status: Null} - return nil - } - - utmr, err := ParseUntypedTextMultirange(string(src)) - if err != nil { - return err - } - - var elements []Int4range - - if len(utmr.Elements) > 0 { - elements = make([]Int4range, len(utmr.Elements)) - - for i, s := range utmr.Elements { - var elem Int4range - - elemSrc := []byte(s) - - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = Int4multirange{Ranges: elements, Status: Present} - - return nil -} - -func (dst *Int4multirange) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int4multirange{Status: Null} - return nil - } - - rp := 0 - - numElems := int(binary.BigEndian.Uint32(src[rp:])) - rp += 4 - - if numElems == 0 { - *dst = Int4multirange{Status: Present} - return nil - } - - elements := make([]Int4range, numElems) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err := elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = Int4multirange{Ranges: elements, Status: Present} - return nil -} - -func (src Int4multirange) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, '{') - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Ranges { - if i > 0 { - buf = append(buf, ',') - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - return nil, fmt.Errorf("multi-range does not allow null range") - } else { - buf = append(buf, string(elemBuf)...) - } - - } - - buf = append(buf, '}') - - return buf, nil -} - -func (src Int4multirange) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendInt32(buf, int32(len(src.Ranges))) - - for i := range src.Ranges { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Ranges[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Int4multirange) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Int4multirange) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/int4range.go b/vendor/github.com/jackc/pgtype/int4range.go deleted file mode 100644 index c7f51fa6..00000000 --- a/vendor/github.com/jackc/pgtype/int4range.go +++ /dev/null @@ -1,267 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" - - "github.com/jackc/pgio" -) - -type Int4range struct { - Lower Int4 - Upper Int4 - LowerType BoundType - UpperType BoundType - Status Status -} - -func (dst *Int4range) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Int4range{Status: Null} - return nil - } - - switch value := src.(type) { - case Int4range: - *dst = value - case *Int4range: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - default: - return fmt.Errorf("cannot convert %v to Int4range", src) - } - - return nil -} - -func (dst Int4range) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Int4range) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Int4range) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int4range{Status: Null} - return nil - } - - utr, err := ParseUntypedTextRange(string(src)) - if err != nil { - return err - } - - *dst = Int4range{Status: Present} - - dst.LowerType = utr.LowerType - dst.UpperType = utr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeText(ci, []byte(utr.Lower)); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeText(ci, []byte(utr.Upper)); err != nil { - return err - } - } - - return nil -} - -func (dst *Int4range) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int4range{Status: Null} - return nil - } - - ubr, err := ParseUntypedBinaryRange(src) - if err != nil { - return err - } - - *dst = Int4range{Status: Present} - - dst.LowerType = ubr.LowerType - dst.UpperType = ubr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeBinary(ci, ubr.Lower); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeBinary(ci, ubr.Upper); err != nil { - return err - } - } - - return nil -} - -func (src Int4range) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - switch src.LowerType { - case Exclusive, Unbounded: - buf = append(buf, '(') - case Inclusive: - buf = append(buf, '[') - case Empty: - return append(buf, "empty"...), nil - default: - return nil, fmt.Errorf("unknown lower bound type %v", src.LowerType) - } - - var err error - - if src.LowerType != Unbounded { - buf, err = src.Lower.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - } - - buf = append(buf, ',') - - if src.UpperType != Unbounded { - buf, err = src.Upper.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - } - - switch src.UpperType { - case Exclusive, Unbounded: - buf = append(buf, ')') - case Inclusive: - buf = append(buf, ']') - default: - return nil, fmt.Errorf("unknown upper bound type %v", src.UpperType) - } - - return buf, nil -} - -func (src Int4range) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var rangeType byte - switch src.LowerType { - case Inclusive: - rangeType |= lowerInclusiveMask - case Unbounded: - rangeType |= lowerUnboundedMask - case Exclusive: - case Empty: - return append(buf, emptyMask), nil - default: - return nil, fmt.Errorf("unknown LowerType: %v", src.LowerType) - } - - switch src.UpperType { - case Inclusive: - rangeType |= upperInclusiveMask - case Unbounded: - rangeType |= upperUnboundedMask - case Exclusive: - default: - return nil, fmt.Errorf("unknown UpperType: %v", src.UpperType) - } - - buf = append(buf, rangeType) - - var err error - - if src.LowerType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Lower.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - if src.UpperType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Upper.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Int4range) Scan(src interface{}) error { - if src == nil { - *dst = Int4range{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Int4range) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/int8.go b/vendor/github.com/jackc/pgtype/int8.go deleted file mode 100644 index 0e089979..00000000 --- a/vendor/github.com/jackc/pgtype/int8.go +++ /dev/null @@ -1,298 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "encoding/json" - "fmt" - "math" - "strconv" - - "github.com/jackc/pgio" -) - -type Int8 struct { - Int int64 - Status Status -} - -func (dst *Int8) Set(src interface{}) error { - if src == nil { - *dst = Int8{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case int8: - *dst = Int8{Int: int64(value), Status: Present} - case uint8: - *dst = Int8{Int: int64(value), Status: Present} - case int16: - *dst = Int8{Int: int64(value), Status: Present} - case uint16: - *dst = Int8{Int: int64(value), Status: Present} - case int32: - *dst = Int8{Int: int64(value), Status: Present} - case uint32: - *dst = Int8{Int: int64(value), Status: Present} - case int64: - *dst = Int8{Int: int64(value), Status: Present} - case uint64: - if value > math.MaxInt64 { - return fmt.Errorf("%d is greater than maximum value for Int8", value) - } - *dst = Int8{Int: int64(value), Status: Present} - case int: - if int64(value) < math.MinInt64 { - return fmt.Errorf("%d is greater than maximum value for Int8", value) - } - if int64(value) > math.MaxInt64 { - return fmt.Errorf("%d is greater than maximum value for Int8", value) - } - *dst = Int8{Int: int64(value), Status: Present} - case uint: - if uint64(value) > math.MaxInt64 { - return fmt.Errorf("%d is greater than maximum value for Int8", value) - } - *dst = Int8{Int: int64(value), Status: Present} - case string: - num, err := strconv.ParseInt(value, 10, 64) - if err != nil { - return err - } - *dst = Int8{Int: num, Status: Present} - case float32: - if value > math.MaxInt64 { - return fmt.Errorf("%f is greater than maximum value for Int8", value) - } - *dst = Int8{Int: int64(value), Status: Present} - case float64: - if value > math.MaxInt64 { - return fmt.Errorf("%f is greater than maximum value for Int8", value) - } - *dst = Int8{Int: int64(value), Status: Present} - case *int8: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *uint8: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *int16: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *uint16: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *int32: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *uint32: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *int64: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *uint64: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *int: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *uint: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *string: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *float32: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - case *float64: - if value == nil { - *dst = Int8{Status: Null} - } else { - return dst.Set(*value) - } - default: - if originalSrc, ok := underlyingNumberType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Int8", value) - } - - return nil -} - -func (dst Int8) Get() interface{} { - switch dst.Status { - case Present: - return dst.Int - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Int8) AssignTo(dst interface{}) error { - return int64AssignTo(int64(src.Int), src.Status, dst) -} - -func (dst *Int8) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int8{Status: Null} - return nil - } - - n, err := strconv.ParseInt(string(src), 10, 64) - if err != nil { - return err - } - - *dst = Int8{Int: n, Status: Present} - return nil -} - -func (dst *Int8) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int8{Status: Null} - return nil - } - - if len(src) != 8 { - return fmt.Errorf("invalid length for int8: %v", len(src)) - } - - n := int64(binary.BigEndian.Uint64(src)) - - *dst = Int8{Int: n, Status: Present} - return nil -} - -func (src Int8) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, strconv.FormatInt(src.Int, 10)...), nil -} - -func (src Int8) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return pgio.AppendInt64(buf, src.Int), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Int8) Scan(src interface{}) error { - if src == nil { - *dst = Int8{Status: Null} - return nil - } - - switch src := src.(type) { - case int64: - *dst = Int8{Int: src, Status: Present} - return nil - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Int8) Value() (driver.Value, error) { - switch src.Status { - case Present: - return int64(src.Int), nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} - -func (src Int8) MarshalJSON() ([]byte, error) { - switch src.Status { - case Present: - return []byte(strconv.FormatInt(src.Int, 10)), nil - case Null: - return []byte("null"), nil - case Undefined: - return nil, errUndefined - } - - return nil, errBadStatus -} - -func (dst *Int8) UnmarshalJSON(b []byte) error { - var n *int64 - err := json.Unmarshal(b, &n) - if err != nil { - return err - } - - if n == nil { - *dst = Int8{Status: Null} - } else { - *dst = Int8{Int: *n, Status: Present} - } - - return nil -} diff --git a/vendor/github.com/jackc/pgtype/int8_array.go b/vendor/github.com/jackc/pgtype/int8_array.go deleted file mode 100644 index e405b326..00000000 --- a/vendor/github.com/jackc/pgtype/int8_array.go +++ /dev/null @@ -1,909 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type Int8Array struct { - Elements []Int8 - Dimensions []ArrayDimension - Status Status -} - -func (dst *Int8Array) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Int8Array{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []int16: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int16: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint16: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint16: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []int32: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int32: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint32: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint32: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []int64: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int64: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint64: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint64: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []int: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - elements := make([]Int8, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8Array{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Int8: - if value == nil { - *dst = Int8Array{Status: Null} - } else if len(value) == 0 { - *dst = Int8Array{Status: Present} - } else { - *dst = Int8Array{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = Int8Array{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for Int8Array", src) - } - if elementsLength == 0 { - *dst = Int8Array{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Int8Array", src) - } - - *dst = Int8Array{ - Elements: make([]Int8, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Int8, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to Int8Array, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *Int8Array) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to Int8Array") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in Int8Array", err) - } - index++ - - return index, nil -} - -func (dst Int8Array) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Int8Array) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]int16: - *v = make([]int16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int16: - *v = make([]*int16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint16: - *v = make([]uint16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint16: - *v = make([]*uint16, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]int32: - *v = make([]int32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int32: - *v = make([]*int32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint32: - *v = make([]uint32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint32: - *v = make([]*uint32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]int64: - *v = make([]int64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int64: - *v = make([]*int64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint64: - *v = make([]uint64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint64: - *v = make([]*uint64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]int: - *v = make([]int, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int: - *v = make([]*int, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint: - *v = make([]uint, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint: - *v = make([]*uint, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *Int8Array) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from Int8Array") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from Int8Array") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *Int8Array) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int8Array{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Int8 - - if len(uta.Elements) > 0 { - elements = make([]Int8, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Int8 - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = Int8Array{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *Int8Array) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int8Array{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = Int8Array{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Int8, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = Int8Array{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src Int8Array) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src Int8Array) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("int8"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "int8") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Int8Array) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Int8Array) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/int8_multirange.go b/vendor/github.com/jackc/pgtype/int8_multirange.go deleted file mode 100644 index e0976427..00000000 --- a/vendor/github.com/jackc/pgtype/int8_multirange.go +++ /dev/null @@ -1,239 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - - "github.com/jackc/pgio" -) - -type Int8multirange struct { - Ranges []Int8range - Status Status -} - -func (dst *Int8multirange) Set(src interface{}) error { - //untyped nil and typed nil interfaces are different - if src == nil { - *dst = Int8multirange{Status: Null} - return nil - } - - switch value := src.(type) { - case Int8multirange: - *dst = value - case *Int8multirange: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - case []Int8range: - if value == nil { - *dst = Int8multirange{Status: Null} - } else if len(value) == 0 { - *dst = Int8multirange{Status: Present} - } else { - elements := make([]Int8range, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8multirange{ - Ranges: elements, - Status: Present, - } - } - case []*Int8range: - if value == nil { - *dst = Int8multirange{Status: Null} - } else if len(value) == 0 { - *dst = Int8multirange{Status: Present} - } else { - elements := make([]Int8range, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Int8multirange{ - Ranges: elements, - Status: Present, - } - } - default: - return fmt.Errorf("cannot convert %v to Int8multirange", src) - } - - return nil - -} - -func (dst Int8multirange) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Int8multirange) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Int8multirange) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int8multirange{Status: Null} - return nil - } - - utmr, err := ParseUntypedTextMultirange(string(src)) - if err != nil { - return err - } - - var elements []Int8range - - if len(utmr.Elements) > 0 { - elements = make([]Int8range, len(utmr.Elements)) - - for i, s := range utmr.Elements { - var elem Int8range - - elemSrc := []byte(s) - - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = Int8multirange{Ranges: elements, Status: Present} - - return nil -} - -func (dst *Int8multirange) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int8multirange{Status: Null} - return nil - } - - rp := 0 - - numElems := int(binary.BigEndian.Uint32(src[rp:])) - rp += 4 - - if numElems == 0 { - *dst = Int8multirange{Status: Present} - return nil - } - - elements := make([]Int8range, numElems) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err := elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = Int8multirange{Ranges: elements, Status: Present} - return nil -} - -func (src Int8multirange) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, '{') - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Ranges { - if i > 0 { - buf = append(buf, ',') - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - return nil, fmt.Errorf("multi-range does not allow null range") - } else { - buf = append(buf, string(elemBuf)...) - } - - } - - buf = append(buf, '}') - - return buf, nil -} - -func (src Int8multirange) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendInt32(buf, int32(len(src.Ranges))) - - for i := range src.Ranges { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Ranges[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Int8multirange) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Int8multirange) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/int8range.go b/vendor/github.com/jackc/pgtype/int8range.go deleted file mode 100644 index 71369373..00000000 --- a/vendor/github.com/jackc/pgtype/int8range.go +++ /dev/null @@ -1,267 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" - - "github.com/jackc/pgio" -) - -type Int8range struct { - Lower Int8 - Upper Int8 - LowerType BoundType - UpperType BoundType - Status Status -} - -func (dst *Int8range) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Int8range{Status: Null} - return nil - } - - switch value := src.(type) { - case Int8range: - *dst = value - case *Int8range: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - default: - return fmt.Errorf("cannot convert %v to Int8range", src) - } - - return nil -} - -func (dst Int8range) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Int8range) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Int8range) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int8range{Status: Null} - return nil - } - - utr, err := ParseUntypedTextRange(string(src)) - if err != nil { - return err - } - - *dst = Int8range{Status: Present} - - dst.LowerType = utr.LowerType - dst.UpperType = utr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeText(ci, []byte(utr.Lower)); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeText(ci, []byte(utr.Upper)); err != nil { - return err - } - } - - return nil -} - -func (dst *Int8range) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Int8range{Status: Null} - return nil - } - - ubr, err := ParseUntypedBinaryRange(src) - if err != nil { - return err - } - - *dst = Int8range{Status: Present} - - dst.LowerType = ubr.LowerType - dst.UpperType = ubr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeBinary(ci, ubr.Lower); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeBinary(ci, ubr.Upper); err != nil { - return err - } - } - - return nil -} - -func (src Int8range) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - switch src.LowerType { - case Exclusive, Unbounded: - buf = append(buf, '(') - case Inclusive: - buf = append(buf, '[') - case Empty: - return append(buf, "empty"...), nil - default: - return nil, fmt.Errorf("unknown lower bound type %v", src.LowerType) - } - - var err error - - if src.LowerType != Unbounded { - buf, err = src.Lower.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - } - - buf = append(buf, ',') - - if src.UpperType != Unbounded { - buf, err = src.Upper.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - } - - switch src.UpperType { - case Exclusive, Unbounded: - buf = append(buf, ')') - case Inclusive: - buf = append(buf, ']') - default: - return nil, fmt.Errorf("unknown upper bound type %v", src.UpperType) - } - - return buf, nil -} - -func (src Int8range) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var rangeType byte - switch src.LowerType { - case Inclusive: - rangeType |= lowerInclusiveMask - case Unbounded: - rangeType |= lowerUnboundedMask - case Exclusive: - case Empty: - return append(buf, emptyMask), nil - default: - return nil, fmt.Errorf("unknown LowerType: %v", src.LowerType) - } - - switch src.UpperType { - case Inclusive: - rangeType |= upperInclusiveMask - case Unbounded: - rangeType |= upperUnboundedMask - case Exclusive: - default: - return nil, fmt.Errorf("unknown UpperType: %v", src.UpperType) - } - - buf = append(buf, rangeType) - - var err error - - if src.LowerType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Lower.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - if src.UpperType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Upper.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Int8range) Scan(src interface{}) error { - if src == nil { - *dst = Int8range{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Int8range) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/interval.go b/vendor/github.com/jackc/pgtype/interval.go deleted file mode 100644 index 00ec47c5..00000000 --- a/vendor/github.com/jackc/pgtype/interval.go +++ /dev/null @@ -1,257 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "strconv" - "strings" - "time" - - "github.com/jackc/pgio" -) - -const ( - microsecondsPerSecond = 1000000 - microsecondsPerMinute = 60 * microsecondsPerSecond - microsecondsPerHour = 60 * microsecondsPerMinute - microsecondsPerDay = 24 * microsecondsPerHour - microsecondsPerMonth = 30 * microsecondsPerDay -) - -type Interval struct { - Microseconds int64 - Days int32 - Months int32 - Status Status -} - -func (dst *Interval) Set(src interface{}) error { - if src == nil { - *dst = Interval{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case time.Duration: - *dst = Interval{Microseconds: int64(value) / 1000, Status: Present} - default: - if originalSrc, ok := underlyingPtrType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Interval", value) - } - - return nil -} - -func (dst Interval) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Interval) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *time.Duration: - us := int64(src.Months)*microsecondsPerMonth + int64(src.Days)*microsecondsPerDay + src.Microseconds - *v = time.Duration(us) * time.Microsecond - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (dst *Interval) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Interval{Status: Null} - return nil - } - - var microseconds int64 - var days int32 - var months int32 - - parts := strings.Split(string(src), " ") - - for i := 0; i < len(parts)-1; i += 2 { - scalar, err := strconv.ParseInt(parts[i], 10, 64) - if err != nil { - return fmt.Errorf("bad interval format") - } - - switch parts[i+1] { - case "year", "years": - months += int32(scalar * 12) - case "mon", "mons": - months += int32(scalar) - case "day", "days": - days = int32(scalar) - } - } - - if len(parts)%2 == 1 { - timeParts := strings.SplitN(parts[len(parts)-1], ":", 3) - if len(timeParts) != 3 { - return fmt.Errorf("bad interval format") - } - - var negative bool - if timeParts[0][0] == '-' { - negative = true - timeParts[0] = timeParts[0][1:] - } - - hours, err := strconv.ParseInt(timeParts[0], 10, 64) - if err != nil { - return fmt.Errorf("bad interval hour format: %s", timeParts[0]) - } - - minutes, err := strconv.ParseInt(timeParts[1], 10, 64) - if err != nil { - return fmt.Errorf("bad interval minute format: %s", timeParts[1]) - } - - secondParts := strings.SplitN(timeParts[2], ".", 2) - - seconds, err := strconv.ParseInt(secondParts[0], 10, 64) - if err != nil { - return fmt.Errorf("bad interval second format: %s", secondParts[0]) - } - - var uSeconds int64 - if len(secondParts) == 2 { - uSeconds, err = strconv.ParseInt(secondParts[1], 10, 64) - if err != nil { - return fmt.Errorf("bad interval decimal format: %s", secondParts[1]) - } - - for i := 0; i < 6-len(secondParts[1]); i++ { - uSeconds *= 10 - } - } - - microseconds = hours * microsecondsPerHour - microseconds += minutes * microsecondsPerMinute - microseconds += seconds * microsecondsPerSecond - microseconds += uSeconds - - if negative { - microseconds = -microseconds - } - } - - *dst = Interval{Months: months, Days: days, Microseconds: microseconds, Status: Present} - return nil -} - -func (dst *Interval) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Interval{Status: Null} - return nil - } - - if len(src) != 16 { - return fmt.Errorf("Received an invalid size for an interval: %d", len(src)) - } - - microseconds := int64(binary.BigEndian.Uint64(src)) - days := int32(binary.BigEndian.Uint32(src[8:])) - months := int32(binary.BigEndian.Uint32(src[12:])) - - *dst = Interval{Microseconds: microseconds, Days: days, Months: months, Status: Present} - return nil -} - -func (src Interval) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if src.Months != 0 { - buf = append(buf, strconv.FormatInt(int64(src.Months), 10)...) - buf = append(buf, " mon "...) - } - - if src.Days != 0 { - buf = append(buf, strconv.FormatInt(int64(src.Days), 10)...) - buf = append(buf, " day "...) - } - - absMicroseconds := src.Microseconds - if absMicroseconds < 0 { - absMicroseconds = -absMicroseconds - buf = append(buf, '-') - } - - hours := absMicroseconds / microsecondsPerHour - minutes := (absMicroseconds % microsecondsPerHour) / microsecondsPerMinute - seconds := (absMicroseconds % microsecondsPerMinute) / microsecondsPerSecond - microseconds := absMicroseconds % microsecondsPerSecond - - timeStr := fmt.Sprintf("%02d:%02d:%02d.%06d", hours, minutes, seconds, microseconds) - return append(buf, timeStr...), nil -} - -// EncodeBinary encodes src into w. -func (src Interval) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendInt64(buf, src.Microseconds) - buf = pgio.AppendInt32(buf, src.Days) - return pgio.AppendInt32(buf, src.Months), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Interval) Scan(src interface{}) error { - if src == nil { - *dst = Interval{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Interval) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/json.go b/vendor/github.com/jackc/pgtype/json.go deleted file mode 100644 index a9508bdd..00000000 --- a/vendor/github.com/jackc/pgtype/json.go +++ /dev/null @@ -1,209 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/json" - "errors" - "fmt" - "reflect" -) - -type JSON struct { - Bytes []byte - Status Status -} - -func (dst *JSON) Set(src interface{}) error { - if src == nil { - *dst = JSON{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case string: - *dst = JSON{Bytes: []byte(value), Status: Present} - case *string: - if value == nil { - *dst = JSON{Status: Null} - } else { - *dst = JSON{Bytes: []byte(*value), Status: Present} - } - case []byte: - if value == nil { - *dst = JSON{Status: Null} - } else { - *dst = JSON{Bytes: value, Status: Present} - } - // Encode* methods are defined on *JSON. If JSON is passed directly then the - // struct itself would be encoded instead of Bytes. This is clearly a footgun - // so detect and return an error. See https://github.com/jackc/pgx/issues/350. - case JSON: - return errors.New("use pointer to pgtype.JSON instead of value") - // Same as above but for JSONB (because they share implementation) - case JSONB: - return errors.New("use pointer to pgtype.JSONB instead of value") - - default: - buf, err := json.Marshal(value) - if err != nil { - return err - } - *dst = JSON{Bytes: buf, Status: Present} - } - - return nil -} - -func (dst JSON) Get() interface{} { - switch dst.Status { - case Present: - var i interface{} - err := json.Unmarshal(dst.Bytes, &i) - if err != nil { - return dst - } - return i - case Null: - return nil - default: - return dst.Status - } -} - -func (src *JSON) AssignTo(dst interface{}) error { - switch v := dst.(type) { - case *string: - if src.Status == Present { - *v = string(src.Bytes) - } else { - return fmt.Errorf("cannot assign non-present status to %T", dst) - } - case **string: - if src.Status == Present { - s := string(src.Bytes) - *v = &s - return nil - } else { - *v = nil - return nil - } - case *[]byte: - if src.Status != Present { - *v = nil - } else { - buf := make([]byte, len(src.Bytes)) - copy(buf, src.Bytes) - *v = buf - } - default: - data := src.Bytes - if data == nil || src.Status != Present { - data = []byte("null") - } - - p := reflect.ValueOf(dst).Elem() - p.Set(reflect.Zero(p.Type())) - - return json.Unmarshal(data, dst) - } - - return nil -} - -func (JSON) PreferredResultFormat() int16 { - return TextFormatCode -} - -func (dst *JSON) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = JSON{Status: Null} - return nil - } - - *dst = JSON{Bytes: src, Status: Present} - return nil -} - -func (dst *JSON) DecodeBinary(ci *ConnInfo, src []byte) error { - return dst.DecodeText(ci, src) -} - -func (JSON) PreferredParamFormat() int16 { - return TextFormatCode -} - -func (src JSON) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, src.Bytes...), nil -} - -func (src JSON) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return src.EncodeText(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *JSON) Scan(src interface{}) error { - if src == nil { - *dst = JSON{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src JSON) Value() (driver.Value, error) { - switch src.Status { - case Present: - return src.Bytes, nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} - -func (src JSON) MarshalJSON() ([]byte, error) { - switch src.Status { - case Present: - return src.Bytes, nil - case Null: - return []byte("null"), nil - case Undefined: - return nil, errUndefined - } - - return nil, errBadStatus -} - -func (dst *JSON) UnmarshalJSON(b []byte) error { - if b == nil || string(b) == "null" { - *dst = JSON{Status: Null} - } else { - *dst = JSON{Bytes: b, Status: Present} - } - return nil - -} diff --git a/vendor/github.com/jackc/pgtype/json_array.go b/vendor/github.com/jackc/pgtype/json_array.go deleted file mode 100644 index 8d68882f..00000000 --- a/vendor/github.com/jackc/pgtype/json_array.go +++ /dev/null @@ -1,546 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "encoding/json" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type JSONArray struct { - Elements []JSON - Dimensions []ArrayDimension - Status Status -} - -func (dst *JSONArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = JSONArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []string: - if value == nil { - *dst = JSONArray{Status: Null} - } else if len(value) == 0 { - *dst = JSONArray{Status: Present} - } else { - elements := make([]JSON, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = JSONArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case [][]byte: - if value == nil { - *dst = JSONArray{Status: Null} - } else if len(value) == 0 { - *dst = JSONArray{Status: Present} - } else { - elements := make([]JSON, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = JSONArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []json.RawMessage: - if value == nil { - *dst = JSONArray{Status: Null} - } else if len(value) == 0 { - *dst = JSONArray{Status: Present} - } else { - elements := make([]JSON, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = JSONArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []JSON: - if value == nil { - *dst = JSONArray{Status: Null} - } else if len(value) == 0 { - *dst = JSONArray{Status: Present} - } else { - *dst = JSONArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = JSONArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for JSONArray", src) - } - if elementsLength == 0 { - *dst = JSONArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to JSONArray", src) - } - - *dst = JSONArray{ - Elements: make([]JSON, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]JSON, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to JSONArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *JSONArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to JSONArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in JSONArray", err) - } - index++ - - return index, nil -} - -func (dst JSONArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *JSONArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]string: - *v = make([]string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[][]byte: - *v = make([][]byte, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]json.RawMessage: - *v = make([]json.RawMessage, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *JSONArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from JSONArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from JSONArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *JSONArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = JSONArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []JSON - - if len(uta.Elements) > 0 { - elements = make([]JSON, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem JSON - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = JSONArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *JSONArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = JSONArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = JSONArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]JSON, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = JSONArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src JSONArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src JSONArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("json"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "json") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *JSONArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src JSONArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/jsonb.go b/vendor/github.com/jackc/pgtype/jsonb.go deleted file mode 100644 index c9dafc93..00000000 --- a/vendor/github.com/jackc/pgtype/jsonb.go +++ /dev/null @@ -1,85 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" -) - -type JSONB JSON - -func (dst *JSONB) Set(src interface{}) error { - return (*JSON)(dst).Set(src) -} - -func (dst JSONB) Get() interface{} { - return (JSON)(dst).Get() -} - -func (src *JSONB) AssignTo(dst interface{}) error { - return (*JSON)(src).AssignTo(dst) -} - -func (JSONB) PreferredResultFormat() int16 { - return TextFormatCode -} - -func (dst *JSONB) DecodeText(ci *ConnInfo, src []byte) error { - return (*JSON)(dst).DecodeText(ci, src) -} - -func (dst *JSONB) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = JSONB{Status: Null} - return nil - } - - if len(src) == 0 { - return fmt.Errorf("jsonb too short") - } - - if src[0] != 1 { - return fmt.Errorf("unknown jsonb version number %d", src[0]) - } - - *dst = JSONB{Bytes: src[1:], Status: Present} - return nil - -} - -func (JSONB) PreferredParamFormat() int16 { - return TextFormatCode -} - -func (src JSONB) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (JSON)(src).EncodeText(ci, buf) -} - -func (src JSONB) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, 1) - return append(buf, src.Bytes...), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *JSONB) Scan(src interface{}) error { - return (*JSON)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src JSONB) Value() (driver.Value, error) { - return (JSON)(src).Value() -} - -func (src JSONB) MarshalJSON() ([]byte, error) { - return (JSON)(src).MarshalJSON() -} - -func (dst *JSONB) UnmarshalJSON(b []byte) error { - return (*JSON)(dst).UnmarshalJSON(b) -} diff --git a/vendor/github.com/jackc/pgtype/jsonb_array.go b/vendor/github.com/jackc/pgtype/jsonb_array.go deleted file mode 100644 index e78ad377..00000000 --- a/vendor/github.com/jackc/pgtype/jsonb_array.go +++ /dev/null @@ -1,546 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "encoding/json" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type JSONBArray struct { - Elements []JSONB - Dimensions []ArrayDimension - Status Status -} - -func (dst *JSONBArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = JSONBArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []string: - if value == nil { - *dst = JSONBArray{Status: Null} - } else if len(value) == 0 { - *dst = JSONBArray{Status: Present} - } else { - elements := make([]JSONB, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = JSONBArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case [][]byte: - if value == nil { - *dst = JSONBArray{Status: Null} - } else if len(value) == 0 { - *dst = JSONBArray{Status: Present} - } else { - elements := make([]JSONB, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = JSONBArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []json.RawMessage: - if value == nil { - *dst = JSONBArray{Status: Null} - } else if len(value) == 0 { - *dst = JSONBArray{Status: Present} - } else { - elements := make([]JSONB, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = JSONBArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []JSONB: - if value == nil { - *dst = JSONBArray{Status: Null} - } else if len(value) == 0 { - *dst = JSONBArray{Status: Present} - } else { - *dst = JSONBArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = JSONBArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for JSONBArray", src) - } - if elementsLength == 0 { - *dst = JSONBArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to JSONBArray", src) - } - - *dst = JSONBArray{ - Elements: make([]JSONB, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]JSONB, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to JSONBArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *JSONBArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to JSONBArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in JSONBArray", err) - } - index++ - - return index, nil -} - -func (dst JSONBArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *JSONBArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]string: - *v = make([]string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[][]byte: - *v = make([][]byte, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]json.RawMessage: - *v = make([]json.RawMessage, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *JSONBArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from JSONBArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from JSONBArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *JSONBArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = JSONBArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []JSONB - - if len(uta.Elements) > 0 { - elements = make([]JSONB, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem JSONB - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = JSONBArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *JSONBArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = JSONBArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = JSONBArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]JSONB, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = JSONBArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src JSONBArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src JSONBArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("jsonb"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "jsonb") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *JSONBArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src JSONBArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/line.go b/vendor/github.com/jackc/pgtype/line.go deleted file mode 100644 index 3564b174..00000000 --- a/vendor/github.com/jackc/pgtype/line.go +++ /dev/null @@ -1,148 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "strconv" - "strings" - - "github.com/jackc/pgio" -) - -type Line struct { - A, B, C float64 - Status Status -} - -func (dst *Line) Set(src interface{}) error { - return fmt.Errorf("cannot convert %v to Line", src) -} - -func (dst Line) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Line) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Line) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Line{Status: Null} - return nil - } - - if len(src) < 7 { - return fmt.Errorf("invalid length for Line: %v", len(src)) - } - - parts := strings.SplitN(string(src[1:len(src)-1]), ",", 3) - if len(parts) < 3 { - return fmt.Errorf("invalid format for line") - } - - a, err := strconv.ParseFloat(parts[0], 64) - if err != nil { - return err - } - - b, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return err - } - - c, err := strconv.ParseFloat(parts[2], 64) - if err != nil { - return err - } - - *dst = Line{A: a, B: b, C: c, Status: Present} - return nil -} - -func (dst *Line) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Line{Status: Null} - return nil - } - - if len(src) != 24 { - return fmt.Errorf("invalid length for Line: %v", len(src)) - } - - a := binary.BigEndian.Uint64(src) - b := binary.BigEndian.Uint64(src[8:]) - c := binary.BigEndian.Uint64(src[16:]) - - *dst = Line{ - A: math.Float64frombits(a), - B: math.Float64frombits(b), - C: math.Float64frombits(c), - Status: Present, - } - return nil -} - -func (src Line) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, fmt.Sprintf(`{%s,%s,%s}`, - strconv.FormatFloat(src.A, 'f', -1, 64), - strconv.FormatFloat(src.B, 'f', -1, 64), - strconv.FormatFloat(src.C, 'f', -1, 64), - )...) - - return buf, nil -} - -func (src Line) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendUint64(buf, math.Float64bits(src.A)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.B)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.C)) - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Line) Scan(src interface{}) error { - if src == nil { - *dst = Line{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Line) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/lseg.go b/vendor/github.com/jackc/pgtype/lseg.go deleted file mode 100644 index 894dae86..00000000 --- a/vendor/github.com/jackc/pgtype/lseg.go +++ /dev/null @@ -1,165 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "strconv" - "strings" - - "github.com/jackc/pgio" -) - -type Lseg struct { - P [2]Vec2 - Status Status -} - -func (dst *Lseg) Set(src interface{}) error { - return fmt.Errorf("cannot convert %v to Lseg", src) -} - -func (dst Lseg) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Lseg) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Lseg) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Lseg{Status: Null} - return nil - } - - if len(src) < 11 { - return fmt.Errorf("invalid length for Lseg: %v", len(src)) - } - - str := string(src[2:]) - - var end int - end = strings.IndexByte(str, ',') - - x1, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - str = str[end+1:] - end = strings.IndexByte(str, ')') - - y1, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - str = str[end+3:] - end = strings.IndexByte(str, ',') - - x2, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - str = str[end+1 : len(str)-2] - - y2, err := strconv.ParseFloat(str, 64) - if err != nil { - return err - } - - *dst = Lseg{P: [2]Vec2{{x1, y1}, {x2, y2}}, Status: Present} - return nil -} - -func (dst *Lseg) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Lseg{Status: Null} - return nil - } - - if len(src) != 32 { - return fmt.Errorf("invalid length for Lseg: %v", len(src)) - } - - x1 := binary.BigEndian.Uint64(src) - y1 := binary.BigEndian.Uint64(src[8:]) - x2 := binary.BigEndian.Uint64(src[16:]) - y2 := binary.BigEndian.Uint64(src[24:]) - - *dst = Lseg{ - P: [2]Vec2{ - {math.Float64frombits(x1), math.Float64frombits(y1)}, - {math.Float64frombits(x2), math.Float64frombits(y2)}, - }, - Status: Present, - } - return nil -} - -func (src Lseg) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, fmt.Sprintf(`[(%s,%s),(%s,%s)]`, - strconv.FormatFloat(src.P[0].X, 'f', -1, 64), - strconv.FormatFloat(src.P[0].Y, 'f', -1, 64), - strconv.FormatFloat(src.P[1].X, 'f', -1, 64), - strconv.FormatFloat(src.P[1].Y, 'f', -1, 64), - )...) - - return buf, nil -} - -func (src Lseg) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendUint64(buf, math.Float64bits(src.P[0].X)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.P[0].Y)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.P[1].X)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.P[1].Y)) - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Lseg) Scan(src interface{}) error { - if src == nil { - *dst = Lseg{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Lseg) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/ltree.go b/vendor/github.com/jackc/pgtype/ltree.go deleted file mode 100644 index 8c8d4213..00000000 --- a/vendor/github.com/jackc/pgtype/ltree.go +++ /dev/null @@ -1,72 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" -) - -type Ltree Text - -func (dst *Ltree) Set(src interface{}) error { - return (*Text)(dst).Set(src) -} - -func (dst Ltree) Get() interface{} { - return (Text)(dst).Get() -} - -func (src *Ltree) AssignTo(dst interface{}) error { - return (*Text)(src).AssignTo(dst) -} - -func (src Ltree) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Text)(src).EncodeText(ci, buf) -} - -func (src Ltree) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - buf = append(buf, 1) - return append(buf, src.String...), nil -} - -func (Ltree) PreferredResultFormat() int16 { - return TextFormatCode -} - -func (dst *Ltree) DecodeText(ci *ConnInfo, src []byte) error { - return (*Text)(dst).DecodeText(ci, src) -} - -func (dst *Ltree) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Ltree{Status: Null} - return nil - } - - // Get Ltree version, only 1 is allowed - version := src[0] - if version != 1 { - return fmt.Errorf("unsupported ltree version %d", version) - } - - ltreeStr := string(src[1:]) - *dst = Ltree{String: ltreeStr, Status: Present} - return nil -} - -func (Ltree) PreferredParamFormat() int16 { - return TextFormatCode -} - -func (dst *Ltree) Scan(src interface{}) error { - return (*Text)(dst).Scan(src) -} - -func (src Ltree) Value() (driver.Value, error) { - return (Text)(src).Value() -} diff --git a/vendor/github.com/jackc/pgtype/macaddr.go b/vendor/github.com/jackc/pgtype/macaddr.go deleted file mode 100644 index 1d3cfe7b..00000000 --- a/vendor/github.com/jackc/pgtype/macaddr.go +++ /dev/null @@ -1,173 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" - "net" -) - -type Macaddr struct { - Addr net.HardwareAddr - Status Status -} - -func (dst *Macaddr) Set(src interface{}) error { - if src == nil { - *dst = Macaddr{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case net.HardwareAddr: - addr := make(net.HardwareAddr, len(value)) - copy(addr, value) - *dst = Macaddr{Addr: addr, Status: Present} - case string: - addr, err := net.ParseMAC(value) - if err != nil { - return err - } - *dst = Macaddr{Addr: addr, Status: Present} - case *net.HardwareAddr: - if value == nil { - *dst = Macaddr{Status: Null} - } else { - return dst.Set(*value) - } - case *string: - if value == nil { - *dst = Macaddr{Status: Null} - } else { - return dst.Set(*value) - } - default: - if originalSrc, ok := underlyingPtrType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Macaddr", value) - } - - return nil -} - -func (dst Macaddr) Get() interface{} { - switch dst.Status { - case Present: - return dst.Addr - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Macaddr) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *net.HardwareAddr: - *v = make(net.HardwareAddr, len(src.Addr)) - copy(*v, src.Addr) - return nil - case *string: - *v = src.Addr.String() - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (dst *Macaddr) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Macaddr{Status: Null} - return nil - } - - addr, err := net.ParseMAC(string(src)) - if err != nil { - return err - } - - *dst = Macaddr{Addr: addr, Status: Present} - return nil -} - -func (dst *Macaddr) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Macaddr{Status: Null} - return nil - } - - if len(src) != 6 { - return fmt.Errorf("Received an invalid size for a macaddr: %d", len(src)) - } - - addr := make(net.HardwareAddr, 6) - copy(addr, src) - - *dst = Macaddr{Addr: addr, Status: Present} - - return nil -} - -func (src Macaddr) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, src.Addr.String()...), nil -} - -// EncodeBinary encodes src into w. -func (src Macaddr) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, src.Addr...), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Macaddr) Scan(src interface{}) error { - if src == nil { - *dst = Macaddr{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Macaddr) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/macaddr_array.go b/vendor/github.com/jackc/pgtype/macaddr_array.go deleted file mode 100644 index bdb1f203..00000000 --- a/vendor/github.com/jackc/pgtype/macaddr_array.go +++ /dev/null @@ -1,518 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "net" - "reflect" - - "github.com/jackc/pgio" -) - -type MacaddrArray struct { - Elements []Macaddr - Dimensions []ArrayDimension - Status Status -} - -func (dst *MacaddrArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = MacaddrArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []net.HardwareAddr: - if value == nil { - *dst = MacaddrArray{Status: Null} - } else if len(value) == 0 { - *dst = MacaddrArray{Status: Present} - } else { - elements := make([]Macaddr, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = MacaddrArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*net.HardwareAddr: - if value == nil { - *dst = MacaddrArray{Status: Null} - } else if len(value) == 0 { - *dst = MacaddrArray{Status: Present} - } else { - elements := make([]Macaddr, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = MacaddrArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Macaddr: - if value == nil { - *dst = MacaddrArray{Status: Null} - } else if len(value) == 0 { - *dst = MacaddrArray{Status: Present} - } else { - *dst = MacaddrArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = MacaddrArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for MacaddrArray", src) - } - if elementsLength == 0 { - *dst = MacaddrArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to MacaddrArray", src) - } - - *dst = MacaddrArray{ - Elements: make([]Macaddr, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Macaddr, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to MacaddrArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *MacaddrArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to MacaddrArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in MacaddrArray", err) - } - index++ - - return index, nil -} - -func (dst MacaddrArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *MacaddrArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]net.HardwareAddr: - *v = make([]net.HardwareAddr, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*net.HardwareAddr: - *v = make([]*net.HardwareAddr, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *MacaddrArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from MacaddrArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from MacaddrArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *MacaddrArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = MacaddrArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Macaddr - - if len(uta.Elements) > 0 { - elements = make([]Macaddr, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Macaddr - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = MacaddrArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *MacaddrArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = MacaddrArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = MacaddrArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Macaddr, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = MacaddrArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src MacaddrArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src MacaddrArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("macaddr"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "macaddr") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *MacaddrArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src MacaddrArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/multirange.go b/vendor/github.com/jackc/pgtype/multirange.go deleted file mode 100644 index beb11f70..00000000 --- a/vendor/github.com/jackc/pgtype/multirange.go +++ /dev/null @@ -1,83 +0,0 @@ -package pgtype - -import ( - "bytes" - "fmt" -) - -type UntypedTextMultirange struct { - Elements []string -} - -func ParseUntypedTextMultirange(src string) (*UntypedTextMultirange, error) { - utmr := &UntypedTextMultirange{} - utmr.Elements = make([]string, 0) - - buf := bytes.NewBufferString(src) - - skipWhitespace(buf) - - r, _, err := buf.ReadRune() - if err != nil { - return nil, fmt.Errorf("invalid array: %v", err) - } - - if r != '{' { - return nil, fmt.Errorf("invalid multirange, expected '{': %v", err) - } - -parseValueLoop: - for { - r, _, err = buf.ReadRune() - if err != nil { - return nil, fmt.Errorf("invalid multirange: %v", err) - } - - switch r { - case ',': // skip range separator - case '}': - break parseValueLoop - default: - buf.UnreadRune() - value, err := parseRange(buf) - if err != nil { - return nil, fmt.Errorf("invalid multirange value: %v", err) - } - utmr.Elements = append(utmr.Elements, value) - } - } - - skipWhitespace(buf) - - if buf.Len() > 0 { - return nil, fmt.Errorf("unexpected trailing data: %v", buf.String()) - } - - return utmr, nil - -} - -func parseRange(buf *bytes.Buffer) (string, error) { - - s := &bytes.Buffer{} - - boundSepRead := false - for { - r, _, err := buf.ReadRune() - if err != nil { - return "", err - } - - switch r { - case ',', '}': - if r == ',' && !boundSepRead { - boundSepRead = true - break - } - buf.UnreadRune() - return s.String(), nil - } - - s.WriteRune(r) - } -} diff --git a/vendor/github.com/jackc/pgtype/name.go b/vendor/github.com/jackc/pgtype/name.go deleted file mode 100644 index 7ce8d25e..00000000 --- a/vendor/github.com/jackc/pgtype/name.go +++ /dev/null @@ -1,58 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" -) - -// Name is a type used for PostgreSQL's special 63-byte -// name data type, used for identifiers like table names. -// The pg_class.relname column is a good example of where the -// name data type is used. -// -// Note that the underlying Go data type of pgx.Name is string, -// so there is no way to enforce the 63-byte length. Inputting -// a longer name into PostgreSQL will result in silent truncation -// to 63 bytes. -// -// Also, if you have custom-compiled PostgreSQL and set -// NAMEDATALEN to a different value, obviously that number of -// bytes applies, rather than the default 63. -type Name Text - -func (dst *Name) Set(src interface{}) error { - return (*Text)(dst).Set(src) -} - -func (dst Name) Get() interface{} { - return (Text)(dst).Get() -} - -func (src *Name) AssignTo(dst interface{}) error { - return (*Text)(src).AssignTo(dst) -} - -func (dst *Name) DecodeText(ci *ConnInfo, src []byte) error { - return (*Text)(dst).DecodeText(ci, src) -} - -func (dst *Name) DecodeBinary(ci *ConnInfo, src []byte) error { - return (*Text)(dst).DecodeBinary(ci, src) -} - -func (src Name) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Text)(src).EncodeText(ci, buf) -} - -func (src Name) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Text)(src).EncodeBinary(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *Name) Scan(src interface{}) error { - return (*Text)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Name) Value() (driver.Value, error) { - return (Text)(src).Value() -} diff --git a/vendor/github.com/jackc/pgtype/num_multirange.go b/vendor/github.com/jackc/pgtype/num_multirange.go deleted file mode 100644 index cbabc8ac..00000000 --- a/vendor/github.com/jackc/pgtype/num_multirange.go +++ /dev/null @@ -1,239 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - - "github.com/jackc/pgio" -) - -type Nummultirange struct { - Ranges []Numrange - Status Status -} - -func (dst *Nummultirange) Set(src interface{}) error { - //untyped nil and typed nil interfaces are different - if src == nil { - *dst = Nummultirange{Status: Null} - return nil - } - - switch value := src.(type) { - case Nummultirange: - *dst = value - case *Nummultirange: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - case []Numrange: - if value == nil { - *dst = Nummultirange{Status: Null} - } else if len(value) == 0 { - *dst = Nummultirange{Status: Present} - } else { - elements := make([]Numrange, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Nummultirange{ - Ranges: elements, - Status: Present, - } - } - case []*Numrange: - if value == nil { - *dst = Nummultirange{Status: Null} - } else if len(value) == 0 { - *dst = Nummultirange{Status: Present} - } else { - elements := make([]Numrange, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = Nummultirange{ - Ranges: elements, - Status: Present, - } - } - default: - return fmt.Errorf("cannot convert %v to Nummultirange", src) - } - - return nil - -} - -func (dst Nummultirange) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Nummultirange) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Nummultirange) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Nummultirange{Status: Null} - return nil - } - - utmr, err := ParseUntypedTextMultirange(string(src)) - if err != nil { - return err - } - - var elements []Numrange - - if len(utmr.Elements) > 0 { - elements = make([]Numrange, len(utmr.Elements)) - - for i, s := range utmr.Elements { - var elem Numrange - - elemSrc := []byte(s) - - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = Nummultirange{Ranges: elements, Status: Present} - - return nil -} - -func (dst *Nummultirange) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Nummultirange{Status: Null} - return nil - } - - rp := 0 - - numElems := int(binary.BigEndian.Uint32(src[rp:])) - rp += 4 - - if numElems == 0 { - *dst = Nummultirange{Status: Present} - return nil - } - - elements := make([]Numrange, numElems) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err := elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = Nummultirange{Ranges: elements, Status: Present} - return nil -} - -func (src Nummultirange) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, '{') - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Ranges { - if i > 0 { - buf = append(buf, ',') - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - return nil, fmt.Errorf("multi-range does not allow null range") - } else { - buf = append(buf, string(elemBuf)...) - } - - } - - buf = append(buf, '}') - - return buf, nil -} - -func (src Nummultirange) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendInt32(buf, int32(len(src.Ranges))) - - for i := range src.Ranges { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Ranges[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Nummultirange) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Nummultirange) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/numeric.go b/vendor/github.com/jackc/pgtype/numeric.go deleted file mode 100644 index 1f32b36b..00000000 --- a/vendor/github.com/jackc/pgtype/numeric.go +++ /dev/null @@ -1,853 +0,0 @@ -package pgtype - -import ( - "bytes" - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "math/big" - "strconv" - "strings" - - "github.com/jackc/pgio" -) - -// PostgreSQL internal numeric storage uses 16-bit "digits" with base of 10,000 -const nbase = 10000 - -const ( - pgNumericNaN = 0x00000000c0000000 - pgNumericNaNSign = 0xc000 - - pgNumericPosInf = 0x00000000d0000000 - pgNumericPosInfSign = 0xd000 - - pgNumericNegInf = 0x00000000f0000000 - pgNumericNegInfSign = 0xf000 -) - -var big0 *big.Int = big.NewInt(0) -var big1 *big.Int = big.NewInt(1) -var big10 *big.Int = big.NewInt(10) -var big100 *big.Int = big.NewInt(100) -var big1000 *big.Int = big.NewInt(1000) - -var bigMaxInt8 *big.Int = big.NewInt(math.MaxInt8) -var bigMinInt8 *big.Int = big.NewInt(math.MinInt8) -var bigMaxInt16 *big.Int = big.NewInt(math.MaxInt16) -var bigMinInt16 *big.Int = big.NewInt(math.MinInt16) -var bigMaxInt32 *big.Int = big.NewInt(math.MaxInt32) -var bigMinInt32 *big.Int = big.NewInt(math.MinInt32) -var bigMaxInt64 *big.Int = big.NewInt(math.MaxInt64) -var bigMinInt64 *big.Int = big.NewInt(math.MinInt64) -var bigMaxInt *big.Int = big.NewInt(int64(maxInt)) -var bigMinInt *big.Int = big.NewInt(int64(minInt)) - -var bigMaxUint8 *big.Int = big.NewInt(math.MaxUint8) -var bigMaxUint16 *big.Int = big.NewInt(math.MaxUint16) -var bigMaxUint32 *big.Int = big.NewInt(math.MaxUint32) -var bigMaxUint64 *big.Int = (&big.Int{}).SetUint64(uint64(math.MaxUint64)) -var bigMaxUint *big.Int = (&big.Int{}).SetUint64(uint64(maxUint)) - -var bigNBase *big.Int = big.NewInt(nbase) -var bigNBaseX2 *big.Int = big.NewInt(nbase * nbase) -var bigNBaseX3 *big.Int = big.NewInt(nbase * nbase * nbase) -var bigNBaseX4 *big.Int = big.NewInt(nbase * nbase * nbase * nbase) - -type Numeric struct { - Int *big.Int - Exp int32 - Status Status - NaN bool - InfinityModifier InfinityModifier -} - -func (dst *Numeric) Set(src interface{}) error { - if src == nil { - *dst = Numeric{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case float32: - if math.IsNaN(float64(value)) { - *dst = Numeric{Status: Present, NaN: true} - return nil - } else if math.IsInf(float64(value), 1) { - *dst = Numeric{Status: Present, InfinityModifier: Infinity} - return nil - } else if math.IsInf(float64(value), -1) { - *dst = Numeric{Status: Present, InfinityModifier: NegativeInfinity} - return nil - } - num, exp, err := parseNumericString(strconv.FormatFloat(float64(value), 'f', -1, 64)) - if err != nil { - return err - } - *dst = Numeric{Int: num, Exp: exp, Status: Present} - case float64: - if math.IsNaN(value) { - *dst = Numeric{Status: Present, NaN: true} - return nil - } else if math.IsInf(value, 1) { - *dst = Numeric{Status: Present, InfinityModifier: Infinity} - return nil - } else if math.IsInf(value, -1) { - *dst = Numeric{Status: Present, InfinityModifier: NegativeInfinity} - return nil - } - num, exp, err := parseNumericString(strconv.FormatFloat(value, 'f', -1, 64)) - if err != nil { - return err - } - *dst = Numeric{Int: num, Exp: exp, Status: Present} - case int8: - *dst = Numeric{Int: big.NewInt(int64(value)), Status: Present} - case uint8: - *dst = Numeric{Int: big.NewInt(int64(value)), Status: Present} - case int16: - *dst = Numeric{Int: big.NewInt(int64(value)), Status: Present} - case uint16: - *dst = Numeric{Int: big.NewInt(int64(value)), Status: Present} - case int32: - *dst = Numeric{Int: big.NewInt(int64(value)), Status: Present} - case uint32: - *dst = Numeric{Int: big.NewInt(int64(value)), Status: Present} - case int64: - *dst = Numeric{Int: big.NewInt(value), Status: Present} - case uint64: - *dst = Numeric{Int: (&big.Int{}).SetUint64(value), Status: Present} - case int: - *dst = Numeric{Int: big.NewInt(int64(value)), Status: Present} - case uint: - *dst = Numeric{Int: (&big.Int{}).SetUint64(uint64(value)), Status: Present} - case string: - num, exp, err := parseNumericString(value) - if err != nil { - return err - } - *dst = Numeric{Int: num, Exp: exp, Status: Present} - case *float64: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *float32: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *int8: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *uint8: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *int16: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *uint16: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *int32: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *uint32: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *int64: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *uint64: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *int: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *uint: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case *string: - if value == nil { - *dst = Numeric{Status: Null} - } else { - return dst.Set(*value) - } - case InfinityModifier: - *dst = Numeric{InfinityModifier: value, Status: Present} - default: - if originalSrc, ok := underlyingNumberType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Numeric", value) - } - - return nil -} - -func (dst Numeric) Get() interface{} { - switch dst.Status { - case Present: - if dst.InfinityModifier != None { - return dst.InfinityModifier - } - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Numeric) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *float32: - f, err := src.toFloat64() - if err != nil { - return err - } - return float64AssignTo(f, src.Status, dst) - case *float64: - f, err := src.toFloat64() - if err != nil { - return err - } - return float64AssignTo(f, src.Status, dst) - case *int: - normalizedInt, err := src.toBigInt() - if err != nil { - return err - } - if normalizedInt.Cmp(bigMaxInt) > 0 { - return fmt.Errorf("%v is greater than maximum value for %T", normalizedInt, *v) - } - if normalizedInt.Cmp(bigMinInt) < 0 { - return fmt.Errorf("%v is less than minimum value for %T", normalizedInt, *v) - } - *v = int(normalizedInt.Int64()) - case *int8: - normalizedInt, err := src.toBigInt() - if err != nil { - return err - } - if normalizedInt.Cmp(bigMaxInt8) > 0 { - return fmt.Errorf("%v is greater than maximum value for %T", normalizedInt, *v) - } - if normalizedInt.Cmp(bigMinInt8) < 0 { - return fmt.Errorf("%v is less than minimum value for %T", normalizedInt, *v) - } - *v = int8(normalizedInt.Int64()) - case *int16: - normalizedInt, err := src.toBigInt() - if err != nil { - return err - } - if normalizedInt.Cmp(bigMaxInt16) > 0 { - return fmt.Errorf("%v is greater than maximum value for %T", normalizedInt, *v) - } - if normalizedInt.Cmp(bigMinInt16) < 0 { - return fmt.Errorf("%v is less than minimum value for %T", normalizedInt, *v) - } - *v = int16(normalizedInt.Int64()) - case *int32: - normalizedInt, err := src.toBigInt() - if err != nil { - return err - } - if normalizedInt.Cmp(bigMaxInt32) > 0 { - return fmt.Errorf("%v is greater than maximum value for %T", normalizedInt, *v) - } - if normalizedInt.Cmp(bigMinInt32) < 0 { - return fmt.Errorf("%v is less than minimum value for %T", normalizedInt, *v) - } - *v = int32(normalizedInt.Int64()) - case *int64: - normalizedInt, err := src.toBigInt() - if err != nil { - return err - } - if normalizedInt.Cmp(bigMaxInt64) > 0 { - return fmt.Errorf("%v is greater than maximum value for %T", normalizedInt, *v) - } - if normalizedInt.Cmp(bigMinInt64) < 0 { - return fmt.Errorf("%v is less than minimum value for %T", normalizedInt, *v) - } - *v = normalizedInt.Int64() - case *uint: - normalizedInt, err := src.toBigInt() - if err != nil { - return err - } - if normalizedInt.Cmp(big0) < 0 { - return fmt.Errorf("%d is less than zero for %T", normalizedInt, *v) - } else if normalizedInt.Cmp(bigMaxUint) > 0 { - return fmt.Errorf("%d is greater than maximum value for %T", normalizedInt, *v) - } - *v = uint(normalizedInt.Uint64()) - case *uint8: - normalizedInt, err := src.toBigInt() - if err != nil { - return err - } - if normalizedInt.Cmp(big0) < 0 { - return fmt.Errorf("%d is less than zero for %T", normalizedInt, *v) - } else if normalizedInt.Cmp(bigMaxUint8) > 0 { - return fmt.Errorf("%d is greater than maximum value for %T", normalizedInt, *v) - } - *v = uint8(normalizedInt.Uint64()) - case *uint16: - normalizedInt, err := src.toBigInt() - if err != nil { - return err - } - if normalizedInt.Cmp(big0) < 0 { - return fmt.Errorf("%d is less than zero for %T", normalizedInt, *v) - } else if normalizedInt.Cmp(bigMaxUint16) > 0 { - return fmt.Errorf("%d is greater than maximum value for %T", normalizedInt, *v) - } - *v = uint16(normalizedInt.Uint64()) - case *uint32: - normalizedInt, err := src.toBigInt() - if err != nil { - return err - } - if normalizedInt.Cmp(big0) < 0 { - return fmt.Errorf("%d is less than zero for %T", normalizedInt, *v) - } else if normalizedInt.Cmp(bigMaxUint32) > 0 { - return fmt.Errorf("%d is greater than maximum value for %T", normalizedInt, *v) - } - *v = uint32(normalizedInt.Uint64()) - case *uint64: - normalizedInt, err := src.toBigInt() - if err != nil { - return err - } - if normalizedInt.Cmp(big0) < 0 { - return fmt.Errorf("%d is less than zero for %T", normalizedInt, *v) - } else if normalizedInt.Cmp(bigMaxUint64) > 0 { - return fmt.Errorf("%d is greater than maximum value for %T", normalizedInt, *v) - } - *v = normalizedInt.Uint64() - case *big.Rat: - rat, err := src.toBigRat() - if err != nil { - return err - } - v.Set(rat) - case *string: - buf, err := encodeNumericText(*src, nil) - if err != nil { - return err - } - *v = string(buf) - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return nil -} - -func (dst *Numeric) toBigInt() (*big.Int, error) { - if dst.Exp == 0 { - return dst.Int, nil - } - - num := &big.Int{} - num.Set(dst.Int) - if dst.Exp > 0 { - mul := &big.Int{} - mul.Exp(big10, big.NewInt(int64(dst.Exp)), nil) - num.Mul(num, mul) - return num, nil - } - - div := &big.Int{} - div.Exp(big10, big.NewInt(int64(-dst.Exp)), nil) - remainder := &big.Int{} - num.DivMod(num, div, remainder) - if remainder.Cmp(big0) != 0 { - return nil, fmt.Errorf("cannot convert %v to integer", dst) - } - return num, nil -} - -func (dst *Numeric) toBigRat() (*big.Rat, error) { - if dst.NaN { - return nil, fmt.Errorf("%v is not a number", dst) - } else if dst.InfinityModifier == Infinity { - return nil, fmt.Errorf("%v is infinity", dst) - } else if dst.InfinityModifier == NegativeInfinity { - return nil, fmt.Errorf("%v is -infinity", dst) - } - - num := new(big.Rat).SetInt(dst.Int) - if dst.Exp > 0 { - mul := new(big.Int).Exp(big10, big.NewInt(int64(dst.Exp)), nil) - num.Mul(num, new(big.Rat).SetInt(mul)) - } else if dst.Exp < 0 { - mul := new(big.Int).Exp(big10, big.NewInt(int64(-dst.Exp)), nil) - num.Quo(num, new(big.Rat).SetInt(mul)) - } - return num, nil -} - -func (src *Numeric) toFloat64() (float64, error) { - if src.NaN { - return math.NaN(), nil - } else if src.InfinityModifier == Infinity { - return math.Inf(1), nil - } else if src.InfinityModifier == NegativeInfinity { - return math.Inf(-1), nil - } - - buf := make([]byte, 0, 32) - - buf = append(buf, src.Int.String()...) - buf = append(buf, 'e') - buf = append(buf, strconv.FormatInt(int64(src.Exp), 10)...) - - f, err := strconv.ParseFloat(string(buf), 64) - if err != nil { - return 0, err - } - return f, nil -} - -func (dst *Numeric) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Numeric{Status: Null} - return nil - } - - if string(src) == "NaN" { - *dst = Numeric{Status: Present, NaN: true} - return nil - } else if string(src) == "Infinity" { - *dst = Numeric{Status: Present, InfinityModifier: Infinity} - return nil - } else if string(src) == "-Infinity" { - *dst = Numeric{Status: Present, InfinityModifier: NegativeInfinity} - return nil - } - - num, exp, err := parseNumericString(string(src)) - if err != nil { - return err - } - - *dst = Numeric{Int: num, Exp: exp, Status: Present} - return nil -} - -func parseNumericString(str string) (n *big.Int, exp int32, err error) { - parts := strings.SplitN(str, ".", 2) - digits := strings.Join(parts, "") - - if len(parts) > 1 { - exp = int32(-len(parts[1])) - } else { - for len(digits) > 1 && digits[len(digits)-1] == '0' && digits[len(digits)-2] != '-' { - digits = digits[:len(digits)-1] - exp++ - } - } - - accum := &big.Int{} - if _, ok := accum.SetString(digits, 10); !ok { - return nil, 0, fmt.Errorf("%s is not a number", str) - } - - return accum, exp, nil -} - -func (dst *Numeric) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Numeric{Status: Null} - return nil - } - - if len(src) < 8 { - return fmt.Errorf("numeric incomplete %v", src) - } - - rp := 0 - ndigits := binary.BigEndian.Uint16(src[rp:]) - rp += 2 - weight := int16(binary.BigEndian.Uint16(src[rp:])) - rp += 2 - sign := binary.BigEndian.Uint16(src[rp:]) - rp += 2 - dscale := int16(binary.BigEndian.Uint16(src[rp:])) - rp += 2 - - if sign == pgNumericNaNSign { - *dst = Numeric{Status: Present, NaN: true} - return nil - } else if sign == pgNumericPosInfSign { - *dst = Numeric{Status: Present, InfinityModifier: Infinity} - return nil - } else if sign == pgNumericNegInfSign { - *dst = Numeric{Status: Present, InfinityModifier: NegativeInfinity} - return nil - } - - if ndigits == 0 { - *dst = Numeric{Int: big.NewInt(0), Status: Present} - return nil - } - - if len(src[rp:]) < int(ndigits)*2 { - return fmt.Errorf("numeric incomplete %v", src) - } - - accum := &big.Int{} - - for i := 0; i < int(ndigits+3)/4; i++ { - int64accum, bytesRead, digitsRead := nbaseDigitsToInt64(src[rp:]) - rp += bytesRead - - if i > 0 { - var mul *big.Int - switch digitsRead { - case 1: - mul = bigNBase - case 2: - mul = bigNBaseX2 - case 3: - mul = bigNBaseX3 - case 4: - mul = bigNBaseX4 - default: - return fmt.Errorf("invalid digitsRead: %d (this can't happen)", digitsRead) - } - accum.Mul(accum, mul) - } - - accum.Add(accum, big.NewInt(int64accum)) - } - - exp := (int32(weight) - int32(ndigits) + 1) * 4 - - if dscale > 0 { - fracNBaseDigits := int16(int32(ndigits) - int32(weight) - 1) - fracDecimalDigits := fracNBaseDigits * 4 - - if dscale > fracDecimalDigits { - multCount := int(dscale - fracDecimalDigits) - for i := 0; i < multCount; i++ { - accum.Mul(accum, big10) - exp-- - } - } else if dscale < fracDecimalDigits { - divCount := int(fracDecimalDigits - dscale) - for i := 0; i < divCount; i++ { - accum.Div(accum, big10) - exp++ - } - } - } - - reduced := &big.Int{} - remainder := &big.Int{} - if exp >= 0 { - for { - reduced.DivMod(accum, big10, remainder) - if remainder.Cmp(big0) != 0 { - break - } - accum.Set(reduced) - exp++ - } - } - - if sign != 0 { - accum.Neg(accum) - } - - *dst = Numeric{Int: accum, Exp: exp, Status: Present} - - return nil - -} - -func nbaseDigitsToInt64(src []byte) (accum int64, bytesRead, digitsRead int) { - digits := len(src) / 2 - if digits > 4 { - digits = 4 - } - - rp := 0 - - for i := 0; i < digits; i++ { - if i > 0 { - accum *= nbase - } - accum += int64(binary.BigEndian.Uint16(src[rp:])) - rp += 2 - } - - return accum, rp, digits -} - -func (src Numeric) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if src.NaN { - buf = append(buf, "NaN"...) - return buf, nil - } else if src.InfinityModifier == Infinity { - buf = append(buf, "Infinity"...) - return buf, nil - } else if src.InfinityModifier == NegativeInfinity { - buf = append(buf, "-Infinity"...) - return buf, nil - } - - buf = append(buf, src.Int.String()...) - buf = append(buf, 'e') - buf = append(buf, strconv.FormatInt(int64(src.Exp), 10)...) - return buf, nil -} - -func (src Numeric) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if src.NaN { - buf = pgio.AppendUint64(buf, pgNumericNaN) - return buf, nil - } else if src.InfinityModifier == Infinity { - buf = pgio.AppendUint64(buf, pgNumericPosInf) - return buf, nil - } else if src.InfinityModifier == NegativeInfinity { - buf = pgio.AppendUint64(buf, pgNumericNegInf) - return buf, nil - } - - var sign int16 - if src.Int.Cmp(big0) < 0 { - sign = 16384 - } - - absInt := &big.Int{} - wholePart := &big.Int{} - fracPart := &big.Int{} - remainder := &big.Int{} - absInt.Abs(src.Int) - - // Normalize absInt and exp to where exp is always a multiple of 4. This makes - // converting to 16-bit base 10,000 digits easier. - var exp int32 - switch src.Exp % 4 { - case 1, -3: - exp = src.Exp - 1 - absInt.Mul(absInt, big10) - case 2, -2: - exp = src.Exp - 2 - absInt.Mul(absInt, big100) - case 3, -1: - exp = src.Exp - 3 - absInt.Mul(absInt, big1000) - default: - exp = src.Exp - } - - if exp < 0 { - divisor := &big.Int{} - divisor.Exp(big10, big.NewInt(int64(-exp)), nil) - wholePart.DivMod(absInt, divisor, fracPart) - fracPart.Add(fracPart, divisor) - } else { - wholePart = absInt - } - - var wholeDigits, fracDigits []int16 - - for wholePart.Cmp(big0) != 0 { - wholePart.DivMod(wholePart, bigNBase, remainder) - wholeDigits = append(wholeDigits, int16(remainder.Int64())) - } - - if fracPart.Cmp(big0) != 0 { - for fracPart.Cmp(big1) != 0 { - fracPart.DivMod(fracPart, bigNBase, remainder) - fracDigits = append(fracDigits, int16(remainder.Int64())) - } - } - - buf = pgio.AppendInt16(buf, int16(len(wholeDigits)+len(fracDigits))) - - var weight int16 - if len(wholeDigits) > 0 { - weight = int16(len(wholeDigits) - 1) - if exp > 0 { - weight += int16(exp / 4) - } - } else { - weight = int16(exp/4) - 1 + int16(len(fracDigits)) - } - buf = pgio.AppendInt16(buf, weight) - - buf = pgio.AppendInt16(buf, sign) - - var dscale int16 - if src.Exp < 0 { - dscale = int16(-src.Exp) - } - buf = pgio.AppendInt16(buf, dscale) - - for i := len(wholeDigits) - 1; i >= 0; i-- { - buf = pgio.AppendInt16(buf, wholeDigits[i]) - } - - for i := len(fracDigits) - 1; i >= 0; i-- { - buf = pgio.AppendInt16(buf, fracDigits[i]) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Numeric) Scan(src interface{}) error { - if src == nil { - *dst = Numeric{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Numeric) Value() (driver.Value, error) { - switch src.Status { - case Present: - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - - return string(buf), nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} - -func encodeNumericText(n Numeric, buf []byte) (newBuf []byte, err error) { - // if !n.Valid { - // return nil, nil - // } - - if n.NaN { - buf = append(buf, "NaN"...) - return buf, nil - } else if n.InfinityModifier == Infinity { - buf = append(buf, "Infinity"...) - return buf, nil - } else if n.InfinityModifier == NegativeInfinity { - buf = append(buf, "-Infinity"...) - return buf, nil - } - - buf = append(buf, n.numberTextBytes()...) - - return buf, nil -} - -// numberString returns a string of the number. undefined if NaN, infinite, or NULL -func (n Numeric) numberTextBytes() []byte { - intStr := n.Int.String() - buf := &bytes.Buffer{} - exp := int(n.Exp) - if exp > 0 { - buf.WriteString(intStr) - for i := 0; i < exp; i++ { - buf.WriteByte('0') - } - } else if exp < 0 { - if len(intStr) <= -exp { - buf.WriteString("0.") - leadingZeros := -exp - len(intStr) - for i := 0; i < leadingZeros; i++ { - buf.WriteByte('0') - } - buf.WriteString(intStr) - } else if len(intStr) > -exp { - dpPos := len(intStr) + exp - buf.WriteString(intStr[:dpPos]) - buf.WriteByte('.') - buf.WriteString(intStr[dpPos:]) - } - } else { - buf.WriteString(intStr) - } - - return buf.Bytes() -} diff --git a/vendor/github.com/jackc/pgtype/numeric_array.go b/vendor/github.com/jackc/pgtype/numeric_array.go deleted file mode 100644 index 31899dec..00000000 --- a/vendor/github.com/jackc/pgtype/numeric_array.go +++ /dev/null @@ -1,685 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type NumericArray struct { - Elements []Numeric - Dimensions []ArrayDimension - Status Status -} - -func (dst *NumericArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = NumericArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []float32: - if value == nil { - *dst = NumericArray{Status: Null} - } else if len(value) == 0 { - *dst = NumericArray{Status: Present} - } else { - elements := make([]Numeric, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = NumericArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*float32: - if value == nil { - *dst = NumericArray{Status: Null} - } else if len(value) == 0 { - *dst = NumericArray{Status: Present} - } else { - elements := make([]Numeric, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = NumericArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []float64: - if value == nil { - *dst = NumericArray{Status: Null} - } else if len(value) == 0 { - *dst = NumericArray{Status: Present} - } else { - elements := make([]Numeric, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = NumericArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*float64: - if value == nil { - *dst = NumericArray{Status: Null} - } else if len(value) == 0 { - *dst = NumericArray{Status: Present} - } else { - elements := make([]Numeric, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = NumericArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []int64: - if value == nil { - *dst = NumericArray{Status: Null} - } else if len(value) == 0 { - *dst = NumericArray{Status: Present} - } else { - elements := make([]Numeric, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = NumericArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*int64: - if value == nil { - *dst = NumericArray{Status: Null} - } else if len(value) == 0 { - *dst = NumericArray{Status: Present} - } else { - elements := make([]Numeric, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = NumericArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []uint64: - if value == nil { - *dst = NumericArray{Status: Null} - } else if len(value) == 0 { - *dst = NumericArray{Status: Present} - } else { - elements := make([]Numeric, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = NumericArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*uint64: - if value == nil { - *dst = NumericArray{Status: Null} - } else if len(value) == 0 { - *dst = NumericArray{Status: Present} - } else { - elements := make([]Numeric, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = NumericArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Numeric: - if value == nil { - *dst = NumericArray{Status: Null} - } else if len(value) == 0 { - *dst = NumericArray{Status: Present} - } else { - *dst = NumericArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = NumericArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for NumericArray", src) - } - if elementsLength == 0 { - *dst = NumericArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to NumericArray", src) - } - - *dst = NumericArray{ - Elements: make([]Numeric, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Numeric, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to NumericArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *NumericArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to NumericArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in NumericArray", err) - } - index++ - - return index, nil -} - -func (dst NumericArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *NumericArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]float32: - *v = make([]float32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*float32: - *v = make([]*float32, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]float64: - *v = make([]float64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*float64: - *v = make([]*float64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]int64: - *v = make([]int64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*int64: - *v = make([]*int64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]uint64: - *v = make([]uint64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*uint64: - *v = make([]*uint64, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *NumericArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from NumericArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from NumericArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *NumericArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = NumericArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Numeric - - if len(uta.Elements) > 0 { - elements = make([]Numeric, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Numeric - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = NumericArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *NumericArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = NumericArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = NumericArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Numeric, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = NumericArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src NumericArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src NumericArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("numeric"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "numeric") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *NumericArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src NumericArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/numrange.go b/vendor/github.com/jackc/pgtype/numrange.go deleted file mode 100644 index 3d5951a2..00000000 --- a/vendor/github.com/jackc/pgtype/numrange.go +++ /dev/null @@ -1,267 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" - - "github.com/jackc/pgio" -) - -type Numrange struct { - Lower Numeric - Upper Numeric - LowerType BoundType - UpperType BoundType - Status Status -} - -func (dst *Numrange) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Numrange{Status: Null} - return nil - } - - switch value := src.(type) { - case Numrange: - *dst = value - case *Numrange: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - default: - return fmt.Errorf("cannot convert %v to Numrange", src) - } - - return nil -} - -func (dst Numrange) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Numrange) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Numrange) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Numrange{Status: Null} - return nil - } - - utr, err := ParseUntypedTextRange(string(src)) - if err != nil { - return err - } - - *dst = Numrange{Status: Present} - - dst.LowerType = utr.LowerType - dst.UpperType = utr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeText(ci, []byte(utr.Lower)); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeText(ci, []byte(utr.Upper)); err != nil { - return err - } - } - - return nil -} - -func (dst *Numrange) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Numrange{Status: Null} - return nil - } - - ubr, err := ParseUntypedBinaryRange(src) - if err != nil { - return err - } - - *dst = Numrange{Status: Present} - - dst.LowerType = ubr.LowerType - dst.UpperType = ubr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeBinary(ci, ubr.Lower); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeBinary(ci, ubr.Upper); err != nil { - return err - } - } - - return nil -} - -func (src Numrange) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - switch src.LowerType { - case Exclusive, Unbounded: - buf = append(buf, '(') - case Inclusive: - buf = append(buf, '[') - case Empty: - return append(buf, "empty"...), nil - default: - return nil, fmt.Errorf("unknown lower bound type %v", src.LowerType) - } - - var err error - - if src.LowerType != Unbounded { - buf, err = src.Lower.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - } - - buf = append(buf, ',') - - if src.UpperType != Unbounded { - buf, err = src.Upper.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - } - - switch src.UpperType { - case Exclusive, Unbounded: - buf = append(buf, ')') - case Inclusive: - buf = append(buf, ']') - default: - return nil, fmt.Errorf("unknown upper bound type %v", src.UpperType) - } - - return buf, nil -} - -func (src Numrange) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var rangeType byte - switch src.LowerType { - case Inclusive: - rangeType |= lowerInclusiveMask - case Unbounded: - rangeType |= lowerUnboundedMask - case Exclusive: - case Empty: - return append(buf, emptyMask), nil - default: - return nil, fmt.Errorf("unknown LowerType: %v", src.LowerType) - } - - switch src.UpperType { - case Inclusive: - rangeType |= upperInclusiveMask - case Unbounded: - rangeType |= upperUnboundedMask - case Exclusive: - default: - return nil, fmt.Errorf("unknown UpperType: %v", src.UpperType) - } - - buf = append(buf, rangeType) - - var err error - - if src.LowerType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Lower.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - if src.UpperType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Upper.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Numrange) Scan(src interface{}) error { - if src == nil { - *dst = Numrange{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Numrange) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/oid.go b/vendor/github.com/jackc/pgtype/oid.go deleted file mode 100644 index 31677e89..00000000 --- a/vendor/github.com/jackc/pgtype/oid.go +++ /dev/null @@ -1,81 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "strconv" - - "github.com/jackc/pgio" -) - -// OID (Object Identifier Type) is, according to -// https://www.postgresql.org/docs/current/static/datatype-oid.html, used -// internally by PostgreSQL as a primary key for various system tables. It is -// currently implemented as an unsigned four-byte integer. Its definition can be -// found in src/include/postgres_ext.h in the PostgreSQL sources. Because it is -// so frequently required to be in a NOT NULL condition OID cannot be NULL. To -// allow for NULL OIDs use OIDValue. -type OID uint32 - -func (dst *OID) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - return fmt.Errorf("cannot decode nil into OID") - } - - n, err := strconv.ParseUint(string(src), 10, 32) - if err != nil { - return err - } - - *dst = OID(n) - return nil -} - -func (dst *OID) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - return fmt.Errorf("cannot decode nil into OID") - } - - if len(src) != 4 { - return fmt.Errorf("invalid length: %v", len(src)) - } - - n := binary.BigEndian.Uint32(src) - *dst = OID(n) - return nil -} - -func (src OID) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return append(buf, strconv.FormatUint(uint64(src), 10)...), nil -} - -func (src OID) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return pgio.AppendUint32(buf, uint32(src)), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *OID) Scan(src interface{}) error { - if src == nil { - return fmt.Errorf("cannot scan NULL into %T", src) - } - - switch src := src.(type) { - case int64: - *dst = OID(src) - return nil - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src OID) Value() (driver.Value, error) { - return int64(src), nil -} diff --git a/vendor/github.com/jackc/pgtype/oid_value.go b/vendor/github.com/jackc/pgtype/oid_value.go deleted file mode 100644 index 5dc9136c..00000000 --- a/vendor/github.com/jackc/pgtype/oid_value.go +++ /dev/null @@ -1,55 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" -) - -// OIDValue (Object Identifier Type) is, according to -// https://www.postgresql.org/docs/current/static/datatype-OIDValue.html, used -// internally by PostgreSQL as a primary key for various system tables. It is -// currently implemented as an unsigned four-byte integer. Its definition can be -// found in src/include/postgres_ext.h in the PostgreSQL sources. -type OIDValue pguint32 - -// Set converts from src to dst. Note that as OIDValue is not a general -// number type Set does not do automatic type conversion as other number -// types do. -func (dst *OIDValue) Set(src interface{}) error { - return (*pguint32)(dst).Set(src) -} - -func (dst OIDValue) Get() interface{} { - return (pguint32)(dst).Get() -} - -// AssignTo assigns from src to dst. Note that as OIDValue is not a general number -// type AssignTo does not do automatic type conversion as other number types do. -func (src *OIDValue) AssignTo(dst interface{}) error { - return (*pguint32)(src).AssignTo(dst) -} - -func (dst *OIDValue) DecodeText(ci *ConnInfo, src []byte) error { - return (*pguint32)(dst).DecodeText(ci, src) -} - -func (dst *OIDValue) DecodeBinary(ci *ConnInfo, src []byte) error { - return (*pguint32)(dst).DecodeBinary(ci, src) -} - -func (src OIDValue) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (pguint32)(src).EncodeText(ci, buf) -} - -func (src OIDValue) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return (pguint32)(src).EncodeBinary(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *OIDValue) Scan(src interface{}) error { - return (*pguint32)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src OIDValue) Value() (driver.Value, error) { - return (pguint32)(src).Value() -} diff --git a/vendor/github.com/jackc/pgtype/path.go b/vendor/github.com/jackc/pgtype/path.go deleted file mode 100644 index 9f89969e..00000000 --- a/vendor/github.com/jackc/pgtype/path.go +++ /dev/null @@ -1,195 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "strconv" - "strings" - - "github.com/jackc/pgio" -) - -type Path struct { - P []Vec2 - Closed bool - Status Status -} - -func (dst *Path) Set(src interface{}) error { - return fmt.Errorf("cannot convert %v to Path", src) -} - -func (dst Path) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Path) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Path) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Path{Status: Null} - return nil - } - - if len(src) < 7 { - return fmt.Errorf("invalid length for Path: %v", len(src)) - } - - closed := src[0] == '(' - points := make([]Vec2, 0) - - str := string(src[2:]) - - for { - end := strings.IndexByte(str, ',') - x, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - str = str[end+1:] - end = strings.IndexByte(str, ')') - - y, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - points = append(points, Vec2{x, y}) - - if end+3 < len(str) { - str = str[end+3:] - } else { - break - } - } - - *dst = Path{P: points, Closed: closed, Status: Present} - return nil -} - -func (dst *Path) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Path{Status: Null} - return nil - } - - if len(src) < 5 { - return fmt.Errorf("invalid length for Path: %v", len(src)) - } - - closed := src[0] == 1 - pointCount := int(binary.BigEndian.Uint32(src[1:])) - - rp := 5 - - if 5+pointCount*16 != len(src) { - return fmt.Errorf("invalid length for Path with %d points: %v", pointCount, len(src)) - } - - points := make([]Vec2, pointCount) - for i := 0; i < len(points); i++ { - x := binary.BigEndian.Uint64(src[rp:]) - rp += 8 - y := binary.BigEndian.Uint64(src[rp:]) - rp += 8 - points[i] = Vec2{math.Float64frombits(x), math.Float64frombits(y)} - } - - *dst = Path{ - P: points, - Closed: closed, - Status: Present, - } - return nil -} - -func (src Path) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var startByte, endByte byte - if src.Closed { - startByte = '(' - endByte = ')' - } else { - startByte = '[' - endByte = ']' - } - buf = append(buf, startByte) - - for i, p := range src.P { - if i > 0 { - buf = append(buf, ',') - } - buf = append(buf, fmt.Sprintf(`(%s,%s)`, - strconv.FormatFloat(p.X, 'f', -1, 64), - strconv.FormatFloat(p.Y, 'f', -1, 64), - )...) - } - - return append(buf, endByte), nil -} - -func (src Path) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var closeByte byte - if src.Closed { - closeByte = 1 - } - buf = append(buf, closeByte) - - buf = pgio.AppendInt32(buf, int32(len(src.P))) - - for _, p := range src.P { - buf = pgio.AppendUint64(buf, math.Float64bits(p.X)) - buf = pgio.AppendUint64(buf, math.Float64bits(p.Y)) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Path) Scan(src interface{}) error { - if src == nil { - *dst = Path{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Path) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/pgtype.go b/vendor/github.com/jackc/pgtype/pgtype.go deleted file mode 100644 index a52740e7..00000000 --- a/vendor/github.com/jackc/pgtype/pgtype.go +++ /dev/null @@ -1,1001 +0,0 @@ -package pgtype - -import ( - "database/sql" - "encoding/binary" - "errors" - "fmt" - "math" - "net" - "reflect" - "time" -) - -// PostgreSQL oids for common types -const ( - BoolOID = 16 - ByteaOID = 17 - QCharOID = 18 - NameOID = 19 - Int8OID = 20 - Int2OID = 21 - Int4OID = 23 - TextOID = 25 - OIDOID = 26 - TIDOID = 27 - XIDOID = 28 - CIDOID = 29 - JSONOID = 114 - JSONArrayOID = 199 - PointOID = 600 - LsegOID = 601 - PathOID = 602 - BoxOID = 603 - PolygonOID = 604 - LineOID = 628 - CIDROID = 650 - CIDRArrayOID = 651 - Float4OID = 700 - Float8OID = 701 - CircleOID = 718 - UnknownOID = 705 - MacaddrOID = 829 - InetOID = 869 - BoolArrayOID = 1000 - Int2ArrayOID = 1005 - Int4ArrayOID = 1007 - TextArrayOID = 1009 - ByteaArrayOID = 1001 - BPCharArrayOID = 1014 - VarcharArrayOID = 1015 - Int8ArrayOID = 1016 - Float4ArrayOID = 1021 - Float8ArrayOID = 1022 - ACLItemOID = 1033 - ACLItemArrayOID = 1034 - InetArrayOID = 1041 - BPCharOID = 1042 - VarcharOID = 1043 - DateOID = 1082 - TimeOID = 1083 - TimestampOID = 1114 - TimestampArrayOID = 1115 - DateArrayOID = 1182 - TimestamptzOID = 1184 - TimestamptzArrayOID = 1185 - IntervalOID = 1186 - NumericArrayOID = 1231 - BitOID = 1560 - VarbitOID = 1562 - NumericOID = 1700 - RecordOID = 2249 - UUIDOID = 2950 - UUIDArrayOID = 2951 - JSONBOID = 3802 - JSONBArrayOID = 3807 - DaterangeOID = 3912 - Int4rangeOID = 3904 - Int4multirangeOID = 4451 - NumrangeOID = 3906 - NummultirangeOID = 4532 - TsrangeOID = 3908 - TsrangeArrayOID = 3909 - TstzrangeOID = 3910 - TstzrangeArrayOID = 3911 - Int8rangeOID = 3926 - Int8multirangeOID = 4536 -) - -type Status byte - -const ( - Undefined Status = iota - Null - Present -) - -type InfinityModifier int8 - -const ( - Infinity InfinityModifier = 1 - None InfinityModifier = 0 - NegativeInfinity InfinityModifier = -Infinity -) - -func (im InfinityModifier) String() string { - switch im { - case None: - return "none" - case Infinity: - return "infinity" - case NegativeInfinity: - return "-infinity" - default: - return "invalid" - } -} - -// PostgreSQL format codes -const ( - TextFormatCode = 0 - BinaryFormatCode = 1 -) - -// Value translates values to and from an internal canonical representation for the type. To actually be usable a type -// that implements Value should also implement some combination of BinaryDecoder, BinaryEncoder, TextDecoder, -// and TextEncoder. -// -// Operations that update a Value (e.g. Set, DecodeText, DecodeBinary) should entirely replace the value. e.g. Internal -// slices should be replaced not resized and reused. This allows Get and AssignTo to return a slice directly rather -// than incur a usually unnecessary copy. -type Value interface { - // Set converts and assigns src to itself. Value takes ownership of src. - Set(src interface{}) error - - // Get returns the simplest representation of Value. Get may return a pointer to an internal value but it must never - // mutate that value. e.g. If Get returns a []byte Value must never change the contents of the []byte. - Get() interface{} - - // AssignTo converts and assigns the Value to dst. AssignTo may a pointer to an internal value but it must never - // mutate that value. e.g. If Get returns a []byte Value must never change the contents of the []byte. - AssignTo(dst interface{}) error -} - -// TypeValue is a Value where instances can represent different PostgreSQL types. This can be useful for -// representing types such as enums, composites, and arrays. -// -// In general, instances of TypeValue should not be used to directly represent a value. It should only be used as an -// encoder and decoder internal to ConnInfo. -type TypeValue interface { - Value - - // NewTypeValue creates a TypeValue including references to internal type information. e.g. the list of members - // in an EnumType. - NewTypeValue() Value - - // TypeName returns the PostgreSQL name of this type. - TypeName() string -} - -// ValueTranscoder is a value that implements the text and binary encoding and decoding interfaces. -type ValueTranscoder interface { - Value - TextEncoder - BinaryEncoder - TextDecoder - BinaryDecoder -} - -// ResultFormatPreferrer allows a type to specify its preferred result format instead of it being inferred from -// whether it is also a BinaryDecoder. -type ResultFormatPreferrer interface { - PreferredResultFormat() int16 -} - -// ParamFormatPreferrer allows a type to specify its preferred param format instead of it being inferred from -// whether it is also a BinaryEncoder. -type ParamFormatPreferrer interface { - PreferredParamFormat() int16 -} - -type BinaryDecoder interface { - // DecodeBinary decodes src into BinaryDecoder. If src is nil then the - // original SQL value is NULL. BinaryDecoder takes ownership of src. The - // caller MUST not use it again. - DecodeBinary(ci *ConnInfo, src []byte) error -} - -type TextDecoder interface { - // DecodeText decodes src into TextDecoder. If src is nil then the original - // SQL value is NULL. TextDecoder takes ownership of src. The caller MUST not - // use it again. - DecodeText(ci *ConnInfo, src []byte) error -} - -// BinaryEncoder is implemented by types that can encode themselves into the -// PostgreSQL binary wire format. -type BinaryEncoder interface { - // EncodeBinary should append the binary format of self to buf. If self is the - // SQL value NULL then append nothing and return (nil, nil). The caller of - // EncodeBinary is responsible for writing the correct NULL value or the - // length of the data written. - EncodeBinary(ci *ConnInfo, buf []byte) (newBuf []byte, err error) -} - -// TextEncoder is implemented by types that can encode themselves into the -// PostgreSQL text wire format. -type TextEncoder interface { - // EncodeText should append the text format of self to buf. If self is the - // SQL value NULL then append nothing and return (nil, nil). The caller of - // EncodeText is responsible for writing the correct NULL value or the - // length of the data written. - EncodeText(ci *ConnInfo, buf []byte) (newBuf []byte, err error) -} - -var errUndefined = errors.New("cannot encode status undefined") -var errBadStatus = errors.New("invalid status") - -type nullAssignmentError struct { - dst interface{} -} - -func (e *nullAssignmentError) Error() string { - return fmt.Sprintf("cannot assign NULL to %T", e.dst) -} - -type DataType struct { - Value Value - - textDecoder TextDecoder - binaryDecoder BinaryDecoder - - Name string - OID uint32 -} - -type ConnInfo struct { - oidToDataType map[uint32]*DataType - nameToDataType map[string]*DataType - reflectTypeToName map[reflect.Type]string - oidToParamFormatCode map[uint32]int16 - oidToResultFormatCode map[uint32]int16 - - reflectTypeToDataType map[reflect.Type]*DataType -} - -func newConnInfo() *ConnInfo { - return &ConnInfo{ - oidToDataType: make(map[uint32]*DataType), - nameToDataType: make(map[string]*DataType), - reflectTypeToName: make(map[reflect.Type]string), - oidToParamFormatCode: make(map[uint32]int16), - oidToResultFormatCode: make(map[uint32]int16), - } -} - -func NewConnInfo() *ConnInfo { - ci := newConnInfo() - - ci.RegisterDataType(DataType{Value: &ACLItemArray{}, Name: "_aclitem", OID: ACLItemArrayOID}) - ci.RegisterDataType(DataType{Value: &BoolArray{}, Name: "_bool", OID: BoolArrayOID}) - ci.RegisterDataType(DataType{Value: &BPCharArray{}, Name: "_bpchar", OID: BPCharArrayOID}) - ci.RegisterDataType(DataType{Value: &ByteaArray{}, Name: "_bytea", OID: ByteaArrayOID}) - ci.RegisterDataType(DataType{Value: &CIDRArray{}, Name: "_cidr", OID: CIDRArrayOID}) - ci.RegisterDataType(DataType{Value: &DateArray{}, Name: "_date", OID: DateArrayOID}) - ci.RegisterDataType(DataType{Value: &Float4Array{}, Name: "_float4", OID: Float4ArrayOID}) - ci.RegisterDataType(DataType{Value: &Float8Array{}, Name: "_float8", OID: Float8ArrayOID}) - ci.RegisterDataType(DataType{Value: &InetArray{}, Name: "_inet", OID: InetArrayOID}) - ci.RegisterDataType(DataType{Value: &Int2Array{}, Name: "_int2", OID: Int2ArrayOID}) - ci.RegisterDataType(DataType{Value: &Int4Array{}, Name: "_int4", OID: Int4ArrayOID}) - ci.RegisterDataType(DataType{Value: &Int8Array{}, Name: "_int8", OID: Int8ArrayOID}) - ci.RegisterDataType(DataType{Value: &NumericArray{}, Name: "_numeric", OID: NumericArrayOID}) - ci.RegisterDataType(DataType{Value: &TextArray{}, Name: "_text", OID: TextArrayOID}) - ci.RegisterDataType(DataType{Value: &TimestampArray{}, Name: "_timestamp", OID: TimestampArrayOID}) - ci.RegisterDataType(DataType{Value: &TimestamptzArray{}, Name: "_timestamptz", OID: TimestamptzArrayOID}) - ci.RegisterDataType(DataType{Value: &UUIDArray{}, Name: "_uuid", OID: UUIDArrayOID}) - ci.RegisterDataType(DataType{Value: &VarcharArray{}, Name: "_varchar", OID: VarcharArrayOID}) - ci.RegisterDataType(DataType{Value: &ACLItem{}, Name: "aclitem", OID: ACLItemOID}) - ci.RegisterDataType(DataType{Value: &Bit{}, Name: "bit", OID: BitOID}) - ci.RegisterDataType(DataType{Value: &Bool{}, Name: "bool", OID: BoolOID}) - ci.RegisterDataType(DataType{Value: &Box{}, Name: "box", OID: BoxOID}) - ci.RegisterDataType(DataType{Value: &BPChar{}, Name: "bpchar", OID: BPCharOID}) - ci.RegisterDataType(DataType{Value: &Bytea{}, Name: "bytea", OID: ByteaOID}) - ci.RegisterDataType(DataType{Value: &QChar{}, Name: "char", OID: QCharOID}) - ci.RegisterDataType(DataType{Value: &CID{}, Name: "cid", OID: CIDOID}) - ci.RegisterDataType(DataType{Value: &CIDR{}, Name: "cidr", OID: CIDROID}) - ci.RegisterDataType(DataType{Value: &Circle{}, Name: "circle", OID: CircleOID}) - ci.RegisterDataType(DataType{Value: &Date{}, Name: "date", OID: DateOID}) - ci.RegisterDataType(DataType{Value: &Daterange{}, Name: "daterange", OID: DaterangeOID}) - ci.RegisterDataType(DataType{Value: &Float4{}, Name: "float4", OID: Float4OID}) - ci.RegisterDataType(DataType{Value: &Float8{}, Name: "float8", OID: Float8OID}) - ci.RegisterDataType(DataType{Value: &Inet{}, Name: "inet", OID: InetOID}) - ci.RegisterDataType(DataType{Value: &Int2{}, Name: "int2", OID: Int2OID}) - ci.RegisterDataType(DataType{Value: &Int4{}, Name: "int4", OID: Int4OID}) - ci.RegisterDataType(DataType{Value: &Int4range{}, Name: "int4range", OID: Int4rangeOID}) - ci.RegisterDataType(DataType{Value: &Int4multirange{}, Name: "int4multirange", OID: Int4multirangeOID}) - ci.RegisterDataType(DataType{Value: &Int8{}, Name: "int8", OID: Int8OID}) - ci.RegisterDataType(DataType{Value: &Int8range{}, Name: "int8range", OID: Int8rangeOID}) - ci.RegisterDataType(DataType{Value: &Int8multirange{}, Name: "int8multirange", OID: Int8multirangeOID}) - ci.RegisterDataType(DataType{Value: &Interval{}, Name: "interval", OID: IntervalOID}) - ci.RegisterDataType(DataType{Value: &JSON{}, Name: "json", OID: JSONOID}) - ci.RegisterDataType(DataType{Value: &JSONArray{}, Name: "_json", OID: JSONArrayOID}) - ci.RegisterDataType(DataType{Value: &JSONB{}, Name: "jsonb", OID: JSONBOID}) - ci.RegisterDataType(DataType{Value: &JSONBArray{}, Name: "_jsonb", OID: JSONBArrayOID}) - ci.RegisterDataType(DataType{Value: &Line{}, Name: "line", OID: LineOID}) - ci.RegisterDataType(DataType{Value: &Lseg{}, Name: "lseg", OID: LsegOID}) - ci.RegisterDataType(DataType{Value: &Macaddr{}, Name: "macaddr", OID: MacaddrOID}) - ci.RegisterDataType(DataType{Value: &Name{}, Name: "name", OID: NameOID}) - ci.RegisterDataType(DataType{Value: &Numeric{}, Name: "numeric", OID: NumericOID}) - ci.RegisterDataType(DataType{Value: &Numrange{}, Name: "numrange", OID: NumrangeOID}) - ci.RegisterDataType(DataType{Value: &Nummultirange{}, Name: "nummultirange", OID: NummultirangeOID}) - ci.RegisterDataType(DataType{Value: &OIDValue{}, Name: "oid", OID: OIDOID}) - ci.RegisterDataType(DataType{Value: &Path{}, Name: "path", OID: PathOID}) - ci.RegisterDataType(DataType{Value: &Point{}, Name: "point", OID: PointOID}) - ci.RegisterDataType(DataType{Value: &Polygon{}, Name: "polygon", OID: PolygonOID}) - ci.RegisterDataType(DataType{Value: &Record{}, Name: "record", OID: RecordOID}) - ci.RegisterDataType(DataType{Value: &Text{}, Name: "text", OID: TextOID}) - ci.RegisterDataType(DataType{Value: &TID{}, Name: "tid", OID: TIDOID}) - ci.RegisterDataType(DataType{Value: &Time{}, Name: "time", OID: TimeOID}) - ci.RegisterDataType(DataType{Value: &Timestamp{}, Name: "timestamp", OID: TimestampOID}) - ci.RegisterDataType(DataType{Value: &Timestamptz{}, Name: "timestamptz", OID: TimestamptzOID}) - ci.RegisterDataType(DataType{Value: &Tsrange{}, Name: "tsrange", OID: TsrangeOID}) - ci.RegisterDataType(DataType{Value: &TsrangeArray{}, Name: "_tsrange", OID: TsrangeArrayOID}) - ci.RegisterDataType(DataType{Value: &Tstzrange{}, Name: "tstzrange", OID: TstzrangeOID}) - ci.RegisterDataType(DataType{Value: &TstzrangeArray{}, Name: "_tstzrange", OID: TstzrangeArrayOID}) - ci.RegisterDataType(DataType{Value: &Unknown{}, Name: "unknown", OID: UnknownOID}) - ci.RegisterDataType(DataType{Value: &UUID{}, Name: "uuid", OID: UUIDOID}) - ci.RegisterDataType(DataType{Value: &Varbit{}, Name: "varbit", OID: VarbitOID}) - ci.RegisterDataType(DataType{Value: &Varchar{}, Name: "varchar", OID: VarcharOID}) - ci.RegisterDataType(DataType{Value: &XID{}, Name: "xid", OID: XIDOID}) - - registerDefaultPgTypeVariants := func(name, arrayName string, value interface{}) { - ci.RegisterDefaultPgType(value, name) - valueType := reflect.TypeOf(value) - - ci.RegisterDefaultPgType(reflect.New(valueType).Interface(), name) - - sliceType := reflect.SliceOf(valueType) - ci.RegisterDefaultPgType(reflect.MakeSlice(sliceType, 0, 0).Interface(), arrayName) - - ci.RegisterDefaultPgType(reflect.New(sliceType).Interface(), arrayName) - } - - // Integer types that directly map to a PostgreSQL type - registerDefaultPgTypeVariants("int2", "_int2", int16(0)) - registerDefaultPgTypeVariants("int4", "_int4", int32(0)) - registerDefaultPgTypeVariants("int8", "_int8", int64(0)) - - // Integer types that do not have a direct match to a PostgreSQL type - registerDefaultPgTypeVariants("int8", "_int8", uint16(0)) - registerDefaultPgTypeVariants("int8", "_int8", uint32(0)) - registerDefaultPgTypeVariants("int8", "_int8", uint64(0)) - registerDefaultPgTypeVariants("int8", "_int8", int(0)) - registerDefaultPgTypeVariants("int8", "_int8", uint(0)) - - registerDefaultPgTypeVariants("float4", "_float4", float32(0)) - registerDefaultPgTypeVariants("float8", "_float8", float64(0)) - - registerDefaultPgTypeVariants("bool", "_bool", false) - registerDefaultPgTypeVariants("timestamptz", "_timestamptz", time.Time{}) - registerDefaultPgTypeVariants("text", "_text", "") - registerDefaultPgTypeVariants("bytea", "_bytea", []byte(nil)) - - registerDefaultPgTypeVariants("inet", "_inet", net.IP{}) - ci.RegisterDefaultPgType((*net.IPNet)(nil), "cidr") - ci.RegisterDefaultPgType([]*net.IPNet(nil), "_cidr") - - return ci -} - -func (ci *ConnInfo) InitializeDataTypes(nameOIDs map[string]uint32) { - for name, oid := range nameOIDs { - var value Value - if t, ok := nameValues[name]; ok { - value = reflect.New(reflect.ValueOf(t).Elem().Type()).Interface().(Value) - } else { - value = &GenericText{} - } - ci.RegisterDataType(DataType{Value: value, Name: name, OID: oid}) - } -} - -func (ci *ConnInfo) RegisterDataType(t DataType) { - t.Value = NewValue(t.Value) - - ci.oidToDataType[t.OID] = &t - ci.nameToDataType[t.Name] = &t - - { - var formatCode int16 - if pfp, ok := t.Value.(ParamFormatPreferrer); ok { - formatCode = pfp.PreferredParamFormat() - } else if _, ok := t.Value.(BinaryEncoder); ok { - formatCode = BinaryFormatCode - } - ci.oidToParamFormatCode[t.OID] = formatCode - } - - { - var formatCode int16 - if rfp, ok := t.Value.(ResultFormatPreferrer); ok { - formatCode = rfp.PreferredResultFormat() - } else if _, ok := t.Value.(BinaryDecoder); ok { - formatCode = BinaryFormatCode - } - ci.oidToResultFormatCode[t.OID] = formatCode - } - - if d, ok := t.Value.(TextDecoder); ok { - t.textDecoder = d - } - - if d, ok := t.Value.(BinaryDecoder); ok { - t.binaryDecoder = d - } - - ci.reflectTypeToDataType = nil // Invalidated by type registration -} - -// RegisterDefaultPgType registers a mapping of a Go type to a PostgreSQL type name. Typically the data type to be -// encoded or decoded is determined by the PostgreSQL OID. But if the OID of a value to be encoded or decoded is -// unknown, this additional mapping will be used by DataTypeForValue to determine a suitable data type. -func (ci *ConnInfo) RegisterDefaultPgType(value interface{}, name string) { - ci.reflectTypeToName[reflect.TypeOf(value)] = name - ci.reflectTypeToDataType = nil // Invalidated by registering a default type -} - -func (ci *ConnInfo) DataTypeForOID(oid uint32) (*DataType, bool) { - dt, ok := ci.oidToDataType[oid] - return dt, ok -} - -func (ci *ConnInfo) DataTypeForName(name string) (*DataType, bool) { - dt, ok := ci.nameToDataType[name] - return dt, ok -} - -func (ci *ConnInfo) buildReflectTypeToDataType() { - ci.reflectTypeToDataType = make(map[reflect.Type]*DataType) - - for _, dt := range ci.oidToDataType { - if _, is := dt.Value.(TypeValue); !is { - ci.reflectTypeToDataType[reflect.ValueOf(dt.Value).Type()] = dt - } - } - - for reflectType, name := range ci.reflectTypeToName { - if dt, ok := ci.nameToDataType[name]; ok { - ci.reflectTypeToDataType[reflectType] = dt - } - } -} - -// DataTypeForValue finds a data type suitable for v. Use RegisterDataType to register types that can encode and decode -// themselves. Use RegisterDefaultPgType to register that can be handled by a registered data type. -func (ci *ConnInfo) DataTypeForValue(v interface{}) (*DataType, bool) { - if ci.reflectTypeToDataType == nil { - ci.buildReflectTypeToDataType() - } - - if tv, ok := v.(TypeValue); ok { - dt, ok := ci.nameToDataType[tv.TypeName()] - return dt, ok - } - - dt, ok := ci.reflectTypeToDataType[reflect.TypeOf(v)] - return dt, ok -} - -func (ci *ConnInfo) ParamFormatCodeForOID(oid uint32) int16 { - fc, ok := ci.oidToParamFormatCode[oid] - if ok { - return fc - } - return TextFormatCode -} - -func (ci *ConnInfo) ResultFormatCodeForOID(oid uint32) int16 { - fc, ok := ci.oidToResultFormatCode[oid] - if ok { - return fc - } - return TextFormatCode -} - -// DeepCopy makes a deep copy of the ConnInfo. -func (ci *ConnInfo) DeepCopy() *ConnInfo { - ci2 := newConnInfo() - - for _, dt := range ci.oidToDataType { - ci2.RegisterDataType(DataType{ - Value: NewValue(dt.Value), - Name: dt.Name, - OID: dt.OID, - }) - } - - for t, n := range ci.reflectTypeToName { - ci2.reflectTypeToName[t] = n - } - - return ci2 -} - -// ScanPlan is a precompiled plan to scan into a type of destination. -type ScanPlan interface { - // Scan scans src into dst. If the dst type has changed in an incompatible way a ScanPlan should automatically - // replan and scan. - Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error -} - -type scanPlanDstBinaryDecoder struct{} - -func (scanPlanDstBinaryDecoder) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - if d, ok := (dst).(BinaryDecoder); ok { - return d.DecodeBinary(ci, src) - } - - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) -} - -type scanPlanDstTextDecoder struct{} - -func (plan scanPlanDstTextDecoder) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - if d, ok := (dst).(TextDecoder); ok { - return d.DecodeText(ci, src) - } - - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) -} - -type scanPlanDataTypeSQLScanner DataType - -func (plan *scanPlanDataTypeSQLScanner) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - scanner, ok := dst.(sql.Scanner) - if !ok { - dv := reflect.ValueOf(dst) - if dv.Kind() != reflect.Ptr || !dv.Type().Elem().Implements(scannerType) { - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) - } - if src == nil { - // Ensure the pointer points to a zero version of the value - dv.Elem().Set(reflect.Zero(dv.Type().Elem())) - return nil - } - dv = dv.Elem() - // If the pointer is to a nil pointer then set that before scanning - if dv.Kind() == reflect.Ptr && dv.IsNil() { - dv.Set(reflect.New(dv.Type().Elem())) - } - scanner = dv.Interface().(sql.Scanner) - } - - dt := (*DataType)(plan) - var err error - switch formatCode { - case BinaryFormatCode: - err = dt.binaryDecoder.DecodeBinary(ci, src) - case TextFormatCode: - err = dt.textDecoder.DecodeText(ci, src) - } - if err != nil { - return err - } - - sqlSrc, err := DatabaseSQLValue(ci, dt.Value) - if err != nil { - return err - } - return scanner.Scan(sqlSrc) -} - -type scanPlanDataTypeAssignTo DataType - -func (plan *scanPlanDataTypeAssignTo) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - dt := (*DataType)(plan) - var err error - switch formatCode { - case BinaryFormatCode: - err = dt.binaryDecoder.DecodeBinary(ci, src) - case TextFormatCode: - err = dt.textDecoder.DecodeText(ci, src) - } - if err != nil { - return err - } - - assignToErr := dt.Value.AssignTo(dst) - if assignToErr == nil { - return nil - } - - if dstPtr, ok := dst.(*interface{}); ok { - *dstPtr = dt.Value.Get() - return nil - } - - // assignToErr might have failed because the type of destination has changed - newPlan := ci.PlanScan(oid, formatCode, dst) - if newPlan, sameType := newPlan.(*scanPlanDataTypeAssignTo); !sameType { - return newPlan.Scan(ci, oid, formatCode, src, dst) - } - - return assignToErr -} - -type scanPlanSQLScanner struct{} - -func (scanPlanSQLScanner) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - scanner, ok := dst.(sql.Scanner) - if !ok { - dv := reflect.ValueOf(dst) - if dv.Kind() != reflect.Ptr || !dv.Type().Elem().Implements(scannerType) { - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) - } - if src == nil { - // Ensure the pointer points to a zero version of the value - dv.Elem().Set(reflect.Zero(dv.Elem().Type())) - return nil - } - dv = dv.Elem() - // If the pointer is to a nil pointer then set that before scanning - if dv.Kind() == reflect.Ptr && dv.IsNil() { - dv.Set(reflect.New(dv.Type().Elem())) - } - scanner = dv.Interface().(sql.Scanner) - } - if src == nil { - // This is necessary because interface value []byte:nil does not equal nil:nil for the binary format path and the - // text format path would be converted to empty string. - return scanner.Scan(nil) - } else if formatCode == BinaryFormatCode { - return scanner.Scan(src) - } else { - return scanner.Scan(string(src)) - } -} - -type scanPlanReflection struct{} - -func (scanPlanReflection) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - // We might be given a pointer to something that implements the decoder interface(s), - // even though the pointer itself doesn't. - refVal := reflect.ValueOf(dst) - if refVal.Kind() == reflect.Ptr && refVal.Type().Elem().Kind() == reflect.Ptr { - // If the database returned NULL, then we set dest as nil to indicate that. - if src == nil { - nilPtr := reflect.Zero(refVal.Type().Elem()) - refVal.Elem().Set(nilPtr) - return nil - } - - // We need to allocate an element, and set the destination to it - // Then we can retry as that element. - elemPtr := reflect.New(refVal.Type().Elem().Elem()) - refVal.Elem().Set(elemPtr) - - plan := ci.PlanScan(oid, formatCode, elemPtr.Interface()) - return plan.Scan(ci, oid, formatCode, src, elemPtr.Interface()) - } - - return scanUnknownType(oid, formatCode, src, dst) -} - -type scanPlanBinaryInt16 struct{} - -func (scanPlanBinaryInt16) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - if src == nil { - return fmt.Errorf("cannot scan null into %T", dst) - } - - if len(src) != 2 { - return fmt.Errorf("invalid length for int2: %v", len(src)) - } - - if p, ok := (dst).(*int16); ok { - *p = int16(binary.BigEndian.Uint16(src)) - return nil - } - - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) -} - -type scanPlanBinaryInt32 struct{} - -func (scanPlanBinaryInt32) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - if src == nil { - return fmt.Errorf("cannot scan null into %T", dst) - } - - if len(src) != 4 { - return fmt.Errorf("invalid length for int4: %v", len(src)) - } - - if p, ok := (dst).(*int32); ok { - *p = int32(binary.BigEndian.Uint32(src)) - return nil - } - - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) -} - -type scanPlanBinaryInt64 struct{} - -func (scanPlanBinaryInt64) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - if src == nil { - return fmt.Errorf("cannot scan null into %T", dst) - } - - if len(src) != 8 { - return fmt.Errorf("invalid length for int8: %v", len(src)) - } - - if p, ok := (dst).(*int64); ok { - *p = int64(binary.BigEndian.Uint64(src)) - return nil - } - - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) -} - -type scanPlanBinaryFloat32 struct{} - -func (scanPlanBinaryFloat32) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - if src == nil { - return fmt.Errorf("cannot scan null into %T", dst) - } - - if len(src) != 4 { - return fmt.Errorf("invalid length for int4: %v", len(src)) - } - - if p, ok := (dst).(*float32); ok { - n := int32(binary.BigEndian.Uint32(src)) - *p = float32(math.Float32frombits(uint32(n))) - return nil - } - - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) -} - -type scanPlanBinaryFloat64 struct{} - -func (scanPlanBinaryFloat64) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - if src == nil { - return fmt.Errorf("cannot scan null into %T", dst) - } - - if len(src) != 8 { - return fmt.Errorf("invalid length for int8: %v", len(src)) - } - - if p, ok := (dst).(*float64); ok { - n := int64(binary.BigEndian.Uint64(src)) - *p = float64(math.Float64frombits(uint64(n))) - return nil - } - - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) -} - -type scanPlanBinaryBytes struct{} - -func (scanPlanBinaryBytes) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - if p, ok := (dst).(*[]byte); ok { - *p = src - return nil - } - - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) -} - -type scanPlanString struct{} - -func (scanPlanString) Scan(ci *ConnInfo, oid uint32, formatCode int16, src []byte, dst interface{}) error { - if src == nil { - return fmt.Errorf("cannot scan null into %T", dst) - } - - if p, ok := (dst).(*string); ok { - *p = string(src) - return nil - } - - newPlan := ci.PlanScan(oid, formatCode, dst) - return newPlan.Scan(ci, oid, formatCode, src, dst) -} - -var scannerType = reflect.TypeOf((*sql.Scanner)(nil)).Elem() - -func isScanner(dst interface{}) bool { - if _, ok := dst.(sql.Scanner); ok { - return true - } - if t := reflect.TypeOf(dst); t != nil && t.Kind() == reflect.Ptr && t.Elem().Implements(scannerType) { - return true - } - return false -} - -// PlanScan prepares a plan to scan a value into dst. -func (ci *ConnInfo) PlanScan(oid uint32, formatCode int16, dst interface{}) ScanPlan { - switch formatCode { - case BinaryFormatCode: - switch dst.(type) { - case *string: - switch oid { - case TextOID, VarcharOID: - return scanPlanString{} - } - case *int16: - if oid == Int2OID { - return scanPlanBinaryInt16{} - } - case *int32: - if oid == Int4OID { - return scanPlanBinaryInt32{} - } - case *int64: - if oid == Int8OID { - return scanPlanBinaryInt64{} - } - case *float32: - if oid == Float4OID { - return scanPlanBinaryFloat32{} - } - case *float64: - if oid == Float8OID { - return scanPlanBinaryFloat64{} - } - case *[]byte: - switch oid { - case ByteaOID, TextOID, VarcharOID, JSONOID: - return scanPlanBinaryBytes{} - } - case BinaryDecoder: - return scanPlanDstBinaryDecoder{} - } - case TextFormatCode: - switch dst.(type) { - case *string: - return scanPlanString{} - case *[]byte: - if oid != ByteaOID { - return scanPlanBinaryBytes{} - } - case TextDecoder: - return scanPlanDstTextDecoder{} - } - } - - var dt *DataType - - if oid == 0 { - if dataType, ok := ci.DataTypeForValue(dst); ok { - dt = dataType - } - } else { - if dataType, ok := ci.DataTypeForOID(oid); ok { - dt = dataType - } - } - - if dt != nil { - if isScanner(dst) { - return (*scanPlanDataTypeSQLScanner)(dt) - } - return (*scanPlanDataTypeAssignTo)(dt) - } - - if isScanner(dst) { - return scanPlanSQLScanner{} - } - - return scanPlanReflection{} -} - -func (ci *ConnInfo) Scan(oid uint32, formatCode int16, src []byte, dst interface{}) error { - if dst == nil { - return nil - } - - plan := ci.PlanScan(oid, formatCode, dst) - return plan.Scan(ci, oid, formatCode, src, dst) -} - -func scanUnknownType(oid uint32, formatCode int16, buf []byte, dest interface{}) error { - switch dest := dest.(type) { - case *string: - if formatCode == BinaryFormatCode { - return fmt.Errorf("unknown oid %d in binary format cannot be scanned into %T", oid, dest) - } - *dest = string(buf) - return nil - case *[]byte: - *dest = buf - return nil - default: - if nextDst, retry := GetAssignToDstType(dest); retry { - return scanUnknownType(oid, formatCode, buf, nextDst) - } - return fmt.Errorf("unknown oid %d cannot be scanned into %T", oid, dest) - } -} - -// NewValue returns a new instance of the same type as v. -func NewValue(v Value) Value { - if tv, ok := v.(TypeValue); ok { - return tv.NewTypeValue() - } else { - return reflect.New(reflect.ValueOf(v).Elem().Type()).Interface().(Value) - } -} - -var nameValues map[string]Value - -func init() { - nameValues = map[string]Value{ - "_aclitem": &ACLItemArray{}, - "_bool": &BoolArray{}, - "_bpchar": &BPCharArray{}, - "_bytea": &ByteaArray{}, - "_cidr": &CIDRArray{}, - "_date": &DateArray{}, - "_float4": &Float4Array{}, - "_float8": &Float8Array{}, - "_inet": &InetArray{}, - "_int2": &Int2Array{}, - "_int4": &Int4Array{}, - "_int8": &Int8Array{}, - "_numeric": &NumericArray{}, - "_text": &TextArray{}, - "_timestamp": &TimestampArray{}, - "_timestamptz": &TimestamptzArray{}, - "_uuid": &UUIDArray{}, - "_varchar": &VarcharArray{}, - "_json": &JSONArray{}, - "_jsonb": &JSONBArray{}, - "aclitem": &ACLItem{}, - "bit": &Bit{}, - "bool": &Bool{}, - "box": &Box{}, - "bpchar": &BPChar{}, - "bytea": &Bytea{}, - "char": &QChar{}, - "cid": &CID{}, - "cidr": &CIDR{}, - "circle": &Circle{}, - "date": &Date{}, - "daterange": &Daterange{}, - "float4": &Float4{}, - "float8": &Float8{}, - "hstore": &Hstore{}, - "inet": &Inet{}, - "int2": &Int2{}, - "int4": &Int4{}, - "int4range": &Int4range{}, - "int4multirange": &Int4multirange{}, - "int8": &Int8{}, - "int8range": &Int8range{}, - "int8multirange": &Int8multirange{}, - "interval": &Interval{}, - "json": &JSON{}, - "jsonb": &JSONB{}, - "line": &Line{}, - "lseg": &Lseg{}, - "ltree": &Ltree{}, - "macaddr": &Macaddr{}, - "name": &Name{}, - "numeric": &Numeric{}, - "numrange": &Numrange{}, - "nummultirange": &Nummultirange{}, - "oid": &OIDValue{}, - "path": &Path{}, - "point": &Point{}, - "polygon": &Polygon{}, - "record": &Record{}, - "text": &Text{}, - "tid": &TID{}, - "timestamp": &Timestamp{}, - "timestamptz": &Timestamptz{}, - "tsrange": &Tsrange{}, - "_tsrange": &TsrangeArray{}, - "tstzrange": &Tstzrange{}, - "_tstzrange": &TstzrangeArray{}, - "unknown": &Unknown{}, - "uuid": &UUID{}, - "varbit": &Varbit{}, - "varchar": &Varchar{}, - "xid": &XID{}, - } -} diff --git a/vendor/github.com/jackc/pgtype/pguint32.go b/vendor/github.com/jackc/pgtype/pguint32.go deleted file mode 100644 index a0e88ca2..00000000 --- a/vendor/github.com/jackc/pgtype/pguint32.go +++ /dev/null @@ -1,162 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "strconv" - - "github.com/jackc/pgio" -) - -// pguint32 is the core type that is used to implement PostgreSQL types such as -// CID and XID. -type pguint32 struct { - Uint uint32 - Status Status -} - -// Set converts from src to dst. Note that as pguint32 is not a general -// number type Set does not do automatic type conversion as other number -// types do. -func (dst *pguint32) Set(src interface{}) error { - switch value := src.(type) { - case int64: - if value < 0 { - return fmt.Errorf("%d is less than minimum value for pguint32", value) - } - if value > math.MaxUint32 { - return fmt.Errorf("%d is greater than maximum value for pguint32", value) - } - *dst = pguint32{Uint: uint32(value), Status: Present} - case uint32: - *dst = pguint32{Uint: value, Status: Present} - default: - return fmt.Errorf("cannot convert %v to pguint32", value) - } - - return nil -} - -func (dst pguint32) Get() interface{} { - switch dst.Status { - case Present: - return dst.Uint - case Null: - return nil - default: - return dst.Status - } -} - -// AssignTo assigns from src to dst. Note that as pguint32 is not a general number -// type AssignTo does not do automatic type conversion as other number types do. -func (src *pguint32) AssignTo(dst interface{}) error { - switch v := dst.(type) { - case *uint32: - if src.Status == Present { - *v = src.Uint - } else { - return fmt.Errorf("cannot assign %v into %T", src, dst) - } - case **uint32: - if src.Status == Present { - n := src.Uint - *v = &n - } else { - *v = nil - } - } - - return nil -} - -func (dst *pguint32) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = pguint32{Status: Null} - return nil - } - - n, err := strconv.ParseUint(string(src), 10, 32) - if err != nil { - return err - } - - *dst = pguint32{Uint: uint32(n), Status: Present} - return nil -} - -func (dst *pguint32) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = pguint32{Status: Null} - return nil - } - - if len(src) != 4 { - return fmt.Errorf("invalid length: %v", len(src)) - } - - n := binary.BigEndian.Uint32(src) - *dst = pguint32{Uint: n, Status: Present} - return nil -} - -func (src pguint32) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, strconv.FormatUint(uint64(src.Uint), 10)...), nil -} - -func (src pguint32) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return pgio.AppendUint32(buf, src.Uint), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *pguint32) Scan(src interface{}) error { - if src == nil { - *dst = pguint32{Status: Null} - return nil - } - - switch src := src.(type) { - case uint32: - *dst = pguint32{Uint: src, Status: Present} - return nil - case int64: - *dst = pguint32{Uint: uint32(src), Status: Present} - return nil - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src pguint32) Value() (driver.Value, error) { - switch src.Status { - case Present: - return int64(src.Uint), nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} diff --git a/vendor/github.com/jackc/pgtype/point.go b/vendor/github.com/jackc/pgtype/point.go deleted file mode 100644 index 0c799106..00000000 --- a/vendor/github.com/jackc/pgtype/point.go +++ /dev/null @@ -1,214 +0,0 @@ -package pgtype - -import ( - "bytes" - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "strconv" - "strings" - - "github.com/jackc/pgio" -) - -type Vec2 struct { - X float64 - Y float64 -} - -type Point struct { - P Vec2 - Status Status -} - -func (dst *Point) Set(src interface{}) error { - if src == nil { - dst.Status = Null - return nil - } - err := fmt.Errorf("cannot convert %v to Point", src) - var p *Point - switch value := src.(type) { - case string: - p, err = parsePoint([]byte(value)) - case []byte: - p, err = parsePoint(value) - default: - return err - } - if err != nil { - return err - } - *dst = *p - return nil -} - -func parsePoint(src []byte) (*Point, error) { - if src == nil || bytes.Compare(src, []byte("null")) == 0 { - return &Point{Status: Null}, nil - } - - if len(src) < 5 { - return nil, fmt.Errorf("invalid length for point: %v", len(src)) - } - if src[0] == '"' && src[len(src)-1] == '"' { - src = src[1 : len(src)-1] - } - parts := strings.SplitN(string(src[1:len(src)-1]), ",", 2) - if len(parts) < 2 { - return nil, fmt.Errorf("invalid format for point") - } - - x, err := strconv.ParseFloat(parts[0], 64) - if err != nil { - return nil, err - } - - y, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return nil, err - } - - return &Point{P: Vec2{x, y}, Status: Present}, nil -} - -func (dst Point) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Point) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Point) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Point{Status: Null} - return nil - } - - if len(src) < 5 { - return fmt.Errorf("invalid length for point: %v", len(src)) - } - - parts := strings.SplitN(string(src[1:len(src)-1]), ",", 2) - if len(parts) < 2 { - return fmt.Errorf("invalid format for point") - } - - x, err := strconv.ParseFloat(parts[0], 64) - if err != nil { - return err - } - - y, err := strconv.ParseFloat(parts[1], 64) - if err != nil { - return err - } - - *dst = Point{P: Vec2{x, y}, Status: Present} - return nil -} - -func (dst *Point) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Point{Status: Null} - return nil - } - - if len(src) != 16 { - return fmt.Errorf("invalid length for point: %v", len(src)) - } - - x := binary.BigEndian.Uint64(src) - y := binary.BigEndian.Uint64(src[8:]) - - *dst = Point{ - P: Vec2{math.Float64frombits(x), math.Float64frombits(y)}, - Status: Present, - } - return nil -} - -func (src Point) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, fmt.Sprintf(`(%s,%s)`, - strconv.FormatFloat(src.P.X, 'f', -1, 64), - strconv.FormatFloat(src.P.Y, 'f', -1, 64), - )...), nil -} - -func (src Point) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendUint64(buf, math.Float64bits(src.P.X)) - buf = pgio.AppendUint64(buf, math.Float64bits(src.P.Y)) - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Point) Scan(src interface{}) error { - if src == nil { - *dst = Point{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Point) Value() (driver.Value, error) { - return EncodeValueText(src) -} - -func (src Point) MarshalJSON() ([]byte, error) { - switch src.Status { - case Present: - var buff bytes.Buffer - buff.WriteByte('"') - buff.WriteString(fmt.Sprintf("(%g,%g)", src.P.X, src.P.Y)) - buff.WriteByte('"') - return buff.Bytes(), nil - case Null: - return []byte("null"), nil - case Undefined: - return nil, errUndefined - } - return nil, errBadStatus -} - -func (dst *Point) UnmarshalJSON(point []byte) error { - p, err := parsePoint(point) - if err != nil { - return err - } - *dst = *p - return nil -} diff --git a/vendor/github.com/jackc/pgtype/polygon.go b/vendor/github.com/jackc/pgtype/polygon.go deleted file mode 100644 index 207cadc0..00000000 --- a/vendor/github.com/jackc/pgtype/polygon.go +++ /dev/null @@ -1,226 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "math" - "strconv" - "strings" - - "github.com/jackc/pgio" -) - -type Polygon struct { - P []Vec2 - Status Status -} - -// Set converts src to dest. -// -// src can be nil, string, []float64, and []pgtype.Vec2. -// -// If src is string the format must be ((x1,y1),(x2,y2),...,(xn,yn)). -// Important that there are no spaces in it. -func (dst *Polygon) Set(src interface{}) error { - if src == nil { - dst.Status = Null - return nil - } - err := fmt.Errorf("cannot convert %v to Polygon", src) - var p *Polygon - switch value := src.(type) { - case string: - p, err = stringToPolygon(value) - case []Vec2: - p = &Polygon{Status: Present, P: value} - err = nil - case []float64: - p, err = float64ToPolygon(value) - default: - return err - } - if err != nil { - return err - } - *dst = *p - return nil -} - -func stringToPolygon(src string) (*Polygon, error) { - p := &Polygon{} - err := p.DecodeText(nil, []byte(src)) - return p, err -} - -func float64ToPolygon(src []float64) (*Polygon, error) { - p := &Polygon{Status: Null} - if len(src) == 0 { - return p, nil - } - if len(src)%2 != 0 { - p.Status = Undefined - return p, fmt.Errorf("invalid length for polygon: %v", len(src)) - } - p.Status = Present - p.P = make([]Vec2, 0) - for i := 0; i < len(src); i += 2 { - p.P = append(p.P, Vec2{X: src[i], Y: src[i+1]}) - } - return p, nil -} - -func (dst Polygon) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Polygon) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Polygon) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Polygon{Status: Null} - return nil - } - - if len(src) < 7 { - return fmt.Errorf("invalid length for Polygon: %v", len(src)) - } - - points := make([]Vec2, 0) - - str := string(src[2:]) - - for { - end := strings.IndexByte(str, ',') - x, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - str = str[end+1:] - end = strings.IndexByte(str, ')') - - y, err := strconv.ParseFloat(str[:end], 64) - if err != nil { - return err - } - - points = append(points, Vec2{x, y}) - - if end+3 < len(str) { - str = str[end+3:] - } else { - break - } - } - - *dst = Polygon{P: points, Status: Present} - return nil -} - -func (dst *Polygon) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Polygon{Status: Null} - return nil - } - - if len(src) < 5 { - return fmt.Errorf("invalid length for Polygon: %v", len(src)) - } - - pointCount := int(binary.BigEndian.Uint32(src)) - rp := 4 - - if 4+pointCount*16 != len(src) { - return fmt.Errorf("invalid length for Polygon with %d points: %v", pointCount, len(src)) - } - - points := make([]Vec2, pointCount) - for i := 0; i < len(points); i++ { - x := binary.BigEndian.Uint64(src[rp:]) - rp += 8 - y := binary.BigEndian.Uint64(src[rp:]) - rp += 8 - points[i] = Vec2{math.Float64frombits(x), math.Float64frombits(y)} - } - - *dst = Polygon{ - P: points, - Status: Present, - } - return nil -} - -func (src Polygon) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, '(') - - for i, p := range src.P { - if i > 0 { - buf = append(buf, ',') - } - buf = append(buf, fmt.Sprintf(`(%s,%s)`, - strconv.FormatFloat(p.X, 'f', -1, 64), - strconv.FormatFloat(p.Y, 'f', -1, 64), - )...) - } - - return append(buf, ')'), nil -} - -func (src Polygon) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendInt32(buf, int32(len(src.P))) - - for _, p := range src.P { - buf = pgio.AppendUint64(buf, math.Float64bits(p.X)) - buf = pgio.AppendUint64(buf, math.Float64bits(p.Y)) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Polygon) Scan(src interface{}) error { - if src == nil { - *dst = Polygon{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Polygon) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/qchar.go b/vendor/github.com/jackc/pgtype/qchar.go deleted file mode 100644 index 574f6066..00000000 --- a/vendor/github.com/jackc/pgtype/qchar.go +++ /dev/null @@ -1,152 +0,0 @@ -package pgtype - -import ( - "fmt" - "math" - "strconv" -) - -// QChar is for PostgreSQL's special 8-bit-only "char" type more akin to the C -// language's char type, or Go's byte type. (Note that the name in PostgreSQL -// itself is "char", in double-quotes, and not char.) It gets used a lot in -// PostgreSQL's system tables to hold a single ASCII character value (eg -// pg_class.relkind). It is named Qchar for quoted char to disambiguate from SQL -// standard type char. -// -// Not all possible values of QChar are representable in the text format. -// Therefore, QChar does not implement TextEncoder and TextDecoder. In -// addition, database/sql Scanner and database/sql/driver Value are not -// implemented. -type QChar struct { - Int int8 - Status Status -} - -func (dst *QChar) Set(src interface{}) error { - if src == nil { - *dst = QChar{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case int8: - *dst = QChar{Int: value, Status: Present} - case uint8: - if value > math.MaxInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - *dst = QChar{Int: int8(value), Status: Present} - case int16: - if value < math.MinInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - if value > math.MaxInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - *dst = QChar{Int: int8(value), Status: Present} - case uint16: - if value > math.MaxInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - *dst = QChar{Int: int8(value), Status: Present} - case int32: - if value < math.MinInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - if value > math.MaxInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - *dst = QChar{Int: int8(value), Status: Present} - case uint32: - if value > math.MaxInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - *dst = QChar{Int: int8(value), Status: Present} - case int64: - if value < math.MinInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - if value > math.MaxInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - *dst = QChar{Int: int8(value), Status: Present} - case uint64: - if value > math.MaxInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - *dst = QChar{Int: int8(value), Status: Present} - case int: - if value < math.MinInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - if value > math.MaxInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - *dst = QChar{Int: int8(value), Status: Present} - case uint: - if value > math.MaxInt8 { - return fmt.Errorf("%d is greater than maximum value for QChar", value) - } - *dst = QChar{Int: int8(value), Status: Present} - case string: - num, err := strconv.ParseInt(value, 10, 8) - if err != nil { - return err - } - *dst = QChar{Int: int8(num), Status: Present} - default: - if originalSrc, ok := underlyingNumberType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to QChar", value) - } - - return nil -} - -func (dst QChar) Get() interface{} { - switch dst.Status { - case Present: - return dst.Int - case Null: - return nil - default: - return dst.Status - } -} - -func (src *QChar) AssignTo(dst interface{}) error { - return int64AssignTo(int64(src.Int), src.Status, dst) -} - -func (dst *QChar) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = QChar{Status: Null} - return nil - } - - if len(src) != 1 { - return fmt.Errorf(`invalid length for "char": %v`, len(src)) - } - - *dst = QChar{Int: int8(src[0]), Status: Present} - return nil -} - -func (src QChar) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, byte(src.Int)), nil -} diff --git a/vendor/github.com/jackc/pgtype/record.go b/vendor/github.com/jackc/pgtype/record.go deleted file mode 100644 index 5cf2c93a..00000000 --- a/vendor/github.com/jackc/pgtype/record.go +++ /dev/null @@ -1,126 +0,0 @@ -package pgtype - -import ( - "fmt" - "reflect" -) - -// Record is the generic PostgreSQL record type such as is created with the -// "row" function. Record only implements BinaryDecoder and Value. The text -// format output format from PostgreSQL does not include type information and is -// therefore impossible to decode. No encoders are implemented because -// PostgreSQL does not support input of generic records. -type Record struct { - Fields []Value - Status Status -} - -func (dst *Record) Set(src interface{}) error { - if src == nil { - *dst = Record{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case []Value: - *dst = Record{Fields: value, Status: Present} - default: - return fmt.Errorf("cannot convert %v to Record", src) - } - - return nil -} - -func (dst Record) Get() interface{} { - switch dst.Status { - case Present: - return dst.Fields - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Record) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *[]Value: - *v = make([]Value, len(src.Fields)) - copy(*v, src.Fields) - return nil - case *[]interface{}: - *v = make([]interface{}, len(src.Fields)) - for i := range *v { - (*v)[i] = src.Fields[i].Get() - } - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func prepareNewBinaryDecoder(ci *ConnInfo, fieldOID uint32, v *Value) (BinaryDecoder, error) { - var binaryDecoder BinaryDecoder - - if dt, ok := ci.DataTypeForOID(fieldOID); ok { - binaryDecoder, _ = dt.Value.(BinaryDecoder) - } else { - return nil, fmt.Errorf("unknown oid while decoding record: %v", fieldOID) - } - - if binaryDecoder == nil { - return nil, fmt.Errorf("no binary decoder registered for: %v", fieldOID) - } - - // Duplicate struct to scan into - binaryDecoder = reflect.New(reflect.ValueOf(binaryDecoder).Elem().Type()).Interface().(BinaryDecoder) - *v = binaryDecoder.(Value) - return binaryDecoder, nil -} - -func (dst *Record) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Record{Status: Null} - return nil - } - - scanner := NewCompositeBinaryScanner(ci, src) - - fields := make([]Value, scanner.FieldCount()) - - for i := 0; scanner.Next(); i++ { - binaryDecoder, err := prepareNewBinaryDecoder(ci, scanner.OID(), &fields[i]) - if err != nil { - return err - } - - if err = binaryDecoder.DecodeBinary(ci, scanner.Bytes()); err != nil { - return err - } - } - - if scanner.Err() != nil { - return scanner.Err() - } - - *dst = Record{Fields: fields, Status: Present} - - return nil -} diff --git a/vendor/github.com/jackc/pgtype/record_array.go b/vendor/github.com/jackc/pgtype/record_array.go deleted file mode 100644 index 2271717a..00000000 --- a/vendor/github.com/jackc/pgtype/record_array.go +++ /dev/null @@ -1,318 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "encoding/binary" - "fmt" - "reflect" -) - -type RecordArray struct { - Elements []Record - Dimensions []ArrayDimension - Status Status -} - -func (dst *RecordArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = RecordArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case [][]Value: - if value == nil { - *dst = RecordArray{Status: Null} - } else if len(value) == 0 { - *dst = RecordArray{Status: Present} - } else { - elements := make([]Record, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = RecordArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Record: - if value == nil { - *dst = RecordArray{Status: Null} - } else if len(value) == 0 { - *dst = RecordArray{Status: Present} - } else { - *dst = RecordArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = RecordArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for RecordArray", src) - } - if elementsLength == 0 { - *dst = RecordArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to RecordArray", src) - } - - *dst = RecordArray{ - Elements: make([]Record, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Record, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to RecordArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *RecordArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to RecordArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in RecordArray", err) - } - index++ - - return index, nil -} - -func (dst RecordArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *RecordArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[][]Value: - *v = make([][]Value, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *RecordArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from RecordArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from RecordArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *RecordArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = RecordArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = RecordArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Record, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = RecordArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} diff --git a/vendor/github.com/jackc/pgtype/text.go b/vendor/github.com/jackc/pgtype/text.go deleted file mode 100644 index a01815d9..00000000 --- a/vendor/github.com/jackc/pgtype/text.go +++ /dev/null @@ -1,212 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/json" - "fmt" -) - -type Text struct { - String string - Status Status -} - -func (dst *Text) Set(src interface{}) error { - if src == nil { - *dst = Text{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case string: - *dst = Text{String: value, Status: Present} - case *string: - if value == nil { - *dst = Text{Status: Null} - } else { - *dst = Text{String: *value, Status: Present} - } - case []byte: - if value == nil { - *dst = Text{Status: Null} - } else { - *dst = Text{String: string(value), Status: Present} - } - case fmt.Stringer: - if value == fmt.Stringer(nil) { - *dst = Text{Status: Null} - } else { - *dst = Text{String: value.String(), Status: Present} - } - default: - // Cannot be part of the switch: If Value() returns nil on - // non-string, we should still try to checks the underlying type - // using reflection. - // - // For example the struct might implement driver.Valuer with - // pointer receiver and fmt.Stringer with value receiver. - if value, ok := src.(driver.Valuer); ok { - if value == driver.Valuer(nil) { - *dst = Text{Status: Null} - return nil - } else { - v, err := value.Value() - if err != nil { - return fmt.Errorf("driver.Valuer Value() method failed: %w", err) - } - - // Handles also v == nil case. - if s, ok := v.(string); ok { - *dst = Text{String: s, Status: Present} - return nil - } - } - } - - if originalSrc, ok := underlyingStringType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Text", value) - } - - return nil -} - -func (dst Text) Get() interface{} { - switch dst.Status { - case Present: - return dst.String - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Text) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *string: - *v = src.String - return nil - case *[]byte: - *v = make([]byte, len(src.String)) - copy(*v, src.String) - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (Text) PreferredResultFormat() int16 { - return TextFormatCode -} - -func (dst *Text) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Text{Status: Null} - return nil - } - - *dst = Text{String: string(src), Status: Present} - return nil -} - -func (dst *Text) DecodeBinary(ci *ConnInfo, src []byte) error { - return dst.DecodeText(ci, src) -} - -func (Text) PreferredParamFormat() int16 { - return TextFormatCode -} - -func (src Text) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, src.String...), nil -} - -func (src Text) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return src.EncodeText(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *Text) Scan(src interface{}) error { - if src == nil { - *dst = Text{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Text) Value() (driver.Value, error) { - switch src.Status { - case Present: - return src.String, nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} - -func (src Text) MarshalJSON() ([]byte, error) { - switch src.Status { - case Present: - return json.Marshal(src.String) - case Null: - return []byte("null"), nil - case Undefined: - return nil, errUndefined - } - - return nil, errBadStatus -} - -func (dst *Text) UnmarshalJSON(b []byte) error { - var s *string - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - if s == nil { - *dst = Text{Status: Null} - } else { - *dst = Text{String: *s, Status: Present} - } - - return nil -} diff --git a/vendor/github.com/jackc/pgtype/text_array.go b/vendor/github.com/jackc/pgtype/text_array.go deleted file mode 100644 index 2461966b..00000000 --- a/vendor/github.com/jackc/pgtype/text_array.go +++ /dev/null @@ -1,517 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type TextArray struct { - Elements []Text - Dimensions []ArrayDimension - Status Status -} - -func (dst *TextArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = TextArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []string: - if value == nil { - *dst = TextArray{Status: Null} - } else if len(value) == 0 { - *dst = TextArray{Status: Present} - } else { - elements := make([]Text, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = TextArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*string: - if value == nil { - *dst = TextArray{Status: Null} - } else if len(value) == 0 { - *dst = TextArray{Status: Present} - } else { - elements := make([]Text, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = TextArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Text: - if value == nil { - *dst = TextArray{Status: Null} - } else if len(value) == 0 { - *dst = TextArray{Status: Present} - } else { - *dst = TextArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = TextArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for TextArray", src) - } - if elementsLength == 0 { - *dst = TextArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to TextArray", src) - } - - *dst = TextArray{ - Elements: make([]Text, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Text, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to TextArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *TextArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to TextArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in TextArray", err) - } - index++ - - return index, nil -} - -func (dst TextArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *TextArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]string: - *v = make([]string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*string: - *v = make([]*string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *TextArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from TextArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from TextArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *TextArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TextArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Text - - if len(uta.Elements) > 0 { - elements = make([]Text, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Text - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = TextArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *TextArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TextArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = TextArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Text, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = TextArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src TextArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src TextArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("text"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "text") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *TextArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src TextArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/tid.go b/vendor/github.com/jackc/pgtype/tid.go deleted file mode 100644 index 4bb57f64..00000000 --- a/vendor/github.com/jackc/pgtype/tid.go +++ /dev/null @@ -1,156 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "strconv" - "strings" - - "github.com/jackc/pgio" -) - -// TID is PostgreSQL's Tuple Identifier type. -// -// When one does -// -// select ctid, * from some_table; -// -// it is the data type of the ctid hidden system column. -// -// It is currently implemented as a pair unsigned two byte integers. -// Its conversion functions can be found in src/backend/utils/adt/tid.c -// in the PostgreSQL sources. -type TID struct { - BlockNumber uint32 - OffsetNumber uint16 - Status Status -} - -func (dst *TID) Set(src interface{}) error { - return fmt.Errorf("cannot convert %v to TID", src) -} - -func (dst TID) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *TID) AssignTo(dst interface{}) error { - if src.Status == Present { - switch v := dst.(type) { - case *string: - *v = fmt.Sprintf(`(%d,%d)`, src.BlockNumber, src.OffsetNumber) - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - } - - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *TID) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TID{Status: Null} - return nil - } - - if len(src) < 5 { - return fmt.Errorf("invalid length for tid: %v", len(src)) - } - - parts := strings.SplitN(string(src[1:len(src)-1]), ",", 2) - if len(parts) < 2 { - return fmt.Errorf("invalid format for tid") - } - - blockNumber, err := strconv.ParseUint(parts[0], 10, 32) - if err != nil { - return err - } - - offsetNumber, err := strconv.ParseUint(parts[1], 10, 16) - if err != nil { - return err - } - - *dst = TID{BlockNumber: uint32(blockNumber), OffsetNumber: uint16(offsetNumber), Status: Present} - return nil -} - -func (dst *TID) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TID{Status: Null} - return nil - } - - if len(src) != 6 { - return fmt.Errorf("invalid length for tid: %v", len(src)) - } - - *dst = TID{ - BlockNumber: binary.BigEndian.Uint32(src), - OffsetNumber: binary.BigEndian.Uint16(src[4:]), - Status: Present, - } - return nil -} - -func (src TID) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, fmt.Sprintf(`(%d,%d)`, src.BlockNumber, src.OffsetNumber)...) - return buf, nil -} - -func (src TID) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendUint32(buf, src.BlockNumber) - buf = pgio.AppendUint16(buf, src.OffsetNumber) - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *TID) Scan(src interface{}) error { - if src == nil { - *dst = TID{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src TID) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/time.go b/vendor/github.com/jackc/pgtype/time.go deleted file mode 100644 index f7a28870..00000000 --- a/vendor/github.com/jackc/pgtype/time.go +++ /dev/null @@ -1,231 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "strconv" - "time" - - "github.com/jackc/pgio" -) - -// Time represents the PostgreSQL time type. The PostgreSQL time is a time of day without time zone. -// -// Time is represented as the number of microseconds since midnight in the same way that PostgreSQL does. Other time -// and date types in pgtype can use time.Time as the underlying representation. However, pgtype.Time type cannot due -// to needing to handle 24:00:00. time.Time converts that to 00:00:00 on the following day. -type Time struct { - Microseconds int64 // Number of microseconds since midnight - Status Status -} - -// Set converts src into a Time and stores in dst. -func (dst *Time) Set(src interface{}) error { - if src == nil { - *dst = Time{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case time.Time: - usec := int64(value.Hour())*microsecondsPerHour + - int64(value.Minute())*microsecondsPerMinute + - int64(value.Second())*microsecondsPerSecond + - int64(value.Nanosecond())/1000 - *dst = Time{Microseconds: usec, Status: Present} - case *time.Time: - if value == nil { - *dst = Time{Status: Null} - } else { - return dst.Set(*value) - } - default: - if originalSrc, ok := underlyingTimeType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Time", value) - } - - return nil -} - -func (dst Time) Get() interface{} { - switch dst.Status { - case Present: - return dst.Microseconds - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Time) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *time.Time: - // 24:00:00 is max allowed time in PostgreSQL, but time.Time will normalize that to 00:00:00 the next day. - var maxRepresentableByTime int64 = 24*60*60*1000000 - 1 - if src.Microseconds > maxRepresentableByTime { - return fmt.Errorf("%d microseconds cannot be represented as time.Time", src.Microseconds) - } - - usec := src.Microseconds - hours := usec / microsecondsPerHour - usec -= hours * microsecondsPerHour - minutes := usec / microsecondsPerMinute - usec -= minutes * microsecondsPerMinute - seconds := usec / microsecondsPerSecond - usec -= seconds * microsecondsPerSecond - ns := usec * 1000 - *v = time.Date(2000, 1, 1, int(hours), int(minutes), int(seconds), int(ns), time.UTC) - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -// DecodeText decodes from src into dst. -func (dst *Time) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Time{Status: Null} - return nil - } - - s := string(src) - - if len(s) < 8 { - return fmt.Errorf("cannot decode %v into Time", s) - } - - hours, err := strconv.ParseInt(s[0:2], 10, 64) - if err != nil { - return fmt.Errorf("cannot decode %v into Time", s) - } - usec := hours * microsecondsPerHour - - minutes, err := strconv.ParseInt(s[3:5], 10, 64) - if err != nil { - return fmt.Errorf("cannot decode %v into Time", s) - } - usec += minutes * microsecondsPerMinute - - seconds, err := strconv.ParseInt(s[6:8], 10, 64) - if err != nil { - return fmt.Errorf("cannot decode %v into Time", s) - } - usec += seconds * microsecondsPerSecond - - if len(s) > 9 { - fraction := s[9:] - n, err := strconv.ParseInt(fraction, 10, 64) - if err != nil { - return fmt.Errorf("cannot decode %v into Time", s) - } - - for i := len(fraction); i < 6; i++ { - n *= 10 - } - - usec += n - } - - *dst = Time{Microseconds: usec, Status: Present} - - return nil -} - -// DecodeBinary decodes from src into dst. -func (dst *Time) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Time{Status: Null} - return nil - } - - if len(src) != 8 { - return fmt.Errorf("invalid length for time: %v", len(src)) - } - - usec := int64(binary.BigEndian.Uint64(src)) - *dst = Time{Microseconds: usec, Status: Present} - - return nil -} - -// EncodeText writes the text encoding of src into w. -func (src Time) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - usec := src.Microseconds - hours := usec / microsecondsPerHour - usec -= hours * microsecondsPerHour - minutes := usec / microsecondsPerMinute - usec -= minutes * microsecondsPerMinute - seconds := usec / microsecondsPerSecond - usec -= seconds * microsecondsPerSecond - - s := fmt.Sprintf("%02d:%02d:%02d.%06d", hours, minutes, seconds, usec) - - return append(buf, s...), nil -} - -// EncodeBinary writes the binary encoding of src into w. If src.Time is not in -// the UTC time zone it returns an error. -func (src Time) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return pgio.AppendInt64(buf, src.Microseconds), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Time) Scan(src interface{}) error { - if src == nil { - *dst = Time{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - case time.Time: - return dst.Set(src) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Time) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/timestamp.go b/vendor/github.com/jackc/pgtype/timestamp.go deleted file mode 100644 index fce490c8..00000000 --- a/vendor/github.com/jackc/pgtype/timestamp.go +++ /dev/null @@ -1,261 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "strings" - "time" - - "github.com/jackc/pgio" -) - -const pgTimestampFormat = "2006-01-02 15:04:05.999999999" - -// Timestamp represents the PostgreSQL timestamp type. The PostgreSQL -// timestamp does not have a time zone. This presents a problem when -// translating to and from time.Time which requires a time zone. It is highly -// recommended to use timestamptz whenever possible. Timestamp methods either -// convert to UTC or return an error on non-UTC times. -type Timestamp struct { - Time time.Time // Time must always be in UTC. - Status Status - InfinityModifier InfinityModifier -} - -// Set converts src into a Timestamp and stores in dst. If src is a -// time.Time in a non-UTC time zone, the time zone is discarded. -func (dst *Timestamp) Set(src interface{}) error { - if src == nil { - *dst = Timestamp{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case time.Time: - *dst = Timestamp{Time: time.Date(value.Year(), value.Month(), value.Day(), value.Hour(), value.Minute(), value.Second(), value.Nanosecond(), time.UTC), Status: Present} - case *time.Time: - if value == nil { - *dst = Timestamp{Status: Null} - } else { - return dst.Set(*value) - } - case string: - return dst.DecodeText(nil, []byte(value)) - case *string: - if value == nil { - *dst = Timestamp{Status: Null} - } else { - return dst.Set(*value) - } - case InfinityModifier: - *dst = Timestamp{InfinityModifier: value, Status: Present} - default: - if originalSrc, ok := underlyingTimeType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Timestamp", value) - } - - return nil -} - -func (dst Timestamp) Get() interface{} { - switch dst.Status { - case Present: - if dst.InfinityModifier != None { - return dst.InfinityModifier - } - return dst.Time - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Timestamp) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *time.Time: - if src.InfinityModifier != None { - return fmt.Errorf("cannot assign %v to %T", src, dst) - } - *v = src.Time - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -// DecodeText decodes from src into dst. The decoded time is considered to -// be in UTC. -func (dst *Timestamp) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Timestamp{Status: Null} - return nil - } - - sbuf := string(src) - switch sbuf { - case "infinity": - *dst = Timestamp{Status: Present, InfinityModifier: Infinity} - case "-infinity": - *dst = Timestamp{Status: Present, InfinityModifier: -Infinity} - default: - if strings.HasSuffix(sbuf, " BC") { - t, err := time.Parse(pgTimestampFormat, strings.TrimRight(sbuf, " BC")) - t2 := time.Date(1-t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), t.Location()) - if err != nil { - return err - } - *dst = Timestamp{Time: t2, Status: Present} - return nil - } - tim, err := time.Parse(pgTimestampFormat, sbuf) - if err != nil { - return err - } - - *dst = Timestamp{Time: tim, Status: Present} - } - - return nil -} - -// DecodeBinary decodes from src into dst. The decoded time is considered to -// be in UTC. -func (dst *Timestamp) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Timestamp{Status: Null} - return nil - } - - if len(src) != 8 { - return fmt.Errorf("invalid length for timestamp: %v", len(src)) - } - - microsecSinceY2K := int64(binary.BigEndian.Uint64(src)) - - switch microsecSinceY2K { - case infinityMicrosecondOffset: - *dst = Timestamp{Status: Present, InfinityModifier: Infinity} - case negativeInfinityMicrosecondOffset: - *dst = Timestamp{Status: Present, InfinityModifier: -Infinity} - default: - tim := time.Unix( - microsecFromUnixEpochToY2K/1000000+microsecSinceY2K/1000000, - (microsecFromUnixEpochToY2K%1000000*1000)+(microsecSinceY2K%1000000*1000), - ).UTC() - *dst = Timestamp{Time: tim, Status: Present} - } - - return nil -} - -// EncodeText writes the text encoding of src into w. If src.Time is not in -// the UTC time zone it returns an error. -func (src Timestamp) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - if src.Time.Location() != time.UTC { - return nil, fmt.Errorf("cannot encode non-UTC time into timestamp") - } - - var s string - - switch src.InfinityModifier { - case None: - s = src.Time.Truncate(time.Microsecond).Format(pgTimestampFormat) - case Infinity: - s = "infinity" - case NegativeInfinity: - s = "-infinity" - } - - return append(buf, s...), nil -} - -// EncodeBinary writes the binary encoding of src into w. If src.Time is not in -// the UTC time zone it returns an error. -func (src Timestamp) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - if src.Time.Location() != time.UTC { - return nil, fmt.Errorf("cannot encode non-UTC time into timestamp") - } - - var microsecSinceY2K int64 - switch src.InfinityModifier { - case None: - microsecSinceUnixEpoch := src.Time.Unix()*1000000 + int64(src.Time.Nanosecond())/1000 - microsecSinceY2K = microsecSinceUnixEpoch - microsecFromUnixEpochToY2K - case Infinity: - microsecSinceY2K = infinityMicrosecondOffset - case NegativeInfinity: - microsecSinceY2K = negativeInfinityMicrosecondOffset - } - - return pgio.AppendInt64(buf, microsecSinceY2K), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Timestamp) Scan(src interface{}) error { - if src == nil { - *dst = Timestamp{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - case time.Time: - *dst = Timestamp{Time: src, Status: Present} - return nil - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Timestamp) Value() (driver.Value, error) { - switch src.Status { - case Present: - if src.InfinityModifier != None { - return src.InfinityModifier.String(), nil - } - return src.Time, nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} diff --git a/vendor/github.com/jackc/pgtype/timestamp_array.go b/vendor/github.com/jackc/pgtype/timestamp_array.go deleted file mode 100644 index e12481e3..00000000 --- a/vendor/github.com/jackc/pgtype/timestamp_array.go +++ /dev/null @@ -1,518 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - "time" - - "github.com/jackc/pgio" -) - -type TimestampArray struct { - Elements []Timestamp - Dimensions []ArrayDimension - Status Status -} - -func (dst *TimestampArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = TimestampArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []time.Time: - if value == nil { - *dst = TimestampArray{Status: Null} - } else if len(value) == 0 { - *dst = TimestampArray{Status: Present} - } else { - elements := make([]Timestamp, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = TimestampArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*time.Time: - if value == nil { - *dst = TimestampArray{Status: Null} - } else if len(value) == 0 { - *dst = TimestampArray{Status: Present} - } else { - elements := make([]Timestamp, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = TimestampArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Timestamp: - if value == nil { - *dst = TimestampArray{Status: Null} - } else if len(value) == 0 { - *dst = TimestampArray{Status: Present} - } else { - *dst = TimestampArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = TimestampArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for TimestampArray", src) - } - if elementsLength == 0 { - *dst = TimestampArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to TimestampArray", src) - } - - *dst = TimestampArray{ - Elements: make([]Timestamp, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Timestamp, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to TimestampArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *TimestampArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to TimestampArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in TimestampArray", err) - } - index++ - - return index, nil -} - -func (dst TimestampArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *TimestampArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]time.Time: - *v = make([]time.Time, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*time.Time: - *v = make([]*time.Time, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *TimestampArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from TimestampArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from TimestampArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *TimestampArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TimestampArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Timestamp - - if len(uta.Elements) > 0 { - elements = make([]Timestamp, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Timestamp - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = TimestampArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *TimestampArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TimestampArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = TimestampArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Timestamp, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = TimestampArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src TimestampArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src TimestampArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("timestamp"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "timestamp") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *TimestampArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src TimestampArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/timestamptz.go b/vendor/github.com/jackc/pgtype/timestamptz.go deleted file mode 100644 index 72ae4991..00000000 --- a/vendor/github.com/jackc/pgtype/timestamptz.go +++ /dev/null @@ -1,322 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "encoding/json" - "fmt" - "time" - - "github.com/jackc/pgio" -) - -const pgTimestamptzHourFormat = "2006-01-02 15:04:05.999999999Z07" -const pgTimestamptzMinuteFormat = "2006-01-02 15:04:05.999999999Z07:00" -const pgTimestamptzSecondFormat = "2006-01-02 15:04:05.999999999Z07:00:00" -const microsecFromUnixEpochToY2K = 946684800 * 1000000 - -const ( - negativeInfinityMicrosecondOffset = -9223372036854775808 - infinityMicrosecondOffset = 9223372036854775807 -) - -type Timestamptz struct { - Time time.Time - Status Status - InfinityModifier InfinityModifier -} - -func (dst *Timestamptz) Set(src interface{}) error { - if src == nil { - *dst = Timestamptz{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - switch value := src.(type) { - case time.Time: - *dst = Timestamptz{Time: value, Status: Present} - case *time.Time: - if value == nil { - *dst = Timestamptz{Status: Null} - } else { - return dst.Set(*value) - } - case string: - return dst.DecodeText(nil, []byte(value)) - case *string: - if value == nil { - *dst = Timestamptz{Status: Null} - } else { - return dst.Set(*value) - } - case InfinityModifier: - *dst = Timestamptz{InfinityModifier: value, Status: Present} - default: - if originalSrc, ok := underlyingTimeType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to Timestamptz", value) - } - - return nil -} - -func (dst Timestamptz) Get() interface{} { - switch dst.Status { - case Present: - if dst.InfinityModifier != None { - return dst.InfinityModifier - } - return dst.Time - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Timestamptz) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *time.Time: - if src.InfinityModifier != None { - return fmt.Errorf("cannot assign %v to %T", src, dst) - } - *v = src.Time - return nil - default: - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - return fmt.Errorf("unable to assign to %T", dst) - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (dst *Timestamptz) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Timestamptz{Status: Null} - return nil - } - - sbuf := string(src) - switch sbuf { - case "infinity": - *dst = Timestamptz{Status: Present, InfinityModifier: Infinity} - case "-infinity": - *dst = Timestamptz{Status: Present, InfinityModifier: -Infinity} - default: - var format string - if len(sbuf) >= 9 && (sbuf[len(sbuf)-9] == '-' || sbuf[len(sbuf)-9] == '+') { - format = pgTimestamptzSecondFormat - } else if len(sbuf) >= 6 && (sbuf[len(sbuf)-6] == '-' || sbuf[len(sbuf)-6] == '+') { - format = pgTimestamptzMinuteFormat - } else { - format = pgTimestamptzHourFormat - } - - tim, err := time.Parse(format, sbuf) - if err != nil { - return err - } - - *dst = Timestamptz{Time: normalizePotentialUTC(tim), Status: Present} - } - - return nil -} - -func (dst *Timestamptz) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Timestamptz{Status: Null} - return nil - } - - if len(src) != 8 { - return fmt.Errorf("invalid length for timestamptz: %v", len(src)) - } - - microsecSinceY2K := int64(binary.BigEndian.Uint64(src)) - - switch microsecSinceY2K { - case infinityMicrosecondOffset: - *dst = Timestamptz{Status: Present, InfinityModifier: Infinity} - case negativeInfinityMicrosecondOffset: - *dst = Timestamptz{Status: Present, InfinityModifier: -Infinity} - default: - tim := time.Unix( - microsecFromUnixEpochToY2K/1000000+microsecSinceY2K/1000000, - (microsecFromUnixEpochToY2K%1000000*1000)+(microsecSinceY2K%1000000*1000), - ) - *dst = Timestamptz{Time: tim, Status: Present} - } - - return nil -} - -func (src Timestamptz) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var s string - - switch src.InfinityModifier { - case None: - s = src.Time.UTC().Truncate(time.Microsecond).Format(pgTimestamptzSecondFormat) - case Infinity: - s = "infinity" - case NegativeInfinity: - s = "-infinity" - } - - return append(buf, s...), nil -} - -func (src Timestamptz) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var microsecSinceY2K int64 - switch src.InfinityModifier { - case None: - microsecSinceUnixEpoch := src.Time.Unix()*1000000 + int64(src.Time.Nanosecond())/1000 - microsecSinceY2K = microsecSinceUnixEpoch - microsecFromUnixEpochToY2K - case Infinity: - microsecSinceY2K = infinityMicrosecondOffset - case NegativeInfinity: - microsecSinceY2K = negativeInfinityMicrosecondOffset - } - - return pgio.AppendInt64(buf, microsecSinceY2K), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Timestamptz) Scan(src interface{}) error { - if src == nil { - *dst = Timestamptz{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - case time.Time: - *dst = Timestamptz{Time: src, Status: Present} - return nil - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Timestamptz) Value() (driver.Value, error) { - switch src.Status { - case Present: - if src.InfinityModifier != None { - return src.InfinityModifier.String(), nil - } - if src.Time.Location().String() == time.UTC.String() { - return src.Time.UTC(), nil - } - return src.Time, nil - case Null: - return nil, nil - default: - return nil, errUndefined - } -} - -func (src Timestamptz) MarshalJSON() ([]byte, error) { - switch src.Status { - case Null: - return []byte("null"), nil - case Undefined: - return nil, errUndefined - } - - if src.Status != Present { - return nil, errBadStatus - } - - var s string - - switch src.InfinityModifier { - case None: - s = src.Time.Format(time.RFC3339Nano) - case Infinity: - s = "infinity" - case NegativeInfinity: - s = "-infinity" - } - - return json.Marshal(s) -} - -func (dst *Timestamptz) UnmarshalJSON(b []byte) error { - var s *string - err := json.Unmarshal(b, &s) - if err != nil { - return err - } - - if s == nil { - *dst = Timestamptz{Status: Null} - return nil - } - - switch *s { - case "infinity": - *dst = Timestamptz{Status: Present, InfinityModifier: Infinity} - case "-infinity": - *dst = Timestamptz{Status: Present, InfinityModifier: -Infinity} - default: - // PostgreSQL uses ISO 8601 for to_json function and casting from a string to timestamptz - tim, err := time.Parse(time.RFC3339Nano, *s) - if err != nil { - return err - } - - *dst = Timestamptz{Time: normalizePotentialUTC(tim), Status: Present} - } - - return nil -} - -// Normalize timestamps in UTC location to behave similarly to how the Golang -// standard library does it: UTC timestamps lack a .loc value. -// -// Reason for this: when comparing two timestamps with reflect.DeepEqual (generally -// speaking not a good idea, but several testing libraries (for example testify) -// does this), their location data needs to be equal for them to be considered -// equal. -func normalizePotentialUTC(timestamp time.Time) time.Time { - if timestamp.Location().String() != time.UTC.String() { - return timestamp - } - - return timestamp.UTC() -} diff --git a/vendor/github.com/jackc/pgtype/timestamptz_array.go b/vendor/github.com/jackc/pgtype/timestamptz_array.go deleted file mode 100644 index a3b4b263..00000000 --- a/vendor/github.com/jackc/pgtype/timestamptz_array.go +++ /dev/null @@ -1,518 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - "time" - - "github.com/jackc/pgio" -) - -type TimestamptzArray struct { - Elements []Timestamptz - Dimensions []ArrayDimension - Status Status -} - -func (dst *TimestamptzArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = TimestamptzArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []time.Time: - if value == nil { - *dst = TimestamptzArray{Status: Null} - } else if len(value) == 0 { - *dst = TimestamptzArray{Status: Present} - } else { - elements := make([]Timestamptz, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = TimestamptzArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*time.Time: - if value == nil { - *dst = TimestamptzArray{Status: Null} - } else if len(value) == 0 { - *dst = TimestamptzArray{Status: Present} - } else { - elements := make([]Timestamptz, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = TimestamptzArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Timestamptz: - if value == nil { - *dst = TimestamptzArray{Status: Null} - } else if len(value) == 0 { - *dst = TimestamptzArray{Status: Present} - } else { - *dst = TimestamptzArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = TimestamptzArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for TimestamptzArray", src) - } - if elementsLength == 0 { - *dst = TimestamptzArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to TimestamptzArray", src) - } - - *dst = TimestamptzArray{ - Elements: make([]Timestamptz, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Timestamptz, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to TimestamptzArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *TimestamptzArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to TimestamptzArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in TimestamptzArray", err) - } - index++ - - return index, nil -} - -func (dst TimestamptzArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *TimestamptzArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]time.Time: - *v = make([]time.Time, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*time.Time: - *v = make([]*time.Time, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *TimestamptzArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from TimestamptzArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from TimestamptzArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *TimestamptzArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TimestamptzArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Timestamptz - - if len(uta.Elements) > 0 { - elements = make([]Timestamptz, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Timestamptz - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = TimestamptzArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *TimestamptzArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TimestamptzArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = TimestamptzArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Timestamptz, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = TimestamptzArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src TimestamptzArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src TimestamptzArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("timestamptz"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "timestamptz") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *TimestamptzArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src TimestamptzArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/tsrange.go b/vendor/github.com/jackc/pgtype/tsrange.go deleted file mode 100644 index 19ecf446..00000000 --- a/vendor/github.com/jackc/pgtype/tsrange.go +++ /dev/null @@ -1,267 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" - - "github.com/jackc/pgio" -) - -type Tsrange struct { - Lower Timestamp - Upper Timestamp - LowerType BoundType - UpperType BoundType - Status Status -} - -func (dst *Tsrange) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Tsrange{Status: Null} - return nil - } - - switch value := src.(type) { - case Tsrange: - *dst = value - case *Tsrange: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - default: - return fmt.Errorf("cannot convert %v to Tsrange", src) - } - - return nil -} - -func (dst Tsrange) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Tsrange) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Tsrange) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Tsrange{Status: Null} - return nil - } - - utr, err := ParseUntypedTextRange(string(src)) - if err != nil { - return err - } - - *dst = Tsrange{Status: Present} - - dst.LowerType = utr.LowerType - dst.UpperType = utr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeText(ci, []byte(utr.Lower)); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeText(ci, []byte(utr.Upper)); err != nil { - return err - } - } - - return nil -} - -func (dst *Tsrange) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Tsrange{Status: Null} - return nil - } - - ubr, err := ParseUntypedBinaryRange(src) - if err != nil { - return err - } - - *dst = Tsrange{Status: Present} - - dst.LowerType = ubr.LowerType - dst.UpperType = ubr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeBinary(ci, ubr.Lower); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeBinary(ci, ubr.Upper); err != nil { - return err - } - } - - return nil -} - -func (src Tsrange) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - switch src.LowerType { - case Exclusive, Unbounded: - buf = append(buf, '(') - case Inclusive: - buf = append(buf, '[') - case Empty: - return append(buf, "empty"...), nil - default: - return nil, fmt.Errorf("unknown lower bound type %v", src.LowerType) - } - - var err error - - if src.LowerType != Unbounded { - buf, err = src.Lower.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - } - - buf = append(buf, ',') - - if src.UpperType != Unbounded { - buf, err = src.Upper.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - } - - switch src.UpperType { - case Exclusive, Unbounded: - buf = append(buf, ')') - case Inclusive: - buf = append(buf, ']') - default: - return nil, fmt.Errorf("unknown upper bound type %v", src.UpperType) - } - - return buf, nil -} - -func (src Tsrange) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var rangeType byte - switch src.LowerType { - case Inclusive: - rangeType |= lowerInclusiveMask - case Unbounded: - rangeType |= lowerUnboundedMask - case Exclusive: - case Empty: - return append(buf, emptyMask), nil - default: - return nil, fmt.Errorf("unknown LowerType: %v", src.LowerType) - } - - switch src.UpperType { - case Inclusive: - rangeType |= upperInclusiveMask - case Unbounded: - rangeType |= upperUnboundedMask - case Exclusive: - default: - return nil, fmt.Errorf("unknown UpperType: %v", src.UpperType) - } - - buf = append(buf, rangeType) - - var err error - - if src.LowerType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Lower.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - if src.UpperType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Upper.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Tsrange) Scan(src interface{}) error { - if src == nil { - *dst = Tsrange{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Tsrange) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/tsrange_array.go b/vendor/github.com/jackc/pgtype/tsrange_array.go deleted file mode 100644 index c64048eb..00000000 --- a/vendor/github.com/jackc/pgtype/tsrange_array.go +++ /dev/null @@ -1,470 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type TsrangeArray struct { - Elements []Tsrange - Dimensions []ArrayDimension - Status Status -} - -func (dst *TsrangeArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = TsrangeArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []Tsrange: - if value == nil { - *dst = TsrangeArray{Status: Null} - } else if len(value) == 0 { - *dst = TsrangeArray{Status: Present} - } else { - *dst = TsrangeArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = TsrangeArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for TsrangeArray", src) - } - if elementsLength == 0 { - *dst = TsrangeArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to TsrangeArray", src) - } - - *dst = TsrangeArray{ - Elements: make([]Tsrange, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Tsrange, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to TsrangeArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *TsrangeArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to TsrangeArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in TsrangeArray", err) - } - index++ - - return index, nil -} - -func (dst TsrangeArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *TsrangeArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]Tsrange: - *v = make([]Tsrange, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *TsrangeArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from TsrangeArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from TsrangeArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *TsrangeArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TsrangeArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Tsrange - - if len(uta.Elements) > 0 { - elements = make([]Tsrange, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Tsrange - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = TsrangeArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *TsrangeArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TsrangeArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = TsrangeArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Tsrange, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = TsrangeArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src TsrangeArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src TsrangeArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("tsrange"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "tsrange") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *TsrangeArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src TsrangeArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/tstzrange.go b/vendor/github.com/jackc/pgtype/tstzrange.go deleted file mode 100644 index 25576308..00000000 --- a/vendor/github.com/jackc/pgtype/tstzrange.go +++ /dev/null @@ -1,267 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "fmt" - - "github.com/jackc/pgio" -) - -type Tstzrange struct { - Lower Timestamptz - Upper Timestamptz - LowerType BoundType - UpperType BoundType - Status Status -} - -func (dst *Tstzrange) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = Tstzrange{Status: Null} - return nil - } - - switch value := src.(type) { - case Tstzrange: - *dst = value - case *Tstzrange: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - default: - return fmt.Errorf("cannot convert %v to Tstzrange", src) - } - - return nil -} - -func (dst Tstzrange) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Tstzrange) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Tstzrange) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Tstzrange{Status: Null} - return nil - } - - utr, err := ParseUntypedTextRange(string(src)) - if err != nil { - return err - } - - *dst = Tstzrange{Status: Present} - - dst.LowerType = utr.LowerType - dst.UpperType = utr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeText(ci, []byte(utr.Lower)); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeText(ci, []byte(utr.Upper)); err != nil { - return err - } - } - - return nil -} - -func (dst *Tstzrange) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Tstzrange{Status: Null} - return nil - } - - ubr, err := ParseUntypedBinaryRange(src) - if err != nil { - return err - } - - *dst = Tstzrange{Status: Present} - - dst.LowerType = ubr.LowerType - dst.UpperType = ubr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeBinary(ci, ubr.Lower); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeBinary(ci, ubr.Upper); err != nil { - return err - } - } - - return nil -} - -func (src Tstzrange) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - switch src.LowerType { - case Exclusive, Unbounded: - buf = append(buf, '(') - case Inclusive: - buf = append(buf, '[') - case Empty: - return append(buf, "empty"...), nil - default: - return nil, fmt.Errorf("unknown lower bound type %v", src.LowerType) - } - - var err error - - if src.LowerType != Unbounded { - buf, err = src.Lower.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - } - - buf = append(buf, ',') - - if src.UpperType != Unbounded { - buf, err = src.Upper.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - } - - switch src.UpperType { - case Exclusive, Unbounded: - buf = append(buf, ')') - case Inclusive: - buf = append(buf, ']') - default: - return nil, fmt.Errorf("unknown upper bound type %v", src.UpperType) - } - - return buf, nil -} - -func (src Tstzrange) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var rangeType byte - switch src.LowerType { - case Inclusive: - rangeType |= lowerInclusiveMask - case Unbounded: - rangeType |= lowerUnboundedMask - case Exclusive: - case Empty: - return append(buf, emptyMask), nil - default: - return nil, fmt.Errorf("unknown LowerType: %v", src.LowerType) - } - - switch src.UpperType { - case Inclusive: - rangeType |= upperInclusiveMask - case Unbounded: - rangeType |= upperUnboundedMask - case Exclusive: - default: - return nil, fmt.Errorf("unknown UpperType: %v", src.UpperType) - } - - buf = append(buf, rangeType) - - var err error - - if src.LowerType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Lower.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - if src.UpperType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Upper.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Tstzrange) Scan(src interface{}) error { - if src == nil { - *dst = Tstzrange{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Tstzrange) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/tstzrange_array.go b/vendor/github.com/jackc/pgtype/tstzrange_array.go deleted file mode 100644 index a216820a..00000000 --- a/vendor/github.com/jackc/pgtype/tstzrange_array.go +++ /dev/null @@ -1,470 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type TstzrangeArray struct { - Elements []Tstzrange - Dimensions []ArrayDimension - Status Status -} - -func (dst *TstzrangeArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = TstzrangeArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []Tstzrange: - if value == nil { - *dst = TstzrangeArray{Status: Null} - } else if len(value) == 0 { - *dst = TstzrangeArray{Status: Present} - } else { - *dst = TstzrangeArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = TstzrangeArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for TstzrangeArray", src) - } - if elementsLength == 0 { - *dst = TstzrangeArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to TstzrangeArray", src) - } - - *dst = TstzrangeArray{ - Elements: make([]Tstzrange, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Tstzrange, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to TstzrangeArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *TstzrangeArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to TstzrangeArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in TstzrangeArray", err) - } - index++ - - return index, nil -} - -func (dst TstzrangeArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *TstzrangeArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]Tstzrange: - *v = make([]Tstzrange, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *TstzrangeArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from TstzrangeArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from TstzrangeArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *TstzrangeArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TstzrangeArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Tstzrange - - if len(uta.Elements) > 0 { - elements = make([]Tstzrange, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Tstzrange - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = TstzrangeArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *TstzrangeArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = TstzrangeArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = TstzrangeArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Tstzrange, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = TstzrangeArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src TstzrangeArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src TstzrangeArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("tstzrange"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "tstzrange") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *TstzrangeArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src TstzrangeArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/typed_array.go.erb b/vendor/github.com/jackc/pgtype/typed_array.go.erb deleted file mode 100644 index e8433c04..00000000 --- a/vendor/github.com/jackc/pgtype/typed_array.go.erb +++ /dev/null @@ -1,512 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -<% - # defaults when not explicitly set on command line - - binary_format ||= "true" - text_format ||= "true" - - text_null ||= "NULL" - - encode_binary ||= binary_format - decode_binary ||= binary_format -%> - -package pgtype - -import ( - "bytes" - "fmt" - "io" - - "github.com/jackc/pgio" -) - -type <%= pgtype_array_type %> struct { - Elements []<%= pgtype_element_type %> - Dimensions []ArrayDimension - Status Status -} - -func (dst *<%= pgtype_array_type %>) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = <%= pgtype_array_type %>{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - <% go_array_types.split(",").each do |t| %> - <% if t != "[]#{pgtype_element_type}" %> - case <%= t %>: - if value == nil { - *dst = <%= pgtype_array_type %>{Status: Null} - } else if len(value) == 0 { - *dst = <%= pgtype_array_type %>{Status: Present} - } else { - elements := make([]<%= pgtype_element_type %>, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = <%= pgtype_array_type %>{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - <% end %> - <% end %> - case []<%= pgtype_element_type %>: - if value == nil { - *dst = <%= pgtype_array_type %>{Status: Null} - } else if len(value) == 0 { - *dst = <%= pgtype_array_type %>{Status: Present} - } else { - *dst = <%= pgtype_array_type %>{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status : Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = <%= pgtype_array_type %>{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for <%= pgtype_array_type %>", src) - } - if elementsLength == 0 { - *dst = <%= pgtype_array_type %>{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to <%= pgtype_array_type %>", src) - } - - *dst = <%= pgtype_array_type %> { - Elements: make([]<%= pgtype_element_type %>, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]<%= pgtype_element_type %>, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to <%= pgtype_array_type %>, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *<%= pgtype_array_type %>) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to <%= pgtype_array_type %>") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in <%= pgtype_array_type %>", err) - } - index++ - - return index, nil -} - -func (dst <%= pgtype_array_type %>) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *<%= pgtype_array_type %>) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1{ - // Attempt to match to select common types: - switch v := dst.(type) { - <% go_array_types.split(",").each do |t| %> - case *<%= t %>: - *v = make(<%= t %>, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - <% end %> - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *<%= pgtype_array_type %>) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr(){ - return 0, fmt.Errorf("cannot assign all values from <%= pgtype_array_type %>") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from <%= pgtype_array_type %>") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -<% if text_format == "true" %> -func (dst *<%= pgtype_array_type %>) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = <%= pgtype_array_type %>{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []<%= pgtype_element_type %> - - if len(uta.Elements) > 0 { - elements = make([]<%= pgtype_element_type %>, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem <%= pgtype_element_type %> - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = <%= pgtype_array_type %>{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} -<% end %> - -<% if decode_binary == "true" %> -func (dst *<%= pgtype_array_type %>) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = <%= pgtype_array_type %>{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = <%= pgtype_array_type %>{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]<%= pgtype_element_type %>, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp:rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = <%= pgtype_array_type %>{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} -<% end %> - -<% if text_format == "true" %> -func (src <%= pgtype_array_type %>) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `<%= text_null %>`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} -<% end %> - -<% if encode_binary == "true" %> - func (src <%= pgtype_array_type %>) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("<%= element_type_name %>"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "<%= element_type_name %>") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil - } -<% end %> - -<% if text_format == "true" %> -// Scan implements the database/sql Scanner interface. -func (dst *<%= pgtype_array_type %>) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src <%= pgtype_array_type %>) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} -<% end %> diff --git a/vendor/github.com/jackc/pgtype/typed_array_gen.sh b/vendor/github.com/jackc/pgtype/typed_array_gen.sh deleted file mode 100644 index 9ec768bf..00000000 --- a/vendor/github.com/jackc/pgtype/typed_array_gen.sh +++ /dev/null @@ -1,31 +0,0 @@ -erb pgtype_array_type=Int2Array pgtype_element_type=Int2 go_array_types=[]int16,[]*int16,[]uint16,[]*uint16,[]int32,[]*int32,[]uint32,[]*uint32,[]int64,[]*int64,[]uint64,[]*uint64,[]int,[]*int,[]uint,[]*uint element_type_name=int2 typed_array.go.erb > int2_array.go -erb pgtype_array_type=Int4Array pgtype_element_type=Int4 go_array_types=[]int16,[]*int16,[]uint16,[]*uint16,[]int32,[]*int32,[]uint32,[]*uint32,[]int64,[]*int64,[]uint64,[]*uint64,[]int,[]*int,[]uint,[]*uint element_type_name=int4 typed_array.go.erb > int4_array.go -erb pgtype_array_type=Int8Array pgtype_element_type=Int8 go_array_types=[]int16,[]*int16,[]uint16,[]*uint16,[]int32,[]*int32,[]uint32,[]*uint32,[]int64,[]*int64,[]uint64,[]*uint64,[]int,[]*int,[]uint,[]*uint element_type_name=int8 typed_array.go.erb > int8_array.go -erb pgtype_array_type=BoolArray pgtype_element_type=Bool go_array_types=[]bool,[]*bool element_type_name=bool typed_array.go.erb > bool_array.go -erb pgtype_array_type=DateArray pgtype_element_type=Date go_array_types=[]time.Time,[]*time.Time element_type_name=date typed_array.go.erb > date_array.go -erb pgtype_array_type=TimestamptzArray pgtype_element_type=Timestamptz go_array_types=[]time.Time,[]*time.Time element_type_name=timestamptz typed_array.go.erb > timestamptz_array.go -erb pgtype_array_type=TstzrangeArray pgtype_element_type=Tstzrange go_array_types=[]Tstzrange element_type_name=tstzrange typed_array.go.erb > tstzrange_array.go -erb pgtype_array_type=TsrangeArray pgtype_element_type=Tsrange go_array_types=[]Tsrange element_type_name=tsrange typed_array.go.erb > tsrange_array.go -erb pgtype_array_type=TimestampArray pgtype_element_type=Timestamp go_array_types=[]time.Time,[]*time.Time element_type_name=timestamp typed_array.go.erb > timestamp_array.go -erb pgtype_array_type=Float4Array pgtype_element_type=Float4 go_array_types=[]float32,[]*float32 element_type_name=float4 typed_array.go.erb > float4_array.go -erb pgtype_array_type=Float8Array pgtype_element_type=Float8 go_array_types=[]float64,[]*float64 element_type_name=float8 typed_array.go.erb > float8_array.go -erb pgtype_array_type=InetArray pgtype_element_type=Inet go_array_types=[]*net.IPNet,[]net.IP,[]*net.IP element_type_name=inet typed_array.go.erb > inet_array.go -erb pgtype_array_type=MacaddrArray pgtype_element_type=Macaddr go_array_types=[]net.HardwareAddr,[]*net.HardwareAddr element_type_name=macaddr typed_array.go.erb > macaddr_array.go -erb pgtype_array_type=CIDRArray pgtype_element_type=CIDR go_array_types=[]*net.IPNet,[]net.IP,[]*net.IP element_type_name=cidr typed_array.go.erb > cidr_array.go -erb pgtype_array_type=TextArray pgtype_element_type=Text go_array_types=[]string,[]*string element_type_name=text typed_array.go.erb > text_array.go -erb pgtype_array_type=VarcharArray pgtype_element_type=Varchar go_array_types=[]string,[]*string element_type_name=varchar typed_array.go.erb > varchar_array.go -erb pgtype_array_type=BPCharArray pgtype_element_type=BPChar go_array_types=[]string,[]*string element_type_name=bpchar typed_array.go.erb > bpchar_array.go -erb pgtype_array_type=ByteaArray pgtype_element_type=Bytea go_array_types=[][]byte element_type_name=bytea typed_array.go.erb > bytea_array.go -erb pgtype_array_type=ACLItemArray pgtype_element_type=ACLItem go_array_types=[]string,[]*string element_type_name=aclitem binary_format=false typed_array.go.erb > aclitem_array.go -erb pgtype_array_type=HstoreArray pgtype_element_type=Hstore go_array_types=[]map[string]string element_type_name=hstore typed_array.go.erb > hstore_array.go -erb pgtype_array_type=NumericArray pgtype_element_type=Numeric go_array_types=[]float32,[]*float32,[]float64,[]*float64,[]int64,[]*int64,[]uint64,[]*uint64 element_type_name=numeric typed_array.go.erb > numeric_array.go -erb pgtype_array_type=UUIDArray pgtype_element_type=UUID go_array_types=[][16]byte,[][]byte,[]string,[]*string element_type_name=uuid typed_array.go.erb > uuid_array.go -erb pgtype_array_type=JSONArray pgtype_element_type=JSON go_array_types=[]string,[][]byte,[]json.RawMessage element_type_name=json typed_array.go.erb > json_array.go -erb pgtype_array_type=JSONBArray pgtype_element_type=JSONB go_array_types=[]string,[][]byte,[]json.RawMessage element_type_name=jsonb typed_array.go.erb > jsonb_array.go - -# While the binary format is theoretically possible it is only practical to use the text format. -erb pgtype_array_type=EnumArray pgtype_element_type=GenericText go_array_types=[]string,[]*string binary_format=false typed_array.go.erb > enum_array.go - -erb pgtype_array_type=RecordArray pgtype_element_type=Record go_array_types=[][]Value element_type_name=record text_null=NULL encode_binary=false text_format=false typed_array.go.erb > record_array.go - -goimports -w *_array.go diff --git a/vendor/github.com/jackc/pgtype/typed_multirange.go.erb b/vendor/github.com/jackc/pgtype/typed_multirange.go.erb deleted file mode 100644 index 84c8299f..00000000 --- a/vendor/github.com/jackc/pgtype/typed_multirange.go.erb +++ /dev/null @@ -1,239 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - - "github.com/jackc/pgio" -) - -type <%= multirange_type %> struct { - Ranges []<%= range_type %> - Status Status -} - -func (dst *<%= multirange_type %>) Set(src interface{}) error { - //untyped nil and typed nil interfaces are different - if src == nil { - *dst = <%= multirange_type %>{Status: Null} - return nil - } - - switch value := src.(type) { - case <%= multirange_type %>: - *dst = value - case *<%= multirange_type %>: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - case []<%= range_type %>: - if value == nil { - *dst = <%= multirange_type %>{Status: Null} - } else if len(value) == 0 { - *dst = <%= multirange_type %>{Status: Present} - } else { - elements := make([]<%= range_type %>, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = <%= multirange_type %>{ - Ranges: elements, - Status: Present, - } - } - case []*<%= range_type %>: - if value == nil { - *dst = <%= multirange_type %>{Status: Null} - } else if len(value) == 0 { - *dst = <%= multirange_type %>{Status: Present} - } else { - elements := make([]<%= range_type %>, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = <%= multirange_type %>{ - Ranges: elements, - Status: Present, - } - } - default: - return fmt.Errorf("cannot convert %v to <%= multirange_type %>", src) - } - - return nil - -} - -func (dst <%= multirange_type %>) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *<%= multirange_type %>) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *<%= multirange_type %>) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = <%= multirange_type %>{Status: Null} - return nil - } - - utmr, err := ParseUntypedTextMultirange(string(src)) - if err != nil { - return err - } - - var elements []<%= range_type %> - - if len(utmr.Elements) > 0 { - elements = make([]<%= range_type %>, len(utmr.Elements)) - - for i, s := range utmr.Elements { - var elem <%= range_type %> - - elemSrc := []byte(s) - - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = <%= multirange_type %>{Ranges: elements, Status: Present} - - return nil -} - -func (dst *<%= multirange_type %>) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = <%= multirange_type %>{Status: Null} - return nil - } - - rp := 0 - - numElems := int(binary.BigEndian.Uint32(src[rp:])) - rp += 4 - - if numElems == 0 { - *dst = <%= multirange_type %>{Status: Present} - return nil - } - - elements := make([]<%= range_type %>, numElems) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err := elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = <%= multirange_type %>{Ranges: elements, Status: Present} - return nil -} - -func (src <%= multirange_type %>) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = append(buf, '{') - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Ranges { - if i > 0 { - buf = append(buf, ',') - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - return nil, fmt.Errorf("multi-range does not allow null range") - } else { - buf = append(buf, string(elemBuf)...) - } - - } - - buf = append(buf, '}') - - return buf, nil -} - -func (src <%= multirange_type %>) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendInt32(buf, int32(len(src.Ranges))) - - for i := range src.Ranges { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Ranges[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *<%= multirange_type %>) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src <%= multirange_type %>) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/typed_multirange_gen.sh b/vendor/github.com/jackc/pgtype/typed_multirange_gen.sh deleted file mode 100644 index 610f40a1..00000000 --- a/vendor/github.com/jackc/pgtype/typed_multirange_gen.sh +++ /dev/null @@ -1,8 +0,0 @@ -erb range_type=Numrange multirange_type=Nummultirange typed_multirange.go.erb > num_multirange.go -erb range_type=Int4range multirange_type=Int4multirange typed_multirange.go.erb > int4_multirange.go -erb range_type=Int8range multirange_type=Int8multirange typed_multirange.go.erb > int8_multirange.go -# TODO -# erb range_type=Tsrange multirange_type=Tsmultirange typed_multirange.go.erb > ts_multirange.go -# erb range_type=Tstzrange multirange_type=Tstzmultirange typed_multirange.go.erb > tstz_multirange.go -# erb range_type=Daterange multirange_type=Datemultirange typed_multirange.go.erb > date_multirange.go -goimports -w *multirange.go \ No newline at end of file diff --git a/vendor/github.com/jackc/pgtype/typed_range.go.erb b/vendor/github.com/jackc/pgtype/typed_range.go.erb deleted file mode 100644 index 5625587a..00000000 --- a/vendor/github.com/jackc/pgtype/typed_range.go.erb +++ /dev/null @@ -1,269 +0,0 @@ -package pgtype - -import ( - "bytes" - "database/sql/driver" - "fmt" - "io" - - "github.com/jackc/pgio" -) - -type <%= range_type %> struct { - Lower <%= element_type %> - Upper <%= element_type %> - LowerType BoundType - UpperType BoundType - Status Status -} - -func (dst *<%= range_type %>) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = <%= range_type %>{Status: Null} - return nil - } - - switch value := src.(type) { - case <%= range_type %>: - *dst = value - case *<%= range_type %>: - *dst = *value - case string: - return dst.DecodeText(nil, []byte(value)) - default: - return fmt.Errorf("cannot convert %v to <%= range_type %>", src) - } - - return nil -} - -func (dst <%= range_type %>) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *<%= range_type %>) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *<%= range_type %>) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = <%= range_type %>{Status: Null} - return nil - } - - utr, err := ParseUntypedTextRange(string(src)) - if err != nil { - return err - } - - *dst = <%= range_type %>{Status: Present} - - dst.LowerType = utr.LowerType - dst.UpperType = utr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeText(ci, []byte(utr.Lower)); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeText(ci, []byte(utr.Upper)); err != nil { - return err - } - } - - return nil -} - -func (dst *<%= range_type %>) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = <%= range_type %>{Status: Null} - return nil - } - - ubr, err := ParseUntypedBinaryRange(src) - if err != nil { - return err - } - - *dst = <%= range_type %>{Status: Present} - - dst.LowerType = ubr.LowerType - dst.UpperType = ubr.UpperType - - if dst.LowerType == Empty { - return nil - } - - if dst.LowerType == Inclusive || dst.LowerType == Exclusive { - if err := dst.Lower.DecodeBinary(ci, ubr.Lower); err != nil { - return err - } - } - - if dst.UpperType == Inclusive || dst.UpperType == Exclusive { - if err := dst.Upper.DecodeBinary(ci, ubr.Upper); err != nil { - return err - } - } - - return nil -} - -func (src <%= range_type %>) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - switch src.LowerType { - case Exclusive, Unbounded: - buf = append(buf, '(') - case Inclusive: - buf = append(buf, '[') - case Empty: - return append(buf, "empty"...), nil - default: - return nil, fmt.Errorf("unknown lower bound type %v", src.LowerType) - } - - var err error - - if src.LowerType != Unbounded { - buf, err = src.Lower.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - } - - buf = append(buf, ',') - - if src.UpperType != Unbounded { - buf, err = src.Upper.EncodeText(ci, buf) - if err != nil { - return nil, err - } else if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - } - - switch src.UpperType { - case Exclusive, Unbounded: - buf = append(buf, ')') - case Inclusive: - buf = append(buf, ']') - default: - return nil, fmt.Errorf("unknown upper bound type %v", src.UpperType) - } - - return buf, nil -} - -func (src <%= range_type %>) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - var rangeType byte - switch src.LowerType { - case Inclusive: - rangeType |= lowerInclusiveMask - case Unbounded: - rangeType |= lowerUnboundedMask - case Exclusive: - case Empty: - return append(buf, emptyMask), nil - default: - return nil, fmt.Errorf("unknown LowerType: %v", src.LowerType) - } - - switch src.UpperType { - case Inclusive: - rangeType |= upperInclusiveMask - case Unbounded: - rangeType |= upperUnboundedMask - case Exclusive: - default: - return nil, fmt.Errorf("unknown UpperType: %v", src.UpperType) - } - - buf = append(buf, rangeType) - - var err error - - if src.LowerType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Lower.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Lower cannot be null unless LowerType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - if src.UpperType != Unbounded { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - buf, err = src.Upper.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if buf == nil { - return nil, fmt.Errorf("Upper cannot be null unless UpperType is Unbounded") - } - - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *<%= range_type %>) Scan(src interface{}) error { - if src == nil { - *dst = <%= range_type %>{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src <%= range_type %>) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/typed_range_gen.sh b/vendor/github.com/jackc/pgtype/typed_range_gen.sh deleted file mode 100644 index bedda292..00000000 --- a/vendor/github.com/jackc/pgtype/typed_range_gen.sh +++ /dev/null @@ -1,7 +0,0 @@ -erb range_type=Int4range element_type=Int4 typed_range.go.erb > int4range.go -erb range_type=Int8range element_type=Int8 typed_range.go.erb > int8range.go -erb range_type=Tsrange element_type=Timestamp typed_range.go.erb > tsrange.go -erb range_type=Tstzrange element_type=Timestamptz typed_range.go.erb > tstzrange.go -erb range_type=Daterange element_type=Date typed_range.go.erb > daterange.go -erb range_type=Numrange element_type=Numeric typed_range.go.erb > numrange.go -goimports -w *range.go diff --git a/vendor/github.com/jackc/pgtype/unknown.go b/vendor/github.com/jackc/pgtype/unknown.go deleted file mode 100644 index c591b708..00000000 --- a/vendor/github.com/jackc/pgtype/unknown.go +++ /dev/null @@ -1,44 +0,0 @@ -package pgtype - -import "database/sql/driver" - -// Unknown represents the PostgreSQL unknown type. It is either a string literal -// or NULL. It is used when PostgreSQL does not know the type of a value. In -// general, this will only be used in pgx when selecting a null value without -// type information. e.g. SELECT NULL; -type Unknown struct { - String string - Status Status -} - -func (dst *Unknown) Set(src interface{}) error { - return (*Text)(dst).Set(src) -} - -func (dst Unknown) Get() interface{} { - return (Text)(dst).Get() -} - -// AssignTo assigns from src to dst. Note that as Unknown is not a general number -// type AssignTo does not do automatic type conversion as other number types do. -func (src *Unknown) AssignTo(dst interface{}) error { - return (*Text)(src).AssignTo(dst) -} - -func (dst *Unknown) DecodeText(ci *ConnInfo, src []byte) error { - return (*Text)(dst).DecodeText(ci, src) -} - -func (dst *Unknown) DecodeBinary(ci *ConnInfo, src []byte) error { - return (*Text)(dst).DecodeBinary(ci, src) -} - -// Scan implements the database/sql Scanner interface. -func (dst *Unknown) Scan(src interface{}) error { - return (*Text)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Unknown) Value() (driver.Value, error) { - return (Text)(src).Value() -} diff --git a/vendor/github.com/jackc/pgtype/uuid.go b/vendor/github.com/jackc/pgtype/uuid.go deleted file mode 100644 index 6839c052..00000000 --- a/vendor/github.com/jackc/pgtype/uuid.go +++ /dev/null @@ -1,231 +0,0 @@ -package pgtype - -import ( - "bytes" - "database/sql/driver" - "encoding/hex" - "fmt" -) - -type UUID struct { - Bytes [16]byte - Status Status -} - -func (dst *UUID) Set(src interface{}) error { - if src == nil { - *dst = UUID{Status: Null} - return nil - } - - switch value := src.(type) { - case interface{ Get() interface{} }: - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - case fmt.Stringer: - value2 := value.String() - return dst.Set(value2) - case [16]byte: - *dst = UUID{Bytes: value, Status: Present} - case []byte: - if value != nil { - if len(value) != 16 { - return fmt.Errorf("[]byte must be 16 bytes to convert to UUID: %d", len(value)) - } - *dst = UUID{Status: Present} - copy(dst.Bytes[:], value) - } else { - *dst = UUID{Status: Null} - } - case string: - uuid, err := parseUUID(value) - if err != nil { - return err - } - *dst = UUID{Bytes: uuid, Status: Present} - case *string: - if value == nil { - *dst = UUID{Status: Null} - } else { - return dst.Set(*value) - } - default: - if originalSrc, ok := underlyingUUIDType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to UUID", value) - } - - return nil -} - -func (dst UUID) Get() interface{} { - switch dst.Status { - case Present: - return dst.Bytes - case Null: - return nil - default: - return dst.Status - } -} - -func (src *UUID) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - switch v := dst.(type) { - case *[16]byte: - *v = src.Bytes - return nil - case *[]byte: - *v = make([]byte, 16) - copy(*v, src.Bytes[:]) - return nil - case *string: - *v = encodeUUID(src.Bytes) - return nil - default: - if nextDst, retry := GetAssignToDstType(v); retry { - return src.AssignTo(nextDst) - } - } - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot assign %v into %T", src, dst) -} - -// parseUUID converts a string UUID in standard form to a byte array. -func parseUUID(src string) (dst [16]byte, err error) { - switch len(src) { - case 36: - src = src[0:8] + src[9:13] + src[14:18] + src[19:23] + src[24:] - case 32: - // dashes already stripped, assume valid - default: - // assume invalid. - return dst, fmt.Errorf("cannot parse UUID %v", src) - } - - buf, err := hex.DecodeString(src) - if err != nil { - return dst, err - } - - copy(dst[:], buf) - return dst, err -} - -// encodeUUID converts a uuid byte array to UUID standard string form. -func encodeUUID(src [16]byte) string { - return fmt.Sprintf("%x-%x-%x-%x-%x", src[0:4], src[4:6], src[6:8], src[8:10], src[10:16]) -} - -func (dst *UUID) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = UUID{Status: Null} - return nil - } - - if len(src) != 36 { - return fmt.Errorf("invalid length for UUID: %v", len(src)) - } - - buf, err := parseUUID(string(src)) - if err != nil { - return err - } - - *dst = UUID{Bytes: buf, Status: Present} - return nil -} - -func (dst *UUID) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = UUID{Status: Null} - return nil - } - - if len(src) != 16 { - return fmt.Errorf("invalid length for UUID: %v", len(src)) - } - - *dst = UUID{Status: Present} - copy(dst.Bytes[:], src) - return nil -} - -func (src UUID) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, encodeUUID(src.Bytes)...), nil -} - -func (src UUID) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - return append(buf, src.Bytes[:]...), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *UUID) Scan(src interface{}) error { - if src == nil { - *dst = UUID{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src UUID) Value() (driver.Value, error) { - return EncodeValueText(src) -} - -func (src UUID) MarshalJSON() ([]byte, error) { - switch src.Status { - case Present: - var buff bytes.Buffer - buff.WriteByte('"') - buff.WriteString(encodeUUID(src.Bytes)) - buff.WriteByte('"') - return buff.Bytes(), nil - case Null: - return []byte("null"), nil - case Undefined: - return nil, errUndefined - } - return nil, errBadStatus -} - -func (dst *UUID) UnmarshalJSON(src []byte) error { - if bytes.Compare(src, []byte("null")) == 0 { - return dst.Set(nil) - } - if len(src) != 38 { - return fmt.Errorf("invalid length for UUID: %v", len(src)) - } - return dst.Set(string(src[1 : len(src)-1])) -} diff --git a/vendor/github.com/jackc/pgtype/uuid_array.go b/vendor/github.com/jackc/pgtype/uuid_array.go deleted file mode 100644 index 00721ef9..00000000 --- a/vendor/github.com/jackc/pgtype/uuid_array.go +++ /dev/null @@ -1,573 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type UUIDArray struct { - Elements []UUID - Dimensions []ArrayDimension - Status Status -} - -func (dst *UUIDArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = UUIDArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case [][16]byte: - if value == nil { - *dst = UUIDArray{Status: Null} - } else if len(value) == 0 { - *dst = UUIDArray{Status: Present} - } else { - elements := make([]UUID, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = UUIDArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case [][]byte: - if value == nil { - *dst = UUIDArray{Status: Null} - } else if len(value) == 0 { - *dst = UUIDArray{Status: Present} - } else { - elements := make([]UUID, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = UUIDArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []string: - if value == nil { - *dst = UUIDArray{Status: Null} - } else if len(value) == 0 { - *dst = UUIDArray{Status: Present} - } else { - elements := make([]UUID, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = UUIDArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*string: - if value == nil { - *dst = UUIDArray{Status: Null} - } else if len(value) == 0 { - *dst = UUIDArray{Status: Present} - } else { - elements := make([]UUID, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = UUIDArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []UUID: - if value == nil { - *dst = UUIDArray{Status: Null} - } else if len(value) == 0 { - *dst = UUIDArray{Status: Present} - } else { - *dst = UUIDArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = UUIDArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for UUIDArray", src) - } - if elementsLength == 0 { - *dst = UUIDArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to UUIDArray", src) - } - - *dst = UUIDArray{ - Elements: make([]UUID, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]UUID, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to UUIDArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *UUIDArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to UUIDArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in UUIDArray", err) - } - index++ - - return index, nil -} - -func (dst UUIDArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *UUIDArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[][16]byte: - *v = make([][16]byte, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[][]byte: - *v = make([][]byte, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]string: - *v = make([]string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*string: - *v = make([]*string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *UUIDArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from UUIDArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from UUIDArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *UUIDArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = UUIDArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []UUID - - if len(uta.Elements) > 0 { - elements = make([]UUID, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem UUID - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = UUIDArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *UUIDArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = UUIDArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = UUIDArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]UUID, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = UUIDArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src UUIDArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src UUIDArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("uuid"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "uuid") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *UUIDArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src UUIDArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/varbit.go b/vendor/github.com/jackc/pgtype/varbit.go deleted file mode 100644 index f24dc5bc..00000000 --- a/vendor/github.com/jackc/pgtype/varbit.go +++ /dev/null @@ -1,133 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - - "github.com/jackc/pgio" -) - -type Varbit struct { - Bytes []byte - Len int32 // Number of bits - Status Status -} - -func (dst *Varbit) Set(src interface{}) error { - return fmt.Errorf("cannot convert %v to Varbit", src) -} - -func (dst Varbit) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *Varbit) AssignTo(dst interface{}) error { - return fmt.Errorf("cannot assign %v to %T", src, dst) -} - -func (dst *Varbit) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Varbit{Status: Null} - return nil - } - - bitLen := len(src) - byteLen := bitLen / 8 - if bitLen%8 > 0 { - byteLen++ - } - buf := make([]byte, byteLen) - - for i, b := range src { - if b == '1' { - byteIdx := i / 8 - bitIdx := uint(i % 8) - buf[byteIdx] = buf[byteIdx] | (128 >> bitIdx) - } - } - - *dst = Varbit{Bytes: buf, Len: int32(bitLen), Status: Present} - return nil -} - -func (dst *Varbit) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = Varbit{Status: Null} - return nil - } - - if len(src) < 4 { - return fmt.Errorf("invalid length for varbit: %v", len(src)) - } - - bitLen := int32(binary.BigEndian.Uint32(src)) - rp := 4 - - *dst = Varbit{Bytes: src[rp:], Len: bitLen, Status: Present} - return nil -} - -func (src Varbit) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - for i := int32(0); i < src.Len; i++ { - byteIdx := i / 8 - bitMask := byte(128 >> byte(i%8)) - char := byte('0') - if src.Bytes[byteIdx]&bitMask > 0 { - char = '1' - } - buf = append(buf, char) - } - - return buf, nil -} - -func (src Varbit) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - buf = pgio.AppendInt32(buf, src.Len) - return append(buf, src.Bytes...), nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *Varbit) Scan(src interface{}) error { - if src == nil { - *dst = Varbit{Status: Null} - return nil - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Varbit) Value() (driver.Value, error) { - return EncodeValueText(src) -} diff --git a/vendor/github.com/jackc/pgtype/varchar.go b/vendor/github.com/jackc/pgtype/varchar.go deleted file mode 100644 index fea31d18..00000000 --- a/vendor/github.com/jackc/pgtype/varchar.go +++ /dev/null @@ -1,66 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" -) - -type Varchar Text - -// Set converts from src to dst. Note that as Varchar is not a general -// number type Set does not do automatic type conversion as other number -// types do. -func (dst *Varchar) Set(src interface{}) error { - return (*Text)(dst).Set(src) -} - -func (dst Varchar) Get() interface{} { - return (Text)(dst).Get() -} - -// AssignTo assigns from src to dst. Note that as Varchar is not a general number -// type AssignTo does not do automatic type conversion as other number types do. -func (src *Varchar) AssignTo(dst interface{}) error { - return (*Text)(src).AssignTo(dst) -} - -func (Varchar) PreferredResultFormat() int16 { - return TextFormatCode -} - -func (dst *Varchar) DecodeText(ci *ConnInfo, src []byte) error { - return (*Text)(dst).DecodeText(ci, src) -} - -func (dst *Varchar) DecodeBinary(ci *ConnInfo, src []byte) error { - return (*Text)(dst).DecodeBinary(ci, src) -} - -func (Varchar) PreferredParamFormat() int16 { - return TextFormatCode -} - -func (src Varchar) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Text)(src).EncodeText(ci, buf) -} - -func (src Varchar) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return (Text)(src).EncodeBinary(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *Varchar) Scan(src interface{}) error { - return (*Text)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src Varchar) Value() (driver.Value, error) { - return (Text)(src).Value() -} - -func (src Varchar) MarshalJSON() ([]byte, error) { - return (Text)(src).MarshalJSON() -} - -func (dst *Varchar) UnmarshalJSON(b []byte) error { - return (*Text)(dst).UnmarshalJSON(b) -} diff --git a/vendor/github.com/jackc/pgtype/varchar_array.go b/vendor/github.com/jackc/pgtype/varchar_array.go deleted file mode 100644 index 8a309a3f..00000000 --- a/vendor/github.com/jackc/pgtype/varchar_array.go +++ /dev/null @@ -1,517 +0,0 @@ -// Code generated by erb. DO NOT EDIT. - -package pgtype - -import ( - "database/sql/driver" - "encoding/binary" - "fmt" - "reflect" - - "github.com/jackc/pgio" -) - -type VarcharArray struct { - Elements []Varchar - Dimensions []ArrayDimension - Status Status -} - -func (dst *VarcharArray) Set(src interface{}) error { - // untyped nil and typed nil interfaces are different - if src == nil { - *dst = VarcharArray{Status: Null} - return nil - } - - if value, ok := src.(interface{ Get() interface{} }); ok { - value2 := value.Get() - if value2 != value { - return dst.Set(value2) - } - } - - // Attempt to match to select common types: - switch value := src.(type) { - - case []string: - if value == nil { - *dst = VarcharArray{Status: Null} - } else if len(value) == 0 { - *dst = VarcharArray{Status: Present} - } else { - elements := make([]Varchar, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = VarcharArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []*string: - if value == nil { - *dst = VarcharArray{Status: Null} - } else if len(value) == 0 { - *dst = VarcharArray{Status: Present} - } else { - elements := make([]Varchar, len(value)) - for i := range value { - if err := elements[i].Set(value[i]); err != nil { - return err - } - } - *dst = VarcharArray{ - Elements: elements, - Dimensions: []ArrayDimension{{Length: int32(len(elements)), LowerBound: 1}}, - Status: Present, - } - } - - case []Varchar: - if value == nil { - *dst = VarcharArray{Status: Null} - } else if len(value) == 0 { - *dst = VarcharArray{Status: Present} - } else { - *dst = VarcharArray{ - Elements: value, - Dimensions: []ArrayDimension{{Length: int32(len(value)), LowerBound: 1}}, - Status: Present, - } - } - default: - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - reflectedValue := reflect.ValueOf(src) - if !reflectedValue.IsValid() || reflectedValue.IsZero() { - *dst = VarcharArray{Status: Null} - return nil - } - - dimensions, elementsLength, ok := findDimensionsFromValue(reflectedValue, nil, 0) - if !ok { - return fmt.Errorf("cannot find dimensions of %v for VarcharArray", src) - } - if elementsLength == 0 { - *dst = VarcharArray{Status: Present} - return nil - } - if len(dimensions) == 0 { - if originalSrc, ok := underlyingSliceType(src); ok { - return dst.Set(originalSrc) - } - return fmt.Errorf("cannot convert %v to VarcharArray", src) - } - - *dst = VarcharArray{ - Elements: make([]Varchar, elementsLength), - Dimensions: dimensions, - Status: Present, - } - elementCount, err := dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - // Maybe the target was one dimension too far, try again: - if len(dst.Dimensions) > 1 { - dst.Dimensions = dst.Dimensions[:len(dst.Dimensions)-1] - elementsLength = 0 - for _, dim := range dst.Dimensions { - if elementsLength == 0 { - elementsLength = int(dim.Length) - } else { - elementsLength *= int(dim.Length) - } - } - dst.Elements = make([]Varchar, elementsLength) - elementCount, err = dst.setRecursive(reflectedValue, 0, 0) - if err != nil { - return err - } - } else { - return err - } - } - if elementCount != len(dst.Elements) { - return fmt.Errorf("cannot convert %v to VarcharArray, expected %d dst.Elements, but got %d instead", src, len(dst.Elements), elementCount) - } - } - - return nil -} - -func (dst *VarcharArray) setRecursive(value reflect.Value, index, dimension int) (int, error) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(dst.Dimensions) == dimension { - break - } - - valueLen := value.Len() - if int32(valueLen) != dst.Dimensions[dimension].Length { - return 0, fmt.Errorf("multidimensional arrays must have array expressions with matching dimensions") - } - for i := 0; i < valueLen; i++ { - var err error - index, err = dst.setRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if !value.CanInterface() { - return 0, fmt.Errorf("cannot convert all values to VarcharArray") - } - if err := dst.Elements[index].Set(value.Interface()); err != nil { - return 0, fmt.Errorf("%v in VarcharArray", err) - } - index++ - - return index, nil -} - -func (dst VarcharArray) Get() interface{} { - switch dst.Status { - case Present: - return dst - case Null: - return nil - default: - return dst.Status - } -} - -func (src *VarcharArray) AssignTo(dst interface{}) error { - switch src.Status { - case Present: - if len(src.Dimensions) <= 1 { - // Attempt to match to select common types: - switch v := dst.(type) { - - case *[]string: - *v = make([]string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - case *[]*string: - *v = make([]*string, len(src.Elements)) - for i := range src.Elements { - if err := src.Elements[i].AssignTo(&((*v)[i])); err != nil { - return err - } - } - return nil - - } - } - - // Try to convert to something AssignTo can use directly. - if nextDst, retry := GetAssignToDstType(dst); retry { - return src.AssignTo(nextDst) - } - - // Fallback to reflection if an optimised match was not found. - // The reflection is necessary for arrays and multidimensional slices, - // but it comes with a 20-50% performance penalty for large arrays/slices - value := reflect.ValueOf(dst) - if value.Kind() == reflect.Ptr { - value = value.Elem() - } - - switch value.Kind() { - case reflect.Array, reflect.Slice: - default: - return fmt.Errorf("cannot assign %T to %T", src, dst) - } - - if len(src.Elements) == 0 { - if value.Kind() == reflect.Slice { - value.Set(reflect.MakeSlice(value.Type(), 0, 0)) - return nil - } - } - - elementCount, err := src.assignToRecursive(value, 0, 0) - if err != nil { - return err - } - if elementCount != len(src.Elements) { - return fmt.Errorf("cannot assign %v, needed to assign %d elements, but only assigned %d", dst, len(src.Elements), elementCount) - } - - return nil - case Null: - return NullAssignTo(dst) - } - - return fmt.Errorf("cannot decode %#v into %T", src, dst) -} - -func (src *VarcharArray) assignToRecursive(value reflect.Value, index, dimension int) (int, error) { - switch kind := value.Kind(); kind { - case reflect.Array: - fallthrough - case reflect.Slice: - if len(src.Dimensions) == dimension { - break - } - - length := int(src.Dimensions[dimension].Length) - if reflect.Array == kind { - typ := value.Type() - if typ.Len() != length { - return 0, fmt.Errorf("expected size %d array, but %s has size %d array", length, typ, typ.Len()) - } - value.Set(reflect.New(typ).Elem()) - } else { - value.Set(reflect.MakeSlice(value.Type(), length, length)) - } - - var err error - for i := 0; i < length; i++ { - index, err = src.assignToRecursive(value.Index(i), index, dimension+1) - if err != nil { - return 0, err - } - } - - return index, nil - } - if len(src.Dimensions) != dimension { - return 0, fmt.Errorf("incorrect dimensions, expected %d, found %d", len(src.Dimensions), dimension) - } - if !value.CanAddr() { - return 0, fmt.Errorf("cannot assign all values from VarcharArray") - } - addr := value.Addr() - if !addr.CanInterface() { - return 0, fmt.Errorf("cannot assign all values from VarcharArray") - } - if err := src.Elements[index].AssignTo(addr.Interface()); err != nil { - return 0, err - } - index++ - return index, nil -} - -func (dst *VarcharArray) DecodeText(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = VarcharArray{Status: Null} - return nil - } - - uta, err := ParseUntypedTextArray(string(src)) - if err != nil { - return err - } - - var elements []Varchar - - if len(uta.Elements) > 0 { - elements = make([]Varchar, len(uta.Elements)) - - for i, s := range uta.Elements { - var elem Varchar - var elemSrc []byte - if s != "NULL" || uta.Quoted[i] { - elemSrc = []byte(s) - } - err = elem.DecodeText(ci, elemSrc) - if err != nil { - return err - } - - elements[i] = elem - } - } - - *dst = VarcharArray{Elements: elements, Dimensions: uta.Dimensions, Status: Present} - - return nil -} - -func (dst *VarcharArray) DecodeBinary(ci *ConnInfo, src []byte) error { - if src == nil { - *dst = VarcharArray{Status: Null} - return nil - } - - var arrayHeader ArrayHeader - rp, err := arrayHeader.DecodeBinary(ci, src) - if err != nil { - return err - } - - if len(arrayHeader.Dimensions) == 0 { - *dst = VarcharArray{Dimensions: arrayHeader.Dimensions, Status: Present} - return nil - } - - elementCount := arrayHeader.Dimensions[0].Length - for _, d := range arrayHeader.Dimensions[1:] { - elementCount *= d.Length - } - - elements := make([]Varchar, elementCount) - - for i := range elements { - elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) - rp += 4 - var elemSrc []byte - if elemLen >= 0 { - elemSrc = src[rp : rp+elemLen] - rp += elemLen - } - err = elements[i].DecodeBinary(ci, elemSrc) - if err != nil { - return err - } - } - - *dst = VarcharArray{Elements: elements, Dimensions: arrayHeader.Dimensions, Status: Present} - return nil -} - -func (src VarcharArray) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - if len(src.Dimensions) == 0 { - return append(buf, '{', '}'), nil - } - - buf = EncodeTextArrayDimensions(buf, src.Dimensions) - - // dimElemCounts is the multiples of elements that each array lies on. For - // example, a single dimension array of length 4 would have a dimElemCounts of - // [4]. A multi-dimensional array of lengths [3,5,2] would have a - // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' - // or '}'. - dimElemCounts := make([]int, len(src.Dimensions)) - dimElemCounts[len(src.Dimensions)-1] = int(src.Dimensions[len(src.Dimensions)-1].Length) - for i := len(src.Dimensions) - 2; i > -1; i-- { - dimElemCounts[i] = int(src.Dimensions[i].Length) * dimElemCounts[i+1] - } - - inElemBuf := make([]byte, 0, 32) - for i, elem := range src.Elements { - if i > 0 { - buf = append(buf, ',') - } - - for _, dec := range dimElemCounts { - if i%dec == 0 { - buf = append(buf, '{') - } - } - - elemBuf, err := elem.EncodeText(ci, inElemBuf) - if err != nil { - return nil, err - } - if elemBuf == nil { - buf = append(buf, `NULL`...) - } else { - buf = append(buf, QuoteArrayElementIfNeeded(string(elemBuf))...) - } - - for _, dec := range dimElemCounts { - if (i+1)%dec == 0 { - buf = append(buf, '}') - } - } - } - - return buf, nil -} - -func (src VarcharArray) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - switch src.Status { - case Null: - return nil, nil - case Undefined: - return nil, errUndefined - } - - arrayHeader := ArrayHeader{ - Dimensions: src.Dimensions, - } - - if dt, ok := ci.DataTypeForName("varchar"); ok { - arrayHeader.ElementOID = int32(dt.OID) - } else { - return nil, fmt.Errorf("unable to find oid for type name %v", "varchar") - } - - for i := range src.Elements { - if src.Elements[i].Status == Null { - arrayHeader.ContainsNull = true - break - } - } - - buf = arrayHeader.EncodeBinary(ci, buf) - - for i := range src.Elements { - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - - elemBuf, err := src.Elements[i].EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if elemBuf != nil { - buf = elemBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - } - - return buf, nil -} - -// Scan implements the database/sql Scanner interface. -func (dst *VarcharArray) Scan(src interface{}) error { - if src == nil { - return dst.DecodeText(nil, nil) - } - - switch src := src.(type) { - case string: - return dst.DecodeText(nil, []byte(src)) - case []byte: - srcCopy := make([]byte, len(src)) - copy(srcCopy, src) - return dst.DecodeText(nil, srcCopy) - } - - return fmt.Errorf("cannot scan %T", src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src VarcharArray) Value() (driver.Value, error) { - buf, err := src.EncodeText(nil, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - - return string(buf), nil -} diff --git a/vendor/github.com/jackc/pgtype/xid.go b/vendor/github.com/jackc/pgtype/xid.go deleted file mode 100644 index f6d6b22d..00000000 --- a/vendor/github.com/jackc/pgtype/xid.go +++ /dev/null @@ -1,64 +0,0 @@ -package pgtype - -import ( - "database/sql/driver" -) - -// XID is PostgreSQL's Transaction ID type. -// -// In later versions of PostgreSQL, it is the type used for the backend_xid -// and backend_xmin columns of the pg_stat_activity system view. -// -// Also, when one does -// -// select xmin, xmax, * from some_table; -// -// it is the data type of the xmin and xmax hidden system columns. -// -// It is currently implemented as an unsigned four byte integer. -// Its definition can be found in src/include/postgres_ext.h as TransactionId -// in the PostgreSQL sources. -type XID pguint32 - -// Set converts from src to dst. Note that as XID is not a general -// number type Set does not do automatic type conversion as other number -// types do. -func (dst *XID) Set(src interface{}) error { - return (*pguint32)(dst).Set(src) -} - -func (dst XID) Get() interface{} { - return (pguint32)(dst).Get() -} - -// AssignTo assigns from src to dst. Note that as XID is not a general number -// type AssignTo does not do automatic type conversion as other number types do. -func (src *XID) AssignTo(dst interface{}) error { - return (*pguint32)(src).AssignTo(dst) -} - -func (dst *XID) DecodeText(ci *ConnInfo, src []byte) error { - return (*pguint32)(dst).DecodeText(ci, src) -} - -func (dst *XID) DecodeBinary(ci *ConnInfo, src []byte) error { - return (*pguint32)(dst).DecodeBinary(ci, src) -} - -func (src XID) EncodeText(ci *ConnInfo, buf []byte) ([]byte, error) { - return (pguint32)(src).EncodeText(ci, buf) -} - -func (src XID) EncodeBinary(ci *ConnInfo, buf []byte) ([]byte, error) { - return (pguint32)(src).EncodeBinary(ci, buf) -} - -// Scan implements the database/sql Scanner interface. -func (dst *XID) Scan(src interface{}) error { - return (*pguint32)(dst).Scan(src) -} - -// Value implements the database/sql/driver Valuer interface. -func (src XID) Value() (driver.Value, error) { - return (pguint32)(src).Value() -} diff --git a/vendor/github.com/jackc/pgx/v4/CHANGELOG.md b/vendor/github.com/jackc/pgx/v4/CHANGELOG.md deleted file mode 100644 index 17d29ccc..00000000 --- a/vendor/github.com/jackc/pgx/v4/CHANGELOG.md +++ /dev/null @@ -1,304 +0,0 @@ -# 4.18.2 (March 4, 2024) - -Fix CVE-2024-27289 - -SQL injection can occur when all of the following conditions are met: - -1. The non-default simple protocol is used. -2. A placeholder for a numeric value must be immediately preceded by a minus. -3. There must be a second placeholder for a string value after the first placeholder; both must be on the same line. -4. Both parameter values must be user-controlled. - -Thanks to Paul Gerste for reporting this issue. - -Fix CVE-2024-27304 - -SQL injection can occur if an attacker can cause a single query or bind message to exceed 4 GB in size. An integer -overflow in the calculated message size can cause the one large message to be sent as multiple messages under the -attacker's control. - -Thanks to Paul Gerste for reporting this issue. - -* Fix *dbTx.Exec not checking if it is already closed - -# 4.18.1 (February 27, 2023) - -* Fix: Support pgx v4 and v5 stdlib in same program (Tomáš Procházka) - -# 4.18.0 (February 11, 2023) - -* Upgrade pgconn to v1.14.0 -* Upgrade pgproto3 to v2.3.2 -* Upgrade pgtype to v1.14.0 -* Fix query sanitizer when query text contains Unicode replacement character -* Fix context with value in BeforeConnect (David Harju) -* Support pgx v4 and v5 stdlib in same program (Vitalii Solodilov) - -# 4.17.2 (September 3, 2022) - -* Fix panic when logging batch error (Tom Möller) - -# 4.17.1 (August 27, 2022) - -* Upgrade puddle to v1.3.0 - fixes context failing to cancel Acquire when acquire is creating resource which was introduced in v4.17.0 (James Hartig) -* Fix atomic alignment on 32-bit platforms - -# 4.17.0 (August 6, 2022) - -* Upgrade pgconn to v1.13.0 -* Upgrade pgproto3 to v2.3.1 -* Upgrade pgtype to v1.12.0 -* Allow background pool connections to continue even if cause is canceled (James Hartig) -* Add LoggerFunc (Gabor Szabad) -* pgxpool: health check should avoid going below minConns (James Hartig) -* Add pgxpool.Conn.Hijack() -* Logging improvements (Stepan Rabotkin) - -# 4.16.1 (May 7, 2022) - -* Upgrade pgconn to v1.12.1 -* Fix explicitly prepared statements with describe statement cache mode - -# 4.16.0 (April 21, 2022) - -* Upgrade pgconn to v1.12.0 -* Upgrade pgproto3 to v2.3.0 -* Upgrade pgtype to v1.11.0 -* Fix: Do not panic when context cancelled while getting statement from cache. -* Fix: Less memory pinning from old Rows. -* Fix: Support '\r' line ending when sanitizing SQL comment. -* Add pluggable GSSAPI support (Oliver Tan) - -# 4.15.0 (February 7, 2022) - -* Upgrade to pgconn v1.11.0 -* Upgrade to pgtype v1.10.0 -* Upgrade puddle to v1.2.1 -* Make BatchResults.Close safe to be called multiple times - -# 4.14.1 (November 28, 2021) - -* Upgrade pgtype to v1.9.1 (fixes unintentional change to timestamp binary decoding) -* Start pgxpool background health check after initial connections - -# 4.14.0 (November 20, 2021) - -* Upgrade pgconn to v1.10.1 -* Upgrade pgproto3 to v2.2.0 -* Upgrade pgtype to v1.9.0 -* Upgrade puddle to v1.2.0 -* Add QueryFunc to BatchResults -* Add context options to zerologadapter (Thomas Frössman) -* Add zerologadapter.NewContextLogger (urso) -* Eager initialize minpoolsize on connect (Daniel) -* Unpin memory used by large queries immediately after use - -# 4.13.0 (July 24, 2021) - -* Trimmed pseudo-dependencies in Go modules from other packages tests -* Upgrade pgconn -- context cancellation no longer will return a net.Error -* Support time durations for simple protocol (Michael Darr) - -# 4.12.0 (July 10, 2021) - -* ResetSession hook is called before a connection is reused from pool for another query (Dmytro Haranzha) -* stdlib: Add RandomizeHostOrderFunc (dkinder) -* stdlib: add OptionBeforeConnect (dkinder) -* stdlib: Do not reuse ConnConfig strings (Andrew Kimball) -* stdlib: implement Conn.ResetSession (Jonathan Amsterdam) -* Upgrade pgconn to v1.9.0 -* Upgrade pgtype to v1.8.0 - -# 4.11.0 (March 25, 2021) - -* Add BeforeConnect callback to pgxpool.Config (Robert Froehlich) -* Add Ping method to pgxpool.Conn (davidsbond) -* Added a kitlog level log adapter (Fabrice Aneche) -* Make ScanArgError public to allow identification of offending column (Pau Sanchez) -* Add *pgxpool.AcquireFunc -* Add BeginFunc and BeginTxFunc -* Add prefer_simple_protocol to connection string -* Add logging on CopyFrom (Patrick Hemmer) -* Add comment support when sanitizing SQL queries (Rusakow Andrew) -* Do not panic on double close of pgxpool.Pool (Matt Schultz) -* Avoid panic on SendBatch on closed Tx (Matt Schultz) -* Update pgconn to v1.8.1 -* Update pgtype to v1.7.0 - -# 4.10.1 (December 19, 2020) - -* Fix panic on Query error with nil stmtcache. - -# 4.10.0 (December 3, 2020) - -* Add CopyFromSlice to simplify CopyFrom usage (Egon Elbre) -* Remove broken prepared statements from stmtcache (Ethan Pailes) -* stdlib: consider any Ping error as fatal -* Update puddle to v1.1.3 - this fixes an issue where concurrent Acquires can hang when a connection cannot be established -* Update pgtype to v1.6.2 - -# 4.9.2 (November 3, 2020) - -The underlying library updates fix an issue where appending to a scanned slice could corrupt other data. - -* Update pgconn to v1.7.2 -* Update pgproto3 to v2.0.6 - -# 4.9.1 (October 31, 2020) - -* Update pgconn to v1.7.1 -* Update pgtype to v1.6.1 -* Fix SendBatch of all prepared statements with statement cache disabled - -# 4.9.0 (September 26, 2020) - -* pgxpool now waits for connection cleanup to finish before making room in pool for another connection. This prevents temporarily exceeding max pool size. -* Fix when scanning a column to nil to skip it on the first row but scanning it to a real value on a subsequent row. -* Fix prefer simple protocol with prepared statements. (Jinzhu) -* Fix FieldDescriptions not being available on Rows before calling Next the first time. -* Various minor fixes in updated versions of pgconn, pgtype, and puddle. - -# 4.8.1 (July 29, 2020) - -* Update pgconn to v1.6.4 - * Fix deadlock on error after CommandComplete but before ReadyForQuery - * Fix panic on parsing DSN with trailing '=' - -# 4.8.0 (July 22, 2020) - -* All argument types supported by native pgx should now also work through database/sql -* Update pgconn to v1.6.3 -* Update pgtype to v1.4.2 - -# 4.7.2 (July 14, 2020) - -* Improve performance of Columns() (zikaeroh) -* Fix fatal Commit() failure not being considered fatal -* Update pgconn to v1.6.2 -* Update pgtype to v1.4.1 - -# 4.7.1 (June 29, 2020) - -* Fix stdlib decoding error with certain order and combination of fields - -# 4.7.0 (June 27, 2020) - -* Update pgtype to v1.4.0 -* Update pgconn to v1.6.1 -* Update puddle to v1.1.1 -* Fix context propagation with Tx commit and Rollback (georgysavva) -* Add lazy connect option to pgxpool (georgysavva) -* Fix connection leak if pgxpool.BeginTx() fail (Jean-Baptiste Bronisz) -* Add native Go slice support for strings and numbers to simple protocol -* stdlib add default timeouts for Conn.Close() and Stmt.Close() (georgysavva) -* Assorted performance improvements especially with large result sets -* Fix close pool on not lazy connect failure (Yegor Myskin) -* Add Config copy (georgysavva) -* Support SendBatch with Simple Protocol (Jordan Lewis) -* Better error logging on rows close (Igor V. Kozinov) -* Expose stdlib.Conn.Conn() to enable database/sql.Conn.Raw() -* Improve unknown type support for database/sql -* Fix transaction commit failure closing connection - -# 4.6.0 (March 30, 2020) - -* stdlib: Bail early if preloading rows.Next() results in rows.Err() (Bas van Beek) -* Sanitize time to microsecond accuracy (Andrew Nicoll) -* Update pgtype to v1.3.0 -* Update pgconn to v1.5.0 - * Update golang.org/x/crypto for security fix - * Implement "verify-ca" SSL mode - -# 4.5.0 (March 7, 2020) - -* Update to pgconn v1.4.0 - * Fixes QueryRow with empty SQL - * Adds PostgreSQL service file support -* Add Len() to *pgx.Batch (WGH) -* Better logging for individual batch items (Ben Bader) - -# 4.4.1 (February 14, 2020) - -* Update pgconn to v1.3.2 - better default read buffer size -* Fix race in CopyFrom - -# 4.4.0 (February 5, 2020) - -* Update puddle to v1.1.0 - fixes possible deadlock when acquire is cancelled -* Update pgconn to v1.3.1 - fixes CopyFrom deadlock when multiple NoticeResponse received during copy -* Update pgtype to v1.2.0 -* Add MaxConnIdleTime to pgxpool (Patrick Ellul) -* Add MinConns to pgxpool (Patrick Ellul) -* Fix: stdlib.ReleaseConn closes connections left in invalid state - -# 4.3.0 (January 23, 2020) - -* Fix Rows.Values panic when unable to decode -* Add Rows.Values support for unknown types -* Add DriverContext support for stdlib (Alex Gaynor) -* Update pgproto3 to v2.0.1 to never return an io.EOF as it would be misinterpreted by database/sql. Instead return io.UnexpectedEOF. - -# 4.2.1 (January 13, 2020) - -* Update pgconn to v1.2.1 (fixes context cancellation data race introduced in v1.2.0)) - -# 4.2.0 (January 11, 2020) - -* Update pgconn to v1.2.0. -* Update pgtype to v1.1.0. -* Return error instead of panic when wrong number of arguments passed to Exec. (malstoun) -* Fix large objects functionality when PreferSimpleProtocol = true. -* Restore GetDefaultDriver which existed in v3. (Johan Brandhorst) -* Add RegisterConnConfig to stdlib which replaces the removed RegisterDriverConfig from v3. - -# 4.1.2 (October 22, 2019) - -* Fix dbSavepoint.Begin recursive self call -* Upgrade pgtype to v1.0.2 - fix scan pointer to pointer - -# 4.1.1 (October 21, 2019) - -* Fix pgxpool Rows.CommandTag() infinite loop / typo - -# 4.1.0 (October 12, 2019) - -## Potentially Breaking Changes - -Technically, two changes are breaking changes, but in practice these are extremely unlikely to break existing code. - -* Conn.Begin and Conn.BeginTx return a Tx interface instead of the internal dbTx struct. This is necessary for the Conn.Begin method to signature as other methods that begin a transaction. -* Add Conn() to Tx interface. This is necessary to allow code using a Tx to access the *Conn (and pgconn.PgConn) on which the Tx is executing. - -## Fixes - -* Releasing a busy connection closes the connection instead of returning an unusable connection to the pool -* Do not mutate config.Config.OnNotification in connect - -# 4.0.1 (September 19, 2019) - -* Fix statement cache cleanup. -* Corrected daterange OID. -* Fix Tx when committing or rolling back multiple times in certain cases. -* Improve documentation. - -# 4.0.0 (September 14, 2019) - -v4 is a major release with many significant changes some of which are breaking changes. The most significant are -included below. - -* Simplified establishing a connection with a connection string. -* All potentially blocking operations now require a context.Context. The non-context aware functions have been removed. -* OIDs are hard-coded for known types. This saves the query on connection. -* Context cancellations while network activity is in progress is now always fatal. Previously, it was sometimes recoverable. This led to increased complexity in pgx itself and in application code. -* Go modules are required. -* Errors are now implemented in the Go 1.13 style. -* `Rows` and `Tx` are now interfaces. -* The connection pool as been decoupled from pgx and is now a separate, included package (github.com/jackc/pgx/v4/pgxpool). -* pgtype has been spun off to a separate package (github.com/jackc/pgtype). -* pgproto3 has been spun off to a separate package (github.com/jackc/pgproto3/v2). -* Logical replication support has been spun off to a separate package (github.com/jackc/pglogrepl). -* Lower level PostgreSQL functionality is now implemented in a separate package (github.com/jackc/pgconn). -* Tests are now configured with environment variables. -* Conn has an automatic statement cache by default. -* Batch interface has been simplified. -* QueryArgs has been removed. diff --git a/vendor/github.com/jackc/pgx/v4/LICENSE b/vendor/github.com/jackc/pgx/v4/LICENSE deleted file mode 100644 index 5c486c39..00000000 --- a/vendor/github.com/jackc/pgx/v4/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2013-2021 Jack Christensen - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/jackc/pgx/v4/README.md b/vendor/github.com/jackc/pgx/v4/README.md deleted file mode 100644 index 46b9c559..00000000 --- a/vendor/github.com/jackc/pgx/v4/README.md +++ /dev/null @@ -1,196 +0,0 @@ -[![](https://godoc.org/github.com/jackc/pgx?status.svg)](https://pkg.go.dev/github.com/jackc/pgx/v4) -[![Build Status](https://travis-ci.org/jackc/pgx.svg)](https://travis-ci.org/jackc/pgx) - ---- - -This is the previous stable `v4` release. `v5` been released. - ---- -# pgx - PostgreSQL Driver and Toolkit - -pgx is a pure Go driver and toolkit for PostgreSQL. - -pgx aims to be low-level, fast, and performant, while also enabling PostgreSQL-specific features that the standard `database/sql` package does not allow for. - -The driver component of pgx can be used alongside the standard `database/sql` package. - -The toolkit component is a related set of packages that implement PostgreSQL functionality such as parsing the wire protocol -and type mapping between PostgreSQL and Go. These underlying packages can be used to implement alternative drivers, -proxies, load balancers, logical replication clients, etc. - -The current release of `pgx v4` requires Go modules. To use the previous version, checkout and vendor the `v3` branch. - -## Example Usage - -```go -package main - -import ( - "context" - "fmt" - "os" - - "github.com/jackc/pgx/v4" -) - -func main() { - // urlExample := "postgres://username:password@localhost:5432/database_name" - conn, err := pgx.Connect(context.Background(), os.Getenv("DATABASE_URL")) - if err != nil { - fmt.Fprintf(os.Stderr, "Unable to connect to database: %v\n", err) - os.Exit(1) - } - defer conn.Close(context.Background()) - - var name string - var weight int64 - err = conn.QueryRow(context.Background(), "select name, weight from widgets where id=$1", 42).Scan(&name, &weight) - if err != nil { - fmt.Fprintf(os.Stderr, "QueryRow failed: %v\n", err) - os.Exit(1) - } - - fmt.Println(name, weight) -} -``` - -See the [getting started guide](https://github.com/jackc/pgx/wiki/Getting-started-with-pgx) for more information. - -## Choosing Between the pgx and database/sql Interfaces - -It is recommended to use the pgx interface if: -1. The application only targets PostgreSQL. -2. No other libraries that require `database/sql` are in use. - -The pgx interface is faster and exposes more features. - -The `database/sql` interface only allows the underlying driver to return or receive the following types: `int64`, -`float64`, `bool`, `[]byte`, `string`, `time.Time`, or `nil`. Handling other types requires implementing the -`database/sql.Scanner` and the `database/sql/driver/driver.Valuer` interfaces which require transmission of values in text format. The binary format can be substantially faster, which is what the pgx interface uses. - -## Features - -pgx supports many features beyond what is available through `database/sql`: - -* Support for approximately 70 different PostgreSQL types -* Automatic statement preparation and caching -* Batch queries -* Single-round trip query mode -* Full TLS connection control -* Binary format support for custom types (allows for much quicker encoding/decoding) -* COPY protocol support for faster bulk data loads -* Extendable logging support including built-in support for `log15adapter`, [`logrus`](https://github.com/sirupsen/logrus), [`zap`](https://github.com/uber-go/zap), and [`zerolog`](https://github.com/rs/zerolog) -* Connection pool with after-connect hook for arbitrary connection setup -* Listen / notify -* Conversion of PostgreSQL arrays to Go slice mappings for integers, floats, and strings -* Hstore support -* JSON and JSONB support -* Maps `inet` and `cidr` PostgreSQL types to `net.IPNet` and `net.IP` -* Large object support -* NULL mapping to Null* struct or pointer to pointer -* Supports `database/sql.Scanner` and `database/sql/driver.Valuer` interfaces for custom types -* Notice response handling -* Simulated nested transactions with savepoints - -## Performance - -There are three areas in particular where pgx can provide a significant performance advantage over the standard -`database/sql` interface and other drivers: - -1. PostgreSQL specific types - Types such as arrays can be parsed much quicker because pgx uses the binary format. -2. Automatic statement preparation and caching - pgx will prepare and cache statements by default. This can provide an - significant free improvement to code that does not explicitly use prepared statements. Under certain workloads, it can - perform nearly 3x the number of queries per second. -3. Batched queries - Multiple queries can be batched together to minimize network round trips. - -## Testing - -pgx tests naturally require a PostgreSQL database. It will connect to the database specified in the `PGX_TEST_DATABASE` environment -variable. The `PGX_TEST_DATABASE` environment variable can either be a URL or DSN. In addition, the standard `PG*` environment -variables will be respected. Consider using [direnv](https://github.com/direnv/direnv) to simplify environment variable -handling. - -### Example Test Environment - -Connect to your PostgreSQL server and run: - -``` -create database pgx_test; -``` - -Connect to the newly-created database and run: - -``` -create domain uint64 as numeric(20,0); -``` - -Now, you can run the tests: - -``` -PGX_TEST_DATABASE="host=/var/run/postgresql database=pgx_test" go test ./... -``` - -In addition, there are tests specific for PgBouncer that will be executed if `PGX_TEST_PGBOUNCER_CONN_STRING` is set. - -## Supported Go and PostgreSQL Versions - -pgx supports the same versions of Go and PostgreSQL that are supported by their respective teams. For [Go](https://golang.org/doc/devel/release.html#policy) that is the two most recent major releases and for [PostgreSQL](https://www.postgresql.org/support/versioning/) the major releases in the last 5 years. This means pgx supports Go 1.17 and higher and PostgreSQL 10 and higher. pgx also is tested against the latest version of [CockroachDB](https://www.cockroachlabs.com/product/). - -## Version Policy - -pgx follows semantic versioning for the documented public API on stable releases. `v4` is the latest stable major version. - -## PGX Family Libraries - -pgx is the head of a family of PostgreSQL libraries. Many of these can be used independently. Many can also be accessed -from pgx for lower-level control. - -### [github.com/jackc/pgconn](https://github.com/jackc/pgconn) - -`pgconn` is a lower-level PostgreSQL database driver that operates at nearly the same level as the C library `libpq`. - -### [github.com/jackc/pgx/v4/pgxpool](https://github.com/jackc/pgx/tree/master/pgxpool) - -`pgxpool` is a connection pool for pgx. pgx is entirely decoupled from its default pool implementation. This means that pgx can be used with a different pool or without any pool at all. - -### [github.com/jackc/pgx/v4/stdlib](https://github.com/jackc/pgx/tree/master/stdlib) - -This is a `database/sql` compatibility layer for pgx. pgx can be used as a normal `database/sql` driver, but at any time, the native interface can be acquired for more performance or PostgreSQL specific functionality. - -### [github.com/jackc/pgtype](https://github.com/jackc/pgtype) - -Over 70 PostgreSQL types are supported including `uuid`, `hstore`, `json`, `bytea`, `numeric`, `interval`, `inet`, and arrays. These types support `database/sql` interfaces and are usable outside of pgx. They are fully tested in pgx and pq. They also support a higher performance interface when used with the pgx driver. - -### [github.com/jackc/pgproto3](https://github.com/jackc/pgproto3) - -pgproto3 provides standalone encoding and decoding of the PostgreSQL v3 wire protocol. This is useful for implementing very low level PostgreSQL tooling. - -### [github.com/jackc/pglogrepl](https://github.com/jackc/pglogrepl) - -pglogrepl provides functionality to act as a client for PostgreSQL logical replication. - -### [github.com/jackc/pgmock](https://github.com/jackc/pgmock) - -pgmock offers the ability to create a server that mocks the PostgreSQL wire protocol. This is used internally to test pgx by purposely inducing unusual errors. pgproto3 and pgmock together provide most of the foundational tooling required to implement a PostgreSQL proxy or MitM (such as for a custom connection pooler). - -### [github.com/jackc/tern](https://github.com/jackc/tern) - -tern is a stand-alone SQL migration system. - -### [github.com/jackc/pgerrcode](https://github.com/jackc/pgerrcode) - -pgerrcode contains constants for the PostgreSQL error codes. - -## 3rd Party Libraries with PGX Support - -### [github.com/georgysavva/scany](https://github.com/georgysavva/scany) - -Library for scanning data from a database into Go structs and more. - -### [https://github.com/otan/gopgkrb5](https://github.com/otan/gopgkrb5) - -Adds GSSAPI / Kerberos authentication support. - -### [https://github.com/vgarvardt/pgx-google-uuid](https://github.com/vgarvardt/pgx-google-uuid) - -Adds support for [`github.com/google/uuid`](https://github.com/google/uuid). diff --git a/vendor/github.com/jackc/pgx/v4/batch.go b/vendor/github.com/jackc/pgx/v4/batch.go deleted file mode 100644 index 7f86ad5c..00000000 --- a/vendor/github.com/jackc/pgx/v4/batch.go +++ /dev/null @@ -1,228 +0,0 @@ -package pgx - -import ( - "context" - "errors" - "fmt" - - "github.com/jackc/pgconn" -) - -type batchItem struct { - query string - arguments []interface{} -} - -// Batch queries are a way of bundling multiple queries together to avoid -// unnecessary network round trips. -type Batch struct { - items []*batchItem -} - -// Queue queues a query to batch b. query can be an SQL query or the name of a prepared statement. -func (b *Batch) Queue(query string, arguments ...interface{}) { - b.items = append(b.items, &batchItem{ - query: query, - arguments: arguments, - }) -} - -// Len returns number of queries that have been queued so far. -func (b *Batch) Len() int { - return len(b.items) -} - -type BatchResults interface { - // Exec reads the results from the next query in the batch as if the query has been sent with Conn.Exec. - Exec() (pgconn.CommandTag, error) - - // Query reads the results from the next query in the batch as if the query has been sent with Conn.Query. - Query() (Rows, error) - - // QueryRow reads the results from the next query in the batch as if the query has been sent with Conn.QueryRow. - QueryRow() Row - - // QueryFunc reads the results from the next query in the batch as if the query has been sent with Conn.QueryFunc. - QueryFunc(scans []interface{}, f func(QueryFuncRow) error) (pgconn.CommandTag, error) - - // Close closes the batch operation. This must be called before the underlying connection can be used again. Any error - // that occurred during a batch operation may have made it impossible to resyncronize the connection with the server. - // In this case the underlying connection will have been closed. Close is safe to call multiple times. - Close() error -} - -type batchResults struct { - ctx context.Context - conn *Conn - mrr *pgconn.MultiResultReader - err error - b *Batch - ix int - closed bool -} - -// Exec reads the results from the next query in the batch as if the query has been sent with Exec. -func (br *batchResults) Exec() (pgconn.CommandTag, error) { - if br.err != nil { - return nil, br.err - } - if br.closed { - return nil, fmt.Errorf("batch already closed") - } - - query, arguments, _ := br.nextQueryAndArgs() - - if !br.mrr.NextResult() { - err := br.mrr.Close() - if err == nil { - err = errors.New("no result") - } - if br.conn.shouldLog(LogLevelError) { - br.conn.log(br.ctx, LogLevelError, "BatchResult.Exec", map[string]interface{}{ - "sql": query, - "args": logQueryArgs(arguments), - "err": err, - }) - } - return nil, err - } - - commandTag, err := br.mrr.ResultReader().Close() - - if err != nil { - if br.conn.shouldLog(LogLevelError) { - br.conn.log(br.ctx, LogLevelError, "BatchResult.Exec", map[string]interface{}{ - "sql": query, - "args": logQueryArgs(arguments), - "err": err, - }) - } - } else if br.conn.shouldLog(LogLevelInfo) { - br.conn.log(br.ctx, LogLevelInfo, "BatchResult.Exec", map[string]interface{}{ - "sql": query, - "args": logQueryArgs(arguments), - "commandTag": commandTag, - }) - } - - return commandTag, err -} - -// Query reads the results from the next query in the batch as if the query has been sent with Query. -func (br *batchResults) Query() (Rows, error) { - query, arguments, ok := br.nextQueryAndArgs() - if !ok { - query = "batch query" - } - - if br.err != nil { - return &connRows{err: br.err, closed: true}, br.err - } - - if br.closed { - alreadyClosedErr := fmt.Errorf("batch already closed") - return &connRows{err: alreadyClosedErr, closed: true}, alreadyClosedErr - } - - rows := br.conn.getRows(br.ctx, query, arguments) - - if !br.mrr.NextResult() { - rows.err = br.mrr.Close() - if rows.err == nil { - rows.err = errors.New("no result") - } - rows.closed = true - - if br.conn.shouldLog(LogLevelError) { - br.conn.log(br.ctx, LogLevelError, "BatchResult.Query", map[string]interface{}{ - "sql": query, - "args": logQueryArgs(arguments), - "err": rows.err, - }) - } - - return rows, rows.err - } - - rows.resultReader = br.mrr.ResultReader() - return rows, nil -} - -// QueryFunc reads the results from the next query in the batch as if the query has been sent with Conn.QueryFunc. -func (br *batchResults) QueryFunc(scans []interface{}, f func(QueryFuncRow) error) (pgconn.CommandTag, error) { - if br.closed { - return nil, fmt.Errorf("batch already closed") - } - - rows, err := br.Query() - if err != nil { - return nil, err - } - defer rows.Close() - - for rows.Next() { - err = rows.Scan(scans...) - if err != nil { - return nil, err - } - - err = f(rows) - if err != nil { - return nil, err - } - } - - if err := rows.Err(); err != nil { - return nil, err - } - - return rows.CommandTag(), nil -} - -// QueryRow reads the results from the next query in the batch as if the query has been sent with QueryRow. -func (br *batchResults) QueryRow() Row { - rows, _ := br.Query() - return (*connRow)(rows.(*connRows)) - -} - -// Close closes the batch operation. Any error that occurred during a batch operation may have made it impossible to -// resyncronize the connection with the server. In this case the underlying connection will have been closed. -func (br *batchResults) Close() error { - if br.err != nil { - return br.err - } - - if br.closed { - return nil - } - br.closed = true - - // log any queries that haven't yet been logged by Exec or Query - for { - query, args, ok := br.nextQueryAndArgs() - if !ok { - break - } - - if br.conn.shouldLog(LogLevelInfo) { - br.conn.log(br.ctx, LogLevelInfo, "BatchResult.Close", map[string]interface{}{ - "sql": query, - "args": logQueryArgs(args), - }) - } - } - - return br.mrr.Close() -} - -func (br *batchResults) nextQueryAndArgs() (query string, args []interface{}, ok bool) { - if br.b != nil && br.ix < len(br.b.items) { - bi := br.b.items[br.ix] - query = bi.query - args = bi.arguments - ok = true - br.ix++ - } - return -} diff --git a/vendor/github.com/jackc/pgx/v4/conn.go b/vendor/github.com/jackc/pgx/v4/conn.go deleted file mode 100644 index 6f83f497..00000000 --- a/vendor/github.com/jackc/pgx/v4/conn.go +++ /dev/null @@ -1,857 +0,0 @@ -package pgx - -import ( - "context" - "errors" - "fmt" - "strconv" - "strings" - "time" - - "github.com/jackc/pgconn" - "github.com/jackc/pgconn/stmtcache" - "github.com/jackc/pgproto3/v2" - "github.com/jackc/pgtype" - "github.com/jackc/pgx/v4/internal/sanitize" -) - -// ConnConfig contains all the options used to establish a connection. It must be created by ParseConfig and -// then it can be modified. A manually initialized ConnConfig will cause ConnectConfig to panic. -type ConnConfig struct { - pgconn.Config - Logger Logger - LogLevel LogLevel - - // Original connection string that was parsed into config. - connString string - - // BuildStatementCache creates the stmtcache.Cache implementation for connections created with this config. Set - // to nil to disable automatic prepared statements. - BuildStatementCache BuildStatementCacheFunc - - // PreferSimpleProtocol disables implicit prepared statement usage. By default pgx automatically uses the extended - // protocol. This can improve performance due to being able to use the binary format. It also does not rely on client - // side parameter sanitization. However, it does incur two round-trips per query (unless using a prepared statement) - // and may be incompatible proxies such as PGBouncer. Setting PreferSimpleProtocol causes the simple protocol to be - // used by default. The same functionality can be controlled on a per query basis by setting - // QueryExOptions.SimpleProtocol. - PreferSimpleProtocol bool - - createdByParseConfig bool // Used to enforce created by ParseConfig rule. -} - -// Copy returns a deep copy of the config that is safe to use and modify. -// The only exception is the tls.Config: -// according to the tls.Config docs it must not be modified after creation. -func (cc *ConnConfig) Copy() *ConnConfig { - newConfig := new(ConnConfig) - *newConfig = *cc - newConfig.Config = *newConfig.Config.Copy() - return newConfig -} - -// ConnString returns the connection string as parsed by pgx.ParseConfig into pgx.ConnConfig. -func (cc *ConnConfig) ConnString() string { return cc.connString } - -// BuildStatementCacheFunc is a function that can be used to create a stmtcache.Cache implementation for connection. -type BuildStatementCacheFunc func(conn *pgconn.PgConn) stmtcache.Cache - -// Conn is a PostgreSQL connection handle. It is not safe for concurrent usage. Use a connection pool to manage access -// to multiple database connections from multiple goroutines. -type Conn struct { - pgConn *pgconn.PgConn - config *ConnConfig // config used when establishing this connection - preparedStatements map[string]*pgconn.StatementDescription - stmtcache stmtcache.Cache - logger Logger - logLevel LogLevel - - notifications []*pgconn.Notification - - doneChan chan struct{} - closedChan chan error - - connInfo *pgtype.ConnInfo - - wbuf []byte - eqb extendedQueryBuilder -} - -// Identifier a PostgreSQL identifier or name. Identifiers can be composed of -// multiple parts such as ["schema", "table"] or ["table", "column"]. -type Identifier []string - -// Sanitize returns a sanitized string safe for SQL interpolation. -func (ident Identifier) Sanitize() string { - parts := make([]string, len(ident)) - for i := range ident { - s := strings.ReplaceAll(ident[i], string([]byte{0}), "") - parts[i] = `"` + strings.ReplaceAll(s, `"`, `""`) + `"` - } - return strings.Join(parts, ".") -} - -// ErrNoRows occurs when rows are expected but none are returned. -var ErrNoRows = errors.New("no rows in result set") - -// ErrInvalidLogLevel occurs on attempt to set an invalid log level. -var ErrInvalidLogLevel = errors.New("invalid log level") - -// Connect establishes a connection with a PostgreSQL server with a connection string. See -// pgconn.Connect for details. -func Connect(ctx context.Context, connString string) (*Conn, error) { - connConfig, err := ParseConfig(connString) - if err != nil { - return nil, err - } - return connect(ctx, connConfig) -} - -// ConnectConfig establishes a connection with a PostgreSQL server with a configuration struct. -// connConfig must have been created by ParseConfig. -func ConnectConfig(ctx context.Context, connConfig *ConnConfig) (*Conn, error) { - return connect(ctx, connConfig) -} - -// ParseConfig creates a ConnConfig from a connection string. ParseConfig handles all options that pgconn.ParseConfig -// does. In addition, it accepts the following options: -// -// statement_cache_capacity -// The maximum size of the automatic statement cache. Set to 0 to disable automatic statement caching. Default: 512. -// -// statement_cache_mode -// Possible values: "prepare" and "describe". "prepare" will create prepared statements on the PostgreSQL server. -// "describe" will use the anonymous prepared statement to describe a statement without creating a statement on the -// server. "describe" is primarily useful when the environment does not allow prepared statements such as when -// running a connection pooler like PgBouncer. Default: "prepare" -// -// prefer_simple_protocol -// Possible values: "true" and "false". Use the simple protocol instead of extended protocol. Default: false -func ParseConfig(connString string) (*ConnConfig, error) { - config, err := pgconn.ParseConfig(connString) - if err != nil { - return nil, err - } - - var buildStatementCache BuildStatementCacheFunc - statementCacheCapacity := 512 - statementCacheMode := stmtcache.ModePrepare - if s, ok := config.RuntimeParams["statement_cache_capacity"]; ok { - delete(config.RuntimeParams, "statement_cache_capacity") - n, err := strconv.ParseInt(s, 10, 32) - if err != nil { - return nil, fmt.Errorf("cannot parse statement_cache_capacity: %w", err) - } - statementCacheCapacity = int(n) - } - - if s, ok := config.RuntimeParams["statement_cache_mode"]; ok { - delete(config.RuntimeParams, "statement_cache_mode") - switch s { - case "prepare": - statementCacheMode = stmtcache.ModePrepare - case "describe": - statementCacheMode = stmtcache.ModeDescribe - default: - return nil, fmt.Errorf("invalid statement_cache_mod: %s", s) - } - } - - if statementCacheCapacity > 0 { - buildStatementCache = func(conn *pgconn.PgConn) stmtcache.Cache { - return stmtcache.New(conn, statementCacheMode, statementCacheCapacity) - } - } - - preferSimpleProtocol := false - if s, ok := config.RuntimeParams["prefer_simple_protocol"]; ok { - delete(config.RuntimeParams, "prefer_simple_protocol") - if b, err := strconv.ParseBool(s); err == nil { - preferSimpleProtocol = b - } else { - return nil, fmt.Errorf("invalid prefer_simple_protocol: %v", err) - } - } - - connConfig := &ConnConfig{ - Config: *config, - createdByParseConfig: true, - LogLevel: LogLevelInfo, - BuildStatementCache: buildStatementCache, - PreferSimpleProtocol: preferSimpleProtocol, - connString: connString, - } - - return connConfig, nil -} - -func connect(ctx context.Context, config *ConnConfig) (c *Conn, err error) { - // Default values are set in ParseConfig. Enforce initial creation by ParseConfig rather than setting defaults from - // zero values. - if !config.createdByParseConfig { - panic("config must be created by ParseConfig") - } - originalConfig := config - - // This isn't really a deep copy. But it is enough to avoid the config.Config.OnNotification mutation from affecting - // other connections with the same config. See https://github.com/jackc/pgx/issues/618. - { - configCopy := *config - config = &configCopy - } - - c = &Conn{ - config: originalConfig, - connInfo: pgtype.NewConnInfo(), - logLevel: config.LogLevel, - logger: config.Logger, - } - - // Only install pgx notification system if no other callback handler is present. - if config.Config.OnNotification == nil { - config.Config.OnNotification = c.bufferNotifications - } else { - if c.shouldLog(LogLevelDebug) { - c.log(ctx, LogLevelDebug, "pgx notification handler disabled by application supplied OnNotification", map[string]interface{}{"host": config.Config.Host}) - } - } - - if c.shouldLog(LogLevelInfo) { - c.log(ctx, LogLevelInfo, "Dialing PostgreSQL server", map[string]interface{}{"host": config.Config.Host}) - } - c.pgConn, err = pgconn.ConnectConfig(ctx, &config.Config) - if err != nil { - if c.shouldLog(LogLevelError) { - c.log(ctx, LogLevelError, "connect failed", map[string]interface{}{"err": err}) - } - return nil, err - } - - c.preparedStatements = make(map[string]*pgconn.StatementDescription) - c.doneChan = make(chan struct{}) - c.closedChan = make(chan error) - c.wbuf = make([]byte, 0, 1024) - - if c.config.BuildStatementCache != nil { - c.stmtcache = c.config.BuildStatementCache(c.pgConn) - } - - // Replication connections can't execute the queries to - // populate the c.PgTypes and c.pgsqlAfInet - if _, ok := config.Config.RuntimeParams["replication"]; ok { - return c, nil - } - - return c, nil -} - -// Close closes a connection. It is safe to call Close on a already closed -// connection. -func (c *Conn) Close(ctx context.Context) error { - if c.IsClosed() { - return nil - } - - err := c.pgConn.Close(ctx) - if c.shouldLog(LogLevelInfo) { - c.log(ctx, LogLevelInfo, "closed connection", nil) - } - return err -} - -// Prepare creates a prepared statement with name and sql. sql can contain placeholders -// for bound parameters. These placeholders are referenced positional as $1, $2, etc. -// -// Prepare is idempotent; i.e. it is safe to call Prepare multiple times with the same -// name and sql arguments. This allows a code path to Prepare and Query/Exec without -// concern for if the statement has already been prepared. -func (c *Conn) Prepare(ctx context.Context, name, sql string) (sd *pgconn.StatementDescription, err error) { - if name != "" { - var ok bool - if sd, ok = c.preparedStatements[name]; ok && sd.SQL == sql { - return sd, nil - } - } - - if c.shouldLog(LogLevelError) { - defer func() { - if err != nil { - c.log(ctx, LogLevelError, "Prepare failed", map[string]interface{}{"err": err, "name": name, "sql": sql}) - } - }() - } - - sd, err = c.pgConn.Prepare(ctx, name, sql, nil) - if err != nil { - return nil, err - } - - if name != "" { - c.preparedStatements[name] = sd - } - - return sd, nil -} - -// Deallocate released a prepared statement -func (c *Conn) Deallocate(ctx context.Context, name string) error { - delete(c.preparedStatements, name) - _, err := c.pgConn.Exec(ctx, "deallocate "+quoteIdentifier(name)).ReadAll() - return err -} - -func (c *Conn) bufferNotifications(_ *pgconn.PgConn, n *pgconn.Notification) { - c.notifications = append(c.notifications, n) -} - -// WaitForNotification waits for a PostgreSQL notification. It wraps the underlying pgconn notification system in a -// slightly more convenient form. -func (c *Conn) WaitForNotification(ctx context.Context) (*pgconn.Notification, error) { - var n *pgconn.Notification - - // Return already received notification immediately - if len(c.notifications) > 0 { - n = c.notifications[0] - c.notifications = c.notifications[1:] - return n, nil - } - - err := c.pgConn.WaitForNotification(ctx) - if len(c.notifications) > 0 { - n = c.notifications[0] - c.notifications = c.notifications[1:] - } - return n, err -} - -// IsClosed reports if the connection has been closed. -func (c *Conn) IsClosed() bool { - return c.pgConn.IsClosed() -} - -func (c *Conn) die(err error) { - if c.IsClosed() { - return - } - - ctx, cancel := context.WithCancel(context.Background()) - cancel() // force immediate hard cancel - c.pgConn.Close(ctx) -} - -func (c *Conn) shouldLog(lvl LogLevel) bool { - return c.logger != nil && c.logLevel >= lvl -} - -func (c *Conn) log(ctx context.Context, lvl LogLevel, msg string, data map[string]interface{}) { - if data == nil { - data = map[string]interface{}{} - } - if c.pgConn != nil && c.pgConn.PID() != 0 { - data["pid"] = c.pgConn.PID() - } - - c.logger.Log(ctx, lvl, msg, data) -} - -func quoteIdentifier(s string) string { - return `"` + strings.ReplaceAll(s, `"`, `""`) + `"` -} - -// Ping executes an empty sql statement against the *Conn -// If the sql returns without error, the database Ping is considered successful, otherwise, the error is returned. -func (c *Conn) Ping(ctx context.Context) error { - _, err := c.Exec(ctx, ";") - return err -} - -// PgConn returns the underlying *pgconn.PgConn. This is an escape hatch method that allows lower level access to the -// PostgreSQL connection than pgx exposes. -// -// It is strongly recommended that the connection be idle (no in-progress queries) before the underlying *pgconn.PgConn -// is used and the connection must be returned to the same state before any *pgx.Conn methods are again used. -func (c *Conn) PgConn() *pgconn.PgConn { return c.pgConn } - -// StatementCache returns the statement cache used for this connection. -func (c *Conn) StatementCache() stmtcache.Cache { return c.stmtcache } - -// ConnInfo returns the connection info used for this connection. -func (c *Conn) ConnInfo() *pgtype.ConnInfo { return c.connInfo } - -// Config returns a copy of config that was used to establish this connection. -func (c *Conn) Config() *ConnConfig { return c.config.Copy() } - -// Exec executes sql. sql can be either a prepared statement name or an SQL string. arguments should be referenced -// positionally from the sql string as $1, $2, etc. -func (c *Conn) Exec(ctx context.Context, sql string, arguments ...interface{}) (pgconn.CommandTag, error) { - startTime := time.Now() - - commandTag, err := c.exec(ctx, sql, arguments...) - if err != nil { - if c.shouldLog(LogLevelError) { - endTime := time.Now() - c.log(ctx, LogLevelError, "Exec", map[string]interface{}{"sql": sql, "args": logQueryArgs(arguments), "err": err, "time": endTime.Sub(startTime)}) - } - return commandTag, err - } - - if c.shouldLog(LogLevelInfo) { - endTime := time.Now() - c.log(ctx, LogLevelInfo, "Exec", map[string]interface{}{"sql": sql, "args": logQueryArgs(arguments), "time": endTime.Sub(startTime), "commandTag": commandTag}) - } - - return commandTag, err -} - -func (c *Conn) exec(ctx context.Context, sql string, arguments ...interface{}) (commandTag pgconn.CommandTag, err error) { - simpleProtocol := c.config.PreferSimpleProtocol - -optionLoop: - for len(arguments) > 0 { - switch arg := arguments[0].(type) { - case QuerySimpleProtocol: - simpleProtocol = bool(arg) - arguments = arguments[1:] - default: - break optionLoop - } - } - - if sd, ok := c.preparedStatements[sql]; ok { - return c.execPrepared(ctx, sd, arguments) - } - - if simpleProtocol { - return c.execSimpleProtocol(ctx, sql, arguments) - } - - if len(arguments) == 0 { - return c.execSimpleProtocol(ctx, sql, arguments) - } - - if c.stmtcache != nil { - sd, err := c.stmtcache.Get(ctx, sql) - if err != nil { - return nil, err - } - - if c.stmtcache.Mode() == stmtcache.ModeDescribe { - return c.execParams(ctx, sd, arguments) - } - return c.execPrepared(ctx, sd, arguments) - } - - sd, err := c.Prepare(ctx, "", sql) - if err != nil { - return nil, err - } - return c.execPrepared(ctx, sd, arguments) -} - -func (c *Conn) execSimpleProtocol(ctx context.Context, sql string, arguments []interface{}) (commandTag pgconn.CommandTag, err error) { - if len(arguments) > 0 { - sql, err = c.sanitizeForSimpleQuery(sql, arguments...) - if err != nil { - return nil, err - } - } - - mrr := c.pgConn.Exec(ctx, sql) - for mrr.NextResult() { - commandTag, err = mrr.ResultReader().Close() - } - err = mrr.Close() - return commandTag, err -} - -func (c *Conn) execParamsAndPreparedPrefix(sd *pgconn.StatementDescription, arguments []interface{}) error { - if len(sd.ParamOIDs) != len(arguments) { - return fmt.Errorf("expected %d arguments, got %d", len(sd.ParamOIDs), len(arguments)) - } - - c.eqb.Reset() - - args, err := convertDriverValuers(arguments) - if err != nil { - return err - } - - for i := range args { - err = c.eqb.AppendParam(c.connInfo, sd.ParamOIDs[i], args[i]) - if err != nil { - return err - } - } - - for i := range sd.Fields { - c.eqb.AppendResultFormat(c.ConnInfo().ResultFormatCodeForOID(sd.Fields[i].DataTypeOID)) - } - - return nil -} - -func (c *Conn) execParams(ctx context.Context, sd *pgconn.StatementDescription, arguments []interface{}) (pgconn.CommandTag, error) { - err := c.execParamsAndPreparedPrefix(sd, arguments) - if err != nil { - return nil, err - } - - result := c.pgConn.ExecParams(ctx, sd.SQL, c.eqb.paramValues, sd.ParamOIDs, c.eqb.paramFormats, c.eqb.resultFormats).Read() - c.eqb.Reset() // Allow c.eqb internal memory to be GC'ed as soon as possible. - return result.CommandTag, result.Err -} - -func (c *Conn) execPrepared(ctx context.Context, sd *pgconn.StatementDescription, arguments []interface{}) (pgconn.CommandTag, error) { - err := c.execParamsAndPreparedPrefix(sd, arguments) - if err != nil { - return nil, err - } - - result := c.pgConn.ExecPrepared(ctx, sd.Name, c.eqb.paramValues, c.eqb.paramFormats, c.eqb.resultFormats).Read() - c.eqb.Reset() // Allow c.eqb internal memory to be GC'ed as soon as possible. - return result.CommandTag, result.Err -} - -func (c *Conn) getRows(ctx context.Context, sql string, args []interface{}) *connRows { - r := &connRows{} - - r.ctx = ctx - r.logger = c - r.connInfo = c.connInfo - r.startTime = time.Now() - r.sql = sql - r.args = args - r.conn = c - - return r -} - -// QuerySimpleProtocol controls whether the simple or extended protocol is used to send the query. -type QuerySimpleProtocol bool - -// QueryResultFormats controls the result format (text=0, binary=1) of a query by result column position. -type QueryResultFormats []int16 - -// QueryResultFormatsByOID controls the result format (text=0, binary=1) of a query by the result column OID. -type QueryResultFormatsByOID map[uint32]int16 - -// Query sends a query to the server and returns a Rows to read the results. Only errors encountered sending the query -// and initializing Rows will be returned. Err() on the returned Rows must be checked after the Rows is closed to -// determine if the query executed successfully. -// -// The returned Rows must be closed before the connection can be used again. It is safe to attempt to read from the -// returned Rows even if an error is returned. The error will be the available in rows.Err() after rows are closed. It -// is allowed to ignore the error returned from Query and handle it in Rows. -// -// Err() on the returned Rows must be checked after the Rows is closed to determine if the query executed successfully -// as some errors can only be detected by reading the entire response. e.g. A divide by zero error on the last row. -// -// For extra control over how the query is executed, the types QuerySimpleProtocol, QueryResultFormats, and -// QueryResultFormatsByOID may be used as the first args to control exactly how the query is executed. This is rarely -// needed. See the documentation for those types for details. -func (c *Conn) Query(ctx context.Context, sql string, args ...interface{}) (Rows, error) { - var resultFormats QueryResultFormats - var resultFormatsByOID QueryResultFormatsByOID - simpleProtocol := c.config.PreferSimpleProtocol - -optionLoop: - for len(args) > 0 { - switch arg := args[0].(type) { - case QueryResultFormats: - resultFormats = arg - args = args[1:] - case QueryResultFormatsByOID: - resultFormatsByOID = arg - args = args[1:] - case QuerySimpleProtocol: - simpleProtocol = bool(arg) - args = args[1:] - default: - break optionLoop - } - } - - rows := c.getRows(ctx, sql, args) - - var err error - sd, ok := c.preparedStatements[sql] - - if simpleProtocol && !ok { - sql, err = c.sanitizeForSimpleQuery(sql, args...) - if err != nil { - rows.fatal(err) - return rows, err - } - - mrr := c.pgConn.Exec(ctx, sql) - if mrr.NextResult() { - rows.resultReader = mrr.ResultReader() - rows.multiResultReader = mrr - } else { - err = mrr.Close() - rows.fatal(err) - return rows, err - } - - return rows, nil - } - - c.eqb.Reset() - - if !ok { - if c.stmtcache != nil { - sd, err = c.stmtcache.Get(ctx, sql) - if err != nil { - rows.fatal(err) - return rows, rows.err - } - } else { - sd, err = c.pgConn.Prepare(ctx, "", sql, nil) - if err != nil { - rows.fatal(err) - return rows, rows.err - } - } - } - if len(sd.ParamOIDs) != len(args) { - rows.fatal(fmt.Errorf("expected %d arguments, got %d", len(sd.ParamOIDs), len(args))) - return rows, rows.err - } - - rows.sql = sd.SQL - - args, err = convertDriverValuers(args) - if err != nil { - rows.fatal(err) - return rows, rows.err - } - - for i := range args { - err = c.eqb.AppendParam(c.connInfo, sd.ParamOIDs[i], args[i]) - if err != nil { - rows.fatal(err) - return rows, rows.err - } - } - - if resultFormatsByOID != nil { - resultFormats = make([]int16, len(sd.Fields)) - for i := range resultFormats { - resultFormats[i] = resultFormatsByOID[uint32(sd.Fields[i].DataTypeOID)] - } - } - - if resultFormats == nil { - for i := range sd.Fields { - c.eqb.AppendResultFormat(c.ConnInfo().ResultFormatCodeForOID(sd.Fields[i].DataTypeOID)) - } - - resultFormats = c.eqb.resultFormats - } - - if c.stmtcache != nil && c.stmtcache.Mode() == stmtcache.ModeDescribe && !ok { - rows.resultReader = c.pgConn.ExecParams(ctx, sql, c.eqb.paramValues, sd.ParamOIDs, c.eqb.paramFormats, resultFormats) - } else { - rows.resultReader = c.pgConn.ExecPrepared(ctx, sd.Name, c.eqb.paramValues, c.eqb.paramFormats, resultFormats) - } - - c.eqb.Reset() // Allow c.eqb internal memory to be GC'ed as soon as possible. - - return rows, rows.err -} - -// QueryRow is a convenience wrapper over Query. Any error that occurs while -// querying is deferred until calling Scan on the returned Row. That Row will -// error with ErrNoRows if no rows are returned. -func (c *Conn) QueryRow(ctx context.Context, sql string, args ...interface{}) Row { - rows, _ := c.Query(ctx, sql, args...) - return (*connRow)(rows.(*connRows)) -} - -// QueryFuncRow is the argument to the QueryFunc callback function. -// -// QueryFuncRow is an interface instead of a struct to allow tests to mock QueryFunc. However, adding a method to an -// interface is technically a breaking change. Because of this the QueryFuncRow interface is partially excluded from -// semantic version requirements. Methods will not be removed or changed, but new methods may be added. -type QueryFuncRow interface { - FieldDescriptions() []pgproto3.FieldDescription - - // RawValues returns the unparsed bytes of the row values. The returned [][]byte is only valid during the current - // function call. However, the underlying byte data is safe to retain a reference to and mutate. - RawValues() [][]byte -} - -// QueryFunc executes sql with args. For each row returned by the query the values will scanned into the elements of -// scans and f will be called. If any row fails to scan or f returns an error the query will be aborted and the error -// will be returned. -func (c *Conn) QueryFunc(ctx context.Context, sql string, args []interface{}, scans []interface{}, f func(QueryFuncRow) error) (pgconn.CommandTag, error) { - rows, err := c.Query(ctx, sql, args...) - if err != nil { - return nil, err - } - defer rows.Close() - - for rows.Next() { - err = rows.Scan(scans...) - if err != nil { - return nil, err - } - - err = f(rows) - if err != nil { - return nil, err - } - } - - if err := rows.Err(); err != nil { - return nil, err - } - - return rows.CommandTag(), nil -} - -// SendBatch sends all queued queries to the server at once. All queries are run in an implicit transaction unless -// explicit transaction control statements are executed. The returned BatchResults must be closed before the connection -// is used again. -func (c *Conn) SendBatch(ctx context.Context, b *Batch) BatchResults { - startTime := time.Now() - - simpleProtocol := c.config.PreferSimpleProtocol - var sb strings.Builder - if simpleProtocol { - for i, bi := range b.items { - if i > 0 { - sb.WriteByte(';') - } - sql, err := c.sanitizeForSimpleQuery(bi.query, bi.arguments...) - if err != nil { - return &batchResults{ctx: ctx, conn: c, err: err} - } - sb.WriteString(sql) - } - mrr := c.pgConn.Exec(ctx, sb.String()) - return &batchResults{ - ctx: ctx, - conn: c, - mrr: mrr, - b: b, - ix: 0, - } - } - - distinctUnpreparedQueries := map[string]struct{}{} - - for _, bi := range b.items { - if _, ok := c.preparedStatements[bi.query]; ok { - continue - } - distinctUnpreparedQueries[bi.query] = struct{}{} - } - - var stmtCache stmtcache.Cache - if len(distinctUnpreparedQueries) > 0 { - if c.stmtcache != nil && c.stmtcache.Cap() >= len(distinctUnpreparedQueries) { - stmtCache = c.stmtcache - } else { - stmtCache = stmtcache.New(c.pgConn, stmtcache.ModeDescribe, len(distinctUnpreparedQueries)) - } - - for sql, _ := range distinctUnpreparedQueries { - _, err := stmtCache.Get(ctx, sql) - if err != nil { - return &batchResults{ctx: ctx, conn: c, err: err} - } - } - } - - batch := &pgconn.Batch{} - - for _, bi := range b.items { - c.eqb.Reset() - - sd := c.preparedStatements[bi.query] - if sd == nil { - var err error - sd, err = stmtCache.Get(ctx, bi.query) - if err != nil { - return c.logBatchResults(ctx, startTime, &batchResults{ctx: ctx, conn: c, err: err}) - } - } - - if len(sd.ParamOIDs) != len(bi.arguments) { - return c.logBatchResults(ctx, startTime, &batchResults{ctx: ctx, conn: c, err: fmt.Errorf("mismatched param and argument count")}) - } - - args, err := convertDriverValuers(bi.arguments) - if err != nil { - return c.logBatchResults(ctx, startTime, &batchResults{ctx: ctx, conn: c, err: err}) - } - - for i := range args { - err = c.eqb.AppendParam(c.connInfo, sd.ParamOIDs[i], args[i]) - if err != nil { - return c.logBatchResults(ctx, startTime, &batchResults{ctx: ctx, conn: c, err: err}) - } - } - - for i := range sd.Fields { - c.eqb.AppendResultFormat(c.ConnInfo().ResultFormatCodeForOID(sd.Fields[i].DataTypeOID)) - } - - if sd.Name == "" { - batch.ExecParams(bi.query, c.eqb.paramValues, sd.ParamOIDs, c.eqb.paramFormats, c.eqb.resultFormats) - } else { - batch.ExecPrepared(sd.Name, c.eqb.paramValues, c.eqb.paramFormats, c.eqb.resultFormats) - } - } - - c.eqb.Reset() // Allow c.eqb internal memory to be GC'ed as soon as possible. - - mrr := c.pgConn.ExecBatch(ctx, batch) - - return c.logBatchResults(ctx, startTime, &batchResults{ - ctx: ctx, - conn: c, - mrr: mrr, - b: b, - ix: 0, - }) -} - -func (c *Conn) logBatchResults(ctx context.Context, startTime time.Time, results *batchResults) BatchResults { - if results.err != nil { - if c.shouldLog(LogLevelError) { - endTime := time.Now() - c.log(ctx, LogLevelError, "SendBatch", map[string]interface{}{"err": results.err, "time": endTime.Sub(startTime)}) - } - return results - } - - if c.shouldLog(LogLevelInfo) { - endTime := time.Now() - c.log(ctx, LogLevelInfo, "SendBatch", map[string]interface{}{"batchLen": results.b.Len(), "time": endTime.Sub(startTime)}) - } - - return results -} - -func (c *Conn) sanitizeForSimpleQuery(sql string, args ...interface{}) (string, error) { - if c.pgConn.ParameterStatus("standard_conforming_strings") != "on" { - return "", errors.New("simple protocol queries must be run with standard_conforming_strings=on") - } - - if c.pgConn.ParameterStatus("client_encoding") != "UTF8" { - return "", errors.New("simple protocol queries must be run with client_encoding=UTF8") - } - - var err error - valueArgs := make([]interface{}, len(args)) - for i, a := range args { - valueArgs[i], err = convertSimpleArgument(c.connInfo, a) - if err != nil { - return "", err - } - } - - return sanitize.SanitizeSQL(sql, valueArgs...) -} diff --git a/vendor/github.com/jackc/pgx/v4/doc.go b/vendor/github.com/jackc/pgx/v4/doc.go deleted file mode 100644 index 222f9047..00000000 --- a/vendor/github.com/jackc/pgx/v4/doc.go +++ /dev/null @@ -1,340 +0,0 @@ -// Package pgx is a PostgreSQL database driver. -/* -pgx provides lower level access to PostgreSQL than the standard database/sql. It remains as similar to the database/sql -interface as possible while providing better speed and access to PostgreSQL specific features. Import -github.com/jackc/pgx/v4/stdlib to use pgx as a database/sql compatible driver. - -Establishing a Connection - -The primary way of establishing a connection is with `pgx.Connect`. - - conn, err := pgx.Connect(context.Background(), os.Getenv("DATABASE_URL")) - -The database connection string can be in URL or DSN format. Both PostgreSQL settings and pgx settings can be specified -here. In addition, a config struct can be created by `ParseConfig` and modified before establishing the connection with -`ConnectConfig`. - - config, err := pgx.ParseConfig(os.Getenv("DATABASE_URL")) - if err != nil { - // ... - } - config.Logger = log15adapter.NewLogger(log.New("module", "pgx")) - - conn, err := pgx.ConnectConfig(context.Background(), config) - -Connection Pool - -`*pgx.Conn` represents a single connection to the database and is not concurrency safe. Use sub-package pgxpool for a -concurrency safe connection pool. - -Query Interface - -pgx implements Query and Scan in the familiar database/sql style. - - var sum int32 - - // Send the query to the server. The returned rows MUST be closed - // before conn can be used again. - rows, err := conn.Query(context.Background(), "select generate_series(1,$1)", 10) - if err != nil { - return err - } - - // rows.Close is called by rows.Next when all rows are read - // or an error occurs in Next or Scan. So it may optionally be - // omitted if nothing in the rows.Next loop can panic. It is - // safe to close rows multiple times. - defer rows.Close() - - // Iterate through the result set - for rows.Next() { - var n int32 - err = rows.Scan(&n) - if err != nil { - return err - } - sum += n - } - - // Any errors encountered by rows.Next or rows.Scan will be returned here - if rows.Err() != nil { - return rows.Err() - } - - // No errors found - do something with sum - -pgx also implements QueryRow in the same style as database/sql. - - var name string - var weight int64 - err := conn.QueryRow(context.Background(), "select name, weight from widgets where id=$1", 42).Scan(&name, &weight) - if err != nil { - return err - } - -Use Exec to execute a query that does not return a result set. - - commandTag, err := conn.Exec(context.Background(), "delete from widgets where id=$1", 42) - if err != nil { - return err - } - if commandTag.RowsAffected() != 1 { - return errors.New("No row found to delete") - } - -QueryFunc can be used to execute a callback function for every row. This is often easier to use than Query. - - var sum, n int32 - _, err = conn.QueryFunc( - context.Background(), - "select generate_series(1,$1)", - []interface{}{10}, - []interface{}{&n}, - func(pgx.QueryFuncRow) error { - sum += n - return nil - }, - ) - if err != nil { - return err - } - -Base Type Mapping - -pgx maps between all common base types directly between Go and PostgreSQL. In particular: - - Go PostgreSQL - ----------------------- - string varchar - text - - // Integers are automatically be converted to any other integer type if - // it can be done without overflow or underflow. - int8 - int16 smallint - int32 int - int64 bigint - int - uint8 - uint16 - uint32 - uint64 - uint - - // Floats are strict and do not automatically convert like integers. - float32 float4 - float64 float8 - - time.Time date - timestamp - timestamptz - - []byte bytea - - -Null Mapping - -pgx can map nulls in two ways. The first is package pgtype provides types that have a data field and a status field. -They work in a similar fashion to database/sql. The second is to use a pointer to a pointer. - - var foo pgtype.Varchar - var bar *string - err := conn.QueryRow("select foo, bar from widgets where id=$1", 42).Scan(&foo, &bar) - if err != nil { - return err - } - -Array Mapping - -pgx maps between int16, int32, int64, float32, float64, and string Go slices and the equivalent PostgreSQL array type. -Go slices of native types do not support nulls, so if a PostgreSQL array that contains a null is read into a native Go -slice an error will occur. The pgtype package includes many more array types for PostgreSQL types that do not directly -map to native Go types. - -JSON and JSONB Mapping - -pgx includes built-in support to marshal and unmarshal between Go types and the PostgreSQL JSON and JSONB. - -Inet and CIDR Mapping - -pgx encodes from net.IPNet to and from inet and cidr PostgreSQL types. In addition, as a convenience pgx will encode -from a net.IP; it will assume a /32 netmask for IPv4 and a /128 for IPv6. - -Custom Type Support - -pgx includes support for the common data types like integers, floats, strings, dates, and times that have direct -mappings between Go and SQL. In addition, pgx uses the github.com/jackc/pgtype library to support more types. See -documention for that library for instructions on how to implement custom types. - -See example_custom_type_test.go for an example of a custom type for the PostgreSQL point type. - -pgx also includes support for custom types implementing the database/sql.Scanner and database/sql/driver.Valuer -interfaces. - -If pgx does cannot natively encode a type and that type is a renamed type (e.g. type MyTime time.Time) pgx will attempt -to encode the underlying type. While this is usually desired behavior it can produce surprising behavior if one the -underlying type and the renamed type each implement database/sql interfaces and the other implements pgx interfaces. It -is recommended that this situation be avoided by implementing pgx interfaces on the renamed type. - -Composite types and row values - -Row values and composite types are represented as pgtype.Record (https://pkg.go.dev/github.com/jackc/pgtype?tab=doc#Record). -It is possible to get values of your custom type by implementing DecodeBinary interface. Decoding into -pgtype.Record first can simplify process by avoiding dealing with raw protocol directly. - -For example: - - type MyType struct { - a int // NULL will cause decoding error - b *string // there can be NULL in this position in SQL - } - - func (t *MyType) DecodeBinary(ci *pgtype.ConnInfo, src []byte) error { - r := pgtype.Record{ - Fields: []pgtype.Value{&pgtype.Int4{}, &pgtype.Text{}}, - } - - if err := r.DecodeBinary(ci, src); err != nil { - return err - } - - if r.Status != pgtype.Present { - return errors.New("BUG: decoding should not be called on NULL value") - } - - a := r.Fields[0].(*pgtype.Int4) - b := r.Fields[1].(*pgtype.Text) - - // type compatibility is checked by AssignTo - // only lossless assignments will succeed - if err := a.AssignTo(&t.a); err != nil { - return err - } - - // AssignTo also deals with null value handling - if err := b.AssignTo(&t.b); err != nil { - return err - } - return nil - } - - result := MyType{} - err := conn.QueryRow(context.Background(), "select row(1, 'foo'::text)", pgx.QueryResultFormats{pgx.BinaryFormatCode}).Scan(&r) - -Raw Bytes Mapping - -[]byte passed as arguments to Query, QueryRow, and Exec are passed unmodified to PostgreSQL. - -Transactions - -Transactions are started by calling Begin. - - tx, err := conn.Begin(context.Background()) - if err != nil { - return err - } - // Rollback is safe to call even if the tx is already closed, so if - // the tx commits successfully, this is a no-op - defer tx.Rollback(context.Background()) - - _, err = tx.Exec(context.Background(), "insert into foo(id) values (1)") - if err != nil { - return err - } - - err = tx.Commit(context.Background()) - if err != nil { - return err - } - -The Tx returned from Begin also implements the Begin method. This can be used to implement pseudo nested transactions. -These are internally implemented with savepoints. - -Use BeginTx to control the transaction mode. - -BeginFunc and BeginTxFunc are variants that begin a transaction, execute a function, and commit or rollback the -transaction depending on the return value of the function. These can be simpler and less error prone to use. - - err = conn.BeginFunc(context.Background(), func(tx pgx.Tx) error { - _, err := tx.Exec(context.Background(), "insert into foo(id) values (1)") - return err - }) - if err != nil { - return err - } - -Prepared Statements - -Prepared statements can be manually created with the Prepare method. However, this is rarely necessary because pgx -includes an automatic statement cache by default. Queries run through the normal Query, QueryRow, and Exec functions are -automatically prepared on first execution and the prepared statement is reused on subsequent executions. See ParseConfig -for information on how to customize or disable the statement cache. - -Copy Protocol - -Use CopyFrom to efficiently insert multiple rows at a time using the PostgreSQL copy protocol. CopyFrom accepts a -CopyFromSource interface. If the data is already in a [][]interface{} use CopyFromRows to wrap it in a CopyFromSource -interface. Or implement CopyFromSource to avoid buffering the entire data set in memory. - - rows := [][]interface{}{ - {"John", "Smith", int32(36)}, - {"Jane", "Doe", int32(29)}, - } - - copyCount, err := conn.CopyFrom( - context.Background(), - pgx.Identifier{"people"}, - []string{"first_name", "last_name", "age"}, - pgx.CopyFromRows(rows), - ) - -When you already have a typed array using CopyFromSlice can be more convenient. - - rows := []User{ - {"John", "Smith", 36}, - {"Jane", "Doe", 29}, - } - - copyCount, err := conn.CopyFrom( - context.Background(), - pgx.Identifier{"people"}, - []string{"first_name", "last_name", "age"}, - pgx.CopyFromSlice(len(rows), func(i int) ([]interface{}, error) { - return []interface{}{rows[i].FirstName, rows[i].LastName, rows[i].Age}, nil - }), - ) - -CopyFrom can be faster than an insert with as few as 5 rows. - -Listen and Notify - -pgx can listen to the PostgreSQL notification system with the `Conn.WaitForNotification` method. It blocks until a -notification is received or the context is canceled. - - _, err := conn.Exec(context.Background(), "listen channelname") - if err != nil { - return nil - } - - if notification, err := conn.WaitForNotification(context.Background()); err != nil { - // do something with notification - } - - -Logging - -pgx defines a simple logger interface. Connections optionally accept a logger that satisfies this interface. Set -LogLevel to control logging verbosity. Adapters for github.com/inconshreveable/log15, github.com/sirupsen/logrus, -go.uber.org/zap, github.com/rs/zerolog, and the testing log are provided in the log directory. - -Lower Level PostgreSQL Functionality - -pgx is implemented on top of github.com/jackc/pgconn a lower level PostgreSQL driver. The Conn.PgConn() method can be -used to access this lower layer. - -PgBouncer - -pgx is compatible with PgBouncer in two modes. One is when the connection has a statement cache in "describe" mode. The -other is when the connection is using the simple protocol. This can be set with the PreferSimpleProtocol config option. -*/ -package pgx diff --git a/vendor/github.com/jackc/pgx/v4/extended_query_builder.go b/vendor/github.com/jackc/pgx/v4/extended_query_builder.go deleted file mode 100644 index d06f63fd..00000000 --- a/vendor/github.com/jackc/pgx/v4/extended_query_builder.go +++ /dev/null @@ -1,161 +0,0 @@ -package pgx - -import ( - "database/sql/driver" - "fmt" - "reflect" - - "github.com/jackc/pgtype" -) - -type extendedQueryBuilder struct { - paramValues [][]byte - paramValueBytes []byte - paramFormats []int16 - resultFormats []int16 -} - -func (eqb *extendedQueryBuilder) AppendParam(ci *pgtype.ConnInfo, oid uint32, arg interface{}) error { - f := chooseParameterFormatCode(ci, oid, arg) - eqb.paramFormats = append(eqb.paramFormats, f) - - v, err := eqb.encodeExtendedParamValue(ci, oid, f, arg) - if err != nil { - return err - } - eqb.paramValues = append(eqb.paramValues, v) - - return nil -} - -func (eqb *extendedQueryBuilder) AppendResultFormat(f int16) { - eqb.resultFormats = append(eqb.resultFormats, f) -} - -// Reset readies eqb to build another query. -func (eqb *extendedQueryBuilder) Reset() { - eqb.paramValues = eqb.paramValues[0:0] - eqb.paramValueBytes = eqb.paramValueBytes[0:0] - eqb.paramFormats = eqb.paramFormats[0:0] - eqb.resultFormats = eqb.resultFormats[0:0] - - if cap(eqb.paramValues) > 64 { - eqb.paramValues = make([][]byte, 0, 64) - } - - if cap(eqb.paramValueBytes) > 256 { - eqb.paramValueBytes = make([]byte, 0, 256) - } - - if cap(eqb.paramFormats) > 64 { - eqb.paramFormats = make([]int16, 0, 64) - } - if cap(eqb.resultFormats) > 64 { - eqb.resultFormats = make([]int16, 0, 64) - } -} - -func (eqb *extendedQueryBuilder) encodeExtendedParamValue(ci *pgtype.ConnInfo, oid uint32, formatCode int16, arg interface{}) ([]byte, error) { - if arg == nil { - return nil, nil - } - - refVal := reflect.ValueOf(arg) - argIsPtr := refVal.Kind() == reflect.Ptr - - if argIsPtr && refVal.IsNil() { - return nil, nil - } - - if eqb.paramValueBytes == nil { - eqb.paramValueBytes = make([]byte, 0, 128) - } - - var err error - var buf []byte - pos := len(eqb.paramValueBytes) - - if arg, ok := arg.(string); ok { - return []byte(arg), nil - } - - if formatCode == TextFormatCode { - if arg, ok := arg.(pgtype.TextEncoder); ok { - buf, err = arg.EncodeText(ci, eqb.paramValueBytes) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - eqb.paramValueBytes = buf - return eqb.paramValueBytes[pos:], nil - } - } else if formatCode == BinaryFormatCode { - if arg, ok := arg.(pgtype.BinaryEncoder); ok { - buf, err = arg.EncodeBinary(ci, eqb.paramValueBytes) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - eqb.paramValueBytes = buf - return eqb.paramValueBytes[pos:], nil - } - } - - if argIsPtr { - // We have already checked that arg is not pointing to nil, - // so it is safe to dereference here. - arg = refVal.Elem().Interface() - return eqb.encodeExtendedParamValue(ci, oid, formatCode, arg) - } - - if dt, ok := ci.DataTypeForOID(oid); ok { - value := dt.Value - err := value.Set(arg) - if err != nil { - { - if arg, ok := arg.(driver.Valuer); ok { - v, err := callValuerValue(arg) - if err != nil { - return nil, err - } - return eqb.encodeExtendedParamValue(ci, oid, formatCode, v) - } - } - - return nil, err - } - - return eqb.encodeExtendedParamValue(ci, oid, formatCode, value) - } - - // There is no data type registered for the destination OID, but maybe there is data type registered for the arg - // type. If so use it's text encoder (if available). - if dt, ok := ci.DataTypeForValue(arg); ok { - value := dt.Value - if textEncoder, ok := value.(pgtype.TextEncoder); ok { - err := value.Set(arg) - if err != nil { - return nil, err - } - - buf, err = textEncoder.EncodeText(ci, eqb.paramValueBytes) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - eqb.paramValueBytes = buf - return eqb.paramValueBytes[pos:], nil - } - } - - if strippedArg, ok := stripNamedType(&refVal); ok { - return eqb.encodeExtendedParamValue(ci, oid, formatCode, strippedArg) - } - return nil, SerializationError(fmt.Sprintf("Cannot encode %T into oid %v - %T must implement Encoder or be converted to a string", arg, oid, arg)) -} diff --git a/vendor/github.com/jackc/pgx/v4/go_stdlib.go b/vendor/github.com/jackc/pgx/v4/go_stdlib.go deleted file mode 100644 index 9372f9ef..00000000 --- a/vendor/github.com/jackc/pgx/v4/go_stdlib.go +++ /dev/null @@ -1,61 +0,0 @@ -package pgx - -import ( - "database/sql/driver" - "reflect" -) - -// This file contains code copied from the Go standard library due to the -// required function not being public. - -// Copyright (c) 2009 The Go Authors. All rights reserved. - -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: - -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. - -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// From database/sql/convert.go - -var valuerReflectType = reflect.TypeOf((*driver.Valuer)(nil)).Elem() - -// callValuerValue returns vr.Value(), with one exception: -// If vr.Value is an auto-generated method on a pointer type and the -// pointer is nil, it would panic at runtime in the panicwrap -// method. Treat it like nil instead. -// Issue 8415. -// -// This is so people can implement driver.Value on value types and -// still use nil pointers to those types to mean nil/NULL, just like -// string/*string. -// -// This function is mirrored in the database/sql/driver package. -func callValuerValue(vr driver.Valuer) (v driver.Value, err error) { - if rv := reflect.ValueOf(vr); rv.Kind() == reflect.Ptr && - rv.IsNil() && - rv.Type().Elem().Implements(valuerReflectType) { - return nil, nil - } - return vr.Value() -} diff --git a/vendor/github.com/jackc/pgx/v4/logger.go b/vendor/github.com/jackc/pgx/v4/logger.go deleted file mode 100644 index 41f8b7e8..00000000 --- a/vendor/github.com/jackc/pgx/v4/logger.go +++ /dev/null @@ -1,107 +0,0 @@ -package pgx - -import ( - "context" - "encoding/hex" - "errors" - "fmt" -) - -// The values for log levels are chosen such that the zero value means that no -// log level was specified. -const ( - LogLevelTrace = 6 - LogLevelDebug = 5 - LogLevelInfo = 4 - LogLevelWarn = 3 - LogLevelError = 2 - LogLevelNone = 1 -) - -// LogLevel represents the pgx logging level. See LogLevel* constants for -// possible values. -type LogLevel int - -func (ll LogLevel) String() string { - switch ll { - case LogLevelTrace: - return "trace" - case LogLevelDebug: - return "debug" - case LogLevelInfo: - return "info" - case LogLevelWarn: - return "warn" - case LogLevelError: - return "error" - case LogLevelNone: - return "none" - default: - return fmt.Sprintf("invalid level %d", ll) - } -} - -// Logger is the interface used to get logging from pgx internals. -type Logger interface { - // Log a message at the given level with data key/value pairs. data may be nil. - Log(ctx context.Context, level LogLevel, msg string, data map[string]interface{}) -} - -// LoggerFunc is a wrapper around a function to satisfy the pgx.Logger interface -type LoggerFunc func(ctx context.Context, level LogLevel, msg string, data map[string]interface{}) - -// Log delegates the logging request to the wrapped function -func (f LoggerFunc) Log(ctx context.Context, level LogLevel, msg string, data map[string]interface{}) { - f(ctx, level, msg, data) -} - -// LogLevelFromString converts log level string to constant -// -// Valid levels: -// -// trace -// debug -// info -// warn -// error -// none -func LogLevelFromString(s string) (LogLevel, error) { - switch s { - case "trace": - return LogLevelTrace, nil - case "debug": - return LogLevelDebug, nil - case "info": - return LogLevelInfo, nil - case "warn": - return LogLevelWarn, nil - case "error": - return LogLevelError, nil - case "none": - return LogLevelNone, nil - default: - return 0, errors.New("invalid log level") - } -} - -func logQueryArgs(args []interface{}) []interface{} { - logArgs := make([]interface{}, 0, len(args)) - - for _, a := range args { - switch v := a.(type) { - case []byte: - if len(v) < 64 { - a = hex.EncodeToString(v) - } else { - a = fmt.Sprintf("%x (truncated %d bytes)", v[:64], len(v)-64) - } - case string: - if len(v) > 64 { - a = fmt.Sprintf("%s (truncated %d bytes)", v[:64], len(v)-64) - } - } - logArgs = append(logArgs, a) - } - - return logArgs -} diff --git a/vendor/github.com/jackc/pgx/v4/messages.go b/vendor/github.com/jackc/pgx/v4/messages.go deleted file mode 100644 index 5324cbb5..00000000 --- a/vendor/github.com/jackc/pgx/v4/messages.go +++ /dev/null @@ -1,23 +0,0 @@ -package pgx - -import ( - "database/sql/driver" - - "github.com/jackc/pgtype" -) - -func convertDriverValuers(args []interface{}) ([]interface{}, error) { - for i, arg := range args { - switch arg := arg.(type) { - case pgtype.BinaryEncoder: - case pgtype.TextEncoder: - case driver.Valuer: - v, err := callValuerValue(arg) - if err != nil { - return nil, err - } - args[i] = v - } - } - return args, nil -} diff --git a/vendor/github.com/jackc/pgx/v4/rows.go b/vendor/github.com/jackc/pgx/v4/rows.go deleted file mode 100644 index 4749ead9..00000000 --- a/vendor/github.com/jackc/pgx/v4/rows.go +++ /dev/null @@ -1,351 +0,0 @@ -package pgx - -import ( - "context" - "errors" - "fmt" - "time" - - "github.com/jackc/pgconn" - "github.com/jackc/pgproto3/v2" - "github.com/jackc/pgtype" -) - -// Rows is the result set returned from *Conn.Query. Rows must be closed before -// the *Conn can be used again. Rows are closed by explicitly calling Close(), -// calling Next() until it returns false, or when a fatal error occurs. -// -// Once a Rows is closed the only methods that may be called are Close(), Err(), and CommandTag(). -// -// Rows is an interface instead of a struct to allow tests to mock Query. However, -// adding a method to an interface is technically a breaking change. Because of this -// the Rows interface is partially excluded from semantic version requirements. -// Methods will not be removed or changed, but new methods may be added. -type Rows interface { - // Close closes the rows, making the connection ready for use again. It is safe - // to call Close after rows is already closed. - Close() - - // Err returns any error that occurred while reading. - Err() error - - // CommandTag returns the command tag from this query. It is only available after Rows is closed. - CommandTag() pgconn.CommandTag - - FieldDescriptions() []pgproto3.FieldDescription - - // Next prepares the next row for reading. It returns true if there is another - // row and false if no more rows are available. It automatically closes rows - // when all rows are read. - Next() bool - - // Scan reads the values from the current row into dest values positionally. - // dest can include pointers to core types, values implementing the Scanner - // interface, and nil. nil will skip the value entirely. It is an error to - // call Scan without first calling Next() and checking that it returned true. - Scan(dest ...interface{}) error - - // Values returns the decoded row values. As with Scan(), it is an error to - // call Values without first calling Next() and checking that it returned - // true. - Values() ([]interface{}, error) - - // RawValues returns the unparsed bytes of the row values. The returned [][]byte is only valid until the next Next - // call or the Rows is closed. However, the underlying byte data is safe to retain a reference to and mutate. - RawValues() [][]byte -} - -// Row is a convenience wrapper over Rows that is returned by QueryRow. -// -// Row is an interface instead of a struct to allow tests to mock QueryRow. However, -// adding a method to an interface is technically a breaking change. Because of this -// the Row interface is partially excluded from semantic version requirements. -// Methods will not be removed or changed, but new methods may be added. -type Row interface { - // Scan works the same as Rows. with the following exceptions. If no - // rows were found it returns ErrNoRows. If multiple rows are returned it - // ignores all but the first. - Scan(dest ...interface{}) error -} - -// connRow implements the Row interface for Conn.QueryRow. -type connRow connRows - -func (r *connRow) Scan(dest ...interface{}) (err error) { - rows := (*connRows)(r) - - if rows.Err() != nil { - return rows.Err() - } - - if !rows.Next() { - if rows.Err() == nil { - return ErrNoRows - } - return rows.Err() - } - - rows.Scan(dest...) - rows.Close() - return rows.Err() -} - -type rowLog interface { - shouldLog(lvl LogLevel) bool - log(ctx context.Context, lvl LogLevel, msg string, data map[string]interface{}) -} - -// connRows implements the Rows interface for Conn.Query. -type connRows struct { - ctx context.Context - logger rowLog - connInfo *pgtype.ConnInfo - values [][]byte - rowCount int - err error - commandTag pgconn.CommandTag - startTime time.Time - sql string - args []interface{} - closed bool - conn *Conn - - resultReader *pgconn.ResultReader - multiResultReader *pgconn.MultiResultReader - - scanPlans []pgtype.ScanPlan -} - -func (rows *connRows) FieldDescriptions() []pgproto3.FieldDescription { - return rows.resultReader.FieldDescriptions() -} - -func (rows *connRows) Close() { - if rows.closed { - return - } - - rows.closed = true - - if rows.resultReader != nil { - var closeErr error - rows.commandTag, closeErr = rows.resultReader.Close() - if rows.err == nil { - rows.err = closeErr - } - } - - if rows.multiResultReader != nil { - closeErr := rows.multiResultReader.Close() - if rows.err == nil { - rows.err = closeErr - } - } - - if rows.logger != nil { - endTime := time.Now() - - if rows.err == nil { - if rows.logger.shouldLog(LogLevelInfo) { - rows.logger.log(rows.ctx, LogLevelInfo, "Query", map[string]interface{}{"sql": rows.sql, "args": logQueryArgs(rows.args), "time": endTime.Sub(rows.startTime), "rowCount": rows.rowCount}) - } - } else { - if rows.logger.shouldLog(LogLevelError) { - rows.logger.log(rows.ctx, LogLevelError, "Query", map[string]interface{}{"err": rows.err, "sql": rows.sql, "time": endTime.Sub(rows.startTime), "args": logQueryArgs(rows.args)}) - } - if rows.err != nil && rows.conn.stmtcache != nil { - rows.conn.stmtcache.StatementErrored(rows.sql, rows.err) - } - } - } -} - -func (rows *connRows) CommandTag() pgconn.CommandTag { - return rows.commandTag -} - -func (rows *connRows) Err() error { - return rows.err -} - -// fatal signals an error occurred after the query was sent to the server. It -// closes the rows automatically. -func (rows *connRows) fatal(err error) { - if rows.err != nil { - return - } - - rows.err = err - rows.Close() -} - -func (rows *connRows) Next() bool { - if rows.closed { - return false - } - - if rows.resultReader.NextRow() { - rows.rowCount++ - rows.values = rows.resultReader.Values() - return true - } else { - rows.Close() - return false - } -} - -func (rows *connRows) Scan(dest ...interface{}) error { - ci := rows.connInfo - fieldDescriptions := rows.FieldDescriptions() - values := rows.values - - if len(fieldDescriptions) != len(values) { - err := fmt.Errorf("number of field descriptions must equal number of values, got %d and %d", len(fieldDescriptions), len(values)) - rows.fatal(err) - return err - } - if len(fieldDescriptions) != len(dest) { - err := fmt.Errorf("number of field descriptions must equal number of destinations, got %d and %d", len(fieldDescriptions), len(dest)) - rows.fatal(err) - return err - } - - if rows.scanPlans == nil { - rows.scanPlans = make([]pgtype.ScanPlan, len(values)) - for i := range dest { - rows.scanPlans[i] = ci.PlanScan(fieldDescriptions[i].DataTypeOID, fieldDescriptions[i].Format, dest[i]) - } - } - - for i, dst := range dest { - if dst == nil { - continue - } - - err := rows.scanPlans[i].Scan(ci, fieldDescriptions[i].DataTypeOID, fieldDescriptions[i].Format, values[i], dst) - if err != nil { - err = ScanArgError{ColumnIndex: i, Err: err} - rows.fatal(err) - return err - } - } - - return nil -} - -func (rows *connRows) Values() ([]interface{}, error) { - if rows.closed { - return nil, errors.New("rows is closed") - } - - values := make([]interface{}, 0, len(rows.FieldDescriptions())) - - for i := range rows.FieldDescriptions() { - buf := rows.values[i] - fd := &rows.FieldDescriptions()[i] - - if buf == nil { - values = append(values, nil) - continue - } - - if dt, ok := rows.connInfo.DataTypeForOID(fd.DataTypeOID); ok { - value := dt.Value - - switch fd.Format { - case TextFormatCode: - decoder, ok := value.(pgtype.TextDecoder) - if !ok { - decoder = &pgtype.GenericText{} - } - err := decoder.DecodeText(rows.connInfo, buf) - if err != nil { - rows.fatal(err) - } - values = append(values, decoder.(pgtype.Value).Get()) - case BinaryFormatCode: - decoder, ok := value.(pgtype.BinaryDecoder) - if !ok { - decoder = &pgtype.GenericBinary{} - } - err := decoder.DecodeBinary(rows.connInfo, buf) - if err != nil { - rows.fatal(err) - } - values = append(values, value.Get()) - default: - rows.fatal(errors.New("Unknown format code")) - } - } else { - switch fd.Format { - case TextFormatCode: - decoder := &pgtype.GenericText{} - err := decoder.DecodeText(rows.connInfo, buf) - if err != nil { - rows.fatal(err) - } - values = append(values, decoder.Get()) - case BinaryFormatCode: - decoder := &pgtype.GenericBinary{} - err := decoder.DecodeBinary(rows.connInfo, buf) - if err != nil { - rows.fatal(err) - } - values = append(values, decoder.Get()) - default: - rows.fatal(errors.New("Unknown format code")) - } - } - - if rows.Err() != nil { - return nil, rows.Err() - } - } - - return values, rows.Err() -} - -func (rows *connRows) RawValues() [][]byte { - return rows.values -} - -type ScanArgError struct { - ColumnIndex int - Err error -} - -func (e ScanArgError) Error() string { - return fmt.Sprintf("can't scan into dest[%d]: %v", e.ColumnIndex, e.Err) -} - -func (e ScanArgError) Unwrap() error { - return e.Err -} - -// ScanRow decodes raw row data into dest. It can be used to scan rows read from the lower level pgconn interface. -// -// connInfo - OID to Go type mapping. -// fieldDescriptions - OID and format of values -// values - the raw data as returned from the PostgreSQL server -// dest - the destination that values will be decoded into -func ScanRow(connInfo *pgtype.ConnInfo, fieldDescriptions []pgproto3.FieldDescription, values [][]byte, dest ...interface{}) error { - if len(fieldDescriptions) != len(values) { - return fmt.Errorf("number of field descriptions must equal number of values, got %d and %d", len(fieldDescriptions), len(values)) - } - if len(fieldDescriptions) != len(dest) { - return fmt.Errorf("number of field descriptions must equal number of destinations, got %d and %d", len(fieldDescriptions), len(dest)) - } - - for i, d := range dest { - if d == nil { - continue - } - - err := connInfo.Scan(fieldDescriptions[i].DataTypeOID, fieldDescriptions[i].Format, values[i], d) - if err != nil { - return ScanArgError{ColumnIndex: i, Err: err} - } - } - - return nil -} diff --git a/vendor/github.com/jackc/pgx/v4/values.go b/vendor/github.com/jackc/pgx/v4/values.go deleted file mode 100644 index 1a945475..00000000 --- a/vendor/github.com/jackc/pgx/v4/values.go +++ /dev/null @@ -1,280 +0,0 @@ -package pgx - -import ( - "database/sql/driver" - "fmt" - "math" - "reflect" - "time" - - "github.com/jackc/pgio" - "github.com/jackc/pgtype" -) - -// PostgreSQL format codes -const ( - TextFormatCode = 0 - BinaryFormatCode = 1 -) - -// SerializationError occurs on failure to encode or decode a value -type SerializationError string - -func (e SerializationError) Error() string { - return string(e) -} - -func convertSimpleArgument(ci *pgtype.ConnInfo, arg interface{}) (interface{}, error) { - if arg == nil { - return nil, nil - } - - refVal := reflect.ValueOf(arg) - if refVal.Kind() == reflect.Ptr && refVal.IsNil() { - return nil, nil - } - - switch arg := arg.(type) { - - // https://github.com/jackc/pgx/issues/409 Changed JSON and JSONB to surface - // []byte to database/sql instead of string. But that caused problems with the - // simple protocol because the driver.Valuer case got taken before the - // pgtype.TextEncoder case. And driver.Valuer needed to be first in the usual - // case because of https://github.com/jackc/pgx/issues/339. So instead we - // special case JSON and JSONB. - case *pgtype.JSON: - buf, err := arg.EncodeText(ci, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - return string(buf), nil - case *pgtype.JSONB: - buf, err := arg.EncodeText(ci, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - return string(buf), nil - - case driver.Valuer: - return callValuerValue(arg) - case pgtype.TextEncoder: - buf, err := arg.EncodeText(ci, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - return string(buf), nil - case float32: - return float64(arg), nil - case float64: - return arg, nil - case bool: - return arg, nil - case time.Duration: - return fmt.Sprintf("%d microsecond", int64(arg)/1000), nil - case time.Time: - return arg, nil - case string: - return arg, nil - case []byte: - return arg, nil - case int8: - return int64(arg), nil - case int16: - return int64(arg), nil - case int32: - return int64(arg), nil - case int64: - return arg, nil - case int: - return int64(arg), nil - case uint8: - return int64(arg), nil - case uint16: - return int64(arg), nil - case uint32: - return int64(arg), nil - case uint64: - if arg > math.MaxInt64 { - return nil, fmt.Errorf("arg too big for int64: %v", arg) - } - return int64(arg), nil - case uint: - if uint64(arg) > math.MaxInt64 { - return nil, fmt.Errorf("arg too big for int64: %v", arg) - } - return int64(arg), nil - } - - if dt, found := ci.DataTypeForValue(arg); found { - v := dt.Value - err := v.Set(arg) - if err != nil { - return nil, err - } - buf, err := v.(pgtype.TextEncoder).EncodeText(ci, nil) - if err != nil { - return nil, err - } - if buf == nil { - return nil, nil - } - return string(buf), nil - } - - if refVal.Kind() == reflect.Ptr { - arg = refVal.Elem().Interface() - return convertSimpleArgument(ci, arg) - } - - if strippedArg, ok := stripNamedType(&refVal); ok { - return convertSimpleArgument(ci, strippedArg) - } - return nil, SerializationError(fmt.Sprintf("Cannot encode %T in simple protocol - %T must implement driver.Valuer, pgtype.TextEncoder, or be a native type", arg, arg)) -} - -func encodePreparedStatementArgument(ci *pgtype.ConnInfo, buf []byte, oid uint32, arg interface{}) ([]byte, error) { - if arg == nil { - return pgio.AppendInt32(buf, -1), nil - } - - switch arg := arg.(type) { - case pgtype.BinaryEncoder: - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - argBuf, err := arg.EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if argBuf != nil { - buf = argBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - return buf, nil - case pgtype.TextEncoder: - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - argBuf, err := arg.EncodeText(ci, buf) - if err != nil { - return nil, err - } - if argBuf != nil { - buf = argBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - return buf, nil - case string: - buf = pgio.AppendInt32(buf, int32(len(arg))) - buf = append(buf, arg...) - return buf, nil - } - - refVal := reflect.ValueOf(arg) - - if refVal.Kind() == reflect.Ptr { - if refVal.IsNil() { - return pgio.AppendInt32(buf, -1), nil - } - arg = refVal.Elem().Interface() - return encodePreparedStatementArgument(ci, buf, oid, arg) - } - - if dt, ok := ci.DataTypeForOID(oid); ok { - value := dt.Value - err := value.Set(arg) - if err != nil { - { - if arg, ok := arg.(driver.Valuer); ok { - v, err := callValuerValue(arg) - if err != nil { - return nil, err - } - return encodePreparedStatementArgument(ci, buf, oid, v) - } - } - - return nil, err - } - - sp := len(buf) - buf = pgio.AppendInt32(buf, -1) - argBuf, err := value.(pgtype.BinaryEncoder).EncodeBinary(ci, buf) - if err != nil { - return nil, err - } - if argBuf != nil { - buf = argBuf - pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) - } - return buf, nil - } - - if strippedArg, ok := stripNamedType(&refVal); ok { - return encodePreparedStatementArgument(ci, buf, oid, strippedArg) - } - return nil, SerializationError(fmt.Sprintf("Cannot encode %T into oid %v - %T must implement Encoder or be converted to a string", arg, oid, arg)) -} - -// chooseParameterFormatCode determines the correct format code for an -// argument to a prepared statement. It defaults to TextFormatCode if no -// determination can be made. -func chooseParameterFormatCode(ci *pgtype.ConnInfo, oid uint32, arg interface{}) int16 { - switch arg := arg.(type) { - case pgtype.ParamFormatPreferrer: - return arg.PreferredParamFormat() - case pgtype.BinaryEncoder: - return BinaryFormatCode - case string, *string, pgtype.TextEncoder: - return TextFormatCode - } - - return ci.ParamFormatCodeForOID(oid) -} - -func stripNamedType(val *reflect.Value) (interface{}, bool) { - switch val.Kind() { - case reflect.Int: - convVal := int(val.Int()) - return convVal, reflect.TypeOf(convVal) != val.Type() - case reflect.Int8: - convVal := int8(val.Int()) - return convVal, reflect.TypeOf(convVal) != val.Type() - case reflect.Int16: - convVal := int16(val.Int()) - return convVal, reflect.TypeOf(convVal) != val.Type() - case reflect.Int32: - convVal := int32(val.Int()) - return convVal, reflect.TypeOf(convVal) != val.Type() - case reflect.Int64: - convVal := int64(val.Int()) - return convVal, reflect.TypeOf(convVal) != val.Type() - case reflect.Uint: - convVal := uint(val.Uint()) - return convVal, reflect.TypeOf(convVal) != val.Type() - case reflect.Uint8: - convVal := uint8(val.Uint()) - return convVal, reflect.TypeOf(convVal) != val.Type() - case reflect.Uint16: - convVal := uint16(val.Uint()) - return convVal, reflect.TypeOf(convVal) != val.Type() - case reflect.Uint32: - convVal := uint32(val.Uint()) - return convVal, reflect.TypeOf(convVal) != val.Type() - case reflect.Uint64: - convVal := uint64(val.Uint()) - return convVal, reflect.TypeOf(convVal) != val.Type() - case reflect.String: - convVal := val.String() - return convVal, reflect.TypeOf(convVal) != val.Type() - } - - return nil, false -} diff --git a/vendor/github.com/jackc/pgx/v4/.gitignore b/vendor/github.com/jackc/pgx/v5/.gitignore similarity index 92% rename from vendor/github.com/jackc/pgx/v4/.gitignore rename to vendor/github.com/jackc/pgx/v5/.gitignore index 39175a96..a2ebbe9c 100644 --- a/vendor/github.com/jackc/pgx/v4/.gitignore +++ b/vendor/github.com/jackc/pgx/v5/.gitignore @@ -22,3 +22,6 @@ _testmain.go *.exe .envrc +/.testdb + +.DS_Store diff --git a/vendor/github.com/jackc/pgx/v5/CHANGELOG.md b/vendor/github.com/jackc/pgx/v5/CHANGELOG.md new file mode 100644 index 00000000..a0ff9ba3 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/CHANGELOG.md @@ -0,0 +1,422 @@ +# 5.7.1 (September 10, 2024) + +* Fix data race in tracelog.TraceLog +* Update puddle to v2.2.2. This removes the import of nanotime via linkname. +* Update golang.org/x/crypto and golang.org/x/text + +# 5.7.0 (September 7, 2024) + +* Add support for sslrootcert=system (Yann Soubeyrand) +* Add LoadTypes to load multiple types in a single SQL query (Nick Farrell) +* Add XMLCodec supports encoding + scanning XML column type like json (nickcruess-soda) +* Add MultiTrace (Stepan Rabotkin) +* Add TraceLogConfig with customizable TimeKey (stringintech) +* pgx.ErrNoRows wraps sql.ErrNoRows to aid in database/sql compatibility with native pgx functions (merlin) +* Support scanning binary formatted uint32 into string / TextScanner (jennifersp) +* Fix interval encoding to allow 0s and avoid extra spaces (Carlos Pérez-Aradros Herce) +* Update pgservicefile - fixes panic when parsing invalid file +* Better error message when reading past end of batch +* Don't print url when url.Parse returns an error (Kevin Biju) +* Fix snake case name normalization collision in RowToStructByName with db tag (nolandseigler) +* Fix: Scan and encode types with underlying types of arrays + +# 5.6.0 (May 25, 2024) + +* Add StrictNamedArgs (Tomas Zahradnicek) +* Add support for macaddr8 type (Carlos Pérez-Aradros Herce) +* Add SeverityUnlocalized field to PgError / Notice +* Performance optimization of RowToStructByPos/Name (Zach Olstein) +* Allow customizing context canceled behavior for pgconn +* Add ScanLocation to pgtype.Timestamp[tz]Codec +* Add custom data to pgconn.PgConn +* Fix ResultReader.Read() to handle nil values +* Do not encode interval microseconds when they are 0 (Carlos Pérez-Aradros Herce) +* pgconn.SafeToRetry checks for wrapped errors (tjasko) +* Failed connection attempts include all errors +* Optimize LargeObject.Read (Mitar) +* Add tracing for connection acquire and release from pool (ngavinsir) +* Fix encode driver.Valuer not called when nil +* Add support for custom JSON marshal and unmarshal (Mitar) +* Use Go default keepalive for TCP connections (Hans-Joachim Kliemeck) + +# 5.5.5 (March 9, 2024) + +Use spaces instead of parentheses for SQL sanitization. + +This still solves the problem of negative numbers creating a line comment, but this avoids breaking edge cases such as +`set foo to $1` where the substitution is taking place in a location where an arbitrary expression is not allowed. + +# 5.5.4 (March 4, 2024) + +Fix CVE-2024-27304 + +SQL injection can occur if an attacker can cause a single query or bind message to exceed 4 GB in size. An integer +overflow in the calculated message size can cause the one large message to be sent as multiple messages under the +attacker's control. + +Thanks to Paul Gerste for reporting this issue. + +* Fix behavior of CollectRows to return empty slice if Rows are empty (Felix) +* Fix simple protocol encoding of json.RawMessage +* Fix *Pipeline.getResults should close pipeline on error +* Fix panic in TryFindUnderlyingTypeScanPlan (David Kurman) +* Fix deallocation of invalidated cached statements in a transaction +* Handle invalid sslkey file +* Fix scan float4 into sql.Scanner +* Fix pgtype.Bits not making copy of data from read buffer. This would cause the data to be corrupted by future reads. + +# 5.5.3 (February 3, 2024) + +* Fix: prepared statement already exists +* Improve CopyFrom auto-conversion of text-ish values +* Add ltree type support (Florent Viel) +* Make some properties of Batch and QueuedQuery public (Pavlo Golub) +* Add AppendRows function (Edoardo Spadolini) +* Optimize convert UUID [16]byte to string (Kirill Malikov) +* Fix: LargeObject Read and Write of more than ~1GB at a time (Mitar) + +# 5.5.2 (January 13, 2024) + +* Allow NamedArgs to start with underscore +* pgproto3: Maximum message body length support (jeremy.spriet) +* Upgrade golang.org/x/crypto to v0.17.0 +* Add snake_case support to RowToStructByName (Tikhon Fedulov) +* Fix: update description cache after exec prepare (James Hartig) +* Fix: pipeline checks if it is closed (James Hartig and Ryan Fowler) +* Fix: normalize timeout / context errors during TLS startup (Samuel Stauffer) +* Add OnPgError for easier centralized error handling (James Hartig) + +# 5.5.1 (December 9, 2023) + +* Add CopyFromFunc helper function. (robford) +* Add PgConn.Deallocate method that uses PostgreSQL protocol Close message. +* pgx uses new PgConn.Deallocate method. This allows deallocating statements to work in a failed transaction. This fixes a case where the prepared statement map could become invalid. +* Fix: Prefer driver.Valuer over json.Marshaler for json fields. (Jacopo) +* Fix: simple protocol SQL sanitizer previously panicked if an invalid $0 placeholder was used. This now returns an error instead. (maksymnevajdev) +* Add pgtype.Numeric.ScanScientific (Eshton Robateau) + +# 5.5.0 (November 4, 2023) + +* Add CollectExactlyOneRow. (Julien GOTTELAND) +* Add OpenDBFromPool to create *database/sql.DB from *pgxpool.Pool. (Lev Zakharov) +* Prepare can automatically choose statement name based on sql. This makes it easier to explicitly manage prepared statements. +* Statement cache now uses deterministic, stable statement names. +* database/sql prepared statement names are deterministically generated. +* Fix: SendBatch wasn't respecting context cancellation. +* Fix: Timeout error from pipeline is now normalized. +* Fix: database/sql encoding json.RawMessage to []byte. +* CancelRequest: Wait for the cancel request to be acknowledged by the server. This should improve PgBouncer compatibility. (Anton Levakin) +* stdlib: Use Ping instead of CheckConn in ResetSession +* Add json.Marshaler and json.Unmarshaler for Float4, Float8 (Kirill Mironov) + +# 5.4.3 (August 5, 2023) + +* Fix: QCharArrayOID was defined with the wrong OID (Christoph Engelbert) +* Fix: connect_timeout for sslmode=allow|prefer (smaher-edb) +* Fix: pgxpool: background health check cannot overflow pool +* Fix: Check for nil in defer when sending batch (recover properly from panic) +* Fix: json scan of non-string pointer to pointer +* Fix: zeronull.Timestamptz should use pgtype.Timestamptz +* Fix: NewConnsCount was not correctly counting connections created by Acquire directly. (James Hartig) +* RowTo(AddrOf)StructByPos ignores fields with "-" db tag +* Optimization: improve text format numeric parsing (horpto) + +# 5.4.2 (July 11, 2023) + +* Fix: RowScanner errors are fatal to Rows +* Fix: Enable failover efforts when pg_hba.conf disallows non-ssl connections (Brandon Kauffman) +* Hstore text codec internal improvements (Evan Jones) +* Fix: Stop timers for background reader when not in use. Fixes memory leak when closing connections (Adrian-Stefan Mares) +* Fix: Stop background reader as soon as possible. +* Add PgConn.SyncConn(). This combined with the above fix makes it safe to directly use the underlying net.Conn. + +# 5.4.1 (June 18, 2023) + +* Fix: concurrency bug with pgtypeDefaultMap and simple protocol (Lev Zakharov) +* Add TxOptions.BeginQuery to allow overriding the default BEGIN query + +# 5.4.0 (June 14, 2023) + +* Replace platform specific syscalls for non-blocking IO with more traditional goroutines and deadlines. This returns to the v4 approach with some additional improvements and fixes. This restores the ability to use a pgx.Conn over an ssh.Conn as well as other non-TCP or Unix socket connections. In addition, it is a significantly simpler implementation that is less likely to have cross platform issues. +* Optimization: The default type registrations are now shared among all connections. This saves about 100KB of memory per connection. `pgtype.Type` and `pgtype.Codec` values are now required to be immutable after registration. This was already necessary in most cases but wasn't documented until now. (Lev Zakharov) +* Fix: Ensure pgxpool.Pool.QueryRow.Scan releases connection on panic +* CancelRequest: don't try to read the reply (Nicola Murino) +* Fix: correctly handle bool type aliases (Wichert Akkerman) +* Fix: pgconn.CancelRequest: Fix unix sockets: don't use RemoteAddr() +* Fix: pgx.Conn memory leak with prepared statement caching (Evan Jones) +* Add BeforeClose to pgxpool.Pool (Evan Cordell) +* Fix: various hstore fixes and optimizations (Evan Jones) +* Fix: RowToStructByPos with embedded unexported struct +* Support different bool string representations (Lev Zakharov) +* Fix: error when using BatchResults.Exec on a select that returns an error after some rows. +* Fix: pipelineBatchResults.Exec() not returning error from ResultReader +* Fix: pipeline batch results not closing pipeline when error occurs while reading directly from results instead of using + a callback. +* Fix: scanning a table type into a struct +* Fix: scan array of record to pointer to slice of struct +* Fix: handle null for json (Cemre Mengu) +* Batch Query callback is called even when there is an error +* Add RowTo(AddrOf)StructByNameLax (Audi P. Risa P) + +# 5.3.1 (February 27, 2023) + +* Fix: Support v4 and v5 stdlib in same program (Tomáš Procházka) +* Fix: sql.Scanner not being used in certain cases +* Add text format jsonpath support +* Fix: fake non-blocking read adaptive wait time + +# 5.3.0 (February 11, 2023) + +* Fix: json values work with sql.Scanner +* Fixed / improved error messages (Mark Chambers and Yevgeny Pats) +* Fix: support scan into single dimensional arrays +* Fix: MaxConnLifetimeJitter setting actually jitter (Ben Weintraub) +* Fix: driver.Value representation of bytea should be []byte not string +* Fix: better handling of unregistered OIDs +* CopyFrom can use query cache to avoid extra round trip to get OIDs (Alejandro Do Nascimento Mora) +* Fix: encode to json ignoring driver.Valuer +* Support sql.Scanner on renamed base type +* Fix: pgtype.Numeric text encoding of negative numbers (Mark Chambers) +* Fix: connect with multiple hostnames when one can't be resolved +* Upgrade puddle to remove dependency on uber/atomic and fix alignment issue on 32-bit platform +* Fix: scanning json column into **string +* Multiple reductions in memory allocations +* Fake non-blocking read adapts its max wait time +* Improve CopyFrom performance and reduce memory usage +* Fix: encode []any to array +* Fix: LoadType for composite with dropped attributes (Felix Röhrich) +* Support v4 and v5 stdlib in same program +* Fix: text format array decoding with string of "NULL" +* Prefer binary format for arrays + +# 5.2.0 (December 5, 2022) + +* `tracelog.TraceLog` implements the pgx.PrepareTracer interface. (Vitalii Solodilov) +* Optimize creating begin transaction SQL string (Petr Evdokimov and ksco) +* `Conn.LoadType` supports range and multirange types (Vitalii Solodilov) +* Fix scan `uint` and `uint64` `ScanNumeric`. This resolves a PostgreSQL `numeric` being incorrectly scanned into `uint` and `uint64`. + +# 5.1.1 (November 17, 2022) + +* Fix simple query sanitizer where query text contains a Unicode replacement character. +* Remove erroneous `name` argument from `DeallocateAll()`. Technically, this is a breaking change, but given that method was only added 5 days ago this change was accepted. (Bodo Kaiser) + +# 5.1.0 (November 12, 2022) + +* Update puddle to v2.1.2. This resolves a race condition and a deadlock in pgxpool. +* `QueryRewriter.RewriteQuery` now returns an error. Technically, this is a breaking change for any external implementers, but given the minimal likelihood that there are actually any external implementers this change was accepted. +* Expose `GetSSLPassword` support to pgx. +* Fix encode `ErrorResponse` unknown field handling. This would only affect pgproto3 being used directly as a proxy with a non-PostgreSQL server that included additional error fields. +* Fix date text format encoding with 5 digit years. +* Fix date values passed to a `sql.Scanner` as `string` instead of `time.Time`. +* DateCodec.DecodeValue can return `pgtype.InfinityModifier` instead of `string` for infinite values. This now matches the behavior of the timestamp types. +* Add domain type support to `Conn.LoadType()`. +* Add `RowToStructByName` and `RowToAddrOfStructByName`. (Pavlo Golub) +* Add `Conn.DeallocateAll()` to clear all prepared statements including the statement cache. (Bodo Kaiser) + +# 5.0.4 (October 24, 2022) + +* Fix: CollectOneRow prefers PostgreSQL error over pgx.ErrorNoRows +* Fix: some reflect Kind checks to first check for nil +* Bump golang.org/x/text dependency to placate snyk +* Fix: RowToStructByPos on structs with multiple anonymous sub-structs (Baptiste Fontaine) +* Fix: Exec checks if tx is closed + +# 5.0.3 (October 14, 2022) + +* Fix `driver.Valuer` handling edge cases that could cause infinite loop or crash + +# v5.0.2 (October 8, 2022) + +* Fix date encoding in text format to always use 2 digits for month and day +* Prefer driver.Valuer over wrap plans when encoding +* Fix scan to pointer to pointer to renamed type +* Allow scanning NULL even if PG and Go types are incompatible + +# v5.0.1 (September 24, 2022) + +* Fix 32-bit atomic usage +* Add MarshalJSON for Float8 (yogipristiawan) +* Add `[` and `]` to text encoding of `Lseg` +* Fix sqlScannerWrapper NULL handling + +# v5.0.0 (September 17, 2022) + +## Merged Packages + +`github.com/jackc/pgtype`, `github.com/jackc/pgconn`, and `github.com/jackc/pgproto3` are now included in the main +`github.com/jackc/pgx` repository. Previously there was confusion as to where issues should be reported, additional +release work due to releasing multiple packages, and less clear changelogs. + +## pgconn + +`CommandTag` is now an opaque type instead of directly exposing an underlying `[]byte`. + +The return value `ResultReader.Values()` is no longer safe to retain a reference to after a subsequent call to `NextRow()` or `Close()`. + +`Trace()` method adds low level message tracing similar to the `PQtrace` function in `libpq`. + +pgconn now uses non-blocking IO. This is a significant internal restructuring, but it should not cause any visible changes on its own. However, it is important in implementing other new features. + +`CheckConn()` checks a connection's liveness by doing a non-blocking read. This can be used to detect database restarts or network interruptions without executing a query or a ping. + +pgconn now supports pipeline mode. + +`*PgConn.ReceiveResults` removed. Use pipeline mode instead. + +`Timeout()` no longer considers `context.Canceled` as a timeout error. `context.DeadlineExceeded` still is considered a timeout error. + +## pgxpool + +`Connect` and `ConnectConfig` have been renamed to `New` and `NewWithConfig` respectively. The `LazyConnect` option has been removed. Pools always lazily connect. + +## pgtype + +The `pgtype` package has been significantly changed. + +### NULL Representation + +Previously, types had a `Status` field that could be `Undefined`, `Null`, or `Present`. This has been changed to a +`Valid` `bool` field to harmonize with how `database/sql` represents `NULL` and to make the zero value useable. + +Previously, a type that implemented `driver.Valuer` would have the `Value` method called even on a nil pointer. All nils +whether typed or untyped now represent `NULL`. + +### Codec and Value Split + +Previously, the type system combined decoding and encoding values with the value types. e.g. Type `Int8` both handled +encoding and decoding the PostgreSQL representation and acted as a value object. This caused some difficulties when +there was not an exact 1 to 1 relationship between the Go types and the PostgreSQL types For example, scanning a +PostgreSQL binary `numeric` into a Go `float64` was awkward (see https://github.com/jackc/pgtype/issues/147). This +concepts have been separated. A `Codec` only has responsibility for encoding and decoding values. Value types are +generally defined by implementing an interface that a particular `Codec` understands (e.g. `PointScanner` and +`PointValuer` for the PostgreSQL `point` type). + +### Array Types + +All array types are now handled by `ArrayCodec` instead of using code generation for each new array type. This also +means that less common array types such as `point[]` are now supported. `Array[T]` supports PostgreSQL multi-dimensional +arrays. + +### Composite Types + +Composite types must be registered before use. `CompositeFields` may still be used to construct and destruct composite +values, but any type may now implement `CompositeIndexGetter` and `CompositeIndexScanner` to be used as a composite. + +### Range Types + +Range types are now handled with types `RangeCodec` and `Range[T]`. This allows additional user defined range types to +easily be handled. Multirange types are handled similarly with `MultirangeCodec` and `Multirange[T]`. + +### pgxtype + +`LoadDataType` moved to `*Conn` as `LoadType`. + +### Bytea + +The `Bytea` and `GenericBinary` types have been replaced. Use the following instead: + +* `[]byte` - For normal usage directly use `[]byte`. +* `DriverBytes` - Uses driver memory only available until next database method call. Avoids a copy and an allocation. +* `PreallocBytes` - Uses preallocated byte slice to avoid an allocation. +* `UndecodedBytes` - Avoids any decoding. Allows working with raw bytes. + +### Dropped lib/pq Support + +`pgtype` previously supported and was tested against [lib/pq](https://github.com/lib/pq). While it will continue to work +in most cases this is no longer supported. + +### database/sql Scan + +Previously, most `Scan` implementations would convert `[]byte` to `string` automatically to decode a text value. Now +only `string` is handled. This is to allow the possibility of future binary support in `database/sql` mode by +considering `[]byte` to be binary format and `string` text format. This change should have no effect for any use with +`pgx`. The previous behavior was only necessary for `lib/pq` compatibility. + +Added `*Map.SQLScanner` to create a `sql.Scanner` for types such as `[]int32` and `Range[T]` that do not implement +`sql.Scanner` directly. + +### Number Type Fields Include Bit size + +`Int2`, `Int4`, `Int8`, `Float4`, `Float8`, and `Uint32` fields now include bit size. e.g. `Int` is renamed to `Int64`. +This matches the convention set by `database/sql`. In addition, for comparable types like `pgtype.Int8` and +`sql.NullInt64` the structures are identical. This means they can be directly converted one to another. + +### 3rd Party Type Integrations + +* Extracted integrations with https://github.com/shopspring/decimal and https://github.com/gofrs/uuid to + https://github.com/jackc/pgx-shopspring-decimal and https://github.com/jackc/pgx-gofrs-uuid respectively. This trims + the pgx dependency tree. + +### Other Changes + +* `Bit` and `Varbit` are both replaced by the `Bits` type. +* `CID`, `OID`, `OIDValue`, and `XID` are replaced by the `Uint32` type. +* `Hstore` is now defined as `map[string]*string`. +* `JSON` and `JSONB` types removed. Use `[]byte` or `string` directly. +* `QChar` type removed. Use `rune` or `byte` directly. +* `Inet` and `Cidr` types removed. Use `netip.Addr` and `netip.Prefix` directly. These types are more memory efficient than the previous `net.IPNet`. +* `Macaddr` type removed. Use `net.HardwareAddr` directly. +* Renamed `pgtype.ConnInfo` to `pgtype.Map`. +* Renamed `pgtype.DataType` to `pgtype.Type`. +* Renamed `pgtype.None` to `pgtype.Finite`. +* `RegisterType` now accepts a `*Type` instead of `Type`. +* Assorted array helper methods and types made private. + +## stdlib + +* Removed `AcquireConn` and `ReleaseConn` as that functionality has been built in since Go 1.13. + +## Reduced Memory Usage by Reusing Read Buffers + +Previously, the connection read buffer would allocate large chunks of memory and never reuse them. This allowed +transferring ownership to anything such as scanned values without incurring an additional allocation and memory copy. +However, this came at the cost of overall increased memory allocation size. But worse it was also possible to pin large +chunks of memory by retaining a reference to a small value that originally came directly from the read buffer. Now +ownership remains with the read buffer and anything needing to retain a value must make a copy. + +## Query Execution Modes + +Control over automatic prepared statement caching and simple protocol use are now combined into query execution mode. +See documentation for `QueryExecMode`. + +## QueryRewriter Interface and NamedArgs + +pgx now supports named arguments with the `NamedArgs` type. This is implemented via the new `QueryRewriter` interface which +allows arbitrary rewriting of query SQL and arguments. + +## RowScanner Interface + +The `RowScanner` interface allows a single argument to Rows.Scan to scan the entire row. + +## Rows Result Helpers + +* `CollectRows` and `RowTo*` functions simplify collecting results into a slice. +* `CollectOneRow` collects one row using `RowTo*` functions. +* `ForEachRow` simplifies scanning each row and executing code using the scanned values. `ForEachRow` replaces `QueryFunc`. + +## Tx Helpers + +Rather than every type that implemented `Begin` or `BeginTx` methods also needing to implement `BeginFunc` and +`BeginTxFunc` these methods have been converted to functions that take a db that implements `Begin` or `BeginTx`. + +## Improved Batch Query Ergonomics + +Previously, the code for building a batch went in one place before the call to `SendBatch`, and the code for reading the +results went in one place after the call to `SendBatch`. This could make it difficult to match up the query and the code +to handle the results. Now `Queue` returns a `QueuedQuery` which has methods `Query`, `QueryRow`, and `Exec` which can +be used to register a callback function that will handle the result. Callback functions are called automatically when +`BatchResults.Close` is called. + +## SendBatch Uses Pipeline Mode When Appropriate + +Previously, a batch with 10 unique parameterized statements executed 100 times would entail 11 network round trips. 1 +for each prepare / describe and 1 for executing them all. Now pipeline mode is used to prepare / describe all statements +in a single network round trip. So it would only take 2 round trips. + +## Tracing and Logging + +Internal logging support has been replaced with tracing hooks. This allows custom tracing integration with tools like OpenTelemetry. Package tracelog provides an adapter for pgx v4 loggers to act as a tracer. + +All integrations with 3rd party loggers have been extracted to separate repositories. This trims the pgx dependency +tree. diff --git a/vendor/github.com/jackc/pgx/v5/CONTRIBUTING.md b/vendor/github.com/jackc/pgx/v5/CONTRIBUTING.md new file mode 100644 index 00000000..c975a937 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/CONTRIBUTING.md @@ -0,0 +1,121 @@ +# Contributing + +## Discuss Significant Changes + +Before you invest a significant amount of time on a change, please create a discussion or issue describing your +proposal. This will help to ensure your proposed change has a reasonable chance of being merged. + +## Avoid Dependencies + +Adding a dependency is a big deal. While on occasion a new dependency may be accepted, the default answer to any change +that adds a dependency is no. + +## Development Environment Setup + +pgx tests naturally require a PostgreSQL database. It will connect to the database specified in the `PGX_TEST_DATABASE` +environment variable. The `PGX_TEST_DATABASE` environment variable can either be a URL or key-value pairs. In addition, +the standard `PG*` environment variables will be respected. Consider using [direnv](https://github.com/direnv/direnv) to +simplify environment variable handling. + +### Using an Existing PostgreSQL Cluster + +If you already have a PostgreSQL development server this is the quickest way to start and run the majority of the pgx +test suite. Some tests will be skipped that require server configuration changes (e.g. those testing different +authentication methods). + +Create and setup a test database: + +``` +export PGDATABASE=pgx_test +createdb +psql -c 'create extension hstore;' +psql -c 'create extension ltree;' +psql -c 'create domain uint64 as numeric(20,0);' +``` + +Ensure a `postgres` user exists. This happens by default in normal PostgreSQL installs, but some installation methods +such as Homebrew do not. + +``` +createuser -s postgres +``` + +Ensure your `PGX_TEST_DATABASE` environment variable points to the database you just created and run the tests. + +``` +export PGX_TEST_DATABASE="host=/private/tmp database=pgx_test" +go test ./... +``` + +This will run the vast majority of the tests, but some tests will be skipped (e.g. those testing different connection methods). + +### Creating a New PostgreSQL Cluster Exclusively for Testing + +The following environment variables need to be set both for initial setup and whenever the tests are run. (direnv is +highly recommended). Depending on your platform, you may need to change the host for `PGX_TEST_UNIX_SOCKET_CONN_STRING`. + +``` +export PGPORT=5015 +export PGUSER=postgres +export PGDATABASE=pgx_test +export POSTGRESQL_DATA_DIR=postgresql + +export PGX_TEST_DATABASE="host=127.0.0.1 database=pgx_test user=pgx_md5 password=secret" +export PGX_TEST_UNIX_SOCKET_CONN_STRING="host=/private/tmp database=pgx_test" +export PGX_TEST_TCP_CONN_STRING="host=127.0.0.1 database=pgx_test user=pgx_md5 password=secret" +export PGX_TEST_SCRAM_PASSWORD_CONN_STRING="host=127.0.0.1 user=pgx_scram password=secret database=pgx_test" +export PGX_TEST_MD5_PASSWORD_CONN_STRING="host=127.0.0.1 database=pgx_test user=pgx_md5 password=secret" +export PGX_TEST_PLAIN_PASSWORD_CONN_STRING="host=127.0.0.1 user=pgx_pw password=secret" +export PGX_TEST_TLS_CONN_STRING="host=localhost user=pgx_ssl password=secret sslmode=verify-full sslrootcert=`pwd`/.testdb/ca.pem" +export PGX_SSL_PASSWORD=certpw +export PGX_TEST_TLS_CLIENT_CONN_STRING="host=localhost user=pgx_sslcert sslmode=verify-full sslrootcert=`pwd`/.testdb/ca.pem database=pgx_test sslcert=`pwd`/.testdb/pgx_sslcert.crt sslkey=`pwd`/.testdb/pgx_sslcert.key" +``` + +Create a new database cluster. + +``` +initdb --locale=en_US -E UTF-8 --username=postgres .testdb/$POSTGRESQL_DATA_DIR + +echo "listen_addresses = '127.0.0.1'" >> .testdb/$POSTGRESQL_DATA_DIR/postgresql.conf +echo "port = $PGPORT" >> .testdb/$POSTGRESQL_DATA_DIR/postgresql.conf +cat testsetup/postgresql_ssl.conf >> .testdb/$POSTGRESQL_DATA_DIR/postgresql.conf +cp testsetup/pg_hba.conf .testdb/$POSTGRESQL_DATA_DIR/pg_hba.conf + +cd .testdb + +# Generate CA, server, and encrypted client certificates. +go run ../testsetup/generate_certs.go + +# Copy certificates to server directory and set permissions. +cp ca.pem $POSTGRESQL_DATA_DIR/root.crt +cp localhost.key $POSTGRESQL_DATA_DIR/server.key +chmod 600 $POSTGRESQL_DATA_DIR/server.key +cp localhost.crt $POSTGRESQL_DATA_DIR/server.crt + +cd .. +``` + + +Start the new cluster. This will be necessary whenever you are running pgx tests. + +``` +postgres -D .testdb/$POSTGRESQL_DATA_DIR +``` + +Setup the test database in the new cluster. + +``` +createdb +psql --no-psqlrc -f testsetup/postgresql_setup.sql +``` + +### PgBouncer + +There are tests specific for PgBouncer that will be executed if `PGX_TEST_PGBOUNCER_CONN_STRING` is set. + +### Optional Tests + +pgx supports multiple connection types and means of authentication. These tests are optional. They will only run if the +appropriate environment variables are set. In addition, there may be tests specific to particular PostgreSQL versions, +non-PostgreSQL servers (e.g. CockroachDB), or connection poolers (e.g. PgBouncer). `go test ./... -v | grep SKIP` to see +if any tests are being skipped. diff --git a/vendor/github.com/jackc/pgtype/LICENSE b/vendor/github.com/jackc/pgx/v5/LICENSE similarity index 100% rename from vendor/github.com/jackc/pgtype/LICENSE rename to vendor/github.com/jackc/pgx/v5/LICENSE diff --git a/vendor/github.com/jackc/pgx/v5/README.md b/vendor/github.com/jackc/pgx/v5/README.md new file mode 100644 index 00000000..0cf2c291 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/README.md @@ -0,0 +1,174 @@ +[![Go Reference](https://pkg.go.dev/badge/github.com/jackc/pgx/v5.svg)](https://pkg.go.dev/github.com/jackc/pgx/v5) +[![Build Status](https://github.com/jackc/pgx/actions/workflows/ci.yml/badge.svg)](https://github.com/jackc/pgx/actions/workflows/ci.yml) + +# pgx - PostgreSQL Driver and Toolkit + +pgx is a pure Go driver and toolkit for PostgreSQL. + +The pgx driver is a low-level, high performance interface that exposes PostgreSQL-specific features such as `LISTEN` / +`NOTIFY` and `COPY`. It also includes an adapter for the standard `database/sql` interface. + +The toolkit component is a related set of packages that implement PostgreSQL functionality such as parsing the wire protocol +and type mapping between PostgreSQL and Go. These underlying packages can be used to implement alternative drivers, +proxies, load balancers, logical replication clients, etc. + +## Example Usage + +```go +package main + +import ( + "context" + "fmt" + "os" + + "github.com/jackc/pgx/v5" +) + +func main() { + // urlExample := "postgres://username:password@localhost:5432/database_name" + conn, err := pgx.Connect(context.Background(), os.Getenv("DATABASE_URL")) + if err != nil { + fmt.Fprintf(os.Stderr, "Unable to connect to database: %v\n", err) + os.Exit(1) + } + defer conn.Close(context.Background()) + + var name string + var weight int64 + err = conn.QueryRow(context.Background(), "select name, weight from widgets where id=$1", 42).Scan(&name, &weight) + if err != nil { + fmt.Fprintf(os.Stderr, "QueryRow failed: %v\n", err) + os.Exit(1) + } + + fmt.Println(name, weight) +} +``` + +See the [getting started guide](https://github.com/jackc/pgx/wiki/Getting-started-with-pgx) for more information. + +## Features + +* Support for approximately 70 different PostgreSQL types +* Automatic statement preparation and caching +* Batch queries +* Single-round trip query mode +* Full TLS connection control +* Binary format support for custom types (allows for much quicker encoding/decoding) +* `COPY` protocol support for faster bulk data loads +* Tracing and logging support +* Connection pool with after-connect hook for arbitrary connection setup +* `LISTEN` / `NOTIFY` +* Conversion of PostgreSQL arrays to Go slice mappings for integers, floats, and strings +* `hstore` support +* `json` and `jsonb` support +* Maps `inet` and `cidr` PostgreSQL types to `netip.Addr` and `netip.Prefix` +* Large object support +* NULL mapping to pointer to pointer +* Supports `database/sql.Scanner` and `database/sql/driver.Valuer` interfaces for custom types +* Notice response handling +* Simulated nested transactions with savepoints + +## Choosing Between the pgx and database/sql Interfaces + +The pgx interface is faster. Many PostgreSQL specific features such as `LISTEN` / `NOTIFY` and `COPY` are not available +through the `database/sql` interface. + +The pgx interface is recommended when: + +1. The application only targets PostgreSQL. +2. No other libraries that require `database/sql` are in use. + +It is also possible to use the `database/sql` interface and convert a connection to the lower-level pgx interface as needed. + +## Testing + +See CONTRIBUTING.md for setup instructions. + +## Architecture + +See the presentation at Golang Estonia, [PGX Top to Bottom](https://www.youtube.com/watch?v=sXMSWhcHCf8) for a description of pgx architecture. + +## Supported Go and PostgreSQL Versions + +pgx supports the same versions of Go and PostgreSQL that are supported by their respective teams. For [Go](https://golang.org/doc/devel/release.html#policy) that is the two most recent major releases and for [PostgreSQL](https://www.postgresql.org/support/versioning/) the major releases in the last 5 years. This means pgx supports Go 1.21 and higher and PostgreSQL 12 and higher. pgx also is tested against the latest version of [CockroachDB](https://www.cockroachlabs.com/product/). + +## Version Policy + +pgx follows semantic versioning for the documented public API on stable releases. `v5` is the latest stable major version. + +## PGX Family Libraries + +### [github.com/jackc/pglogrepl](https://github.com/jackc/pglogrepl) + +pglogrepl provides functionality to act as a client for PostgreSQL logical replication. + +### [github.com/jackc/pgmock](https://github.com/jackc/pgmock) + +pgmock offers the ability to create a server that mocks the PostgreSQL wire protocol. This is used internally to test pgx by purposely inducing unusual errors. pgproto3 and pgmock together provide most of the foundational tooling required to implement a PostgreSQL proxy or MitM (such as for a custom connection pooler). + +### [github.com/jackc/tern](https://github.com/jackc/tern) + +tern is a stand-alone SQL migration system. + +### [github.com/jackc/pgerrcode](https://github.com/jackc/pgerrcode) + +pgerrcode contains constants for the PostgreSQL error codes. + +## Adapters for 3rd Party Types + +* [github.com/jackc/pgx-gofrs-uuid](https://github.com/jackc/pgx-gofrs-uuid) +* [github.com/jackc/pgx-shopspring-decimal](https://github.com/jackc/pgx-shopspring-decimal) +* [github.com/twpayne/pgx-geos](https://github.com/twpayne/pgx-geos) ([PostGIS](https://postgis.net/) and [GEOS](https://libgeos.org/) via [go-geos](https://github.com/twpayne/go-geos)) +* [github.com/vgarvardt/pgx-google-uuid](https://github.com/vgarvardt/pgx-google-uuid) + + +## Adapters for 3rd Party Tracers + +* [https://github.com/jackhopner/pgx-xray-tracer](https://github.com/jackhopner/pgx-xray-tracer) + +## Adapters for 3rd Party Loggers + +These adapters can be used with the tracelog package. + +* [github.com/jackc/pgx-go-kit-log](https://github.com/jackc/pgx-go-kit-log) +* [github.com/jackc/pgx-log15](https://github.com/jackc/pgx-log15) +* [github.com/jackc/pgx-logrus](https://github.com/jackc/pgx-logrus) +* [github.com/jackc/pgx-zap](https://github.com/jackc/pgx-zap) +* [github.com/jackc/pgx-zerolog](https://github.com/jackc/pgx-zerolog) +* [github.com/mcosta74/pgx-slog](https://github.com/mcosta74/pgx-slog) +* [github.com/kataras/pgx-golog](https://github.com/kataras/pgx-golog) + +## 3rd Party Libraries with PGX Support + +### [github.com/pashagolub/pgxmock](https://github.com/pashagolub/pgxmock) + +pgxmock is a mock library implementing pgx interfaces. +pgxmock has one and only purpose - to simulate pgx behavior in tests, without needing a real database connection. + +### [github.com/georgysavva/scany](https://github.com/georgysavva/scany) + +Library for scanning data from a database into Go structs and more. + +### [github.com/vingarcia/ksql](https://github.com/vingarcia/ksql) + +A carefully designed SQL client for making using SQL easier, +more productive, and less error-prone on Golang. + +### [https://github.com/otan/gopgkrb5](https://github.com/otan/gopgkrb5) + +Adds GSSAPI / Kerberos authentication support. + +### [github.com/wcamarao/pmx](https://github.com/wcamarao/pmx) + +Explicit data mapping and scanning library for Go structs and slices. + +### [github.com/stephenafamo/scan](https://github.com/stephenafamo/scan) + +Type safe and flexible package for scanning database data into Go types. +Supports, structs, maps, slices and custom mapping functions. + +### [https://github.com/z0ne-dev/mgx](https://github.com/z0ne-dev/mgx) + +Code first migration library for native pgx (no database/sql abstraction). diff --git a/vendor/github.com/jackc/pgx/v5/Rakefile b/vendor/github.com/jackc/pgx/v5/Rakefile new file mode 100644 index 00000000..d957573e --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/Rakefile @@ -0,0 +1,18 @@ +require "erb" + +rule '.go' => '.go.erb' do |task| + erb = ERB.new(File.read(task.source)) + File.write(task.name, "// Do not edit. Generated from #{task.source}\n" + erb.result(binding)) + sh "goimports", "-w", task.name +end + +generated_code_files = [ + "pgtype/int.go", + "pgtype/int_test.go", + "pgtype/integration_benchmark_test.go", + "pgtype/zeronull/int.go", + "pgtype/zeronull/int_test.go" +] + +desc "Generate code" +task generate: generated_code_files diff --git a/vendor/github.com/jackc/pgx/v5/batch.go b/vendor/github.com/jackc/pgx/v5/batch.go new file mode 100644 index 00000000..c3c2834f --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/batch.go @@ -0,0 +1,443 @@ +package pgx + +import ( + "context" + "errors" + "fmt" + + "github.com/jackc/pgx/v5/pgconn" +) + +// QueuedQuery is a query that has been queued for execution via a Batch. +type QueuedQuery struct { + SQL string + Arguments []any + Fn batchItemFunc + sd *pgconn.StatementDescription +} + +type batchItemFunc func(br BatchResults) error + +// Query sets fn to be called when the response to qq is received. +func (qq *QueuedQuery) Query(fn func(rows Rows) error) { + qq.Fn = func(br BatchResults) error { + rows, _ := br.Query() + defer rows.Close() + + err := fn(rows) + if err != nil { + return err + } + rows.Close() + + return rows.Err() + } +} + +// Query sets fn to be called when the response to qq is received. +func (qq *QueuedQuery) QueryRow(fn func(row Row) error) { + qq.Fn = func(br BatchResults) error { + row := br.QueryRow() + return fn(row) + } +} + +// Exec sets fn to be called when the response to qq is received. +func (qq *QueuedQuery) Exec(fn func(ct pgconn.CommandTag) error) { + qq.Fn = func(br BatchResults) error { + ct, err := br.Exec() + if err != nil { + return err + } + + return fn(ct) + } +} + +// Batch queries are a way of bundling multiple queries together to avoid +// unnecessary network round trips. A Batch must only be sent once. +type Batch struct { + QueuedQueries []*QueuedQuery +} + +// Queue queues a query to batch b. query can be an SQL query or the name of a prepared statement. The only pgx option +// argument that is supported is QueryRewriter. Queries are executed using the connection's DefaultQueryExecMode. +// +// While query can contain multiple statements if the connection's DefaultQueryExecMode is QueryModeSimple, this should +// be avoided. QueuedQuery.Fn must not be set as it will only be called for the first query. That is, QueuedQuery.Query, +// QueuedQuery.QueryRow, and QueuedQuery.Exec must not be called. In addition, any error messages or tracing that +// include the current query may reference the wrong query. +func (b *Batch) Queue(query string, arguments ...any) *QueuedQuery { + qq := &QueuedQuery{ + SQL: query, + Arguments: arguments, + } + b.QueuedQueries = append(b.QueuedQueries, qq) + return qq +} + +// Len returns number of queries that have been queued so far. +func (b *Batch) Len() int { + return len(b.QueuedQueries) +} + +type BatchResults interface { + // Exec reads the results from the next query in the batch as if the query has been sent with Conn.Exec. Prefer + // calling Exec on the QueuedQuery. + Exec() (pgconn.CommandTag, error) + + // Query reads the results from the next query in the batch as if the query has been sent with Conn.Query. Prefer + // calling Query on the QueuedQuery. + Query() (Rows, error) + + // QueryRow reads the results from the next query in the batch as if the query has been sent with Conn.QueryRow. + // Prefer calling QueryRow on the QueuedQuery. + QueryRow() Row + + // Close closes the batch operation. All unread results are read and any callback functions registered with + // QueuedQuery.Query, QueuedQuery.QueryRow, or QueuedQuery.Exec will be called. If a callback function returns an + // error or the batch encounters an error subsequent callback functions will not be called. + // + // Close must be called before the underlying connection can be used again. Any error that occurred during a batch + // operation may have made it impossible to resyncronize the connection with the server. In this case the underlying + // connection will have been closed. + // + // Close is safe to call multiple times. If it returns an error subsequent calls will return the same error. Callback + // functions will not be rerun. + Close() error +} + +type batchResults struct { + ctx context.Context + conn *Conn + mrr *pgconn.MultiResultReader + err error + b *Batch + qqIdx int + closed bool + endTraced bool +} + +// Exec reads the results from the next query in the batch as if the query has been sent with Exec. +func (br *batchResults) Exec() (pgconn.CommandTag, error) { + if br.err != nil { + return pgconn.CommandTag{}, br.err + } + if br.closed { + return pgconn.CommandTag{}, fmt.Errorf("batch already closed") + } + + query, arguments, _ := br.nextQueryAndArgs() + + if !br.mrr.NextResult() { + err := br.mrr.Close() + if err == nil { + err = errors.New("no more results in batch") + } + if br.conn.batchTracer != nil { + br.conn.batchTracer.TraceBatchQuery(br.ctx, br.conn, TraceBatchQueryData{ + SQL: query, + Args: arguments, + Err: err, + }) + } + return pgconn.CommandTag{}, err + } + + commandTag, err := br.mrr.ResultReader().Close() + if err != nil { + br.err = err + br.mrr.Close() + } + + if br.conn.batchTracer != nil { + br.conn.batchTracer.TraceBatchQuery(br.ctx, br.conn, TraceBatchQueryData{ + SQL: query, + Args: arguments, + CommandTag: commandTag, + Err: br.err, + }) + } + + return commandTag, br.err +} + +// Query reads the results from the next query in the batch as if the query has been sent with Query. +func (br *batchResults) Query() (Rows, error) { + query, arguments, ok := br.nextQueryAndArgs() + if !ok { + query = "batch query" + } + + if br.err != nil { + return &baseRows{err: br.err, closed: true}, br.err + } + + if br.closed { + alreadyClosedErr := fmt.Errorf("batch already closed") + return &baseRows{err: alreadyClosedErr, closed: true}, alreadyClosedErr + } + + rows := br.conn.getRows(br.ctx, query, arguments) + rows.batchTracer = br.conn.batchTracer + + if !br.mrr.NextResult() { + rows.err = br.mrr.Close() + if rows.err == nil { + rows.err = errors.New("no more results in batch") + } + rows.closed = true + + if br.conn.batchTracer != nil { + br.conn.batchTracer.TraceBatchQuery(br.ctx, br.conn, TraceBatchQueryData{ + SQL: query, + Args: arguments, + Err: rows.err, + }) + } + + return rows, rows.err + } + + rows.resultReader = br.mrr.ResultReader() + return rows, nil +} + +// QueryRow reads the results from the next query in the batch as if the query has been sent with QueryRow. +func (br *batchResults) QueryRow() Row { + rows, _ := br.Query() + return (*connRow)(rows.(*baseRows)) + +} + +// Close closes the batch operation. Any error that occurred during a batch operation may have made it impossible to +// resyncronize the connection with the server. In this case the underlying connection will have been closed. +func (br *batchResults) Close() error { + defer func() { + if !br.endTraced { + if br.conn != nil && br.conn.batchTracer != nil { + br.conn.batchTracer.TraceBatchEnd(br.ctx, br.conn, TraceBatchEndData{Err: br.err}) + } + br.endTraced = true + } + }() + + if br.err != nil { + return br.err + } + + if br.closed { + return nil + } + + // Read and run fn for all remaining items + for br.err == nil && !br.closed && br.b != nil && br.qqIdx < len(br.b.QueuedQueries) { + if br.b.QueuedQueries[br.qqIdx].Fn != nil { + err := br.b.QueuedQueries[br.qqIdx].Fn(br) + if err != nil { + br.err = err + } + } else { + br.Exec() + } + } + + br.closed = true + + err := br.mrr.Close() + if br.err == nil { + br.err = err + } + + return br.err +} + +func (br *batchResults) earlyError() error { + return br.err +} + +func (br *batchResults) nextQueryAndArgs() (query string, args []any, ok bool) { + if br.b != nil && br.qqIdx < len(br.b.QueuedQueries) { + bi := br.b.QueuedQueries[br.qqIdx] + query = bi.SQL + args = bi.Arguments + ok = true + br.qqIdx++ + } + return +} + +type pipelineBatchResults struct { + ctx context.Context + conn *Conn + pipeline *pgconn.Pipeline + lastRows *baseRows + err error + b *Batch + qqIdx int + closed bool + endTraced bool +} + +// Exec reads the results from the next query in the batch as if the query has been sent with Exec. +func (br *pipelineBatchResults) Exec() (pgconn.CommandTag, error) { + if br.err != nil { + return pgconn.CommandTag{}, br.err + } + if br.closed { + return pgconn.CommandTag{}, fmt.Errorf("batch already closed") + } + if br.lastRows != nil && br.lastRows.err != nil { + return pgconn.CommandTag{}, br.err + } + + query, arguments, err := br.nextQueryAndArgs() + if err != nil { + return pgconn.CommandTag{}, err + } + + results, err := br.pipeline.GetResults() + if err != nil { + br.err = err + return pgconn.CommandTag{}, br.err + } + var commandTag pgconn.CommandTag + switch results := results.(type) { + case *pgconn.ResultReader: + commandTag, br.err = results.Close() + default: + return pgconn.CommandTag{}, fmt.Errorf("unexpected pipeline result: %T", results) + } + + if br.conn.batchTracer != nil { + br.conn.batchTracer.TraceBatchQuery(br.ctx, br.conn, TraceBatchQueryData{ + SQL: query, + Args: arguments, + CommandTag: commandTag, + Err: br.err, + }) + } + + return commandTag, br.err +} + +// Query reads the results from the next query in the batch as if the query has been sent with Query. +func (br *pipelineBatchResults) Query() (Rows, error) { + if br.err != nil { + return &baseRows{err: br.err, closed: true}, br.err + } + + if br.closed { + alreadyClosedErr := fmt.Errorf("batch already closed") + return &baseRows{err: alreadyClosedErr, closed: true}, alreadyClosedErr + } + + if br.lastRows != nil && br.lastRows.err != nil { + br.err = br.lastRows.err + return &baseRows{err: br.err, closed: true}, br.err + } + + query, arguments, err := br.nextQueryAndArgs() + if err != nil { + return &baseRows{err: err, closed: true}, err + } + + rows := br.conn.getRows(br.ctx, query, arguments) + rows.batchTracer = br.conn.batchTracer + br.lastRows = rows + + results, err := br.pipeline.GetResults() + if err != nil { + br.err = err + rows.err = err + rows.closed = true + + if br.conn.batchTracer != nil { + br.conn.batchTracer.TraceBatchQuery(br.ctx, br.conn, TraceBatchQueryData{ + SQL: query, + Args: arguments, + Err: err, + }) + } + } else { + switch results := results.(type) { + case *pgconn.ResultReader: + rows.resultReader = results + default: + err = fmt.Errorf("unexpected pipeline result: %T", results) + br.err = err + rows.err = err + rows.closed = true + } + } + + return rows, rows.err +} + +// QueryRow reads the results from the next query in the batch as if the query has been sent with QueryRow. +func (br *pipelineBatchResults) QueryRow() Row { + rows, _ := br.Query() + return (*connRow)(rows.(*baseRows)) + +} + +// Close closes the batch operation. Any error that occurred during a batch operation may have made it impossible to +// resyncronize the connection with the server. In this case the underlying connection will have been closed. +func (br *pipelineBatchResults) Close() error { + defer func() { + if !br.endTraced { + if br.conn.batchTracer != nil { + br.conn.batchTracer.TraceBatchEnd(br.ctx, br.conn, TraceBatchEndData{Err: br.err}) + } + br.endTraced = true + } + }() + + if br.err == nil && br.lastRows != nil && br.lastRows.err != nil { + br.err = br.lastRows.err + return br.err + } + + if br.closed { + return br.err + } + + // Read and run fn for all remaining items + for br.err == nil && !br.closed && br.b != nil && br.qqIdx < len(br.b.QueuedQueries) { + if br.b.QueuedQueries[br.qqIdx].Fn != nil { + err := br.b.QueuedQueries[br.qqIdx].Fn(br) + if err != nil { + br.err = err + } + } else { + br.Exec() + } + } + + br.closed = true + + err := br.pipeline.Close() + if br.err == nil { + br.err = err + } + + return br.err +} + +func (br *pipelineBatchResults) earlyError() error { + return br.err +} + +func (br *pipelineBatchResults) nextQueryAndArgs() (query string, args []any, err error) { + if br.b == nil { + return "", nil, errors.New("no reference to batch") + } + + if br.qqIdx >= len(br.b.QueuedQueries) { + return "", nil, errors.New("no more results in batch") + } + + bi := br.b.QueuedQueries[br.qqIdx] + br.qqIdx++ + return bi.SQL, bi.Arguments, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/conn.go b/vendor/github.com/jackc/pgx/v5/conn.go new file mode 100644 index 00000000..187b3dd5 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/conn.go @@ -0,0 +1,1413 @@ +package pgx + +import ( + "context" + "crypto/sha256" + "database/sql" + "encoding/hex" + "errors" + "fmt" + "strconv" + "strings" + "time" + + "github.com/jackc/pgx/v5/internal/sanitize" + "github.com/jackc/pgx/v5/internal/stmtcache" + "github.com/jackc/pgx/v5/pgconn" + "github.com/jackc/pgx/v5/pgtype" +) + +// ConnConfig contains all the options used to establish a connection. It must be created by ParseConfig and +// then it can be modified. A manually initialized ConnConfig will cause ConnectConfig to panic. +type ConnConfig struct { + pgconn.Config + + Tracer QueryTracer + + // Original connection string that was parsed into config. + connString string + + // StatementCacheCapacity is maximum size of the statement cache used when executing a query with "cache_statement" + // query exec mode. + StatementCacheCapacity int + + // DescriptionCacheCapacity is the maximum size of the description cache used when executing a query with + // "cache_describe" query exec mode. + DescriptionCacheCapacity int + + // DefaultQueryExecMode controls the default mode for executing queries. By default pgx uses the extended protocol + // and automatically prepares and caches prepared statements. However, this may be incompatible with proxies such as + // PGBouncer. In this case it may be preferable to use QueryExecModeExec or QueryExecModeSimpleProtocol. The same + // functionality can be controlled on a per query basis by passing a QueryExecMode as the first query argument. + DefaultQueryExecMode QueryExecMode + + createdByParseConfig bool // Used to enforce created by ParseConfig rule. +} + +// ParseConfigOptions contains options that control how a config is built such as getsslpassword. +type ParseConfigOptions struct { + pgconn.ParseConfigOptions +} + +// Copy returns a deep copy of the config that is safe to use and modify. +// The only exception is the tls.Config: +// according to the tls.Config docs it must not be modified after creation. +func (cc *ConnConfig) Copy() *ConnConfig { + newConfig := new(ConnConfig) + *newConfig = *cc + newConfig.Config = *newConfig.Config.Copy() + return newConfig +} + +// ConnString returns the connection string as parsed by pgx.ParseConfig into pgx.ConnConfig. +func (cc *ConnConfig) ConnString() string { return cc.connString } + +// Conn is a PostgreSQL connection handle. It is not safe for concurrent usage. Use a connection pool to manage access +// to multiple database connections from multiple goroutines. +type Conn struct { + pgConn *pgconn.PgConn + config *ConnConfig // config used when establishing this connection + preparedStatements map[string]*pgconn.StatementDescription + statementCache stmtcache.Cache + descriptionCache stmtcache.Cache + + queryTracer QueryTracer + batchTracer BatchTracer + copyFromTracer CopyFromTracer + prepareTracer PrepareTracer + + notifications []*pgconn.Notification + + doneChan chan struct{} + closedChan chan error + + typeMap *pgtype.Map + + wbuf []byte + eqb ExtendedQueryBuilder +} + +// Identifier a PostgreSQL identifier or name. Identifiers can be composed of +// multiple parts such as ["schema", "table"] or ["table", "column"]. +type Identifier []string + +// Sanitize returns a sanitized string safe for SQL interpolation. +func (ident Identifier) Sanitize() string { + parts := make([]string, len(ident)) + for i := range ident { + s := strings.ReplaceAll(ident[i], string([]byte{0}), "") + parts[i] = `"` + strings.ReplaceAll(s, `"`, `""`) + `"` + } + return strings.Join(parts, ".") +} + +var ( + // ErrNoRows occurs when rows are expected but none are returned. + ErrNoRows = newProxyErr(sql.ErrNoRows, "no rows in result set") + // ErrTooManyRows occurs when more rows than expected are returned. + ErrTooManyRows = errors.New("too many rows in result set") +) + +func newProxyErr(background error, msg string) error { + return &proxyError{ + msg: msg, + background: background, + } +} + +type proxyError struct { + msg string + background error +} + +func (err *proxyError) Error() string { return err.msg } + +func (err *proxyError) Unwrap() error { return err.background } + +var ( + errDisabledStatementCache = fmt.Errorf("cannot use QueryExecModeCacheStatement with disabled statement cache") + errDisabledDescriptionCache = fmt.Errorf("cannot use QueryExecModeCacheDescribe with disabled description cache") +) + +// Connect establishes a connection with a PostgreSQL server with a connection string. See +// pgconn.Connect for details. +func Connect(ctx context.Context, connString string) (*Conn, error) { + connConfig, err := ParseConfig(connString) + if err != nil { + return nil, err + } + return connect(ctx, connConfig) +} + +// ConnectWithOptions behaves exactly like Connect with the addition of options. At the present options is only used to +// provide a GetSSLPassword function. +func ConnectWithOptions(ctx context.Context, connString string, options ParseConfigOptions) (*Conn, error) { + connConfig, err := ParseConfigWithOptions(connString, options) + if err != nil { + return nil, err + } + return connect(ctx, connConfig) +} + +// ConnectConfig establishes a connection with a PostgreSQL server with a configuration struct. +// connConfig must have been created by ParseConfig. +func ConnectConfig(ctx context.Context, connConfig *ConnConfig) (*Conn, error) { + // In general this improves safety. In particular avoid the config.Config.OnNotification mutation from affecting other + // connections with the same config. See https://github.com/jackc/pgx/issues/618. + connConfig = connConfig.Copy() + + return connect(ctx, connConfig) +} + +// ParseConfigWithOptions behaves exactly as ParseConfig does with the addition of options. At the present options is +// only used to provide a GetSSLPassword function. +func ParseConfigWithOptions(connString string, options ParseConfigOptions) (*ConnConfig, error) { + config, err := pgconn.ParseConfigWithOptions(connString, options.ParseConfigOptions) + if err != nil { + return nil, err + } + + statementCacheCapacity := 512 + if s, ok := config.RuntimeParams["statement_cache_capacity"]; ok { + delete(config.RuntimeParams, "statement_cache_capacity") + n, err := strconv.ParseInt(s, 10, 32) + if err != nil { + return nil, fmt.Errorf("cannot parse statement_cache_capacity: %w", err) + } + statementCacheCapacity = int(n) + } + + descriptionCacheCapacity := 512 + if s, ok := config.RuntimeParams["description_cache_capacity"]; ok { + delete(config.RuntimeParams, "description_cache_capacity") + n, err := strconv.ParseInt(s, 10, 32) + if err != nil { + return nil, fmt.Errorf("cannot parse description_cache_capacity: %w", err) + } + descriptionCacheCapacity = int(n) + } + + defaultQueryExecMode := QueryExecModeCacheStatement + if s, ok := config.RuntimeParams["default_query_exec_mode"]; ok { + delete(config.RuntimeParams, "default_query_exec_mode") + switch s { + case "cache_statement": + defaultQueryExecMode = QueryExecModeCacheStatement + case "cache_describe": + defaultQueryExecMode = QueryExecModeCacheDescribe + case "describe_exec": + defaultQueryExecMode = QueryExecModeDescribeExec + case "exec": + defaultQueryExecMode = QueryExecModeExec + case "simple_protocol": + defaultQueryExecMode = QueryExecModeSimpleProtocol + default: + return nil, fmt.Errorf("invalid default_query_exec_mode: %s", s) + } + } + + connConfig := &ConnConfig{ + Config: *config, + createdByParseConfig: true, + StatementCacheCapacity: statementCacheCapacity, + DescriptionCacheCapacity: descriptionCacheCapacity, + DefaultQueryExecMode: defaultQueryExecMode, + connString: connString, + } + + return connConfig, nil +} + +// ParseConfig creates a ConnConfig from a connection string. ParseConfig handles all options that [pgconn.ParseConfig] +// does. In addition, it accepts the following options: +// +// - default_query_exec_mode. +// Possible values: "cache_statement", "cache_describe", "describe_exec", "exec", and "simple_protocol". See +// QueryExecMode constant documentation for the meaning of these values. Default: "cache_statement". +// +// - statement_cache_capacity. +// The maximum size of the statement cache used when executing a query with "cache_statement" query exec mode. +// Default: 512. +// +// - description_cache_capacity. +// The maximum size of the description cache used when executing a query with "cache_describe" query exec mode. +// Default: 512. +func ParseConfig(connString string) (*ConnConfig, error) { + return ParseConfigWithOptions(connString, ParseConfigOptions{}) +} + +// connect connects to a database. connect takes ownership of config. The caller must not use or access it again. +func connect(ctx context.Context, config *ConnConfig) (c *Conn, err error) { + if connectTracer, ok := config.Tracer.(ConnectTracer); ok { + ctx = connectTracer.TraceConnectStart(ctx, TraceConnectStartData{ConnConfig: config}) + defer func() { + connectTracer.TraceConnectEnd(ctx, TraceConnectEndData{Conn: c, Err: err}) + }() + } + + // Default values are set in ParseConfig. Enforce initial creation by ParseConfig rather than setting defaults from + // zero values. + if !config.createdByParseConfig { + panic("config must be created by ParseConfig") + } + + c = &Conn{ + config: config, + typeMap: pgtype.NewMap(), + queryTracer: config.Tracer, + } + + if t, ok := c.queryTracer.(BatchTracer); ok { + c.batchTracer = t + } + if t, ok := c.queryTracer.(CopyFromTracer); ok { + c.copyFromTracer = t + } + if t, ok := c.queryTracer.(PrepareTracer); ok { + c.prepareTracer = t + } + + // Only install pgx notification system if no other callback handler is present. + if config.Config.OnNotification == nil { + config.Config.OnNotification = c.bufferNotifications + } + + c.pgConn, err = pgconn.ConnectConfig(ctx, &config.Config) + if err != nil { + return nil, err + } + + c.preparedStatements = make(map[string]*pgconn.StatementDescription) + c.doneChan = make(chan struct{}) + c.closedChan = make(chan error) + c.wbuf = make([]byte, 0, 1024) + + if c.config.StatementCacheCapacity > 0 { + c.statementCache = stmtcache.NewLRUCache(c.config.StatementCacheCapacity) + } + + if c.config.DescriptionCacheCapacity > 0 { + c.descriptionCache = stmtcache.NewLRUCache(c.config.DescriptionCacheCapacity) + } + + return c, nil +} + +// Close closes a connection. It is safe to call Close on an already closed +// connection. +func (c *Conn) Close(ctx context.Context) error { + if c.IsClosed() { + return nil + } + + err := c.pgConn.Close(ctx) + return err +} + +// Prepare creates a prepared statement with name and sql. sql can contain placeholders for bound parameters. These +// placeholders are referenced positionally as $1, $2, etc. name can be used instead of sql with Query, QueryRow, and +// Exec to execute the statement. It can also be used with Batch.Queue. +// +// The underlying PostgreSQL identifier for the prepared statement will be name if name != sql or a digest of sql if +// name == sql. +// +// Prepare is idempotent; i.e. it is safe to call Prepare multiple times with the same name and sql arguments. This +// allows a code path to Prepare and Query/Exec without concern for if the statement has already been prepared. +func (c *Conn) Prepare(ctx context.Context, name, sql string) (sd *pgconn.StatementDescription, err error) { + if c.prepareTracer != nil { + ctx = c.prepareTracer.TracePrepareStart(ctx, c, TracePrepareStartData{Name: name, SQL: sql}) + } + + if name != "" { + var ok bool + if sd, ok = c.preparedStatements[name]; ok && sd.SQL == sql { + if c.prepareTracer != nil { + c.prepareTracer.TracePrepareEnd(ctx, c, TracePrepareEndData{AlreadyPrepared: true}) + } + return sd, nil + } + } + + if c.prepareTracer != nil { + defer func() { + c.prepareTracer.TracePrepareEnd(ctx, c, TracePrepareEndData{Err: err}) + }() + } + + var psName, psKey string + if name == sql { + digest := sha256.Sum256([]byte(sql)) + psName = "stmt_" + hex.EncodeToString(digest[0:24]) + psKey = sql + } else { + psName = name + psKey = name + } + + sd, err = c.pgConn.Prepare(ctx, psName, sql, nil) + if err != nil { + return nil, err + } + + if psKey != "" { + c.preparedStatements[psKey] = sd + } + + return sd, nil +} + +// Deallocate releases a prepared statement. Calling Deallocate on a non-existent prepared statement will succeed. +func (c *Conn) Deallocate(ctx context.Context, name string) error { + var psName string + sd := c.preparedStatements[name] + if sd != nil { + psName = sd.Name + } else { + psName = name + } + + err := c.pgConn.Deallocate(ctx, psName) + if err != nil { + return err + } + + if sd != nil { + delete(c.preparedStatements, name) + } + + return nil +} + +// DeallocateAll releases all previously prepared statements from the server and client, where it also resets the statement and description cache. +func (c *Conn) DeallocateAll(ctx context.Context) error { + c.preparedStatements = map[string]*pgconn.StatementDescription{} + if c.config.StatementCacheCapacity > 0 { + c.statementCache = stmtcache.NewLRUCache(c.config.StatementCacheCapacity) + } + if c.config.DescriptionCacheCapacity > 0 { + c.descriptionCache = stmtcache.NewLRUCache(c.config.DescriptionCacheCapacity) + } + _, err := c.pgConn.Exec(ctx, "deallocate all").ReadAll() + return err +} + +func (c *Conn) bufferNotifications(_ *pgconn.PgConn, n *pgconn.Notification) { + c.notifications = append(c.notifications, n) +} + +// WaitForNotification waits for a PostgreSQL notification. It wraps the underlying pgconn notification system in a +// slightly more convenient form. +func (c *Conn) WaitForNotification(ctx context.Context) (*pgconn.Notification, error) { + var n *pgconn.Notification + + // Return already received notification immediately + if len(c.notifications) > 0 { + n = c.notifications[0] + c.notifications = c.notifications[1:] + return n, nil + } + + err := c.pgConn.WaitForNotification(ctx) + if len(c.notifications) > 0 { + n = c.notifications[0] + c.notifications = c.notifications[1:] + } + return n, err +} + +// IsClosed reports if the connection has been closed. +func (c *Conn) IsClosed() bool { + return c.pgConn.IsClosed() +} + +func (c *Conn) die(err error) { + if c.IsClosed() { + return + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // force immediate hard cancel + c.pgConn.Close(ctx) +} + +func quoteIdentifier(s string) string { + return `"` + strings.ReplaceAll(s, `"`, `""`) + `"` +} + +// Ping delegates to the underlying *pgconn.PgConn.Ping. +func (c *Conn) Ping(ctx context.Context) error { + return c.pgConn.Ping(ctx) +} + +// PgConn returns the underlying *pgconn.PgConn. This is an escape hatch method that allows lower level access to the +// PostgreSQL connection than pgx exposes. +// +// It is strongly recommended that the connection be idle (no in-progress queries) before the underlying *pgconn.PgConn +// is used and the connection must be returned to the same state before any *pgx.Conn methods are again used. +func (c *Conn) PgConn() *pgconn.PgConn { return c.pgConn } + +// TypeMap returns the connection info used for this connection. +func (c *Conn) TypeMap() *pgtype.Map { return c.typeMap } + +// Config returns a copy of config that was used to establish this connection. +func (c *Conn) Config() *ConnConfig { return c.config.Copy() } + +// Exec executes sql. sql can be either a prepared statement name or an SQL string. arguments should be referenced +// positionally from the sql string as $1, $2, etc. +func (c *Conn) Exec(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) { + if c.queryTracer != nil { + ctx = c.queryTracer.TraceQueryStart(ctx, c, TraceQueryStartData{SQL: sql, Args: arguments}) + } + + if err := c.deallocateInvalidatedCachedStatements(ctx); err != nil { + return pgconn.CommandTag{}, err + } + + commandTag, err := c.exec(ctx, sql, arguments...) + + if c.queryTracer != nil { + c.queryTracer.TraceQueryEnd(ctx, c, TraceQueryEndData{CommandTag: commandTag, Err: err}) + } + + return commandTag, err +} + +func (c *Conn) exec(ctx context.Context, sql string, arguments ...any) (commandTag pgconn.CommandTag, err error) { + mode := c.config.DefaultQueryExecMode + var queryRewriter QueryRewriter + +optionLoop: + for len(arguments) > 0 { + switch arg := arguments[0].(type) { + case QueryExecMode: + mode = arg + arguments = arguments[1:] + case QueryRewriter: + queryRewriter = arg + arguments = arguments[1:] + default: + break optionLoop + } + } + + if queryRewriter != nil { + sql, arguments, err = queryRewriter.RewriteQuery(ctx, c, sql, arguments) + if err != nil { + return pgconn.CommandTag{}, fmt.Errorf("rewrite query failed: %w", err) + } + } + + // Always use simple protocol when there are no arguments. + if len(arguments) == 0 { + mode = QueryExecModeSimpleProtocol + } + + if sd, ok := c.preparedStatements[sql]; ok { + return c.execPrepared(ctx, sd, arguments) + } + + switch mode { + case QueryExecModeCacheStatement: + if c.statementCache == nil { + return pgconn.CommandTag{}, errDisabledStatementCache + } + sd := c.statementCache.Get(sql) + if sd == nil { + sd, err = c.Prepare(ctx, stmtcache.StatementName(sql), sql) + if err != nil { + return pgconn.CommandTag{}, err + } + c.statementCache.Put(sd) + } + + return c.execPrepared(ctx, sd, arguments) + case QueryExecModeCacheDescribe: + if c.descriptionCache == nil { + return pgconn.CommandTag{}, errDisabledDescriptionCache + } + sd := c.descriptionCache.Get(sql) + if sd == nil { + sd, err = c.Prepare(ctx, "", sql) + if err != nil { + return pgconn.CommandTag{}, err + } + c.descriptionCache.Put(sd) + } + + return c.execParams(ctx, sd, arguments) + case QueryExecModeDescribeExec: + sd, err := c.Prepare(ctx, "", sql) + if err != nil { + return pgconn.CommandTag{}, err + } + return c.execPrepared(ctx, sd, arguments) + case QueryExecModeExec: + return c.execSQLParams(ctx, sql, arguments) + case QueryExecModeSimpleProtocol: + return c.execSimpleProtocol(ctx, sql, arguments) + default: + return pgconn.CommandTag{}, fmt.Errorf("unknown QueryExecMode: %v", mode) + } +} + +func (c *Conn) execSimpleProtocol(ctx context.Context, sql string, arguments []any) (commandTag pgconn.CommandTag, err error) { + if len(arguments) > 0 { + sql, err = c.sanitizeForSimpleQuery(sql, arguments...) + if err != nil { + return pgconn.CommandTag{}, err + } + } + + mrr := c.pgConn.Exec(ctx, sql) + for mrr.NextResult() { + commandTag, _ = mrr.ResultReader().Close() + } + err = mrr.Close() + return commandTag, err +} + +func (c *Conn) execParams(ctx context.Context, sd *pgconn.StatementDescription, arguments []any) (pgconn.CommandTag, error) { + err := c.eqb.Build(c.typeMap, sd, arguments) + if err != nil { + return pgconn.CommandTag{}, err + } + + result := c.pgConn.ExecParams(ctx, sd.SQL, c.eqb.ParamValues, sd.ParamOIDs, c.eqb.ParamFormats, c.eqb.ResultFormats).Read() + c.eqb.reset() // Allow c.eqb internal memory to be GC'ed as soon as possible. + return result.CommandTag, result.Err +} + +func (c *Conn) execPrepared(ctx context.Context, sd *pgconn.StatementDescription, arguments []any) (pgconn.CommandTag, error) { + err := c.eqb.Build(c.typeMap, sd, arguments) + if err != nil { + return pgconn.CommandTag{}, err + } + + result := c.pgConn.ExecPrepared(ctx, sd.Name, c.eqb.ParamValues, c.eqb.ParamFormats, c.eqb.ResultFormats).Read() + c.eqb.reset() // Allow c.eqb internal memory to be GC'ed as soon as possible. + return result.CommandTag, result.Err +} + +type unknownArgumentTypeQueryExecModeExecError struct { + arg any +} + +func (e *unknownArgumentTypeQueryExecModeExecError) Error() string { + return fmt.Sprintf("cannot use unregistered type %T as query argument in QueryExecModeExec", e.arg) +} + +func (c *Conn) execSQLParams(ctx context.Context, sql string, args []any) (pgconn.CommandTag, error) { + err := c.eqb.Build(c.typeMap, nil, args) + if err != nil { + return pgconn.CommandTag{}, err + } + + result := c.pgConn.ExecParams(ctx, sql, c.eqb.ParamValues, nil, c.eqb.ParamFormats, c.eqb.ResultFormats).Read() + c.eqb.reset() // Allow c.eqb internal memory to be GC'ed as soon as possible. + return result.CommandTag, result.Err +} + +func (c *Conn) getRows(ctx context.Context, sql string, args []any) *baseRows { + r := &baseRows{} + + r.ctx = ctx + r.queryTracer = c.queryTracer + r.typeMap = c.typeMap + r.startTime = time.Now() + r.sql = sql + r.args = args + r.conn = c + + return r +} + +type QueryExecMode int32 + +const ( + _ QueryExecMode = iota + + // Automatically prepare and cache statements. This uses the extended protocol. Queries are executed in a single round + // trip after the statement is cached. This is the default. If the database schema is modified or the search_path is + // changed after a statement is cached then the first execution of a previously cached query may fail. e.g. If the + // number of columns returned by a "SELECT *" changes or the type of a column is changed. + QueryExecModeCacheStatement + + // Cache statement descriptions (i.e. argument and result types) and assume they do not change. This uses the extended + // protocol. Queries are executed in a single round trip after the description is cached. If the database schema is + // modified or the search_path is changed after a statement is cached then the first execution of a previously cached + // query may fail. e.g. If the number of columns returned by a "SELECT *" changes or the type of a column is changed. + QueryExecModeCacheDescribe + + // Get the statement description on every execution. This uses the extended protocol. Queries require two round trips + // to execute. It does not use named prepared statements. But it does use the unnamed prepared statement to get the + // statement description on the first round trip and then uses it to execute the query on the second round trip. This + // may cause problems with connection poolers that switch the underlying connection between round trips. It is safe + // even when the database schema is modified concurrently. + QueryExecModeDescribeExec + + // Assume the PostgreSQL query parameter types based on the Go type of the arguments. This uses the extended protocol + // with text formatted parameters and results. Queries are executed in a single round trip. Type mappings can be + // registered with pgtype.Map.RegisterDefaultPgType. Queries will be rejected that have arguments that are + // unregistered or ambiguous. e.g. A map[string]string may have the PostgreSQL type json or hstore. Modes that know + // the PostgreSQL type can use a map[string]string directly as an argument. This mode cannot. + QueryExecModeExec + + // Use the simple protocol. Assume the PostgreSQL query parameter types based on the Go type of the arguments. + // Queries are executed in a single round trip. Type mappings can be registered with + // pgtype.Map.RegisterDefaultPgType. Queries will be rejected that have arguments that are unregistered or ambiguous. + // e.g. A map[string]string may have the PostgreSQL type json or hstore. Modes that know the PostgreSQL type can use + // a map[string]string directly as an argument. This mode cannot. + // + // QueryExecModeSimpleProtocol should have the user application visible behavior as QueryExecModeExec with minor + // exceptions such as behavior when multiple result returning queries are erroneously sent in a single string. + // + // QueryExecModeSimpleProtocol uses client side parameter interpolation. All values are quoted and escaped. Prefer + // QueryExecModeExec over QueryExecModeSimpleProtocol whenever possible. In general QueryExecModeSimpleProtocol + // should only be used if connecting to a proxy server, connection pool server, or non-PostgreSQL server that does + // not support the extended protocol. + QueryExecModeSimpleProtocol +) + +func (m QueryExecMode) String() string { + switch m { + case QueryExecModeCacheStatement: + return "cache statement" + case QueryExecModeCacheDescribe: + return "cache describe" + case QueryExecModeDescribeExec: + return "describe exec" + case QueryExecModeExec: + return "exec" + case QueryExecModeSimpleProtocol: + return "simple protocol" + default: + return "invalid" + } +} + +// QueryResultFormats controls the result format (text=0, binary=1) of a query by result column position. +type QueryResultFormats []int16 + +// QueryResultFormatsByOID controls the result format (text=0, binary=1) of a query by the result column OID. +type QueryResultFormatsByOID map[uint32]int16 + +// QueryRewriter rewrites a query when used as the first arguments to a query method. +type QueryRewriter interface { + RewriteQuery(ctx context.Context, conn *Conn, sql string, args []any) (newSQL string, newArgs []any, err error) +} + +// Query sends a query to the server and returns a Rows to read the results. Only errors encountered sending the query +// and initializing Rows will be returned. Err() on the returned Rows must be checked after the Rows is closed to +// determine if the query executed successfully. +// +// The returned Rows must be closed before the connection can be used again. It is safe to attempt to read from the +// returned Rows even if an error is returned. The error will be the available in rows.Err() after rows are closed. It +// is allowed to ignore the error returned from Query and handle it in Rows. +// +// It is possible for a call of FieldDescriptions on the returned Rows to return nil even if the Query call did not +// return an error. +// +// It is possible for a query to return one or more rows before encountering an error. In most cases the rows should be +// collected before processing rather than processed while receiving each row. This avoids the possibility of the +// application processing rows from a query that the server rejected. The CollectRows function is useful here. +// +// An implementor of QueryRewriter may be passed as the first element of args. It can rewrite the sql and change or +// replace args. For example, NamedArgs is QueryRewriter that implements named arguments. +// +// For extra control over how the query is executed, the types QueryExecMode, QueryResultFormats, and +// QueryResultFormatsByOID may be used as the first args to control exactly how the query is executed. This is rarely +// needed. See the documentation for those types for details. +func (c *Conn) Query(ctx context.Context, sql string, args ...any) (Rows, error) { + if c.queryTracer != nil { + ctx = c.queryTracer.TraceQueryStart(ctx, c, TraceQueryStartData{SQL: sql, Args: args}) + } + + if err := c.deallocateInvalidatedCachedStatements(ctx); err != nil { + if c.queryTracer != nil { + c.queryTracer.TraceQueryEnd(ctx, c, TraceQueryEndData{Err: err}) + } + return &baseRows{err: err, closed: true}, err + } + + var resultFormats QueryResultFormats + var resultFormatsByOID QueryResultFormatsByOID + mode := c.config.DefaultQueryExecMode + var queryRewriter QueryRewriter + +optionLoop: + for len(args) > 0 { + switch arg := args[0].(type) { + case QueryResultFormats: + resultFormats = arg + args = args[1:] + case QueryResultFormatsByOID: + resultFormatsByOID = arg + args = args[1:] + case QueryExecMode: + mode = arg + args = args[1:] + case QueryRewriter: + queryRewriter = arg + args = args[1:] + default: + break optionLoop + } + } + + if queryRewriter != nil { + var err error + originalSQL := sql + originalArgs := args + sql, args, err = queryRewriter.RewriteQuery(ctx, c, sql, args) + if err != nil { + rows := c.getRows(ctx, originalSQL, originalArgs) + err = fmt.Errorf("rewrite query failed: %w", err) + rows.fatal(err) + return rows, err + } + } + + // Bypass any statement caching. + if sql == "" { + mode = QueryExecModeSimpleProtocol + } + + c.eqb.reset() + rows := c.getRows(ctx, sql, args) + + var err error + sd, explicitPreparedStatement := c.preparedStatements[sql] + if sd != nil || mode == QueryExecModeCacheStatement || mode == QueryExecModeCacheDescribe || mode == QueryExecModeDescribeExec { + if sd == nil { + sd, err = c.getStatementDescription(ctx, mode, sql) + if err != nil { + rows.fatal(err) + return rows, err + } + } + + if len(sd.ParamOIDs) != len(args) { + rows.fatal(fmt.Errorf("expected %d arguments, got %d", len(sd.ParamOIDs), len(args))) + return rows, rows.err + } + + rows.sql = sd.SQL + + err = c.eqb.Build(c.typeMap, sd, args) + if err != nil { + rows.fatal(err) + return rows, rows.err + } + + if resultFormatsByOID != nil { + resultFormats = make([]int16, len(sd.Fields)) + for i := range resultFormats { + resultFormats[i] = resultFormatsByOID[uint32(sd.Fields[i].DataTypeOID)] + } + } + + if resultFormats == nil { + resultFormats = c.eqb.ResultFormats + } + + if !explicitPreparedStatement && mode == QueryExecModeCacheDescribe { + rows.resultReader = c.pgConn.ExecParams(ctx, sql, c.eqb.ParamValues, sd.ParamOIDs, c.eqb.ParamFormats, resultFormats) + } else { + rows.resultReader = c.pgConn.ExecPrepared(ctx, sd.Name, c.eqb.ParamValues, c.eqb.ParamFormats, resultFormats) + } + } else if mode == QueryExecModeExec { + err := c.eqb.Build(c.typeMap, nil, args) + if err != nil { + rows.fatal(err) + return rows, rows.err + } + + rows.resultReader = c.pgConn.ExecParams(ctx, sql, c.eqb.ParamValues, nil, c.eqb.ParamFormats, c.eqb.ResultFormats) + } else if mode == QueryExecModeSimpleProtocol { + sql, err = c.sanitizeForSimpleQuery(sql, args...) + if err != nil { + rows.fatal(err) + return rows, err + } + + mrr := c.pgConn.Exec(ctx, sql) + if mrr.NextResult() { + rows.resultReader = mrr.ResultReader() + rows.multiResultReader = mrr + } else { + err = mrr.Close() + rows.fatal(err) + return rows, err + } + + return rows, nil + } else { + err = fmt.Errorf("unknown QueryExecMode: %v", mode) + rows.fatal(err) + return rows, rows.err + } + + c.eqb.reset() // Allow c.eqb internal memory to be GC'ed as soon as possible. + + return rows, rows.err +} + +// getStatementDescription returns the statement description of the sql query +// according to the given mode. +// +// If the mode is one that doesn't require to know the param and result OIDs +// then nil is returned without error. +func (c *Conn) getStatementDescription( + ctx context.Context, + mode QueryExecMode, + sql string, +) (sd *pgconn.StatementDescription, err error) { + switch mode { + case QueryExecModeCacheStatement: + if c.statementCache == nil { + return nil, errDisabledStatementCache + } + sd = c.statementCache.Get(sql) + if sd == nil { + sd, err = c.Prepare(ctx, stmtcache.StatementName(sql), sql) + if err != nil { + return nil, err + } + c.statementCache.Put(sd) + } + case QueryExecModeCacheDescribe: + if c.descriptionCache == nil { + return nil, errDisabledDescriptionCache + } + sd = c.descriptionCache.Get(sql) + if sd == nil { + sd, err = c.Prepare(ctx, "", sql) + if err != nil { + return nil, err + } + c.descriptionCache.Put(sd) + } + case QueryExecModeDescribeExec: + return c.Prepare(ctx, "", sql) + } + return sd, err +} + +// QueryRow is a convenience wrapper over Query. Any error that occurs while +// querying is deferred until calling Scan on the returned Row. That Row will +// error with ErrNoRows if no rows are returned. +func (c *Conn) QueryRow(ctx context.Context, sql string, args ...any) Row { + rows, _ := c.Query(ctx, sql, args...) + return (*connRow)(rows.(*baseRows)) +} + +// SendBatch sends all queued queries to the server at once. All queries are run in an implicit transaction unless +// explicit transaction control statements are executed. The returned BatchResults must be closed before the connection +// is used again. +func (c *Conn) SendBatch(ctx context.Context, b *Batch) (br BatchResults) { + if c.batchTracer != nil { + ctx = c.batchTracer.TraceBatchStart(ctx, c, TraceBatchStartData{Batch: b}) + defer func() { + err := br.(interface{ earlyError() error }).earlyError() + if err != nil { + c.batchTracer.TraceBatchEnd(ctx, c, TraceBatchEndData{Err: err}) + } + }() + } + + if err := c.deallocateInvalidatedCachedStatements(ctx); err != nil { + return &batchResults{ctx: ctx, conn: c, err: err} + } + + for _, bi := range b.QueuedQueries { + var queryRewriter QueryRewriter + sql := bi.SQL + arguments := bi.Arguments + + optionLoop: + for len(arguments) > 0 { + // Update Batch.Queue function comment when additional options are implemented + switch arg := arguments[0].(type) { + case QueryRewriter: + queryRewriter = arg + arguments = arguments[1:] + default: + break optionLoop + } + } + + if queryRewriter != nil { + var err error + sql, arguments, err = queryRewriter.RewriteQuery(ctx, c, sql, arguments) + if err != nil { + return &batchResults{ctx: ctx, conn: c, err: fmt.Errorf("rewrite query failed: %w", err)} + } + } + + bi.SQL = sql + bi.Arguments = arguments + } + + // TODO: changing mode per batch? Update Batch.Queue function comment when implemented + mode := c.config.DefaultQueryExecMode + if mode == QueryExecModeSimpleProtocol { + return c.sendBatchQueryExecModeSimpleProtocol(ctx, b) + } + + // All other modes use extended protocol and thus can use prepared statements. + for _, bi := range b.QueuedQueries { + if sd, ok := c.preparedStatements[bi.SQL]; ok { + bi.sd = sd + } + } + + switch mode { + case QueryExecModeExec: + return c.sendBatchQueryExecModeExec(ctx, b) + case QueryExecModeCacheStatement: + return c.sendBatchQueryExecModeCacheStatement(ctx, b) + case QueryExecModeCacheDescribe: + return c.sendBatchQueryExecModeCacheDescribe(ctx, b) + case QueryExecModeDescribeExec: + return c.sendBatchQueryExecModeDescribeExec(ctx, b) + default: + panic("unknown QueryExecMode") + } +} + +func (c *Conn) sendBatchQueryExecModeSimpleProtocol(ctx context.Context, b *Batch) *batchResults { + var sb strings.Builder + for i, bi := range b.QueuedQueries { + if i > 0 { + sb.WriteByte(';') + } + sql, err := c.sanitizeForSimpleQuery(bi.SQL, bi.Arguments...) + if err != nil { + return &batchResults{ctx: ctx, conn: c, err: err} + } + sb.WriteString(sql) + } + mrr := c.pgConn.Exec(ctx, sb.String()) + return &batchResults{ + ctx: ctx, + conn: c, + mrr: mrr, + b: b, + qqIdx: 0, + } +} + +func (c *Conn) sendBatchQueryExecModeExec(ctx context.Context, b *Batch) *batchResults { + batch := &pgconn.Batch{} + + for _, bi := range b.QueuedQueries { + sd := bi.sd + if sd != nil { + err := c.eqb.Build(c.typeMap, sd, bi.Arguments) + if err != nil { + return &batchResults{ctx: ctx, conn: c, err: err} + } + + batch.ExecPrepared(sd.Name, c.eqb.ParamValues, c.eqb.ParamFormats, c.eqb.ResultFormats) + } else { + err := c.eqb.Build(c.typeMap, nil, bi.Arguments) + if err != nil { + return &batchResults{ctx: ctx, conn: c, err: err} + } + batch.ExecParams(bi.SQL, c.eqb.ParamValues, nil, c.eqb.ParamFormats, c.eqb.ResultFormats) + } + } + + c.eqb.reset() // Allow c.eqb internal memory to be GC'ed as soon as possible. + + mrr := c.pgConn.ExecBatch(ctx, batch) + + return &batchResults{ + ctx: ctx, + conn: c, + mrr: mrr, + b: b, + qqIdx: 0, + } +} + +func (c *Conn) sendBatchQueryExecModeCacheStatement(ctx context.Context, b *Batch) (pbr *pipelineBatchResults) { + if c.statementCache == nil { + return &pipelineBatchResults{ctx: ctx, conn: c, err: errDisabledStatementCache, closed: true} + } + + distinctNewQueries := []*pgconn.StatementDescription{} + distinctNewQueriesIdxMap := make(map[string]int) + + for _, bi := range b.QueuedQueries { + if bi.sd == nil { + sd := c.statementCache.Get(bi.SQL) + if sd != nil { + bi.sd = sd + } else { + if idx, present := distinctNewQueriesIdxMap[bi.SQL]; present { + bi.sd = distinctNewQueries[idx] + } else { + sd = &pgconn.StatementDescription{ + Name: stmtcache.StatementName(bi.SQL), + SQL: bi.SQL, + } + distinctNewQueriesIdxMap[sd.SQL] = len(distinctNewQueries) + distinctNewQueries = append(distinctNewQueries, sd) + bi.sd = sd + } + } + } + } + + return c.sendBatchExtendedWithDescription(ctx, b, distinctNewQueries, c.statementCache) +} + +func (c *Conn) sendBatchQueryExecModeCacheDescribe(ctx context.Context, b *Batch) (pbr *pipelineBatchResults) { + if c.descriptionCache == nil { + return &pipelineBatchResults{ctx: ctx, conn: c, err: errDisabledDescriptionCache, closed: true} + } + + distinctNewQueries := []*pgconn.StatementDescription{} + distinctNewQueriesIdxMap := make(map[string]int) + + for _, bi := range b.QueuedQueries { + if bi.sd == nil { + sd := c.descriptionCache.Get(bi.SQL) + if sd != nil { + bi.sd = sd + } else { + if idx, present := distinctNewQueriesIdxMap[bi.SQL]; present { + bi.sd = distinctNewQueries[idx] + } else { + sd = &pgconn.StatementDescription{ + SQL: bi.SQL, + } + distinctNewQueriesIdxMap[sd.SQL] = len(distinctNewQueries) + distinctNewQueries = append(distinctNewQueries, sd) + bi.sd = sd + } + } + } + } + + return c.sendBatchExtendedWithDescription(ctx, b, distinctNewQueries, c.descriptionCache) +} + +func (c *Conn) sendBatchQueryExecModeDescribeExec(ctx context.Context, b *Batch) (pbr *pipelineBatchResults) { + distinctNewQueries := []*pgconn.StatementDescription{} + distinctNewQueriesIdxMap := make(map[string]int) + + for _, bi := range b.QueuedQueries { + if bi.sd == nil { + if idx, present := distinctNewQueriesIdxMap[bi.SQL]; present { + bi.sd = distinctNewQueries[idx] + } else { + sd := &pgconn.StatementDescription{ + SQL: bi.SQL, + } + distinctNewQueriesIdxMap[sd.SQL] = len(distinctNewQueries) + distinctNewQueries = append(distinctNewQueries, sd) + bi.sd = sd + } + } + } + + return c.sendBatchExtendedWithDescription(ctx, b, distinctNewQueries, nil) +} + +func (c *Conn) sendBatchExtendedWithDescription(ctx context.Context, b *Batch, distinctNewQueries []*pgconn.StatementDescription, sdCache stmtcache.Cache) (pbr *pipelineBatchResults) { + pipeline := c.pgConn.StartPipeline(ctx) + defer func() { + if pbr != nil && pbr.err != nil { + pipeline.Close() + } + }() + + // Prepare any needed queries + if len(distinctNewQueries) > 0 { + for _, sd := range distinctNewQueries { + pipeline.SendPrepare(sd.Name, sd.SQL, nil) + } + + err := pipeline.Sync() + if err != nil { + return &pipelineBatchResults{ctx: ctx, conn: c, err: err, closed: true} + } + + for _, sd := range distinctNewQueries { + results, err := pipeline.GetResults() + if err != nil { + return &pipelineBatchResults{ctx: ctx, conn: c, err: err, closed: true} + } + + resultSD, ok := results.(*pgconn.StatementDescription) + if !ok { + return &pipelineBatchResults{ctx: ctx, conn: c, err: fmt.Errorf("expected statement description, got %T", results), closed: true} + } + + // Fill in the previously empty / pending statement descriptions. + sd.ParamOIDs = resultSD.ParamOIDs + sd.Fields = resultSD.Fields + } + + results, err := pipeline.GetResults() + if err != nil { + return &pipelineBatchResults{ctx: ctx, conn: c, err: err, closed: true} + } + + _, ok := results.(*pgconn.PipelineSync) + if !ok { + return &pipelineBatchResults{ctx: ctx, conn: c, err: fmt.Errorf("expected sync, got %T", results), closed: true} + } + } + + // Put all statements into the cache. It's fine if it overflows because HandleInvalidated will clean them up later. + if sdCache != nil { + for _, sd := range distinctNewQueries { + sdCache.Put(sd) + } + } + + // Queue the queries. + for _, bi := range b.QueuedQueries { + err := c.eqb.Build(c.typeMap, bi.sd, bi.Arguments) + if err != nil { + // we wrap the error so we the user can understand which query failed inside the batch + err = fmt.Errorf("error building query %s: %w", bi.SQL, err) + return &pipelineBatchResults{ctx: ctx, conn: c, err: err, closed: true} + } + + if bi.sd.Name == "" { + pipeline.SendQueryParams(bi.sd.SQL, c.eqb.ParamValues, bi.sd.ParamOIDs, c.eqb.ParamFormats, c.eqb.ResultFormats) + } else { + pipeline.SendQueryPrepared(bi.sd.Name, c.eqb.ParamValues, c.eqb.ParamFormats, c.eqb.ResultFormats) + } + } + + err := pipeline.Sync() + if err != nil { + return &pipelineBatchResults{ctx: ctx, conn: c, err: err, closed: true} + } + + return &pipelineBatchResults{ + ctx: ctx, + conn: c, + pipeline: pipeline, + b: b, + } +} + +func (c *Conn) sanitizeForSimpleQuery(sql string, args ...any) (string, error) { + if c.pgConn.ParameterStatus("standard_conforming_strings") != "on" { + return "", errors.New("simple protocol queries must be run with standard_conforming_strings=on") + } + + if c.pgConn.ParameterStatus("client_encoding") != "UTF8" { + return "", errors.New("simple protocol queries must be run with client_encoding=UTF8") + } + + var err error + valueArgs := make([]any, len(args)) + for i, a := range args { + valueArgs[i], err = convertSimpleArgument(c.typeMap, a) + if err != nil { + return "", err + } + } + + return sanitize.SanitizeSQL(sql, valueArgs...) +} + +// LoadType inspects the database for typeName and produces a pgtype.Type suitable for registration. typeName must be +// the name of a type where the underlying type(s) is already understood by pgx. It is for derived types. In particular, +// typeName must be one of the following: +// - An array type name of a type that is already registered. e.g. "_foo" when "foo" is registered. +// - A composite type name where all field types are already registered. +// - A domain type name where the base type is already registered. +// - An enum type name. +// - A range type name where the element type is already registered. +// - A multirange type name where the element type is already registered. +func (c *Conn) LoadType(ctx context.Context, typeName string) (*pgtype.Type, error) { + var oid uint32 + + err := c.QueryRow(ctx, "select $1::text::regtype::oid;", typeName).Scan(&oid) + if err != nil { + return nil, err + } + + var typtype string + var typbasetype uint32 + + err = c.QueryRow(ctx, "select typtype::text, typbasetype from pg_type where oid=$1", oid).Scan(&typtype, &typbasetype) + if err != nil { + return nil, err + } + + switch typtype { + case "b": // array + elementOID, err := c.getArrayElementOID(ctx, oid) + if err != nil { + return nil, err + } + + dt, ok := c.TypeMap().TypeForOID(elementOID) + if !ok { + return nil, errors.New("array element OID not registered") + } + + return &pgtype.Type{Name: typeName, OID: oid, Codec: &pgtype.ArrayCodec{ElementType: dt}}, nil + case "c": // composite + fields, err := c.getCompositeFields(ctx, oid) + if err != nil { + return nil, err + } + + return &pgtype.Type{Name: typeName, OID: oid, Codec: &pgtype.CompositeCodec{Fields: fields}}, nil + case "d": // domain + dt, ok := c.TypeMap().TypeForOID(typbasetype) + if !ok { + return nil, errors.New("domain base type OID not registered") + } + + return &pgtype.Type{Name: typeName, OID: oid, Codec: dt.Codec}, nil + case "e": // enum + return &pgtype.Type{Name: typeName, OID: oid, Codec: &pgtype.EnumCodec{}}, nil + case "r": // range + elementOID, err := c.getRangeElementOID(ctx, oid) + if err != nil { + return nil, err + } + + dt, ok := c.TypeMap().TypeForOID(elementOID) + if !ok { + return nil, errors.New("range element OID not registered") + } + + return &pgtype.Type{Name: typeName, OID: oid, Codec: &pgtype.RangeCodec{ElementType: dt}}, nil + case "m": // multirange + elementOID, err := c.getMultiRangeElementOID(ctx, oid) + if err != nil { + return nil, err + } + + dt, ok := c.TypeMap().TypeForOID(elementOID) + if !ok { + return nil, errors.New("multirange element OID not registered") + } + + return &pgtype.Type{Name: typeName, OID: oid, Codec: &pgtype.MultirangeCodec{ElementType: dt}}, nil + default: + return &pgtype.Type{}, errors.New("unknown typtype") + } +} + +func (c *Conn) getArrayElementOID(ctx context.Context, oid uint32) (uint32, error) { + var typelem uint32 + + err := c.QueryRow(ctx, "select typelem from pg_type where oid=$1", oid).Scan(&typelem) + if err != nil { + return 0, err + } + + return typelem, nil +} + +func (c *Conn) getRangeElementOID(ctx context.Context, oid uint32) (uint32, error) { + var typelem uint32 + + err := c.QueryRow(ctx, "select rngsubtype from pg_range where rngtypid=$1", oid).Scan(&typelem) + if err != nil { + return 0, err + } + + return typelem, nil +} + +func (c *Conn) getMultiRangeElementOID(ctx context.Context, oid uint32) (uint32, error) { + var typelem uint32 + + err := c.QueryRow(ctx, "select rngtypid from pg_range where rngmultitypid=$1", oid).Scan(&typelem) + if err != nil { + return 0, err + } + + return typelem, nil +} + +func (c *Conn) getCompositeFields(ctx context.Context, oid uint32) ([]pgtype.CompositeCodecField, error) { + var typrelid uint32 + + err := c.QueryRow(ctx, "select typrelid from pg_type where oid=$1", oid).Scan(&typrelid) + if err != nil { + return nil, err + } + + var fields []pgtype.CompositeCodecField + var fieldName string + var fieldOID uint32 + rows, _ := c.Query(ctx, `select attname, atttypid +from pg_attribute +where attrelid=$1 + and not attisdropped + and attnum > 0 +order by attnum`, + typrelid, + ) + _, err = ForEachRow(rows, []any{&fieldName, &fieldOID}, func() error { + dt, ok := c.TypeMap().TypeForOID(fieldOID) + if !ok { + return fmt.Errorf("unknown composite type field OID: %v", fieldOID) + } + fields = append(fields, pgtype.CompositeCodecField{Name: fieldName, Type: dt}) + return nil + }) + if err != nil { + return nil, err + } + + return fields, nil +} + +func (c *Conn) deallocateInvalidatedCachedStatements(ctx context.Context) error { + if txStatus := c.pgConn.TxStatus(); txStatus != 'I' && txStatus != 'T' { + return nil + } + + if c.descriptionCache != nil { + c.descriptionCache.RemoveInvalidated() + } + + var invalidatedStatements []*pgconn.StatementDescription + if c.statementCache != nil { + invalidatedStatements = c.statementCache.GetInvalidated() + } + + if len(invalidatedStatements) == 0 { + return nil + } + + pipeline := c.pgConn.StartPipeline(ctx) + defer pipeline.Close() + + for _, sd := range invalidatedStatements { + pipeline.SendDeallocate(sd.Name) + } + + err := pipeline.Sync() + if err != nil { + return fmt.Errorf("failed to deallocate cached statement(s): %w", err) + } + + err = pipeline.Close() + if err != nil { + return fmt.Errorf("failed to deallocate cached statement(s): %w", err) + } + + c.statementCache.RemoveInvalidated() + for _, sd := range invalidatedStatements { + delete(c.preparedStatements, sd.Name) + } + + return nil +} diff --git a/vendor/github.com/jackc/pgx/v4/copy_from.go b/vendor/github.com/jackc/pgx/v5/copy_from.go similarity index 51% rename from vendor/github.com/jackc/pgx/v4/copy_from.go rename to vendor/github.com/jackc/pgx/v5/copy_from.go index 49139d05..abcd2239 100644 --- a/vendor/github.com/jackc/pgx/v4/copy_from.go +++ b/vendor/github.com/jackc/pgx/v5/copy_from.go @@ -5,20 +5,19 @@ import ( "context" "fmt" "io" - "time" - "github.com/jackc/pgconn" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" + "github.com/jackc/pgx/v5/pgconn" ) // CopyFromRows returns a CopyFromSource interface over the provided rows slice // making it usable by *Conn.CopyFrom. -func CopyFromRows(rows [][]interface{}) CopyFromSource { +func CopyFromRows(rows [][]any) CopyFromSource { return ©FromRows{rows: rows, idx: -1} } type copyFromRows struct { - rows [][]interface{} + rows [][]any idx int } @@ -27,7 +26,7 @@ func (ctr *copyFromRows) Next() bool { return ctr.idx < len(ctr.rows) } -func (ctr *copyFromRows) Values() ([]interface{}, error) { +func (ctr *copyFromRows) Values() ([]any, error) { return ctr.rows[ctr.idx], nil } @@ -37,12 +36,12 @@ func (ctr *copyFromRows) Err() error { // CopyFromSlice returns a CopyFromSource interface over a dynamic func // making it usable by *Conn.CopyFrom. -func CopyFromSlice(length int, next func(int) ([]interface{}, error)) CopyFromSource { +func CopyFromSlice(length int, next func(int) ([]any, error)) CopyFromSource { return ©FromSlice{next: next, idx: -1, len: length} } type copyFromSlice struct { - next func(int) ([]interface{}, error) + next func(int) ([]any, error) idx int len int err error @@ -53,7 +52,7 @@ func (cts *copyFromSlice) Next() bool { return cts.idx < cts.len } -func (cts *copyFromSlice) Values() ([]interface{}, error) { +func (cts *copyFromSlice) Values() ([]any, error) { values, err := cts.next(cts.idx) if err != nil { cts.err = err @@ -65,6 +64,33 @@ func (cts *copyFromSlice) Err() error { return cts.err } +// CopyFromFunc returns a CopyFromSource interface that relies on nxtf for values. +// nxtf returns rows until it either signals an 'end of data' by returning row=nil and err=nil, +// or it returns an error. If nxtf returns an error, the copy is aborted. +func CopyFromFunc(nxtf func() (row []any, err error)) CopyFromSource { + return ©FromFunc{next: nxtf} +} + +type copyFromFunc struct { + next func() ([]any, error) + valueRow []any + err error +} + +func (g *copyFromFunc) Next() bool { + g.valueRow, g.err = g.next() + // only return true if valueRow exists and no error + return g.valueRow != nil && g.err == nil +} + +func (g *copyFromFunc) Values() ([]any, error) { + return g.valueRow, g.err +} + +func (g *copyFromFunc) Err() error { + return g.err +} + // CopyFromSource is the interface used by *Conn.CopyFrom as the source for copy data. type CopyFromSource interface { // Next returns true if there is another row and makes the next row data @@ -73,7 +99,7 @@ type CopyFromSource interface { Next() bool // Values returns the values for the current row. - Values() ([]interface{}, error) + Values() ([]any, error) // Err returns any error that has been encountered by the CopyFromSource. If // this is not nil *Conn.CopyFrom will abort the copy. @@ -86,9 +112,17 @@ type copyFrom struct { columnNames []string rowSrc CopyFromSource readerErrChan chan error + mode QueryExecMode } func (ct *copyFrom) run(ctx context.Context) (int64, error) { + if ct.conn.copyFromTracer != nil { + ctx = ct.conn.copyFromTracer.TraceCopyFromStart(ctx, ct.conn, TraceCopyFromStartData{ + TableName: ct.tableName, + ColumnNames: ct.columnNames, + }) + } + quotedTableName := ct.tableName.Sanitize() cbuf := &bytes.Buffer{} for i, cn := range ct.columnNames { @@ -99,9 +133,29 @@ func (ct *copyFrom) run(ctx context.Context) (int64, error) { } quotedColumnNames := cbuf.String() - sd, err := ct.conn.Prepare(ctx, "", fmt.Sprintf("select %s from %s", quotedColumnNames, quotedTableName)) - if err != nil { - return 0, err + var sd *pgconn.StatementDescription + switch ct.mode { + case QueryExecModeExec, QueryExecModeSimpleProtocol: + // These modes don't support the binary format. Before the inclusion of the + // QueryExecModes, Conn.Prepare was called on every COPY operation to get + // the OIDs. These prepared statements were not cached. + // + // Since that's the same behavior provided by QueryExecModeDescribeExec, + // we'll default to that mode. + ct.mode = QueryExecModeDescribeExec + fallthrough + case QueryExecModeCacheStatement, QueryExecModeCacheDescribe, QueryExecModeDescribeExec: + var err error + sd, err = ct.conn.getStatementDescription( + ctx, + ct.mode, + fmt.Sprintf("select %s from %s", quotedColumnNames, quotedTableName), + ) + if err != nil { + return 0, fmt.Errorf("statement description failed: %w", err) + } + default: + return 0, fmt.Errorf("unknown QueryExecMode: %v", ct.mode) } r, w := io.Pipe() @@ -145,29 +199,29 @@ func (ct *copyFrom) run(ctx context.Context) (int64, error) { w.Close() }() - startTime := time.Now() - commandTag, err := ct.conn.pgConn.CopyFrom(ctx, r, fmt.Sprintf("copy %s ( %s ) from stdin binary;", quotedTableName, quotedColumnNames)) r.Close() <-doneChan - rowsAffected := commandTag.RowsAffected() - endTime := time.Now() - if err == nil { - if ct.conn.shouldLog(LogLevelInfo) { - ct.conn.log(ctx, LogLevelInfo, "CopyFrom", map[string]interface{}{"tableName": ct.tableName, "columnNames": ct.columnNames, "time": endTime.Sub(startTime), "rowCount": rowsAffected}) - } - } else if ct.conn.shouldLog(LogLevelError) { - ct.conn.log(ctx, LogLevelError, "CopyFrom", map[string]interface{}{"err": err, "tableName": ct.tableName, "columnNames": ct.columnNames, "time": endTime.Sub(startTime)}) + if ct.conn.copyFromTracer != nil { + ct.conn.copyFromTracer.TraceCopyFromEnd(ctx, ct.conn, TraceCopyFromEndData{ + CommandTag: commandTag, + Err: err, + }) } - return rowsAffected, err + return commandTag.RowsAffected(), err } func (ct *copyFrom) buildCopyBuf(buf []byte, sd *pgconn.StatementDescription) (bool, []byte, error) { + const sendBufSize = 65536 - 5 // The packet has a 5-byte header + lastBufLen := 0 + largestRowLen := 0 for ct.rowSrc.Next() { + lastBufLen = len(buf) + values, err := ct.rowSrc.Values() if err != nil { return false, nil, err @@ -178,13 +232,21 @@ func (ct *copyFrom) buildCopyBuf(buf []byte, sd *pgconn.StatementDescription) (b buf = pgio.AppendInt16(buf, int16(len(ct.columnNames))) for i, val := range values { - buf, err = encodePreparedStatementArgument(ct.conn.connInfo, buf, sd.Fields[i].DataTypeOID, val) + buf, err = encodeCopyValue(ct.conn.typeMap, buf, sd.Fields[i].DataTypeOID, val) if err != nil { return false, nil, err } } - if len(buf) > 65536 { + rowLen := len(buf) - lastBufLen + if rowLen > largestRowLen { + largestRowLen = rowLen + } + + // Try not to overflow size of the buffer PgConn.CopyFrom will be reading into. If that happens then the nature of + // io.Pipe means that the next Read will be short. This can lead to pathological send sizes such as 65531, 13, 65531 + // 13, 65531, 13, 65531, 13. + if len(buf) > sendBufSize-largestRowLen { return true, buf, nil } } @@ -192,12 +254,14 @@ func (ct *copyFrom) buildCopyBuf(buf []byte, sd *pgconn.StatementDescription) (b return false, buf, nil } -// CopyFrom uses the PostgreSQL copy protocol to perform bulk data insertion. -// It returns the number of rows copied and an error. +// CopyFrom uses the PostgreSQL copy protocol to perform bulk data insertion. It returns the number of rows copied and +// an error. +// +// CopyFrom requires all values use the binary format. A pgtype.Type that supports the binary format must be registered +// for the type of each column. Almost all types implemented by pgx support the binary format. // -// CopyFrom requires all values use the binary format. Almost all types -// implemented by pgx use the binary format by default. Types implementing -// Encoder can only be used if they encode to the binary format. +// Even though enum types appear to be strings they still must be registered to use with CopyFrom. This can be done with +// Conn.LoadType and pgtype.Map.RegisterType. func (c *Conn) CopyFrom(ctx context.Context, tableName Identifier, columnNames []string, rowSrc CopyFromSource) (int64, error) { ct := ©From{ conn: c, @@ -205,6 +269,7 @@ func (c *Conn) CopyFrom(ctx context.Context, tableName Identifier, columnNames [ columnNames: columnNames, rowSrc: rowSrc, readerErrChan: make(chan error), + mode: c.config.DefaultQueryExecMode, } return ct.run(ctx) diff --git a/vendor/github.com/jackc/pgx/v5/derived_types.go b/vendor/github.com/jackc/pgx/v5/derived_types.go new file mode 100644 index 00000000..22ab069c --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/derived_types.go @@ -0,0 +1,262 @@ +package pgx + +import ( + "context" + "fmt" + "regexp" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/pgtype" +) + +/* +buildLoadDerivedTypesSQL generates the correct query for retrieving type information. + + pgVersion: the major version of the PostgreSQL server + typeNames: the names of the types to load. If nil, load all types. +*/ +func buildLoadDerivedTypesSQL(pgVersion int64, typeNames []string) string { + supportsMultirange := (pgVersion >= 14) + var typeNamesClause string + + if typeNames == nil { + // This should not occur; this will not return any types + typeNamesClause = "= ''" + } else { + typeNamesClause = "= ANY($1)" + } + parts := make([]string, 0, 10) + + // Each of the type names provided might be found in pg_class or pg_type. + // Additionally, it may or may not include a schema portion. + parts = append(parts, ` +WITH RECURSIVE +-- find the OIDs in pg_class which match one of the provided type names +selected_classes(oid,reltype) AS ( + -- this query uses the namespace search path, so will match type names without a schema prefix + SELECT pg_class.oid, pg_class.reltype + FROM pg_catalog.pg_class + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = pg_class.relnamespace + WHERE pg_catalog.pg_table_is_visible(pg_class.oid) + AND relname `, typeNamesClause, ` +UNION ALL + -- this query will only match type names which include the schema prefix + SELECT pg_class.oid, pg_class.reltype + FROM pg_class + INNER JOIN pg_namespace ON (pg_class.relnamespace = pg_namespace.oid) + WHERE nspname || '.' || relname `, typeNamesClause, ` +), +selected_types(oid) AS ( + -- collect the OIDs from pg_types which correspond to the selected classes + SELECT reltype AS oid + FROM selected_classes +UNION ALL + -- as well as any other type names which match our criteria + SELECT pg_type.oid + FROM pg_type + LEFT OUTER JOIN pg_namespace ON (pg_type.typnamespace = pg_namespace.oid) + WHERE typname `, typeNamesClause, ` + OR nspname || '.' || typname `, typeNamesClause, ` +), +-- this builds a parent/child mapping of objects, allowing us to know +-- all the child (ie: dependent) types that a parent (type) requires +-- As can be seen, there are 3 ways this can occur (the last of which +-- is due to being a composite class, where the composite fields are children) +pc(parent, child) AS ( + SELECT parent.oid, parent.typelem + FROM pg_type parent + WHERE parent.typtype = 'b' AND parent.typelem != 0 +UNION ALL + SELECT parent.oid, parent.typbasetype + FROM pg_type parent + WHERE parent.typtypmod = -1 AND parent.typbasetype != 0 +UNION ALL + SELECT pg_type.oid, atttypid + FROM pg_attribute + INNER JOIN pg_class ON (pg_class.oid = pg_attribute.attrelid) + INNER JOIN pg_type ON (pg_type.oid = pg_class.reltype) + WHERE NOT attisdropped + AND attnum > 0 +), +-- Now construct a recursive query which includes a 'depth' element. +-- This is used to ensure that the "youngest" children are registered before +-- their parents. +relationships(parent, child, depth) AS ( + SELECT DISTINCT 0::OID, selected_types.oid, 0 + FROM selected_types +UNION ALL + SELECT pg_type.oid AS parent, pg_attribute.atttypid AS child, 1 + FROM selected_classes c + inner join pg_type ON (c.reltype = pg_type.oid) + inner join pg_attribute on (c.oid = pg_attribute.attrelid) +UNION ALL + SELECT pc.parent, pc.child, relationships.depth + 1 + FROM pc + INNER JOIN relationships ON (pc.parent = relationships.child) +), +-- composite fields need to be encapsulated as a couple of arrays to provide the required information for registration +composite AS ( + SELECT pg_type.oid, ARRAY_AGG(attname ORDER BY attnum) AS attnames, ARRAY_AGG(atttypid ORDER BY ATTNUM) AS atttypids + FROM pg_attribute + INNER JOIN pg_class ON (pg_class.oid = pg_attribute.attrelid) + INNER JOIN pg_type ON (pg_type.oid = pg_class.reltype) + WHERE NOT attisdropped + AND attnum > 0 + GROUP BY pg_type.oid +) +-- Bring together this information, showing all the information which might possibly be required +-- to complete the registration, applying filters to only show the items which relate to the selected +-- types/classes. +SELECT typname, + pg_namespace.nspname, + typtype, + typbasetype, + typelem, + pg_type.oid,`) + if supportsMultirange { + parts = append(parts, ` + COALESCE(multirange.rngtypid, 0) AS rngtypid,`) + } else { + parts = append(parts, ` + 0 AS rngtypid,`) + } + parts = append(parts, ` + COALESCE(pg_range.rngsubtype, 0) AS rngsubtype, + attnames, atttypids + FROM relationships + INNER JOIN pg_type ON (pg_type.oid = relationships.child) + LEFT OUTER JOIN pg_range ON (pg_type.oid = pg_range.rngtypid)`) + if supportsMultirange { + parts = append(parts, ` + LEFT OUTER JOIN pg_range multirange ON (pg_type.oid = multirange.rngmultitypid)`) + } + + parts = append(parts, ` + LEFT OUTER JOIN composite USING (oid) + LEFT OUTER JOIN pg_namespace ON (pg_type.typnamespace = pg_namespace.oid) + WHERE NOT (typtype = 'b' AND typelem = 0)`) + parts = append(parts, ` + GROUP BY typname, pg_namespace.nspname, typtype, typbasetype, typelem, pg_type.oid, pg_range.rngsubtype,`) + if supportsMultirange { + parts = append(parts, ` + multirange.rngtypid,`) + } + parts = append(parts, ` + attnames, atttypids + ORDER BY MAX(depth) desc, typname;`) + return strings.Join(parts, "") +} + +type derivedTypeInfo struct { + Oid, Typbasetype, Typelem, Rngsubtype, Rngtypid uint32 + TypeName, Typtype, NspName string + Attnames []string + Atttypids []uint32 +} + +// LoadTypes performs a single (complex) query, returning all the required +// information to register the named types, as well as any other types directly +// or indirectly required to complete the registration. +// The result of this call can be passed into RegisterTypes to complete the process. +func (c *Conn) LoadTypes(ctx context.Context, typeNames []string) ([]*pgtype.Type, error) { + m := c.TypeMap() + if typeNames == nil || len(typeNames) == 0 { + return nil, fmt.Errorf("No type names were supplied.") + } + + // Disregard server version errors. This will result in + // the SQL not support recent structures such as multirange + serverVersion, _ := serverVersion(c) + sql := buildLoadDerivedTypesSQL(serverVersion, typeNames) + var rows Rows + var err error + if typeNames == nil { + rows, err = c.Query(ctx, sql, QueryExecModeSimpleProtocol) + } else { + rows, err = c.Query(ctx, sql, QueryExecModeSimpleProtocol, typeNames) + } + if err != nil { + return nil, fmt.Errorf("While generating load types query: %w", err) + } + defer rows.Close() + result := make([]*pgtype.Type, 0, 100) + for rows.Next() { + ti := derivedTypeInfo{} + err = rows.Scan(&ti.TypeName, &ti.NspName, &ti.Typtype, &ti.Typbasetype, &ti.Typelem, &ti.Oid, &ti.Rngtypid, &ti.Rngsubtype, &ti.Attnames, &ti.Atttypids) + if err != nil { + return nil, fmt.Errorf("While scanning type information: %w", err) + } + var type_ *pgtype.Type + switch ti.Typtype { + case "b": // array + dt, ok := m.TypeForOID(ti.Typelem) + if !ok { + return nil, fmt.Errorf("Array element OID %v not registered while loading pgtype %q", ti.Typelem, ti.TypeName) + } + type_ = &pgtype.Type{Name: ti.TypeName, OID: ti.Oid, Codec: &pgtype.ArrayCodec{ElementType: dt}} + case "c": // composite + var fields []pgtype.CompositeCodecField + for i, fieldName := range ti.Attnames { + dt, ok := m.TypeForOID(ti.Atttypids[i]) + if !ok { + return nil, fmt.Errorf("Unknown field for composite type %q: field %q (OID %v) is not already registered.", ti.TypeName, fieldName, ti.Atttypids[i]) + } + fields = append(fields, pgtype.CompositeCodecField{Name: fieldName, Type: dt}) + } + + type_ = &pgtype.Type{Name: ti.TypeName, OID: ti.Oid, Codec: &pgtype.CompositeCodec{Fields: fields}} + case "d": // domain + dt, ok := m.TypeForOID(ti.Typbasetype) + if !ok { + return nil, fmt.Errorf("Domain base type OID %v was not already registered, needed for %q", ti.Typbasetype, ti.TypeName) + } + + type_ = &pgtype.Type{Name: ti.TypeName, OID: ti.Oid, Codec: dt.Codec} + case "e": // enum + type_ = &pgtype.Type{Name: ti.TypeName, OID: ti.Oid, Codec: &pgtype.EnumCodec{}} + case "r": // range + dt, ok := m.TypeForOID(ti.Rngsubtype) + if !ok { + return nil, fmt.Errorf("Range element OID %v was not already registered, needed for %q", ti.Rngsubtype, ti.TypeName) + } + + type_ = &pgtype.Type{Name: ti.TypeName, OID: ti.Oid, Codec: &pgtype.RangeCodec{ElementType: dt}} + case "m": // multirange + dt, ok := m.TypeForOID(ti.Rngtypid) + if !ok { + return nil, fmt.Errorf("Multirange element OID %v was not already registered, needed for %q", ti.Rngtypid, ti.TypeName) + } + + type_ = &pgtype.Type{Name: ti.TypeName, OID: ti.Oid, Codec: &pgtype.MultirangeCodec{ElementType: dt}} + default: + return nil, fmt.Errorf("Unknown typtype %q was found while registering %q", ti.Typtype, ti.TypeName) + } + if type_ != nil { + m.RegisterType(type_) + if ti.NspName != "" { + nspType := &pgtype.Type{Name: ti.NspName + "." + type_.Name, OID: type_.OID, Codec: type_.Codec} + m.RegisterType(nspType) + result = append(result, nspType) + } + result = append(result, type_) + } + } + return result, nil +} + +// serverVersion returns the postgresql server version. +func serverVersion(c *Conn) (int64, error) { + serverVersionStr := c.PgConn().ParameterStatus("server_version") + serverVersionStr = regexp.MustCompile(`^[0-9]+`).FindString(serverVersionStr) + // if not PostgreSQL do nothing + if serverVersionStr == "" { + return 0, fmt.Errorf("Cannot identify server version in %q", serverVersionStr) + } + + version, err := strconv.ParseInt(serverVersionStr, 10, 64) + if err != nil { + return 0, fmt.Errorf("postgres version parsing failed: %w", err) + } + return version, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/doc.go b/vendor/github.com/jackc/pgx/v5/doc.go new file mode 100644 index 00000000..0e91d64e --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/doc.go @@ -0,0 +1,194 @@ +// Package pgx is a PostgreSQL database driver. +/* +pgx provides a native PostgreSQL driver and can act as a database/sql driver. The native PostgreSQL interface is similar +to the database/sql interface while providing better speed and access to PostgreSQL specific features. Use +github.com/jackc/pgx/v5/stdlib to use pgx as a database/sql compatible driver. See that package's documentation for +details. + +Establishing a Connection + +The primary way of establishing a connection is with [pgx.Connect]: + + conn, err := pgx.Connect(context.Background(), os.Getenv("DATABASE_URL")) + +The database connection string can be in URL or key/value format. Both PostgreSQL settings and pgx settings can be +specified here. In addition, a config struct can be created by [ParseConfig] and modified before establishing the +connection with [ConnectConfig] to configure settings such as tracing that cannot be configured with a connection +string. + +Connection Pool + +[*pgx.Conn] represents a single connection to the database and is not concurrency safe. Use package +github.com/jackc/pgx/v5/pgxpool for a concurrency safe connection pool. + +Query Interface + +pgx implements Query in the familiar database/sql style. However, pgx provides generic functions such as CollectRows and +ForEachRow that are a simpler and safer way of processing rows than manually calling defer rows.Close(), rows.Next(), +rows.Scan, and rows.Err(). + +CollectRows can be used collect all returned rows into a slice. + + rows, _ := conn.Query(context.Background(), "select generate_series(1,$1)", 5) + numbers, err := pgx.CollectRows(rows, pgx.RowTo[int32]) + if err != nil { + return err + } + // numbers => [1 2 3 4 5] + +ForEachRow can be used to execute a callback function for every row. This is often easier than iterating over rows +directly. + + var sum, n int32 + rows, _ := conn.Query(context.Background(), "select generate_series(1,$1)", 10) + _, err := pgx.ForEachRow(rows, []any{&n}, func() error { + sum += n + return nil + }) + if err != nil { + return err + } + +pgx also implements QueryRow in the same style as database/sql. + + var name string + var weight int64 + err := conn.QueryRow(context.Background(), "select name, weight from widgets where id=$1", 42).Scan(&name, &weight) + if err != nil { + return err + } + +Use Exec to execute a query that does not return a result set. + + commandTag, err := conn.Exec(context.Background(), "delete from widgets where id=$1", 42) + if err != nil { + return err + } + if commandTag.RowsAffected() != 1 { + return errors.New("No row found to delete") + } + +PostgreSQL Data Types + +pgx uses the pgtype package to converting Go values to and from PostgreSQL values. It supports many PostgreSQL types +directly and is customizable and extendable. User defined data types such as enums, domains, and composite types may +require type registration. See that package's documentation for details. + +Transactions + +Transactions are started by calling Begin. + + tx, err := conn.Begin(context.Background()) + if err != nil { + return err + } + // Rollback is safe to call even if the tx is already closed, so if + // the tx commits successfully, this is a no-op + defer tx.Rollback(context.Background()) + + _, err = tx.Exec(context.Background(), "insert into foo(id) values (1)") + if err != nil { + return err + } + + err = tx.Commit(context.Background()) + if err != nil { + return err + } + +The Tx returned from Begin also implements the Begin method. This can be used to implement pseudo nested transactions. +These are internally implemented with savepoints. + +Use BeginTx to control the transaction mode. BeginTx also can be used to ensure a new transaction is created instead of +a pseudo nested transaction. + +BeginFunc and BeginTxFunc are functions that begin a transaction, execute a function, and commit or rollback the +transaction depending on the return value of the function. These can be simpler and less error prone to use. + + err = pgx.BeginFunc(context.Background(), conn, func(tx pgx.Tx) error { + _, err := tx.Exec(context.Background(), "insert into foo(id) values (1)") + return err + }) + if err != nil { + return err + } + +Prepared Statements + +Prepared statements can be manually created with the Prepare method. However, this is rarely necessary because pgx +includes an automatic statement cache by default. Queries run through the normal Query, QueryRow, and Exec functions are +automatically prepared on first execution and the prepared statement is reused on subsequent executions. See ParseConfig +for information on how to customize or disable the statement cache. + +Copy Protocol + +Use CopyFrom to efficiently insert multiple rows at a time using the PostgreSQL copy protocol. CopyFrom accepts a +CopyFromSource interface. If the data is already in a [][]any use CopyFromRows to wrap it in a CopyFromSource interface. +Or implement CopyFromSource to avoid buffering the entire data set in memory. + + rows := [][]any{ + {"John", "Smith", int32(36)}, + {"Jane", "Doe", int32(29)}, + } + + copyCount, err := conn.CopyFrom( + context.Background(), + pgx.Identifier{"people"}, + []string{"first_name", "last_name", "age"}, + pgx.CopyFromRows(rows), + ) + +When you already have a typed array using CopyFromSlice can be more convenient. + + rows := []User{ + {"John", "Smith", 36}, + {"Jane", "Doe", 29}, + } + + copyCount, err := conn.CopyFrom( + context.Background(), + pgx.Identifier{"people"}, + []string{"first_name", "last_name", "age"}, + pgx.CopyFromSlice(len(rows), func(i int) ([]any, error) { + return []any{rows[i].FirstName, rows[i].LastName, rows[i].Age}, nil + }), + ) + +CopyFrom can be faster than an insert with as few as 5 rows. + +Listen and Notify + +pgx can listen to the PostgreSQL notification system with the `Conn.WaitForNotification` method. It blocks until a +notification is received or the context is canceled. + + _, err := conn.Exec(context.Background(), "listen channelname") + if err != nil { + return err + } + + notification, err := conn.WaitForNotification(context.Background()) + if err != nil { + return err + } + // do something with notification + + +Tracing and Logging + +pgx supports tracing by setting ConnConfig.Tracer. To combine several tracers you can use the multitracer.Tracer. + +In addition, the tracelog package provides the TraceLog type which lets a traditional logger act as a Tracer. + +For debug tracing of the actual PostgreSQL wire protocol messages see github.com/jackc/pgx/v5/pgproto3. + +Lower Level PostgreSQL Functionality + +github.com/jackc/pgx/v5/pgconn contains a lower level PostgreSQL driver roughly at the level of libpq. pgx.Conn in +implemented on top of pgconn. The Conn.PgConn() method can be used to access this lower layer. + +PgBouncer + +By default pgx automatically uses prepared statements. Prepared statements are incompatible with PgBouncer. This can be +disabled by setting a different QueryExecMode in ConnConfig.DefaultQueryExecMode. +*/ +package pgx diff --git a/vendor/github.com/jackc/pgx/v5/extended_query_builder.go b/vendor/github.com/jackc/pgx/v5/extended_query_builder.go new file mode 100644 index 00000000..526b0e95 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/extended_query_builder.go @@ -0,0 +1,146 @@ +package pgx + +import ( + "fmt" + + "github.com/jackc/pgx/v5/pgconn" + "github.com/jackc/pgx/v5/pgtype" +) + +// ExtendedQueryBuilder is used to choose the parameter formats, to format the parameters and to choose the result +// formats for an extended query. +type ExtendedQueryBuilder struct { + ParamValues [][]byte + paramValueBytes []byte + ParamFormats []int16 + ResultFormats []int16 +} + +// Build sets ParamValues, ParamFormats, and ResultFormats for use with *PgConn.ExecParams or *PgConn.ExecPrepared. If +// sd is nil then QueryExecModeExec behavior will be used. +func (eqb *ExtendedQueryBuilder) Build(m *pgtype.Map, sd *pgconn.StatementDescription, args []any) error { + eqb.reset() + + if sd == nil { + for i := range args { + err := eqb.appendParam(m, 0, pgtype.TextFormatCode, args[i]) + if err != nil { + err = fmt.Errorf("failed to encode args[%d]: %w", i, err) + return err + } + } + return nil + } + + if len(sd.ParamOIDs) != len(args) { + return fmt.Errorf("mismatched param and argument count") + } + + for i := range args { + err := eqb.appendParam(m, sd.ParamOIDs[i], -1, args[i]) + if err != nil { + err = fmt.Errorf("failed to encode args[%d]: %w", i, err) + return err + } + } + + for i := range sd.Fields { + eqb.appendResultFormat(m.FormatCodeForOID(sd.Fields[i].DataTypeOID)) + } + + return nil +} + +// appendParam appends a parameter to the query. format may be -1 to automatically choose the format. If arg is nil it +// must be an untyped nil. +func (eqb *ExtendedQueryBuilder) appendParam(m *pgtype.Map, oid uint32, format int16, arg any) error { + if format == -1 { + preferredFormat := eqb.chooseParameterFormatCode(m, oid, arg) + preferredErr := eqb.appendParam(m, oid, preferredFormat, arg) + if preferredErr == nil { + return nil + } + + var otherFormat int16 + if preferredFormat == TextFormatCode { + otherFormat = BinaryFormatCode + } else { + otherFormat = TextFormatCode + } + + otherErr := eqb.appendParam(m, oid, otherFormat, arg) + if otherErr == nil { + return nil + } + + return preferredErr // return the error from the preferred format + } + + v, err := eqb.encodeExtendedParamValue(m, oid, format, arg) + if err != nil { + return err + } + + eqb.ParamFormats = append(eqb.ParamFormats, format) + eqb.ParamValues = append(eqb.ParamValues, v) + + return nil +} + +// appendResultFormat appends a result format to the query. +func (eqb *ExtendedQueryBuilder) appendResultFormat(format int16) { + eqb.ResultFormats = append(eqb.ResultFormats, format) +} + +// reset readies eqb to build another query. +func (eqb *ExtendedQueryBuilder) reset() { + eqb.ParamValues = eqb.ParamValues[0:0] + eqb.paramValueBytes = eqb.paramValueBytes[0:0] + eqb.ParamFormats = eqb.ParamFormats[0:0] + eqb.ResultFormats = eqb.ResultFormats[0:0] + + if cap(eqb.ParamValues) > 64 { + eqb.ParamValues = make([][]byte, 0, 64) + } + + if cap(eqb.paramValueBytes) > 256 { + eqb.paramValueBytes = make([]byte, 0, 256) + } + + if cap(eqb.ParamFormats) > 64 { + eqb.ParamFormats = make([]int16, 0, 64) + } + if cap(eqb.ResultFormats) > 64 { + eqb.ResultFormats = make([]int16, 0, 64) + } +} + +func (eqb *ExtendedQueryBuilder) encodeExtendedParamValue(m *pgtype.Map, oid uint32, formatCode int16, arg any) ([]byte, error) { + if eqb.paramValueBytes == nil { + eqb.paramValueBytes = make([]byte, 0, 128) + } + + pos := len(eqb.paramValueBytes) + + buf, err := m.Encode(oid, formatCode, arg, eqb.paramValueBytes) + if err != nil { + return nil, err + } + if buf == nil { + return nil, nil + } + eqb.paramValueBytes = buf + return eqb.paramValueBytes[pos:], nil +} + +// chooseParameterFormatCode determines the correct format code for an +// argument to a prepared statement. It defaults to TextFormatCode if no +// determination can be made. +func (eqb *ExtendedQueryBuilder) chooseParameterFormatCode(m *pgtype.Map, oid uint32, arg any) int16 { + switch arg.(type) { + case string, *string: + return TextFormatCode + } + + return m.FormatCodeForOID(oid) +} diff --git a/vendor/github.com/jackc/pgx/v5/internal/iobufpool/iobufpool.go b/vendor/github.com/jackc/pgx/v5/internal/iobufpool/iobufpool.go new file mode 100644 index 00000000..89e0c227 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/internal/iobufpool/iobufpool.go @@ -0,0 +1,70 @@ +// Package iobufpool implements a global segregated-fit pool of buffers for IO. +// +// It uses *[]byte instead of []byte to avoid the sync.Pool allocation with Put. Unfortunately, using a pointer to avoid +// an allocation is purposely not documented. https://github.com/golang/go/issues/16323 +package iobufpool + +import "sync" + +const minPoolExpOf2 = 8 + +var pools [18]*sync.Pool + +func init() { + for i := range pools { + bufLen := 1 << (minPoolExpOf2 + i) + pools[i] = &sync.Pool{ + New: func() any { + buf := make([]byte, bufLen) + return &buf + }, + } + } +} + +// Get gets a []byte of len size with cap <= size*2. +func Get(size int) *[]byte { + i := getPoolIdx(size) + if i >= len(pools) { + buf := make([]byte, size) + return &buf + } + + ptrBuf := (pools[i].Get().(*[]byte)) + *ptrBuf = (*ptrBuf)[:size] + + return ptrBuf +} + +func getPoolIdx(size int) int { + size-- + size >>= minPoolExpOf2 + i := 0 + for size > 0 { + size >>= 1 + i++ + } + + return i +} + +// Put returns buf to the pool. +func Put(buf *[]byte) { + i := putPoolIdx(cap(*buf)) + if i < 0 { + return + } + + pools[i].Put(buf) +} + +func putPoolIdx(size int) int { + minPoolSize := 1 << minPoolExpOf2 + for i := range pools { + if size == minPoolSize< 0") + } + if argIdx >= len(args) { return "", fmt.Errorf("insufficient arguments") } @@ -61,7 +66,7 @@ func (q *Query) Sanitize(args ...interface{}) (string, error) { // Prevent SQL injection via Line Comment Creation // https://github.com/jackc/pgx/security/advisories/GHSA-m7wr-2xf7-cm9p - str = "(" + str + ")" + str = " " + str + " " default: return "", fmt.Errorf("invalid Part type: %T", part) } @@ -317,7 +322,7 @@ func multilineCommentState(l *sqlLexer) stateFn { // SanitizeSQL replaces placeholder values with args. It quotes and escapes args // as necessary. This function is only safe when standard_conforming_strings is // on. -func SanitizeSQL(sql string, args ...interface{}) (string, error) { +func SanitizeSQL(sql string, args ...any) (string, error) { query, err := NewQuery(sql) if err != nil { return "", err diff --git a/vendor/github.com/jackc/pgx/v5/internal/stmtcache/lru_cache.go b/vendor/github.com/jackc/pgx/v5/internal/stmtcache/lru_cache.go new file mode 100644 index 00000000..dec83f47 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/internal/stmtcache/lru_cache.go @@ -0,0 +1,112 @@ +package stmtcache + +import ( + "container/list" + + "github.com/jackc/pgx/v5/pgconn" +) + +// LRUCache implements Cache with a Least Recently Used (LRU) cache. +type LRUCache struct { + cap int + m map[string]*list.Element + l *list.List + invalidStmts []*pgconn.StatementDescription +} + +// NewLRUCache creates a new LRUCache. cap is the maximum size of the cache. +func NewLRUCache(cap int) *LRUCache { + return &LRUCache{ + cap: cap, + m: make(map[string]*list.Element), + l: list.New(), + } +} + +// Get returns the statement description for sql. Returns nil if not found. +func (c *LRUCache) Get(key string) *pgconn.StatementDescription { + if el, ok := c.m[key]; ok { + c.l.MoveToFront(el) + return el.Value.(*pgconn.StatementDescription) + } + + return nil + +} + +// Put stores sd in the cache. Put panics if sd.SQL is "". Put does nothing if sd.SQL already exists in the cache or +// sd.SQL has been invalidated and HandleInvalidated has not been called yet. +func (c *LRUCache) Put(sd *pgconn.StatementDescription) { + if sd.SQL == "" { + panic("cannot store statement description with empty SQL") + } + + if _, present := c.m[sd.SQL]; present { + return + } + + // The statement may have been invalidated but not yet handled. Do not readd it to the cache. + for _, invalidSD := range c.invalidStmts { + if invalidSD.SQL == sd.SQL { + return + } + } + + if c.l.Len() == c.cap { + c.invalidateOldest() + } + + el := c.l.PushFront(sd) + c.m[sd.SQL] = el +} + +// Invalidate invalidates statement description identified by sql. Does nothing if not found. +func (c *LRUCache) Invalidate(sql string) { + if el, ok := c.m[sql]; ok { + delete(c.m, sql) + c.invalidStmts = append(c.invalidStmts, el.Value.(*pgconn.StatementDescription)) + c.l.Remove(el) + } +} + +// InvalidateAll invalidates all statement descriptions. +func (c *LRUCache) InvalidateAll() { + el := c.l.Front() + for el != nil { + c.invalidStmts = append(c.invalidStmts, el.Value.(*pgconn.StatementDescription)) + el = el.Next() + } + + c.m = make(map[string]*list.Element) + c.l = list.New() +} + +// GetInvalidated returns a slice of all statement descriptions invalidated since the last call to RemoveInvalidated. +func (c *LRUCache) GetInvalidated() []*pgconn.StatementDescription { + return c.invalidStmts +} + +// RemoveInvalidated removes all invalidated statement descriptions. No other calls to Cache must be made between a +// call to GetInvalidated and RemoveInvalidated or RemoveInvalidated may remove statement descriptions that were +// never seen by the call to GetInvalidated. +func (c *LRUCache) RemoveInvalidated() { + c.invalidStmts = nil +} + +// Len returns the number of cached prepared statement descriptions. +func (c *LRUCache) Len() int { + return c.l.Len() +} + +// Cap returns the maximum number of cached prepared statement descriptions. +func (c *LRUCache) Cap() int { + return c.cap +} + +func (c *LRUCache) invalidateOldest() { + oldest := c.l.Back() + sd := oldest.Value.(*pgconn.StatementDescription) + c.invalidStmts = append(c.invalidStmts, sd) + delete(c.m, sd.SQL) + c.l.Remove(oldest) +} diff --git a/vendor/github.com/jackc/pgx/v5/internal/stmtcache/stmtcache.go b/vendor/github.com/jackc/pgx/v5/internal/stmtcache/stmtcache.go new file mode 100644 index 00000000..d57bdd29 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/internal/stmtcache/stmtcache.go @@ -0,0 +1,45 @@ +// Package stmtcache is a cache for statement descriptions. +package stmtcache + +import ( + "crypto/sha256" + "encoding/hex" + + "github.com/jackc/pgx/v5/pgconn" +) + +// StatementName returns a statement name that will be stable for sql across multiple connections and program +// executions. +func StatementName(sql string) string { + digest := sha256.Sum256([]byte(sql)) + return "stmtcache_" + hex.EncodeToString(digest[0:24]) +} + +// Cache caches statement descriptions. +type Cache interface { + // Get returns the statement description for sql. Returns nil if not found. + Get(sql string) *pgconn.StatementDescription + + // Put stores sd in the cache. Put panics if sd.SQL is "". Put does nothing if sd.SQL already exists in the cache. + Put(sd *pgconn.StatementDescription) + + // Invalidate invalidates statement description identified by sql. Does nothing if not found. + Invalidate(sql string) + + // InvalidateAll invalidates all statement descriptions. + InvalidateAll() + + // GetInvalidated returns a slice of all statement descriptions invalidated since the last call to RemoveInvalidated. + GetInvalidated() []*pgconn.StatementDescription + + // RemoveInvalidated removes all invalidated statement descriptions. No other calls to Cache must be made between a + // call to GetInvalidated and RemoveInvalidated or RemoveInvalidated may remove statement descriptions that were + // never seen by the call to GetInvalidated. + RemoveInvalidated() + + // Len returns the number of cached prepared statement descriptions. + Len() int + + // Cap returns the maximum number of cached prepared statement descriptions. + Cap() int +} diff --git a/vendor/github.com/jackc/pgx/v5/internal/stmtcache/unlimited_cache.go b/vendor/github.com/jackc/pgx/v5/internal/stmtcache/unlimited_cache.go new file mode 100644 index 00000000..69641329 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/internal/stmtcache/unlimited_cache.go @@ -0,0 +1,77 @@ +package stmtcache + +import ( + "math" + + "github.com/jackc/pgx/v5/pgconn" +) + +// UnlimitedCache implements Cache with no capacity limit. +type UnlimitedCache struct { + m map[string]*pgconn.StatementDescription + invalidStmts []*pgconn.StatementDescription +} + +// NewUnlimitedCache creates a new UnlimitedCache. +func NewUnlimitedCache() *UnlimitedCache { + return &UnlimitedCache{ + m: make(map[string]*pgconn.StatementDescription), + } +} + +// Get returns the statement description for sql. Returns nil if not found. +func (c *UnlimitedCache) Get(sql string) *pgconn.StatementDescription { + return c.m[sql] +} + +// Put stores sd in the cache. Put panics if sd.SQL is "". Put does nothing if sd.SQL already exists in the cache. +func (c *UnlimitedCache) Put(sd *pgconn.StatementDescription) { + if sd.SQL == "" { + panic("cannot store statement description with empty SQL") + } + + if _, present := c.m[sd.SQL]; present { + return + } + + c.m[sd.SQL] = sd +} + +// Invalidate invalidates statement description identified by sql. Does nothing if not found. +func (c *UnlimitedCache) Invalidate(sql string) { + if sd, ok := c.m[sql]; ok { + delete(c.m, sql) + c.invalidStmts = append(c.invalidStmts, sd) + } +} + +// InvalidateAll invalidates all statement descriptions. +func (c *UnlimitedCache) InvalidateAll() { + for _, sd := range c.m { + c.invalidStmts = append(c.invalidStmts, sd) + } + + c.m = make(map[string]*pgconn.StatementDescription) +} + +// GetInvalidated returns a slice of all statement descriptions invalidated since the last call to RemoveInvalidated. +func (c *UnlimitedCache) GetInvalidated() []*pgconn.StatementDescription { + return c.invalidStmts +} + +// RemoveInvalidated removes all invalidated statement descriptions. No other calls to Cache must be made between a +// call to GetInvalidated and RemoveInvalidated or RemoveInvalidated may remove statement descriptions that were +// never seen by the call to GetInvalidated. +func (c *UnlimitedCache) RemoveInvalidated() { + c.invalidStmts = nil +} + +// Len returns the number of cached prepared statement descriptions. +func (c *UnlimitedCache) Len() int { + return len(c.m) +} + +// Cap returns the maximum number of cached prepared statement descriptions. +func (c *UnlimitedCache) Cap() int { + return math.MaxInt +} diff --git a/vendor/github.com/jackc/pgx/v4/large_objects.go b/vendor/github.com/jackc/pgx/v5/large_objects.go similarity index 63% rename from vendor/github.com/jackc/pgx/v4/large_objects.go rename to vendor/github.com/jackc/pgx/v5/large_objects.go index c238ab9c..9d21afdc 100644 --- a/vendor/github.com/jackc/pgx/v4/large_objects.go +++ b/vendor/github.com/jackc/pgx/v5/large_objects.go @@ -4,8 +4,15 @@ import ( "context" "errors" "io" + + "github.com/jackc/pgx/v5/pgtype" ) +// The PostgreSQL wire protocol has a limit of 1 GB - 1 per message. See definition of +// PQ_LARGE_MESSAGE_LIMIT in the PostgreSQL source code. To allow for the other data +// in the message,maxLargeObjectMessageLength should be no larger than 1 GB - 1 KB. +var maxLargeObjectMessageLength = 1024*1024*1024 - 1024 + // LargeObjects is a structure used to access the large objects API. It is only valid within the transaction where it // was created. // @@ -68,32 +75,65 @@ type LargeObject struct { // Write writes p to the large object and returns the number of bytes written and an error if not all of p was written. func (o *LargeObject) Write(p []byte) (int, error) { - var n int - err := o.tx.QueryRow(o.ctx, "select lowrite($1, $2)", o.fd, p).Scan(&n) - if err != nil { - return n, err + nTotal := 0 + for { + expected := len(p) - nTotal + if expected == 0 { + break + } else if expected > maxLargeObjectMessageLength { + expected = maxLargeObjectMessageLength + } + + var n int + err := o.tx.QueryRow(o.ctx, "select lowrite($1, $2)", o.fd, p[nTotal:nTotal+expected]).Scan(&n) + if err != nil { + return nTotal, err + } + + if n < 0 { + return nTotal, errors.New("failed to write to large object") + } + + nTotal += n + + if n < expected { + return nTotal, errors.New("short write to large object") + } else if n > expected { + return nTotal, errors.New("invalid write to large object") + } } - if n < 0 { - return 0, errors.New("failed to write to large object") - } - - return n, nil + return nTotal, nil } // Read reads up to len(p) bytes into p returning the number of bytes read. func (o *LargeObject) Read(p []byte) (int, error) { - var res []byte - err := o.tx.QueryRow(o.ctx, "select loread($1, $2)", o.fd, len(p)).Scan(&res) - copy(p, res) - if err != nil { - return len(res), err + nTotal := 0 + for { + expected := len(p) - nTotal + if expected == 0 { + break + } else if expected > maxLargeObjectMessageLength { + expected = maxLargeObjectMessageLength + } + + res := pgtype.PreallocBytes(p[nTotal:]) + err := o.tx.QueryRow(o.ctx, "select loread($1, $2)", o.fd, expected).Scan(&res) + // We compute expected so that it always fits into p, so it should never happen + // that PreallocBytes's ScanBytes had to allocate a new slice. + nTotal += len(res) + if err != nil { + return nTotal, err + } + + if len(res) < expected { + return nTotal, io.EOF + } else if len(res) > expected { + return nTotal, errors.New("invalid read of large object") + } } - if len(res) < len(p) { - err = io.EOF - } - return len(res), err + return nTotal, nil } // Seek moves the current location pointer to the new location specified by offset. diff --git a/vendor/github.com/jackc/pgx/v5/named_args.go b/vendor/github.com/jackc/pgx/v5/named_args.go new file mode 100644 index 00000000..c88991ee --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/named_args.go @@ -0,0 +1,295 @@ +package pgx + +import ( + "context" + "fmt" + "strconv" + "strings" + "unicode/utf8" +) + +// NamedArgs can be used as the first argument to a query method. It will replace every '@' named placeholder with a '$' +// ordinal placeholder and construct the appropriate arguments. +// +// For example, the following two queries are equivalent: +// +// conn.Query(ctx, "select * from widgets where foo = @foo and bar = @bar", pgx.NamedArgs{"foo": 1, "bar": 2}) +// conn.Query(ctx, "select * from widgets where foo = $1 and bar = $2", 1, 2) +// +// Named placeholders are case sensitive and must start with a letter or underscore. Subsequent characters can be +// letters, numbers, or underscores. +type NamedArgs map[string]any + +// RewriteQuery implements the QueryRewriter interface. +func (na NamedArgs) RewriteQuery(ctx context.Context, conn *Conn, sql string, args []any) (newSQL string, newArgs []any, err error) { + return rewriteQuery(na, sql, false) +} + +// StrictNamedArgs can be used in the same way as NamedArgs, but provided arguments are also checked to include all +// named arguments that the sql query uses, and no extra arguments. +type StrictNamedArgs map[string]any + +// RewriteQuery implements the QueryRewriter interface. +func (sna StrictNamedArgs) RewriteQuery(ctx context.Context, conn *Conn, sql string, args []any) (newSQL string, newArgs []any, err error) { + return rewriteQuery(sna, sql, true) +} + +type namedArg string + +type sqlLexer struct { + src string + start int + pos int + nested int // multiline comment nesting level. + stateFn stateFn + parts []any + + nameToOrdinal map[namedArg]int +} + +type stateFn func(*sqlLexer) stateFn + +func rewriteQuery(na map[string]any, sql string, isStrict bool) (newSQL string, newArgs []any, err error) { + l := &sqlLexer{ + src: sql, + stateFn: rawState, + nameToOrdinal: make(map[namedArg]int, len(na)), + } + + for l.stateFn != nil { + l.stateFn = l.stateFn(l) + } + + sb := strings.Builder{} + for _, p := range l.parts { + switch p := p.(type) { + case string: + sb.WriteString(p) + case namedArg: + sb.WriteRune('$') + sb.WriteString(strconv.Itoa(l.nameToOrdinal[p])) + } + } + + newArgs = make([]any, len(l.nameToOrdinal)) + for name, ordinal := range l.nameToOrdinal { + var found bool + newArgs[ordinal-1], found = na[string(name)] + if isStrict && !found { + return "", nil, fmt.Errorf("argument %s found in sql query but not present in StrictNamedArgs", name) + } + } + + if isStrict { + for name := range na { + if _, found := l.nameToOrdinal[namedArg(name)]; !found { + return "", nil, fmt.Errorf("argument %s of StrictNamedArgs not found in sql query", name) + } + } + } + + return sb.String(), newArgs, nil +} + +func rawState(l *sqlLexer) stateFn { + for { + r, width := utf8.DecodeRuneInString(l.src[l.pos:]) + l.pos += width + + switch r { + case 'e', 'E': + nextRune, width := utf8.DecodeRuneInString(l.src[l.pos:]) + if nextRune == '\'' { + l.pos += width + return escapeStringState + } + case '\'': + return singleQuoteState + case '"': + return doubleQuoteState + case '@': + nextRune, _ := utf8.DecodeRuneInString(l.src[l.pos:]) + if isLetter(nextRune) || nextRune == '_' { + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos-width]) + } + l.start = l.pos + return namedArgState + } + case '-': + nextRune, width := utf8.DecodeRuneInString(l.src[l.pos:]) + if nextRune == '-' { + l.pos += width + return oneLineCommentState + } + case '/': + nextRune, width := utf8.DecodeRuneInString(l.src[l.pos:]) + if nextRune == '*' { + l.pos += width + return multilineCommentState + } + case utf8.RuneError: + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil + } + } +} + +func isLetter(r rune) bool { + return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') +} + +func namedArgState(l *sqlLexer) stateFn { + for { + r, width := utf8.DecodeRuneInString(l.src[l.pos:]) + l.pos += width + + if r == utf8.RuneError { + if l.pos-l.start > 0 { + na := namedArg(l.src[l.start:l.pos]) + if _, found := l.nameToOrdinal[na]; !found { + l.nameToOrdinal[na] = len(l.nameToOrdinal) + 1 + } + l.parts = append(l.parts, na) + l.start = l.pos + } + return nil + } else if !(isLetter(r) || (r >= '0' && r <= '9') || r == '_') { + l.pos -= width + na := namedArg(l.src[l.start:l.pos]) + if _, found := l.nameToOrdinal[na]; !found { + l.nameToOrdinal[na] = len(l.nameToOrdinal) + 1 + } + l.parts = append(l.parts, namedArg(na)) + l.start = l.pos + return rawState + } + } +} + +func singleQuoteState(l *sqlLexer) stateFn { + for { + r, width := utf8.DecodeRuneInString(l.src[l.pos:]) + l.pos += width + + switch r { + case '\'': + nextRune, width := utf8.DecodeRuneInString(l.src[l.pos:]) + if nextRune != '\'' { + return rawState + } + l.pos += width + case utf8.RuneError: + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil + } + } +} + +func doubleQuoteState(l *sqlLexer) stateFn { + for { + r, width := utf8.DecodeRuneInString(l.src[l.pos:]) + l.pos += width + + switch r { + case '"': + nextRune, width := utf8.DecodeRuneInString(l.src[l.pos:]) + if nextRune != '"' { + return rawState + } + l.pos += width + case utf8.RuneError: + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil + } + } +} + +func escapeStringState(l *sqlLexer) stateFn { + for { + r, width := utf8.DecodeRuneInString(l.src[l.pos:]) + l.pos += width + + switch r { + case '\\': + _, width = utf8.DecodeRuneInString(l.src[l.pos:]) + l.pos += width + case '\'': + nextRune, width := utf8.DecodeRuneInString(l.src[l.pos:]) + if nextRune != '\'' { + return rawState + } + l.pos += width + case utf8.RuneError: + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil + } + } +} + +func oneLineCommentState(l *sqlLexer) stateFn { + for { + r, width := utf8.DecodeRuneInString(l.src[l.pos:]) + l.pos += width + + switch r { + case '\\': + _, width = utf8.DecodeRuneInString(l.src[l.pos:]) + l.pos += width + case '\n', '\r': + return rawState + case utf8.RuneError: + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil + } + } +} + +func multilineCommentState(l *sqlLexer) stateFn { + for { + r, width := utf8.DecodeRuneInString(l.src[l.pos:]) + l.pos += width + + switch r { + case '/': + nextRune, width := utf8.DecodeRuneInString(l.src[l.pos:]) + if nextRune == '*' { + l.pos += width + l.nested++ + } + case '*': + nextRune, width := utf8.DecodeRuneInString(l.src[l.pos:]) + if nextRune != '/' { + continue + } + + l.pos += width + if l.nested == 0 { + return rawState + } + l.nested-- + + case utf8.RuneError: + if l.pos-l.start > 0 { + l.parts = append(l.parts, l.src[l.start:l.pos]) + l.start = l.pos + } + return nil + } + } +} diff --git a/vendor/github.com/jackc/pgx/v5/pgconn/README.md b/vendor/github.com/jackc/pgx/v5/pgconn/README.md new file mode 100644 index 00000000..1fe15c26 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgconn/README.md @@ -0,0 +1,29 @@ +# pgconn + +Package pgconn is a low-level PostgreSQL database driver. It operates at nearly the same level as the C library libpq. +It is primarily intended to serve as the foundation for higher level libraries such as https://github.com/jackc/pgx. +Applications should handle normal queries with a higher level library and only use pgconn directly when required for +low-level access to PostgreSQL functionality. + +## Example Usage + +```go +pgConn, err := pgconn.Connect(context.Background(), os.Getenv("DATABASE_URL")) +if err != nil { + log.Fatalln("pgconn failed to connect:", err) +} +defer pgConn.Close(context.Background()) + +result := pgConn.ExecParams(context.Background(), "SELECT email FROM users WHERE id=$1", [][]byte{[]byte("123")}, nil, nil, nil) +for result.NextRow() { + fmt.Println("User 123 has email:", string(result.Values()[0])) +} +_, err = result.Close() +if err != nil { + log.Fatalln("failed reading result:", err) +} +``` + +## Testing + +See CONTRIBUTING.md for setup instructions. diff --git a/vendor/github.com/jackc/pgconn/auth_scram.go b/vendor/github.com/jackc/pgx/v5/pgconn/auth_scram.go similarity index 95% rename from vendor/github.com/jackc/pgconn/auth_scram.go rename to vendor/github.com/jackc/pgx/v5/pgconn/auth_scram.go index 1545b7c5..06498361 100644 --- a/vendor/github.com/jackc/pgconn/auth_scram.go +++ b/vendor/github.com/jackc/pgx/v5/pgconn/auth_scram.go @@ -22,7 +22,7 @@ import ( "fmt" "strconv" - "github.com/jackc/pgproto3/v2" + "github.com/jackc/pgx/v5/pgproto3" "golang.org/x/crypto/pbkdf2" "golang.org/x/text/secure/precis" ) @@ -41,16 +41,13 @@ func (c *PgConn) scramAuth(serverAuthMechanisms []string) error { AuthMechanism: "SCRAM-SHA-256", Data: sc.clientFirstMessage(), } - buf, err := saslInitialResponse.Encode(nil) - if err != nil { - return err - } - _, err = c.conn.Write(buf) + c.frontend.Send(saslInitialResponse) + err = c.flushWithPotentialWriteReadDeadlock() if err != nil { return err } - // Receive server-first-message payload in a AuthenticationSASLContinue. + // Receive server-first-message payload in an AuthenticationSASLContinue. saslContinue, err := c.rxSASLContinue() if err != nil { return err @@ -64,16 +61,13 @@ func (c *PgConn) scramAuth(serverAuthMechanisms []string) error { saslResponse := &pgproto3.SASLResponse{ Data: []byte(sc.clientFinalMessage()), } - buf, err = saslResponse.Encode(nil) - if err != nil { - return err - } - _, err = c.conn.Write(buf) + c.frontend.Send(saslResponse) + err = c.flushWithPotentialWriteReadDeadlock() if err != nil { return err } - // Receive server-final-message payload in a AuthenticationSASLFinal. + // Receive server-final-message payload in an AuthenticationSASLFinal. saslFinal, err := c.rxSASLFinal() if err != nil { return err diff --git a/vendor/github.com/jackc/pgconn/config.go b/vendor/github.com/jackc/pgx/v5/pgconn/config.go similarity index 82% rename from vendor/github.com/jackc/pgconn/config.go rename to vendor/github.com/jackc/pgx/v5/pgconn/config.go index 36b74c4a..6a198e67 100644 --- a/vendor/github.com/jackc/pgconn/config.go +++ b/vendor/github.com/jackc/pgx/v5/pgconn/config.go @@ -8,7 +8,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "math" "net" "net/url" @@ -18,17 +17,17 @@ import ( "strings" "time" - "github.com/jackc/chunkreader/v2" "github.com/jackc/pgpassfile" - "github.com/jackc/pgproto3/v2" "github.com/jackc/pgservicefile" + "github.com/jackc/pgx/v5/pgconn/ctxwatch" + "github.com/jackc/pgx/v5/pgproto3" ) type AfterConnectFunc func(ctx context.Context, pgconn *PgConn) error type ValidateConnectFunc func(ctx context.Context, pgconn *PgConn) error type GetSSLPasswordFunc func(ctx context.Context) string -// Config is the settings used to establish a connection to a PostgreSQL server. It must be created by ParseConfig. A +// Config is the settings used to establish a connection to a PostgreSQL server. It must be created by [ParseConfig]. A // manually initialized Config will cause ConnectConfig to panic. type Config struct { Host string // host (e.g. localhost) or absolute path to unix domain socket directory (e.g. /private/tmp) @@ -41,7 +40,12 @@ type Config struct { DialFunc DialFunc // e.g. net.Dialer.DialContext LookupFunc LookupFunc // e.g. net.Resolver.LookupHost BuildFrontend BuildFrontendFunc - RuntimeParams map[string]string // Run-time parameters to set on connection as session default values (e.g. search_path or application_name) + + // BuildContextWatcherHandler is called to create a ContextWatcherHandler for a connection. The handler is called + // when a context passed to a PgConn method is canceled. + BuildContextWatcherHandler func(*PgConn) ctxwatch.Handler + + RuntimeParams map[string]string // Run-time parameters to set on connection as session default values (e.g. search_path or application_name) KerberosSrvName string KerberosSpn string @@ -62,12 +66,17 @@ type Config struct { // OnNotification is a callback function called when a notification from the LISTEN/NOTIFY system is received. OnNotification NotificationHandler + // OnPgError is a callback function called when a Postgres error is received by the server. The default handler will close + // the connection on any FATAL errors. If you override this handler you should call the previously set handler or ensure + // that you close on FATAL errors by returning false. + OnPgError PgErrorHandler + createdByParseConfig bool // Used to enforce created by ParseConfig rule. } -// ParseConfigOptions contains options that control how a config is built such as getsslpassword. +// ParseConfigOptions contains options that control how a config is built such as GetSSLPassword. type ParseConfigOptions struct { - // GetSSLPassword gets the password to decrypt a SSL client certificate. This is analogous to the the libpq function + // GetSSLPassword gets the password to decrypt a SSL client certificate. This is analogous to the libpq function // PQsetSSLKeyPassHook_OpenSSL. GetSSLPassword GetSSLPasswordFunc } @@ -109,6 +118,14 @@ type FallbackConfig struct { TLSConfig *tls.Config // nil disables TLS } +// connectOneConfig is the configuration for a single attempt to connect to a single host. +type connectOneConfig struct { + network string + address string + originalHostname string // original hostname before resolving + tlsConfig *tls.Config // nil disables TLS +} + // isAbsolutePath checks if the provided value is an absolute path either // beginning with a forward slash (as on Linux-based systems) or with a capital // letter A-Z followed by a colon and a backslash, e.g., "C:\", (as on Windows). @@ -143,15 +160,15 @@ func NetworkAddress(host string, port uint16) (network, address string) { // ParseConfig builds a *Config from connString with similar behavior to the PostgreSQL standard C library libpq. It // uses the same defaults as libpq (e.g. port=5432) and understands most PG* environment variables. ParseConfig closely -// matches the parsing behavior of libpq. connString may either be in URL format or keyword = value format (DSN style). -// See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING for details. connString also may be -// empty to only read from the environment. If a password is not supplied it will attempt to read the .pgpass file. +// matches the parsing behavior of libpq. connString may either be in URL format or keyword = value format. See +// https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING for details. connString also may be empty +// to only read from the environment. If a password is not supplied it will attempt to read the .pgpass file. // -// # Example DSN -// user=jack password=secret host=pg.example.com port=5432 dbname=mydb sslmode=verify-ca +// # Example Keyword/Value +// user=jack password=secret host=pg.example.com port=5432 dbname=mydb sslmode=verify-ca // -// # Example URL -// postgres://jack:secret@pg.example.com:5432/mydb?sslmode=verify-ca +// # Example URL +// postgres://jack:secret@pg.example.com:5432/mydb?sslmode=verify-ca // // The returned *Config may be modified. However, it is strongly recommended that any configuration that can be done // through the connection string be done there. In particular the fields Host, Port, TLSConfig, and Fallbacks can be @@ -162,28 +179,28 @@ func NetworkAddress(host string, port uint16) (network, address string) { // values that will be tried in order. This can be used as part of a high availability system. See // https://www.postgresql.org/docs/11/libpq-connect.html#LIBPQ-MULTIPLE-HOSTS for more information. // -// # Example URL -// postgres://jack:secret@foo.example.com:5432,bar.example.com:5432/mydb +// # Example URL +// postgres://jack:secret@foo.example.com:5432,bar.example.com:5432/mydb // // ParseConfig currently recognizes the following environment variable and their parameter key word equivalents passed -// via database URL or DSN: +// via database URL or keyword/value: // -// PGHOST -// PGPORT -// PGDATABASE -// PGUSER -// PGPASSWORD -// PGPASSFILE -// PGSERVICE -// PGSERVICEFILE -// PGSSLMODE -// PGSSLCERT -// PGSSLKEY -// PGSSLROOTCERT -// PGSSLPASSWORD -// PGAPPNAME -// PGCONNECT_TIMEOUT -// PGTARGETSESSIONATTRS +// PGHOST +// PGPORT +// PGDATABASE +// PGUSER +// PGPASSWORD +// PGPASSFILE +// PGSERVICE +// PGSERVICEFILE +// PGSSLMODE +// PGSSLCERT +// PGSSLKEY +// PGSSLROOTCERT +// PGSSLPASSWORD +// PGAPPNAME +// PGCONNECT_TIMEOUT +// PGTARGETSESSIONATTRS // // See http://www.postgresql.org/docs/11/static/libpq-envars.html for details on the meaning of environment variables. // @@ -212,11 +229,9 @@ func NetworkAddress(host string, port uint16) (network, address string) { // // In addition, ParseConfig accepts the following options: // -// min_read_buffer_size -// The minimum size of the internal read buffer. Default 8192. -// servicefile -// libpq only reads servicefile from the PGSERVICEFILE environment variable. ParseConfig accepts servicefile as a -// part of the connection string. +// - servicefile. +// libpq only reads servicefile from the PGSERVICEFILE environment variable. ParseConfig accepts servicefile as a +// part of the connection string. func ParseConfig(connString string) (*Config, error) { var parseConfigOptions ParseConfigOptions return ParseConfigWithOptions(connString, parseConfigOptions) @@ -232,16 +247,16 @@ func ParseConfigWithOptions(connString string, options ParseConfigOptions) (*Con connStringSettings := make(map[string]string) if connString != "" { var err error - // connString may be a database URL or a DSN + // connString may be a database URL or in PostgreSQL keyword/value format if strings.HasPrefix(connString, "postgres://") || strings.HasPrefix(connString, "postgresql://") { connStringSettings, err = parseURLSettings(connString) if err != nil { - return nil, &parseConfigError{connString: connString, msg: "failed to parse as URL", err: err} + return nil, &ParseConfigError{ConnString: connString, msg: "failed to parse as URL", err: err} } } else { - connStringSettings, err = parseDSNSettings(connString) + connStringSettings, err = parseKeywordValueSettings(connString) if err != nil { - return nil, &parseConfigError{connString: connString, msg: "failed to parse as DSN", err: err} + return nil, &ParseConfigError{ConnString: connString, msg: "failed to parse as keyword/value", err: err} } } } @@ -250,30 +265,37 @@ func ParseConfigWithOptions(connString string, options ParseConfigOptions) (*Con if service, present := settings["service"]; present { serviceSettings, err := parseServiceSettings(settings["servicefile"], service) if err != nil { - return nil, &parseConfigError{connString: connString, msg: "failed to read service", err: err} + return nil, &ParseConfigError{ConnString: connString, msg: "failed to read service", err: err} } settings = mergeSettings(defaultSettings, envSettings, serviceSettings, connStringSettings) } - minReadBufferSize, err := strconv.ParseInt(settings["min_read_buffer_size"], 10, 32) - if err != nil { - return nil, &parseConfigError{connString: connString, msg: "cannot parse min_read_buffer_size", err: err} - } - config := &Config{ createdByParseConfig: true, Database: settings["database"], User: settings["user"], Password: settings["password"], RuntimeParams: make(map[string]string), - BuildFrontend: makeDefaultBuildFrontendFunc(int(minReadBufferSize)), + BuildFrontend: func(r io.Reader, w io.Writer) *pgproto3.Frontend { + return pgproto3.NewFrontend(r, w) + }, + BuildContextWatcherHandler: func(pgConn *PgConn) ctxwatch.Handler { + return &DeadlineContextWatcherHandler{Conn: pgConn.conn} + }, + OnPgError: func(_ *PgConn, pgErr *PgError) bool { + // we want to automatically close any fatal errors + if strings.EqualFold(pgErr.Severity, "FATAL") { + return false + } + return true + }, } if connectTimeoutSetting, present := settings["connect_timeout"]; present { connectTimeout, err := parseConnectTimeoutSetting(connectTimeoutSetting) if err != nil { - return nil, &parseConfigError{connString: connString, msg: "invalid connect_timeout", err: err} + return nil, &ParseConfigError{ConnString: connString, msg: "invalid connect_timeout", err: err} } config.ConnectTimeout = connectTimeout config.DialFunc = makeConnectTimeoutDialFunc(connectTimeout) @@ -301,7 +323,6 @@ func ParseConfigWithOptions(connString string, options ParseConfigOptions) (*Con "krbspn": {}, "krbsrvname": {}, "target_session_attrs": {}, - "min_read_buffer_size": {}, "service": {}, "servicefile": {}, } @@ -336,7 +357,7 @@ func ParseConfigWithOptions(connString string, options ParseConfigOptions) (*Con port, err := parsePort(portStr) if err != nil { - return nil, &parseConfigError{connString: connString, msg: "invalid port", err: err} + return nil, &ParseConfigError{ConnString: connString, msg: "invalid port", err: err} } var tlsConfigs []*tls.Config @@ -348,7 +369,7 @@ func ParseConfigWithOptions(connString string, options ParseConfigOptions) (*Con var err error tlsConfigs, err = configTLS(settings, host, options) if err != nil { - return nil, &parseConfigError{connString: connString, msg: "failed to configure TLS", err: err} + return nil, &ParseConfigError{ConnString: connString, msg: "failed to configure TLS", err: err} } } @@ -366,9 +387,9 @@ func ParseConfigWithOptions(connString string, options ParseConfigOptions) (*Con config.TLSConfig = fallbacks[0].TLSConfig config.Fallbacks = fallbacks[1:] - if config.Password == "" { - passfile, err := pgpassfile.ReadPassfile(settings["passfile"]) - if err == nil { + passfile, err := pgpassfile.ReadPassfile(settings["passfile"]) + if err == nil { + if config.Password == "" { host := config.Host if network, _ := NetworkAddress(config.Host, config.Port); network == "unix" { host = "localhost" @@ -392,7 +413,7 @@ func ParseConfigWithOptions(connString string, options ParseConfigOptions) (*Con case "any": // do nothing default: - return nil, &parseConfigError{connString: connString, msg: fmt.Sprintf("unknown target_session_attrs value: %v", tsa)} + return nil, &ParseConfigError{ConnString: connString, msg: fmt.Sprintf("unknown target_session_attrs value: %v", tsa)} } return config, nil @@ -446,14 +467,17 @@ func parseEnvSettings() map[string]string { func parseURLSettings(connString string) (map[string]string, error) { settings := make(map[string]string) - url, err := url.Parse(connString) + parsedURL, err := url.Parse(connString) if err != nil { + if urlErr := new(url.Error); errors.As(err, &urlErr) { + return nil, urlErr.Err + } return nil, err } - if url.User != nil { - settings["user"] = url.User.Username() - if password, present := url.User.Password(); present { + if parsedURL.User != nil { + settings["user"] = parsedURL.User.Username() + if password, present := parsedURL.User.Password(); present { settings["password"] = password } } @@ -461,7 +485,7 @@ func parseURLSettings(connString string) (map[string]string, error) { // Handle multiple host:port's in url.Host by splitting them into host,host,host and port,port,port. var hosts []string var ports []string - for _, host := range strings.Split(url.Host, ",") { + for _, host := range strings.Split(parsedURL.Host, ",") { if host == "" { continue } @@ -487,7 +511,7 @@ func parseURLSettings(connString string) (map[string]string, error) { settings["port"] = strings.Join(ports, ",") } - database := strings.TrimLeft(url.Path, "/") + database := strings.TrimLeft(parsedURL.Path, "/") if database != "" { settings["database"] = database } @@ -496,7 +520,7 @@ func parseURLSettings(connString string) (map[string]string, error) { "dbname": "database", } - for k, v := range url.Query() { + for k, v := range parsedURL.Query() { if k2, present := nameMap[k]; present { k = k2 } @@ -513,7 +537,7 @@ func isIPOnly(host string) bool { var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} -func parseDSNSettings(s string) (map[string]string, error) { +func parseKeywordValueSettings(s string) (map[string]string, error) { settings := make(map[string]string) nameMap := map[string]string{ @@ -524,7 +548,7 @@ func parseDSNSettings(s string) (map[string]string, error) { var key, val string eqIdx := strings.IndexRune(s, '=') if eqIdx < 0 { - return nil, errors.New("invalid dsn") + return nil, errors.New("invalid keyword/value") } key = strings.Trim(s[:eqIdx], " \t\n\r\v\f") @@ -576,7 +600,7 @@ func parseDSNSettings(s string) (map[string]string, error) { } if key == "" { - return nil, errors.New("invalid dsn") + return nil, errors.New("invalid keyword/value") } settings[key] = val @@ -633,6 +657,36 @@ func configTLS(settings map[string]string, thisHost string, parseConfigOptions P tlsConfig := &tls.Config{} + if sslrootcert != "" { + var caCertPool *x509.CertPool + + if sslrootcert == "system" { + var err error + + caCertPool, err = x509.SystemCertPool() + if err != nil { + return nil, fmt.Errorf("unable to load system certificate pool: %w", err) + } + + sslmode = "verify-full" + } else { + caCertPool = x509.NewCertPool() + + caPath := sslrootcert + caCert, err := os.ReadFile(caPath) + if err != nil { + return nil, fmt.Errorf("unable to read CA file: %w", err) + } + + if !caCertPool.AppendCertsFromPEM(caCert) { + return nil, errors.New("unable to add CA to cert pool") + } + } + + tlsConfig.RootCAs = caCertPool + tlsConfig.ClientCAs = caCertPool + } + switch sslmode { case "disable": return []*tls.Config{nil}, nil @@ -690,33 +744,19 @@ func configTLS(settings map[string]string, thisHost string, parseConfigOptions P return nil, errors.New("sslmode is invalid") } - if sslrootcert != "" { - caCertPool := x509.NewCertPool() - - caPath := sslrootcert - caCert, err := ioutil.ReadFile(caPath) - if err != nil { - return nil, fmt.Errorf("unable to read CA file: %w", err) - } - - if !caCertPool.AppendCertsFromPEM(caCert) { - return nil, errors.New("unable to add CA to cert pool") - } - - tlsConfig.RootCAs = caCertPool - tlsConfig.ClientCAs = caCertPool - } - if (sslcert != "" && sslkey == "") || (sslcert == "" && sslkey != "") { return nil, errors.New(`both "sslcert" and "sslkey" are required`) } if sslcert != "" && sslkey != "" { - buf, err := ioutil.ReadFile(sslkey) + buf, err := os.ReadFile(sslkey) if err != nil { return nil, fmt.Errorf("unable to read sslkey: %w", err) } block, _ := pem.Decode(buf) + if block == nil { + return nil, errors.New("failed to decode sslkey") + } var pemKey []byte var decryptedKey []byte var decryptedError error @@ -751,7 +791,7 @@ func configTLS(settings map[string]string, thisHost string, parseConfigOptions P } else { pemKey = pem.EncodeToMemory(block) } - certfile, err := ioutil.ReadFile(sslcert) + certfile, err := os.ReadFile(sslcert) if err != nil { return nil, fmt.Errorf("unable to read cert: %w", err) } @@ -793,25 +833,14 @@ func parsePort(s string) (uint16, error) { } func makeDefaultDialer() *net.Dialer { - return &net.Dialer{KeepAlive: 5 * time.Minute} + // rely on GOLANG KeepAlive settings + return &net.Dialer{} } func makeDefaultResolver() *net.Resolver { return net.DefaultResolver } -func makeDefaultBuildFrontendFunc(minBufferLen int) BuildFrontendFunc { - return func(r io.Reader, w io.Writer) Frontend { - cr, err := chunkreader.NewConfig(r, chunkreader.Config{MinBufLen: minBufferLen}) - if err != nil { - panic(fmt.Sprintf("BUG: chunkreader.NewConfig failed: %v", err)) - } - frontend := pgproto3.NewFrontend(cr, w) - - return frontend - } -} - func parseConnectTimeoutSetting(s string) (time.Duration, error) { timeout, err := strconv.ParseInt(s, 10, 64) if err != nil { @@ -829,7 +858,7 @@ func makeConnectTimeoutDialFunc(timeout time.Duration) DialFunc { return d.DialContext } -// ValidateConnectTargetSessionAttrsReadWrite is an ValidateConnectFunc that implements libpq compatible +// ValidateConnectTargetSessionAttrsReadWrite is a ValidateConnectFunc that implements libpq compatible // target_session_attrs=read-write. func ValidateConnectTargetSessionAttrsReadWrite(ctx context.Context, pgConn *PgConn) error { result := pgConn.ExecParams(ctx, "show transaction_read_only", nil, nil, nil, nil).Read() @@ -844,7 +873,7 @@ func ValidateConnectTargetSessionAttrsReadWrite(ctx context.Context, pgConn *PgC return nil } -// ValidateConnectTargetSessionAttrsReadOnly is an ValidateConnectFunc that implements libpq compatible +// ValidateConnectTargetSessionAttrsReadOnly is a ValidateConnectFunc that implements libpq compatible // target_session_attrs=read-only. func ValidateConnectTargetSessionAttrsReadOnly(ctx context.Context, pgConn *PgConn) error { result := pgConn.ExecParams(ctx, "show transaction_read_only", nil, nil, nil, nil).Read() @@ -859,7 +888,7 @@ func ValidateConnectTargetSessionAttrsReadOnly(ctx context.Context, pgConn *PgCo return nil } -// ValidateConnectTargetSessionAttrsStandby is an ValidateConnectFunc that implements libpq compatible +// ValidateConnectTargetSessionAttrsStandby is a ValidateConnectFunc that implements libpq compatible // target_session_attrs=standby. func ValidateConnectTargetSessionAttrsStandby(ctx context.Context, pgConn *PgConn) error { result := pgConn.ExecParams(ctx, "select pg_is_in_recovery()", nil, nil, nil, nil).Read() @@ -874,7 +903,7 @@ func ValidateConnectTargetSessionAttrsStandby(ctx context.Context, pgConn *PgCon return nil } -// ValidateConnectTargetSessionAttrsPrimary is an ValidateConnectFunc that implements libpq compatible +// ValidateConnectTargetSessionAttrsPrimary is a ValidateConnectFunc that implements libpq compatible // target_session_attrs=primary. func ValidateConnectTargetSessionAttrsPrimary(ctx context.Context, pgConn *PgConn) error { result := pgConn.ExecParams(ctx, "select pg_is_in_recovery()", nil, nil, nil, nil).Read() @@ -889,7 +918,7 @@ func ValidateConnectTargetSessionAttrsPrimary(ctx context.Context, pgConn *PgCon return nil } -// ValidateConnectTargetSessionAttrsPreferStandby is an ValidateConnectFunc that implements libpq compatible +// ValidateConnectTargetSessionAttrsPreferStandby is a ValidateConnectFunc that implements libpq compatible // target_session_attrs=prefer-standby. func ValidateConnectTargetSessionAttrsPreferStandby(ctx context.Context, pgConn *PgConn) error { result := pgConn.ExecParams(ctx, "select pg_is_in_recovery()", nil, nil, nil, nil).Read() diff --git a/vendor/github.com/jackc/pgconn/internal/ctxwatch/context_watcher.go b/vendor/github.com/jackc/pgx/v5/pgconn/ctxwatch/context_watcher.go similarity index 71% rename from vendor/github.com/jackc/pgconn/internal/ctxwatch/context_watcher.go rename to vendor/github.com/jackc/pgx/v5/pgconn/ctxwatch/context_watcher.go index b39cb3ee..db8884eb 100644 --- a/vendor/github.com/jackc/pgconn/internal/ctxwatch/context_watcher.go +++ b/vendor/github.com/jackc/pgx/v5/pgconn/ctxwatch/context_watcher.go @@ -8,9 +8,8 @@ import ( // ContextWatcher watches a context and performs an action when the context is canceled. It can watch one context at a // time. type ContextWatcher struct { - onCancel func() - onUnwatchAfterCancel func() - unwatchChan chan struct{} + handler Handler + unwatchChan chan struct{} lock sync.Mutex watchInProgress bool @@ -20,11 +19,10 @@ type ContextWatcher struct { // NewContextWatcher returns a ContextWatcher. onCancel will be called when a watched context is canceled. // OnUnwatchAfterCancel will be called when Unwatch is called and the watched context had already been canceled and // onCancel called. -func NewContextWatcher(onCancel func(), onUnwatchAfterCancel func()) *ContextWatcher { +func NewContextWatcher(handler Handler) *ContextWatcher { cw := &ContextWatcher{ - onCancel: onCancel, - onUnwatchAfterCancel: onUnwatchAfterCancel, - unwatchChan: make(chan struct{}), + handler: handler, + unwatchChan: make(chan struct{}), } return cw @@ -46,7 +44,7 @@ func (cw *ContextWatcher) Watch(ctx context.Context) { go func() { select { case <-ctx.Done(): - cw.onCancel() + cw.handler.HandleCancel(ctx) cw.onCancelWasCalled = true <-cw.unwatchChan case <-cw.unwatchChan: @@ -66,8 +64,17 @@ func (cw *ContextWatcher) Unwatch() { if cw.watchInProgress { cw.unwatchChan <- struct{}{} if cw.onCancelWasCalled { - cw.onUnwatchAfterCancel() + cw.handler.HandleUnwatchAfterCancel() } cw.watchInProgress = false } } + +type Handler interface { + // HandleCancel is called when the context that a ContextWatcher is currently watching is canceled. canceledCtx is the + // context that was canceled. + HandleCancel(canceledCtx context.Context) + + // HandleUnwatchAfterCancel is called when a ContextWatcher that called HandleCancel on this Handler is unwatched. + HandleUnwatchAfterCancel() +} diff --git a/vendor/github.com/jackc/pgconn/defaults.go b/vendor/github.com/jackc/pgx/v5/pgconn/defaults.go similarity index 97% rename from vendor/github.com/jackc/pgconn/defaults.go rename to vendor/github.com/jackc/pgx/v5/pgconn/defaults.go index c7209fdd..1dd514ff 100644 --- a/vendor/github.com/jackc/pgconn/defaults.go +++ b/vendor/github.com/jackc/pgx/v5/pgconn/defaults.go @@ -40,8 +40,6 @@ func defaultSettings() map[string]string { settings["target_session_attrs"] = "any" - settings["min_read_buffer_size"] = "8192" - return settings } diff --git a/vendor/github.com/jackc/pgconn/defaults_windows.go b/vendor/github.com/jackc/pgx/v5/pgconn/defaults_windows.go similarity index 97% rename from vendor/github.com/jackc/pgconn/defaults_windows.go rename to vendor/github.com/jackc/pgx/v5/pgconn/defaults_windows.go index 71eb77db..33b4a1ff 100644 --- a/vendor/github.com/jackc/pgconn/defaults_windows.go +++ b/vendor/github.com/jackc/pgx/v5/pgconn/defaults_windows.go @@ -46,8 +46,6 @@ func defaultSettings() map[string]string { settings["target_session_attrs"] = "any" - settings["min_read_buffer_size"] = "8192" - return settings } diff --git a/vendor/github.com/jackc/pgx/v5/pgconn/doc.go b/vendor/github.com/jackc/pgx/v5/pgconn/doc.go new file mode 100644 index 00000000..70137501 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgconn/doc.go @@ -0,0 +1,38 @@ +// Package pgconn is a low-level PostgreSQL database driver. +/* +pgconn provides lower level access to a PostgreSQL connection than a database/sql or pgx connection. It operates at +nearly the same level is the C library libpq. + +Establishing a Connection + +Use Connect to establish a connection. It accepts a connection string in URL or keyword/value format and will read the +environment for libpq style environment variables. + +Executing a Query + +ExecParams and ExecPrepared execute a single query. They return readers that iterate over each row. The Read method +reads all rows into memory. + +Executing Multiple Queries in a Single Round Trip + +Exec and ExecBatch can execute multiple queries in a single round trip. They return readers that iterate over each query +result. The ReadAll method reads all query results into memory. + +Pipeline Mode + +Pipeline mode allows sending queries without having read the results of previously sent queries. It allows control of +exactly how many and when network round trips occur. + +Context Support + +All potentially blocking operations take a context.Context. The default behavior when a context is canceled is for the +method to immediately return. In most circumstances, this will also close the underlying connection. This behavior can +be customized by using BuildContextWatcherHandler on the Config to create a ctxwatch.Handler with different behavior. +This can be especially useful when queries that are frequently canceled and the overhead of creating new connections is +a problem. DeadlineContextWatcherHandler and CancelRequestContextWatcherHandler can be used to introduce a delay before +interrupting the query in such a way as to close the connection. + +The CancelRequest method may be used to request the PostgreSQL server cancel an in-progress query without forcing the +client to abort. +*/ +package pgconn diff --git a/vendor/github.com/jackc/pgconn/errors.go b/vendor/github.com/jackc/pgx/v5/pgconn/errors.go similarity index 54% rename from vendor/github.com/jackc/pgconn/errors.go rename to vendor/github.com/jackc/pgx/v5/pgconn/errors.go index 66d35584..ec4a6d47 100644 --- a/vendor/github.com/jackc/pgconn/errors.go +++ b/vendor/github.com/jackc/pgx/v5/pgconn/errors.go @@ -12,14 +12,15 @@ import ( // SafeToRetry checks if the err is guaranteed to have occurred before sending any data to the server. func SafeToRetry(err error) bool { - if e, ok := err.(interface{ SafeToRetry() bool }); ok { - return e.SafeToRetry() + var retryableErr interface{ SafeToRetry() bool } + if errors.As(err, &retryableErr) { + return retryableErr.SafeToRetry() } return false } -// Timeout checks if err was was caused by a timeout. To be specific, it is true if err was caused within pgconn by a -// context.Canceled, context.DeadlineExceeded or an implementer of net.Error where Timeout() is true. +// Timeout checks if err was caused by a timeout. To be specific, it is true if err was caused within pgconn by a +// context.DeadlineExceeded or an implementer of net.Error where Timeout() is true. func Timeout(err error) bool { var timeoutErr *errTimeout return errors.As(err, &timeoutErr) @@ -29,23 +30,24 @@ func Timeout(err error) bool { // http://www.postgresql.org/docs/11/static/protocol-error-fields.html for // detailed field description. type PgError struct { - Severity string - Code string - Message string - Detail string - Hint string - Position int32 - InternalPosition int32 - InternalQuery string - Where string - SchemaName string - TableName string - ColumnName string - DataTypeName string - ConstraintName string - File string - Line int32 - Routine string + Severity string + SeverityUnlocalized string + Code string + Message string + Detail string + Hint string + Position int32 + InternalPosition int32 + InternalQuery string + Where string + SchemaName string + TableName string + ColumnName string + DataTypeName string + ConstraintName string + File string + Line int32 + Routine string } func (pe *PgError) Error() string { @@ -57,22 +59,37 @@ func (pe *PgError) SQLState() string { return pe.Code } -type connectError struct { - config *Config - msg string +// ConnectError is the error returned when a connection attempt fails. +type ConnectError struct { + Config *Config // The configuration that was used in the connection attempt. err error } -func (e *connectError) Error() string { - sb := &strings.Builder{} - fmt.Fprintf(sb, "failed to connect to `host=%s user=%s database=%s`: %s", e.config.Host, e.config.User, e.config.Database, e.msg) - if e.err != nil { - fmt.Fprintf(sb, " (%s)", e.err.Error()) +func (e *ConnectError) Error() string { + prefix := fmt.Sprintf("failed to connect to `user=%s database=%s`:", e.Config.User, e.Config.Database) + details := e.err.Error() + if strings.Contains(details, "\n") { + return prefix + "\n\t" + strings.ReplaceAll(details, "\n", "\n\t") + } else { + return prefix + " " + details } - return sb.String() } -func (e *connectError) Unwrap() error { +func (e *ConnectError) Unwrap() error { + return e.err +} + +type perDialConnectError struct { + address string + originalHostname string + err error +} + +func (e *perDialConnectError) Error() string { + return fmt.Sprintf("%s (%s): %s", e.address, e.originalHostname, e.err.Error()) +} + +func (e *perDialConnectError) Unwrap() error { return e.err } @@ -88,29 +105,39 @@ func (e *connLockError) Error() string { return e.status } -type parseConfigError struct { - connString string +// ParseConfigError is the error returned when a connection string cannot be parsed. +type ParseConfigError struct { + ConnString string // The connection string that could not be parsed. msg string err error } -func (e *parseConfigError) Error() string { - connString := redactPW(e.connString) +func (e *ParseConfigError) Error() string { + // Now that ParseConfigError is public and ConnString is available to the developer, perhaps it would be better only + // return a static string. That would ensure that the error message cannot leak a password. The ConnString field would + // allow access to the original string if desired and Unwrap would allow access to the underlying error. + connString := redactPW(e.ConnString) if e.err == nil { return fmt.Sprintf("cannot parse `%s`: %s", connString, e.msg) } return fmt.Sprintf("cannot parse `%s`: %s (%s)", connString, e.msg, e.err.Error()) } -func (e *parseConfigError) Unwrap() error { +func (e *ParseConfigError) Unwrap() error { return e.err } -// preferContextOverNetTimeoutError returns ctx.Err() if ctx.Err() is present and err is a net.Error with Timeout() == -// true. Otherwise returns err. -func preferContextOverNetTimeoutError(ctx context.Context, err error) error { - if err, ok := err.(net.Error); ok && err.Timeout() && ctx.Err() != nil { - return &errTimeout{err: ctx.Err()} +func normalizeTimeoutError(ctx context.Context, err error) error { + var netErr net.Error + if errors.As(err, &netErr) && netErr.Timeout() { + if ctx.Err() == context.Canceled { + // Since the timeout was caused by a context cancellation, the actual error is context.Canceled not the timeout error. + return context.Canceled + } else if ctx.Err() == context.DeadlineExceeded { + return &errTimeout{err: ctx.Err()} + } else { + return &errTimeout{err: netErr} + } } return err } @@ -178,33 +205,16 @@ func newContextAlreadyDoneError(ctx context.Context) (err error) { return &errTimeout{&contextAlreadyDoneError{err: ctx.Err()}} } -type writeError struct { - err error - safeToRetry bool -} - -func (e *writeError) Error() string { - return fmt.Sprintf("write failed: %s", e.err.Error()) -} - -func (e *writeError) SafeToRetry() bool { - return e.safeToRetry -} - -func (e *writeError) Unwrap() error { - return e.err -} - func redactPW(connString string) string { if strings.HasPrefix(connString, "postgres://") || strings.HasPrefix(connString, "postgresql://") { if u, err := url.Parse(connString); err == nil { return redactURL(u) } } - quotedDSN := regexp.MustCompile(`password='[^']*'`) - connString = quotedDSN.ReplaceAllLiteralString(connString, "password=xxxxx") - plainDSN := regexp.MustCompile(`password=[^ ]*`) - connString = plainDSN.ReplaceAllLiteralString(connString, "password=xxxxx") + quotedKV := regexp.MustCompile(`password='[^']*'`) + connString = quotedKV.ReplaceAllLiteralString(connString, "password=xxxxx") + plainKV := regexp.MustCompile(`password=[^ ]*`) + connString = plainKV.ReplaceAllLiteralString(connString, "password=xxxxx") brokenURL := regexp.MustCompile(`:[^:@]+?@`) connString = brokenURL.ReplaceAllLiteralString(connString, ":xxxxxx@") return connString diff --git a/vendor/github.com/jackc/pgx/v5/pgconn/internal/bgreader/bgreader.go b/vendor/github.com/jackc/pgx/v5/pgconn/internal/bgreader/bgreader.go new file mode 100644 index 00000000..e65c2c2b --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgconn/internal/bgreader/bgreader.go @@ -0,0 +1,139 @@ +// Package bgreader provides a io.Reader that can optionally buffer reads in the background. +package bgreader + +import ( + "io" + "sync" + + "github.com/jackc/pgx/v5/internal/iobufpool" +) + +const ( + StatusStopped = iota + StatusRunning + StatusStopping +) + +// BGReader is an io.Reader that can optionally buffer reads in the background. It is safe for concurrent use. +type BGReader struct { + r io.Reader + + cond *sync.Cond + status int32 + readResults []readResult +} + +type readResult struct { + buf *[]byte + err error +} + +// Start starts the backgrounder reader. If the background reader is already running this is a no-op. The background +// reader will stop automatically when the underlying reader returns an error. +func (r *BGReader) Start() { + r.cond.L.Lock() + defer r.cond.L.Unlock() + + switch r.status { + case StatusStopped: + r.status = StatusRunning + go r.bgRead() + case StatusRunning: + // no-op + case StatusStopping: + r.status = StatusRunning + } +} + +// Stop tells the background reader to stop after the in progress Read returns. It is safe to call Stop when the +// background reader is not running. +func (r *BGReader) Stop() { + r.cond.L.Lock() + defer r.cond.L.Unlock() + + switch r.status { + case StatusStopped: + // no-op + case StatusRunning: + r.status = StatusStopping + case StatusStopping: + // no-op + } +} + +// Status returns the current status of the background reader. +func (r *BGReader) Status() int32 { + r.cond.L.Lock() + defer r.cond.L.Unlock() + return r.status +} + +func (r *BGReader) bgRead() { + keepReading := true + for keepReading { + buf := iobufpool.Get(8192) + n, err := r.r.Read(*buf) + *buf = (*buf)[:n] + + r.cond.L.Lock() + r.readResults = append(r.readResults, readResult{buf: buf, err: err}) + if r.status == StatusStopping || err != nil { + r.status = StatusStopped + keepReading = false + } + r.cond.L.Unlock() + r.cond.Broadcast() + } +} + +// Read implements the io.Reader interface. +func (r *BGReader) Read(p []byte) (int, error) { + r.cond.L.Lock() + defer r.cond.L.Unlock() + + if len(r.readResults) > 0 { + return r.readFromReadResults(p) + } + + // There are no unread background read results and the background reader is stopped. + if r.status == StatusStopped { + return r.r.Read(p) + } + + // Wait for results from the background reader + for len(r.readResults) == 0 { + r.cond.Wait() + } + return r.readFromReadResults(p) +} + +// readBackgroundResults reads a result previously read by the background reader. r.cond.L must be held. +func (r *BGReader) readFromReadResults(p []byte) (int, error) { + buf := r.readResults[0].buf + var err error + + n := copy(p, *buf) + if n == len(*buf) { + err = r.readResults[0].err + iobufpool.Put(buf) + if len(r.readResults) == 1 { + r.readResults = nil + } else { + r.readResults = r.readResults[1:] + } + } else { + *buf = (*buf)[n:] + r.readResults[0].buf = buf + } + + return n, err +} + +func New(r io.Reader) *BGReader { + return &BGReader{ + r: r, + cond: &sync.Cond{ + L: &sync.Mutex{}, + }, + } +} diff --git a/vendor/github.com/jackc/pgconn/krb5.go b/vendor/github.com/jackc/pgx/v5/pgconn/krb5.go similarity index 94% rename from vendor/github.com/jackc/pgconn/krb5.go rename to vendor/github.com/jackc/pgx/v5/pgconn/krb5.go index 1639b728..3c1af347 100644 --- a/vendor/github.com/jackc/pgconn/krb5.go +++ b/vendor/github.com/jackc/pgx/v5/pgconn/krb5.go @@ -4,7 +4,7 @@ import ( "errors" "fmt" - "github.com/jackc/pgproto3/v2" + "github.com/jackc/pgx/v5/pgproto3" ) // NewGSSFunc creates a GSS authentication provider, for use with @@ -62,11 +62,8 @@ func (c *PgConn) gssAuth() error { gssResponse := &pgproto3.GSSResponse{ Data: nextData, } - buf, err := gssResponse.Encode(nil) - if err != nil { - return err - } - _, err = c.conn.Write(buf) + c.frontend.Send(gssResponse) + err = c.flushWithPotentialWriteReadDeadlock() if err != nil { return err } diff --git a/vendor/github.com/jackc/pgconn/pgconn.go b/vendor/github.com/jackc/pgx/v5/pgconn/pgconn.go similarity index 54% rename from vendor/github.com/jackc/pgconn/pgconn.go rename to vendor/github.com/jackc/pgx/v5/pgconn/pgconn.go index 894baa29..7efb522a 100644 --- a/vendor/github.com/jackc/pgconn/pgconn.go +++ b/vendor/github.com/jackc/pgx/v5/pgconn/pgconn.go @@ -16,9 +16,11 @@ import ( "sync" "time" - "github.com/jackc/pgconn/internal/ctxwatch" - "github.com/jackc/pgio" - "github.com/jackc/pgproto3/v2" + "github.com/jackc/pgx/v5/internal/iobufpool" + "github.com/jackc/pgx/v5/internal/pgio" + "github.com/jackc/pgx/v5/pgconn/ctxwatch" + "github.com/jackc/pgx/v5/pgconn/internal/bgreader" + "github.com/jackc/pgx/v5/pgproto3" ) const ( @@ -29,8 +31,6 @@ const ( connStatusBusy ) -const wbufLen = 1024 - // Notice represents a notice response message reported by the PostgreSQL server. Be aware that this is distinct from // LISTEN/NOTIFY notification. type Notice PgError @@ -50,7 +50,13 @@ type DialFunc func(ctx context.Context, network, addr string) (net.Conn, error) type LookupFunc func(ctx context.Context, host string) (addrs []string, err error) // BuildFrontendFunc is a function that can be used to create Frontend implementation for connection. -type BuildFrontendFunc func(r io.Reader, w io.Writer) Frontend +type BuildFrontendFunc func(r io.Reader, w io.Writer) *pgproto3.Frontend + +// PgErrorHandler is a function that handles errors returned from Postgres. This function must return true to keep +// the connection open. Returning false will cause the connection to be closed immediately. You should return +// false on any FATAL-severity errors. This will not receive network errors. The *PgConn is provided so the handler is +// aware of the origin of the error, but it must not invoke any query method. +type PgErrorHandler func(*PgConn, *PgError) bool // NoticeHandler is a function that can handle notices received from the PostgreSQL server. Notices can be received at // any time, usually during handling of a query response. The *PgConn is provided so the handler is aware of the origin @@ -64,19 +70,19 @@ type NoticeHandler func(*PgConn, *Notice) // notice event. type NotificationHandler func(*PgConn, *Notification) -// Frontend used to receive messages from backend. -type Frontend interface { - Receive() (pgproto3.BackendMessage, error) -} - // PgConn is a low-level PostgreSQL connection handle. It is not safe for concurrent usage. type PgConn struct { - conn net.Conn // the underlying TCP or unix domain socket connection + conn net.Conn pid uint32 // backend pid secretKey uint32 // key to use to send a cancel query message to the server parameterStatuses map[string]string // parameters that have been reported by the server txStatus byte - frontend Frontend + frontend *pgproto3.Frontend + bgReader *bgreader.BGReader + slowWriteTimer *time.Timer + bgReaderStarted chan struct{} + + customData map[string]any config *Config @@ -90,16 +96,18 @@ type PgConn struct { peekedMsg pgproto3.BackendMessage // Reusable / preallocated resources - wbuf []byte // write buffer resultReader ResultReader multiResultReader MultiResultReader + pipeline Pipeline contextWatcher *ctxwatch.ContextWatcher + fieldDescriptions [16]FieldDescription cleanupDone chan struct{} } -// Connect establishes a connection to a PostgreSQL server using the environment and connString (in URL or DSN format) -// to provide configuration. See documentation for ParseConfig for details. ctx can be used to cancel a connect attempt. +// Connect establishes a connection to a PostgreSQL server using the environment and connString (in URL or keyword/value +// format) to provide configuration. See documentation for [ParseConfig] for details. ctx can be used to cancel a +// connect attempt. func Connect(ctx context.Context, connString string) (*PgConn, error) { config, err := ParseConfig(connString) if err != nil { @@ -109,9 +117,9 @@ func Connect(ctx context.Context, connString string) (*PgConn, error) { return ConnectConfig(ctx, config) } -// Connect establishes a connection to a PostgreSQL server using the environment and connString (in URL or DSN format) -// and ParseConfigOptions to provide additional configuration. See documentation for ParseConfig for details. ctx can be -// used to cancel a connect attempt. +// Connect establishes a connection to a PostgreSQL server using the environment and connString (in URL or keyword/value +// format) and ParseConfigOptions to provide additional configuration. See documentation for [ParseConfig] for details. +// ctx can be used to cancel a connect attempt. func ConnectWithOptions(ctx context.Context, connString string, parseConfigOptions ParseConfigOptions) (*PgConn, error) { config, err := ParseConfigWithOptions(connString, parseConfigOptions) if err != nil { @@ -122,115 +130,82 @@ func ConnectWithOptions(ctx context.Context, connString string, parseConfigOptio } // Connect establishes a connection to a PostgreSQL server using config. config must have been constructed with -// ParseConfig. ctx can be used to cancel a connect attempt. +// [ParseConfig]. ctx can be used to cancel a connect attempt. // // If config.Fallbacks are present they will sequentially be tried in case of error establishing network connection. An // authentication error will terminate the chain of attempts (like libpq: -// https://www.postgresql.org/docs/11/libpq-connect.html#LIBPQ-MULTIPLE-HOSTS) and be returned as the error. Otherwise, -// if all attempts fail the last error is returned. -func ConnectConfig(octx context.Context, config *Config) (pgConn *PgConn, err error) { +// https://www.postgresql.org/docs/11/libpq-connect.html#LIBPQ-MULTIPLE-HOSTS) and be returned as the error. +func ConnectConfig(ctx context.Context, config *Config) (*PgConn, error) { // Default values are set in ParseConfig. Enforce initial creation by ParseConfig rather than setting defaults from // zero values. if !config.createdByParseConfig { panic("config must be created by ParseConfig") } - // Simplify usage by treating primary config and fallbacks the same. - fallbackConfigs := []*FallbackConfig{ - { - Host: config.Host, - Port: config.Port, - TLSConfig: config.TLSConfig, - }, - } - fallbackConfigs = append(fallbackConfigs, config.Fallbacks...) - ctx := octx - fallbackConfigs, err = expandWithIPs(ctx, config.LookupFunc, fallbackConfigs) - if err != nil { - return nil, &connectError{config: config, msg: "hostname resolving error", err: err} - } - - if len(fallbackConfigs) == 0 { - return nil, &connectError{config: config, msg: "hostname resolving error", err: errors.New("ip addr wasn't found")} - } + var allErrors []error - foundBestServer := false - var fallbackConfig *FallbackConfig - for i, fc := range fallbackConfigs { - // ConnectTimeout restricts the whole connection process. - if config.ConnectTimeout != 0 { - // create new context first time or when previous host was different - if i == 0 || (fallbackConfigs[i].Host != fallbackConfigs[i-1].Host) { - var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(octx, config.ConnectTimeout) - defer cancel() - } - } else { - ctx = octx - } - pgConn, err = connect(ctx, config, fc, false) - if err == nil { - foundBestServer = true - break - } else if pgerr, ok := err.(*PgError); ok { - err = &connectError{config: config, msg: "server error", err: pgerr} - const ERRCODE_INVALID_PASSWORD = "28P01" // wrong password - const ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION = "28000" // wrong password or bad pg_hba.conf settings - const ERRCODE_INVALID_CATALOG_NAME = "3D000" // db does not exist - const ERRCODE_INSUFFICIENT_PRIVILEGE = "42501" // missing connect privilege - if pgerr.Code == ERRCODE_INVALID_PASSWORD || - pgerr.Code == ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION && fc.TLSConfig != nil || - pgerr.Code == ERRCODE_INVALID_CATALOG_NAME || - pgerr.Code == ERRCODE_INSUFFICIENT_PRIVILEGE { - break - } - } else if cerr, ok := err.(*connectError); ok { - if _, ok := cerr.err.(*NotPreferredError); ok { - fallbackConfig = fc - } - } + connectConfigs, errs := buildConnectOneConfigs(ctx, config) + if len(errs) > 0 { + allErrors = append(allErrors, errs...) } - if !foundBestServer && fallbackConfig != nil { - pgConn, err = connect(ctx, config, fallbackConfig, true) - if pgerr, ok := err.(*PgError); ok { - err = &connectError{config: config, msg: "server error", err: pgerr} - } + if len(connectConfigs) == 0 { + return nil, &ConnectError{Config: config, err: fmt.Errorf("hostname resolving error: %w", errors.Join(allErrors...))} } - if err != nil { - return nil, err // no need to wrap in connectError because it will already be wrapped in all cases except PgError + pgConn, errs := connectPreferred(ctx, config, connectConfigs) + if len(errs) > 0 { + allErrors = append(allErrors, errs...) + return nil, &ConnectError{Config: config, err: errors.Join(allErrors...)} } if config.AfterConnect != nil { err := config.AfterConnect(ctx, pgConn) if err != nil { pgConn.conn.Close() - return nil, &connectError{config: config, msg: "AfterConnect error", err: err} + return nil, &ConnectError{Config: config, err: fmt.Errorf("AfterConnect error: %w", err)} } } return pgConn, nil } -func expandWithIPs(ctx context.Context, lookupFn LookupFunc, fallbacks []*FallbackConfig) ([]*FallbackConfig, error) { - var configs []*FallbackConfig +// buildConnectOneConfigs resolves hostnames and builds a list of connectOneConfigs to try connecting to. It returns a +// slice of successfully resolved connectOneConfigs and a slice of errors. It is possible for both slices to contain +// values if some hosts were successfully resolved and others were not. +func buildConnectOneConfigs(ctx context.Context, config *Config) ([]*connectOneConfig, []error) { + // Simplify usage by treating primary config and fallbacks the same. + fallbackConfigs := []*FallbackConfig{ + { + Host: config.Host, + Port: config.Port, + TLSConfig: config.TLSConfig, + }, + } + fallbackConfigs = append(fallbackConfigs, config.Fallbacks...) + + var configs []*connectOneConfig - for _, fb := range fallbacks { + var allErrors []error + + for _, fb := range fallbackConfigs { // skip resolve for unix sockets if isAbsolutePath(fb.Host) { - configs = append(configs, &FallbackConfig{ - Host: fb.Host, - Port: fb.Port, - TLSConfig: fb.TLSConfig, + network, address := NetworkAddress(fb.Host, fb.Port) + configs = append(configs, &connectOneConfig{ + network: network, + address: address, + originalHostname: fb.Host, + tlsConfig: fb.TLSConfig, }) continue } - ips, err := lookupFn(ctx, fb.Host) + ips, err := config.LookupFunc(ctx, fb.Host) if err != nil { - return nil, err + allErrors = append(allErrors, err) + continue } for _, ip := range ips { @@ -238,66 +213,140 @@ func expandWithIPs(ctx context.Context, lookupFn LookupFunc, fallbacks []*Fallba if err == nil { port, err := strconv.ParseUint(splitPort, 10, 16) if err != nil { - return nil, fmt.Errorf("error parsing port (%s) from lookup: %w", splitPort, err) + return nil, []error{fmt.Errorf("error parsing port (%s) from lookup: %w", splitPort, err)} } - configs = append(configs, &FallbackConfig{ - Host: splitIP, - Port: uint16(port), - TLSConfig: fb.TLSConfig, + network, address := NetworkAddress(splitIP, uint16(port)) + configs = append(configs, &connectOneConfig{ + network: network, + address: address, + originalHostname: fb.Host, + tlsConfig: fb.TLSConfig, }) } else { - configs = append(configs, &FallbackConfig{ - Host: ip, - Port: fb.Port, - TLSConfig: fb.TLSConfig, + network, address := NetworkAddress(ip, fb.Port) + configs = append(configs, &connectOneConfig{ + network: network, + address: address, + originalHostname: fb.Host, + tlsConfig: fb.TLSConfig, }) } } } - return configs, nil + return configs, allErrors +} + +// connectPreferred attempts to connect to the preferred host from connectOneConfigs. The connections are attempted in +// order. If a connection is successful it is returned. If no connection is successful then all errors are returned. If +// a connection attempt returns a [NotPreferredError], then that host will be used if no other hosts are successful. +func connectPreferred(ctx context.Context, config *Config, connectOneConfigs []*connectOneConfig) (*PgConn, []error) { + octx := ctx + var allErrors []error + + var fallbackConnectOneConfig *connectOneConfig + for i, c := range connectOneConfigs { + // ConnectTimeout restricts the whole connection process. + if config.ConnectTimeout != 0 { + // create new context first time or when previous host was different + if i == 0 || (connectOneConfigs[i].address != connectOneConfigs[i-1].address) { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(octx, config.ConnectTimeout) + defer cancel() + } + } else { + ctx = octx + } + + pgConn, err := connectOne(ctx, config, c, false) + if pgConn != nil { + return pgConn, nil + } + + allErrors = append(allErrors, err) + + var pgErr *PgError + if errors.As(err, &pgErr) { + const ERRCODE_INVALID_PASSWORD = "28P01" // wrong password + const ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION = "28000" // wrong password or bad pg_hba.conf settings + const ERRCODE_INVALID_CATALOG_NAME = "3D000" // db does not exist + const ERRCODE_INSUFFICIENT_PRIVILEGE = "42501" // missing connect privilege + if pgErr.Code == ERRCODE_INVALID_PASSWORD || + pgErr.Code == ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION && c.tlsConfig != nil || + pgErr.Code == ERRCODE_INVALID_CATALOG_NAME || + pgErr.Code == ERRCODE_INSUFFICIENT_PRIVILEGE { + return nil, allErrors + } + } + + var npErr *NotPreferredError + if errors.As(err, &npErr) { + fallbackConnectOneConfig = c + } + } + + if fallbackConnectOneConfig != nil { + pgConn, err := connectOne(ctx, config, fallbackConnectOneConfig, true) + if err == nil { + return pgConn, nil + } + allErrors = append(allErrors, err) + } + + return nil, allErrors } -func connect(ctx context.Context, config *Config, fallbackConfig *FallbackConfig, - ignoreNotPreferredErr bool) (*PgConn, error) { +// connectOne makes one connection attempt to a single host. +func connectOne(ctx context.Context, config *Config, connectConfig *connectOneConfig, + ignoreNotPreferredErr bool, +) (*PgConn, error) { pgConn := new(PgConn) pgConn.config = config - pgConn.wbuf = make([]byte, 0, wbufLen) pgConn.cleanupDone = make(chan struct{}) + pgConn.customData = make(map[string]any) var err error - network, address := NetworkAddress(fallbackConfig.Host, fallbackConfig.Port) - netConn, err := config.DialFunc(ctx, network, address) - if err != nil { - var netErr net.Error - if errors.As(err, &netErr) && netErr.Timeout() { - err = &errTimeout{err: err} - } - return nil, &connectError{config: config, msg: "dial error", err: err} + + newPerDialConnectError := func(msg string, err error) *perDialConnectError { + err = normalizeTimeoutError(ctx, err) + e := &perDialConnectError{address: connectConfig.address, originalHostname: connectConfig.originalHostname, err: fmt.Errorf("%s: %w", msg, err)} + return e } - pgConn.conn = netConn - pgConn.contextWatcher = newContextWatcher(netConn) - pgConn.contextWatcher.Watch(ctx) + pgConn.conn, err = config.DialFunc(ctx, connectConfig.network, connectConfig.address) + if err != nil { + return nil, newPerDialConnectError("dial error", err) + } - if fallbackConfig.TLSConfig != nil { - tlsConn, err := startTLS(netConn, fallbackConfig.TLSConfig) + if connectConfig.tlsConfig != nil { + pgConn.contextWatcher = ctxwatch.NewContextWatcher(&DeadlineContextWatcherHandler{Conn: pgConn.conn}) + pgConn.contextWatcher.Watch(ctx) + tlsConn, err := startTLS(pgConn.conn, connectConfig.tlsConfig) pgConn.contextWatcher.Unwatch() // Always unwatch `netConn` after TLS. if err != nil { - netConn.Close() - return nil, &connectError{config: config, msg: "tls error", err: err} + pgConn.conn.Close() + return nil, newPerDialConnectError("tls error", err) } pgConn.conn = tlsConn - pgConn.contextWatcher = newContextWatcher(tlsConn) - pgConn.contextWatcher.Watch(ctx) } + pgConn.contextWatcher = ctxwatch.NewContextWatcher(config.BuildContextWatcherHandler(pgConn)) + pgConn.contextWatcher.Watch(ctx) defer pgConn.contextWatcher.Unwatch() pgConn.parameterStatuses = make(map[string]string) pgConn.status = connStatusConnecting - pgConn.frontend = config.BuildFrontend(pgConn.conn, pgConn.conn) + pgConn.bgReader = bgreader.New(pgConn.conn) + pgConn.slowWriteTimer = time.AfterFunc(time.Duration(math.MaxInt64), + func() { + pgConn.bgReader.Start() + pgConn.bgReaderStarted <- struct{}{} + }, + ) + pgConn.slowWriteTimer.Stop() + pgConn.bgReaderStarted = make(chan struct{}) + pgConn.frontend = config.BuildFrontend(pgConn.bgReader, pgConn.conn) startupMsg := pgproto3.StartupMessage{ ProtocolVersion: pgproto3.ProtocolVersionNumber, @@ -314,13 +363,10 @@ func connect(ctx context.Context, config *Config, fallbackConfig *FallbackConfig startupMsg.Parameters["database"] = config.Database } - buf, err := startupMsg.Encode(pgConn.wbuf) - if err != nil { - return nil, &connectError{config: config, msg: "failed to write startup message", err: err} - } - if _, err := pgConn.conn.Write(buf); err != nil { + pgConn.frontend.Send(&startupMsg) + if err := pgConn.flushWithPotentialWriteReadDeadlock(); err != nil { pgConn.conn.Close() - return nil, &connectError{config: config, msg: "failed to write startup message", err: err} + return nil, newPerDialConnectError("failed to write startup message", err) } for { @@ -328,9 +374,9 @@ func connect(ctx context.Context, config *Config, fallbackConfig *FallbackConfig if err != nil { pgConn.conn.Close() if err, ok := err.(*PgError); ok { - return nil, err + return nil, newPerDialConnectError("server error", err) } - return nil, &connectError{config: config, msg: "failed to receive message", err: preferContextOverNetTimeoutError(ctx, err)} + return nil, newPerDialConnectError("failed to receive message", err) } switch msg := msg.(type) { @@ -343,26 +389,26 @@ func connect(ctx context.Context, config *Config, fallbackConfig *FallbackConfig err = pgConn.txPasswordMessage(pgConn.config.Password) if err != nil { pgConn.conn.Close() - return nil, &connectError{config: config, msg: "failed to write password message", err: err} + return nil, newPerDialConnectError("failed to write password message", err) } case *pgproto3.AuthenticationMD5Password: digestedPassword := "md5" + hexMD5(hexMD5(pgConn.config.Password+pgConn.config.User)+string(msg.Salt[:])) err = pgConn.txPasswordMessage(digestedPassword) if err != nil { pgConn.conn.Close() - return nil, &connectError{config: config, msg: "failed to write password message", err: err} + return nil, newPerDialConnectError("failed to write password message", err) } case *pgproto3.AuthenticationSASL: err = pgConn.scramAuth(msg.AuthMechanisms) if err != nil { pgConn.conn.Close() - return nil, &connectError{config: config, msg: "failed SASL auth", err: err} + return nil, newPerDialConnectError("failed SASL auth", err) } case *pgproto3.AuthenticationGSS: err = pgConn.gssAuth() if err != nil { pgConn.conn.Close() - return nil, &connectError{config: config, msg: "failed GSS auth", err: err} + return nil, newPerDialConnectError("failed GSS auth", err) } case *pgproto3.ReadyForQuery: pgConn.status = connStatusIdle @@ -380,7 +426,7 @@ func connect(ctx context.Context, config *Config, fallbackConfig *FallbackConfig return pgConn, nil } pgConn.conn.Close() - return nil, &connectError{config: config, msg: "ValidateConnect failed", err: err} + return nil, newPerDialConnectError("ValidateConnect failed", err) } } return pgConn, nil @@ -388,21 +434,14 @@ func connect(ctx context.Context, config *Config, fallbackConfig *FallbackConfig // handled by ReceiveMessage case *pgproto3.ErrorResponse: pgConn.conn.Close() - return nil, ErrorResponseToPgError(msg) + return nil, newPerDialConnectError("server error", ErrorResponseToPgError(msg)) default: pgConn.conn.Close() - return nil, &connectError{config: config, msg: "received unexpected message", err: err} + return nil, newPerDialConnectError("received unexpected message", err) } } } -func newContextWatcher(conn net.Conn) *ctxwatch.ContextWatcher { - return ctxwatch.NewContextWatcher( - func() { conn.SetDeadline(time.Date(1, 1, 1, 1, 1, 1, 1, time.UTC)) }, - func() { conn.SetDeadline(time.Time{}) }, - ) -} - func startTLS(conn net.Conn, tlsConfig *tls.Config) (net.Conn, error) { err := binary.Write(conn, binary.BigEndian, []int32{8, 80877103}) if err != nil { @@ -422,13 +461,8 @@ func startTLS(conn net.Conn, tlsConfig *tls.Config) (net.Conn, error) { } func (pgConn *PgConn) txPasswordMessage(password string) (err error) { - msg := &pgproto3.PasswordMessage{Password: password} - buf, err := msg.Encode(pgConn.wbuf) - if err != nil { - return err - } - _, err = pgConn.conn.Write(buf) - return err + pgConn.frontend.Send(&pgproto3.PasswordMessage{Password: password}) + return pgConn.flushWithPotentialWriteReadDeadlock() } func hexMD5(s string) string { @@ -455,36 +489,6 @@ func (pgConn *PgConn) signalMessage() chan struct{} { return ch } -// SendBytes sends buf to the PostgreSQL server. It must only be used when the connection is not busy. e.g. It is as -// error to call SendBytes while reading the result of a query. -// -// This is a very low level method that requires deep understanding of the PostgreSQL wire protocol to use correctly. -// See https://www.postgresql.org/docs/current/protocol.html. -func (pgConn *PgConn) SendBytes(ctx context.Context, buf []byte) error { - if err := pgConn.lock(); err != nil { - return err - } - defer pgConn.unlock() - - if ctx != context.Background() { - select { - case <-ctx.Done(): - return newContextAlreadyDoneError(ctx) - default: - } - pgConn.contextWatcher.Watch(ctx) - defer pgConn.contextWatcher.Unwatch() - } - - n, err := pgConn.conn.Write(buf) - if err != nil { - pgConn.asyncClose() - return &writeError{err: err, safeToRetry: n == 0} - } - - return nil -} - // ReceiveMessage receives one wire protocol message from the PostgreSQL server. It must only be used when the // connection is not busy. e.g. It is an error to call ReceiveMessage while reading the result of a query. The messages // are still handled by the core pgconn message handling system so receiving a NotificationResponse will still trigger @@ -512,8 +516,9 @@ func (pgConn *PgConn) ReceiveMessage(ctx context.Context) (pgproto3.BackendMessa if err != nil { err = &pgconnError{ msg: "receive message failed", - err: preferContextOverNetTimeoutError(ctx, err), - safeToRetry: true} + err: normalizeTimeoutError(ctx, err), + safeToRetry: true, + } } return msg, err } @@ -561,13 +566,6 @@ func (pgConn *PgConn) peekMessage() (pgproto3.BackendMessage, error) { func (pgConn *PgConn) receiveMessage() (pgproto3.BackendMessage, error) { msg, err := pgConn.peekMessage() if err != nil { - // Close on anything other than timeout error - everything else is fatal - var netErr net.Error - isNetErr := errors.As(err, &netErr) - if !(isNetErr && netErr.Timeout()) { - pgConn.asyncClose() - } - return nil, err } pgConn.peekedMsg = nil @@ -578,11 +576,12 @@ func (pgConn *PgConn) receiveMessage() (pgproto3.BackendMessage, error) { case *pgproto3.ParameterStatus: pgConn.parameterStatuses[msg.Name] = msg.Value case *pgproto3.ErrorResponse: - if msg.Severity == "FATAL" { + err := ErrorResponseToPgError(msg) + if pgConn.config.OnPgError != nil && !pgConn.config.OnPgError(pgConn, err) { pgConn.status = connStatusClosed pgConn.conn.Close() // Ignore error as the connection is already broken and there is already an error to return. close(pgConn.cleanupDone) - return nil, ErrorResponseToPgError(msg) + return nil, err } case *pgproto3.NoticeResponse: if pgConn.config.OnNotice != nil { @@ -597,7 +596,8 @@ func (pgConn *PgConn) receiveMessage() (pgproto3.BackendMessage, error) { return msg, nil } -// Conn returns the underlying net.Conn. +// Conn returns the underlying net.Conn. This rarely necessary. If the connection will be directly used for reading or +// writing then SyncConn should usually be called before Conn. func (pgConn *PgConn) Conn() net.Conn { return pgConn.conn } @@ -625,7 +625,12 @@ func (pgConn *PgConn) SecretKey() uint32 { return pgConn.secretKey } -// Close closes a connection. It is safe to call Close on a already closed connection. Close attempts a clean close by +// Frontend returns the underlying *pgproto3.Frontend. This rarely necessary. +func (pgConn *PgConn) Frontend() *pgproto3.Frontend { + return pgConn.frontend +} + +// Close closes a connection. It is safe to call Close on an already closed connection. Close attempts a clean close by // sending the exit message to PostgreSQL. However, this could block so ctx is available to limit the time to wait. The // underlying net.Conn.Close() will always be called regardless of any other errors. func (pgConn *PgConn) Close(ctx context.Context) error { @@ -654,7 +659,8 @@ func (pgConn *PgConn) Close(ctx context.Context) error { // ignores errors. // // See https://github.com/jackc/pgx/issues/637 - pgConn.conn.Write([]byte{'X', 0, 0, 0, 4}) + pgConn.frontend.Send(&pgproto3.Terminate{}) + pgConn.flushWithPotentialWriteReadDeadlock() return pgConn.conn.Close() } @@ -680,7 +686,8 @@ func (pgConn *PgConn) asyncClose() { pgConn.conn.SetDeadline(deadline) - pgConn.conn.Write([]byte{'X', 0, 0, 0, 4}) + pgConn.frontend.Send(&pgproto3.Terminate{}) + pgConn.flushWithPotentialWriteReadDeadlock() }() } @@ -738,16 +745,23 @@ func (pgConn *PgConn) ParameterStatus(key string) string { return pgConn.parameterStatuses[key] } -// CommandTag is the result of an Exec function -type CommandTag []byte +// CommandTag is the status text returned by PostgreSQL for a query. +type CommandTag struct { + s string +} + +// NewCommandTag makes a CommandTag from s. +func NewCommandTag(s string) CommandTag { + return CommandTag{s: s} +} // RowsAffected returns the number of rows affected. If the CommandTag was not // for a row affecting command (e.g. "CREATE TABLE") then it returns 0. func (ct CommandTag) RowsAffected() int64 { // Find last non-digit idx := -1 - for i := len(ct) - 1; i >= 0; i-- { - if ct[i] >= '0' && ct[i] <= '9' { + for i := len(ct.s) - 1; i >= 0; i-- { + if ct.s[i] >= '0' && ct.s[i] <= '9' { idx = i } else { break @@ -759,7 +773,7 @@ func (ct CommandTag) RowsAffected() int64 { } var n int64 - for _, b := range ct[idx:] { + for _, b := range ct.s[idx:] { n = n*10 + int64(b-'0') } @@ -767,62 +781,71 @@ func (ct CommandTag) RowsAffected() int64 { } func (ct CommandTag) String() string { - return string(ct) + return ct.s } // Insert is true if the command tag starts with "INSERT". func (ct CommandTag) Insert() bool { - return len(ct) >= 6 && - ct[0] == 'I' && - ct[1] == 'N' && - ct[2] == 'S' && - ct[3] == 'E' && - ct[4] == 'R' && - ct[5] == 'T' + return strings.HasPrefix(ct.s, "INSERT") } // Update is true if the command tag starts with "UPDATE". func (ct CommandTag) Update() bool { - return len(ct) >= 6 && - ct[0] == 'U' && - ct[1] == 'P' && - ct[2] == 'D' && - ct[3] == 'A' && - ct[4] == 'T' && - ct[5] == 'E' + return strings.HasPrefix(ct.s, "UPDATE") } // Delete is true if the command tag starts with "DELETE". func (ct CommandTag) Delete() bool { - return len(ct) >= 6 && - ct[0] == 'D' && - ct[1] == 'E' && - ct[2] == 'L' && - ct[3] == 'E' && - ct[4] == 'T' && - ct[5] == 'E' + return strings.HasPrefix(ct.s, "DELETE") } // Select is true if the command tag starts with "SELECT". func (ct CommandTag) Select() bool { - return len(ct) >= 6 && - ct[0] == 'S' && - ct[1] == 'E' && - ct[2] == 'L' && - ct[3] == 'E' && - ct[4] == 'C' && - ct[5] == 'T' + return strings.HasPrefix(ct.s, "SELECT") +} + +type FieldDescription struct { + Name string + TableOID uint32 + TableAttributeNumber uint16 + DataTypeOID uint32 + DataTypeSize int16 + TypeModifier int32 + Format int16 +} + +func (pgConn *PgConn) convertRowDescription(dst []FieldDescription, rd *pgproto3.RowDescription) []FieldDescription { + if cap(dst) >= len(rd.Fields) { + dst = dst[:len(rd.Fields):len(rd.Fields)] + } else { + dst = make([]FieldDescription, len(rd.Fields)) + } + + for i := range rd.Fields { + dst[i].Name = string(rd.Fields[i].Name) + dst[i].TableOID = rd.Fields[i].TableOID + dst[i].TableAttributeNumber = rd.Fields[i].TableAttributeNumber + dst[i].DataTypeOID = rd.Fields[i].DataTypeOID + dst[i].DataTypeSize = rd.Fields[i].DataTypeSize + dst[i].TypeModifier = rd.Fields[i].TypeModifier + dst[i].Format = rd.Fields[i].Format + } + + return dst } type StatementDescription struct { Name string SQL string ParamOIDs []uint32 - Fields []pgproto3.FieldDescription + Fields []FieldDescription } // Prepare creates a prepared statement. If the name is empty, the anonymous prepared statement will be used. This // allows Prepare to also to describe statements without creating a server-side prepared statement. +// +// Prepare does not send a PREPARE statement to the server. It uses the PostgreSQL Parse and Describe protocol messages +// directly. func (pgConn *PgConn) Prepare(ctx context.Context, name, sql string, paramOIDs []uint32) (*StatementDescription, error) { if err := pgConn.lock(); err != nil { return nil, err @@ -839,25 +862,13 @@ func (pgConn *PgConn) Prepare(ctx context.Context, name, sql string, paramOIDs [ defer pgConn.contextWatcher.Unwatch() } - buf := pgConn.wbuf - var err error - buf, err = (&pgproto3.Parse{Name: name, Query: sql, ParameterOIDs: paramOIDs}).Encode(buf) - if err != nil { - return nil, err - } - buf, err = (&pgproto3.Describe{ObjectType: 'S', Name: name}).Encode(buf) - if err != nil { - return nil, err - } - buf, err = (&pgproto3.Sync{}).Encode(buf) - if err != nil { - return nil, err - } - - n, err := pgConn.conn.Write(buf) + pgConn.frontend.SendParse(&pgproto3.Parse{Name: name, Query: sql, ParameterOIDs: paramOIDs}) + pgConn.frontend.SendDescribe(&pgproto3.Describe{ObjectType: 'S', Name: name}) + pgConn.frontend.SendSync(&pgproto3.Sync{}) + err := pgConn.flushWithPotentialWriteReadDeadlock() if err != nil { pgConn.asyncClose() - return nil, &writeError{err: err, safeToRetry: n == 0} + return nil, err } psd := &StatementDescription{Name: name, SQL: sql} @@ -869,7 +880,7 @@ readloop: msg, err := pgConn.receiveMessage() if err != nil { pgConn.asyncClose() - return nil, preferContextOverNetTimeoutError(ctx, err) + return nil, normalizeTimeoutError(ctx, err) } switch msg := msg.(type) { @@ -877,8 +888,7 @@ readloop: psd.ParamOIDs = make([]uint32, len(msg.ParameterOIDs)) copy(psd.ParamOIDs, msg.ParameterOIDs) case *pgproto3.RowDescription: - psd.Fields = make([]pgproto3.FieldDescription, len(msg.Fields)) - copy(psd.Fields, msg.Fields) + psd.Fields = pgConn.convertRowDescription(nil, msg) case *pgproto3.ErrorResponse: parseErr = ErrorResponseToPgError(msg) case *pgproto3.ReadyForQuery: @@ -892,26 +902,73 @@ readloop: return psd, nil } +// Deallocate deallocates a prepared statement. +// +// Deallocate does not send a DEALLOCATE statement to the server. It uses the PostgreSQL Close protocol message +// directly. This has slightly different behavior than executing DEALLOCATE statement. +// - Deallocate can succeed in an aborted transaction. +// - Deallocating a non-existent prepared statement is not an error. +func (pgConn *PgConn) Deallocate(ctx context.Context, name string) error { + if err := pgConn.lock(); err != nil { + return err + } + defer pgConn.unlock() + + if ctx != context.Background() { + select { + case <-ctx.Done(): + return newContextAlreadyDoneError(ctx) + default: + } + pgConn.contextWatcher.Watch(ctx) + defer pgConn.contextWatcher.Unwatch() + } + + pgConn.frontend.SendClose(&pgproto3.Close{ObjectType: 'S', Name: name}) + pgConn.frontend.SendSync(&pgproto3.Sync{}) + err := pgConn.flushWithPotentialWriteReadDeadlock() + if err != nil { + pgConn.asyncClose() + return err + } + + for { + msg, err := pgConn.receiveMessage() + if err != nil { + pgConn.asyncClose() + return normalizeTimeoutError(ctx, err) + } + + switch msg := msg.(type) { + case *pgproto3.ErrorResponse: + return ErrorResponseToPgError(msg) + case *pgproto3.ReadyForQuery: + return nil + } + } +} + // ErrorResponseToPgError converts a wire protocol error message to a *PgError. func ErrorResponseToPgError(msg *pgproto3.ErrorResponse) *PgError { return &PgError{ - Severity: msg.Severity, - Code: string(msg.Code), - Message: string(msg.Message), - Detail: string(msg.Detail), - Hint: msg.Hint, - Position: msg.Position, - InternalPosition: msg.InternalPosition, - InternalQuery: string(msg.InternalQuery), - Where: string(msg.Where), - SchemaName: string(msg.SchemaName), - TableName: string(msg.TableName), - ColumnName: string(msg.ColumnName), - DataTypeName: string(msg.DataTypeName), - ConstraintName: msg.ConstraintName, - File: string(msg.File), - Line: msg.Line, - Routine: string(msg.Routine), + Severity: msg.Severity, + SeverityUnlocalized: msg.SeverityUnlocalized, + Code: string(msg.Code), + Message: string(msg.Message), + Detail: string(msg.Detail), + Hint: msg.Hint, + Position: msg.Position, + InternalPosition: msg.InternalPosition, + InternalQuery: string(msg.InternalQuery), + Where: string(msg.Where), + SchemaName: string(msg.SchemaName), + TableName: string(msg.TableName), + ColumnName: string(msg.ColumnName), + DataTypeName: string(msg.DataTypeName), + ConstraintName: msg.ConstraintName, + File: string(msg.File), + Line: msg.Line, + Routine: string(msg.Routine), } } @@ -928,17 +985,33 @@ func (pgConn *PgConn) CancelRequest(ctx context.Context) error { // the connection config. This is important in high availability configurations where fallback connections may be // specified or DNS may be used to load balance. serverAddr := pgConn.conn.RemoteAddr() - cancelConn, err := pgConn.config.DialFunc(ctx, serverAddr.Network(), serverAddr.String()) + var serverNetwork string + var serverAddress string + if serverAddr.Network() == "unix" { + // for unix sockets, RemoteAddr() calls getpeername() which returns the name the + // server passed to bind(). For Postgres, this is always a relative path "./.s.PGSQL.5432" + // so connecting to it will fail. Fall back to the config's value + serverNetwork, serverAddress = NetworkAddress(pgConn.config.Host, pgConn.config.Port) + } else { + serverNetwork, serverAddress = serverAddr.Network(), serverAddr.String() + } + cancelConn, err := pgConn.config.DialFunc(ctx, serverNetwork, serverAddress) if err != nil { - return err + // In case of unix sockets, RemoteAddr() returns only the file part of the path. If the + // first connect failed, try the config. + if serverAddr.Network() != "unix" { + return err + } + serverNetwork, serverAddr := NetworkAddress(pgConn.config.Host, pgConn.config.Port) + cancelConn, err = pgConn.config.DialFunc(ctx, serverNetwork, serverAddr) + if err != nil { + return err + } } defer cancelConn.Close() if ctx != context.Background() { - contextWatcher := ctxwatch.NewContextWatcher( - func() { cancelConn.SetDeadline(time.Date(1, 1, 1, 1, 1, 1, 1, time.UTC)) }, - func() { cancelConn.SetDeadline(time.Time{}) }, - ) + contextWatcher := ctxwatch.NewContextWatcher(&DeadlineContextWatcherHandler{Conn: cancelConn}) contextWatcher.Watch(ctx) defer contextWatcher.Unwatch() } @@ -946,22 +1019,21 @@ func (pgConn *PgConn) CancelRequest(ctx context.Context) error { buf := make([]byte, 16) binary.BigEndian.PutUint32(buf[0:4], 16) binary.BigEndian.PutUint32(buf[4:8], 80877102) - binary.BigEndian.PutUint32(buf[8:12], uint32(pgConn.pid)) - binary.BigEndian.PutUint32(buf[12:16], uint32(pgConn.secretKey)) - _, err = cancelConn.Write(buf) - if err != nil { - return err - } + binary.BigEndian.PutUint32(buf[8:12], pgConn.pid) + binary.BigEndian.PutUint32(buf[12:16], pgConn.secretKey) - _, err = cancelConn.Read(buf) - if err != io.EOF { - return err + if _, err := cancelConn.Write(buf); err != nil { + return fmt.Errorf("write to connection for cancellation: %w", err) } + // Wait for the cancel request to be acknowledged by the server. + // It copies the behavior of the libpq: https://github.com/postgres/postgres/blob/REL_16_0/src/interfaces/libpq/fe-connect.c#L4946-L4960 + _, _ = cancelConn.Read(buf) + return nil } -// WaitForNotification waits for a LISTON/NOTIFY message to be received. It returns an error if a notification was not +// WaitForNotification waits for a LISTEN/NOTIFY message to be received. It returns an error if a notification was not // received. func (pgConn *PgConn) WaitForNotification(ctx context.Context) error { if err := pgConn.lock(); err != nil { @@ -983,7 +1055,7 @@ func (pgConn *PgConn) WaitForNotification(ctx context.Context) error { for { msg, err := pgConn.receiveMessage() if err != nil { - return preferContextOverNetTimeoutError(ctx, err) + return normalizeTimeoutError(ctx, err) } switch msg.(type) { @@ -1023,22 +1095,13 @@ func (pgConn *PgConn) Exec(ctx context.Context, sql string) *MultiResultReader { pgConn.contextWatcher.Watch(ctx) } - buf := pgConn.wbuf - var err error - buf, err = (&pgproto3.Query{String: sql}).Encode(buf) - if err != nil { - return &MultiResultReader{ - closed: true, - err: err, - } - } - - n, err := pgConn.conn.Write(buf) + pgConn.frontend.SendQuery(&pgproto3.Query{String: sql}) + err := pgConn.flushWithPotentialWriteReadDeadlock() if err != nil { pgConn.asyncClose() pgConn.contextWatcher.Unwatch() multiResult.closed = true - multiResult.err = &writeError{err: err, safeToRetry: n == 0} + multiResult.err = err pgConn.unlock() return multiResult } @@ -1046,39 +1109,6 @@ func (pgConn *PgConn) Exec(ctx context.Context, sql string) *MultiResultReader { return multiResult } -// ReceiveResults reads the result that might be returned by Postgres after a SendBytes -// (e.a. after sending a CopyDone in a copy-both situation). -// -// This is a very low level method that requires deep understanding of the PostgreSQL wire protocol to use correctly. -// See https://www.postgresql.org/docs/current/protocol.html. -func (pgConn *PgConn) ReceiveResults(ctx context.Context) *MultiResultReader { - if err := pgConn.lock(); err != nil { - return &MultiResultReader{ - closed: true, - err: err, - } - } - - pgConn.multiResultReader = MultiResultReader{ - pgConn: pgConn, - ctx: ctx, - } - multiResult := &pgConn.multiResultReader - if ctx != context.Background() { - select { - case <-ctx.Done(): - multiResult.closed = true - multiResult.err = newContextAlreadyDoneError(ctx) - pgConn.unlock() - return multiResult - default: - } - pgConn.contextWatcher.Watch(ctx) - } - - return multiResult -} - // ExecParams executes a command via the PostgreSQL extended query protocol. // // sql is a SQL command string. It may only contain one query. Parameter substitution is positional using $1, $2, $3, @@ -1104,27 +1134,10 @@ func (pgConn *PgConn) ExecParams(ctx context.Context, sql string, paramValues [] return result } - buf := pgConn.wbuf - var err error - buf, err = (&pgproto3.Parse{Query: sql, ParameterOIDs: paramOIDs}).Encode(buf) - if err != nil { - result.concludeCommand(nil, err) - pgConn.contextWatcher.Unwatch() - result.closed = true - pgConn.unlock() - return result - } - - buf, err = (&pgproto3.Bind{ParameterFormatCodes: paramFormats, Parameters: paramValues, ResultFormatCodes: resultFormats}).Encode(buf) - if err != nil { - result.concludeCommand(nil, err) - pgConn.contextWatcher.Unwatch() - result.closed = true - pgConn.unlock() - return result - } + pgConn.frontend.SendParse(&pgproto3.Parse{Query: sql, ParameterOIDs: paramOIDs}) + pgConn.frontend.SendBind(&pgproto3.Bind{ParameterFormatCodes: paramFormats, Parameters: paramValues, ResultFormatCodes: resultFormats}) - pgConn.execExtendedSuffix(buf, result) + pgConn.execExtendedSuffix(result) return result } @@ -1147,18 +1160,9 @@ func (pgConn *PgConn) ExecPrepared(ctx context.Context, stmtName string, paramVa return result } - buf := pgConn.wbuf - var err error - buf, err = (&pgproto3.Bind{PreparedStatement: stmtName, ParameterFormatCodes: paramFormats, Parameters: paramValues, ResultFormatCodes: resultFormats}).Encode(buf) - if err != nil { - result.concludeCommand(nil, err) - pgConn.contextWatcher.Unwatch() - result.closed = true - pgConn.unlock() - return result - } + pgConn.frontend.SendBind(&pgproto3.Bind{PreparedStatement: stmtName, ParameterFormatCodes: paramFormats, Parameters: paramValues, ResultFormatCodes: resultFormats}) - pgConn.execExtendedSuffix(buf, result) + pgConn.execExtendedSuffix(result) return result } @@ -1171,13 +1175,13 @@ func (pgConn *PgConn) execExtendedPrefix(ctx context.Context, paramValues [][]by result := &pgConn.resultReader if err := pgConn.lock(); err != nil { - result.concludeCommand(nil, err) + result.concludeCommand(CommandTag{}, err) result.closed = true return result } if len(paramValues) > math.MaxUint16 { - result.concludeCommand(nil, fmt.Errorf("extended protocol limited to %v parameters", math.MaxUint16)) + result.concludeCommand(CommandTag{}, fmt.Errorf("extended protocol limited to %v parameters", math.MaxUint16)) result.closed = true pgConn.unlock() return result @@ -1186,7 +1190,7 @@ func (pgConn *PgConn) execExtendedPrefix(ctx context.Context, paramValues [][]by if ctx != context.Background() { select { case <-ctx.Done(): - result.concludeCommand(nil, newContextAlreadyDoneError(ctx)) + result.concludeCommand(CommandTag{}, newContextAlreadyDoneError(ctx)) result.closed = true pgConn.unlock() return result @@ -1198,37 +1202,15 @@ func (pgConn *PgConn) execExtendedPrefix(ctx context.Context, paramValues [][]by return result } -func (pgConn *PgConn) execExtendedSuffix(buf []byte, result *ResultReader) { - var err error - buf, err = (&pgproto3.Describe{ObjectType: 'P'}).Encode(buf) - if err != nil { - result.concludeCommand(nil, err) - pgConn.contextWatcher.Unwatch() - result.closed = true - pgConn.unlock() - return - } - buf, err = (&pgproto3.Execute{}).Encode(buf) - if err != nil { - result.concludeCommand(nil, err) - pgConn.contextWatcher.Unwatch() - result.closed = true - pgConn.unlock() - return - } - buf, err = (&pgproto3.Sync{}).Encode(buf) - if err != nil { - result.concludeCommand(nil, err) - pgConn.contextWatcher.Unwatch() - result.closed = true - pgConn.unlock() - return - } +func (pgConn *PgConn) execExtendedSuffix(result *ResultReader) { + pgConn.frontend.SendDescribe(&pgproto3.Describe{ObjectType: 'P'}) + pgConn.frontend.SendExecute(&pgproto3.Execute{}) + pgConn.frontend.SendSync(&pgproto3.Sync{}) - n, err := pgConn.conn.Write(buf) + err := pgConn.flushWithPotentialWriteReadDeadlock() if err != nil { pgConn.asyncClose() - result.concludeCommand(nil, &writeError{err: err, safeToRetry: n == 0}) + result.concludeCommand(CommandTag{}, err) pgConn.contextWatcher.Unwatch() result.closed = true pgConn.unlock() @@ -1241,14 +1223,14 @@ func (pgConn *PgConn) execExtendedSuffix(buf []byte, result *ResultReader) { // CopyTo executes the copy command sql and copies the results to w. func (pgConn *PgConn) CopyTo(ctx context.Context, w io.Writer, sql string) (CommandTag, error) { if err := pgConn.lock(); err != nil { - return nil, err + return CommandTag{}, err } if ctx != context.Background() { select { case <-ctx.Done(): pgConn.unlock() - return nil, newContextAlreadyDoneError(ctx) + return CommandTag{}, newContextAlreadyDoneError(ctx) default: } pgConn.contextWatcher.Watch(ctx) @@ -1256,19 +1238,13 @@ func (pgConn *PgConn) CopyTo(ctx context.Context, w io.Writer, sql string) (Comm } // Send copy to command - buf := pgConn.wbuf - var err error - buf, err = (&pgproto3.Query{String: sql}).Encode(buf) - if err != nil { - pgConn.unlock() - return nil, err - } + pgConn.frontend.SendQuery(&pgproto3.Query{String: sql}) - n, err := pgConn.conn.Write(buf) + err := pgConn.flushWithPotentialWriteReadDeadlock() if err != nil { pgConn.asyncClose() pgConn.unlock() - return nil, &writeError{err: err, safeToRetry: n == 0} + return CommandTag{}, err } // Read results @@ -1278,7 +1254,7 @@ func (pgConn *PgConn) CopyTo(ctx context.Context, w io.Writer, sql string) (Comm msg, err := pgConn.receiveMessage() if err != nil { pgConn.asyncClose() - return nil, preferContextOverNetTimeoutError(ctx, err) + return CommandTag{}, normalizeTimeoutError(ctx, err) } switch msg := msg.(type) { @@ -1287,13 +1263,13 @@ func (pgConn *PgConn) CopyTo(ctx context.Context, w io.Writer, sql string) (Comm _, err := w.Write(msg.Data) if err != nil { pgConn.asyncClose() - return nil, err + return CommandTag{}, err } case *pgproto3.ReadyForQuery: pgConn.unlock() return commandTag, pgErr case *pgproto3.CommandComplete: - commandTag = CommandTag(msg.CommandTag) + commandTag = pgConn.makeCommandTag(msg.CommandTag) case *pgproto3.ErrorResponse: pgErr = ErrorResponseToPgError(msg) } @@ -1306,33 +1282,26 @@ func (pgConn *PgConn) CopyTo(ctx context.Context, w io.Writer, sql string) (Comm // could still block. func (pgConn *PgConn) CopyFrom(ctx context.Context, r io.Reader, sql string) (CommandTag, error) { if err := pgConn.lock(); err != nil { - return nil, err + return CommandTag{}, err } defer pgConn.unlock() if ctx != context.Background() { select { case <-ctx.Done(): - return nil, newContextAlreadyDoneError(ctx) + return CommandTag{}, newContextAlreadyDoneError(ctx) default: } pgConn.contextWatcher.Watch(ctx) defer pgConn.contextWatcher.Unwatch() } - // Send copy to command - buf := pgConn.wbuf - var err error - buf, err = (&pgproto3.Query{String: sql}).Encode(buf) - if err != nil { - pgConn.unlock() - return nil, err - } - - n, err := pgConn.conn.Write(buf) + // Send copy from query + pgConn.frontend.SendQuery(&pgproto3.Query{String: sql}) + err := pgConn.flushWithPotentialWriteReadDeadlock() if err != nil { pgConn.asyncClose() - return nil, &writeError{err: err, safeToRetry: n == 0} + return CommandTag{}, err } // Send copy data @@ -1344,19 +1313,20 @@ func (pgConn *PgConn) CopyFrom(ctx context.Context, r io.Reader, sql string) (Co go func() { defer wg.Done() - buf := make([]byte, 0, 65536) - buf = append(buf, 'd') - sp := len(buf) + buf := iobufpool.Get(65536) + defer iobufpool.Put(buf) + (*buf)[0] = 'd' for { - n, readErr := r.Read(buf[5:cap(buf)]) + n, readErr := r.Read((*buf)[5:cap(*buf)]) if n > 0 { - buf = buf[0 : n+5] - pgio.SetInt32(buf[sp:], int32(n+4)) + *buf = (*buf)[0 : n+5] + pgio.SetInt32((*buf)[1:], int32(n+4)) - _, writeErr := pgConn.conn.Write(buf) + writeErr := pgConn.frontend.SendUnbufferedEncodedCopyData(*buf) if writeErr != nil { - // Write errors are always fatal, but we can't use asyncClose because we are in a different goroutine. + // Write errors are always fatal, but we can't use asyncClose because we are in a different goroutine. Not + // setting pgConn.status or closing pgConn.cleanupDone for the same reason. pgConn.conn.Close() copyErrChan <- writeErr @@ -1382,11 +1352,16 @@ func (pgConn *PgConn) CopyFrom(ctx context.Context, r io.Reader, sql string) (Co select { case copyErr = <-copyErrChan: case <-signalMessageChan: - msg, err := pgConn.receiveMessage() - if err != nil { - pgConn.asyncClose() - return nil, preferContextOverNetTimeoutError(ctx, err) + // If pgConn.receiveMessage encounters an error it will call pgConn.asyncClose. But that is a race condition with + // the goroutine. So instead check pgConn.bufferingReceiveErr which will have been set by the signalMessage. If an + // error is found then forcibly close the connection without sending the Terminate message. + if err := pgConn.bufferingReceiveErr; err != nil { + pgConn.status = connStatusClosed + pgConn.conn.Close() + close(pgConn.cleanupDone) + return CommandTag{}, normalizeTimeoutError(ctx, err) } + msg, _ := pgConn.receiveMessage() switch msg := msg.(type) { case *pgproto3.ErrorResponse: @@ -1400,28 +1375,15 @@ func (pgConn *PgConn) CopyFrom(ctx context.Context, r io.Reader, sql string) (Co // Make sure io goroutine finishes before writing. wg.Wait() - buf = buf[:0] if copyErr == io.EOF || pgErr != nil { - copyDone := &pgproto3.CopyDone{} - var err error - buf, err = copyDone.Encode(buf) - if err != nil { - pgConn.asyncClose() - return nil, err - } + pgConn.frontend.Send(&pgproto3.CopyDone{}) } else { - copyFail := &pgproto3.CopyFail{Message: copyErr.Error()} - var err error - buf, err = copyFail.Encode(buf) - if err != nil { - pgConn.asyncClose() - return nil, err - } + pgConn.frontend.Send(&pgproto3.CopyFail{Message: copyErr.Error()}) } - _, err = pgConn.conn.Write(buf) + err = pgConn.flushWithPotentialWriteReadDeadlock() if err != nil { pgConn.asyncClose() - return nil, err + return CommandTag{}, err } // Read results @@ -1430,14 +1392,14 @@ func (pgConn *PgConn) CopyFrom(ctx context.Context, r io.Reader, sql string) (Co msg, err := pgConn.receiveMessage() if err != nil { pgConn.asyncClose() - return nil, preferContextOverNetTimeoutError(ctx, err) + return CommandTag{}, normalizeTimeoutError(ctx, err) } switch msg := msg.(type) { case *pgproto3.ReadyForQuery: return commandTag, pgErr case *pgproto3.CommandComplete: - commandTag = CommandTag(msg.CommandTag) + commandTag = pgConn.makeCommandTag(msg.CommandTag) case *pgproto3.ErrorResponse: pgErr = ErrorResponseToPgError(msg) } @@ -1446,8 +1408,9 @@ func (pgConn *PgConn) CopyFrom(ctx context.Context, r io.Reader, sql string) (Co // MultiResultReader is a reader for a command that could return multiple results such as Exec or ExecBatch. type MultiResultReader struct { - pgConn *PgConn - ctx context.Context + pgConn *PgConn + ctx context.Context + pipeline *Pipeline rr *ResultReader @@ -1469,10 +1432,9 @@ func (mrr *MultiResultReader) ReadAll() ([]*Result, error) { func (mrr *MultiResultReader) receiveMessage() (pgproto3.BackendMessage, error) { msg, err := mrr.pgConn.receiveMessage() - if err != nil { mrr.pgConn.contextWatcher.Unwatch() - mrr.err = preferContextOverNetTimeoutError(mrr.ctx, err) + mrr.err = normalizeTimeoutError(mrr.ctx, err) mrr.closed = true mrr.pgConn.asyncClose() return nil, mrr.err @@ -1480,9 +1442,13 @@ func (mrr *MultiResultReader) receiveMessage() (pgproto3.BackendMessage, error) switch msg := msg.(type) { case *pgproto3.ReadyForQuery: - mrr.pgConn.contextWatcher.Unwatch() mrr.closed = true - mrr.pgConn.unlock() + if mrr.pipeline != nil { + mrr.pipeline.expectedReadyForQueryCount-- + } else { + mrr.pgConn.contextWatcher.Unwatch() + mrr.pgConn.unlock() + } case *pgproto3.ErrorResponse: mrr.err = ErrorResponseToPgError(msg) } @@ -1504,13 +1470,14 @@ func (mrr *MultiResultReader) NextResult() bool { pgConn: mrr.pgConn, multiResultReader: mrr, ctx: mrr.ctx, - fieldDescriptions: msg.Fields, + fieldDescriptions: mrr.pgConn.convertRowDescription(mrr.pgConn.fieldDescriptions[:], msg), } + mrr.rr = &mrr.pgConn.resultReader return true case *pgproto3.CommandComplete: mrr.pgConn.resultReader = ResultReader{ - commandTag: CommandTag(msg.CommandTag), + commandTag: mrr.pgConn.makeCommandTag(msg.CommandTag), commandConcluded: true, closed: true, } @@ -1545,9 +1512,10 @@ func (mrr *MultiResultReader) Close() error { type ResultReader struct { pgConn *PgConn multiResultReader *MultiResultReader + pipeline *Pipeline ctx context.Context - fieldDescriptions []pgproto3.FieldDescription + fieldDescriptions []FieldDescription rowValues [][]byte commandTag CommandTag commandConcluded bool @@ -1557,7 +1525,7 @@ type ResultReader struct { // Result is the saved query response that is returned by calling Read on a ResultReader. type Result struct { - FieldDescriptions []pgproto3.FieldDescription + FieldDescriptions []FieldDescription Rows [][][]byte CommandTag CommandTag Err error @@ -1569,12 +1537,18 @@ func (rr *ResultReader) Read() *Result { for rr.NextRow() { if br.FieldDescriptions == nil { - br.FieldDescriptions = make([]pgproto3.FieldDescription, len(rr.FieldDescriptions())) + br.FieldDescriptions = make([]FieldDescription, len(rr.FieldDescriptions())) copy(br.FieldDescriptions, rr.FieldDescriptions()) } - row := make([][]byte, len(rr.Values())) - copy(row, rr.Values()) + values := rr.Values() + row := make([][]byte, len(values)) + for i := range row { + if values[i] != nil { + row[i] = make([]byte, len(values[i])) + copy(row[i], values[i]) + } + } br.Rows = append(br.Rows, row) } @@ -1602,14 +1576,14 @@ func (rr *ResultReader) NextRow() bool { } // FieldDescriptions returns the field descriptions for the current result set. The returned slice is only valid until -// the ResultReader is closed. -func (rr *ResultReader) FieldDescriptions() []pgproto3.FieldDescription { +// the ResultReader is closed. It may return nil (for example, if the query did not return a result set or an error was +// encountered.) +func (rr *ResultReader) FieldDescriptions() []FieldDescription { return rr.fieldDescriptions } // Values returns the current row data. NextRow must have been previously been called. The returned [][]byte is only -// valid until the next NextRow call or the ResultReader is closed. However, the underlying byte data is safe to -// retain a reference to and mutate. +// valid until the next NextRow call or the ResultReader is closed. func (rr *ResultReader) Values() [][]byte { return rr.rowValues } @@ -1625,15 +1599,15 @@ func (rr *ResultReader) Close() (CommandTag, error) { for !rr.commandConcluded { _, err := rr.receiveMessage() if err != nil { - return nil, rr.err + return CommandTag{}, rr.err } } - if rr.multiResultReader == nil { + if rr.multiResultReader == nil && rr.pipeline == nil { for { msg, err := rr.receiveMessage() if err != nil { - return nil, rr.err + return CommandTag{}, rr.err } switch msg := msg.(type) { @@ -1679,8 +1653,8 @@ func (rr *ResultReader) receiveMessage() (msg pgproto3.BackendMessage, err error } if err != nil { - err = preferContextOverNetTimeoutError(rr.ctx, err) - rr.concludeCommand(nil, err) + err = normalizeTimeoutError(rr.ctx, err) + rr.concludeCommand(CommandTag{}, err) rr.pgConn.contextWatcher.Unwatch() rr.closed = true if rr.multiResultReader == nil { @@ -1692,13 +1666,13 @@ func (rr *ResultReader) receiveMessage() (msg pgproto3.BackendMessage, err error switch msg := msg.(type) { case *pgproto3.RowDescription: - rr.fieldDescriptions = msg.Fields + rr.fieldDescriptions = rr.pgConn.convertRowDescription(rr.pgConn.fieldDescriptions[:], msg) case *pgproto3.CommandComplete: - rr.concludeCommand(CommandTag(msg.CommandTag), nil) + rr.concludeCommand(rr.pgConn.makeCommandTag(msg.CommandTag), nil) case *pgproto3.EmptyQueryResponse: - rr.concludeCommand(nil, nil) + rr.concludeCommand(CommandTag{}, nil) case *pgproto3.ErrorResponse: - rr.concludeCommand(nil, ErrorResponseToPgError(msg)) + rr.concludeCommand(CommandTag{}, ErrorResponseToPgError(msg)) } return msg, nil @@ -1762,7 +1736,8 @@ func (batch *Batch) ExecPrepared(stmtName string, paramValues [][]byte, paramFor } // ExecBatch executes all the queries in batch in a single round-trip. Execution is implicitly transactional unless a -// transaction is already in progress or SQL contains transaction control statements. +// transaction is already in progress or SQL contains transaction control statements. This is a simpler way of executing +// multiple queries in a single round trip than using pipeline mode. func (pgConn *PgConn) ExecBatch(ctx context.Context, batch *Batch) *MultiResultReader { if batch.err != nil { return &MultiResultReader{ @@ -1804,18 +1779,15 @@ func (pgConn *PgConn) ExecBatch(ctx context.Context, batch *Batch) *MultiResultR return multiResult } - // A large batch can deadlock without concurrent reading and writing. If the Write fails the underlying net.Conn is - // closed. This is all that can be done without introducing a race condition or adding a concurrent safe communication - // channel to relay the error back. The practical effect of this is that the underlying Write error is not reported. - // The error the code reading the batch results receives will be a closed connection error. - // - // See https://github.com/jackc/pgx/issues/374. - go func() { - _, err := pgConn.conn.Write(batch.buf) - if err != nil { - pgConn.conn.Close() - } - }() + pgConn.enterPotentialWriteReadDeadlock() + defer pgConn.exitPotentialWriteReadDeadlock() + _, err := pgConn.conn.Write(batch.buf) + if err != nil { + multiResult.closed = true + multiResult.err = err + pgConn.unlock() + return multiResult + } return multiResult } @@ -1837,23 +1809,122 @@ func (pgConn *PgConn) EscapeString(s string) (string, error) { return strings.Replace(s, "'", "''", -1), nil } +// CheckConn checks the underlying connection without writing any bytes. This is currently implemented by doing a read +// with a very short deadline. This can be useful because a TCP connection can be broken such that a write will appear +// to succeed even though it will never actually reach the server. Reading immediately before a write will detect this +// condition. If this is done immediately before sending a query it reduces the chances a query will be sent that fails +// without the client knowing whether the server received it or not. +// +// Deprecated: CheckConn is deprecated in favor of Ping. CheckConn cannot detect all types of broken connections where +// the write would still appear to succeed. Prefer Ping unless on a high latency connection. +func (pgConn *PgConn) CheckConn() error { + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Millisecond) + defer cancel() + + _, err := pgConn.ReceiveMessage(ctx) + if err != nil { + if !Timeout(err) { + return err + } + } + + return nil +} + +// Ping pings the server. This can be useful because a TCP connection can be broken such that a write will appear to +// succeed even though it will never actually reach the server. Pinging immediately before sending a query reduces the +// chances a query will be sent that fails without the client knowing whether the server received it or not. +func (pgConn *PgConn) Ping(ctx context.Context) error { + return pgConn.Exec(ctx, "-- ping").Close() +} + +// makeCommandTag makes a CommandTag. It does not retain a reference to buf or buf's underlying memory. +func (pgConn *PgConn) makeCommandTag(buf []byte) CommandTag { + return CommandTag{s: string(buf)} +} + +// enterPotentialWriteReadDeadlock must be called before a write that could deadlock if the server is simultaneously +// blocked writing to us. +func (pgConn *PgConn) enterPotentialWriteReadDeadlock() { + // The time to wait is somewhat arbitrary. A Write should only take as long as the syscall and memcpy to the OS + // outbound network buffer unless the buffer is full (which potentially is a block). It needs to be long enough for + // the normal case, but short enough not to kill performance if a block occurs. + // + // In addition, on Windows the default timer resolution is 15.6ms. So setting the timer to less than that is + // ineffective. + if pgConn.slowWriteTimer.Reset(15 * time.Millisecond) { + panic("BUG: slow write timer already active") + } +} + +// exitPotentialWriteReadDeadlock must be called after a call to enterPotentialWriteReadDeadlock. +func (pgConn *PgConn) exitPotentialWriteReadDeadlock() { + if !pgConn.slowWriteTimer.Stop() { + // The timer starts its function in a separate goroutine. It is necessary to ensure the background reader has + // started before calling Stop. Otherwise, the background reader may not be stopped. That on its own is not a + // serious problem. But what is a serious problem is that the background reader may start at an inopportune time in + // a subsequent query. For example, if a subsequent query was canceled then a deadline may be set on the net.Conn to + // interrupt an in-progress read. After the read is interrupted, but before the deadline is cleared, the background + // reader could start and read a deadline error. Then the next query would receive the an unexpected deadline error. + <-pgConn.bgReaderStarted + pgConn.bgReader.Stop() + } +} + +func (pgConn *PgConn) flushWithPotentialWriteReadDeadlock() error { + pgConn.enterPotentialWriteReadDeadlock() + defer pgConn.exitPotentialWriteReadDeadlock() + err := pgConn.frontend.Flush() + return err +} + +// SyncConn prepares the underlying net.Conn for direct use. PgConn may internally buffer reads or use goroutines for +// background IO. This means that any direct use of the underlying net.Conn may be corrupted if a read is already +// buffered or a read is in progress. SyncConn drains read buffers and stops background IO. In some cases this may +// require sending a ping to the server. ctx can be used to cancel this operation. This should be called before any +// operation that will use the underlying net.Conn directly. e.g. Before Conn() or Hijack(). +// +// This should not be confused with the PostgreSQL protocol Sync message. +func (pgConn *PgConn) SyncConn(ctx context.Context) error { + for i := 0; i < 10; i++ { + if pgConn.bgReader.Status() == bgreader.StatusStopped && pgConn.frontend.ReadBufferLen() == 0 { + return nil + } + + err := pgConn.Ping(ctx) + if err != nil { + return fmt.Errorf("SyncConn: Ping failed while syncing conn: %w", err) + } + } + + // This should never happen. Only way I can imagine this occurring is if the server is constantly sending data such as + // LISTEN/NOTIFY or log notifications such that we never can get an empty buffer. + return errors.New("SyncConn: conn never synchronized") +} + +// CustomData returns a map that can be used to associate custom data with the connection. +func (pgConn *PgConn) CustomData() map[string]any { + return pgConn.customData +} + // HijackedConn is the result of hijacking a connection. // // Due to the necessary exposure of internal implementation details, it is not covered by the semantic versioning // compatibility. type HijackedConn struct { - Conn net.Conn // the underlying TCP or unix domain socket connection + Conn net.Conn PID uint32 // backend pid SecretKey uint32 // key to use to send a cancel query message to the server ParameterStatuses map[string]string // parameters that have been reported by the server TxStatus byte - Frontend Frontend + Frontend *pgproto3.Frontend Config *Config + CustomData map[string]any } -// Hijack extracts the internal connection data. pgConn must be in an idle state. pgConn is unusable after hijacking. -// Hijacking is typically only useful when using pgconn to establish a connection, but taking complete control of the -// raw connection after that (e.g. a load balancer or proxy). +// Hijack extracts the internal connection data. pgConn must be in an idle state. SyncConn should be called immediately +// before Hijack. pgConn is unusable after hijacking. Hijacking is typically only useful when using pgconn to establish +// a connection, but taking complete control of the raw connection after that (e.g. a load balancer or proxy). // // Due to the necessary exposure of internal implementation details, it is not covered by the semantic versioning // compatibility. @@ -1871,12 +1942,15 @@ func (pgConn *PgConn) Hijack() (*HijackedConn, error) { TxStatus: pgConn.txStatus, Frontend: pgConn.frontend, Config: pgConn.config, + CustomData: pgConn.customData, }, nil } // Construct created a PgConn from an already established connection to a PostgreSQL server. This is the inverse of // PgConn.Hijack. The connection must be in an idle state. // +// hc.Frontend is replaced by a new pgproto3.Frontend built by hc.Config.BuildFrontend. +// // Due to the necessary exposure of internal implementation details, it is not covered by the semantic versioning // compatibility. func Construct(hc *HijackedConn) (*PgConn, error) { @@ -1888,14 +1962,385 @@ func Construct(hc *HijackedConn) (*PgConn, error) { txStatus: hc.TxStatus, frontend: hc.Frontend, config: hc.Config, + customData: hc.CustomData, status: connStatusIdle, - wbuf: make([]byte, 0, wbufLen), cleanupDone: make(chan struct{}), } - pgConn.contextWatcher = newContextWatcher(pgConn.conn) + pgConn.contextWatcher = ctxwatch.NewContextWatcher(hc.Config.BuildContextWatcherHandler(pgConn)) + pgConn.bgReader = bgreader.New(pgConn.conn) + pgConn.slowWriteTimer = time.AfterFunc(time.Duration(math.MaxInt64), + func() { + pgConn.bgReader.Start() + pgConn.bgReaderStarted <- struct{}{} + }, + ) + pgConn.slowWriteTimer.Stop() + pgConn.bgReaderStarted = make(chan struct{}) + pgConn.frontend = hc.Config.BuildFrontend(pgConn.bgReader, pgConn.conn) return pgConn, nil } + +// Pipeline represents a connection in pipeline mode. +// +// SendPrepare, SendQueryParams, and SendQueryPrepared queue requests to the server. These requests are not written until +// pipeline is flushed by Flush or Sync. Sync must be called after the last request is queued. Requests between +// synchronization points are implicitly transactional unless explicit transaction control statements have been issued. +// +// The context the pipeline was started with is in effect for the entire life of the Pipeline. +// +// For a deeper understanding of pipeline mode see the PostgreSQL documentation for the extended query protocol +// (https://www.postgresql.org/docs/current/protocol-flow.html#PROTOCOL-FLOW-EXT-QUERY) and the libpq pipeline mode +// (https://www.postgresql.org/docs/current/libpq-pipeline-mode.html). +type Pipeline struct { + conn *PgConn + ctx context.Context + + expectedReadyForQueryCount int + pendingSync bool + + err error + closed bool +} + +// PipelineSync is returned by GetResults when a ReadyForQuery message is received. +type PipelineSync struct{} + +// CloseComplete is returned by GetResults when a CloseComplete message is received. +type CloseComplete struct{} + +// StartPipeline switches the connection to pipeline mode and returns a *Pipeline. In pipeline mode requests can be sent +// to the server without waiting for a response. Close must be called on the returned *Pipeline to return the connection +// to normal mode. While in pipeline mode, no methods that communicate with the server may be called except +// CancelRequest and Close. ctx is in effect for entire life of the *Pipeline. +// +// Prefer ExecBatch when only sending one group of queries at once. +func (pgConn *PgConn) StartPipeline(ctx context.Context) *Pipeline { + if err := pgConn.lock(); err != nil { + return &Pipeline{ + closed: true, + err: err, + } + } + + pgConn.pipeline = Pipeline{ + conn: pgConn, + ctx: ctx, + } + pipeline := &pgConn.pipeline + + if ctx != context.Background() { + select { + case <-ctx.Done(): + pipeline.closed = true + pipeline.err = newContextAlreadyDoneError(ctx) + pgConn.unlock() + return pipeline + default: + } + pgConn.contextWatcher.Watch(ctx) + } + + return pipeline +} + +// SendPrepare is the pipeline version of *PgConn.Prepare. +func (p *Pipeline) SendPrepare(name, sql string, paramOIDs []uint32) { + if p.closed { + return + } + p.pendingSync = true + + p.conn.frontend.SendParse(&pgproto3.Parse{Name: name, Query: sql, ParameterOIDs: paramOIDs}) + p.conn.frontend.SendDescribe(&pgproto3.Describe{ObjectType: 'S', Name: name}) +} + +// SendDeallocate deallocates a prepared statement. +func (p *Pipeline) SendDeallocate(name string) { + if p.closed { + return + } + p.pendingSync = true + + p.conn.frontend.SendClose(&pgproto3.Close{ObjectType: 'S', Name: name}) +} + +// SendQueryParams is the pipeline version of *PgConn.QueryParams. +func (p *Pipeline) SendQueryParams(sql string, paramValues [][]byte, paramOIDs []uint32, paramFormats []int16, resultFormats []int16) { + if p.closed { + return + } + p.pendingSync = true + + p.conn.frontend.SendParse(&pgproto3.Parse{Query: sql, ParameterOIDs: paramOIDs}) + p.conn.frontend.SendBind(&pgproto3.Bind{ParameterFormatCodes: paramFormats, Parameters: paramValues, ResultFormatCodes: resultFormats}) + p.conn.frontend.SendDescribe(&pgproto3.Describe{ObjectType: 'P'}) + p.conn.frontend.SendExecute(&pgproto3.Execute{}) +} + +// SendQueryPrepared is the pipeline version of *PgConn.QueryPrepared. +func (p *Pipeline) SendQueryPrepared(stmtName string, paramValues [][]byte, paramFormats []int16, resultFormats []int16) { + if p.closed { + return + } + p.pendingSync = true + + p.conn.frontend.SendBind(&pgproto3.Bind{PreparedStatement: stmtName, ParameterFormatCodes: paramFormats, Parameters: paramValues, ResultFormatCodes: resultFormats}) + p.conn.frontend.SendDescribe(&pgproto3.Describe{ObjectType: 'P'}) + p.conn.frontend.SendExecute(&pgproto3.Execute{}) +} + +// Flush flushes the queued requests without establishing a synchronization point. +func (p *Pipeline) Flush() error { + if p.closed { + if p.err != nil { + return p.err + } + return errors.New("pipeline closed") + } + + err := p.conn.flushWithPotentialWriteReadDeadlock() + if err != nil { + err = normalizeTimeoutError(p.ctx, err) + + p.conn.asyncClose() + + p.conn.contextWatcher.Unwatch() + p.conn.unlock() + p.closed = true + p.err = err + return err + } + + return nil +} + +// Sync establishes a synchronization point and flushes the queued requests. +func (p *Pipeline) Sync() error { + if p.closed { + if p.err != nil { + return p.err + } + return errors.New("pipeline closed") + } + + p.conn.frontend.SendSync(&pgproto3.Sync{}) + err := p.Flush() + if err != nil { + return err + } + + p.pendingSync = false + p.expectedReadyForQueryCount++ + + return nil +} + +// GetResults gets the next results. If results are present, results may be a *ResultReader, *StatementDescription, or +// *PipelineSync. If an ErrorResponse is received from the server, results will be nil and err will be a *PgError. If no +// results are available, results and err will both be nil. +func (p *Pipeline) GetResults() (results any, err error) { + if p.closed { + if p.err != nil { + return nil, p.err + } + return nil, errors.New("pipeline closed") + } + + if p.expectedReadyForQueryCount == 0 { + return nil, nil + } + + return p.getResults() +} + +func (p *Pipeline) getResults() (results any, err error) { + for { + msg, err := p.conn.receiveMessage() + if err != nil { + p.closed = true + p.err = err + p.conn.asyncClose() + return nil, normalizeTimeoutError(p.ctx, err) + } + + switch msg := msg.(type) { + case *pgproto3.RowDescription: + p.conn.resultReader = ResultReader{ + pgConn: p.conn, + pipeline: p, + ctx: p.ctx, + fieldDescriptions: p.conn.convertRowDescription(p.conn.fieldDescriptions[:], msg), + } + return &p.conn.resultReader, nil + case *pgproto3.CommandComplete: + p.conn.resultReader = ResultReader{ + commandTag: p.conn.makeCommandTag(msg.CommandTag), + commandConcluded: true, + closed: true, + } + return &p.conn.resultReader, nil + case *pgproto3.ParseComplete: + peekedMsg, err := p.conn.peekMessage() + if err != nil { + p.conn.asyncClose() + return nil, normalizeTimeoutError(p.ctx, err) + } + if _, ok := peekedMsg.(*pgproto3.ParameterDescription); ok { + return p.getResultsPrepare() + } + case *pgproto3.CloseComplete: + return &CloseComplete{}, nil + case *pgproto3.ReadyForQuery: + p.expectedReadyForQueryCount-- + return &PipelineSync{}, nil + case *pgproto3.ErrorResponse: + pgErr := ErrorResponseToPgError(msg) + return nil, pgErr + } + + } +} + +func (p *Pipeline) getResultsPrepare() (*StatementDescription, error) { + psd := &StatementDescription{} + + for { + msg, err := p.conn.receiveMessage() + if err != nil { + p.conn.asyncClose() + return nil, normalizeTimeoutError(p.ctx, err) + } + + switch msg := msg.(type) { + case *pgproto3.ParameterDescription: + psd.ParamOIDs = make([]uint32, len(msg.ParameterOIDs)) + copy(psd.ParamOIDs, msg.ParameterOIDs) + case *pgproto3.RowDescription: + psd.Fields = p.conn.convertRowDescription(nil, msg) + return psd, nil + + // NoData is returned instead of RowDescription when there is no expected result. e.g. An INSERT without a RETURNING + // clause. + case *pgproto3.NoData: + return psd, nil + + // These should never happen here. But don't take chances that could lead to a deadlock. + case *pgproto3.ErrorResponse: + pgErr := ErrorResponseToPgError(msg) + return nil, pgErr + case *pgproto3.CommandComplete: + p.conn.asyncClose() + return nil, errors.New("BUG: received CommandComplete while handling Describe") + case *pgproto3.ReadyForQuery: + p.conn.asyncClose() + return nil, errors.New("BUG: received ReadyForQuery while handling Describe") + } + } +} + +// Close closes the pipeline and returns the connection to normal mode. +func (p *Pipeline) Close() error { + if p.closed { + return p.err + } + + p.closed = true + + if p.pendingSync { + p.conn.asyncClose() + p.err = errors.New("pipeline has unsynced requests") + p.conn.contextWatcher.Unwatch() + p.conn.unlock() + + return p.err + } + + for p.expectedReadyForQueryCount > 0 { + _, err := p.getResults() + if err != nil { + p.err = err + var pgErr *PgError + if !errors.As(err, &pgErr) { + p.conn.asyncClose() + break + } + } + } + + p.conn.contextWatcher.Unwatch() + p.conn.unlock() + + return p.err +} + +// DeadlineContextWatcherHandler handles canceled contexts by setting a deadline on a net.Conn. +type DeadlineContextWatcherHandler struct { + Conn net.Conn + + // DeadlineDelay is the delay to set on the deadline set on net.Conn when the context is canceled. + DeadlineDelay time.Duration +} + +func (h *DeadlineContextWatcherHandler) HandleCancel(ctx context.Context) { + h.Conn.SetDeadline(time.Now().Add(h.DeadlineDelay)) +} + +func (h *DeadlineContextWatcherHandler) HandleUnwatchAfterCancel() { + h.Conn.SetDeadline(time.Time{}) +} + +// CancelRequestContextWatcherHandler handles canceled contexts by sending a cancel request to the server. It also sets +// a deadline on a net.Conn as a fallback. +type CancelRequestContextWatcherHandler struct { + Conn *PgConn + + // CancelRequestDelay is the delay before sending the cancel request to the server. + CancelRequestDelay time.Duration + + // DeadlineDelay is the delay to set on the deadline set on net.Conn when the context is canceled. + DeadlineDelay time.Duration + + cancelFinishedChan chan struct{} + handleUnwatchAfterCancelCalled func() +} + +func (h *CancelRequestContextWatcherHandler) HandleCancel(context.Context) { + h.cancelFinishedChan = make(chan struct{}) + var handleUnwatchedAfterCancelCalledCtx context.Context + handleUnwatchedAfterCancelCalledCtx, h.handleUnwatchAfterCancelCalled = context.WithCancel(context.Background()) + + deadline := time.Now().Add(h.DeadlineDelay) + h.Conn.conn.SetDeadline(deadline) + + go func() { + defer close(h.cancelFinishedChan) + + select { + case <-handleUnwatchedAfterCancelCalledCtx.Done(): + return + case <-time.After(h.CancelRequestDelay): + } + + cancelRequestCtx, cancel := context.WithDeadline(handleUnwatchedAfterCancelCalledCtx, deadline) + defer cancel() + h.Conn.CancelRequest(cancelRequestCtx) + + // CancelRequest is inherently racy. Even though the cancel request has been received by the server at this point, + // it hasn't necessarily been delivered to the other connection. If we immediately return and the connection is + // immediately used then it is possible the CancelRequest will actually cancel our next query. The + // TestCancelRequestContextWatcherHandler Stress test can produce this error without the sleep below. The sleep time + // is arbitrary, but should be sufficient to prevent this error case. + time.Sleep(100 * time.Millisecond) + }() +} + +func (h *CancelRequestContextWatcherHandler) HandleUnwatchAfterCancel() { + h.handleUnwatchAfterCancelCalled() + <-h.cancelFinishedChan + + h.Conn.conn.SetDeadline(time.Time{}) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgproto3/README.md b/vendor/github.com/jackc/pgx/v5/pgproto3/README.md new file mode 100644 index 00000000..7a26f1cb --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/README.md @@ -0,0 +1,7 @@ +# pgproto3 + +Package pgproto3 is an encoder and decoder of the PostgreSQL wire protocol version 3. + +pgproto3 can be used as a foundation for PostgreSQL drivers, proxies, mock servers, load balancers and more. + +See example/pgfortune for a playful example of a fake PostgreSQL server. diff --git a/vendor/github.com/jackc/pgproto3/v2/authentication_cleartext_password.go b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_cleartext_password.go similarity index 97% rename from vendor/github.com/jackc/pgproto3/v2/authentication_cleartext_password.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/authentication_cleartext_password.go index 1ec219bc..ac2962e9 100644 --- a/vendor/github.com/jackc/pgproto3/v2/authentication_cleartext_password.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_cleartext_password.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) // AuthenticationCleartextPassword is a message sent from the backend indicating that a clear-text password is required. diff --git a/vendor/github.com/jackc/pgproto3/v2/authentication_gss.go b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_gss.go similarity index 96% rename from vendor/github.com/jackc/pgproto3/v2/authentication_gss.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/authentication_gss.go index 425be6ef..178ef31d 100644 --- a/vendor/github.com/jackc/pgproto3/v2/authentication_gss.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_gss.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type AuthenticationGSS struct{} diff --git a/vendor/github.com/jackc/pgproto3/v2/authentication_gss_continue.go b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_gss_continue.go similarity index 96% rename from vendor/github.com/jackc/pgproto3/v2/authentication_gss_continue.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/authentication_gss_continue.go index 42a70daf..2ba3f3b3 100644 --- a/vendor/github.com/jackc/pgproto3/v2/authentication_gss_continue.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_gss_continue.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type AuthenticationGSSContinue struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/authentication_md5_password.go b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_md5_password.go similarity index 97% rename from vendor/github.com/jackc/pgproto3/v2/authentication_md5_password.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/authentication_md5_password.go index 9c0f5ee0..854c6404 100644 --- a/vendor/github.com/jackc/pgproto3/v2/authentication_md5_password.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_md5_password.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) // AuthenticationMD5Password is a message sent from the backend indicating that an MD5 hashed password is required. diff --git a/vendor/github.com/jackc/pgproto3/v2/authentication_ok.go b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_ok.go similarity index 97% rename from vendor/github.com/jackc/pgproto3/v2/authentication_ok.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/authentication_ok.go index 021f820f..ec11d39f 100644 --- a/vendor/github.com/jackc/pgproto3/v2/authentication_ok.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_ok.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) // AuthenticationOk is a message sent from the backend indicating that authentication was successful. diff --git a/vendor/github.com/jackc/pgproto3/v2/authentication_sasl.go b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_sasl.go similarity index 86% rename from vendor/github.com/jackc/pgproto3/v2/authentication_sasl.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/authentication_sasl.go index b56461cd..e66580f4 100644 --- a/vendor/github.com/jackc/pgproto3/v2/authentication_sasl.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_sasl.go @@ -6,7 +6,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) // AuthenticationSASL is a message sent from the backend indicating that SASL authentication is required. @@ -36,10 +36,11 @@ func (dst *AuthenticationSASL) Decode(src []byte) error { authMechanisms := src[4:] for len(authMechanisms) > 1 { idx := bytes.IndexByte(authMechanisms, 0) - if idx > 0 { - dst.AuthMechanisms = append(dst.AuthMechanisms, string(authMechanisms[:idx])) - authMechanisms = authMechanisms[idx+1:] + if idx == -1 { + return &invalidMessageFormatErr{messageType: "AuthenticationSASL", details: "unterminated string"} } + dst.AuthMechanisms = append(dst.AuthMechanisms, string(authMechanisms[:idx])) + authMechanisms = authMechanisms[idx+1:] } return nil diff --git a/vendor/github.com/jackc/pgproto3/v2/authentication_sasl_continue.go b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_sasl_continue.go similarity index 97% rename from vendor/github.com/jackc/pgproto3/v2/authentication_sasl_continue.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/authentication_sasl_continue.go index d405b129..70fba4a6 100644 --- a/vendor/github.com/jackc/pgproto3/v2/authentication_sasl_continue.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_sasl_continue.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) // AuthenticationSASLContinue is a message sent from the backend containing a SASL challenge. diff --git a/vendor/github.com/jackc/pgproto3/v2/authentication_sasl_final.go b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_sasl_final.go similarity index 97% rename from vendor/github.com/jackc/pgproto3/v2/authentication_sasl_final.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/authentication_sasl_final.go index c34ac4e6..84976c2a 100644 --- a/vendor/github.com/jackc/pgproto3/v2/authentication_sasl_final.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/authentication_sasl_final.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) // AuthenticationSASLFinal is a message sent from the backend indicating a SASL authentication has completed. diff --git a/vendor/github.com/jackc/pgproto3/v2/backend.go b/vendor/github.com/jackc/pgx/v5/pgproto3/backend.go similarity index 66% rename from vendor/github.com/jackc/pgproto3/v2/backend.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/backend.go index 6eabcd85..d146c338 100644 --- a/vendor/github.com/jackc/pgproto3/v2/backend.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/backend.go @@ -1,17 +1,24 @@ package pgproto3 import ( + "bytes" "encoding/binary" - "errors" "fmt" "io" ) // Backend acts as a server for the PostgreSQL wire protocol version 3. type Backend struct { - cr ChunkReader + cr *chunkReader w io.Writer + // tracer is used to trace messages when Send or Receive is called. This means an outbound message is traced + // before it is actually transmitted (i.e. before Flush). + tracer *tracer + + wbuf []byte + encodeError error + // Frontend message flyweights bind Bind cancelRequest CancelRequest @@ -32,6 +39,7 @@ type Backend struct { terminate Terminate bodyLen int + maxBodyLen int // maxBodyLen is the maximum length of a message body in octets. If a message body exceeds this length, Receive will return an error. msgType byte partialMsg bool authType uint32 @@ -43,19 +51,68 @@ const ( ) // NewBackend creates a new Backend. -func NewBackend(cr ChunkReader, w io.Writer) *Backend { +func NewBackend(r io.Reader, w io.Writer) *Backend { + cr := newChunkReader(r, 0) return &Backend{cr: cr, w: w} } -// Send sends a message to the frontend. -func (b *Backend) Send(msg BackendMessage) error { - buf, err := msg.Encode(nil) +// Send sends a message to the frontend (i.e. the client). The message is buffered until Flush is called. Any error +// encountered will be returned from Flush. +func (b *Backend) Send(msg BackendMessage) { + if b.encodeError != nil { + return + } + + prevLen := len(b.wbuf) + newBuf, err := msg.Encode(b.wbuf) + if err != nil { + b.encodeError = err + return + } + b.wbuf = newBuf + + if b.tracer != nil { + b.tracer.traceMessage('B', int32(len(b.wbuf)-prevLen), msg) + } +} + +// Flush writes any pending messages to the frontend (i.e. the client). +func (b *Backend) Flush() error { + if err := b.encodeError; err != nil { + b.encodeError = nil + b.wbuf = b.wbuf[:0] + return &writeError{err: err, safeToRetry: true} + } + + n, err := b.w.Write(b.wbuf) + + const maxLen = 1024 + if len(b.wbuf) > maxLen { + b.wbuf = make([]byte, 0, maxLen) + } else { + b.wbuf = b.wbuf[:0] + } + if err != nil { - return err + return &writeError{err: err, safeToRetry: n == 0} } - _, err = b.w.Write(buf) - return err + return nil +} + +// Trace starts tracing the message traffic to w. It writes in a similar format to that produced by the libpq function +// PQtrace. +func (b *Backend) Trace(w io.Writer, options TracerOptions) { + b.tracer = &tracer{ + w: w, + buf: &bytes.Buffer{}, + TracerOptions: options, + } +} + +// Untrace stops tracing. +func (b *Backend) Untrace() { + b.tracer = nil } // ReceiveStartupMessage receives the initial connection message. This method is used of the normal Receive method @@ -119,10 +176,10 @@ func (b *Backend) Receive() (FrontendMessage, error) { b.msgType = header[0] b.bodyLen = int(binary.BigEndian.Uint32(header[1:])) - 4 - b.partialMsg = true - if b.bodyLen < 0 { - return nil, errors.New("invalid message with negative body length received") + if b.maxBodyLen > 0 && b.bodyLen > b.maxBodyLen { + return nil, &ExceededMaxBodyLenErr{b.maxBodyLen, b.bodyLen} } + b.partialMsg = true } var msg FrontendMessage @@ -160,7 +217,7 @@ func (b *Backend) Receive() (FrontendMessage, error) { case AuthTypeCleartextPassword, AuthTypeMD5Password: fallthrough default: - // to maintain backwards compatability + // to maintain backwards compatibility msg = &PasswordMessage{} } case 'Q': @@ -181,7 +238,15 @@ func (b *Backend) Receive() (FrontendMessage, error) { b.partialMsg = false err = msg.Decode(msgBody) - return msg, err + if err != nil { + return nil, err + } + + if b.tracer != nil { + b.tracer.traceMessage('F', int32(5+len(msgBody)), msg) + } + + return msg, nil } // SetAuthType sets the authentication type in the backend. @@ -216,3 +281,12 @@ func (b *Backend) SetAuthType(authType uint32) error { return nil } + +// SetMaxBodyLen sets the maximum length of a message body in octets. If a message body exceeds this length, Receive will return +// an error. This is useful for protecting against malicious clients that send large messages with the intent of +// causing memory exhaustion. +// The default value is 0. +// If maxBodyLen is 0, then no maximum is enforced. +func (b *Backend) SetMaxBodyLen(maxBodyLen int) { + b.maxBodyLen = maxBodyLen +} diff --git a/vendor/github.com/jackc/pgproto3/v2/backend_key_data.go b/vendor/github.com/jackc/pgx/v5/pgproto3/backend_key_data.go similarity index 97% rename from vendor/github.com/jackc/pgproto3/v2/backend_key_data.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/backend_key_data.go index 0a3d5e55..23f5da67 100644 --- a/vendor/github.com/jackc/pgproto3/v2/backend_key_data.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/backend_key_data.go @@ -4,7 +4,7 @@ import ( "encoding/binary" "encoding/json" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type BackendKeyData struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/big_endian.go b/vendor/github.com/jackc/pgx/v5/pgproto3/big_endian.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/big_endian.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/big_endian.go diff --git a/vendor/github.com/jackc/pgproto3/v2/bind.go b/vendor/github.com/jackc/pgx/v5/pgproto3/bind.go similarity index 99% rename from vendor/github.com/jackc/pgproto3/v2/bind.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/bind.go index dd5503b1..ad6ac48b 100644 --- a/vendor/github.com/jackc/pgproto3/v2/bind.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/bind.go @@ -9,7 +9,7 @@ import ( "fmt" "math" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type Bind struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/bind_complete.go b/vendor/github.com/jackc/pgx/v5/pgproto3/bind_complete.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/bind_complete.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/bind_complete.go diff --git a/vendor/github.com/jackc/pgproto3/v2/cancel_request.go b/vendor/github.com/jackc/pgx/v5/pgproto3/cancel_request.go similarity index 96% rename from vendor/github.com/jackc/pgproto3/v2/cancel_request.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/cancel_request.go index 76acb3fc..6b52dd97 100644 --- a/vendor/github.com/jackc/pgproto3/v2/cancel_request.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/cancel_request.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) const cancelRequestCode = 80877102 diff --git a/vendor/github.com/jackc/pgx/v5/pgproto3/chunkreader.go b/vendor/github.com/jackc/pgx/v5/pgproto3/chunkreader.go new file mode 100644 index 00000000..fc0fa61e --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/chunkreader.go @@ -0,0 +1,90 @@ +package pgproto3 + +import ( + "io" + + "github.com/jackc/pgx/v5/internal/iobufpool" +) + +// chunkReader is a io.Reader wrapper that minimizes IO reads and memory allocations. It allocates memory in chunks and +// will read as much as will fit in the current buffer in a single call regardless of how large a read is actually +// requested. The memory returned via Next is only valid until the next call to Next. +// +// This is roughly equivalent to a bufio.Reader that only uses Peek and Discard to never copy bytes. +type chunkReader struct { + r io.Reader + + buf *[]byte + rp, wp int // buf read position and write position + + minBufSize int +} + +// newChunkReader creates and returns a new chunkReader for r with default configuration. If minBufSize is <= 0 it uses +// a default value. +func newChunkReader(r io.Reader, minBufSize int) *chunkReader { + if minBufSize <= 0 { + // By historical reasons Postgres currently has 8KB send buffer inside, + // so here we want to have at least the same size buffer. + // @see https://github.com/postgres/postgres/blob/249d64999615802752940e017ee5166e726bc7cd/src/backend/libpq/pqcomm.c#L134 + // @see https://www.postgresql.org/message-id/0cdc5485-cb3c-5e16-4a46-e3b2f7a41322%40ya.ru + // + // In addition, testing has found no benefit of any larger buffer. + minBufSize = 8192 + } + + return &chunkReader{ + r: r, + minBufSize: minBufSize, + buf: iobufpool.Get(minBufSize), + } +} + +// Next returns buf filled with the next n bytes. buf is only valid until next call of Next. If an error occurs, buf +// will be nil. +func (r *chunkReader) Next(n int) (buf []byte, err error) { + // Reset the buffer if it is empty + if r.rp == r.wp { + if len(*r.buf) != r.minBufSize { + iobufpool.Put(r.buf) + r.buf = iobufpool.Get(r.minBufSize) + } + r.rp = 0 + r.wp = 0 + } + + // n bytes already in buf + if (r.wp - r.rp) >= n { + buf = (*r.buf)[r.rp : r.rp+n : r.rp+n] + r.rp += n + return buf, err + } + + // buf is smaller than requested number of bytes + if len(*r.buf) < n { + bigBuf := iobufpool.Get(n) + r.wp = copy((*bigBuf), (*r.buf)[r.rp:r.wp]) + r.rp = 0 + iobufpool.Put(r.buf) + r.buf = bigBuf + } + + // buf is large enough, but need to shift filled area to start to make enough contiguous space + minReadCount := n - (r.wp - r.rp) + if (len(*r.buf) - r.wp) < minReadCount { + r.wp = copy((*r.buf), (*r.buf)[r.rp:r.wp]) + r.rp = 0 + } + + // Read at least the required number of bytes from the underlying io.Reader + readBytesCount, err := io.ReadAtLeast(r.r, (*r.buf)[r.wp:], minReadCount) + r.wp += readBytesCount + // fmt.Println("read", n) + if err != nil { + return nil, err + } + + buf = (*r.buf)[r.rp : r.rp+n : r.rp+n] + r.rp += n + return buf, nil +} diff --git a/vendor/github.com/jackc/pgproto3/v2/close.go b/vendor/github.com/jackc/pgx/v5/pgproto3/close.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/close.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/close.go diff --git a/vendor/github.com/jackc/pgproto3/v2/close_complete.go b/vendor/github.com/jackc/pgx/v5/pgproto3/close_complete.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/close_complete.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/close_complete.go diff --git a/vendor/github.com/jackc/pgproto3/v2/command_complete.go b/vendor/github.com/jackc/pgx/v5/pgproto3/command_complete.go similarity index 90% rename from vendor/github.com/jackc/pgproto3/v2/command_complete.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/command_complete.go index 9d822064..eba70947 100644 --- a/vendor/github.com/jackc/pgproto3/v2/command_complete.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/command_complete.go @@ -16,8 +16,11 @@ func (*CommandComplete) Backend() {} // type identifier and 4 byte message length. func (dst *CommandComplete) Decode(src []byte) error { idx := bytes.IndexByte(src, 0) + if idx == -1 { + return &invalidMessageFormatErr{messageType: "CommandComplete", details: "unterminated string"} + } if idx != len(src)-1 { - return &invalidMessageFormatErr{messageType: "CommandComplete"} + return &invalidMessageFormatErr{messageType: "CommandComplete", details: "string terminated too early"} } dst.CommandTag = src[:idx] diff --git a/vendor/github.com/jackc/pgproto3/v2/copy_both_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/copy_both_response.go similarity index 98% rename from vendor/github.com/jackc/pgproto3/v2/copy_both_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/copy_both_response.go index 4bf3ef32..99e1afea 100644 --- a/vendor/github.com/jackc/pgproto3/v2/copy_both_response.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/copy_both_response.go @@ -7,7 +7,7 @@ import ( "errors" "math" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type CopyBothResponse struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/copy_data.go b/vendor/github.com/jackc/pgx/v5/pgproto3/copy_data.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/copy_data.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/copy_data.go diff --git a/vendor/github.com/jackc/pgproto3/v2/copy_done.go b/vendor/github.com/jackc/pgx/v5/pgproto3/copy_done.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/copy_done.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/copy_done.go diff --git a/vendor/github.com/jackc/pgproto3/v2/copy_fail.go b/vendor/github.com/jackc/pgx/v5/pgproto3/copy_fail.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/copy_fail.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/copy_fail.go diff --git a/vendor/github.com/jackc/pgproto3/v2/copy_in_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/copy_in_response.go similarity index 98% rename from vendor/github.com/jackc/pgproto3/v2/copy_in_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/copy_in_response.go index bfc3ee07..06cf99ce 100644 --- a/vendor/github.com/jackc/pgproto3/v2/copy_in_response.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/copy_in_response.go @@ -7,7 +7,7 @@ import ( "errors" "math" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type CopyInResponse struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/copy_out_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/copy_out_response.go similarity index 98% rename from vendor/github.com/jackc/pgproto3/v2/copy_out_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/copy_out_response.go index 265e35f9..549e916c 100644 --- a/vendor/github.com/jackc/pgproto3/v2/copy_out_response.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/copy_out_response.go @@ -7,7 +7,7 @@ import ( "errors" "math" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type CopyOutResponse struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/data_row.go b/vendor/github.com/jackc/pgx/v5/pgproto3/data_row.go similarity index 92% rename from vendor/github.com/jackc/pgproto3/v2/data_row.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/data_row.go index d755515c..fdfb0f7f 100644 --- a/vendor/github.com/jackc/pgproto3/v2/data_row.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/data_row.go @@ -7,7 +7,7 @@ import ( "errors" "math" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type DataRow struct { @@ -45,19 +45,19 @@ func (dst *DataRow) Decode(src []byte) error { return &invalidMessageFormatErr{messageType: "DataRow"} } - msgSize := int(int32(binary.BigEndian.Uint32(src[rp:]))) + valueLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) rp += 4 // null - if msgSize == -1 { + if valueLen == -1 { dst.Values[i] = nil } else { - if len(src[rp:]) < msgSize { + if len(src[rp:]) < valueLen || valueLen < 0 { return &invalidMessageFormatErr{messageType: "DataRow"} } - dst.Values[i] = src[rp : rp+msgSize : rp+msgSize] - rp += msgSize + dst.Values[i] = src[rp : rp+valueLen : rp+valueLen] + rp += valueLen } } diff --git a/vendor/github.com/jackc/pgproto3/v2/describe.go b/vendor/github.com/jackc/pgx/v5/pgproto3/describe.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/describe.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/describe.go diff --git a/vendor/github.com/jackc/pgx/v5/pgproto3/doc.go b/vendor/github.com/jackc/pgx/v5/pgproto3/doc.go new file mode 100644 index 00000000..0afd18e2 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/doc.go @@ -0,0 +1,11 @@ +// Package pgproto3 is an encoder and decoder of the PostgreSQL wire protocol version 3. +// +// The primary interfaces are Frontend and Backend. They correspond to a client and server respectively. Messages are +// sent with Send (or a specialized Send variant). Messages are automatically buffered to minimize small writes. Call +// Flush to ensure a message has actually been sent. +// +// The Trace method of Frontend and Backend can be used to examine the wire-level message traffic. It outputs in a +// similar format to the PQtrace function in libpq. +// +// See https://www.postgresql.org/docs/current/protocol-message-formats.html for meanings of the different messages. +package pgproto3 diff --git a/vendor/github.com/jackc/pgproto3/v2/empty_query_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/empty_query_response.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/empty_query_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/empty_query_response.go diff --git a/vendor/github.com/jackc/pgproto3/v2/error_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/error_response.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/error_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/error_response.go diff --git a/vendor/github.com/jackc/pgproto3/v2/execute.go b/vendor/github.com/jackc/pgx/v5/pgproto3/execute.go similarity index 97% rename from vendor/github.com/jackc/pgproto3/v2/execute.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/execute.go index efb9e1e2..31bc714d 100644 --- a/vendor/github.com/jackc/pgproto3/v2/execute.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/execute.go @@ -5,7 +5,7 @@ import ( "encoding/binary" "encoding/json" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type Execute struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/flush.go b/vendor/github.com/jackc/pgx/v5/pgproto3/flush.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/flush.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/flush.go diff --git a/vendor/github.com/jackc/pgx/v5/pgproto3/frontend.go b/vendor/github.com/jackc/pgx/v5/pgproto3/frontend.go new file mode 100644 index 00000000..b41abbe1 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/frontend.go @@ -0,0 +1,454 @@ +package pgproto3 + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" +) + +// Frontend acts as a client for the PostgreSQL wire protocol version 3. +type Frontend struct { + cr *chunkReader + w io.Writer + + // tracer is used to trace messages when Send or Receive is called. This means an outbound message is traced + // before it is actually transmitted (i.e. before Flush). It is safe to change this variable when the Frontend is + // idle. Setting and unsetting tracer provides equivalent functionality to PQtrace and PQuntrace in libpq. + tracer *tracer + + wbuf []byte + encodeError error + + // Backend message flyweights + authenticationOk AuthenticationOk + authenticationCleartextPassword AuthenticationCleartextPassword + authenticationMD5Password AuthenticationMD5Password + authenticationGSS AuthenticationGSS + authenticationGSSContinue AuthenticationGSSContinue + authenticationSASL AuthenticationSASL + authenticationSASLContinue AuthenticationSASLContinue + authenticationSASLFinal AuthenticationSASLFinal + backendKeyData BackendKeyData + bindComplete BindComplete + closeComplete CloseComplete + commandComplete CommandComplete + copyBothResponse CopyBothResponse + copyData CopyData + copyInResponse CopyInResponse + copyOutResponse CopyOutResponse + copyDone CopyDone + dataRow DataRow + emptyQueryResponse EmptyQueryResponse + errorResponse ErrorResponse + functionCallResponse FunctionCallResponse + noData NoData + noticeResponse NoticeResponse + notificationResponse NotificationResponse + parameterDescription ParameterDescription + parameterStatus ParameterStatus + parseComplete ParseComplete + readyForQuery ReadyForQuery + rowDescription RowDescription + portalSuspended PortalSuspended + + bodyLen int + msgType byte + partialMsg bool + authType uint32 +} + +// NewFrontend creates a new Frontend. +func NewFrontend(r io.Reader, w io.Writer) *Frontend { + cr := newChunkReader(r, 0) + return &Frontend{cr: cr, w: w} +} + +// Send sends a message to the backend (i.e. the server). The message is buffered until Flush is called. Any error +// encountered will be returned from Flush. +// +// Send can work with any FrontendMessage. Some commonly used message types such as Bind have specialized send methods +// such as SendBind. These methods should be preferred when the type of message is known up front (e.g. when building an +// extended query protocol query) as they may be faster due to knowing the type of msg rather than it being hidden +// behind an interface. +func (f *Frontend) Send(msg FrontendMessage) { + if f.encodeError != nil { + return + } + + prevLen := len(f.wbuf) + newBuf, err := msg.Encode(f.wbuf) + if err != nil { + f.encodeError = err + return + } + f.wbuf = newBuf + + if f.tracer != nil { + f.tracer.traceMessage('F', int32(len(f.wbuf)-prevLen), msg) + } +} + +// Flush writes any pending messages to the backend (i.e. the server). +func (f *Frontend) Flush() error { + if err := f.encodeError; err != nil { + f.encodeError = nil + f.wbuf = f.wbuf[:0] + return &writeError{err: err, safeToRetry: true} + } + + if len(f.wbuf) == 0 { + return nil + } + + n, err := f.w.Write(f.wbuf) + + const maxLen = 1024 + if len(f.wbuf) > maxLen { + f.wbuf = make([]byte, 0, maxLen) + } else { + f.wbuf = f.wbuf[:0] + } + + if err != nil { + return &writeError{err: err, safeToRetry: n == 0} + } + + return nil +} + +// Trace starts tracing the message traffic to w. It writes in a similar format to that produced by the libpq function +// PQtrace. +func (f *Frontend) Trace(w io.Writer, options TracerOptions) { + f.tracer = &tracer{ + w: w, + buf: &bytes.Buffer{}, + TracerOptions: options, + } +} + +// Untrace stops tracing. +func (f *Frontend) Untrace() { + f.tracer = nil +} + +// SendBind sends a Bind message to the backend (i.e. the server). The message is buffered until Flush is called. Any +// error encountered will be returned from Flush. +func (f *Frontend) SendBind(msg *Bind) { + if f.encodeError != nil { + return + } + + prevLen := len(f.wbuf) + newBuf, err := msg.Encode(f.wbuf) + if err != nil { + f.encodeError = err + return + } + f.wbuf = newBuf + + if f.tracer != nil { + f.tracer.traceBind('F', int32(len(f.wbuf)-prevLen), msg) + } +} + +// SendParse sends a Parse message to the backend (i.e. the server). The message is buffered until Flush is called. Any +// error encountered will be returned from Flush. +func (f *Frontend) SendParse(msg *Parse) { + if f.encodeError != nil { + return + } + + prevLen := len(f.wbuf) + newBuf, err := msg.Encode(f.wbuf) + if err != nil { + f.encodeError = err + return + } + f.wbuf = newBuf + + if f.tracer != nil { + f.tracer.traceParse('F', int32(len(f.wbuf)-prevLen), msg) + } +} + +// SendClose sends a Close message to the backend (i.e. the server). The message is buffered until Flush is called. Any +// error encountered will be returned from Flush. +func (f *Frontend) SendClose(msg *Close) { + if f.encodeError != nil { + return + } + + prevLen := len(f.wbuf) + newBuf, err := msg.Encode(f.wbuf) + if err != nil { + f.encodeError = err + return + } + f.wbuf = newBuf + + if f.tracer != nil { + f.tracer.traceClose('F', int32(len(f.wbuf)-prevLen), msg) + } +} + +// SendDescribe sends a Describe message to the backend (i.e. the server). The message is buffered until Flush is +// called. Any error encountered will be returned from Flush. +func (f *Frontend) SendDescribe(msg *Describe) { + if f.encodeError != nil { + return + } + + prevLen := len(f.wbuf) + newBuf, err := msg.Encode(f.wbuf) + if err != nil { + f.encodeError = err + return + } + f.wbuf = newBuf + + if f.tracer != nil { + f.tracer.traceDescribe('F', int32(len(f.wbuf)-prevLen), msg) + } +} + +// SendExecute sends an Execute message to the backend (i.e. the server). The message is buffered until Flush is called. +// Any error encountered will be returned from Flush. +func (f *Frontend) SendExecute(msg *Execute) { + if f.encodeError != nil { + return + } + + prevLen := len(f.wbuf) + newBuf, err := msg.Encode(f.wbuf) + if err != nil { + f.encodeError = err + return + } + f.wbuf = newBuf + + if f.tracer != nil { + f.tracer.TraceQueryute('F', int32(len(f.wbuf)-prevLen), msg) + } +} + +// SendSync sends a Sync message to the backend (i.e. the server). The message is buffered until Flush is called. Any +// error encountered will be returned from Flush. +func (f *Frontend) SendSync(msg *Sync) { + if f.encodeError != nil { + return + } + + prevLen := len(f.wbuf) + newBuf, err := msg.Encode(f.wbuf) + if err != nil { + f.encodeError = err + return + } + f.wbuf = newBuf + + if f.tracer != nil { + f.tracer.traceSync('F', int32(len(f.wbuf)-prevLen), msg) + } +} + +// SendQuery sends a Query message to the backend (i.e. the server). The message is buffered until Flush is called. Any +// error encountered will be returned from Flush. +func (f *Frontend) SendQuery(msg *Query) { + if f.encodeError != nil { + return + } + + prevLen := len(f.wbuf) + newBuf, err := msg.Encode(f.wbuf) + if err != nil { + f.encodeError = err + return + } + f.wbuf = newBuf + + if f.tracer != nil { + f.tracer.traceQuery('F', int32(len(f.wbuf)-prevLen), msg) + } +} + +// SendUnbufferedEncodedCopyData immediately sends an encoded CopyData message to the backend (i.e. the server). This method +// is more efficient than sending a CopyData message with Send as the message data is not copied to the internal buffer +// before being written out. The internal buffer is flushed before the message is sent. +func (f *Frontend) SendUnbufferedEncodedCopyData(msg []byte) error { + err := f.Flush() + if err != nil { + return err + } + + n, err := f.w.Write(msg) + if err != nil { + return &writeError{err: err, safeToRetry: n == 0} + } + + if f.tracer != nil { + f.tracer.traceCopyData('F', int32(len(msg)-1), &CopyData{}) + } + + return nil +} + +func translateEOFtoErrUnexpectedEOF(err error) error { + if err == io.EOF { + return io.ErrUnexpectedEOF + } + return err +} + +// Receive receives a message from the backend. The returned message is only valid until the next call to Receive. +func (f *Frontend) Receive() (BackendMessage, error) { + if !f.partialMsg { + header, err := f.cr.Next(5) + if err != nil { + return nil, translateEOFtoErrUnexpectedEOF(err) + } + + f.msgType = header[0] + + msgLength := int(binary.BigEndian.Uint32(header[1:])) + if msgLength < 4 { + return nil, fmt.Errorf("invalid message length: %d", msgLength) + } + + f.bodyLen = msgLength - 4 + f.partialMsg = true + } + + msgBody, err := f.cr.Next(f.bodyLen) + if err != nil { + return nil, translateEOFtoErrUnexpectedEOF(err) + } + + f.partialMsg = false + + var msg BackendMessage + switch f.msgType { + case '1': + msg = &f.parseComplete + case '2': + msg = &f.bindComplete + case '3': + msg = &f.closeComplete + case 'A': + msg = &f.notificationResponse + case 'c': + msg = &f.copyDone + case 'C': + msg = &f.commandComplete + case 'd': + msg = &f.copyData + case 'D': + msg = &f.dataRow + case 'E': + msg = &f.errorResponse + case 'G': + msg = &f.copyInResponse + case 'H': + msg = &f.copyOutResponse + case 'I': + msg = &f.emptyQueryResponse + case 'K': + msg = &f.backendKeyData + case 'n': + msg = &f.noData + case 'N': + msg = &f.noticeResponse + case 'R': + var err error + msg, err = f.findAuthenticationMessageType(msgBody) + if err != nil { + return nil, err + } + case 's': + msg = &f.portalSuspended + case 'S': + msg = &f.parameterStatus + case 't': + msg = &f.parameterDescription + case 'T': + msg = &f.rowDescription + case 'V': + msg = &f.functionCallResponse + case 'W': + msg = &f.copyBothResponse + case 'Z': + msg = &f.readyForQuery + default: + return nil, fmt.Errorf("unknown message type: %c", f.msgType) + } + + err = msg.Decode(msgBody) + if err != nil { + return nil, err + } + + if f.tracer != nil { + f.tracer.traceMessage('B', int32(5+len(msgBody)), msg) + } + + return msg, nil +} + +// Authentication message type constants. +// See src/include/libpq/pqcomm.h for all +// constants. +const ( + AuthTypeOk = 0 + AuthTypeCleartextPassword = 3 + AuthTypeMD5Password = 5 + AuthTypeSCMCreds = 6 + AuthTypeGSS = 7 + AuthTypeGSSCont = 8 + AuthTypeSSPI = 9 + AuthTypeSASL = 10 + AuthTypeSASLContinue = 11 + AuthTypeSASLFinal = 12 +) + +func (f *Frontend) findAuthenticationMessageType(src []byte) (BackendMessage, error) { + if len(src) < 4 { + return nil, errors.New("authentication message too short") + } + f.authType = binary.BigEndian.Uint32(src[:4]) + + switch f.authType { + case AuthTypeOk: + return &f.authenticationOk, nil + case AuthTypeCleartextPassword: + return &f.authenticationCleartextPassword, nil + case AuthTypeMD5Password: + return &f.authenticationMD5Password, nil + case AuthTypeSCMCreds: + return nil, errors.New("AuthTypeSCMCreds is unimplemented") + case AuthTypeGSS: + return &f.authenticationGSS, nil + case AuthTypeGSSCont: + return &f.authenticationGSSContinue, nil + case AuthTypeSSPI: + return nil, errors.New("AuthTypeSSPI is unimplemented") + case AuthTypeSASL: + return &f.authenticationSASL, nil + case AuthTypeSASLContinue: + return &f.authenticationSASLContinue, nil + case AuthTypeSASLFinal: + return &f.authenticationSASLFinal, nil + default: + return nil, fmt.Errorf("unknown authentication type: %d", f.authType) + } +} + +// GetAuthType returns the authType used in the current state of the frontend. +// See SetAuthType for more information. +func (f *Frontend) GetAuthType() uint32 { + return f.authType +} + +func (f *Frontend) ReadBufferLen() int { + return f.cr.wp - f.cr.rp +} diff --git a/vendor/github.com/jackc/pgproto3/v2/function_call.go b/vendor/github.com/jackc/pgx/v5/pgproto3/function_call.go similarity index 98% rename from vendor/github.com/jackc/pgproto3/v2/function_call.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/function_call.go index 5d799c4d..7d83579f 100644 --- a/vendor/github.com/jackc/pgproto3/v2/function_call.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/function_call.go @@ -5,7 +5,7 @@ import ( "errors" "math" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type FunctionCall struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/function_call_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/function_call_response.go similarity index 98% rename from vendor/github.com/jackc/pgproto3/v2/function_call_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/function_call_response.go index abc14f0d..1f273495 100644 --- a/vendor/github.com/jackc/pgproto3/v2/function_call_response.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/function_call_response.go @@ -5,7 +5,7 @@ import ( "encoding/hex" "encoding/json" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type FunctionCallResponse struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/gss_enc_request.go b/vendor/github.com/jackc/pgx/v5/pgproto3/gss_enc_request.go similarity index 96% rename from vendor/github.com/jackc/pgproto3/v2/gss_enc_request.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/gss_enc_request.go index f6e4f662..70cb20cd 100644 --- a/vendor/github.com/jackc/pgproto3/v2/gss_enc_request.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/gss_enc_request.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) const gssEncReqNumber = 80877104 diff --git a/vendor/github.com/jackc/pgproto3/v2/gss_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/gss_response.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/gss_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/gss_response.go diff --git a/vendor/github.com/jackc/pgproto3/v2/no_data.go b/vendor/github.com/jackc/pgx/v5/pgproto3/no_data.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/no_data.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/no_data.go diff --git a/vendor/github.com/jackc/pgproto3/v2/notice_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/notice_response.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/notice_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/notice_response.go diff --git a/vendor/github.com/jackc/pgproto3/v2/notification_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/notification_response.go similarity index 90% rename from vendor/github.com/jackc/pgproto3/v2/notification_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/notification_response.go index 5be3edd3..243b6bf7 100644 --- a/vendor/github.com/jackc/pgproto3/v2/notification_response.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/notification_response.go @@ -5,7 +5,7 @@ import ( "encoding/binary" "encoding/json" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type NotificationResponse struct { @@ -22,6 +22,10 @@ func (*NotificationResponse) Backend() {} func (dst *NotificationResponse) Decode(src []byte) error { buf := bytes.NewBuffer(src) + if buf.Len() < 4 { + return &invalidMessageFormatErr{messageType: "NotificationResponse", details: "too short"} + } + pid := binary.BigEndian.Uint32(buf.Next(4)) b, err := buf.ReadBytes(0) diff --git a/vendor/github.com/jackc/pgproto3/v2/parameter_description.go b/vendor/github.com/jackc/pgx/v5/pgproto3/parameter_description.go similarity index 97% rename from vendor/github.com/jackc/pgproto3/v2/parameter_description.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/parameter_description.go index fec0fce8..1ef27b75 100644 --- a/vendor/github.com/jackc/pgproto3/v2/parameter_description.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/parameter_description.go @@ -7,7 +7,7 @@ import ( "errors" "math" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type ParameterDescription struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/parameter_status.go b/vendor/github.com/jackc/pgx/v5/pgproto3/parameter_status.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/parameter_status.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/parameter_status.go diff --git a/vendor/github.com/jackc/pgproto3/v2/parse.go b/vendor/github.com/jackc/pgx/v5/pgproto3/parse.go similarity index 98% rename from vendor/github.com/jackc/pgproto3/v2/parse.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/parse.go index 7dd06990..6ba3486c 100644 --- a/vendor/github.com/jackc/pgproto3/v2/parse.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/parse.go @@ -7,7 +7,7 @@ import ( "errors" "math" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type Parse struct { diff --git a/vendor/github.com/jackc/pgproto3/v2/parse_complete.go b/vendor/github.com/jackc/pgx/v5/pgproto3/parse_complete.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/parse_complete.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/parse_complete.go diff --git a/vendor/github.com/jackc/pgproto3/v2/password_message.go b/vendor/github.com/jackc/pgx/v5/pgproto3/password_message.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/password_message.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/password_message.go diff --git a/vendor/github.com/jackc/pgproto3/v2/pgproto3.go b/vendor/github.com/jackc/pgx/v5/pgproto3/pgproto3.go similarity index 73% rename from vendor/github.com/jackc/pgproto3/v2/pgproto3.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/pgproto3.go index aa4167c4..128f97f8 100644 --- a/vendor/github.com/jackc/pgproto3/v2/pgproto3.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/pgproto3.go @@ -5,7 +5,7 @@ import ( "errors" "fmt" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) // maxMessageBodyLen is the maximum length of a message body in bytes. See PG_LARGE_MESSAGE_LIMIT in the PostgreSQL @@ -23,11 +23,13 @@ type Message interface { Encode(dst []byte) ([]byte, error) } +// FrontendMessage is a message sent by the frontend (i.e. the client). type FrontendMessage interface { Message Frontend() // no-op method to distinguish frontend from backend methods } +// BackendMessage is a message sent by the backend (i.e. the server). type BackendMessage interface { Message Backend() // no-op method to distinguish frontend from backend methods @@ -50,10 +52,37 @@ func (e *invalidMessageLenErr) Error() string { type invalidMessageFormatErr struct { messageType string + details string } func (e *invalidMessageFormatErr) Error() string { - return fmt.Sprintf("%s body is invalid", e.messageType) + return fmt.Sprintf("%s body is invalid %s", e.messageType, e.details) +} + +type writeError struct { + err error + safeToRetry bool +} + +func (e *writeError) Error() string { + return fmt.Sprintf("write failed: %s", e.err.Error()) +} + +func (e *writeError) SafeToRetry() bool { + return e.safeToRetry +} + +func (e *writeError) Unwrap() error { + return e.err +} + +type ExceededMaxBodyLenErr struct { + MaxExpectedBodyLen int + ActualBodyLen int +} + +func (e *ExceededMaxBodyLenErr) Error() string { + return fmt.Sprintf("invalid body length: expected at most %d, but got %d", e.MaxExpectedBodyLen, e.ActualBodyLen) } // getValueFromJSON gets the value from a protocol message representation in JSON. @@ -70,7 +99,7 @@ func getValueFromJSON(v map[string]string) ([]byte, error) { return nil, errors.New("unknown protocol representation") } -// beginMessage begines a new message of type t. It appends the message type and a placeholder for the message length to +// beginMessage begins a new message of type t. It appends the message type and a placeholder for the message length to // dst. It returns the new buffer and the position of the message length placeholder. func beginMessage(dst []byte, t byte) ([]byte, int) { dst = append(dst, t) diff --git a/vendor/github.com/jackc/pgproto3/v2/portal_suspended.go b/vendor/github.com/jackc/pgx/v5/pgproto3/portal_suspended.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/portal_suspended.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/portal_suspended.go diff --git a/vendor/github.com/jackc/pgproto3/v2/query.go b/vendor/github.com/jackc/pgx/v5/pgproto3/query.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/query.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/query.go diff --git a/vendor/github.com/jackc/pgproto3/v2/ready_for_query.go b/vendor/github.com/jackc/pgx/v5/pgproto3/ready_for_query.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/ready_for_query.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/ready_for_query.go diff --git a/vendor/github.com/jackc/pgproto3/v2/row_description.go b/vendor/github.com/jackc/pgx/v5/pgproto3/row_description.go similarity index 99% rename from vendor/github.com/jackc/pgproto3/v2/row_description.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/row_description.go index 3f6b2c64..dc2a4ddf 100644 --- a/vendor/github.com/jackc/pgproto3/v2/row_description.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/row_description.go @@ -7,7 +7,7 @@ import ( "errors" "math" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) const ( diff --git a/vendor/github.com/jackc/pgproto3/v2/sasl_initial_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/sasl_initial_response.go similarity index 91% rename from vendor/github.com/jackc/pgproto3/v2/sasl_initial_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/sasl_initial_response.go index 1938f658..9eb1b6a4 100644 --- a/vendor/github.com/jackc/pgproto3/v2/sasl_initial_response.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/sasl_initial_response.go @@ -2,10 +2,11 @@ package pgproto3 import ( "bytes" + "encoding/hex" "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) type SASLInitialResponse struct { @@ -78,6 +79,12 @@ func (dst *SASLInitialResponse) UnmarshalJSON(data []byte) error { return err } dst.AuthMechanism = msg.AuthMechanism - dst.Data = []byte(msg.Data) + if msg.Data != "" { + decoded, err := hex.DecodeString(msg.Data) + if err != nil { + return err + } + dst.Data = decoded + } return nil } diff --git a/vendor/github.com/jackc/pgproto3/v2/sasl_response.go b/vendor/github.com/jackc/pgx/v5/pgproto3/sasl_response.go similarity index 89% rename from vendor/github.com/jackc/pgproto3/v2/sasl_response.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/sasl_response.go index f4a13185..1b604c25 100644 --- a/vendor/github.com/jackc/pgproto3/v2/sasl_response.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/sasl_response.go @@ -1,6 +1,7 @@ package pgproto3 import ( + "encoding/hex" "encoding/json" ) @@ -44,6 +45,12 @@ func (dst *SASLResponse) UnmarshalJSON(data []byte) error { if err := json.Unmarshal(data, &msg); err != nil { return err } - dst.Data = []byte(msg.Data) + if msg.Data != "" { + decoded, err := hex.DecodeString(msg.Data) + if err != nil { + return err + } + dst.Data = decoded + } return nil } diff --git a/vendor/github.com/jackc/pgproto3/v2/ssl_request.go b/vendor/github.com/jackc/pgx/v5/pgproto3/ssl_request.go similarity index 96% rename from vendor/github.com/jackc/pgproto3/v2/ssl_request.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/ssl_request.go index 8feff1a2..b0fc2847 100644 --- a/vendor/github.com/jackc/pgproto3/v2/ssl_request.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/ssl_request.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) const sslRequestNumber = 80877103 diff --git a/vendor/github.com/jackc/pgproto3/v2/startup_message.go b/vendor/github.com/jackc/pgx/v5/pgproto3/startup_message.go similarity index 92% rename from vendor/github.com/jackc/pgproto3/v2/startup_message.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/startup_message.go index 255ea22d..3af4587d 100644 --- a/vendor/github.com/jackc/pgproto3/v2/startup_message.go +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/startup_message.go @@ -7,7 +7,7 @@ import ( "errors" "fmt" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) const ProtocolVersionNumber = 196608 // 3.0 @@ -38,14 +38,14 @@ func (dst *StartupMessage) Decode(src []byte) error { for { idx := bytes.IndexByte(src[rp:], 0) if idx < 0 { - return &invalidMessageFormatErr{messageType: "StartupMesage"} + return &invalidMessageFormatErr{messageType: "StartupMessage"} } key := string(src[rp : rp+idx]) rp += idx + 1 idx = bytes.IndexByte(src[rp:], 0) if idx < 0 { - return &invalidMessageFormatErr{messageType: "StartupMesage"} + return &invalidMessageFormatErr{messageType: "StartupMessage"} } value := string(src[rp : rp+idx]) rp += idx + 1 diff --git a/vendor/github.com/jackc/pgproto3/v2/sync.go b/vendor/github.com/jackc/pgx/v5/pgproto3/sync.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/sync.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/sync.go diff --git a/vendor/github.com/jackc/pgproto3/v2/terminate.go b/vendor/github.com/jackc/pgx/v5/pgproto3/terminate.go similarity index 100% rename from vendor/github.com/jackc/pgproto3/v2/terminate.go rename to vendor/github.com/jackc/pgx/v5/pgproto3/terminate.go diff --git a/vendor/github.com/jackc/pgx/v5/pgproto3/trace.go b/vendor/github.com/jackc/pgx/v5/pgproto3/trace.go new file mode 100644 index 00000000..6cc7d3e3 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgproto3/trace.go @@ -0,0 +1,416 @@ +package pgproto3 + +import ( + "bytes" + "fmt" + "io" + "strconv" + "strings" + "sync" + "time" +) + +// tracer traces the messages send to and from a Backend or Frontend. The format it produces roughly mimics the +// format produced by the libpq C function PQtrace. +type tracer struct { + TracerOptions + + mux sync.Mutex + w io.Writer + buf *bytes.Buffer +} + +// TracerOptions controls tracing behavior. It is roughly equivalent to the libpq function PQsetTraceFlags. +type TracerOptions struct { + // SuppressTimestamps prevents printing of timestamps. + SuppressTimestamps bool + + // RegressMode redacts fields that may be vary between executions. + RegressMode bool +} + +func (t *tracer) traceMessage(sender byte, encodedLen int32, msg Message) { + switch msg := msg.(type) { + case *AuthenticationCleartextPassword: + t.traceAuthenticationCleartextPassword(sender, encodedLen, msg) + case *AuthenticationGSS: + t.traceAuthenticationGSS(sender, encodedLen, msg) + case *AuthenticationGSSContinue: + t.traceAuthenticationGSSContinue(sender, encodedLen, msg) + case *AuthenticationMD5Password: + t.traceAuthenticationMD5Password(sender, encodedLen, msg) + case *AuthenticationOk: + t.traceAuthenticationOk(sender, encodedLen, msg) + case *AuthenticationSASL: + t.traceAuthenticationSASL(sender, encodedLen, msg) + case *AuthenticationSASLContinue: + t.traceAuthenticationSASLContinue(sender, encodedLen, msg) + case *AuthenticationSASLFinal: + t.traceAuthenticationSASLFinal(sender, encodedLen, msg) + case *BackendKeyData: + t.traceBackendKeyData(sender, encodedLen, msg) + case *Bind: + t.traceBind(sender, encodedLen, msg) + case *BindComplete: + t.traceBindComplete(sender, encodedLen, msg) + case *CancelRequest: + t.traceCancelRequest(sender, encodedLen, msg) + case *Close: + t.traceClose(sender, encodedLen, msg) + case *CloseComplete: + t.traceCloseComplete(sender, encodedLen, msg) + case *CommandComplete: + t.traceCommandComplete(sender, encodedLen, msg) + case *CopyBothResponse: + t.traceCopyBothResponse(sender, encodedLen, msg) + case *CopyData: + t.traceCopyData(sender, encodedLen, msg) + case *CopyDone: + t.traceCopyDone(sender, encodedLen, msg) + case *CopyFail: + t.traceCopyFail(sender, encodedLen, msg) + case *CopyInResponse: + t.traceCopyInResponse(sender, encodedLen, msg) + case *CopyOutResponse: + t.traceCopyOutResponse(sender, encodedLen, msg) + case *DataRow: + t.traceDataRow(sender, encodedLen, msg) + case *Describe: + t.traceDescribe(sender, encodedLen, msg) + case *EmptyQueryResponse: + t.traceEmptyQueryResponse(sender, encodedLen, msg) + case *ErrorResponse: + t.traceErrorResponse(sender, encodedLen, msg) + case *Execute: + t.TraceQueryute(sender, encodedLen, msg) + case *Flush: + t.traceFlush(sender, encodedLen, msg) + case *FunctionCall: + t.traceFunctionCall(sender, encodedLen, msg) + case *FunctionCallResponse: + t.traceFunctionCallResponse(sender, encodedLen, msg) + case *GSSEncRequest: + t.traceGSSEncRequest(sender, encodedLen, msg) + case *NoData: + t.traceNoData(sender, encodedLen, msg) + case *NoticeResponse: + t.traceNoticeResponse(sender, encodedLen, msg) + case *NotificationResponse: + t.traceNotificationResponse(sender, encodedLen, msg) + case *ParameterDescription: + t.traceParameterDescription(sender, encodedLen, msg) + case *ParameterStatus: + t.traceParameterStatus(sender, encodedLen, msg) + case *Parse: + t.traceParse(sender, encodedLen, msg) + case *ParseComplete: + t.traceParseComplete(sender, encodedLen, msg) + case *PortalSuspended: + t.tracePortalSuspended(sender, encodedLen, msg) + case *Query: + t.traceQuery(sender, encodedLen, msg) + case *ReadyForQuery: + t.traceReadyForQuery(sender, encodedLen, msg) + case *RowDescription: + t.traceRowDescription(sender, encodedLen, msg) + case *SSLRequest: + t.traceSSLRequest(sender, encodedLen, msg) + case *StartupMessage: + t.traceStartupMessage(sender, encodedLen, msg) + case *Sync: + t.traceSync(sender, encodedLen, msg) + case *Terminate: + t.traceTerminate(sender, encodedLen, msg) + default: + t.writeTrace(sender, encodedLen, "Unknown", nil) + } +} + +func (t *tracer) traceAuthenticationCleartextPassword(sender byte, encodedLen int32, msg *AuthenticationCleartextPassword) { + t.writeTrace(sender, encodedLen, "AuthenticationCleartextPassword", nil) +} + +func (t *tracer) traceAuthenticationGSS(sender byte, encodedLen int32, msg *AuthenticationGSS) { + t.writeTrace(sender, encodedLen, "AuthenticationGSS", nil) +} + +func (t *tracer) traceAuthenticationGSSContinue(sender byte, encodedLen int32, msg *AuthenticationGSSContinue) { + t.writeTrace(sender, encodedLen, "AuthenticationGSSContinue", nil) +} + +func (t *tracer) traceAuthenticationMD5Password(sender byte, encodedLen int32, msg *AuthenticationMD5Password) { + t.writeTrace(sender, encodedLen, "AuthenticationMD5Password", nil) +} + +func (t *tracer) traceAuthenticationOk(sender byte, encodedLen int32, msg *AuthenticationOk) { + t.writeTrace(sender, encodedLen, "AuthenticationOk", nil) +} + +func (t *tracer) traceAuthenticationSASL(sender byte, encodedLen int32, msg *AuthenticationSASL) { + t.writeTrace(sender, encodedLen, "AuthenticationSASL", nil) +} + +func (t *tracer) traceAuthenticationSASLContinue(sender byte, encodedLen int32, msg *AuthenticationSASLContinue) { + t.writeTrace(sender, encodedLen, "AuthenticationSASLContinue", nil) +} + +func (t *tracer) traceAuthenticationSASLFinal(sender byte, encodedLen int32, msg *AuthenticationSASLFinal) { + t.writeTrace(sender, encodedLen, "AuthenticationSASLFinal", nil) +} + +func (t *tracer) traceBackendKeyData(sender byte, encodedLen int32, msg *BackendKeyData) { + t.writeTrace(sender, encodedLen, "BackendKeyData", func() { + if t.RegressMode { + t.buf.WriteString("\t NNNN NNNN") + } else { + fmt.Fprintf(t.buf, "\t %d %d", msg.ProcessID, msg.SecretKey) + } + }) +} + +func (t *tracer) traceBind(sender byte, encodedLen int32, msg *Bind) { + t.writeTrace(sender, encodedLen, "Bind", func() { + fmt.Fprintf(t.buf, "\t %s %s %d", traceDoubleQuotedString([]byte(msg.DestinationPortal)), traceDoubleQuotedString([]byte(msg.PreparedStatement)), len(msg.ParameterFormatCodes)) + for _, fc := range msg.ParameterFormatCodes { + fmt.Fprintf(t.buf, " %d", fc) + } + fmt.Fprintf(t.buf, " %d", len(msg.Parameters)) + for _, p := range msg.Parameters { + fmt.Fprintf(t.buf, " %s", traceSingleQuotedString(p)) + } + fmt.Fprintf(t.buf, " %d", len(msg.ResultFormatCodes)) + for _, fc := range msg.ResultFormatCodes { + fmt.Fprintf(t.buf, " %d", fc) + } + }) +} + +func (t *tracer) traceBindComplete(sender byte, encodedLen int32, msg *BindComplete) { + t.writeTrace(sender, encodedLen, "BindComplete", nil) +} + +func (t *tracer) traceCancelRequest(sender byte, encodedLen int32, msg *CancelRequest) { + t.writeTrace(sender, encodedLen, "CancelRequest", nil) +} + +func (t *tracer) traceClose(sender byte, encodedLen int32, msg *Close) { + t.writeTrace(sender, encodedLen, "Close", nil) +} + +func (t *tracer) traceCloseComplete(sender byte, encodedLen int32, msg *CloseComplete) { + t.writeTrace(sender, encodedLen, "CloseComplete", nil) +} + +func (t *tracer) traceCommandComplete(sender byte, encodedLen int32, msg *CommandComplete) { + t.writeTrace(sender, encodedLen, "CommandComplete", func() { + fmt.Fprintf(t.buf, "\t %s", traceDoubleQuotedString(msg.CommandTag)) + }) +} + +func (t *tracer) traceCopyBothResponse(sender byte, encodedLen int32, msg *CopyBothResponse) { + t.writeTrace(sender, encodedLen, "CopyBothResponse", nil) +} + +func (t *tracer) traceCopyData(sender byte, encodedLen int32, msg *CopyData) { + t.writeTrace(sender, encodedLen, "CopyData", nil) +} + +func (t *tracer) traceCopyDone(sender byte, encodedLen int32, msg *CopyDone) { + t.writeTrace(sender, encodedLen, "CopyDone", nil) +} + +func (t *tracer) traceCopyFail(sender byte, encodedLen int32, msg *CopyFail) { + t.writeTrace(sender, encodedLen, "CopyFail", func() { + fmt.Fprintf(t.buf, "\t %s", traceDoubleQuotedString([]byte(msg.Message))) + }) +} + +func (t *tracer) traceCopyInResponse(sender byte, encodedLen int32, msg *CopyInResponse) { + t.writeTrace(sender, encodedLen, "CopyInResponse", nil) +} + +func (t *tracer) traceCopyOutResponse(sender byte, encodedLen int32, msg *CopyOutResponse) { + t.writeTrace(sender, encodedLen, "CopyOutResponse", nil) +} + +func (t *tracer) traceDataRow(sender byte, encodedLen int32, msg *DataRow) { + t.writeTrace(sender, encodedLen, "DataRow", func() { + fmt.Fprintf(t.buf, "\t %d", len(msg.Values)) + for _, v := range msg.Values { + if v == nil { + t.buf.WriteString(" -1") + } else { + fmt.Fprintf(t.buf, " %d %s", len(v), traceSingleQuotedString(v)) + } + } + }) +} + +func (t *tracer) traceDescribe(sender byte, encodedLen int32, msg *Describe) { + t.writeTrace(sender, encodedLen, "Describe", func() { + fmt.Fprintf(t.buf, "\t %c %s", msg.ObjectType, traceDoubleQuotedString([]byte(msg.Name))) + }) +} + +func (t *tracer) traceEmptyQueryResponse(sender byte, encodedLen int32, msg *EmptyQueryResponse) { + t.writeTrace(sender, encodedLen, "EmptyQueryResponse", nil) +} + +func (t *tracer) traceErrorResponse(sender byte, encodedLen int32, msg *ErrorResponse) { + t.writeTrace(sender, encodedLen, "ErrorResponse", nil) +} + +func (t *tracer) TraceQueryute(sender byte, encodedLen int32, msg *Execute) { + t.writeTrace(sender, encodedLen, "Execute", func() { + fmt.Fprintf(t.buf, "\t %s %d", traceDoubleQuotedString([]byte(msg.Portal)), msg.MaxRows) + }) +} + +func (t *tracer) traceFlush(sender byte, encodedLen int32, msg *Flush) { + t.writeTrace(sender, encodedLen, "Flush", nil) +} + +func (t *tracer) traceFunctionCall(sender byte, encodedLen int32, msg *FunctionCall) { + t.writeTrace(sender, encodedLen, "FunctionCall", nil) +} + +func (t *tracer) traceFunctionCallResponse(sender byte, encodedLen int32, msg *FunctionCallResponse) { + t.writeTrace(sender, encodedLen, "FunctionCallResponse", nil) +} + +func (t *tracer) traceGSSEncRequest(sender byte, encodedLen int32, msg *GSSEncRequest) { + t.writeTrace(sender, encodedLen, "GSSEncRequest", nil) +} + +func (t *tracer) traceNoData(sender byte, encodedLen int32, msg *NoData) { + t.writeTrace(sender, encodedLen, "NoData", nil) +} + +func (t *tracer) traceNoticeResponse(sender byte, encodedLen int32, msg *NoticeResponse) { + t.writeTrace(sender, encodedLen, "NoticeResponse", nil) +} + +func (t *tracer) traceNotificationResponse(sender byte, encodedLen int32, msg *NotificationResponse) { + t.writeTrace(sender, encodedLen, "NotificationResponse", func() { + fmt.Fprintf(t.buf, "\t %d %s %s", msg.PID, traceDoubleQuotedString([]byte(msg.Channel)), traceDoubleQuotedString([]byte(msg.Payload))) + }) +} + +func (t *tracer) traceParameterDescription(sender byte, encodedLen int32, msg *ParameterDescription) { + t.writeTrace(sender, encodedLen, "ParameterDescription", nil) +} + +func (t *tracer) traceParameterStatus(sender byte, encodedLen int32, msg *ParameterStatus) { + t.writeTrace(sender, encodedLen, "ParameterStatus", func() { + fmt.Fprintf(t.buf, "\t %s %s", traceDoubleQuotedString([]byte(msg.Name)), traceDoubleQuotedString([]byte(msg.Value))) + }) +} + +func (t *tracer) traceParse(sender byte, encodedLen int32, msg *Parse) { + t.writeTrace(sender, encodedLen, "Parse", func() { + fmt.Fprintf(t.buf, "\t %s %s %d", traceDoubleQuotedString([]byte(msg.Name)), traceDoubleQuotedString([]byte(msg.Query)), len(msg.ParameterOIDs)) + for _, oid := range msg.ParameterOIDs { + fmt.Fprintf(t.buf, " %d", oid) + } + }) +} + +func (t *tracer) traceParseComplete(sender byte, encodedLen int32, msg *ParseComplete) { + t.writeTrace(sender, encodedLen, "ParseComplete", nil) +} + +func (t *tracer) tracePortalSuspended(sender byte, encodedLen int32, msg *PortalSuspended) { + t.writeTrace(sender, encodedLen, "PortalSuspended", nil) +} + +func (t *tracer) traceQuery(sender byte, encodedLen int32, msg *Query) { + t.writeTrace(sender, encodedLen, "Query", func() { + fmt.Fprintf(t.buf, "\t %s", traceDoubleQuotedString([]byte(msg.String))) + }) +} + +func (t *tracer) traceReadyForQuery(sender byte, encodedLen int32, msg *ReadyForQuery) { + t.writeTrace(sender, encodedLen, "ReadyForQuery", func() { + fmt.Fprintf(t.buf, "\t %c", msg.TxStatus) + }) +} + +func (t *tracer) traceRowDescription(sender byte, encodedLen int32, msg *RowDescription) { + t.writeTrace(sender, encodedLen, "RowDescription", func() { + fmt.Fprintf(t.buf, "\t %d", len(msg.Fields)) + for _, fd := range msg.Fields { + fmt.Fprintf(t.buf, ` %s %d %d %d %d %d %d`, traceDoubleQuotedString(fd.Name), fd.TableOID, fd.TableAttributeNumber, fd.DataTypeOID, fd.DataTypeSize, fd.TypeModifier, fd.Format) + } + }) +} + +func (t *tracer) traceSSLRequest(sender byte, encodedLen int32, msg *SSLRequest) { + t.writeTrace(sender, encodedLen, "SSLRequest", nil) +} + +func (t *tracer) traceStartupMessage(sender byte, encodedLen int32, msg *StartupMessage) { + t.writeTrace(sender, encodedLen, "StartupMessage", nil) +} + +func (t *tracer) traceSync(sender byte, encodedLen int32, msg *Sync) { + t.writeTrace(sender, encodedLen, "Sync", nil) +} + +func (t *tracer) traceTerminate(sender byte, encodedLen int32, msg *Terminate) { + t.writeTrace(sender, encodedLen, "Terminate", nil) +} + +func (t *tracer) writeTrace(sender byte, encodedLen int32, msgType string, writeDetails func()) { + t.mux.Lock() + defer t.mux.Unlock() + defer func() { + if t.buf.Cap() > 1024 { + t.buf = &bytes.Buffer{} + } else { + t.buf.Reset() + } + }() + + if !t.SuppressTimestamps { + now := time.Now() + t.buf.WriteString(now.Format("2006-01-02 15:04:05.000000")) + t.buf.WriteByte('\t') + } + + t.buf.WriteByte(sender) + t.buf.WriteByte('\t') + t.buf.WriteString(msgType) + t.buf.WriteByte('\t') + t.buf.WriteString(strconv.FormatInt(int64(encodedLen), 10)) + + if writeDetails != nil { + writeDetails() + } + + t.buf.WriteByte('\n') + t.buf.WriteTo(t.w) +} + +// traceDoubleQuotedString returns t.buf as a double-quoted string without any escaping. It is roughly equivalent to +// pqTraceOutputString in libpq. +func traceDoubleQuotedString(buf []byte) string { + return `"` + string(buf) + `"` +} + +// traceSingleQuotedString returns buf as a single-quoted string with non-printable characters hex-escaped. It is +// roughly equivalent to pqTraceOutputNchar in libpq. +func traceSingleQuotedString(buf []byte) string { + sb := &strings.Builder{} + + sb.WriteByte('\'') + for _, b := range buf { + if b < 32 || b > 126 { + fmt.Fprintf(sb, `\x%x`, b) + } else { + sb.WriteByte(b) + } + } + sb.WriteByte('\'') + + return sb.String() +} diff --git a/vendor/github.com/jackc/pgtype/array.go b/vendor/github.com/jackc/pgx/v5/pgtype/array.go similarity index 63% rename from vendor/github.com/jackc/pgtype/array.go rename to vendor/github.com/jackc/pgx/v5/pgtype/array.go index 174007c1..06b824ad 100644 --- a/vendor/github.com/jackc/pgtype/array.go +++ b/vendor/github.com/jackc/pgx/v5/pgtype/array.go @@ -5,21 +5,20 @@ import ( "encoding/binary" "fmt" "io" - "reflect" "strconv" "strings" "unicode" - "github.com/jackc/pgio" + "github.com/jackc/pgx/v5/internal/pgio" ) // Information on the internals of PostgreSQL arrays can be found in // src/include/utils/array.h and src/backend/utils/adt/arrayfuncs.c. Of // particular interest is the array_send function. -type ArrayHeader struct { +type arrayHeader struct { ContainsNull bool - ElementOID int32 + ElementOID uint32 Dimensions []ArrayDimension } @@ -28,7 +27,21 @@ type ArrayDimension struct { LowerBound int32 } -func (dst *ArrayHeader) DecodeBinary(ci *ConnInfo, src []byte) (int, error) { +// cardinality returns the number of elements in an array of dimensions size. +func cardinality(dimensions []ArrayDimension) int { + if len(dimensions) == 0 { + return 0 + } + + elementCount := int(dimensions[0].Length) + for _, d := range dimensions[1:] { + elementCount *= int(d.Length) + } + + return elementCount +} + +func (dst *arrayHeader) DecodeBinary(m *Map, src []byte) (int, error) { if len(src) < 12 { return 0, fmt.Errorf("array header too short: %d", len(src)) } @@ -41,12 +54,10 @@ func (dst *ArrayHeader) DecodeBinary(ci *ConnInfo, src []byte) (int, error) { dst.ContainsNull = binary.BigEndian.Uint32(src[rp:]) == 1 rp += 4 - dst.ElementOID = int32(binary.BigEndian.Uint32(src[rp:])) + dst.ElementOID = binary.BigEndian.Uint32(src[rp:]) rp += 4 - if numDims > 0 { - dst.Dimensions = make([]ArrayDimension, numDims) - } + dst.Dimensions = make([]ArrayDimension, numDims) if len(src) < 12+numDims*8 { return 0, fmt.Errorf("array header too short for %d dimensions: %d", numDims, len(src)) } @@ -61,7 +72,7 @@ func (dst *ArrayHeader) DecodeBinary(ci *ConnInfo, src []byte) (int, error) { return rp, nil } -func (src ArrayHeader) EncodeBinary(ci *ConnInfo, buf []byte) []byte { +func (src arrayHeader) EncodeBinary(buf []byte) []byte { buf = pgio.AppendInt32(buf, int32(len(src.Dimensions))) var containsNull int32 @@ -70,7 +81,7 @@ func (src ArrayHeader) EncodeBinary(ci *ConnInfo, buf []byte) []byte { } buf = pgio.AppendInt32(buf, containsNull) - buf = pgio.AppendInt32(buf, src.ElementOID) + buf = pgio.AppendUint32(buf, src.ElementOID) for i := range src.Dimensions { buf = pgio.AppendInt32(buf, src.Dimensions[i].Length) @@ -80,14 +91,18 @@ func (src ArrayHeader) EncodeBinary(ci *ConnInfo, buf []byte) []byte { return buf } -type UntypedTextArray struct { +type untypedTextArray struct { Elements []string Quoted []bool Dimensions []ArrayDimension } -func ParseUntypedTextArray(src string) (*UntypedTextArray, error) { - dst := &UntypedTextArray{} +func parseUntypedTextArray(src string) (*untypedTextArray, error) { + dst := &untypedTextArray{ + Elements: []string{}, + Quoted: []bool{}, + Dimensions: []ArrayDimension{}, + } buf := bytes.NewBufferString(src) @@ -95,7 +110,7 @@ func ParseUntypedTextArray(src string) (*UntypedTextArray, error) { r, _, err := buf.ReadRune() if err != nil { - return nil, fmt.Errorf("invalid array: %v", err) + return nil, fmt.Errorf("invalid array: %w", err) } var explicitDimensions []ArrayDimension @@ -107,7 +122,7 @@ func ParseUntypedTextArray(src string) (*UntypedTextArray, error) { for { r, _, err = buf.ReadRune() if err != nil { - return nil, fmt.Errorf("invalid array: %v", err) + return nil, fmt.Errorf("invalid array: %w", err) } if r == '=' { @@ -118,12 +133,12 @@ func ParseUntypedTextArray(src string) (*UntypedTextArray, error) { lower, err := arrayParseInteger(buf) if err != nil { - return nil, fmt.Errorf("invalid array: %v", err) + return nil, fmt.Errorf("invalid array: %w", err) } r, _, err = buf.ReadRune() if err != nil { - return nil, fmt.Errorf("invalid array: %v", err) + return nil, fmt.Errorf("invalid array: %w", err) } if r != ':' { @@ -132,12 +147,12 @@ func ParseUntypedTextArray(src string) (*UntypedTextArray, error) { upper, err := arrayParseInteger(buf) if err != nil { - return nil, fmt.Errorf("invalid array: %v", err) + return nil, fmt.Errorf("invalid array: %w", err) } r, _, err = buf.ReadRune() if err != nil { - return nil, fmt.Errorf("invalid array: %v", err) + return nil, fmt.Errorf("invalid array: %w", err) } if r != ']' { @@ -149,12 +164,12 @@ func ParseUntypedTextArray(src string) (*UntypedTextArray, error) { r, _, err = buf.ReadRune() if err != nil { - return nil, fmt.Errorf("invalid array: %v", err) + return nil, fmt.Errorf("invalid array: %w", err) } } if r != '{' { - return nil, fmt.Errorf("invalid array, expected '{': %v", err) + return nil, fmt.Errorf("invalid array, expected '{' got %v", r) } implicitDimensions := []ArrayDimension{{LowerBound: 1, Length: 0}} @@ -163,7 +178,7 @@ func ParseUntypedTextArray(src string) (*UntypedTextArray, error) { for { r, _, err = buf.ReadRune() if err != nil { - return nil, fmt.Errorf("invalid array: %v", err) + return nil, fmt.Errorf("invalid array: %w", err) } if r == '{' { @@ -180,7 +195,7 @@ func ParseUntypedTextArray(src string) (*UntypedTextArray, error) { for { r, _, err = buf.ReadRune() if err != nil { - return nil, fmt.Errorf("invalid array: %v", err) + return nil, fmt.Errorf("invalid array: %w", err) } switch r { @@ -199,7 +214,7 @@ func ParseUntypedTextArray(src string) (*UntypedTextArray, error) { buf.UnreadRune() value, quoted, err := arrayParseValue(buf) if err != nil { - return nil, fmt.Errorf("invalid array value: %v", err) + return nil, fmt.Errorf("invalid array value: %w", err) } if currentDim == counterDim { implicitDimensions[currentDim].Length++ @@ -220,7 +235,6 @@ func ParseUntypedTextArray(src string) (*UntypedTextArray, error) { } if len(dst.Elements) == 0 { - dst.Dimensions = nil } else if len(explicitDimensions) > 0 { dst.Dimensions = explicitDimensions } else { @@ -318,7 +332,7 @@ func arrayParseInteger(buf *bytes.Buffer) (int32, error) { } } -func EncodeTextArrayDimensions(buf []byte, dimensions []ArrayDimension) []byte { +func encodeTextArrayDimensions(buf []byte, dimensions []ArrayDimension) []byte { var customDimensions bool for _, dim := range dimensions { if dim.LowerBound != 1 { @@ -348,34 +362,99 @@ func quoteArrayElement(src string) string { } func isSpace(ch byte) bool { - // see https://github.com/postgres/postgres/blob/REL_12_STABLE/src/backend/parser/scansup.c#L224 - return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' + // see array_isspace: + // https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/arrayfuncs.c + return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\v' || ch == '\f' } -func QuoteArrayElementIfNeeded(src string) string { - if src == "" || (len(src) == 4 && strings.ToLower(src) == "null") || isSpace(src[0]) || isSpace(src[len(src)-1]) || strings.ContainsAny(src, `{},"\`) { +func quoteArrayElementIfNeeded(src string) string { + if src == "" || (len(src) == 4 && strings.EqualFold(src, "null")) || isSpace(src[0]) || isSpace(src[len(src)-1]) || strings.ContainsAny(src, `{},"\`) { return quoteArrayElement(src) } return src } -func findDimensionsFromValue(value reflect.Value, dimensions []ArrayDimension, elementsLength int) ([]ArrayDimension, int, bool) { - switch value.Kind() { - case reflect.Array: - fallthrough - case reflect.Slice: - length := value.Len() - if 0 == elementsLength { - elementsLength = length - } else { - elementsLength *= length - } - dimensions = append(dimensions, ArrayDimension{Length: int32(length), LowerBound: 1}) - for i := 0; i < length; i++ { - if d, l, ok := findDimensionsFromValue(value.Index(i), dimensions, elementsLength); ok { - return d, l, true - } - } +// Array represents a PostgreSQL array for T. It implements the ArrayGetter and ArraySetter interfaces. It preserves +// PostgreSQL dimensions and custom lower bounds. Use FlatArray if these are not needed. +type Array[T any] struct { + Elements []T + Dims []ArrayDimension + Valid bool +} + +func (a Array[T]) Dimensions() []ArrayDimension { + return a.Dims +} + +func (a Array[T]) Index(i int) any { + return a.Elements[i] +} + +func (a Array[T]) IndexType() any { + var el T + return el +} + +func (a *Array[T]) SetDimensions(dimensions []ArrayDimension) error { + if dimensions == nil { + *a = Array[T]{} + return nil + } + + elementCount := cardinality(dimensions) + *a = Array[T]{ + Elements: make([]T, elementCount), + Dims: dimensions, + Valid: true, } - return dimensions, elementsLength, true + + return nil +} + +func (a Array[T]) ScanIndex(i int) any { + return &a.Elements[i] +} + +func (a Array[T]) ScanIndexType() any { + return new(T) +} + +// FlatArray implements the ArrayGetter and ArraySetter interfaces for any slice of T. It ignores PostgreSQL dimensions +// and custom lower bounds. Use Array to preserve these. +type FlatArray[T any] []T + +func (a FlatArray[T]) Dimensions() []ArrayDimension { + if a == nil { + return nil + } + + return []ArrayDimension{{Length: int32(len(a)), LowerBound: 1}} +} + +func (a FlatArray[T]) Index(i int) any { + return a[i] +} + +func (a FlatArray[T]) IndexType() any { + var el T + return el +} + +func (a *FlatArray[T]) SetDimensions(dimensions []ArrayDimension) error { + if dimensions == nil { + *a = nil + return nil + } + + elementCount := cardinality(dimensions) + *a = make(FlatArray[T], elementCount) + return nil +} + +func (a FlatArray[T]) ScanIndex(i int) any { + return &a[i] +} + +func (a FlatArray[T]) ScanIndexType() any { + return new(T) } diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/array_codec.go b/vendor/github.com/jackc/pgx/v5/pgtype/array_codec.go new file mode 100644 index 00000000..bf5f6989 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/array_codec.go @@ -0,0 +1,405 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "reflect" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +// ArrayGetter is a type that can be converted into a PostgreSQL array. +type ArrayGetter interface { + // Dimensions returns the array dimensions. If array is nil then nil is returned. + Dimensions() []ArrayDimension + + // Index returns the element at i. + Index(i int) any + + // IndexType returns a non-nil scan target of the type Index will return. This is used by ArrayCodec.PlanEncode. + IndexType() any +} + +// ArraySetter is a type can be set from a PostgreSQL array. +type ArraySetter interface { + // SetDimensions prepares the value such that ScanIndex can be called for each element. This will remove any existing + // elements. dimensions may be nil to indicate a NULL array. If unable to exactly preserve dimensions SetDimensions + // may return an error or silently flatten the array dimensions. + SetDimensions(dimensions []ArrayDimension) error + + // ScanIndex returns a value usable as a scan target for i. SetDimensions must be called before ScanIndex. + ScanIndex(i int) any + + // ScanIndexType returns a non-nil scan target of the type ScanIndex will return. This is used by + // ArrayCodec.PlanScan. + ScanIndexType() any +} + +// ArrayCodec is a codec for any array type. +type ArrayCodec struct { + ElementType *Type +} + +func (c *ArrayCodec) FormatSupported(format int16) bool { + return c.ElementType.Codec.FormatSupported(format) +} + +func (c *ArrayCodec) PreferredFormat() int16 { + // The binary format should always be preferred for arrays if it is supported. Usually, this will happen automatically + // because most types that support binary prefer it. However, text, json, and jsonb support binary but prefer the text + // format. This is because it is simpler for jsonb and PostgreSQL can be significantly faster using the text format + // for text-like data types than binary. However, arrays appear to always be faster in binary. + // + // https://www.postgresql.org/message-id/CAMovtNoHFod2jMAKQjjxv209PCTJx5Kc66anwWvX0mEiaXwgmA%40mail.gmail.com + if c.ElementType.Codec.FormatSupported(BinaryFormatCode) { + return BinaryFormatCode + } + return TextFormatCode +} + +func (c *ArrayCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + arrayValuer, ok := value.(ArrayGetter) + if !ok { + return nil + } + + elementType := arrayValuer.IndexType() + + elementEncodePlan := m.PlanEncode(c.ElementType.OID, format, elementType) + if elementEncodePlan == nil { + if reflect.TypeOf(elementType) != nil { + return nil + } + } + + switch format { + case BinaryFormatCode: + return &encodePlanArrayCodecBinary{ac: c, m: m, oid: oid} + case TextFormatCode: + return &encodePlanArrayCodecText{ac: c, m: m, oid: oid} + } + + return nil +} + +type encodePlanArrayCodecText struct { + ac *ArrayCodec + m *Map + oid uint32 +} + +func (p *encodePlanArrayCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + array := value.(ArrayGetter) + + dimensions := array.Dimensions() + if dimensions == nil { + return nil, nil + } + + elementCount := cardinality(dimensions) + if elementCount == 0 { + return append(buf, '{', '}'), nil + } + + buf = encodeTextArrayDimensions(buf, dimensions) + + // dimElemCounts is the multiples of elements that each array lies on. For + // example, a single dimension array of length 4 would have a dimElemCounts of + // [4]. A multi-dimensional array of lengths [3,5,2] would have a + // dimElemCounts of [30,10,2]. This is used to simplify when to render a '{' + // or '}'. + dimElemCounts := make([]int, len(dimensions)) + dimElemCounts[len(dimensions)-1] = int(dimensions[len(dimensions)-1].Length) + for i := len(dimensions) - 2; i > -1; i-- { + dimElemCounts[i] = int(dimensions[i].Length) * dimElemCounts[i+1] + } + + var encodePlan EncodePlan + var lastElemType reflect.Type + inElemBuf := make([]byte, 0, 32) + for i := 0; i < elementCount; i++ { + if i > 0 { + buf = append(buf, ',') + } + + for _, dec := range dimElemCounts { + if i%dec == 0 { + buf = append(buf, '{') + } + } + + elem := array.Index(i) + var elemBuf []byte + if elem != nil { + elemType := reflect.TypeOf(elem) + if lastElemType != elemType { + lastElemType = elemType + encodePlan = p.m.PlanEncode(p.ac.ElementType.OID, TextFormatCode, elem) + if encodePlan == nil { + return nil, fmt.Errorf("unable to encode %v", array.Index(i)) + } + } + elemBuf, err = encodePlan.Encode(elem, inElemBuf) + if err != nil { + return nil, err + } + } + + if elemBuf == nil { + buf = append(buf, `NULL`...) + } else { + buf = append(buf, quoteArrayElementIfNeeded(string(elemBuf))...) + } + + for _, dec := range dimElemCounts { + if (i+1)%dec == 0 { + buf = append(buf, '}') + } + } + } + + return buf, nil +} + +type encodePlanArrayCodecBinary struct { + ac *ArrayCodec + m *Map + oid uint32 +} + +func (p *encodePlanArrayCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + array := value.(ArrayGetter) + + dimensions := array.Dimensions() + if dimensions == nil { + return nil, nil + } + + arrayHeader := arrayHeader{ + Dimensions: dimensions, + ElementOID: p.ac.ElementType.OID, + } + + containsNullIndex := len(buf) + 4 + + buf = arrayHeader.EncodeBinary(buf) + + elementCount := cardinality(dimensions) + + var encodePlan EncodePlan + var lastElemType reflect.Type + for i := 0; i < elementCount; i++ { + sp := len(buf) + buf = pgio.AppendInt32(buf, -1) + + elem := array.Index(i) + var elemBuf []byte + if elem != nil { + elemType := reflect.TypeOf(elem) + if lastElemType != elemType { + lastElemType = elemType + encodePlan = p.m.PlanEncode(p.ac.ElementType.OID, BinaryFormatCode, elem) + if encodePlan == nil { + return nil, fmt.Errorf("unable to encode %v", array.Index(i)) + } + } + elemBuf, err = encodePlan.Encode(elem, buf) + if err != nil { + return nil, err + } + } + + if elemBuf == nil { + pgio.SetInt32(buf[containsNullIndex:], 1) + } else { + buf = elemBuf + pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) + } + } + + return buf, nil +} + +func (c *ArrayCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + arrayScanner, ok := target.(ArraySetter) + if !ok { + return nil + } + + // target / arrayScanner might be a pointer to a nil. If it is create one so we can call ScanIndexType to plan the + // scan of the elements. + if isNil, _ := isNilDriverValuer(target); isNil { + arrayScanner = reflect.New(reflect.TypeOf(target).Elem()).Interface().(ArraySetter) + } + + elementType := arrayScanner.ScanIndexType() + + elementScanPlan := m.PlanScan(c.ElementType.OID, format, elementType) + if _, ok := elementScanPlan.(*scanPlanFail); ok { + return nil + } + + return &scanPlanArrayCodec{ + arrayCodec: c, + m: m, + oid: oid, + formatCode: format, + } +} + +func (c *ArrayCodec) decodeBinary(m *Map, arrayOID uint32, src []byte, array ArraySetter) error { + var arrayHeader arrayHeader + rp, err := arrayHeader.DecodeBinary(m, src) + if err != nil { + return err + } + + err = array.SetDimensions(arrayHeader.Dimensions) + if err != nil { + return err + } + + elementCount := cardinality(arrayHeader.Dimensions) + if elementCount == 0 { + return nil + } + + elementScanPlan := c.ElementType.Codec.PlanScan(m, c.ElementType.OID, BinaryFormatCode, array.ScanIndex(0)) + if elementScanPlan == nil { + elementScanPlan = m.PlanScan(c.ElementType.OID, BinaryFormatCode, array.ScanIndex(0)) + } + + for i := 0; i < elementCount; i++ { + elem := array.ScanIndex(i) + elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) + rp += 4 + var elemSrc []byte + if elemLen >= 0 { + elemSrc = src[rp : rp+elemLen] + rp += elemLen + } + err = elementScanPlan.Scan(elemSrc, elem) + if err != nil { + return fmt.Errorf("failed to scan array element %d: %w", i, err) + } + } + + return nil +} + +func (c *ArrayCodec) decodeText(m *Map, arrayOID uint32, src []byte, array ArraySetter) error { + uta, err := parseUntypedTextArray(string(src)) + if err != nil { + return err + } + + err = array.SetDimensions(uta.Dimensions) + if err != nil { + return err + } + + if len(uta.Elements) == 0 { + return nil + } + + elementScanPlan := c.ElementType.Codec.PlanScan(m, c.ElementType.OID, TextFormatCode, array.ScanIndex(0)) + if elementScanPlan == nil { + elementScanPlan = m.PlanScan(c.ElementType.OID, TextFormatCode, array.ScanIndex(0)) + } + + for i, s := range uta.Elements { + elem := array.ScanIndex(i) + var elemSrc []byte + if s != "NULL" || uta.Quoted[i] { + elemSrc = []byte(s) + } + + err = elementScanPlan.Scan(elemSrc, elem) + if err != nil { + return err + } + } + + return nil +} + +type scanPlanArrayCodec struct { + arrayCodec *ArrayCodec + m *Map + oid uint32 + formatCode int16 + elementScanPlan ScanPlan +} + +func (spac *scanPlanArrayCodec) Scan(src []byte, dst any) error { + c := spac.arrayCodec + m := spac.m + oid := spac.oid + formatCode := spac.formatCode + + array := dst.(ArraySetter) + + if src == nil { + return array.SetDimensions(nil) + } + + switch formatCode { + case BinaryFormatCode: + return c.decodeBinary(m, oid, src, array) + case TextFormatCode: + return c.decodeText(m, oid, src, array) + default: + return fmt.Errorf("unknown format code %d", formatCode) + } +} + +func (c *ArrayCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + switch format { + case TextFormatCode: + return string(src), nil + case BinaryFormatCode: + buf := make([]byte, len(src)) + copy(buf, src) + return buf, nil + default: + return nil, fmt.Errorf("unknown format code %d", format) + } +} + +func (c *ArrayCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var slice []any + err := m.PlanScan(oid, format, &slice).Scan(src, &slice) + return slice, err +} + +func isRagged(slice reflect.Value) bool { + if slice.Type().Elem().Kind() != reflect.Slice { + return false + } + + sliceLen := slice.Len() + innerLen := 0 + for i := 0; i < sliceLen; i++ { + if i == 0 { + innerLen = slice.Index(i).Len() + } else { + if slice.Index(i).Len() != innerLen { + return true + } + } + if isRagged(slice.Index(i)) { + return true + } + } + + return false +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/bits.go b/vendor/github.com/jackc/pgx/v5/pgtype/bits.go new file mode 100644 index 00000000..e7a1d016 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/bits.go @@ -0,0 +1,210 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type BitsScanner interface { + ScanBits(v Bits) error +} + +type BitsValuer interface { + BitsValue() (Bits, error) +} + +// Bits represents the PostgreSQL bit and varbit types. +type Bits struct { + Bytes []byte + Len int32 // Number of bits + Valid bool +} + +func (b *Bits) ScanBits(v Bits) error { + *b = v + return nil +} + +func (b Bits) BitsValue() (Bits, error) { + return b, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Bits) Scan(src any) error { + if src == nil { + *dst = Bits{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToBitsScanner{}.Scan([]byte(src), dst) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (src Bits) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + + buf, err := BitsCodec{}.PlanEncode(nil, 0, TextFormatCode, src).Encode(src, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +type BitsCodec struct{} + +func (BitsCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (BitsCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (BitsCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(BitsValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanBitsCodecBinary{} + case TextFormatCode: + return encodePlanBitsCodecText{} + } + + return nil +} + +type encodePlanBitsCodecBinary struct{} + +func (encodePlanBitsCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + bits, err := value.(BitsValuer).BitsValue() + if err != nil { + return nil, err + } + + if !bits.Valid { + return nil, nil + } + + buf = pgio.AppendInt32(buf, bits.Len) + return append(buf, bits.Bytes...), nil +} + +type encodePlanBitsCodecText struct{} + +func (encodePlanBitsCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + bits, err := value.(BitsValuer).BitsValue() + if err != nil { + return nil, err + } + + if !bits.Valid { + return nil, nil + } + + for i := int32(0); i < bits.Len; i++ { + byteIdx := i / 8 + bitMask := byte(128 >> byte(i%8)) + char := byte('0') + if bits.Bytes[byteIdx]&bitMask > 0 { + char = '1' + } + buf = append(buf, char) + } + + return buf, nil +} + +func (BitsCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case BitsScanner: + return scanPlanBinaryBitsToBitsScanner{} + } + case TextFormatCode: + switch target.(type) { + case BitsScanner: + return scanPlanTextAnyToBitsScanner{} + } + } + + return nil +} + +func (c BitsCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c BitsCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var box Bits + err := codecScan(c, m, oid, format, src, &box) + if err != nil { + return nil, err + } + return box, nil +} + +type scanPlanBinaryBitsToBitsScanner struct{} + +func (scanPlanBinaryBitsToBitsScanner) Scan(src []byte, dst any) error { + scanner := (dst).(BitsScanner) + + if src == nil { + return scanner.ScanBits(Bits{}) + } + + if len(src) < 4 { + return fmt.Errorf("invalid length for bit/varbit: %v", len(src)) + } + + bitLen := int32(binary.BigEndian.Uint32(src)) + rp := 4 + buf := make([]byte, len(src[rp:])) + copy(buf, src[rp:]) + + return scanner.ScanBits(Bits{Bytes: buf, Len: bitLen, Valid: true}) +} + +type scanPlanTextAnyToBitsScanner struct{} + +func (scanPlanTextAnyToBitsScanner) Scan(src []byte, dst any) error { + scanner := (dst).(BitsScanner) + + if src == nil { + return scanner.ScanBits(Bits{}) + } + + bitLen := len(src) + byteLen := bitLen / 8 + if bitLen%8 > 0 { + byteLen++ + } + buf := make([]byte, byteLen) + + for i, b := range src { + if b == '1' { + byteIdx := i / 8 + bitIdx := uint(i % 8) + buf[byteIdx] = buf[byteIdx] | (128 >> bitIdx) + } + } + + return scanner.ScanBits(Bits{Bytes: buf, Len: int32(bitLen), Valid: true}) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/bool.go b/vendor/github.com/jackc/pgx/v5/pgtype/bool.go new file mode 100644 index 00000000..71caffa7 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/bool.go @@ -0,0 +1,343 @@ +package pgtype + +import ( + "bytes" + "database/sql/driver" + "encoding/json" + "fmt" + "strconv" + "strings" +) + +type BoolScanner interface { + ScanBool(v Bool) error +} + +type BoolValuer interface { + BoolValue() (Bool, error) +} + +type Bool struct { + Bool bool + Valid bool +} + +func (b *Bool) ScanBool(v Bool) error { + *b = v + return nil +} + +func (b Bool) BoolValue() (Bool, error) { + return b, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Bool) Scan(src any) error { + if src == nil { + *dst = Bool{} + return nil + } + + switch src := src.(type) { + case bool: + *dst = Bool{Bool: src, Valid: true} + return nil + case string: + b, err := strconv.ParseBool(src) + if err != nil { + return err + } + *dst = Bool{Bool: b, Valid: true} + return nil + case []byte: + b, err := strconv.ParseBool(string(src)) + if err != nil { + return err + } + *dst = Bool{Bool: b, Valid: true} + return nil + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (src Bool) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + + return src.Bool, nil +} + +func (src Bool) MarshalJSON() ([]byte, error) { + if !src.Valid { + return []byte("null"), nil + } + + if src.Bool { + return []byte("true"), nil + } else { + return []byte("false"), nil + } +} + +func (dst *Bool) UnmarshalJSON(b []byte) error { + var v *bool + err := json.Unmarshal(b, &v) + if err != nil { + return err + } + + if v == nil { + *dst = Bool{} + } else { + *dst = Bool{Bool: *v, Valid: true} + } + + return nil +} + +type BoolCodec struct{} + +func (BoolCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (BoolCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (BoolCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case bool: + return encodePlanBoolCodecBinaryBool{} + case BoolValuer: + return encodePlanBoolCodecBinaryBoolValuer{} + } + case TextFormatCode: + switch value.(type) { + case bool: + return encodePlanBoolCodecTextBool{} + case BoolValuer: + return encodePlanBoolCodecTextBoolValuer{} + } + } + + return nil +} + +type encodePlanBoolCodecBinaryBool struct{} + +func (encodePlanBoolCodecBinaryBool) Encode(value any, buf []byte) (newBuf []byte, err error) { + v := value.(bool) + + if v { + buf = append(buf, 1) + } else { + buf = append(buf, 0) + } + + return buf, nil +} + +type encodePlanBoolCodecTextBoolValuer struct{} + +func (encodePlanBoolCodecTextBoolValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + b, err := value.(BoolValuer).BoolValue() + if err != nil { + return nil, err + } + + if !b.Valid { + return nil, nil + } + + if b.Bool { + buf = append(buf, 't') + } else { + buf = append(buf, 'f') + } + + return buf, nil +} + +type encodePlanBoolCodecBinaryBoolValuer struct{} + +func (encodePlanBoolCodecBinaryBoolValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + b, err := value.(BoolValuer).BoolValue() + if err != nil { + return nil, err + } + + if !b.Valid { + return nil, nil + } + + if b.Bool { + buf = append(buf, 1) + } else { + buf = append(buf, 0) + } + + return buf, nil +} + +type encodePlanBoolCodecTextBool struct{} + +func (encodePlanBoolCodecTextBool) Encode(value any, buf []byte) (newBuf []byte, err error) { + v := value.(bool) + + if v { + buf = append(buf, 't') + } else { + buf = append(buf, 'f') + } + + return buf, nil +} + +func (BoolCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case *bool: + return scanPlanBinaryBoolToBool{} + case BoolScanner: + return scanPlanBinaryBoolToBoolScanner{} + } + case TextFormatCode: + switch target.(type) { + case *bool: + return scanPlanTextAnyToBool{} + case BoolScanner: + return scanPlanTextAnyToBoolScanner{} + } + } + + return nil +} + +func (c BoolCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return c.DecodeValue(m, oid, format, src) +} + +func (c BoolCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var b bool + err := codecScan(c, m, oid, format, src, &b) + if err != nil { + return nil, err + } + return b, nil +} + +type scanPlanBinaryBoolToBool struct{} + +func (scanPlanBinaryBoolToBool) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 1 { + return fmt.Errorf("invalid length for bool: %v", len(src)) + } + + p, ok := (dst).(*bool) + if !ok { + return ErrScanTargetTypeChanged + } + + *p = src[0] == 1 + + return nil +} + +type scanPlanTextAnyToBool struct{} + +func (scanPlanTextAnyToBool) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) == 0 { + return fmt.Errorf("cannot scan empty string into %T", dst) + } + + p, ok := (dst).(*bool) + if !ok { + return ErrScanTargetTypeChanged + } + + v, err := planTextToBool(src) + if err != nil { + return err + } + + *p = v + + return nil +} + +type scanPlanBinaryBoolToBoolScanner struct{} + +func (scanPlanBinaryBoolToBoolScanner) Scan(src []byte, dst any) error { + s, ok := (dst).(BoolScanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanBool(Bool{}) + } + + if len(src) != 1 { + return fmt.Errorf("invalid length for bool: %v", len(src)) + } + + return s.ScanBool(Bool{Bool: src[0] == 1, Valid: true}) +} + +type scanPlanTextAnyToBoolScanner struct{} + +func (scanPlanTextAnyToBoolScanner) Scan(src []byte, dst any) error { + s, ok := (dst).(BoolScanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanBool(Bool{}) + } + + if len(src) == 0 { + return fmt.Errorf("cannot scan empty string into %T", dst) + } + + v, err := planTextToBool(src) + if err != nil { + return err + } + + return s.ScanBool(Bool{Bool: v, Valid: true}) +} + +// https://www.postgresql.org/docs/11/datatype-boolean.html +func planTextToBool(src []byte) (bool, error) { + s := string(bytes.ToLower(bytes.TrimSpace(src))) + + switch { + case strings.HasPrefix("true", s), strings.HasPrefix("yes", s), s == "on", s == "1": + return true, nil + case strings.HasPrefix("false", s), strings.HasPrefix("no", s), strings.HasPrefix("off", s), s == "0": + return false, nil + default: + return false, fmt.Errorf("unknown boolean string representation %q", src) + } +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/box.go b/vendor/github.com/jackc/pgx/v5/pgtype/box.go new file mode 100644 index 00000000..887d268b --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/box.go @@ -0,0 +1,238 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "math" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type BoxScanner interface { + ScanBox(v Box) error +} + +type BoxValuer interface { + BoxValue() (Box, error) +} + +type Box struct { + P [2]Vec2 + Valid bool +} + +func (b *Box) ScanBox(v Box) error { + *b = v + return nil +} + +func (b Box) BoxValue() (Box, error) { + return b, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Box) Scan(src any) error { + if src == nil { + *dst = Box{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToBoxScanner{}.Scan([]byte(src), dst) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (src Box) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + + buf, err := BoxCodec{}.PlanEncode(nil, 0, TextFormatCode, src).Encode(src, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +type BoxCodec struct{} + +func (BoxCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (BoxCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (BoxCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(BoxValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanBoxCodecBinary{} + case TextFormatCode: + return encodePlanBoxCodecText{} + } + + return nil +} + +type encodePlanBoxCodecBinary struct{} + +func (encodePlanBoxCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + box, err := value.(BoxValuer).BoxValue() + if err != nil { + return nil, err + } + + if !box.Valid { + return nil, nil + } + + buf = pgio.AppendUint64(buf, math.Float64bits(box.P[0].X)) + buf = pgio.AppendUint64(buf, math.Float64bits(box.P[0].Y)) + buf = pgio.AppendUint64(buf, math.Float64bits(box.P[1].X)) + buf = pgio.AppendUint64(buf, math.Float64bits(box.P[1].Y)) + return buf, nil +} + +type encodePlanBoxCodecText struct{} + +func (encodePlanBoxCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + box, err := value.(BoxValuer).BoxValue() + if err != nil { + return nil, err + } + + if !box.Valid { + return nil, nil + } + + buf = append(buf, fmt.Sprintf(`(%s,%s),(%s,%s)`, + strconv.FormatFloat(box.P[0].X, 'f', -1, 64), + strconv.FormatFloat(box.P[0].Y, 'f', -1, 64), + strconv.FormatFloat(box.P[1].X, 'f', -1, 64), + strconv.FormatFloat(box.P[1].Y, 'f', -1, 64), + )...) + return buf, nil +} + +func (BoxCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case BoxScanner: + return scanPlanBinaryBoxToBoxScanner{} + } + case TextFormatCode: + switch target.(type) { + case BoxScanner: + return scanPlanTextAnyToBoxScanner{} + } + } + + return nil +} + +type scanPlanBinaryBoxToBoxScanner struct{} + +func (scanPlanBinaryBoxToBoxScanner) Scan(src []byte, dst any) error { + scanner := (dst).(BoxScanner) + + if src == nil { + return scanner.ScanBox(Box{}) + } + + if len(src) != 32 { + return fmt.Errorf("invalid length for Box: %v", len(src)) + } + + x1 := binary.BigEndian.Uint64(src) + y1 := binary.BigEndian.Uint64(src[8:]) + x2 := binary.BigEndian.Uint64(src[16:]) + y2 := binary.BigEndian.Uint64(src[24:]) + + return scanner.ScanBox(Box{ + P: [2]Vec2{ + {math.Float64frombits(x1), math.Float64frombits(y1)}, + {math.Float64frombits(x2), math.Float64frombits(y2)}, + }, + Valid: true, + }) +} + +type scanPlanTextAnyToBoxScanner struct{} + +func (scanPlanTextAnyToBoxScanner) Scan(src []byte, dst any) error { + scanner := (dst).(BoxScanner) + + if src == nil { + return scanner.ScanBox(Box{}) + } + + if len(src) < 11 { + return fmt.Errorf("invalid length for Box: %v", len(src)) + } + + str := string(src[1:]) + + var end int + end = strings.IndexByte(str, ',') + + x1, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + str = str[end+1:] + end = strings.IndexByte(str, ')') + + y1, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + str = str[end+3:] + end = strings.IndexByte(str, ',') + + x2, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + str = str[end+1 : len(str)-1] + + y2, err := strconv.ParseFloat(str, 64) + if err != nil { + return err + } + + return scanner.ScanBox(Box{P: [2]Vec2{{x1, y1}, {x2, y2}}, Valid: true}) +} + +func (c BoxCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c BoxCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var box Box + err := codecScan(c, m, oid, format, src, &box) + if err != nil { + return nil, err + } + return box, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/builtin_wrappers.go b/vendor/github.com/jackc/pgx/v5/pgtype/builtin_wrappers.go new file mode 100644 index 00000000..b39d3fa1 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/builtin_wrappers.go @@ -0,0 +1,952 @@ +package pgtype + +import ( + "errors" + "fmt" + "math" + "math/big" + "net" + "net/netip" + "reflect" + "time" +) + +type int8Wrapper int8 + +func (w int8Wrapper) SkipUnderlyingTypePlan() {} + +func (w *int8Wrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *int8") + } + + if v.Int64 < math.MinInt8 { + return fmt.Errorf("%d is less than minimum value for int8", v.Int64) + } + if v.Int64 > math.MaxInt8 { + return fmt.Errorf("%d is greater than maximum value for int8", v.Int64) + } + *w = int8Wrapper(v.Int64) + + return nil +} + +func (w int8Wrapper) Int64Value() (Int8, error) { + return Int8{Int64: int64(w), Valid: true}, nil +} + +type int16Wrapper int16 + +func (w int16Wrapper) SkipUnderlyingTypePlan() {} + +func (w *int16Wrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *int16") + } + + if v.Int64 < math.MinInt16 { + return fmt.Errorf("%d is less than minimum value for int16", v.Int64) + } + if v.Int64 > math.MaxInt16 { + return fmt.Errorf("%d is greater than maximum value for int16", v.Int64) + } + *w = int16Wrapper(v.Int64) + + return nil +} + +func (w int16Wrapper) Int64Value() (Int8, error) { + return Int8{Int64: int64(w), Valid: true}, nil +} + +type int32Wrapper int32 + +func (w int32Wrapper) SkipUnderlyingTypePlan() {} + +func (w *int32Wrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *int32") + } + + if v.Int64 < math.MinInt32 { + return fmt.Errorf("%d is less than minimum value for int32", v.Int64) + } + if v.Int64 > math.MaxInt32 { + return fmt.Errorf("%d is greater than maximum value for int32", v.Int64) + } + *w = int32Wrapper(v.Int64) + + return nil +} + +func (w int32Wrapper) Int64Value() (Int8, error) { + return Int8{Int64: int64(w), Valid: true}, nil +} + +type int64Wrapper int64 + +func (w int64Wrapper) SkipUnderlyingTypePlan() {} + +func (w *int64Wrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *int64") + } + + *w = int64Wrapper(v.Int64) + + return nil +} + +func (w int64Wrapper) Int64Value() (Int8, error) { + return Int8{Int64: int64(w), Valid: true}, nil +} + +type intWrapper int + +func (w intWrapper) SkipUnderlyingTypePlan() {} + +func (w *intWrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *int") + } + + if v.Int64 < math.MinInt { + return fmt.Errorf("%d is less than minimum value for int", v.Int64) + } + if v.Int64 > math.MaxInt { + return fmt.Errorf("%d is greater than maximum value for int", v.Int64) + } + + *w = intWrapper(v.Int64) + + return nil +} + +func (w intWrapper) Int64Value() (Int8, error) { + return Int8{Int64: int64(w), Valid: true}, nil +} + +type uint8Wrapper uint8 + +func (w uint8Wrapper) SkipUnderlyingTypePlan() {} + +func (w *uint8Wrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *uint8") + } + + if v.Int64 < 0 { + return fmt.Errorf("%d is less than minimum value for uint8", v.Int64) + } + if v.Int64 > math.MaxUint8 { + return fmt.Errorf("%d is greater than maximum value for uint8", v.Int64) + } + *w = uint8Wrapper(v.Int64) + + return nil +} + +func (w uint8Wrapper) Int64Value() (Int8, error) { + return Int8{Int64: int64(w), Valid: true}, nil +} + +type uint16Wrapper uint16 + +func (w uint16Wrapper) SkipUnderlyingTypePlan() {} + +func (w *uint16Wrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *uint16") + } + + if v.Int64 < 0 { + return fmt.Errorf("%d is less than minimum value for uint16", v.Int64) + } + if v.Int64 > math.MaxUint16 { + return fmt.Errorf("%d is greater than maximum value for uint16", v.Int64) + } + *w = uint16Wrapper(v.Int64) + + return nil +} + +func (w uint16Wrapper) Int64Value() (Int8, error) { + return Int8{Int64: int64(w), Valid: true}, nil +} + +type uint32Wrapper uint32 + +func (w uint32Wrapper) SkipUnderlyingTypePlan() {} + +func (w *uint32Wrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *uint32") + } + + if v.Int64 < 0 { + return fmt.Errorf("%d is less than minimum value for uint32", v.Int64) + } + if v.Int64 > math.MaxUint32 { + return fmt.Errorf("%d is greater than maximum value for uint32", v.Int64) + } + *w = uint32Wrapper(v.Int64) + + return nil +} + +func (w uint32Wrapper) Int64Value() (Int8, error) { + return Int8{Int64: int64(w), Valid: true}, nil +} + +type uint64Wrapper uint64 + +func (w uint64Wrapper) SkipUnderlyingTypePlan() {} + +func (w *uint64Wrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *uint64") + } + + if v.Int64 < 0 { + return fmt.Errorf("%d is less than minimum value for uint64", v.Int64) + } + + *w = uint64Wrapper(v.Int64) + + return nil +} + +func (w uint64Wrapper) Int64Value() (Int8, error) { + if uint64(w) > uint64(math.MaxInt64) { + return Int8{}, fmt.Errorf("%d is greater than maximum value for int64", w) + } + + return Int8{Int64: int64(w), Valid: true}, nil +} + +func (w *uint64Wrapper) ScanNumeric(v Numeric) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *uint64") + } + + bi, err := v.toBigInt() + if err != nil { + return fmt.Errorf("cannot scan into *uint64: %w", err) + } + + if !bi.IsUint64() { + return fmt.Errorf("cannot scan %v into *uint64", bi.String()) + } + + *w = uint64Wrapper(bi.Uint64()) + + return nil +} + +func (w uint64Wrapper) NumericValue() (Numeric, error) { + return Numeric{Int: new(big.Int).SetUint64(uint64(w)), Valid: true}, nil +} + +type uintWrapper uint + +func (w uintWrapper) SkipUnderlyingTypePlan() {} + +func (w *uintWrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *uint64") + } + + if v.Int64 < 0 { + return fmt.Errorf("%d is less than minimum value for uint64", v.Int64) + } + + if uint64(v.Int64) > math.MaxUint { + return fmt.Errorf("%d is greater than maximum value for uint", v.Int64) + } + + *w = uintWrapper(v.Int64) + + return nil +} + +func (w uintWrapper) Int64Value() (Int8, error) { + if uint64(w) > uint64(math.MaxInt64) { + return Int8{}, fmt.Errorf("%d is greater than maximum value for int64", w) + } + + return Int8{Int64: int64(w), Valid: true}, nil +} + +func (w *uintWrapper) ScanNumeric(v Numeric) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *uint") + } + + bi, err := v.toBigInt() + if err != nil { + return fmt.Errorf("cannot scan into *uint: %w", err) + } + + if !bi.IsUint64() { + return fmt.Errorf("cannot scan %v into *uint", bi.String()) + } + + ui := bi.Uint64() + + if math.MaxUint < ui { + return fmt.Errorf("cannot scan %v into *uint", ui) + } + + *w = uintWrapper(ui) + + return nil +} + +func (w uintWrapper) NumericValue() (Numeric, error) { + return Numeric{Int: new(big.Int).SetUint64(uint64(w)), Valid: true}, nil +} + +type float32Wrapper float32 + +func (w float32Wrapper) SkipUnderlyingTypePlan() {} + +func (w *float32Wrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *float32") + } + + *w = float32Wrapper(v.Int64) + + return nil +} + +func (w float32Wrapper) Int64Value() (Int8, error) { + if w > math.MaxInt64 { + return Int8{}, fmt.Errorf("%f is greater than maximum value for int64", w) + } + + return Int8{Int64: int64(w), Valid: true}, nil +} + +func (w *float32Wrapper) ScanFloat64(v Float8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *float32") + } + + *w = float32Wrapper(v.Float64) + + return nil +} + +func (w float32Wrapper) Float64Value() (Float8, error) { + return Float8{Float64: float64(w), Valid: true}, nil +} + +type float64Wrapper float64 + +func (w float64Wrapper) SkipUnderlyingTypePlan() {} + +func (w *float64Wrapper) ScanInt64(v Int8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *float64") + } + + *w = float64Wrapper(v.Int64) + + return nil +} + +func (w float64Wrapper) Int64Value() (Int8, error) { + if w > math.MaxInt64 { + return Int8{}, fmt.Errorf("%f is greater than maximum value for int64", w) + } + + return Int8{Int64: int64(w), Valid: true}, nil +} + +func (w *float64Wrapper) ScanFloat64(v Float8) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *float64") + } + + *w = float64Wrapper(v.Float64) + + return nil +} + +func (w float64Wrapper) Float64Value() (Float8, error) { + return Float8{Float64: float64(w), Valid: true}, nil +} + +type stringWrapper string + +func (w stringWrapper) SkipUnderlyingTypePlan() {} + +func (w *stringWrapper) ScanText(v Text) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *string") + } + + *w = stringWrapper(v.String) + return nil +} + +func (w stringWrapper) TextValue() (Text, error) { + return Text{String: string(w), Valid: true}, nil +} + +type timeWrapper time.Time + +func (w *timeWrapper) ScanDate(v Date) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *time.Time") + } + + switch v.InfinityModifier { + case Finite: + *w = timeWrapper(v.Time) + return nil + case Infinity: + return fmt.Errorf("cannot scan Infinity into *time.Time") + case NegativeInfinity: + return fmt.Errorf("cannot scan -Infinity into *time.Time") + default: + return fmt.Errorf("invalid InfinityModifier: %v", v.InfinityModifier) + } +} + +func (w timeWrapper) DateValue() (Date, error) { + return Date{Time: time.Time(w), Valid: true}, nil +} + +func (w *timeWrapper) ScanTimestamp(v Timestamp) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *time.Time") + } + + switch v.InfinityModifier { + case Finite: + *w = timeWrapper(v.Time) + return nil + case Infinity: + return fmt.Errorf("cannot scan Infinity into *time.Time") + case NegativeInfinity: + return fmt.Errorf("cannot scan -Infinity into *time.Time") + default: + return fmt.Errorf("invalid InfinityModifier: %v", v.InfinityModifier) + } +} + +func (w timeWrapper) TimestampValue() (Timestamp, error) { + return Timestamp{Time: time.Time(w), Valid: true}, nil +} + +func (w *timeWrapper) ScanTimestamptz(v Timestamptz) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *time.Time") + } + + switch v.InfinityModifier { + case Finite: + *w = timeWrapper(v.Time) + return nil + case Infinity: + return fmt.Errorf("cannot scan Infinity into *time.Time") + case NegativeInfinity: + return fmt.Errorf("cannot scan -Infinity into *time.Time") + default: + return fmt.Errorf("invalid InfinityModifier: %v", v.InfinityModifier) + } +} + +func (w timeWrapper) TimestamptzValue() (Timestamptz, error) { + return Timestamptz{Time: time.Time(w), Valid: true}, nil +} + +func (w *timeWrapper) ScanTime(v Time) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *time.Time") + } + + // 24:00:00 is max allowed time in PostgreSQL, but time.Time will normalize that to 00:00:00 the next day. + var maxRepresentableByTime int64 = 24*60*60*1000000 - 1 + if v.Microseconds > maxRepresentableByTime { + return fmt.Errorf("%d microseconds cannot be represented as time.Time", v.Microseconds) + } + + usec := v.Microseconds + hours := usec / microsecondsPerHour + usec -= hours * microsecondsPerHour + minutes := usec / microsecondsPerMinute + usec -= minutes * microsecondsPerMinute + seconds := usec / microsecondsPerSecond + usec -= seconds * microsecondsPerSecond + ns := usec * 1000 + *w = timeWrapper(time.Date(2000, 1, 1, int(hours), int(minutes), int(seconds), int(ns), time.UTC)) + return nil +} + +func (w timeWrapper) TimeValue() (Time, error) { + t := time.Time(w) + usec := int64(t.Hour())*microsecondsPerHour + + int64(t.Minute())*microsecondsPerMinute + + int64(t.Second())*microsecondsPerSecond + + int64(t.Nanosecond())/1000 + return Time{Microseconds: usec, Valid: true}, nil +} + +type durationWrapper time.Duration + +func (w durationWrapper) SkipUnderlyingTypePlan() {} + +func (w *durationWrapper) ScanInterval(v Interval) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *time.Interval") + } + + us := int64(v.Months)*microsecondsPerMonth + int64(v.Days)*microsecondsPerDay + v.Microseconds + *w = durationWrapper(time.Duration(us) * time.Microsecond) + return nil +} + +func (w durationWrapper) IntervalValue() (Interval, error) { + return Interval{Microseconds: int64(w) / 1000, Valid: true}, nil +} + +type netIPNetWrapper net.IPNet + +func (w *netIPNetWrapper) ScanNetipPrefix(v netip.Prefix) error { + if !v.IsValid() { + return fmt.Errorf("cannot scan NULL into *net.IPNet") + } + + *w = netIPNetWrapper{ + IP: v.Addr().AsSlice(), + Mask: net.CIDRMask(v.Bits(), v.Addr().BitLen()), + } + + return nil +} +func (w netIPNetWrapper) NetipPrefixValue() (netip.Prefix, error) { + ip, ok := netip.AddrFromSlice(w.IP) + if !ok { + return netip.Prefix{}, errors.New("invalid net.IPNet") + } + + ones, _ := w.Mask.Size() + + return netip.PrefixFrom(ip, ones), nil +} + +type netIPWrapper net.IP + +func (w netIPWrapper) SkipUnderlyingTypePlan() {} + +func (w *netIPWrapper) ScanNetipPrefix(v netip.Prefix) error { + if !v.IsValid() { + *w = nil + return nil + } + + if v.Addr().BitLen() != v.Bits() { + return fmt.Errorf("cannot scan %v to *net.IP", v) + } + + *w = netIPWrapper(v.Addr().AsSlice()) + return nil +} + +func (w netIPWrapper) NetipPrefixValue() (netip.Prefix, error) { + if w == nil { + return netip.Prefix{}, nil + } + + addr, ok := netip.AddrFromSlice([]byte(w)) + if !ok { + return netip.Prefix{}, errors.New("invalid net.IP") + } + + return netip.PrefixFrom(addr, addr.BitLen()), nil +} + +type netipPrefixWrapper netip.Prefix + +func (w *netipPrefixWrapper) ScanNetipPrefix(v netip.Prefix) error { + *w = netipPrefixWrapper(v) + return nil +} + +func (w netipPrefixWrapper) NetipPrefixValue() (netip.Prefix, error) { + return netip.Prefix(w), nil +} + +type netipAddrWrapper netip.Addr + +func (w *netipAddrWrapper) ScanNetipPrefix(v netip.Prefix) error { + if !v.IsValid() { + *w = netipAddrWrapper(netip.Addr{}) + return nil + } + + if v.Addr().BitLen() != v.Bits() { + return fmt.Errorf("cannot scan %v to netip.Addr", v) + } + + *w = netipAddrWrapper(v.Addr()) + + return nil +} + +func (w netipAddrWrapper) NetipPrefixValue() (netip.Prefix, error) { + addr := (netip.Addr)(w) + if !addr.IsValid() { + return netip.Prefix{}, nil + } + + return netip.PrefixFrom(addr, addr.BitLen()), nil +} + +type mapStringToPointerStringWrapper map[string]*string + +func (w *mapStringToPointerStringWrapper) ScanHstore(v Hstore) error { + *w = mapStringToPointerStringWrapper(v) + return nil +} + +func (w mapStringToPointerStringWrapper) HstoreValue() (Hstore, error) { + return Hstore(w), nil +} + +type mapStringToStringWrapper map[string]string + +func (w *mapStringToStringWrapper) ScanHstore(v Hstore) error { + *w = make(mapStringToStringWrapper, len(v)) + for k, v := range v { + if v == nil { + return fmt.Errorf("cannot scan NULL to string") + } + (*w)[k] = *v + } + return nil +} + +func (w mapStringToStringWrapper) HstoreValue() (Hstore, error) { + if w == nil { + return nil, nil + } + + hstore := make(Hstore, len(w)) + for k, v := range w { + s := v + hstore[k] = &s + } + return hstore, nil +} + +type fmtStringerWrapper struct { + s fmt.Stringer +} + +func (w fmtStringerWrapper) TextValue() (Text, error) { + return Text{String: w.s.String(), Valid: true}, nil +} + +type byte16Wrapper [16]byte + +func (w *byte16Wrapper) ScanUUID(v UUID) error { + if !v.Valid { + return fmt.Errorf("cannot scan NULL into *[16]byte") + } + *w = byte16Wrapper(v.Bytes) + return nil +} + +func (w byte16Wrapper) UUIDValue() (UUID, error) { + return UUID{Bytes: [16]byte(w), Valid: true}, nil +} + +type byteSliceWrapper []byte + +func (w byteSliceWrapper) SkipUnderlyingTypePlan() {} + +func (w *byteSliceWrapper) ScanText(v Text) error { + if !v.Valid { + *w = nil + return nil + } + + *w = byteSliceWrapper(v.String) + return nil +} + +func (w byteSliceWrapper) TextValue() (Text, error) { + if w == nil { + return Text{}, nil + } + + return Text{String: string(w), Valid: true}, nil +} + +func (w *byteSliceWrapper) ScanUUID(v UUID) error { + if !v.Valid { + *w = nil + return nil + } + *w = make(byteSliceWrapper, 16) + copy(*w, v.Bytes[:]) + return nil +} + +func (w byteSliceWrapper) UUIDValue() (UUID, error) { + if w == nil { + return UUID{}, nil + } + + uuid := UUID{Valid: true} + copy(uuid.Bytes[:], w) + return uuid, nil +} + +// structWrapper implements CompositeIndexGetter for a struct. +type structWrapper struct { + s any + exportedFields []reflect.Value +} + +func (w structWrapper) IsNull() bool { + return w.s == nil +} + +func (w structWrapper) Index(i int) any { + if i >= len(w.exportedFields) { + return fmt.Errorf("%#v only has %d public fields - %d is out of bounds", w.s, len(w.exportedFields), i) + } + + return w.exportedFields[i].Interface() +} + +// ptrStructWrapper implements CompositeIndexScanner for a pointer to a struct. +type ptrStructWrapper struct { + s any + exportedFields []reflect.Value +} + +func (w *ptrStructWrapper) ScanNull() error { + return fmt.Errorf("cannot scan NULL into %#v", w.s) +} + +func (w *ptrStructWrapper) ScanIndex(i int) any { + if i >= len(w.exportedFields) { + return fmt.Errorf("%#v only has %d public fields - %d is out of bounds", w.s, len(w.exportedFields), i) + } + + return w.exportedFields[i].Addr().Interface() +} + +type anySliceArrayReflect struct { + slice reflect.Value +} + +func (a anySliceArrayReflect) Dimensions() []ArrayDimension { + if a.slice.IsNil() { + return nil + } + + return []ArrayDimension{{Length: int32(a.slice.Len()), LowerBound: 1}} +} + +func (a anySliceArrayReflect) Index(i int) any { + return a.slice.Index(i).Interface() +} + +func (a anySliceArrayReflect) IndexType() any { + return reflect.New(a.slice.Type().Elem()).Elem().Interface() +} + +func (a *anySliceArrayReflect) SetDimensions(dimensions []ArrayDimension) error { + sliceType := a.slice.Type() + + if dimensions == nil { + a.slice.Set(reflect.Zero(sliceType)) + return nil + } + + elementCount := cardinality(dimensions) + slice := reflect.MakeSlice(sliceType, elementCount, elementCount) + a.slice.Set(slice) + return nil +} + +func (a *anySliceArrayReflect) ScanIndex(i int) any { + return a.slice.Index(i).Addr().Interface() +} + +func (a *anySliceArrayReflect) ScanIndexType() any { + return reflect.New(a.slice.Type().Elem()).Interface() +} + +type anyMultiDimSliceArray struct { + slice reflect.Value + dims []ArrayDimension +} + +func (a *anyMultiDimSliceArray) Dimensions() []ArrayDimension { + if a.slice.IsNil() { + return nil + } + + s := a.slice + for { + a.dims = append(a.dims, ArrayDimension{Length: int32(s.Len()), LowerBound: 1}) + if s.Len() > 0 { + s = s.Index(0) + } else { + break + } + if s.Type().Kind() == reflect.Slice { + } else { + break + } + } + + return a.dims +} + +func (a *anyMultiDimSliceArray) Index(i int) any { + if len(a.dims) == 1 { + return a.slice.Index(i).Interface() + } + + indexes := make([]int, len(a.dims)) + for j := len(a.dims) - 1; j >= 0; j-- { + dimLen := int(a.dims[j].Length) + indexes[j] = i % dimLen + i = i / dimLen + } + + v := a.slice + for _, si := range indexes { + v = v.Index(si) + } + + return v.Interface() +} + +func (a *anyMultiDimSliceArray) IndexType() any { + lowestSliceType := a.slice.Type() + for ; lowestSliceType.Elem().Kind() == reflect.Slice; lowestSliceType = lowestSliceType.Elem() { + } + return reflect.New(lowestSliceType.Elem()).Elem().Interface() +} + +func (a *anyMultiDimSliceArray) SetDimensions(dimensions []ArrayDimension) error { + sliceType := a.slice.Type() + + if dimensions == nil { + a.slice.Set(reflect.Zero(sliceType)) + return nil + } + + switch len(dimensions) { + case 0: + // Empty, but non-nil array + slice := reflect.MakeSlice(sliceType, 0, 0) + a.slice.Set(slice) + return nil + case 1: + elementCount := cardinality(dimensions) + slice := reflect.MakeSlice(sliceType, elementCount, elementCount) + a.slice.Set(slice) + return nil + default: + sliceDimensionCount := 1 + lowestSliceType := sliceType + for ; lowestSliceType.Elem().Kind() == reflect.Slice; lowestSliceType = lowestSliceType.Elem() { + sliceDimensionCount++ + } + + if sliceDimensionCount != len(dimensions) { + return fmt.Errorf("PostgreSQL array has %d dimensions but slice has %d dimensions", len(dimensions), sliceDimensionCount) + } + + elementCount := cardinality(dimensions) + flatSlice := reflect.MakeSlice(lowestSliceType, elementCount, elementCount) + + multiDimSlice := a.makeMultidimensionalSlice(sliceType, dimensions, flatSlice, 0) + a.slice.Set(multiDimSlice) + + // Now that a.slice is a multi-dimensional slice with the underlying data pointed at flatSlice change a.slice to + // flatSlice so ScanIndex only has to handle simple one dimensional slices. + a.slice = flatSlice + + return nil + } + +} + +func (a *anyMultiDimSliceArray) makeMultidimensionalSlice(sliceType reflect.Type, dimensions []ArrayDimension, flatSlice reflect.Value, flatSliceIdx int) reflect.Value { + if len(dimensions) == 1 { + endIdx := flatSliceIdx + int(dimensions[0].Length) + return flatSlice.Slice3(flatSliceIdx, endIdx, endIdx) + } + + sliceLen := int(dimensions[0].Length) + slice := reflect.MakeSlice(sliceType, sliceLen, sliceLen) + for i := 0; i < sliceLen; i++ { + subSlice := a.makeMultidimensionalSlice(sliceType.Elem(), dimensions[1:], flatSlice, flatSliceIdx+(i*int(dimensions[1].Length))) + slice.Index(i).Set(subSlice) + } + + return slice +} + +func (a *anyMultiDimSliceArray) ScanIndex(i int) any { + return a.slice.Index(i).Addr().Interface() +} + +func (a *anyMultiDimSliceArray) ScanIndexType() any { + lowestSliceType := a.slice.Type() + for ; lowestSliceType.Elem().Kind() == reflect.Slice; lowestSliceType = lowestSliceType.Elem() { + } + return reflect.New(lowestSliceType.Elem()).Interface() +} + +type anyArrayArrayReflect struct { + array reflect.Value +} + +func (a anyArrayArrayReflect) Dimensions() []ArrayDimension { + return []ArrayDimension{{Length: int32(a.array.Len()), LowerBound: 1}} +} + +func (a anyArrayArrayReflect) Index(i int) any { + return a.array.Index(i).Interface() +} + +func (a anyArrayArrayReflect) IndexType() any { + return reflect.New(a.array.Type().Elem()).Elem().Interface() +} + +func (a *anyArrayArrayReflect) SetDimensions(dimensions []ArrayDimension) error { + if dimensions == nil { + return fmt.Errorf("anyArrayArrayReflect: cannot scan NULL into %v", a.array.Type().String()) + } + + if len(dimensions) != 1 { + return fmt.Errorf("anyArrayArrayReflect: cannot scan multi-dimensional array into %v", a.array.Type().String()) + } + + if int(dimensions[0].Length) != a.array.Len() { + return fmt.Errorf("anyArrayArrayReflect: cannot scan array with length %v into %v", dimensions[0].Length, a.array.Type().String()) + } + + return nil +} + +func (a *anyArrayArrayReflect) ScanIndex(i int) any { + return a.array.Index(i).Addr().Interface() +} + +func (a *anyArrayArrayReflect) ScanIndexType() any { + return reflect.New(a.array.Type().Elem()).Interface() +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/bytea.go b/vendor/github.com/jackc/pgx/v5/pgtype/bytea.go new file mode 100644 index 00000000..a247705e --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/bytea.go @@ -0,0 +1,255 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/hex" + "fmt" +) + +type BytesScanner interface { + // ScanBytes receives a byte slice of driver memory that is only valid until the next database method call. + ScanBytes(v []byte) error +} + +type BytesValuer interface { + // BytesValue returns a byte slice of the byte data. The caller must not change the returned slice. + BytesValue() ([]byte, error) +} + +// DriverBytes is a byte slice that holds a reference to memory owned by the driver. It is only valid from the time it +// is scanned until Rows.Next or Rows.Close is called. It is never safe to use DriverBytes with QueryRow as Row.Scan +// internally calls Rows.Close before returning. +type DriverBytes []byte + +func (b *DriverBytes) ScanBytes(v []byte) error { + *b = v + return nil +} + +// PreallocBytes is a byte slice of preallocated memory that scanned bytes will be copied to. If it is too small a new +// slice will be allocated. +type PreallocBytes []byte + +func (b *PreallocBytes) ScanBytes(v []byte) error { + if v == nil { + *b = nil + return nil + } + + if len(v) <= len(*b) { + *b = (*b)[:len(v)] + } else { + *b = make(PreallocBytes, len(v)) + } + copy(*b, v) + return nil +} + +// UndecodedBytes can be used as a scan target to get the raw bytes from PostgreSQL without any decoding. +type UndecodedBytes []byte + +type scanPlanAnyToUndecodedBytes struct{} + +func (scanPlanAnyToUndecodedBytes) Scan(src []byte, dst any) error { + dstBuf := dst.(*UndecodedBytes) + if src == nil { + *dstBuf = nil + return nil + } + + *dstBuf = make([]byte, len(src)) + copy(*dstBuf, src) + return nil +} + +type ByteaCodec struct{} + +func (ByteaCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (ByteaCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (ByteaCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case []byte: + return encodePlanBytesCodecBinaryBytes{} + case BytesValuer: + return encodePlanBytesCodecBinaryBytesValuer{} + } + case TextFormatCode: + switch value.(type) { + case []byte: + return encodePlanBytesCodecTextBytes{} + case BytesValuer: + return encodePlanBytesCodecTextBytesValuer{} + } + } + + return nil +} + +type encodePlanBytesCodecBinaryBytes struct{} + +func (encodePlanBytesCodecBinaryBytes) Encode(value any, buf []byte) (newBuf []byte, err error) { + b := value.([]byte) + if b == nil { + return nil, nil + } + + return append(buf, b...), nil +} + +type encodePlanBytesCodecBinaryBytesValuer struct{} + +func (encodePlanBytesCodecBinaryBytesValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + b, err := value.(BytesValuer).BytesValue() + if err != nil { + return nil, err + } + if b == nil { + return nil, nil + } + + return append(buf, b...), nil +} + +type encodePlanBytesCodecTextBytes struct{} + +func (encodePlanBytesCodecTextBytes) Encode(value any, buf []byte) (newBuf []byte, err error) { + b := value.([]byte) + if b == nil { + return nil, nil + } + + buf = append(buf, `\x`...) + buf = append(buf, hex.EncodeToString(b)...) + return buf, nil +} + +type encodePlanBytesCodecTextBytesValuer struct{} + +func (encodePlanBytesCodecTextBytesValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + b, err := value.(BytesValuer).BytesValue() + if err != nil { + return nil, err + } + if b == nil { + return nil, nil + } + + buf = append(buf, `\x`...) + buf = append(buf, hex.EncodeToString(b)...) + return buf, nil +} + +func (ByteaCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case *[]byte: + return scanPlanBinaryBytesToBytes{} + case BytesScanner: + return scanPlanBinaryBytesToBytesScanner{} + } + case TextFormatCode: + switch target.(type) { + case *[]byte: + return scanPlanTextByteaToBytes{} + case BytesScanner: + return scanPlanTextByteaToBytesScanner{} + } + } + + return nil +} + +type scanPlanBinaryBytesToBytes struct{} + +func (scanPlanBinaryBytesToBytes) Scan(src []byte, dst any) error { + dstBuf := dst.(*[]byte) + if src == nil { + *dstBuf = nil + return nil + } + + *dstBuf = make([]byte, len(src)) + copy(*dstBuf, src) + return nil +} + +type scanPlanBinaryBytesToBytesScanner struct{} + +func (scanPlanBinaryBytesToBytesScanner) Scan(src []byte, dst any) error { + scanner := (dst).(BytesScanner) + return scanner.ScanBytes(src) +} + +type scanPlanTextByteaToBytes struct{} + +func (scanPlanTextByteaToBytes) Scan(src []byte, dst any) error { + dstBuf := dst.(*[]byte) + if src == nil { + *dstBuf = nil + return nil + } + + buf, err := decodeHexBytea(src) + if err != nil { + return err + } + *dstBuf = buf + + return nil +} + +type scanPlanTextByteaToBytesScanner struct{} + +func (scanPlanTextByteaToBytesScanner) Scan(src []byte, dst any) error { + scanner := (dst).(BytesScanner) + buf, err := decodeHexBytea(src) + if err != nil { + return err + } + return scanner.ScanBytes(buf) +} + +func decodeHexBytea(src []byte) ([]byte, error) { + if src == nil { + return nil, nil + } + + if len(src) < 2 || src[0] != '\\' || src[1] != 'x' { + return nil, fmt.Errorf("invalid hex format") + } + + buf := make([]byte, (len(src)-2)/2) + _, err := hex.Decode(buf, src[2:]) + if err != nil { + return nil, err + } + + return buf, nil +} + +func (c ByteaCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return c.DecodeValue(m, oid, format, src) +} + +func (c ByteaCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var buf []byte + err := codecScan(c, m, oid, format, src, &buf) + if err != nil { + return nil, err + } + return buf, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/circle.go b/vendor/github.com/jackc/pgx/v5/pgtype/circle.go new file mode 100644 index 00000000..e8f118cc --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/circle.go @@ -0,0 +1,222 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "math" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type CircleScanner interface { + ScanCircle(v Circle) error +} + +type CircleValuer interface { + CircleValue() (Circle, error) +} + +type Circle struct { + P Vec2 + R float64 + Valid bool +} + +func (c *Circle) ScanCircle(v Circle) error { + *c = v + return nil +} + +func (c Circle) CircleValue() (Circle, error) { + return c, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Circle) Scan(src any) error { + if src == nil { + *dst = Circle{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToCircleScanner{}.Scan([]byte(src), dst) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (src Circle) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + + buf, err := CircleCodec{}.PlanEncode(nil, 0, TextFormatCode, src).Encode(src, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +type CircleCodec struct{} + +func (CircleCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (CircleCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (CircleCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(CircleValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanCircleCodecBinary{} + case TextFormatCode: + return encodePlanCircleCodecText{} + } + + return nil +} + +type encodePlanCircleCodecBinary struct{} + +func (encodePlanCircleCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + circle, err := value.(CircleValuer).CircleValue() + if err != nil { + return nil, err + } + + if !circle.Valid { + return nil, nil + } + + buf = pgio.AppendUint64(buf, math.Float64bits(circle.P.X)) + buf = pgio.AppendUint64(buf, math.Float64bits(circle.P.Y)) + buf = pgio.AppendUint64(buf, math.Float64bits(circle.R)) + return buf, nil +} + +type encodePlanCircleCodecText struct{} + +func (encodePlanCircleCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + circle, err := value.(CircleValuer).CircleValue() + if err != nil { + return nil, err + } + + if !circle.Valid { + return nil, nil + } + + buf = append(buf, fmt.Sprintf(`<(%s,%s),%s>`, + strconv.FormatFloat(circle.P.X, 'f', -1, 64), + strconv.FormatFloat(circle.P.Y, 'f', -1, 64), + strconv.FormatFloat(circle.R, 'f', -1, 64), + )...) + return buf, nil +} + +func (CircleCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch format { + case BinaryFormatCode: + switch target.(type) { + case CircleScanner: + return scanPlanBinaryCircleToCircleScanner{} + } + case TextFormatCode: + switch target.(type) { + case CircleScanner: + return scanPlanTextAnyToCircleScanner{} + } + } + + return nil +} + +func (c CircleCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c CircleCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var circle Circle + err := codecScan(c, m, oid, format, src, &circle) + if err != nil { + return nil, err + } + return circle, nil +} + +type scanPlanBinaryCircleToCircleScanner struct{} + +func (scanPlanBinaryCircleToCircleScanner) Scan(src []byte, dst any) error { + scanner := (dst).(CircleScanner) + + if src == nil { + return scanner.ScanCircle(Circle{}) + } + + if len(src) != 24 { + return fmt.Errorf("invalid length for Circle: %v", len(src)) + } + + x := binary.BigEndian.Uint64(src) + y := binary.BigEndian.Uint64(src[8:]) + r := binary.BigEndian.Uint64(src[16:]) + + return scanner.ScanCircle(Circle{ + P: Vec2{math.Float64frombits(x), math.Float64frombits(y)}, + R: math.Float64frombits(r), + Valid: true, + }) +} + +type scanPlanTextAnyToCircleScanner struct{} + +func (scanPlanTextAnyToCircleScanner) Scan(src []byte, dst any) error { + scanner := (dst).(CircleScanner) + + if src == nil { + return scanner.ScanCircle(Circle{}) + } + + if len(src) < 9 { + return fmt.Errorf("invalid length for Circle: %v", len(src)) + } + + str := string(src[2:]) + end := strings.IndexByte(str, ',') + x, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + str = str[end+1:] + end = strings.IndexByte(str, ')') + + y, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + str = str[end+2 : len(str)-1] + + r, err := strconv.ParseFloat(str, 64) + if err != nil { + return err + } + + return scanner.ScanCircle(Circle{P: Vec2{x, y}, R: r, Valid: true}) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/composite.go b/vendor/github.com/jackc/pgx/v5/pgtype/composite.go new file mode 100644 index 00000000..fb372325 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/composite.go @@ -0,0 +1,602 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "errors" + "fmt" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +// CompositeIndexGetter is a type accessed by index that can be converted into a PostgreSQL composite. +type CompositeIndexGetter interface { + // IsNull returns true if the value is SQL NULL. + IsNull() bool + + // Index returns the element at i. + Index(i int) any +} + +// CompositeIndexScanner is a type accessed by index that can be scanned from a PostgreSQL composite. +type CompositeIndexScanner interface { + // ScanNull sets the value to SQL NULL. + ScanNull() error + + // ScanIndex returns a value usable as a scan target for i. + ScanIndex(i int) any +} + +type CompositeCodecField struct { + Name string + Type *Type +} + +type CompositeCodec struct { + Fields []CompositeCodecField +} + +func (c *CompositeCodec) FormatSupported(format int16) bool { + for _, f := range c.Fields { + if !f.Type.Codec.FormatSupported(format) { + return false + } + } + + return true +} + +func (c *CompositeCodec) PreferredFormat() int16 { + if c.FormatSupported(BinaryFormatCode) { + return BinaryFormatCode + } + return TextFormatCode +} + +func (c *CompositeCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(CompositeIndexGetter); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return &encodePlanCompositeCodecCompositeIndexGetterToBinary{cc: c, m: m} + case TextFormatCode: + return &encodePlanCompositeCodecCompositeIndexGetterToText{cc: c, m: m} + } + + return nil +} + +type encodePlanCompositeCodecCompositeIndexGetterToBinary struct { + cc *CompositeCodec + m *Map +} + +func (plan *encodePlanCompositeCodecCompositeIndexGetterToBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + getter := value.(CompositeIndexGetter) + + if getter.IsNull() { + return nil, nil + } + + builder := NewCompositeBinaryBuilder(plan.m, buf) + for i, field := range plan.cc.Fields { + builder.AppendValue(field.Type.OID, getter.Index(i)) + } + + return builder.Finish() +} + +type encodePlanCompositeCodecCompositeIndexGetterToText struct { + cc *CompositeCodec + m *Map +} + +func (plan *encodePlanCompositeCodecCompositeIndexGetterToText) Encode(value any, buf []byte) (newBuf []byte, err error) { + getter := value.(CompositeIndexGetter) + + if getter.IsNull() { + return nil, nil + } + + b := NewCompositeTextBuilder(plan.m, buf) + for i, field := range plan.cc.Fields { + b.AppendValue(field.Type.OID, getter.Index(i)) + } + + return b.Finish() +} + +func (c *CompositeCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch format { + case BinaryFormatCode: + switch target.(type) { + case CompositeIndexScanner: + return &scanPlanBinaryCompositeToCompositeIndexScanner{cc: c, m: m} + } + case TextFormatCode: + switch target.(type) { + case CompositeIndexScanner: + return &scanPlanTextCompositeToCompositeIndexScanner{cc: c, m: m} + } + } + + return nil +} + +type scanPlanBinaryCompositeToCompositeIndexScanner struct { + cc *CompositeCodec + m *Map +} + +func (plan *scanPlanBinaryCompositeToCompositeIndexScanner) Scan(src []byte, target any) error { + targetScanner := (target).(CompositeIndexScanner) + + if src == nil { + return targetScanner.ScanNull() + } + + scanner := NewCompositeBinaryScanner(plan.m, src) + for i, field := range plan.cc.Fields { + if scanner.Next() { + fieldTarget := targetScanner.ScanIndex(i) + if fieldTarget != nil { + fieldPlan := plan.m.PlanScan(field.Type.OID, BinaryFormatCode, fieldTarget) + if fieldPlan == nil { + return fmt.Errorf("unable to encode %v into OID %d in binary format", field, field.Type.OID) + } + + err := fieldPlan.Scan(scanner.Bytes(), fieldTarget) + if err != nil { + return err + } + } + } else { + return errors.New("read past end of composite") + } + } + + if err := scanner.Err(); err != nil { + return err + } + + return nil +} + +type scanPlanTextCompositeToCompositeIndexScanner struct { + cc *CompositeCodec + m *Map +} + +func (plan *scanPlanTextCompositeToCompositeIndexScanner) Scan(src []byte, target any) error { + targetScanner := (target).(CompositeIndexScanner) + + if src == nil { + return targetScanner.ScanNull() + } + + scanner := NewCompositeTextScanner(plan.m, src) + for i, field := range plan.cc.Fields { + if scanner.Next() { + fieldTarget := targetScanner.ScanIndex(i) + if fieldTarget != nil { + fieldPlan := plan.m.PlanScan(field.Type.OID, TextFormatCode, fieldTarget) + if fieldPlan == nil { + return fmt.Errorf("unable to encode %v into OID %d in text format", field, field.Type.OID) + } + + err := fieldPlan.Scan(scanner.Bytes(), fieldTarget) + if err != nil { + return err + } + } + } else { + return errors.New("read past end of composite") + } + } + + if err := scanner.Err(); err != nil { + return err + } + + return nil +} + +func (c *CompositeCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + switch format { + case TextFormatCode: + return string(src), nil + case BinaryFormatCode: + buf := make([]byte, len(src)) + copy(buf, src) + return buf, nil + default: + return nil, fmt.Errorf("unknown format code %d", format) + } +} + +func (c *CompositeCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + switch format { + case TextFormatCode: + scanner := NewCompositeTextScanner(m, src) + values := make(map[string]any, len(c.Fields)) + for i := 0; scanner.Next() && i < len(c.Fields); i++ { + var v any + fieldPlan := m.PlanScan(c.Fields[i].Type.OID, TextFormatCode, &v) + if fieldPlan == nil { + return nil, fmt.Errorf("unable to scan OID %d in text format into %v", c.Fields[i].Type.OID, v) + } + + err := fieldPlan.Scan(scanner.Bytes(), &v) + if err != nil { + return nil, err + } + + values[c.Fields[i].Name] = v + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return values, nil + case BinaryFormatCode: + scanner := NewCompositeBinaryScanner(m, src) + values := make(map[string]any, len(c.Fields)) + for i := 0; scanner.Next() && i < len(c.Fields); i++ { + var v any + fieldPlan := m.PlanScan(scanner.OID(), BinaryFormatCode, &v) + if fieldPlan == nil { + return nil, fmt.Errorf("unable to scan OID %d in binary format into %v", scanner.OID(), v) + } + + err := fieldPlan.Scan(scanner.Bytes(), &v) + if err != nil { + return nil, err + } + + values[c.Fields[i].Name] = v + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return values, nil + default: + return nil, fmt.Errorf("unknown format code %d", format) + } + +} + +type CompositeBinaryScanner struct { + m *Map + rp int + src []byte + + fieldCount int32 + fieldBytes []byte + fieldOID uint32 + err error +} + +// NewCompositeBinaryScanner a scanner over a binary encoded composite balue. +func NewCompositeBinaryScanner(m *Map, src []byte) *CompositeBinaryScanner { + rp := 0 + if len(src[rp:]) < 4 { + return &CompositeBinaryScanner{err: fmt.Errorf("Record incomplete %v", src)} + } + + fieldCount := int32(binary.BigEndian.Uint32(src[rp:])) + rp += 4 + + return &CompositeBinaryScanner{ + m: m, + rp: rp, + src: src, + fieldCount: fieldCount, + } +} + +// Next advances the scanner to the next field. It returns false after the last field is read or an error occurs. After +// Next returns false, the Err method can be called to check if any errors occurred. +func (cfs *CompositeBinaryScanner) Next() bool { + if cfs.err != nil { + return false + } + + if cfs.rp == len(cfs.src) { + return false + } + + if len(cfs.src[cfs.rp:]) < 8 { + cfs.err = fmt.Errorf("Record incomplete %v", cfs.src) + return false + } + cfs.fieldOID = binary.BigEndian.Uint32(cfs.src[cfs.rp:]) + cfs.rp += 4 + + fieldLen := int(int32(binary.BigEndian.Uint32(cfs.src[cfs.rp:]))) + cfs.rp += 4 + + if fieldLen >= 0 { + if len(cfs.src[cfs.rp:]) < fieldLen { + cfs.err = fmt.Errorf("Record incomplete rp=%d src=%v", cfs.rp, cfs.src) + return false + } + cfs.fieldBytes = cfs.src[cfs.rp : cfs.rp+fieldLen] + cfs.rp += fieldLen + } else { + cfs.fieldBytes = nil + } + + return true +} + +func (cfs *CompositeBinaryScanner) FieldCount() int { + return int(cfs.fieldCount) +} + +// Bytes returns the bytes of the field most recently read by Scan(). +func (cfs *CompositeBinaryScanner) Bytes() []byte { + return cfs.fieldBytes +} + +// OID returns the OID of the field most recently read by Scan(). +func (cfs *CompositeBinaryScanner) OID() uint32 { + return cfs.fieldOID +} + +// Err returns any error encountered by the scanner. +func (cfs *CompositeBinaryScanner) Err() error { + return cfs.err +} + +type CompositeTextScanner struct { + m *Map + rp int + src []byte + + fieldBytes []byte + err error +} + +// NewCompositeTextScanner a scanner over a text encoded composite value. +func NewCompositeTextScanner(m *Map, src []byte) *CompositeTextScanner { + if len(src) < 2 { + return &CompositeTextScanner{err: fmt.Errorf("Record incomplete %v", src)} + } + + if src[0] != '(' { + return &CompositeTextScanner{err: fmt.Errorf("composite text format must start with '('")} + } + + if src[len(src)-1] != ')' { + return &CompositeTextScanner{err: fmt.Errorf("composite text format must end with ')'")} + } + + return &CompositeTextScanner{ + m: m, + rp: 1, + src: src, + } +} + +// Next advances the scanner to the next field. It returns false after the last field is read or an error occurs. After +// Next returns false, the Err method can be called to check if any errors occurred. +func (cfs *CompositeTextScanner) Next() bool { + if cfs.err != nil { + return false + } + + if cfs.rp == len(cfs.src) { + return false + } + + switch cfs.src[cfs.rp] { + case ',', ')': // null + cfs.rp++ + cfs.fieldBytes = nil + return true + case '"': // quoted value + cfs.rp++ + cfs.fieldBytes = make([]byte, 0, 16) + for { + ch := cfs.src[cfs.rp] + + if ch == '"' { + cfs.rp++ + if cfs.src[cfs.rp] == '"' { + cfs.fieldBytes = append(cfs.fieldBytes, '"') + cfs.rp++ + } else { + break + } + } else if ch == '\\' { + cfs.rp++ + cfs.fieldBytes = append(cfs.fieldBytes, cfs.src[cfs.rp]) + cfs.rp++ + } else { + cfs.fieldBytes = append(cfs.fieldBytes, ch) + cfs.rp++ + } + } + cfs.rp++ + return true + default: // unquoted value + start := cfs.rp + for { + ch := cfs.src[cfs.rp] + if ch == ',' || ch == ')' { + break + } + cfs.rp++ + } + cfs.fieldBytes = cfs.src[start:cfs.rp] + cfs.rp++ + return true + } +} + +// Bytes returns the bytes of the field most recently read by Scan(). +func (cfs *CompositeTextScanner) Bytes() []byte { + return cfs.fieldBytes +} + +// Err returns any error encountered by the scanner. +func (cfs *CompositeTextScanner) Err() error { + return cfs.err +} + +type CompositeBinaryBuilder struct { + m *Map + buf []byte + startIdx int + fieldCount uint32 + err error +} + +func NewCompositeBinaryBuilder(m *Map, buf []byte) *CompositeBinaryBuilder { + startIdx := len(buf) + buf = append(buf, 0, 0, 0, 0) // allocate room for number of fields + return &CompositeBinaryBuilder{m: m, buf: buf, startIdx: startIdx} +} + +func (b *CompositeBinaryBuilder) AppendValue(oid uint32, field any) { + if b.err != nil { + return + } + + if field == nil { + b.buf = pgio.AppendUint32(b.buf, oid) + b.buf = pgio.AppendInt32(b.buf, -1) + b.fieldCount++ + return + } + + plan := b.m.PlanEncode(oid, BinaryFormatCode, field) + if plan == nil { + b.err = fmt.Errorf("unable to encode %v into OID %d in binary format", field, oid) + return + } + + b.buf = pgio.AppendUint32(b.buf, oid) + lengthPos := len(b.buf) + b.buf = pgio.AppendInt32(b.buf, -1) + fieldBuf, err := plan.Encode(field, b.buf) + if err != nil { + b.err = err + return + } + if fieldBuf != nil { + binary.BigEndian.PutUint32(fieldBuf[lengthPos:], uint32(len(fieldBuf)-len(b.buf))) + b.buf = fieldBuf + } + + b.fieldCount++ +} + +func (b *CompositeBinaryBuilder) Finish() ([]byte, error) { + if b.err != nil { + return nil, b.err + } + + binary.BigEndian.PutUint32(b.buf[b.startIdx:], b.fieldCount) + return b.buf, nil +} + +type CompositeTextBuilder struct { + m *Map + buf []byte + startIdx int + fieldCount uint32 + err error + fieldBuf [32]byte +} + +func NewCompositeTextBuilder(m *Map, buf []byte) *CompositeTextBuilder { + buf = append(buf, '(') // allocate room for number of fields + return &CompositeTextBuilder{m: m, buf: buf} +} + +func (b *CompositeTextBuilder) AppendValue(oid uint32, field any) { + if b.err != nil { + return + } + + if field == nil { + b.buf = append(b.buf, ',') + return + } + + plan := b.m.PlanEncode(oid, TextFormatCode, field) + if plan == nil { + b.err = fmt.Errorf("unable to encode %v into OID %d in text format", field, oid) + return + } + + fieldBuf, err := plan.Encode(field, b.fieldBuf[0:0]) + if err != nil { + b.err = err + return + } + if fieldBuf != nil { + b.buf = append(b.buf, quoteCompositeFieldIfNeeded(string(fieldBuf))...) + } + + b.buf = append(b.buf, ',') +} + +func (b *CompositeTextBuilder) Finish() ([]byte, error) { + if b.err != nil { + return nil, b.err + } + + b.buf[len(b.buf)-1] = ')' + return b.buf, nil +} + +var quoteCompositeReplacer = strings.NewReplacer(`\`, `\\`, `"`, `\"`) + +func quoteCompositeField(src string) string { + return `"` + quoteCompositeReplacer.Replace(src) + `"` +} + +func quoteCompositeFieldIfNeeded(src string) string { + if src == "" || src[0] == ' ' || src[len(src)-1] == ' ' || strings.ContainsAny(src, `(),"\`) { + return quoteCompositeField(src) + } + return src +} + +// CompositeFields represents the values of a composite value. It can be used as an encoding source or as a scan target. +// It cannot scan a NULL, but the composite fields can be NULL. +type CompositeFields []any + +func (cf CompositeFields) SkipUnderlyingTypePlan() {} + +func (cf CompositeFields) IsNull() bool { + return cf == nil +} + +func (cf CompositeFields) Index(i int) any { + return cf[i] +} + +func (cf CompositeFields) ScanNull() error { + return fmt.Errorf("cannot scan NULL into CompositeFields") +} + +func (cf CompositeFields) ScanIndex(i int) any { + return cf[i] +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/convert.go b/vendor/github.com/jackc/pgx/v5/pgtype/convert.go new file mode 100644 index 00000000..8a9cee9c --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/convert.go @@ -0,0 +1,108 @@ +package pgtype + +import ( + "reflect" +) + +func NullAssignTo(dst any) error { + dstPtr := reflect.ValueOf(dst) + + // AssignTo dst must always be a pointer + if dstPtr.Kind() != reflect.Ptr { + return &nullAssignmentError{dst: dst} + } + + dstVal := dstPtr.Elem() + + switch dstVal.Kind() { + case reflect.Ptr, reflect.Slice, reflect.Map: + dstVal.Set(reflect.Zero(dstVal.Type())) + return nil + } + + return &nullAssignmentError{dst: dst} +} + +var kindTypes map[reflect.Kind]reflect.Type + +func toInterface(dst reflect.Value, t reflect.Type) (any, bool) { + nextDst := dst.Convert(t) + return nextDst.Interface(), dst.Type() != nextDst.Type() +} + +// GetAssignToDstType attempts to convert dst to something AssignTo can assign +// to. If dst is a pointer to pointer it allocates a value and returns the +// dereferences pointer. If dst is a named type such as *Foo where Foo is type +// Foo int16, it converts dst to *int16. +// +// GetAssignToDstType returns the converted dst and a bool representing if any +// change was made. +func GetAssignToDstType(dst any) (any, bool) { + dstPtr := reflect.ValueOf(dst) + + // AssignTo dst must always be a pointer + if dstPtr.Kind() != reflect.Ptr { + return nil, false + } + + dstVal := dstPtr.Elem() + + // if dst is a pointer to pointer, allocate space try again with the dereferenced pointer + if dstVal.Kind() == reflect.Ptr { + dstVal.Set(reflect.New(dstVal.Type().Elem())) + return dstVal.Interface(), true + } + + // if dst is pointer to a base type that has been renamed + if baseValType, ok := kindTypes[dstVal.Kind()]; ok { + return toInterface(dstPtr, reflect.PtrTo(baseValType)) + } + + if dstVal.Kind() == reflect.Slice { + if baseElemType, ok := kindTypes[dstVal.Type().Elem().Kind()]; ok { + return toInterface(dstPtr, reflect.PtrTo(reflect.SliceOf(baseElemType))) + } + } + + if dstVal.Kind() == reflect.Array { + if baseElemType, ok := kindTypes[dstVal.Type().Elem().Kind()]; ok { + return toInterface(dstPtr, reflect.PtrTo(reflect.ArrayOf(dstVal.Len(), baseElemType))) + } + } + + if dstVal.Kind() == reflect.Struct { + if dstVal.Type().NumField() == 1 && dstVal.Type().Field(0).Anonymous { + dstPtr = dstVal.Field(0).Addr() + nested := dstVal.Type().Field(0).Type + if nested.Kind() == reflect.Array { + if baseElemType, ok := kindTypes[nested.Elem().Kind()]; ok { + return toInterface(dstPtr, reflect.PtrTo(reflect.ArrayOf(nested.Len(), baseElemType))) + } + } + if _, ok := kindTypes[nested.Kind()]; ok && dstPtr.CanInterface() { + return dstPtr.Interface(), true + } + } + } + + return nil, false +} + +func init() { + kindTypes = map[reflect.Kind]reflect.Type{ + reflect.Bool: reflect.TypeOf(false), + reflect.Float32: reflect.TypeOf(float32(0)), + reflect.Float64: reflect.TypeOf(float64(0)), + reflect.Int: reflect.TypeOf(int(0)), + reflect.Int8: reflect.TypeOf(int8(0)), + reflect.Int16: reflect.TypeOf(int16(0)), + reflect.Int32: reflect.TypeOf(int32(0)), + reflect.Int64: reflect.TypeOf(int64(0)), + reflect.Uint: reflect.TypeOf(uint(0)), + reflect.Uint8: reflect.TypeOf(uint8(0)), + reflect.Uint16: reflect.TypeOf(uint16(0)), + reflect.Uint32: reflect.TypeOf(uint32(0)), + reflect.Uint64: reflect.TypeOf(uint64(0)), + reflect.String: reflect.TypeOf(""), + } +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/date.go b/vendor/github.com/jackc/pgx/v5/pgtype/date.go new file mode 100644 index 00000000..784b16de --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/date.go @@ -0,0 +1,351 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "encoding/json" + "fmt" + "regexp" + "strconv" + "time" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type DateScanner interface { + ScanDate(v Date) error +} + +type DateValuer interface { + DateValue() (Date, error) +} + +type Date struct { + Time time.Time + InfinityModifier InfinityModifier + Valid bool +} + +func (d *Date) ScanDate(v Date) error { + *d = v + return nil +} + +func (d Date) DateValue() (Date, error) { + return d, nil +} + +const ( + negativeInfinityDayOffset = -2147483648 + infinityDayOffset = 2147483647 +) + +// Scan implements the database/sql Scanner interface. +func (dst *Date) Scan(src any) error { + if src == nil { + *dst = Date{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToDateScanner{}.Scan([]byte(src), dst) + case time.Time: + *dst = Date{Time: src, Valid: true} + return nil + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (src Date) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + + if src.InfinityModifier != Finite { + return src.InfinityModifier.String(), nil + } + return src.Time, nil +} + +func (src Date) MarshalJSON() ([]byte, error) { + if !src.Valid { + return []byte("null"), nil + } + + var s string + + switch src.InfinityModifier { + case Finite: + s = src.Time.Format("2006-01-02") + case Infinity: + s = "infinity" + case NegativeInfinity: + s = "-infinity" + } + + return json.Marshal(s) +} + +func (dst *Date) UnmarshalJSON(b []byte) error { + var s *string + err := json.Unmarshal(b, &s) + if err != nil { + return err + } + + if s == nil { + *dst = Date{} + return nil + } + + switch *s { + case "infinity": + *dst = Date{Valid: true, InfinityModifier: Infinity} + case "-infinity": + *dst = Date{Valid: true, InfinityModifier: -Infinity} + default: + t, err := time.ParseInLocation("2006-01-02", *s, time.UTC) + if err != nil { + return err + } + + *dst = Date{Time: t, Valid: true} + } + + return nil +} + +type DateCodec struct{} + +func (DateCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (DateCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (DateCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(DateValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanDateCodecBinary{} + case TextFormatCode: + return encodePlanDateCodecText{} + } + + return nil +} + +type encodePlanDateCodecBinary struct{} + +func (encodePlanDateCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + date, err := value.(DateValuer).DateValue() + if err != nil { + return nil, err + } + + if !date.Valid { + return nil, nil + } + + var daysSinceDateEpoch int32 + switch date.InfinityModifier { + case Finite: + tUnix := time.Date(date.Time.Year(), date.Time.Month(), date.Time.Day(), 0, 0, 0, 0, time.UTC).Unix() + dateEpoch := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC).Unix() + + secSinceDateEpoch := tUnix - dateEpoch + daysSinceDateEpoch = int32(secSinceDateEpoch / 86400) + case Infinity: + daysSinceDateEpoch = infinityDayOffset + case NegativeInfinity: + daysSinceDateEpoch = negativeInfinityDayOffset + } + + return pgio.AppendInt32(buf, daysSinceDateEpoch), nil +} + +type encodePlanDateCodecText struct{} + +func (encodePlanDateCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + date, err := value.(DateValuer).DateValue() + if err != nil { + return nil, err + } + + if !date.Valid { + return nil, nil + } + + switch date.InfinityModifier { + case Finite: + // Year 0000 is 1 BC + bc := false + year := date.Time.Year() + if year <= 0 { + year = -year + 1 + bc = true + } + + yearBytes := strconv.AppendInt(make([]byte, 0, 6), int64(year), 10) + for i := len(yearBytes); i < 4; i++ { + buf = append(buf, '0') + } + buf = append(buf, yearBytes...) + buf = append(buf, '-') + if date.Time.Month() < 10 { + buf = append(buf, '0') + } + buf = strconv.AppendInt(buf, int64(date.Time.Month()), 10) + buf = append(buf, '-') + if date.Time.Day() < 10 { + buf = append(buf, '0') + } + buf = strconv.AppendInt(buf, int64(date.Time.Day()), 10) + + if bc { + buf = append(buf, " BC"...) + } + case Infinity: + buf = append(buf, "infinity"...) + case NegativeInfinity: + buf = append(buf, "-infinity"...) + } + + return buf, nil +} + +func (DateCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case DateScanner: + return scanPlanBinaryDateToDateScanner{} + } + case TextFormatCode: + switch target.(type) { + case DateScanner: + return scanPlanTextAnyToDateScanner{} + } + } + + return nil +} + +type scanPlanBinaryDateToDateScanner struct{} + +func (scanPlanBinaryDateToDateScanner) Scan(src []byte, dst any) error { + scanner := (dst).(DateScanner) + + if src == nil { + return scanner.ScanDate(Date{}) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for date: %v", len(src)) + } + + dayOffset := int32(binary.BigEndian.Uint32(src)) + + switch dayOffset { + case infinityDayOffset: + return scanner.ScanDate(Date{InfinityModifier: Infinity, Valid: true}) + case negativeInfinityDayOffset: + return scanner.ScanDate(Date{InfinityModifier: -Infinity, Valid: true}) + default: + t := time.Date(2000, 1, int(1+dayOffset), 0, 0, 0, 0, time.UTC) + return scanner.ScanDate(Date{Time: t, Valid: true}) + } +} + +type scanPlanTextAnyToDateScanner struct{} + +var dateRegexp = regexp.MustCompile(`^(\d{4,})-(\d\d)-(\d\d)( BC)?$`) + +func (scanPlanTextAnyToDateScanner) Scan(src []byte, dst any) error { + scanner := (dst).(DateScanner) + + if src == nil { + return scanner.ScanDate(Date{}) + } + + sbuf := string(src) + match := dateRegexp.FindStringSubmatch(sbuf) + if match != nil { + year, err := strconv.ParseInt(match[1], 10, 32) + if err != nil { + return fmt.Errorf("BUG: cannot parse date that regexp matched (year): %w", err) + } + + month, err := strconv.ParseInt(match[2], 10, 32) + if err != nil { + return fmt.Errorf("BUG: cannot parse date that regexp matched (month): %w", err) + } + + day, err := strconv.ParseInt(match[3], 10, 32) + if err != nil { + return fmt.Errorf("BUG: cannot parse date that regexp matched (month): %w", err) + } + + // BC matched + if len(match[4]) > 0 { + year = -year + 1 + } + + t := time.Date(int(year), time.Month(month), int(day), 0, 0, 0, 0, time.UTC) + return scanner.ScanDate(Date{Time: t, Valid: true}) + } + + switch sbuf { + case "infinity": + return scanner.ScanDate(Date{InfinityModifier: Infinity, Valid: true}) + case "-infinity": + return scanner.ScanDate(Date{InfinityModifier: -Infinity, Valid: true}) + default: + return fmt.Errorf("invalid date format") + } +} + +func (c DateCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var date Date + err := codecScan(c, m, oid, format, src, &date) + if err != nil { + return nil, err + } + + if date.InfinityModifier != Finite { + return date.InfinityModifier.String(), nil + } + + return date.Time, nil +} + +func (c DateCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var date Date + err := codecScan(c, m, oid, format, src, &date) + if err != nil { + return nil, err + } + + if date.InfinityModifier != Finite { + return date.InfinityModifier, nil + } + + return date.Time, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/doc.go b/vendor/github.com/jackc/pgx/v5/pgtype/doc.go new file mode 100644 index 00000000..7687ea8f --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/doc.go @@ -0,0 +1,191 @@ +// Package pgtype converts between Go and PostgreSQL values. +/* +The primary type is the Map type. It is a map of PostgreSQL types identified by OID (object ID) to a Codec. A Codec is +responsible for converting between Go and PostgreSQL values. NewMap creates a Map with all supported standard PostgreSQL +types already registered. Additional types can be registered with Map.RegisterType. + +Use Map.Scan and Map.Encode to decode PostgreSQL values to Go and encode Go values to PostgreSQL respectively. + +Base Type Mapping + +pgtype maps between all common base types directly between Go and PostgreSQL. In particular: + + Go PostgreSQL + ----------------------- + string varchar + text + + // Integers are automatically be converted to any other integer type if + // it can be done without overflow or underflow. + int8 + int16 smallint + int32 int + int64 bigint + int + uint8 + uint16 + uint32 + uint64 + uint + + // Floats are strict and do not automatically convert like integers. + float32 float4 + float64 float8 + + time.Time date + timestamp + timestamptz + + netip.Addr inet + netip.Prefix cidr + + []byte bytea + +Null Values + +pgtype can map NULLs in two ways. The first is types that can directly represent NULL such as Int4. They work in a +similar fashion to database/sql. The second is to use a pointer to a pointer. + + var foo pgtype.Text + var bar *string + err := conn.QueryRow("select foo, bar from widgets where id=$1", 42).Scan(&foo, &bar) + if err != nil { + return err + } + +When using nullable pgtype types as parameters for queries, one has to remember +to explicitly set their Valid field to true, otherwise the parameter's value will be NULL. + +JSON Support + +pgtype automatically marshals and unmarshals data from json and jsonb PostgreSQL types. + +Extending Existing PostgreSQL Type Support + +Generally, all Codecs will support interfaces that can be implemented to enable scanning and encoding. For example, +PointCodec can use any Go type that implements the PointScanner and PointValuer interfaces. So rather than use +pgtype.Point and application can directly use its own point type with pgtype as long as it implements those interfaces. + +See example_custom_type_test.go for an example of a custom type for the PostgreSQL point type. + +Sometimes pgx supports a PostgreSQL type such as numeric but the Go type is in an external package that does not have +pgx support such as github.com/shopspring/decimal. These types can be registered with pgtype with custom conversion +logic. See https://github.com/jackc/pgx-shopspring-decimal and https://github.com/jackc/pgx-gofrs-uuid for example +integrations. + +New PostgreSQL Type Support + +pgtype uses the PostgreSQL OID to determine how to encode or decode a value. pgtype supports array, composite, domain, +and enum types. However, any type created in PostgreSQL with CREATE TYPE will receive a new OID. This means that the OID +of each new PostgreSQL type must be registered for pgtype to handle values of that type with the correct Codec. + +The pgx.Conn LoadType method can return a *Type for array, composite, domain, and enum types by inspecting the database +metadata. This *Type can then be registered with Map.RegisterType. + +For example, the following function could be called after a connection is established: + + func RegisterDataTypes(ctx context.Context, conn *pgx.Conn) error { + dataTypeNames := []string{ + "foo", + "_foo", + "bar", + "_bar", + } + + for _, typeName := range dataTypeNames { + dataType, err := conn.LoadType(ctx, typeName) + if err != nil { + return err + } + conn.TypeMap().RegisterType(dataType) + } + + return nil + } + +A type cannot be registered unless all types it depends on are already registered. e.g. An array type cannot be +registered until its element type is registered. + +ArrayCodec implements support for arrays. If pgtype supports type T then it can easily support []T by registering an +ArrayCodec for the appropriate PostgreSQL OID. In addition, Array[T] type can support multi-dimensional arrays. + +CompositeCodec implements support for PostgreSQL composite types. Go structs can be scanned into if the public fields of +the struct are in the exact order and type of the PostgreSQL type or by implementing CompositeIndexScanner and +CompositeIndexGetter. + +Domain types are treated as their underlying type if the underlying type and the domain type are registered. + +PostgreSQL enums can usually be treated as text. However, EnumCodec implements support for interning strings which can +reduce memory usage. + +While pgtype will often still work with unregistered types it is highly recommended that all types be registered due to +an improvement in performance and the elimination of certain edge cases. + +If an entirely new PostgreSQL type (e.g. PostGIS types) is used then the application or a library can create a new +Codec. Then the OID / Codec mapping can be registered with Map.RegisterType. There is no difference between a Codec +defined and registered by the application and a Codec built in to pgtype. See any of the Codecs in pgtype for Codec +examples and for examples of type registration. + +Encoding Unknown Types + +pgtype works best when the OID of the PostgreSQL type is known. But in some cases such as using the simple protocol the +OID is unknown. In this case Map.RegisterDefaultPgType can be used to register an assumed OID for a particular Go type. + +Renamed Types + +If pgtype does not recognize a type and that type is a renamed simple type simple (e.g. type MyInt32 int32) pgtype acts +as if it is the underlying type. It currently cannot automatically detect the underlying type of renamed structs (eg.g. +type MyTime time.Time). + +Compatibility with database/sql + +pgtype also includes support for custom types implementing the database/sql.Scanner and database/sql/driver.Valuer +interfaces. + +Encoding Typed Nils + +pgtype encodes untyped and typed nils (e.g. nil and []byte(nil)) to the SQL NULL value without going through the Codec +system. This means that Codecs and other encoding logic do not have to handle nil or *T(nil). + +However, database/sql compatibility requires Value to be called on T(nil) when T implements driver.Valuer. Therefore, +driver.Valuer values are only considered NULL when *T(nil) where driver.Valuer is implemented on T not on *T. See +https://github.com/golang/go/issues/8415 and +https://github.com/golang/go/commit/0ce1d79a6a771f7449ec493b993ed2a720917870. + +Child Records + +pgtype's support for arrays and composite records can be used to load records and their children in a single query. See +example_child_records_test.go for an example. + +Overview of Scanning Implementation + +The first step is to use the OID to lookup the correct Codec. If the OID is unavailable, Map will try to find the OID +from previous calls of Map.RegisterDefaultPgType. The Map will call the Codec's PlanScan method to get a plan for +scanning into the Go value. A Codec will support scanning into one or more Go types. Oftentime these Go types are +interfaces rather than explicit types. For example, PointCodec can use any Go type that implements the PointScanner and +PointValuer interfaces. + +If a Go value is not supported directly by a Codec then Map will try wrapping it with additional logic and try again. +For example, Int8Codec does not support scanning into a renamed type (e.g. type myInt64 int64). But Map will detect that +myInt64 is a renamed type and create a plan that converts the value to the underlying int64 type and then passes that to +the Codec (see TryFindUnderlyingTypeScanPlan). + +These plan wrappers are contained in Map.TryWrapScanPlanFuncs. By default these contain shared logic to handle renamed +types, pointers to pointers, slices, composite types, etc. Additional plan wrappers can be added to seamlessly integrate +types that do not support pgx directly. For example, the before mentioned +https://github.com/jackc/pgx-shopspring-decimal package detects decimal.Decimal values, wraps them in something +implementing NumericScanner and passes that to the Codec. + +Map.Scan and Map.Encode are convenience methods that wrap Map.PlanScan and Map.PlanEncode. Determining how to scan or +encode a particular type may be a time consuming operation. Hence the planning and execution steps of a conversion are +internally separated. + +Reducing Compiled Binary Size + +pgx.QueryExecModeExec and pgx.QueryExecModeSimpleProtocol require the default PostgreSQL type to be registered for each +Go type used as a query parameter. By default pgx does this for all supported types and their array variants. If an +application does not use those query execution modes or manually registers the default PostgreSQL type for the types it +uses as query parameters it can use the build tag nopgxregisterdefaulttypes. This omits the default type registration +and reduces the compiled binary size by ~2MB. +*/ +package pgtype diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/enum_codec.go b/vendor/github.com/jackc/pgx/v5/pgtype/enum_codec.go new file mode 100644 index 00000000..5e787c1e --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/enum_codec.go @@ -0,0 +1,109 @@ +package pgtype + +import ( + "database/sql/driver" + "fmt" +) + +// EnumCodec is a codec that caches the strings it decodes. If the same string is read multiple times only one copy is +// allocated. These strings are only garbage collected when the EnumCodec is garbage collected. EnumCodec can be used +// for any text type not only enums, but it should only be used when there are a small number of possible values. +type EnumCodec struct { + membersMap map[string]string // map to quickly lookup member and reuse string instead of allocating +} + +func (EnumCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (EnumCodec) PreferredFormat() int16 { + return TextFormatCode +} + +func (EnumCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case TextFormatCode, BinaryFormatCode: + switch value.(type) { + case string: + return encodePlanTextCodecString{} + case []byte: + return encodePlanTextCodecByteSlice{} + case TextValuer: + return encodePlanTextCodecTextValuer{} + } + } + + return nil +} + +func (c *EnumCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch format { + case TextFormatCode, BinaryFormatCode: + switch target.(type) { + case *string: + return &scanPlanTextAnyToEnumString{codec: c} + case *[]byte: + return scanPlanAnyToNewByteSlice{} + case TextScanner: + return &scanPlanTextAnyToEnumTextScanner{codec: c} + } + } + + return nil +} + +func (c *EnumCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return c.DecodeValue(m, oid, format, src) +} + +func (c *EnumCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + return c.lookupAndCacheString(src), nil +} + +// lookupAndCacheString looks for src in the members map. If it is not found it is added to the map. +func (c *EnumCodec) lookupAndCacheString(src []byte) string { + if c.membersMap == nil { + c.membersMap = make(map[string]string) + } + + if s, found := c.membersMap[string(src)]; found { + return s + } + + s := string(src) + c.membersMap[s] = s + return s +} + +type scanPlanTextAnyToEnumString struct { + codec *EnumCodec +} + +func (plan *scanPlanTextAnyToEnumString) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p := (dst).(*string) + *p = plan.codec.lookupAndCacheString(src) + + return nil +} + +type scanPlanTextAnyToEnumTextScanner struct { + codec *EnumCodec +} + +func (plan *scanPlanTextAnyToEnumTextScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TextScanner) + + if src == nil { + return scanner.ScanText(Text{}) + } + + return scanner.ScanText(Text{String: plan.codec.lookupAndCacheString(src), Valid: true}) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/float4.go b/vendor/github.com/jackc/pgx/v5/pgtype/float4.go new file mode 100644 index 00000000..8646d9d2 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/float4.go @@ -0,0 +1,319 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "encoding/json" + "fmt" + "math" + "strconv" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type Float4 struct { + Float32 float32 + Valid bool +} + +// ScanFloat64 implements the Float64Scanner interface. +func (f *Float4) ScanFloat64(n Float8) error { + *f = Float4{Float32: float32(n.Float64), Valid: n.Valid} + return nil +} + +func (f Float4) Float64Value() (Float8, error) { + return Float8{Float64: float64(f.Float32), Valid: f.Valid}, nil +} + +func (f *Float4) ScanInt64(n Int8) error { + *f = Float4{Float32: float32(n.Int64), Valid: n.Valid} + return nil +} + +func (f Float4) Int64Value() (Int8, error) { + return Int8{Int64: int64(f.Float32), Valid: f.Valid}, nil +} + +// Scan implements the database/sql Scanner interface. +func (f *Float4) Scan(src any) error { + if src == nil { + *f = Float4{} + return nil + } + + switch src := src.(type) { + case float64: + *f = Float4{Float32: float32(src), Valid: true} + return nil + case string: + n, err := strconv.ParseFloat(string(src), 32) + if err != nil { + return err + } + *f = Float4{Float32: float32(n), Valid: true} + return nil + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (f Float4) Value() (driver.Value, error) { + if !f.Valid { + return nil, nil + } + return float64(f.Float32), nil +} + +func (f Float4) MarshalJSON() ([]byte, error) { + if !f.Valid { + return []byte("null"), nil + } + return json.Marshal(f.Float32) +} + +func (f *Float4) UnmarshalJSON(b []byte) error { + var n *float32 + err := json.Unmarshal(b, &n) + if err != nil { + return err + } + + if n == nil { + *f = Float4{} + } else { + *f = Float4{Float32: *n, Valid: true} + } + + return nil +} + +type Float4Codec struct{} + +func (Float4Codec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (Float4Codec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (Float4Codec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case float32: + return encodePlanFloat4CodecBinaryFloat32{} + case Float64Valuer: + return encodePlanFloat4CodecBinaryFloat64Valuer{} + case Int64Valuer: + return encodePlanFloat4CodecBinaryInt64Valuer{} + } + case TextFormatCode: + switch value.(type) { + case float32: + return encodePlanTextFloat32{} + case Float64Valuer: + return encodePlanTextFloat64Valuer{} + case Int64Valuer: + return encodePlanTextInt64Valuer{} + } + } + + return nil +} + +type encodePlanFloat4CodecBinaryFloat32 struct{} + +func (encodePlanFloat4CodecBinaryFloat32) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(float32) + return pgio.AppendUint32(buf, math.Float32bits(n)), nil +} + +type encodePlanTextFloat32 struct{} + +func (encodePlanTextFloat32) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(float32) + return append(buf, strconv.FormatFloat(float64(n), 'f', -1, 32)...), nil +} + +type encodePlanFloat4CodecBinaryFloat64Valuer struct{} + +func (encodePlanFloat4CodecBinaryFloat64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Float64Valuer).Float64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + return pgio.AppendUint32(buf, math.Float32bits(float32(n.Float64))), nil +} + +type encodePlanFloat4CodecBinaryInt64Valuer struct{} + +func (encodePlanFloat4CodecBinaryInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + f := float32(n.Int64) + return pgio.AppendUint32(buf, math.Float32bits(f)), nil +} + +func (Float4Codec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case *float32: + return scanPlanBinaryFloat4ToFloat32{} + case Float64Scanner: + return scanPlanBinaryFloat4ToFloat64Scanner{} + case Int64Scanner: + return scanPlanBinaryFloat4ToInt64Scanner{} + case TextScanner: + return scanPlanBinaryFloat4ToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case *float32: + return scanPlanTextAnyToFloat32{} + case Float64Scanner: + return scanPlanTextAnyToFloat64Scanner{} + case Int64Scanner: + return scanPlanTextAnyToInt64Scanner{} + } + } + + return nil +} + +type scanPlanBinaryFloat4ToFloat32 struct{} + +func (scanPlanBinaryFloat4ToFloat32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for float4: %v", len(src)) + } + + n := int32(binary.BigEndian.Uint32(src)) + f := (dst).(*float32) + *f = math.Float32frombits(uint32(n)) + + return nil +} + +type scanPlanBinaryFloat4ToFloat64Scanner struct{} + +func (scanPlanBinaryFloat4ToFloat64Scanner) Scan(src []byte, dst any) error { + s := (dst).(Float64Scanner) + + if src == nil { + return s.ScanFloat64(Float8{}) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for float4: %v", len(src)) + } + + n := int32(binary.BigEndian.Uint32(src)) + return s.ScanFloat64(Float8{Float64: float64(math.Float32frombits(uint32(n))), Valid: true}) +} + +type scanPlanBinaryFloat4ToInt64Scanner struct{} + +func (scanPlanBinaryFloat4ToInt64Scanner) Scan(src []byte, dst any) error { + s := (dst).(Int64Scanner) + + if src == nil { + return s.ScanInt64(Int8{}) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for float4: %v", len(src)) + } + + ui32 := int32(binary.BigEndian.Uint32(src)) + f32 := math.Float32frombits(uint32(ui32)) + i64 := int64(f32) + if f32 != float32(i64) { + return fmt.Errorf("cannot losslessly convert %v to int64", f32) + } + + return s.ScanInt64(Int8{Int64: i64, Valid: true}) +} + +type scanPlanBinaryFloat4ToTextScanner struct{} + +func (scanPlanBinaryFloat4ToTextScanner) Scan(src []byte, dst any) error { + s := (dst).(TextScanner) + + if src == nil { + return s.ScanText(Text{}) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for float4: %v", len(src)) + } + + ui32 := int32(binary.BigEndian.Uint32(src)) + f32 := math.Float32frombits(uint32(ui32)) + + return s.ScanText(Text{String: strconv.FormatFloat(float64(f32), 'f', -1, 32), Valid: true}) +} + +type scanPlanTextAnyToFloat32 struct{} + +func (scanPlanTextAnyToFloat32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + n, err := strconv.ParseFloat(string(src), 32) + if err != nil { + return err + } + + f := (dst).(*float32) + *f = float32(n) + + return nil +} + +func (c Float4Codec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var n float32 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return float64(n), nil +} + +func (c Float4Codec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var n float32 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/float8.go b/vendor/github.com/jackc/pgx/v5/pgtype/float8.go new file mode 100644 index 00000000..9c923c9a --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/float8.go @@ -0,0 +1,365 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "encoding/json" + "fmt" + "math" + "strconv" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type Float64Scanner interface { + ScanFloat64(Float8) error +} + +type Float64Valuer interface { + Float64Value() (Float8, error) +} + +type Float8 struct { + Float64 float64 + Valid bool +} + +// ScanFloat64 implements the Float64Scanner interface. +func (f *Float8) ScanFloat64(n Float8) error { + *f = n + return nil +} + +func (f Float8) Float64Value() (Float8, error) { + return f, nil +} + +func (f *Float8) ScanInt64(n Int8) error { + *f = Float8{Float64: float64(n.Int64), Valid: n.Valid} + return nil +} + +func (f Float8) Int64Value() (Int8, error) { + return Int8{Int64: int64(f.Float64), Valid: f.Valid}, nil +} + +// Scan implements the database/sql Scanner interface. +func (f *Float8) Scan(src any) error { + if src == nil { + *f = Float8{} + return nil + } + + switch src := src.(type) { + case float64: + *f = Float8{Float64: src, Valid: true} + return nil + case string: + n, err := strconv.ParseFloat(string(src), 64) + if err != nil { + return err + } + *f = Float8{Float64: n, Valid: true} + return nil + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (f Float8) Value() (driver.Value, error) { + if !f.Valid { + return nil, nil + } + return f.Float64, nil +} + +func (f Float8) MarshalJSON() ([]byte, error) { + if !f.Valid { + return []byte("null"), nil + } + return json.Marshal(f.Float64) +} + +func (f *Float8) UnmarshalJSON(b []byte) error { + var n *float64 + err := json.Unmarshal(b, &n) + if err != nil { + return err + } + + if n == nil { + *f = Float8{} + } else { + *f = Float8{Float64: *n, Valid: true} + } + + return nil +} + +type Float8Codec struct{} + +func (Float8Codec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (Float8Codec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (Float8Codec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case float64: + return encodePlanFloat8CodecBinaryFloat64{} + case Float64Valuer: + return encodePlanFloat8CodecBinaryFloat64Valuer{} + case Int64Valuer: + return encodePlanFloat8CodecBinaryInt64Valuer{} + } + case TextFormatCode: + switch value.(type) { + case float64: + return encodePlanTextFloat64{} + case Float64Valuer: + return encodePlanTextFloat64Valuer{} + case Int64Valuer: + return encodePlanTextInt64Valuer{} + } + } + + return nil +} + +type encodePlanFloat8CodecBinaryFloat64 struct{} + +func (encodePlanFloat8CodecBinaryFloat64) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(float64) + return pgio.AppendUint64(buf, math.Float64bits(n)), nil +} + +type encodePlanTextFloat64 struct{} + +func (encodePlanTextFloat64) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(float64) + return append(buf, strconv.FormatFloat(n, 'f', -1, 64)...), nil +} + +type encodePlanFloat8CodecBinaryFloat64Valuer struct{} + +func (encodePlanFloat8CodecBinaryFloat64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Float64Valuer).Float64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + return pgio.AppendUint64(buf, math.Float64bits(n.Float64)), nil +} + +type encodePlanTextFloat64Valuer struct{} + +func (encodePlanTextFloat64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Float64Valuer).Float64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + return append(buf, strconv.FormatFloat(n.Float64, 'f', -1, 64)...), nil +} + +type encodePlanFloat8CodecBinaryInt64Valuer struct{} + +func (encodePlanFloat8CodecBinaryInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + f := float64(n.Int64) + return pgio.AppendUint64(buf, math.Float64bits(f)), nil +} + +type encodePlanTextInt64Valuer struct{} + +func (encodePlanTextInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + return append(buf, strconv.FormatInt(n.Int64, 10)...), nil +} + +func (Float8Codec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case *float64: + return scanPlanBinaryFloat8ToFloat64{} + case Float64Scanner: + return scanPlanBinaryFloat8ToFloat64Scanner{} + case Int64Scanner: + return scanPlanBinaryFloat8ToInt64Scanner{} + case TextScanner: + return scanPlanBinaryFloat8ToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case *float64: + return scanPlanTextAnyToFloat64{} + case Float64Scanner: + return scanPlanTextAnyToFloat64Scanner{} + case Int64Scanner: + return scanPlanTextAnyToInt64Scanner{} + } + } + + return nil +} + +type scanPlanBinaryFloat8ToFloat64 struct{} + +func (scanPlanBinaryFloat8ToFloat64) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for float8: %v", len(src)) + } + + n := int64(binary.BigEndian.Uint64(src)) + f := (dst).(*float64) + *f = math.Float64frombits(uint64(n)) + + return nil +} + +type scanPlanBinaryFloat8ToFloat64Scanner struct{} + +func (scanPlanBinaryFloat8ToFloat64Scanner) Scan(src []byte, dst any) error { + s := (dst).(Float64Scanner) + + if src == nil { + return s.ScanFloat64(Float8{}) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for float8: %v", len(src)) + } + + n := int64(binary.BigEndian.Uint64(src)) + return s.ScanFloat64(Float8{Float64: math.Float64frombits(uint64(n)), Valid: true}) +} + +type scanPlanBinaryFloat8ToInt64Scanner struct{} + +func (scanPlanBinaryFloat8ToInt64Scanner) Scan(src []byte, dst any) error { + s := (dst).(Int64Scanner) + + if src == nil { + return s.ScanInt64(Int8{}) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for float8: %v", len(src)) + } + + ui64 := int64(binary.BigEndian.Uint64(src)) + f64 := math.Float64frombits(uint64(ui64)) + i64 := int64(f64) + if f64 != float64(i64) { + return fmt.Errorf("cannot losslessly convert %v to int64", f64) + } + + return s.ScanInt64(Int8{Int64: i64, Valid: true}) +} + +type scanPlanBinaryFloat8ToTextScanner struct{} + +func (scanPlanBinaryFloat8ToTextScanner) Scan(src []byte, dst any) error { + s := (dst).(TextScanner) + + if src == nil { + return s.ScanText(Text{}) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for float8: %v", len(src)) + } + + ui64 := int64(binary.BigEndian.Uint64(src)) + f64 := math.Float64frombits(uint64(ui64)) + + return s.ScanText(Text{String: strconv.FormatFloat(f64, 'f', -1, 64), Valid: true}) +} + +type scanPlanTextAnyToFloat64 struct{} + +func (scanPlanTextAnyToFloat64) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + n, err := strconv.ParseFloat(string(src), 64) + if err != nil { + return err + } + + f := (dst).(*float64) + *f = n + + return nil +} + +type scanPlanTextAnyToFloat64Scanner struct{} + +func (scanPlanTextAnyToFloat64Scanner) Scan(src []byte, dst any) error { + s := (dst).(Float64Scanner) + + if src == nil { + return s.ScanFloat64(Float8{}) + } + + n, err := strconv.ParseFloat(string(src), 64) + if err != nil { + return err + } + + return s.ScanFloat64(Float8{Float64: n, Valid: true}) +} + +func (c Float8Codec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return c.DecodeValue(m, oid, format, src) +} + +func (c Float8Codec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var n float64 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/hstore.go b/vendor/github.com/jackc/pgx/v5/pgtype/hstore.go new file mode 100644 index 00000000..2f34f4c9 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/hstore.go @@ -0,0 +1,486 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "errors" + "fmt" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type HstoreScanner interface { + ScanHstore(v Hstore) error +} + +type HstoreValuer interface { + HstoreValue() (Hstore, error) +} + +// Hstore represents an hstore column that can be null or have null values +// associated with its keys. +type Hstore map[string]*string + +func (h *Hstore) ScanHstore(v Hstore) error { + *h = v + return nil +} + +func (h Hstore) HstoreValue() (Hstore, error) { + return h, nil +} + +// Scan implements the database/sql Scanner interface. +func (h *Hstore) Scan(src any) error { + if src == nil { + *h = nil + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToHstoreScanner{}.scanString(src, h) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (h Hstore) Value() (driver.Value, error) { + if h == nil { + return nil, nil + } + + buf, err := HstoreCodec{}.PlanEncode(nil, 0, TextFormatCode, h).Encode(h, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +type HstoreCodec struct{} + +func (HstoreCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (HstoreCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (HstoreCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(HstoreValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanHstoreCodecBinary{} + case TextFormatCode: + return encodePlanHstoreCodecText{} + } + + return nil +} + +type encodePlanHstoreCodecBinary struct{} + +func (encodePlanHstoreCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + hstore, err := value.(HstoreValuer).HstoreValue() + if err != nil { + return nil, err + } + + if hstore == nil { + return nil, nil + } + + buf = pgio.AppendInt32(buf, int32(len(hstore))) + + for k, v := range hstore { + buf = pgio.AppendInt32(buf, int32(len(k))) + buf = append(buf, k...) + + if v == nil { + buf = pgio.AppendInt32(buf, -1) + } else { + buf = pgio.AppendInt32(buf, int32(len(*v))) + buf = append(buf, (*v)...) + } + } + + return buf, nil +} + +type encodePlanHstoreCodecText struct{} + +func (encodePlanHstoreCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + hstore, err := value.(HstoreValuer).HstoreValue() + if err != nil { + return nil, err + } + + if len(hstore) == 0 { + // distinguish between empty and nil: Not strictly required by Postgres, since its protocol + // explicitly marks NULL column values separately. However, the Binary codec does this, and + // this means we can "round trip" Encode and Scan without data loss. + // nil: []byte(nil); empty: []byte{} + if hstore == nil { + return nil, nil + } + return []byte{}, nil + } + + firstPair := true + + for k, v := range hstore { + if firstPair { + firstPair = false + } else { + buf = append(buf, ',', ' ') + } + + // unconditionally quote hstore keys/values like Postgres does + // this avoids a Mac OS X Postgres hstore parsing bug: + // https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.com + buf = append(buf, '"') + buf = append(buf, quoteArrayReplacer.Replace(k)...) + buf = append(buf, '"') + buf = append(buf, "=>"...) + + if v == nil { + buf = append(buf, "NULL"...) + } else { + buf = append(buf, '"') + buf = append(buf, quoteArrayReplacer.Replace(*v)...) + buf = append(buf, '"') + } + } + + return buf, nil +} + +func (HstoreCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case HstoreScanner: + return scanPlanBinaryHstoreToHstoreScanner{} + } + case TextFormatCode: + switch target.(type) { + case HstoreScanner: + return scanPlanTextAnyToHstoreScanner{} + } + } + + return nil +} + +type scanPlanBinaryHstoreToHstoreScanner struct{} + +func (scanPlanBinaryHstoreToHstoreScanner) Scan(src []byte, dst any) error { + scanner := (dst).(HstoreScanner) + + if src == nil { + return scanner.ScanHstore(Hstore(nil)) + } + + rp := 0 + + const uint32Len = 4 + if len(src[rp:]) < uint32Len { + return fmt.Errorf("hstore incomplete %v", src) + } + pairCount := int(int32(binary.BigEndian.Uint32(src[rp:]))) + rp += uint32Len + + hstore := make(Hstore, pairCount) + // one allocation for all *string, rather than one per string, just like text parsing + valueStrings := make([]string, pairCount) + + for i := 0; i < pairCount; i++ { + if len(src[rp:]) < uint32Len { + return fmt.Errorf("hstore incomplete %v", src) + } + keyLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) + rp += uint32Len + + if len(src[rp:]) < keyLen { + return fmt.Errorf("hstore incomplete %v", src) + } + key := string(src[rp : rp+keyLen]) + rp += keyLen + + if len(src[rp:]) < uint32Len { + return fmt.Errorf("hstore incomplete %v", src) + } + valueLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) + rp += 4 + + if valueLen >= 0 { + valueStrings[i] = string(src[rp : rp+valueLen]) + rp += valueLen + + hstore[key] = &valueStrings[i] + } else { + hstore[key] = nil + } + } + + return scanner.ScanHstore(hstore) +} + +type scanPlanTextAnyToHstoreScanner struct{} + +func (s scanPlanTextAnyToHstoreScanner) Scan(src []byte, dst any) error { + scanner := (dst).(HstoreScanner) + + if src == nil { + return scanner.ScanHstore(Hstore(nil)) + } + return s.scanString(string(src), scanner) +} + +// scanString does not return nil hstore values because string cannot be nil. +func (scanPlanTextAnyToHstoreScanner) scanString(src string, scanner HstoreScanner) error { + hstore, err := parseHstore(src) + if err != nil { + return err + } + return scanner.ScanHstore(hstore) +} + +func (c HstoreCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c HstoreCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var hstore Hstore + err := codecScan(c, m, oid, format, src, &hstore) + if err != nil { + return nil, err + } + return hstore, nil +} + +type hstoreParser struct { + str string + pos int + nextBackslash int +} + +func newHSP(in string) *hstoreParser { + return &hstoreParser{ + pos: 0, + str: in, + nextBackslash: strings.IndexByte(in, '\\'), + } +} + +func (p *hstoreParser) atEnd() bool { + return p.pos >= len(p.str) +} + +// consume returns the next byte of the string, or end if the string is done. +func (p *hstoreParser) consume() (b byte, end bool) { + if p.pos >= len(p.str) { + return 0, true + } + b = p.str[p.pos] + p.pos++ + return b, false +} + +func unexpectedByteErr(actualB byte, expectedB byte) error { + return fmt.Errorf("expected '%c' ('%#v'); found '%c' ('%#v')", expectedB, expectedB, actualB, actualB) +} + +// consumeExpectedByte consumes expectedB from the string, or returns an error. +func (p *hstoreParser) consumeExpectedByte(expectedB byte) error { + nextB, end := p.consume() + if end { + return fmt.Errorf("expected '%c' ('%#v'); found end", expectedB, expectedB) + } + if nextB != expectedB { + return unexpectedByteErr(nextB, expectedB) + } + return nil +} + +// consumeExpected2 consumes two expected bytes or returns an error. +// This was a bit faster than using a string argument (better inlining? Not sure). +func (p *hstoreParser) consumeExpected2(one byte, two byte) error { + if p.pos+2 > len(p.str) { + return errors.New("unexpected end of string") + } + if p.str[p.pos] != one { + return unexpectedByteErr(p.str[p.pos], one) + } + if p.str[p.pos+1] != two { + return unexpectedByteErr(p.str[p.pos+1], two) + } + p.pos += 2 + return nil +} + +var errEOSInQuoted = errors.New(`found end before closing double-quote ('"')`) + +// consumeDoubleQuoted consumes a double-quoted string from p. The double quote must have been +// parsed already. This copies the string from the backing string so it can be garbage collected. +func (p *hstoreParser) consumeDoubleQuoted() (string, error) { + // fast path: assume most keys/values do not contain escapes + nextDoubleQuote := strings.IndexByte(p.str[p.pos:], '"') + if nextDoubleQuote == -1 { + return "", errEOSInQuoted + } + nextDoubleQuote += p.pos + if p.nextBackslash == -1 || p.nextBackslash > nextDoubleQuote { + // clone the string from the source string to ensure it can be garbage collected separately + // TODO: use strings.Clone on Go 1.20; this could get optimized away + s := strings.Clone(p.str[p.pos:nextDoubleQuote]) + p.pos = nextDoubleQuote + 1 + return s, nil + } + + // slow path: string contains escapes + s, err := p.consumeDoubleQuotedWithEscapes(p.nextBackslash) + p.nextBackslash = strings.IndexByte(p.str[p.pos:], '\\') + if p.nextBackslash != -1 { + p.nextBackslash += p.pos + } + return s, err +} + +// consumeDoubleQuotedWithEscapes consumes a double-quoted string containing escapes, starting +// at p.pos, and with the first backslash at firstBackslash. This copies the string so it can be +// garbage collected separately. +func (p *hstoreParser) consumeDoubleQuotedWithEscapes(firstBackslash int) (string, error) { + // copy the prefix that does not contain backslashes + var builder strings.Builder + builder.WriteString(p.str[p.pos:firstBackslash]) + + // skip to the backslash + p.pos = firstBackslash + + // copy bytes until the end, unescaping backslashes + for { + nextB, end := p.consume() + if end { + return "", errEOSInQuoted + } else if nextB == '"' { + break + } else if nextB == '\\' { + // escape: skip the backslash and copy the char + nextB, end = p.consume() + if end { + return "", errEOSInQuoted + } + if !(nextB == '\\' || nextB == '"') { + return "", fmt.Errorf("unexpected escape in quoted string: found '%#v'", nextB) + } + builder.WriteByte(nextB) + } else { + // normal byte: copy it + builder.WriteByte(nextB) + } + } + return builder.String(), nil +} + +// consumePairSeparator consumes the Hstore pair separator ", " or returns an error. +func (p *hstoreParser) consumePairSeparator() error { + return p.consumeExpected2(',', ' ') +} + +// consumeKVSeparator consumes the Hstore key/value separator "=>" or returns an error. +func (p *hstoreParser) consumeKVSeparator() error { + return p.consumeExpected2('=', '>') +} + +// consumeDoubleQuotedOrNull consumes the Hstore key/value separator "=>" or returns an error. +func (p *hstoreParser) consumeDoubleQuotedOrNull() (Text, error) { + // peek at the next byte + if p.atEnd() { + return Text{}, errors.New("found end instead of value") + } + next := p.str[p.pos] + if next == 'N' { + // must be the exact string NULL: use consumeExpected2 twice + err := p.consumeExpected2('N', 'U') + if err != nil { + return Text{}, err + } + err = p.consumeExpected2('L', 'L') + if err != nil { + return Text{}, err + } + return Text{String: "", Valid: false}, nil + } else if next != '"' { + return Text{}, unexpectedByteErr(next, '"') + } + + // skip the double quote + p.pos += 1 + s, err := p.consumeDoubleQuoted() + if err != nil { + return Text{}, err + } + return Text{String: s, Valid: true}, nil +} + +func parseHstore(s string) (Hstore, error) { + p := newHSP(s) + + // This is an over-estimate of the number of key/value pairs. Use '>' because I am guessing it + // is less likely to occur in keys/values than '=' or ','. + numPairsEstimate := strings.Count(s, ">") + // makes one allocation of strings for the entire Hstore, rather than one allocation per value. + valueStrings := make([]string, 0, numPairsEstimate) + result := make(Hstore, numPairsEstimate) + first := true + for !p.atEnd() { + if !first { + err := p.consumePairSeparator() + if err != nil { + return nil, err + } + } else { + first = false + } + + err := p.consumeExpectedByte('"') + if err != nil { + return nil, err + } + + key, err := p.consumeDoubleQuoted() + if err != nil { + return nil, err + } + + err = p.consumeKVSeparator() + if err != nil { + return nil, err + } + + value, err := p.consumeDoubleQuotedOrNull() + if err != nil { + return nil, err + } + if value.Valid { + valueStrings = append(valueStrings, value.String) + result[key] = &valueStrings[len(valueStrings)-1] + } else { + result[key] = nil + } + } + + return result, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/inet.go b/vendor/github.com/jackc/pgx/v5/pgtype/inet.go new file mode 100644 index 00000000..6ca10ea0 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/inet.go @@ -0,0 +1,200 @@ +package pgtype + +import ( + "bytes" + "database/sql/driver" + "errors" + "fmt" + "net/netip" +) + +// Network address family is dependent on server socket.h value for AF_INET. +// In practice, all platforms appear to have the same value. See +// src/include/utils/inet.h for more information. +const ( + defaultAFInet = 2 + defaultAFInet6 = 3 +) + +type NetipPrefixScanner interface { + ScanNetipPrefix(v netip.Prefix) error +} + +type NetipPrefixValuer interface { + NetipPrefixValue() (netip.Prefix, error) +} + +// InetCodec handles both inet and cidr PostgreSQL types. The preferred Go types are netip.Prefix and netip.Addr. If +// IsValid() is false then they are treated as SQL NULL. +type InetCodec struct{} + +func (InetCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (InetCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (InetCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(NetipPrefixValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanInetCodecBinary{} + case TextFormatCode: + return encodePlanInetCodecText{} + } + + return nil +} + +type encodePlanInetCodecBinary struct{} + +func (encodePlanInetCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + prefix, err := value.(NetipPrefixValuer).NetipPrefixValue() + if err != nil { + return nil, err + } + + if !prefix.IsValid() { + return nil, nil + } + + var family byte + if prefix.Addr().Is4() { + family = defaultAFInet + } else { + family = defaultAFInet6 + } + + buf = append(buf, family) + + ones := prefix.Bits() + buf = append(buf, byte(ones)) + + // is_cidr is ignored on server + buf = append(buf, 0) + + if family == defaultAFInet { + buf = append(buf, byte(4)) + b := prefix.Addr().As4() + buf = append(buf, b[:]...) + } else { + buf = append(buf, byte(16)) + b := prefix.Addr().As16() + buf = append(buf, b[:]...) + } + + return buf, nil +} + +type encodePlanInetCodecText struct{} + +func (encodePlanInetCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + prefix, err := value.(NetipPrefixValuer).NetipPrefixValue() + if err != nil { + return nil, err + } + + if !prefix.IsValid() { + return nil, nil + } + + return append(buf, prefix.String()...), nil +} + +func (InetCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case NetipPrefixScanner: + return scanPlanBinaryInetToNetipPrefixScanner{} + } + case TextFormatCode: + switch target.(type) { + case NetipPrefixScanner: + return scanPlanTextAnyToNetipPrefixScanner{} + } + } + + return nil +} + +func (c InetCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c InetCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var prefix netip.Prefix + err := codecScan(c, m, oid, format, src, (*netipPrefixWrapper)(&prefix)) + if err != nil { + return nil, err + } + + if !prefix.IsValid() { + return nil, nil + } + + return prefix, nil +} + +type scanPlanBinaryInetToNetipPrefixScanner struct{} + +func (scanPlanBinaryInetToNetipPrefixScanner) Scan(src []byte, dst any) error { + scanner := (dst).(NetipPrefixScanner) + + if src == nil { + return scanner.ScanNetipPrefix(netip.Prefix{}) + } + + if len(src) != 8 && len(src) != 20 { + return fmt.Errorf("Received an invalid size for an inet: %d", len(src)) + } + + // ignore family + bits := src[1] + // ignore is_cidr + // ignore addressLength - implicit in length of message + + addr, ok := netip.AddrFromSlice(src[4:]) + if !ok { + return errors.New("netip.AddrFromSlice failed") + } + + return scanner.ScanNetipPrefix(netip.PrefixFrom(addr, int(bits))) +} + +type scanPlanTextAnyToNetipPrefixScanner struct{} + +func (scanPlanTextAnyToNetipPrefixScanner) Scan(src []byte, dst any) error { + scanner := (dst).(NetipPrefixScanner) + + if src == nil { + return scanner.ScanNetipPrefix(netip.Prefix{}) + } + + var prefix netip.Prefix + if bytes.IndexByte(src, '/') == -1 { + addr, err := netip.ParseAddr(string(src)) + if err != nil { + return err + } + prefix = netip.PrefixFrom(addr, addr.BitLen()) + } else { + var err error + prefix, err = netip.ParsePrefix(string(src)) + if err != nil { + return err + } + } + + return scanner.ScanNetipPrefix(prefix) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/int.go b/vendor/github.com/jackc/pgx/v5/pgtype/int.go new file mode 100644 index 00000000..90a20a26 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/int.go @@ -0,0 +1,1980 @@ +// Do not edit. Generated from pgtype/int.go.erb +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "encoding/json" + "fmt" + "math" + "strconv" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type Int64Scanner interface { + ScanInt64(Int8) error +} + +type Int64Valuer interface { + Int64Value() (Int8, error) +} + +type Int2 struct { + Int16 int16 + Valid bool +} + +// ScanInt64 implements the Int64Scanner interface. +func (dst *Int2) ScanInt64(n Int8) error { + if !n.Valid { + *dst = Int2{} + return nil + } + + if n.Int64 < math.MinInt16 { + return fmt.Errorf("%d is less than minimum value for Int2", n.Int64) + } + if n.Int64 > math.MaxInt16 { + return fmt.Errorf("%d is greater than maximum value for Int2", n.Int64) + } + *dst = Int2{Int16: int16(n.Int64), Valid: true} + + return nil +} + +func (n Int2) Int64Value() (Int8, error) { + return Int8{Int64: int64(n.Int16), Valid: n.Valid}, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Int2) Scan(src any) error { + if src == nil { + *dst = Int2{} + return nil + } + + var n int64 + + switch src := src.(type) { + case int64: + n = src + case string: + var err error + n, err = strconv.ParseInt(src, 10, 16) + if err != nil { + return err + } + case []byte: + var err error + n, err = strconv.ParseInt(string(src), 10, 16) + if err != nil { + return err + } + default: + return fmt.Errorf("cannot scan %T", src) + } + + if n < math.MinInt16 { + return fmt.Errorf("%d is greater than maximum value for Int2", n) + } + if n > math.MaxInt16 { + return fmt.Errorf("%d is greater than maximum value for Int2", n) + } + *dst = Int2{Int16: int16(n), Valid: true} + + return nil +} + +// Value implements the database/sql/driver Valuer interface. +func (src Int2) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + return int64(src.Int16), nil +} + +func (src Int2) MarshalJSON() ([]byte, error) { + if !src.Valid { + return []byte("null"), nil + } + return []byte(strconv.FormatInt(int64(src.Int16), 10)), nil +} + +func (dst *Int2) UnmarshalJSON(b []byte) error { + var n *int16 + err := json.Unmarshal(b, &n) + if err != nil { + return err + } + + if n == nil { + *dst = Int2{} + } else { + *dst = Int2{Int16: *n, Valid: true} + } + + return nil +} + +type Int2Codec struct{} + +func (Int2Codec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (Int2Codec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (Int2Codec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case int16: + return encodePlanInt2CodecBinaryInt16{} + case Int64Valuer: + return encodePlanInt2CodecBinaryInt64Valuer{} + } + case TextFormatCode: + switch value.(type) { + case int16: + return encodePlanInt2CodecTextInt16{} + case Int64Valuer: + return encodePlanInt2CodecTextInt64Valuer{} + } + } + + return nil +} + +type encodePlanInt2CodecBinaryInt16 struct{} + +func (encodePlanInt2CodecBinaryInt16) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(int16) + return pgio.AppendInt16(buf, int16(n)), nil +} + +type encodePlanInt2CodecTextInt16 struct{} + +func (encodePlanInt2CodecTextInt16) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(int16) + return append(buf, strconv.FormatInt(int64(n), 10)...), nil +} + +type encodePlanInt2CodecBinaryInt64Valuer struct{} + +func (encodePlanInt2CodecBinaryInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + if n.Int64 > math.MaxInt16 { + return nil, fmt.Errorf("%d is greater than maximum value for int2", n.Int64) + } + if n.Int64 < math.MinInt16 { + return nil, fmt.Errorf("%d is less than minimum value for int2", n.Int64) + } + + return pgio.AppendInt16(buf, int16(n.Int64)), nil +} + +type encodePlanInt2CodecTextInt64Valuer struct{} + +func (encodePlanInt2CodecTextInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + if n.Int64 > math.MaxInt16 { + return nil, fmt.Errorf("%d is greater than maximum value for int2", n.Int64) + } + if n.Int64 < math.MinInt16 { + return nil, fmt.Errorf("%d is less than minimum value for int2", n.Int64) + } + + return append(buf, strconv.FormatInt(n.Int64, 10)...), nil +} + +func (Int2Codec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case *int8: + return scanPlanBinaryInt2ToInt8{} + case *int16: + return scanPlanBinaryInt2ToInt16{} + case *int32: + return scanPlanBinaryInt2ToInt32{} + case *int64: + return scanPlanBinaryInt2ToInt64{} + case *int: + return scanPlanBinaryInt2ToInt{} + case *uint8: + return scanPlanBinaryInt2ToUint8{} + case *uint16: + return scanPlanBinaryInt2ToUint16{} + case *uint32: + return scanPlanBinaryInt2ToUint32{} + case *uint64: + return scanPlanBinaryInt2ToUint64{} + case *uint: + return scanPlanBinaryInt2ToUint{} + case Int64Scanner: + return scanPlanBinaryInt2ToInt64Scanner{} + case TextScanner: + return scanPlanBinaryInt2ToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case *int8: + return scanPlanTextAnyToInt8{} + case *int16: + return scanPlanTextAnyToInt16{} + case *int32: + return scanPlanTextAnyToInt32{} + case *int64: + return scanPlanTextAnyToInt64{} + case *int: + return scanPlanTextAnyToInt{} + case *uint8: + return scanPlanTextAnyToUint8{} + case *uint16: + return scanPlanTextAnyToUint16{} + case *uint32: + return scanPlanTextAnyToUint32{} + case *uint64: + return scanPlanTextAnyToUint64{} + case *uint: + return scanPlanTextAnyToUint{} + case Int64Scanner: + return scanPlanTextAnyToInt64Scanner{} + } + } + + return nil +} + +func (c Int2Codec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var n int64 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} + +func (c Int2Codec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var n int16 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} + +type scanPlanBinaryInt2ToInt8 struct{} + +func (scanPlanBinaryInt2ToInt8) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for int2: %v", len(src)) + } + + p, ok := (dst).(*int8) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int16(binary.BigEndian.Uint16(src)) + if n < math.MinInt8 { + return fmt.Errorf("%d is less than minimum value for int8", n) + } else if n > math.MaxInt8 { + return fmt.Errorf("%d is greater than maximum value for int8", n) + } + + *p = int8(n) + + return nil +} + +type scanPlanBinaryInt2ToUint8 struct{} + +func (scanPlanBinaryInt2ToUint8) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for uint2: %v", len(src)) + } + + p, ok := (dst).(*uint8) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int16(binary.BigEndian.Uint16(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint8", n) + } + + if n > math.MaxUint8 { + return fmt.Errorf("%d is greater than maximum value for uint8", n) + } + + *p = uint8(n) + + return nil +} + +type scanPlanBinaryInt2ToInt16 struct{} + +func (scanPlanBinaryInt2ToInt16) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for int2: %v", len(src)) + } + + p, ok := (dst).(*int16) + if !ok { + return ErrScanTargetTypeChanged + } + + *p = int16(binary.BigEndian.Uint16(src)) + + return nil +} + +type scanPlanBinaryInt2ToUint16 struct{} + +func (scanPlanBinaryInt2ToUint16) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for uint2: %v", len(src)) + } + + p, ok := (dst).(*uint16) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int16(binary.BigEndian.Uint16(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint16", n) + } + + *p = uint16(n) + + return nil +} + +type scanPlanBinaryInt2ToInt32 struct{} + +func (scanPlanBinaryInt2ToInt32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for int2: %v", len(src)) + } + + p, ok := (dst).(*int32) + if !ok { + return ErrScanTargetTypeChanged + } + + *p = int32(int16(binary.BigEndian.Uint16(src))) + + return nil +} + +type scanPlanBinaryInt2ToUint32 struct{} + +func (scanPlanBinaryInt2ToUint32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for uint2: %v", len(src)) + } + + p, ok := (dst).(*uint32) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int16(binary.BigEndian.Uint16(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint32", n) + } + + *p = uint32(n) + + return nil +} + +type scanPlanBinaryInt2ToInt64 struct{} + +func (scanPlanBinaryInt2ToInt64) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for int2: %v", len(src)) + } + + p, ok := (dst).(*int64) + if !ok { + return ErrScanTargetTypeChanged + } + + *p = int64(int16(binary.BigEndian.Uint16(src))) + + return nil +} + +type scanPlanBinaryInt2ToUint64 struct{} + +func (scanPlanBinaryInt2ToUint64) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for uint2: %v", len(src)) + } + + p, ok := (dst).(*uint64) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int16(binary.BigEndian.Uint16(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint64", n) + } + + *p = uint64(n) + + return nil +} + +type scanPlanBinaryInt2ToInt struct{} + +func (scanPlanBinaryInt2ToInt) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for int2: %v", len(src)) + } + + p, ok := (dst).(*int) + if !ok { + return ErrScanTargetTypeChanged + } + + *p = int(int16(binary.BigEndian.Uint16(src))) + + return nil +} + +type scanPlanBinaryInt2ToUint struct{} + +func (scanPlanBinaryInt2ToUint) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for uint2: %v", len(src)) + } + + p, ok := (dst).(*uint) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(int16(binary.BigEndian.Uint16(src))) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint", n) + } + + *p = uint(n) + + return nil +} + +type scanPlanBinaryInt2ToInt64Scanner struct{} + +func (scanPlanBinaryInt2ToInt64Scanner) Scan(src []byte, dst any) error { + s, ok := (dst).(Int64Scanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanInt64(Int8{}) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for int2: %v", len(src)) + } + + n := int64(int16(binary.BigEndian.Uint16(src))) + + return s.ScanInt64(Int8{Int64: n, Valid: true}) +} + +type scanPlanBinaryInt2ToTextScanner struct{} + +func (scanPlanBinaryInt2ToTextScanner) Scan(src []byte, dst any) error { + s, ok := (dst).(TextScanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanText(Text{}) + } + + if len(src) != 2 { + return fmt.Errorf("invalid length for int2: %v", len(src)) + } + + n := int64(int16(binary.BigEndian.Uint16(src))) + + return s.ScanText(Text{String: strconv.FormatInt(n, 10), Valid: true}) +} + +type Int4 struct { + Int32 int32 + Valid bool +} + +// ScanInt64 implements the Int64Scanner interface. +func (dst *Int4) ScanInt64(n Int8) error { + if !n.Valid { + *dst = Int4{} + return nil + } + + if n.Int64 < math.MinInt32 { + return fmt.Errorf("%d is less than minimum value for Int4", n.Int64) + } + if n.Int64 > math.MaxInt32 { + return fmt.Errorf("%d is greater than maximum value for Int4", n.Int64) + } + *dst = Int4{Int32: int32(n.Int64), Valid: true} + + return nil +} + +func (n Int4) Int64Value() (Int8, error) { + return Int8{Int64: int64(n.Int32), Valid: n.Valid}, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Int4) Scan(src any) error { + if src == nil { + *dst = Int4{} + return nil + } + + var n int64 + + switch src := src.(type) { + case int64: + n = src + case string: + var err error + n, err = strconv.ParseInt(src, 10, 32) + if err != nil { + return err + } + case []byte: + var err error + n, err = strconv.ParseInt(string(src), 10, 32) + if err != nil { + return err + } + default: + return fmt.Errorf("cannot scan %T", src) + } + + if n < math.MinInt32 { + return fmt.Errorf("%d is greater than maximum value for Int4", n) + } + if n > math.MaxInt32 { + return fmt.Errorf("%d is greater than maximum value for Int4", n) + } + *dst = Int4{Int32: int32(n), Valid: true} + + return nil +} + +// Value implements the database/sql/driver Valuer interface. +func (src Int4) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + return int64(src.Int32), nil +} + +func (src Int4) MarshalJSON() ([]byte, error) { + if !src.Valid { + return []byte("null"), nil + } + return []byte(strconv.FormatInt(int64(src.Int32), 10)), nil +} + +func (dst *Int4) UnmarshalJSON(b []byte) error { + var n *int32 + err := json.Unmarshal(b, &n) + if err != nil { + return err + } + + if n == nil { + *dst = Int4{} + } else { + *dst = Int4{Int32: *n, Valid: true} + } + + return nil +} + +type Int4Codec struct{} + +func (Int4Codec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (Int4Codec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (Int4Codec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case int32: + return encodePlanInt4CodecBinaryInt32{} + case Int64Valuer: + return encodePlanInt4CodecBinaryInt64Valuer{} + } + case TextFormatCode: + switch value.(type) { + case int32: + return encodePlanInt4CodecTextInt32{} + case Int64Valuer: + return encodePlanInt4CodecTextInt64Valuer{} + } + } + + return nil +} + +type encodePlanInt4CodecBinaryInt32 struct{} + +func (encodePlanInt4CodecBinaryInt32) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(int32) + return pgio.AppendInt32(buf, int32(n)), nil +} + +type encodePlanInt4CodecTextInt32 struct{} + +func (encodePlanInt4CodecTextInt32) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(int32) + return append(buf, strconv.FormatInt(int64(n), 10)...), nil +} + +type encodePlanInt4CodecBinaryInt64Valuer struct{} + +func (encodePlanInt4CodecBinaryInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + if n.Int64 > math.MaxInt32 { + return nil, fmt.Errorf("%d is greater than maximum value for int4", n.Int64) + } + if n.Int64 < math.MinInt32 { + return nil, fmt.Errorf("%d is less than minimum value for int4", n.Int64) + } + + return pgio.AppendInt32(buf, int32(n.Int64)), nil +} + +type encodePlanInt4CodecTextInt64Valuer struct{} + +func (encodePlanInt4CodecTextInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + if n.Int64 > math.MaxInt32 { + return nil, fmt.Errorf("%d is greater than maximum value for int4", n.Int64) + } + if n.Int64 < math.MinInt32 { + return nil, fmt.Errorf("%d is less than minimum value for int4", n.Int64) + } + + return append(buf, strconv.FormatInt(n.Int64, 10)...), nil +} + +func (Int4Codec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case *int8: + return scanPlanBinaryInt4ToInt8{} + case *int16: + return scanPlanBinaryInt4ToInt16{} + case *int32: + return scanPlanBinaryInt4ToInt32{} + case *int64: + return scanPlanBinaryInt4ToInt64{} + case *int: + return scanPlanBinaryInt4ToInt{} + case *uint8: + return scanPlanBinaryInt4ToUint8{} + case *uint16: + return scanPlanBinaryInt4ToUint16{} + case *uint32: + return scanPlanBinaryInt4ToUint32{} + case *uint64: + return scanPlanBinaryInt4ToUint64{} + case *uint: + return scanPlanBinaryInt4ToUint{} + case Int64Scanner: + return scanPlanBinaryInt4ToInt64Scanner{} + case TextScanner: + return scanPlanBinaryInt4ToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case *int8: + return scanPlanTextAnyToInt8{} + case *int16: + return scanPlanTextAnyToInt16{} + case *int32: + return scanPlanTextAnyToInt32{} + case *int64: + return scanPlanTextAnyToInt64{} + case *int: + return scanPlanTextAnyToInt{} + case *uint8: + return scanPlanTextAnyToUint8{} + case *uint16: + return scanPlanTextAnyToUint16{} + case *uint32: + return scanPlanTextAnyToUint32{} + case *uint64: + return scanPlanTextAnyToUint64{} + case *uint: + return scanPlanTextAnyToUint{} + case Int64Scanner: + return scanPlanTextAnyToInt64Scanner{} + } + } + + return nil +} + +func (c Int4Codec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var n int64 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} + +func (c Int4Codec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var n int32 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} + +type scanPlanBinaryInt4ToInt8 struct{} + +func (scanPlanBinaryInt4ToInt8) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for int4: %v", len(src)) + } + + p, ok := (dst).(*int8) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int32(binary.BigEndian.Uint32(src)) + if n < math.MinInt8 { + return fmt.Errorf("%d is less than minimum value for int8", n) + } else if n > math.MaxInt8 { + return fmt.Errorf("%d is greater than maximum value for int8", n) + } + + *p = int8(n) + + return nil +} + +type scanPlanBinaryInt4ToUint8 struct{} + +func (scanPlanBinaryInt4ToUint8) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for uint4: %v", len(src)) + } + + p, ok := (dst).(*uint8) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int32(binary.BigEndian.Uint32(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint8", n) + } + + if n > math.MaxUint8 { + return fmt.Errorf("%d is greater than maximum value for uint8", n) + } + + *p = uint8(n) + + return nil +} + +type scanPlanBinaryInt4ToInt16 struct{} + +func (scanPlanBinaryInt4ToInt16) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for int4: %v", len(src)) + } + + p, ok := (dst).(*int16) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int32(binary.BigEndian.Uint32(src)) + if n < math.MinInt16 { + return fmt.Errorf("%d is less than minimum value for int16", n) + } else if n > math.MaxInt16 { + return fmt.Errorf("%d is greater than maximum value for int16", n) + } + + *p = int16(n) + + return nil +} + +type scanPlanBinaryInt4ToUint16 struct{} + +func (scanPlanBinaryInt4ToUint16) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for uint4: %v", len(src)) + } + + p, ok := (dst).(*uint16) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int32(binary.BigEndian.Uint32(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint16", n) + } + + if n > math.MaxUint16 { + return fmt.Errorf("%d is greater than maximum value for uint16", n) + } + + *p = uint16(n) + + return nil +} + +type scanPlanBinaryInt4ToInt32 struct{} + +func (scanPlanBinaryInt4ToInt32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for int4: %v", len(src)) + } + + p, ok := (dst).(*int32) + if !ok { + return ErrScanTargetTypeChanged + } + + *p = int32(binary.BigEndian.Uint32(src)) + + return nil +} + +type scanPlanBinaryInt4ToUint32 struct{} + +func (scanPlanBinaryInt4ToUint32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for uint4: %v", len(src)) + } + + p, ok := (dst).(*uint32) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int32(binary.BigEndian.Uint32(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint32", n) + } + + *p = uint32(n) + + return nil +} + +type scanPlanBinaryInt4ToInt64 struct{} + +func (scanPlanBinaryInt4ToInt64) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for int4: %v", len(src)) + } + + p, ok := (dst).(*int64) + if !ok { + return ErrScanTargetTypeChanged + } + + *p = int64(int32(binary.BigEndian.Uint32(src))) + + return nil +} + +type scanPlanBinaryInt4ToUint64 struct{} + +func (scanPlanBinaryInt4ToUint64) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for uint4: %v", len(src)) + } + + p, ok := (dst).(*uint64) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int32(binary.BigEndian.Uint32(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint64", n) + } + + *p = uint64(n) + + return nil +} + +type scanPlanBinaryInt4ToInt struct{} + +func (scanPlanBinaryInt4ToInt) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for int4: %v", len(src)) + } + + p, ok := (dst).(*int) + if !ok { + return ErrScanTargetTypeChanged + } + + *p = int(int32(binary.BigEndian.Uint32(src))) + + return nil +} + +type scanPlanBinaryInt4ToUint struct{} + +func (scanPlanBinaryInt4ToUint) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for uint4: %v", len(src)) + } + + p, ok := (dst).(*uint) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(int32(binary.BigEndian.Uint32(src))) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint", n) + } + + *p = uint(n) + + return nil +} + +type scanPlanBinaryInt4ToInt64Scanner struct{} + +func (scanPlanBinaryInt4ToInt64Scanner) Scan(src []byte, dst any) error { + s, ok := (dst).(Int64Scanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanInt64(Int8{}) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for int4: %v", len(src)) + } + + n := int64(int32(binary.BigEndian.Uint32(src))) + + return s.ScanInt64(Int8{Int64: n, Valid: true}) +} + +type scanPlanBinaryInt4ToTextScanner struct{} + +func (scanPlanBinaryInt4ToTextScanner) Scan(src []byte, dst any) error { + s, ok := (dst).(TextScanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanText(Text{}) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for int4: %v", len(src)) + } + + n := int64(int32(binary.BigEndian.Uint32(src))) + + return s.ScanText(Text{String: strconv.FormatInt(n, 10), Valid: true}) +} + +type Int8 struct { + Int64 int64 + Valid bool +} + +// ScanInt64 implements the Int64Scanner interface. +func (dst *Int8) ScanInt64(n Int8) error { + if !n.Valid { + *dst = Int8{} + return nil + } + + if n.Int64 < math.MinInt64 { + return fmt.Errorf("%d is less than minimum value for Int8", n.Int64) + } + if n.Int64 > math.MaxInt64 { + return fmt.Errorf("%d is greater than maximum value for Int8", n.Int64) + } + *dst = Int8{Int64: int64(n.Int64), Valid: true} + + return nil +} + +func (n Int8) Int64Value() (Int8, error) { + return Int8{Int64: int64(n.Int64), Valid: n.Valid}, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Int8) Scan(src any) error { + if src == nil { + *dst = Int8{} + return nil + } + + var n int64 + + switch src := src.(type) { + case int64: + n = src + case string: + var err error + n, err = strconv.ParseInt(src, 10, 64) + if err != nil { + return err + } + case []byte: + var err error + n, err = strconv.ParseInt(string(src), 10, 64) + if err != nil { + return err + } + default: + return fmt.Errorf("cannot scan %T", src) + } + + if n < math.MinInt64 { + return fmt.Errorf("%d is greater than maximum value for Int8", n) + } + if n > math.MaxInt64 { + return fmt.Errorf("%d is greater than maximum value for Int8", n) + } + *dst = Int8{Int64: int64(n), Valid: true} + + return nil +} + +// Value implements the database/sql/driver Valuer interface. +func (src Int8) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + return int64(src.Int64), nil +} + +func (src Int8) MarshalJSON() ([]byte, error) { + if !src.Valid { + return []byte("null"), nil + } + return []byte(strconv.FormatInt(int64(src.Int64), 10)), nil +} + +func (dst *Int8) UnmarshalJSON(b []byte) error { + var n *int64 + err := json.Unmarshal(b, &n) + if err != nil { + return err + } + + if n == nil { + *dst = Int8{} + } else { + *dst = Int8{Int64: *n, Valid: true} + } + + return nil +} + +type Int8Codec struct{} + +func (Int8Codec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (Int8Codec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (Int8Codec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case int64: + return encodePlanInt8CodecBinaryInt64{} + case Int64Valuer: + return encodePlanInt8CodecBinaryInt64Valuer{} + } + case TextFormatCode: + switch value.(type) { + case int64: + return encodePlanInt8CodecTextInt64{} + case Int64Valuer: + return encodePlanInt8CodecTextInt64Valuer{} + } + } + + return nil +} + +type encodePlanInt8CodecBinaryInt64 struct{} + +func (encodePlanInt8CodecBinaryInt64) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(int64) + return pgio.AppendInt64(buf, int64(n)), nil +} + +type encodePlanInt8CodecTextInt64 struct{} + +func (encodePlanInt8CodecTextInt64) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(int64) + return append(buf, strconv.FormatInt(int64(n), 10)...), nil +} + +type encodePlanInt8CodecBinaryInt64Valuer struct{} + +func (encodePlanInt8CodecBinaryInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + if n.Int64 > math.MaxInt64 { + return nil, fmt.Errorf("%d is greater than maximum value for int8", n.Int64) + } + if n.Int64 < math.MinInt64 { + return nil, fmt.Errorf("%d is less than minimum value for int8", n.Int64) + } + + return pgio.AppendInt64(buf, int64(n.Int64)), nil +} + +type encodePlanInt8CodecTextInt64Valuer struct{} + +func (encodePlanInt8CodecTextInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + if n.Int64 > math.MaxInt64 { + return nil, fmt.Errorf("%d is greater than maximum value for int8", n.Int64) + } + if n.Int64 < math.MinInt64 { + return nil, fmt.Errorf("%d is less than minimum value for int8", n.Int64) + } + + return append(buf, strconv.FormatInt(n.Int64, 10)...), nil +} + +func (Int8Codec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case *int8: + return scanPlanBinaryInt8ToInt8{} + case *int16: + return scanPlanBinaryInt8ToInt16{} + case *int32: + return scanPlanBinaryInt8ToInt32{} + case *int64: + return scanPlanBinaryInt8ToInt64{} + case *int: + return scanPlanBinaryInt8ToInt{} + case *uint8: + return scanPlanBinaryInt8ToUint8{} + case *uint16: + return scanPlanBinaryInt8ToUint16{} + case *uint32: + return scanPlanBinaryInt8ToUint32{} + case *uint64: + return scanPlanBinaryInt8ToUint64{} + case *uint: + return scanPlanBinaryInt8ToUint{} + case Int64Scanner: + return scanPlanBinaryInt8ToInt64Scanner{} + case TextScanner: + return scanPlanBinaryInt8ToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case *int8: + return scanPlanTextAnyToInt8{} + case *int16: + return scanPlanTextAnyToInt16{} + case *int32: + return scanPlanTextAnyToInt32{} + case *int64: + return scanPlanTextAnyToInt64{} + case *int: + return scanPlanTextAnyToInt{} + case *uint8: + return scanPlanTextAnyToUint8{} + case *uint16: + return scanPlanTextAnyToUint16{} + case *uint32: + return scanPlanTextAnyToUint32{} + case *uint64: + return scanPlanTextAnyToUint64{} + case *uint: + return scanPlanTextAnyToUint{} + case Int64Scanner: + return scanPlanTextAnyToInt64Scanner{} + } + } + + return nil +} + +func (c Int8Codec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var n int64 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} + +func (c Int8Codec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var n int64 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} + +type scanPlanBinaryInt8ToInt8 struct{} + +func (scanPlanBinaryInt8ToInt8) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for int8: %v", len(src)) + } + + p, ok := (dst).(*int8) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(binary.BigEndian.Uint64(src)) + if n < math.MinInt8 { + return fmt.Errorf("%d is less than minimum value for int8", n) + } else if n > math.MaxInt8 { + return fmt.Errorf("%d is greater than maximum value for int8", n) + } + + *p = int8(n) + + return nil +} + +type scanPlanBinaryInt8ToUint8 struct{} + +func (scanPlanBinaryInt8ToUint8) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for uint8: %v", len(src)) + } + + p, ok := (dst).(*uint8) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(binary.BigEndian.Uint64(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint8", n) + } + + if n > math.MaxUint8 { + return fmt.Errorf("%d is greater than maximum value for uint8", n) + } + + *p = uint8(n) + + return nil +} + +type scanPlanBinaryInt8ToInt16 struct{} + +func (scanPlanBinaryInt8ToInt16) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for int8: %v", len(src)) + } + + p, ok := (dst).(*int16) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(binary.BigEndian.Uint64(src)) + if n < math.MinInt16 { + return fmt.Errorf("%d is less than minimum value for int16", n) + } else if n > math.MaxInt16 { + return fmt.Errorf("%d is greater than maximum value for int16", n) + } + + *p = int16(n) + + return nil +} + +type scanPlanBinaryInt8ToUint16 struct{} + +func (scanPlanBinaryInt8ToUint16) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for uint8: %v", len(src)) + } + + p, ok := (dst).(*uint16) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(binary.BigEndian.Uint64(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint16", n) + } + + if n > math.MaxUint16 { + return fmt.Errorf("%d is greater than maximum value for uint16", n) + } + + *p = uint16(n) + + return nil +} + +type scanPlanBinaryInt8ToInt32 struct{} + +func (scanPlanBinaryInt8ToInt32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for int8: %v", len(src)) + } + + p, ok := (dst).(*int32) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(binary.BigEndian.Uint64(src)) + if n < math.MinInt32 { + return fmt.Errorf("%d is less than minimum value for int32", n) + } else if n > math.MaxInt32 { + return fmt.Errorf("%d is greater than maximum value for int32", n) + } + + *p = int32(n) + + return nil +} + +type scanPlanBinaryInt8ToUint32 struct{} + +func (scanPlanBinaryInt8ToUint32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for uint8: %v", len(src)) + } + + p, ok := (dst).(*uint32) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(binary.BigEndian.Uint64(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint32", n) + } + + if n > math.MaxUint32 { + return fmt.Errorf("%d is greater than maximum value for uint32", n) + } + + *p = uint32(n) + + return nil +} + +type scanPlanBinaryInt8ToInt64 struct{} + +func (scanPlanBinaryInt8ToInt64) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for int8: %v", len(src)) + } + + p, ok := (dst).(*int64) + if !ok { + return ErrScanTargetTypeChanged + } + + *p = int64(binary.BigEndian.Uint64(src)) + + return nil +} + +type scanPlanBinaryInt8ToUint64 struct{} + +func (scanPlanBinaryInt8ToUint64) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for uint8: %v", len(src)) + } + + p, ok := (dst).(*uint64) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(binary.BigEndian.Uint64(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint64", n) + } + + *p = uint64(n) + + return nil +} + +type scanPlanBinaryInt8ToInt struct{} + +func (scanPlanBinaryInt8ToInt) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for int8: %v", len(src)) + } + + p, ok := (dst).(*int) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(binary.BigEndian.Uint64(src)) + if n < math.MinInt { + return fmt.Errorf("%d is less than minimum value for int", n) + } else if n > math.MaxInt { + return fmt.Errorf("%d is greater than maximum value for int", n) + } + + *p = int(n) + + return nil +} + +type scanPlanBinaryInt8ToUint struct{} + +func (scanPlanBinaryInt8ToUint) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for uint8: %v", len(src)) + } + + p, ok := (dst).(*uint) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(int64(binary.BigEndian.Uint64(src))) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint", n) + } + + if uint64(n) > math.MaxUint { + return fmt.Errorf("%d is greater than maximum value for uint", n) + } + + *p = uint(n) + + return nil +} + +type scanPlanBinaryInt8ToInt64Scanner struct{} + +func (scanPlanBinaryInt8ToInt64Scanner) Scan(src []byte, dst any) error { + s, ok := (dst).(Int64Scanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanInt64(Int8{}) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for int8: %v", len(src)) + } + + n := int64(int64(binary.BigEndian.Uint64(src))) + + return s.ScanInt64(Int8{Int64: n, Valid: true}) +} + +type scanPlanBinaryInt8ToTextScanner struct{} + +func (scanPlanBinaryInt8ToTextScanner) Scan(src []byte, dst any) error { + s, ok := (dst).(TextScanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanText(Text{}) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for int8: %v", len(src)) + } + + n := int64(int64(binary.BigEndian.Uint64(src))) + + return s.ScanText(Text{String: strconv.FormatInt(n, 10), Valid: true}) +} + +type scanPlanTextAnyToInt8 struct{} + +func (scanPlanTextAnyToInt8) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*int8) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseInt(string(src), 10, 8) + if err != nil { + return err + } + + *p = int8(n) + return nil +} + +type scanPlanTextAnyToUint8 struct{} + +func (scanPlanTextAnyToUint8) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*uint8) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseUint(string(src), 10, 8) + if err != nil { + return err + } + + *p = uint8(n) + return nil +} + +type scanPlanTextAnyToInt16 struct{} + +func (scanPlanTextAnyToInt16) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*int16) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseInt(string(src), 10, 16) + if err != nil { + return err + } + + *p = int16(n) + return nil +} + +type scanPlanTextAnyToUint16 struct{} + +func (scanPlanTextAnyToUint16) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*uint16) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseUint(string(src), 10, 16) + if err != nil { + return err + } + + *p = uint16(n) + return nil +} + +type scanPlanTextAnyToInt32 struct{} + +func (scanPlanTextAnyToInt32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*int32) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseInt(string(src), 10, 32) + if err != nil { + return err + } + + *p = int32(n) + return nil +} + +type scanPlanTextAnyToUint32 struct{} + +func (scanPlanTextAnyToUint32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*uint32) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseUint(string(src), 10, 32) + if err != nil { + return err + } + + *p = uint32(n) + return nil +} + +type scanPlanTextAnyToInt64 struct{} + +func (scanPlanTextAnyToInt64) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*int64) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseInt(string(src), 10, 64) + if err != nil { + return err + } + + *p = int64(n) + return nil +} + +type scanPlanTextAnyToUint64 struct{} + +func (scanPlanTextAnyToUint64) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*uint64) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseUint(string(src), 10, 64) + if err != nil { + return err + } + + *p = uint64(n) + return nil +} + +type scanPlanTextAnyToInt struct{} + +func (scanPlanTextAnyToInt) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*int) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseInt(string(src), 10, 0) + if err != nil { + return err + } + + *p = int(n) + return nil +} + +type scanPlanTextAnyToUint struct{} + +func (scanPlanTextAnyToUint) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*uint) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseUint(string(src), 10, 0) + if err != nil { + return err + } + + *p = uint(n) + return nil +} + +type scanPlanTextAnyToInt64Scanner struct{} + +func (scanPlanTextAnyToInt64Scanner) Scan(src []byte, dst any) error { + s, ok := (dst).(Int64Scanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanInt64(Int8{}) + } + + n, err := strconv.ParseInt(string(src), 10, 64) + if err != nil { + return err + } + + err = s.ScanInt64(Int8{Int64: n, Valid: true}) + if err != nil { + return err + } + + return nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/int.go.erb b/vendor/github.com/jackc/pgx/v5/pgtype/int.go.erb new file mode 100644 index 00000000..e0c8b7a3 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/int.go.erb @@ -0,0 +1,548 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "encoding/json" + "fmt" + "math" + "strconv" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type Int64Scanner interface { + ScanInt64(Int8) error +} + +type Int64Valuer interface { + Int64Value() (Int8, error) +} + + +<% [2, 4, 8].each do |pg_byte_size| %> +<% pg_bit_size = pg_byte_size * 8 %> +type Int<%= pg_byte_size %> struct { + Int<%= pg_bit_size %> int<%= pg_bit_size %> + Valid bool +} + +// ScanInt64 implements the Int64Scanner interface. +func (dst *Int<%= pg_byte_size %>) ScanInt64(n Int8) error { + if !n.Valid { + *dst = Int<%= pg_byte_size %>{} + return nil + } + + if n.Int64 < math.MinInt<%= pg_bit_size %> { + return fmt.Errorf("%d is less than minimum value for Int<%= pg_byte_size %>", n.Int64) + } + if n.Int64 > math.MaxInt<%= pg_bit_size %> { + return fmt.Errorf("%d is greater than maximum value for Int<%= pg_byte_size %>", n.Int64) + } + *dst = Int<%= pg_byte_size %>{Int<%= pg_bit_size %>: int<%= pg_bit_size %>(n.Int64), Valid: true} + + return nil +} + +func (n Int<%= pg_byte_size %>) Int64Value() (Int8, error) { + return Int8{Int64: int64(n.Int<%= pg_bit_size %>), Valid: n.Valid}, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Int<%= pg_byte_size %>) Scan(src any) error { + if src == nil { + *dst = Int<%= pg_byte_size %>{} + return nil + } + + var n int64 + + switch src := src.(type) { + case int64: + n = src + case string: + var err error + n, err = strconv.ParseInt(src, 10, <%= pg_bit_size %>) + if err != nil { + return err + } + case []byte: + var err error + n, err = strconv.ParseInt(string(src), 10, <%= pg_bit_size %>) + if err != nil { + return err + } + default: + return fmt.Errorf("cannot scan %T", src) + } + + if n < math.MinInt<%= pg_bit_size %> { + return fmt.Errorf("%d is greater than maximum value for Int<%= pg_byte_size %>", n) + } + if n > math.MaxInt<%= pg_bit_size %> { + return fmt.Errorf("%d is greater than maximum value for Int<%= pg_byte_size %>", n) + } + *dst = Int<%= pg_byte_size %>{Int<%= pg_bit_size %>: int<%= pg_bit_size %>(n), Valid: true} + + return nil +} + +// Value implements the database/sql/driver Valuer interface. +func (src Int<%= pg_byte_size %>) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + return int64(src.Int<%= pg_bit_size %>), nil +} + +func (src Int<%= pg_byte_size %>) MarshalJSON() ([]byte, error) { + if !src.Valid { + return []byte("null"), nil + } + return []byte(strconv.FormatInt(int64(src.Int<%= pg_bit_size %>), 10)), nil +} + +func (dst *Int<%= pg_byte_size %>) UnmarshalJSON(b []byte) error { + var n *int<%= pg_bit_size %> + err := json.Unmarshal(b, &n) + if err != nil { + return err + } + + if n == nil { + *dst = Int<%= pg_byte_size %>{} + } else { + *dst = Int<%= pg_byte_size %>{Int<%= pg_bit_size %>: *n, Valid: true} + } + + return nil +} + +type Int<%= pg_byte_size %>Codec struct{} + +func (Int<%= pg_byte_size %>Codec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (Int<%= pg_byte_size %>Codec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (Int<%= pg_byte_size %>Codec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case int<%= pg_bit_size %>: + return encodePlanInt<%= pg_byte_size %>CodecBinaryInt<%= pg_bit_size %>{} + case Int64Valuer: + return encodePlanInt<%= pg_byte_size %>CodecBinaryInt64Valuer{} + } + case TextFormatCode: + switch value.(type) { + case int<%= pg_bit_size %>: + return encodePlanInt<%= pg_byte_size %>CodecTextInt<%= pg_bit_size %>{} + case Int64Valuer: + return encodePlanInt<%= pg_byte_size %>CodecTextInt64Valuer{} + } + } + + return nil +} + +type encodePlanInt<%= pg_byte_size %>CodecBinaryInt<%= pg_bit_size %> struct{} + +func (encodePlanInt<%= pg_byte_size %>CodecBinaryInt<%= pg_bit_size %>) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(int<%= pg_bit_size %>) + return pgio.AppendInt<%= pg_bit_size %>(buf, int<%= pg_bit_size %>(n)), nil +} + +type encodePlanInt<%= pg_byte_size %>CodecTextInt<%= pg_bit_size %> struct{} + +func (encodePlanInt<%= pg_byte_size %>CodecTextInt<%= pg_bit_size %>) Encode(value any, buf []byte) (newBuf []byte, err error) { + n := value.(int<%= pg_bit_size %>) + return append(buf, strconv.FormatInt(int64(n), 10)...), nil +} + +type encodePlanInt<%= pg_byte_size %>CodecBinaryInt64Valuer struct{} + +func (encodePlanInt<%= pg_byte_size %>CodecBinaryInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + if n.Int64 > math.MaxInt<%= pg_bit_size %> { + return nil, fmt.Errorf("%d is greater than maximum value for int<%= pg_byte_size %>", n.Int64) + } + if n.Int64 < math.MinInt<%= pg_bit_size %> { + return nil, fmt.Errorf("%d is less than minimum value for int<%= pg_byte_size %>", n.Int64) + } + + return pgio.AppendInt<%= pg_bit_size %>(buf, int<%= pg_bit_size %>(n.Int64)), nil +} + +type encodePlanInt<%= pg_byte_size %>CodecTextInt64Valuer struct{} + +func (encodePlanInt<%= pg_byte_size %>CodecTextInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + if n.Int64 > math.MaxInt<%= pg_bit_size %> { + return nil, fmt.Errorf("%d is greater than maximum value for int<%= pg_byte_size %>", n.Int64) + } + if n.Int64 < math.MinInt<%= pg_bit_size %> { + return nil, fmt.Errorf("%d is less than minimum value for int<%= pg_byte_size %>", n.Int64) + } + + return append(buf, strconv.FormatInt(n.Int64, 10)...), nil +} + +func (Int<%= pg_byte_size %>Codec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case *int8: + return scanPlanBinaryInt<%= pg_byte_size %>ToInt8{} + case *int16: + return scanPlanBinaryInt<%= pg_byte_size %>ToInt16{} + case *int32: + return scanPlanBinaryInt<%= pg_byte_size %>ToInt32{} + case *int64: + return scanPlanBinaryInt<%= pg_byte_size %>ToInt64{} + case *int: + return scanPlanBinaryInt<%= pg_byte_size %>ToInt{} + case *uint8: + return scanPlanBinaryInt<%= pg_byte_size %>ToUint8{} + case *uint16: + return scanPlanBinaryInt<%= pg_byte_size %>ToUint16{} + case *uint32: + return scanPlanBinaryInt<%= pg_byte_size %>ToUint32{} + case *uint64: + return scanPlanBinaryInt<%= pg_byte_size %>ToUint64{} + case *uint: + return scanPlanBinaryInt<%= pg_byte_size %>ToUint{} + case Int64Scanner: + return scanPlanBinaryInt<%= pg_byte_size %>ToInt64Scanner{} + case TextScanner: + return scanPlanBinaryInt<%= pg_byte_size %>ToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case *int8: + return scanPlanTextAnyToInt8{} + case *int16: + return scanPlanTextAnyToInt16{} + case *int32: + return scanPlanTextAnyToInt32{} + case *int64: + return scanPlanTextAnyToInt64{} + case *int: + return scanPlanTextAnyToInt{} + case *uint8: + return scanPlanTextAnyToUint8{} + case *uint16: + return scanPlanTextAnyToUint16{} + case *uint32: + return scanPlanTextAnyToUint32{} + case *uint64: + return scanPlanTextAnyToUint64{} + case *uint: + return scanPlanTextAnyToUint{} + case Int64Scanner: + return scanPlanTextAnyToInt64Scanner{} + } + } + + return nil +} + +func (c Int<%= pg_byte_size %>Codec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var n int64 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} + +func (c Int<%= pg_byte_size %>Codec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var n int<%= pg_bit_size %> + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} + +<%# PostgreSQL binary format integer to fixed size Go integers %> +<% [8, 16, 32, 64].each do |dst_bit_size| %> +type scanPlanBinaryInt<%= pg_byte_size %>ToInt<%= dst_bit_size %> struct{} + +func (scanPlanBinaryInt<%= pg_byte_size %>ToInt<%= dst_bit_size %>) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != <%= pg_byte_size %> { + return fmt.Errorf("invalid length for int<%= pg_byte_size %>: %v", len(src)) + } + + p, ok := (dst).(*int<%= dst_bit_size %>) + if !ok { + return ErrScanTargetTypeChanged + } + + <% if dst_bit_size < pg_bit_size %> + n := int<%= pg_bit_size %>(binary.BigEndian.Uint<%= pg_bit_size %>(src)) + if n < math.MinInt<%= dst_bit_size %> { + return fmt.Errorf("%d is less than minimum value for int<%= dst_bit_size %>", n) + } else if n > math.MaxInt<%= dst_bit_size %> { + return fmt.Errorf("%d is greater than maximum value for int<%= dst_bit_size %>", n) + } + + *p = int<%= dst_bit_size %>(n) + <% elsif dst_bit_size == pg_bit_size %> + *p = int<%= dst_bit_size %>(binary.BigEndian.Uint<%= pg_bit_size %>(src)) + <% else %> + *p = int<%= dst_bit_size %>(int<%= pg_bit_size %>(binary.BigEndian.Uint<%= pg_bit_size %>(src))) + <% end %> + + return nil +} + +type scanPlanBinaryInt<%= pg_byte_size %>ToUint<%= dst_bit_size %> struct{} + +func (scanPlanBinaryInt<%= pg_byte_size %>ToUint<%= dst_bit_size %>) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != <%= pg_byte_size %> { + return fmt.Errorf("invalid length for uint<%= pg_byte_size %>: %v", len(src)) + } + + p, ok := (dst).(*uint<%= dst_bit_size %>) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int<%= pg_bit_size %>(binary.BigEndian.Uint<%= pg_bit_size %>(src)) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint<%= dst_bit_size %>", n) + } + <% if dst_bit_size < pg_bit_size %> + if n > math.MaxUint<%= dst_bit_size %> { + return fmt.Errorf("%d is greater than maximum value for uint<%= dst_bit_size %>", n) + } + <% end %> + *p = uint<%= dst_bit_size %>(n) + + return nil +} +<% end %> + +<%# PostgreSQL binary format integer to Go machine integers %> +type scanPlanBinaryInt<%= pg_byte_size %>ToInt struct{} + +func (scanPlanBinaryInt<%= pg_byte_size %>ToInt) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != <%= pg_byte_size %> { + return fmt.Errorf("invalid length for int<%= pg_byte_size %>: %v", len(src)) + } + + p, ok := (dst).(*int) + if !ok { + return ErrScanTargetTypeChanged + } + + <% if 32 < pg_bit_size %> + n := int64(binary.BigEndian.Uint<%= pg_bit_size %>(src)) + if n < math.MinInt { + return fmt.Errorf("%d is less than minimum value for int", n) + } else if n > math.MaxInt { + return fmt.Errorf("%d is greater than maximum value for int", n) + } + + *p = int(n) + <% else %> + *p = int(int<%= pg_bit_size %>(binary.BigEndian.Uint<%= pg_bit_size %>(src))) + <% end %> + + return nil +} + +type scanPlanBinaryInt<%= pg_byte_size %>ToUint struct{} + +func (scanPlanBinaryInt<%= pg_byte_size %>ToUint) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != <%= pg_byte_size %> { + return fmt.Errorf("invalid length for uint<%= pg_byte_size %>: %v", len(src)) + } + + p, ok := (dst).(*uint) + if !ok { + return ErrScanTargetTypeChanged + } + + n := int64(int<%= pg_bit_size %>(binary.BigEndian.Uint<%= pg_bit_size %>(src))) + if n < 0 { + return fmt.Errorf("%d is less than minimum value for uint", n) + } + <% if 32 < pg_bit_size %> + if uint64(n) > math.MaxUint { + return fmt.Errorf("%d is greater than maximum value for uint", n) + } + <% end %> + *p = uint(n) + + return nil +} + +<%# PostgreSQL binary format integer to Go Int64Scanner %> +type scanPlanBinaryInt<%= pg_byte_size %>ToInt64Scanner struct{} + +func (scanPlanBinaryInt<%= pg_byte_size %>ToInt64Scanner) Scan(src []byte, dst any) error { + s, ok := (dst).(Int64Scanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanInt64(Int8{}) + } + + if len(src) != <%= pg_byte_size %> { + return fmt.Errorf("invalid length for int<%= pg_byte_size %>: %v", len(src)) + } + + + n := int64(int<%= pg_bit_size %>(binary.BigEndian.Uint<%= pg_bit_size %>(src))) + + return s.ScanInt64(Int8{Int64: n, Valid: true}) +} + +<%# PostgreSQL binary format integer to Go TextScanner %> +type scanPlanBinaryInt<%= pg_byte_size %>ToTextScanner struct{} + +func (scanPlanBinaryInt<%= pg_byte_size %>ToTextScanner) Scan(src []byte, dst any) error { + s, ok := (dst).(TextScanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanText(Text{}) + } + + if len(src) != <%= pg_byte_size %> { + return fmt.Errorf("invalid length for int<%= pg_byte_size %>: %v", len(src)) + } + + + n := int64(int<%= pg_bit_size %>(binary.BigEndian.Uint<%= pg_bit_size %>(src))) + + return s.ScanText(Text{String: strconv.FormatInt(n, 10), Valid: true}) +} +<% end %> + +<%# Any text to all integer types %> +<% [ + ["8", 8], + ["16", 16], + ["32", 32], + ["64", 64], + ["", 0] +].each do |type_suffix, bit_size| %> +type scanPlanTextAnyToInt<%= type_suffix %> struct{} + +func (scanPlanTextAnyToInt<%= type_suffix %>) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*int<%= type_suffix %>) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseInt(string(src), 10, <%= bit_size %>) + if err != nil { + return err + } + + *p = int<%= type_suffix %>(n) + return nil +} + +type scanPlanTextAnyToUint<%= type_suffix %> struct{} + +func (scanPlanTextAnyToUint<%= type_suffix %>) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p, ok := (dst).(*uint<%= type_suffix %>) + if !ok { + return ErrScanTargetTypeChanged + } + + n, err := strconv.ParseUint(string(src), 10, <%= bit_size %>) + if err != nil { + return err + } + + *p = uint<%= type_suffix %>(n) + return nil +} +<% end %> + +type scanPlanTextAnyToInt64Scanner struct{} + +func (scanPlanTextAnyToInt64Scanner) Scan(src []byte, dst any) error { + s, ok := (dst).(Int64Scanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanInt64(Int8{}) + } + + n, err := strconv.ParseInt(string(src), 10, 64) + if err != nil { + return err + } + + err = s.ScanInt64(Int8{Int64: n, Valid: true}) + if err != nil { + return err + } + + return nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/int_test.go.erb b/vendor/github.com/jackc/pgx/v5/pgtype/int_test.go.erb new file mode 100644 index 00000000..ac9a3f14 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/int_test.go.erb @@ -0,0 +1,93 @@ +package pgtype_test + +import ( + "math" + "testing" + + "github.com/jackc/pgx/v5/pgtype" +) + +<% [2, 4, 8].each do |pg_byte_size| %> +<% pg_bit_size = pg_byte_size * 8 %> +func TestInt<%= pg_byte_size %>Codec(t *testing.T) { + pgxtest.RunValueRoundTripTests(context.Background(), t, defaultConnTestRunner, nil, "int<%= pg_byte_size %>", []pgxtest.ValueRoundTripTest{ + {int8(1), new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {int16(1), new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {int32(1), new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {int64(1), new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {uint8(1), new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {uint16(1), new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {uint32(1), new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {uint64(1), new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {int(1), new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {uint(1), new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {pgtype.Int<%= pg_byte_size %>{Int<%= pg_bit_size %>: 1, Valid: true}, new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {int32(-1), new(pgtype.Int<%= pg_byte_size %>), isExpectedEq(pgtype.Int<%= pg_byte_size %>{Int<%= pg_bit_size %>: -1, Valid: true})}, + {1, new(int8), isExpectedEq(int8(1))}, + {1, new(int16), isExpectedEq(int16(1))}, + {1, new(int32), isExpectedEq(int32(1))}, + {1, new(int64), isExpectedEq(int64(1))}, + {1, new(uint8), isExpectedEq(uint8(1))}, + {1, new(uint16), isExpectedEq(uint16(1))}, + {1, new(uint32), isExpectedEq(uint32(1))}, + {1, new(uint64), isExpectedEq(uint64(1))}, + {1, new(int), isExpectedEq(int(1))}, + {1, new(uint), isExpectedEq(uint(1))}, + {-1, new(int8), isExpectedEq(int8(-1))}, + {-1, new(int16), isExpectedEq(int16(-1))}, + {-1, new(int32), isExpectedEq(int32(-1))}, + {-1, new(int64), isExpectedEq(int64(-1))}, + {-1, new(int), isExpectedEq(int(-1))}, + {math.MinInt<%= pg_bit_size %>, new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(math.MinInt<%= pg_bit_size %>))}, + {-1, new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(-1))}, + {0, new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(0))}, + {1, new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(1))}, + {math.MaxInt<%= pg_bit_size %>, new(int<%= pg_bit_size %>), isExpectedEq(int<%= pg_bit_size %>(math.MaxInt<%= pg_bit_size %>))}, + {1, new(pgtype.Int<%= pg_byte_size %>), isExpectedEq(pgtype.Int<%= pg_byte_size %>{Int<%= pg_bit_size %>: 1, Valid: true})}, + {"1", new(string), isExpectedEq("1")}, + {pgtype.Int<%= pg_byte_size %>{}, new(pgtype.Int<%= pg_byte_size %>), isExpectedEq(pgtype.Int<%= pg_byte_size %>{})}, + {nil, new(*int<%= pg_bit_size %>), isExpectedEq((*int<%= pg_bit_size %>)(nil))}, + }) +} + +func TestInt<%= pg_byte_size %>MarshalJSON(t *testing.T) { + successfulTests := []struct { + source pgtype.Int<%= pg_byte_size %> + result string + }{ + {source: pgtype.Int<%= pg_byte_size %>{Int<%= pg_bit_size %>: 0}, result: "null"}, + {source: pgtype.Int<%= pg_byte_size %>{Int<%= pg_bit_size %>: 1, Valid: true}, result: "1"}, + } + for i, tt := range successfulTests { + r, err := tt.source.MarshalJSON() + if err != nil { + t.Errorf("%d: %v", i, err) + } + + if string(r) != tt.result { + t.Errorf("%d: expected %v to convert to %v, but it was %v", i, tt.source, tt.result, string(r)) + } + } +} + +func TestInt<%= pg_byte_size %>UnmarshalJSON(t *testing.T) { + successfulTests := []struct { + source string + result pgtype.Int<%= pg_byte_size %> + }{ + {source: "null", result: pgtype.Int<%= pg_byte_size %>{Int<%= pg_bit_size %>: 0}}, + {source: "1", result: pgtype.Int<%= pg_byte_size %>{Int<%= pg_bit_size %>: 1, Valid: true}}, + } + for i, tt := range successfulTests { + var r pgtype.Int<%= pg_byte_size %> + err := r.UnmarshalJSON([]byte(tt.source)) + if err != nil { + t.Errorf("%d: %v", i, err) + } + + if r != tt.result { + t.Errorf("%d: expected %v to convert to %v, but it was %v", i, tt.source, tt.result, r) + } + } +} +<% end %> diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/integration_benchmark_test.go.erb b/vendor/github.com/jackc/pgx/v5/pgtype/integration_benchmark_test.go.erb new file mode 100644 index 00000000..0175700a --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/integration_benchmark_test.go.erb @@ -0,0 +1,62 @@ +package pgtype_test + +import ( + "context" + "testing" + + "github.com/jackc/pgx/v5/pgtype/testutil" + "github.com/jackc/pgx/v5" +) + +<% + [ + ["int4", ["int16", "int32", "int64", "uint64", "pgtype.Int4"], [[1, 1], [1, 10], [10, 1], [100, 10]]], + ["numeric", ["int64", "float64", "pgtype.Numeric"], [[1, 1], [1, 10], [10, 1], [100, 10]]], + ].each do |pg_type, go_types, rows_columns| +%> +<% go_types.each do |go_type| %> +<% rows_columns.each do |rows, columns| %> +<% [["Text", "pgx.TextFormatCode"], ["Binary", "pgx.BinaryFormatCode"]].each do |format_name, format_code| %> +func BenchmarkQuery<%= format_name %>FormatDecode_PG_<%= pg_type %>_to_Go_<%= go_type.gsub(/\W/, "_") %>_<%= rows %>_rows_<%= columns %>_columns(b *testing.B) { + defaultConnTestRunner.RunTest(context.Background(), b, func(ctx context.Context, _ testing.TB, conn *pgx.Conn) { + b.ResetTimer() + var v [<%= columns %>]<%= go_type %> + for i := 0; i < b.N; i++ { + rows, _ := conn.Query( + ctx, + `select <% columns.times do |col_idx| %><% if col_idx != 0 %>, <% end %>n::<%= pg_type %> + <%= col_idx%><% end %> from generate_series(1, <%= rows %>) n`, + []any{pgx.QueryResultFormats{<%= format_code %>}}, + ) + _, err := pgx.ForEachRow(rows, []any{<% columns.times do |col_idx| %><% if col_idx != 0 %>, <% end %>&v[<%= col_idx%>]<% end %>}, func() error { return nil }) + if err != nil { + b.Fatal(err) + } + } + }) +} +<% end %> +<% end %> +<% end %> +<% end %> + +<% [10, 100, 1000].each do |array_size| %> +<% [["Text", "pgx.TextFormatCode"], ["Binary", "pgx.BinaryFormatCode"]].each do |format_name, format_code| %> +func BenchmarkQuery<%= format_name %>FormatDecode_PG_Int4Array_With_Go_Int4Array_<%= array_size %>(b *testing.B) { + defaultConnTestRunner.RunTest(context.Background(), b, func(ctx context.Context, _ testing.TB, conn *pgx.Conn) { + b.ResetTimer() + var v []int32 + for i := 0; i < b.N; i++ { + rows, _ := conn.Query( + ctx, + `select array_agg(n) from generate_series(1, <%= array_size %>) n`, + []any{pgx.QueryResultFormats{<%= format_code %>}}, + ) + _, err := pgx.ForEachRow(rows, []any{&v}, func() error { return nil }) + if err != nil { + b.Fatal(err) + } + } + }) +} +<% end %> +<% end %> diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/integration_benchmark_test_gen.sh b/vendor/github.com/jackc/pgx/v5/pgtype/integration_benchmark_test_gen.sh new file mode 100644 index 00000000..22ac01aa --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/integration_benchmark_test_gen.sh @@ -0,0 +1,2 @@ +erb integration_benchmark_test.go.erb > integration_benchmark_test.go +goimports -w integration_benchmark_test.go diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/interval.go b/vendor/github.com/jackc/pgx/v5/pgtype/interval.go new file mode 100644 index 00000000..4b511629 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/interval.go @@ -0,0 +1,297 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +const ( + microsecondsPerSecond = 1000000 + microsecondsPerMinute = 60 * microsecondsPerSecond + microsecondsPerHour = 60 * microsecondsPerMinute + microsecondsPerDay = 24 * microsecondsPerHour + microsecondsPerMonth = 30 * microsecondsPerDay +) + +type IntervalScanner interface { + ScanInterval(v Interval) error +} + +type IntervalValuer interface { + IntervalValue() (Interval, error) +} + +type Interval struct { + Microseconds int64 + Days int32 + Months int32 + Valid bool +} + +func (interval *Interval) ScanInterval(v Interval) error { + *interval = v + return nil +} + +func (interval Interval) IntervalValue() (Interval, error) { + return interval, nil +} + +// Scan implements the database/sql Scanner interface. +func (interval *Interval) Scan(src any) error { + if src == nil { + *interval = Interval{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToIntervalScanner{}.Scan([]byte(src), interval) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (interval Interval) Value() (driver.Value, error) { + if !interval.Valid { + return nil, nil + } + + buf, err := IntervalCodec{}.PlanEncode(nil, 0, TextFormatCode, interval).Encode(interval, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +type IntervalCodec struct{} + +func (IntervalCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (IntervalCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (IntervalCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(IntervalValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanIntervalCodecBinary{} + case TextFormatCode: + return encodePlanIntervalCodecText{} + } + + return nil +} + +type encodePlanIntervalCodecBinary struct{} + +func (encodePlanIntervalCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + interval, err := value.(IntervalValuer).IntervalValue() + if err != nil { + return nil, err + } + + if !interval.Valid { + return nil, nil + } + + buf = pgio.AppendInt64(buf, interval.Microseconds) + buf = pgio.AppendInt32(buf, interval.Days) + buf = pgio.AppendInt32(buf, interval.Months) + return buf, nil +} + +type encodePlanIntervalCodecText struct{} + +func (encodePlanIntervalCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + interval, err := value.(IntervalValuer).IntervalValue() + if err != nil { + return nil, err + } + + if !interval.Valid { + return nil, nil + } + + if interval.Months != 0 { + buf = append(buf, strconv.FormatInt(int64(interval.Months), 10)...) + buf = append(buf, " mon "...) + } + + if interval.Days != 0 { + buf = append(buf, strconv.FormatInt(int64(interval.Days), 10)...) + buf = append(buf, " day "...) + } + + absMicroseconds := interval.Microseconds + if absMicroseconds < 0 { + absMicroseconds = -absMicroseconds + buf = append(buf, '-') + } + + hours := absMicroseconds / microsecondsPerHour + minutes := (absMicroseconds % microsecondsPerHour) / microsecondsPerMinute + seconds := (absMicroseconds % microsecondsPerMinute) / microsecondsPerSecond + + timeStr := fmt.Sprintf("%02d:%02d:%02d", hours, minutes, seconds) + buf = append(buf, timeStr...) + + microseconds := absMicroseconds % microsecondsPerSecond + if microseconds != 0 { + buf = append(buf, fmt.Sprintf(".%06d", microseconds)...) + } + + return buf, nil +} + +func (IntervalCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case IntervalScanner: + return scanPlanBinaryIntervalToIntervalScanner{} + } + case TextFormatCode: + switch target.(type) { + case IntervalScanner: + return scanPlanTextAnyToIntervalScanner{} + } + } + + return nil +} + +type scanPlanBinaryIntervalToIntervalScanner struct{} + +func (scanPlanBinaryIntervalToIntervalScanner) Scan(src []byte, dst any) error { + scanner := (dst).(IntervalScanner) + + if src == nil { + return scanner.ScanInterval(Interval{}) + } + + if len(src) != 16 { + return fmt.Errorf("Received an invalid size for an interval: %d", len(src)) + } + + microseconds := int64(binary.BigEndian.Uint64(src)) + days := int32(binary.BigEndian.Uint32(src[8:])) + months := int32(binary.BigEndian.Uint32(src[12:])) + + return scanner.ScanInterval(Interval{Microseconds: microseconds, Days: days, Months: months, Valid: true}) +} + +type scanPlanTextAnyToIntervalScanner struct{} + +func (scanPlanTextAnyToIntervalScanner) Scan(src []byte, dst any) error { + scanner := (dst).(IntervalScanner) + + if src == nil { + return scanner.ScanInterval(Interval{}) + } + + var microseconds int64 + var days int32 + var months int32 + + parts := strings.Split(string(src), " ") + + for i := 0; i < len(parts)-1; i += 2 { + scalar, err := strconv.ParseInt(parts[i], 10, 64) + if err != nil { + return fmt.Errorf("bad interval format") + } + + switch parts[i+1] { + case "year", "years": + months += int32(scalar * 12) + case "mon", "mons": + months += int32(scalar) + case "day", "days": + days = int32(scalar) + } + } + + if len(parts)%2 == 1 { + timeParts := strings.SplitN(parts[len(parts)-1], ":", 3) + if len(timeParts) != 3 { + return fmt.Errorf("bad interval format") + } + + var negative bool + if timeParts[0][0] == '-' { + negative = true + timeParts[0] = timeParts[0][1:] + } + + hours, err := strconv.ParseInt(timeParts[0], 10, 64) + if err != nil { + return fmt.Errorf("bad interval hour format: %s", timeParts[0]) + } + + minutes, err := strconv.ParseInt(timeParts[1], 10, 64) + if err != nil { + return fmt.Errorf("bad interval minute format: %s", timeParts[1]) + } + + sec, secFrac, secFracFound := strings.Cut(timeParts[2], ".") + + seconds, err := strconv.ParseInt(sec, 10, 64) + if err != nil { + return fmt.Errorf("bad interval second format: %s", sec) + } + + var uSeconds int64 + if secFracFound { + uSeconds, err = strconv.ParseInt(secFrac, 10, 64) + if err != nil { + return fmt.Errorf("bad interval decimal format: %s", secFrac) + } + + for i := 0; i < 6-len(secFrac); i++ { + uSeconds *= 10 + } + } + + microseconds = hours * microsecondsPerHour + microseconds += minutes * microsecondsPerMinute + microseconds += seconds * microsecondsPerSecond + microseconds += uSeconds + + if negative { + microseconds = -microseconds + } + } + + return scanner.ScanInterval(Interval{Months: months, Days: days, Microseconds: microseconds, Valid: true}) +} + +func (c IntervalCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c IntervalCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var interval Interval + err := codecScan(c, m, oid, format, src, &interval) + if err != nil { + return nil, err + } + return interval, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/json.go b/vendor/github.com/jackc/pgx/v5/pgtype/json.go new file mode 100644 index 00000000..c2aa0d3b --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/json.go @@ -0,0 +1,223 @@ +package pgtype + +import ( + "database/sql" + "database/sql/driver" + "encoding/json" + "fmt" + "reflect" +) + +type JSONCodec struct { + Marshal func(v any) ([]byte, error) + Unmarshal func(data []byte, v any) error +} + +func (*JSONCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (*JSONCodec) PreferredFormat() int16 { + return TextFormatCode +} + +func (c *JSONCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch value.(type) { + case string: + return encodePlanJSONCodecEitherFormatString{} + case []byte: + return encodePlanJSONCodecEitherFormatByteSlice{} + + // Handle json.RawMessage specifically because if it is run through json.Marshal it may be mutated. + // e.g. `{"foo": "bar"}` -> `{"foo":"bar"}`. + case json.RawMessage: + return encodePlanJSONCodecEitherFormatJSONRawMessage{} + + // Cannot rely on driver.Valuer being handled later because anything can be marshalled. + // + // https://github.com/jackc/pgx/issues/1430 + // + // Check for driver.Valuer must come before json.Marshaler so that it is guaranteed to be used + // when both are implemented https://github.com/jackc/pgx/issues/1805 + case driver.Valuer: + return &encodePlanDriverValuer{m: m, oid: oid, formatCode: format} + + // Must come before trying wrap encode plans because a pointer to a struct may be unwrapped to a struct that can be + // marshalled. + // + // https://github.com/jackc/pgx/issues/1681 + case json.Marshaler: + return &encodePlanJSONCodecEitherFormatMarshal{ + marshal: c.Marshal, + } + } + + // Because anything can be marshalled the normal wrapping in Map.PlanScan doesn't get a chance to run. So try the + // appropriate wrappers here. + for _, f := range []TryWrapEncodePlanFunc{ + TryWrapDerefPointerEncodePlan, + TryWrapFindUnderlyingTypeEncodePlan, + } { + if wrapperPlan, nextValue, ok := f(value); ok { + if nextPlan := c.PlanEncode(m, oid, format, nextValue); nextPlan != nil { + wrapperPlan.SetNext(nextPlan) + return wrapperPlan + } + } + } + + return &encodePlanJSONCodecEitherFormatMarshal{ + marshal: c.Marshal, + } +} + +type encodePlanJSONCodecEitherFormatString struct{} + +func (encodePlanJSONCodecEitherFormatString) Encode(value any, buf []byte) (newBuf []byte, err error) { + jsonString := value.(string) + buf = append(buf, jsonString...) + return buf, nil +} + +type encodePlanJSONCodecEitherFormatByteSlice struct{} + +func (encodePlanJSONCodecEitherFormatByteSlice) Encode(value any, buf []byte) (newBuf []byte, err error) { + jsonBytes := value.([]byte) + if jsonBytes == nil { + return nil, nil + } + + buf = append(buf, jsonBytes...) + return buf, nil +} + +type encodePlanJSONCodecEitherFormatJSONRawMessage struct{} + +func (encodePlanJSONCodecEitherFormatJSONRawMessage) Encode(value any, buf []byte) (newBuf []byte, err error) { + jsonBytes := value.(json.RawMessage) + if jsonBytes == nil { + return nil, nil + } + + buf = append(buf, jsonBytes...) + return buf, nil +} + +type encodePlanJSONCodecEitherFormatMarshal struct { + marshal func(v any) ([]byte, error) +} + +func (e *encodePlanJSONCodecEitherFormatMarshal) Encode(value any, buf []byte) (newBuf []byte, err error) { + jsonBytes, err := e.marshal(value) + if err != nil { + return nil, err + } + + buf = append(buf, jsonBytes...) + return buf, nil +} + +func (c *JSONCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch target.(type) { + case *string: + return scanPlanAnyToString{} + + case **string: + // This is to fix **string scanning. It seems wrong to special case **string, but it's not clear what a better + // solution would be. + // + // https://github.com/jackc/pgx/issues/1470 -- **string + // https://github.com/jackc/pgx/issues/1691 -- ** anything else + + if wrapperPlan, nextDst, ok := TryPointerPointerScanPlan(target); ok { + if nextPlan := m.planScan(oid, format, nextDst); nextPlan != nil { + if _, failed := nextPlan.(*scanPlanFail); !failed { + wrapperPlan.SetNext(nextPlan) + return wrapperPlan + } + } + } + + case *[]byte: + return scanPlanJSONToByteSlice{} + case BytesScanner: + return scanPlanBinaryBytesToBytesScanner{} + + // Cannot rely on sql.Scanner being handled later because scanPlanJSONToJSONUnmarshal will take precedence. + // + // https://github.com/jackc/pgx/issues/1418 + case sql.Scanner: + return &scanPlanSQLScanner{formatCode: format} + } + + return &scanPlanJSONToJSONUnmarshal{ + unmarshal: c.Unmarshal, + } +} + +type scanPlanAnyToString struct{} + +func (scanPlanAnyToString) Scan(src []byte, dst any) error { + p := dst.(*string) + *p = string(src) + return nil +} + +type scanPlanJSONToByteSlice struct{} + +func (scanPlanJSONToByteSlice) Scan(src []byte, dst any) error { + dstBuf := dst.(*[]byte) + if src == nil { + *dstBuf = nil + return nil + } + + *dstBuf = make([]byte, len(src)) + copy(*dstBuf, src) + return nil +} + +type scanPlanJSONToJSONUnmarshal struct { + unmarshal func(data []byte, v any) error +} + +func (s *scanPlanJSONToJSONUnmarshal) Scan(src []byte, dst any) error { + if src == nil { + dstValue := reflect.ValueOf(dst) + if dstValue.Kind() == reflect.Ptr { + el := dstValue.Elem() + switch el.Kind() { + case reflect.Ptr, reflect.Slice, reflect.Map, reflect.Interface: + el.Set(reflect.Zero(el.Type())) + return nil + } + } + + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + elem := reflect.ValueOf(dst).Elem() + elem.Set(reflect.Zero(elem.Type())) + + return s.unmarshal(src, dst) +} + +func (c *JSONCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + dstBuf := make([]byte, len(src)) + copy(dstBuf, src) + return dstBuf, nil +} + +func (c *JSONCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var dst any + err := c.Unmarshal(src, &dst) + return dst, err +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/jsonb.go b/vendor/github.com/jackc/pgx/v5/pgtype/jsonb.go new file mode 100644 index 00000000..4d4eb58e --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/jsonb.go @@ -0,0 +1,129 @@ +package pgtype + +import ( + "database/sql/driver" + "fmt" +) + +type JSONBCodec struct { + Marshal func(v any) ([]byte, error) + Unmarshal func(data []byte, v any) error +} + +func (*JSONBCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (*JSONBCodec) PreferredFormat() int16 { + return TextFormatCode +} + +func (c *JSONBCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + plan := (&JSONCodec{Marshal: c.Marshal, Unmarshal: c.Unmarshal}).PlanEncode(m, oid, TextFormatCode, value) + if plan != nil { + return &encodePlanJSONBCodecBinaryWrapper{textPlan: plan} + } + case TextFormatCode: + return (&JSONCodec{Marshal: c.Marshal, Unmarshal: c.Unmarshal}).PlanEncode(m, oid, format, value) + } + + return nil +} + +type encodePlanJSONBCodecBinaryWrapper struct { + textPlan EncodePlan +} + +func (plan *encodePlanJSONBCodecBinaryWrapper) Encode(value any, buf []byte) (newBuf []byte, err error) { + buf = append(buf, 1) + return plan.textPlan.Encode(value, buf) +} + +func (c *JSONBCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch format { + case BinaryFormatCode: + plan := (&JSONCodec{Marshal: c.Marshal, Unmarshal: c.Unmarshal}).PlanScan(m, oid, TextFormatCode, target) + if plan != nil { + return &scanPlanJSONBCodecBinaryUnwrapper{textPlan: plan} + } + case TextFormatCode: + return (&JSONCodec{Marshal: c.Marshal, Unmarshal: c.Unmarshal}).PlanScan(m, oid, format, target) + } + + return nil +} + +type scanPlanJSONBCodecBinaryUnwrapper struct { + textPlan ScanPlan +} + +func (plan *scanPlanJSONBCodecBinaryUnwrapper) Scan(src []byte, dst any) error { + if src == nil { + return plan.textPlan.Scan(src, dst) + } + + if len(src) == 0 { + return fmt.Errorf("jsonb too short") + } + + if src[0] != 1 { + return fmt.Errorf("unknown jsonb version number %d", src[0]) + } + + return plan.textPlan.Scan(src[1:], dst) +} + +func (c *JSONBCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + switch format { + case BinaryFormatCode: + if len(src) == 0 { + return nil, fmt.Errorf("jsonb too short") + } + + if src[0] != 1 { + return nil, fmt.Errorf("unknown jsonb version number %d", src[0]) + } + + dstBuf := make([]byte, len(src)-1) + copy(dstBuf, src[1:]) + return dstBuf, nil + case TextFormatCode: + dstBuf := make([]byte, len(src)) + copy(dstBuf, src) + return dstBuf, nil + default: + return nil, fmt.Errorf("unknown format code: %v", format) + } +} + +func (c *JSONBCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + switch format { + case BinaryFormatCode: + if len(src) == 0 { + return nil, fmt.Errorf("jsonb too short") + } + + if src[0] != 1 { + return nil, fmt.Errorf("unknown jsonb version number %d", src[0]) + } + + src = src[1:] + case TextFormatCode: + default: + return nil, fmt.Errorf("unknown format code: %v", format) + } + + var dst any + err := c.Unmarshal(src, &dst) + return dst, err +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/line.go b/vendor/github.com/jackc/pgx/v5/pgtype/line.go new file mode 100644 index 00000000..4ae8003e --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/line.go @@ -0,0 +1,225 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "math" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type LineScanner interface { + ScanLine(v Line) error +} + +type LineValuer interface { + LineValue() (Line, error) +} + +type Line struct { + A, B, C float64 + Valid bool +} + +func (line *Line) ScanLine(v Line) error { + *line = v + return nil +} + +func (line Line) LineValue() (Line, error) { + return line, nil +} + +func (line *Line) Set(src any) error { + return fmt.Errorf("cannot convert %v to Line", src) +} + +// Scan implements the database/sql Scanner interface. +func (line *Line) Scan(src any) error { + if src == nil { + *line = Line{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToLineScanner{}.Scan([]byte(src), line) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (line Line) Value() (driver.Value, error) { + if !line.Valid { + return nil, nil + } + + buf, err := LineCodec{}.PlanEncode(nil, 0, TextFormatCode, line).Encode(line, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +type LineCodec struct{} + +func (LineCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (LineCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (LineCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(LineValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanLineCodecBinary{} + case TextFormatCode: + return encodePlanLineCodecText{} + } + + return nil +} + +type encodePlanLineCodecBinary struct{} + +func (encodePlanLineCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + line, err := value.(LineValuer).LineValue() + if err != nil { + return nil, err + } + + if !line.Valid { + return nil, nil + } + + buf = pgio.AppendUint64(buf, math.Float64bits(line.A)) + buf = pgio.AppendUint64(buf, math.Float64bits(line.B)) + buf = pgio.AppendUint64(buf, math.Float64bits(line.C)) + return buf, nil +} + +type encodePlanLineCodecText struct{} + +func (encodePlanLineCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + line, err := value.(LineValuer).LineValue() + if err != nil { + return nil, err + } + + if !line.Valid { + return nil, nil + } + + buf = append(buf, fmt.Sprintf(`{%s,%s,%s}`, + strconv.FormatFloat(line.A, 'f', -1, 64), + strconv.FormatFloat(line.B, 'f', -1, 64), + strconv.FormatFloat(line.C, 'f', -1, 64), + )...) + return buf, nil +} + +func (LineCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case LineScanner: + return scanPlanBinaryLineToLineScanner{} + } + case TextFormatCode: + switch target.(type) { + case LineScanner: + return scanPlanTextAnyToLineScanner{} + } + } + + return nil +} + +type scanPlanBinaryLineToLineScanner struct{} + +func (scanPlanBinaryLineToLineScanner) Scan(src []byte, dst any) error { + scanner := (dst).(LineScanner) + + if src == nil { + return scanner.ScanLine(Line{}) + } + + if len(src) != 24 { + return fmt.Errorf("invalid length for line: %v", len(src)) + } + + a := binary.BigEndian.Uint64(src) + b := binary.BigEndian.Uint64(src[8:]) + c := binary.BigEndian.Uint64(src[16:]) + + return scanner.ScanLine(Line{ + A: math.Float64frombits(a), + B: math.Float64frombits(b), + C: math.Float64frombits(c), + Valid: true, + }) +} + +type scanPlanTextAnyToLineScanner struct{} + +func (scanPlanTextAnyToLineScanner) Scan(src []byte, dst any) error { + scanner := (dst).(LineScanner) + + if src == nil { + return scanner.ScanLine(Line{}) + } + + if len(src) < 7 { + return fmt.Errorf("invalid length for line: %v", len(src)) + } + + parts := strings.SplitN(string(src[1:len(src)-1]), ",", 3) + if len(parts) < 3 { + return fmt.Errorf("invalid format for line") + } + + a, err := strconv.ParseFloat(parts[0], 64) + if err != nil { + return err + } + + b, err := strconv.ParseFloat(parts[1], 64) + if err != nil { + return err + } + + c, err := strconv.ParseFloat(parts[2], 64) + if err != nil { + return err + } + + return scanner.ScanLine(Line{A: a, B: b, C: c, Valid: true}) +} + +func (c LineCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c LineCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var line Line + err := codecScan(c, m, oid, format, src, &line) + if err != nil { + return nil, err + } + return line, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/lseg.go b/vendor/github.com/jackc/pgx/v5/pgtype/lseg.go new file mode 100644 index 00000000..05a86e1c --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/lseg.go @@ -0,0 +1,238 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "math" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type LsegScanner interface { + ScanLseg(v Lseg) error +} + +type LsegValuer interface { + LsegValue() (Lseg, error) +} + +type Lseg struct { + P [2]Vec2 + Valid bool +} + +func (lseg *Lseg) ScanLseg(v Lseg) error { + *lseg = v + return nil +} + +func (lseg Lseg) LsegValue() (Lseg, error) { + return lseg, nil +} + +// Scan implements the database/sql Scanner interface. +func (lseg *Lseg) Scan(src any) error { + if src == nil { + *lseg = Lseg{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToLsegScanner{}.Scan([]byte(src), lseg) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (lseg Lseg) Value() (driver.Value, error) { + if !lseg.Valid { + return nil, nil + } + + buf, err := LsegCodec{}.PlanEncode(nil, 0, TextFormatCode, lseg).Encode(lseg, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +type LsegCodec struct{} + +func (LsegCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (LsegCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (LsegCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(LsegValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanLsegCodecBinary{} + case TextFormatCode: + return encodePlanLsegCodecText{} + } + + return nil +} + +type encodePlanLsegCodecBinary struct{} + +func (encodePlanLsegCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + lseg, err := value.(LsegValuer).LsegValue() + if err != nil { + return nil, err + } + + if !lseg.Valid { + return nil, nil + } + + buf = pgio.AppendUint64(buf, math.Float64bits(lseg.P[0].X)) + buf = pgio.AppendUint64(buf, math.Float64bits(lseg.P[0].Y)) + buf = pgio.AppendUint64(buf, math.Float64bits(lseg.P[1].X)) + buf = pgio.AppendUint64(buf, math.Float64bits(lseg.P[1].Y)) + return buf, nil +} + +type encodePlanLsegCodecText struct{} + +func (encodePlanLsegCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + lseg, err := value.(LsegValuer).LsegValue() + if err != nil { + return nil, err + } + + if !lseg.Valid { + return nil, nil + } + + buf = append(buf, fmt.Sprintf(`[(%s,%s),(%s,%s)]`, + strconv.FormatFloat(lseg.P[0].X, 'f', -1, 64), + strconv.FormatFloat(lseg.P[0].Y, 'f', -1, 64), + strconv.FormatFloat(lseg.P[1].X, 'f', -1, 64), + strconv.FormatFloat(lseg.P[1].Y, 'f', -1, 64), + )...) + return buf, nil +} + +func (LsegCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case LsegScanner: + return scanPlanBinaryLsegToLsegScanner{} + } + case TextFormatCode: + switch target.(type) { + case LsegScanner: + return scanPlanTextAnyToLsegScanner{} + } + } + + return nil +} + +type scanPlanBinaryLsegToLsegScanner struct{} + +func (scanPlanBinaryLsegToLsegScanner) Scan(src []byte, dst any) error { + scanner := (dst).(LsegScanner) + + if src == nil { + return scanner.ScanLseg(Lseg{}) + } + + if len(src) != 32 { + return fmt.Errorf("invalid length for lseg: %v", len(src)) + } + + x1 := binary.BigEndian.Uint64(src) + y1 := binary.BigEndian.Uint64(src[8:]) + x2 := binary.BigEndian.Uint64(src[16:]) + y2 := binary.BigEndian.Uint64(src[24:]) + + return scanner.ScanLseg(Lseg{ + P: [2]Vec2{ + {math.Float64frombits(x1), math.Float64frombits(y1)}, + {math.Float64frombits(x2), math.Float64frombits(y2)}, + }, + Valid: true, + }) +} + +type scanPlanTextAnyToLsegScanner struct{} + +func (scanPlanTextAnyToLsegScanner) Scan(src []byte, dst any) error { + scanner := (dst).(LsegScanner) + + if src == nil { + return scanner.ScanLseg(Lseg{}) + } + + if len(src) < 11 { + return fmt.Errorf("invalid length for lseg: %v", len(src)) + } + + str := string(src[2:]) + + var end int + end = strings.IndexByte(str, ',') + + x1, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + str = str[end+1:] + end = strings.IndexByte(str, ')') + + y1, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + str = str[end+3:] + end = strings.IndexByte(str, ',') + + x2, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + str = str[end+1 : len(str)-2] + + y2, err := strconv.ParseFloat(str, 64) + if err != nil { + return err + } + + return scanner.ScanLseg(Lseg{P: [2]Vec2{{x1, y1}, {x2, y2}}, Valid: true}) +} + +func (c LsegCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c LsegCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var lseg Lseg + err := codecScan(c, m, oid, format, src, &lseg) + if err != nil { + return nil, err + } + return lseg, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/ltree.go b/vendor/github.com/jackc/pgx/v5/pgtype/ltree.go new file mode 100644 index 00000000..6af31779 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/ltree.go @@ -0,0 +1,122 @@ +package pgtype + +import ( + "database/sql/driver" + "fmt" +) + +type LtreeCodec struct{} + +func (l LtreeCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +// PreferredFormat returns the preferred format. +func (l LtreeCodec) PreferredFormat() int16 { + return TextFormatCode +} + +// PlanEncode returns an EncodePlan for encoding value into PostgreSQL format for oid and format. If no plan can be +// found then nil is returned. +func (l LtreeCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case TextFormatCode: + return (TextCodec)(l).PlanEncode(m, oid, format, value) + case BinaryFormatCode: + switch value.(type) { + case string: + return encodeLtreeCodecBinaryString{} + case []byte: + return encodeLtreeCodecBinaryByteSlice{} + case TextValuer: + return encodeLtreeCodecBinaryTextValuer{} + } + } + + return nil +} + +type encodeLtreeCodecBinaryString struct{} + +func (encodeLtreeCodecBinaryString) Encode(value any, buf []byte) (newBuf []byte, err error) { + ltree := value.(string) + buf = append(buf, 1) + return append(buf, ltree...), nil +} + +type encodeLtreeCodecBinaryByteSlice struct{} + +func (encodeLtreeCodecBinaryByteSlice) Encode(value any, buf []byte) (newBuf []byte, err error) { + ltree := value.([]byte) + buf = append(buf, 1) + return append(buf, ltree...), nil +} + +type encodeLtreeCodecBinaryTextValuer struct{} + +func (encodeLtreeCodecBinaryTextValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + t, err := value.(TextValuer).TextValue() + if err != nil { + return nil, err + } + if !t.Valid { + return nil, nil + } + + buf = append(buf, 1) + return append(buf, t.String...), nil +} + +// PlanScan returns a ScanPlan for scanning a PostgreSQL value into a destination with the same type as target. If +// no plan can be found then nil is returned. +func (l LtreeCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch format { + case TextFormatCode: + return (TextCodec)(l).PlanScan(m, oid, format, target) + case BinaryFormatCode: + switch target.(type) { + case *string: + return scanPlanBinaryLtreeToString{} + case TextScanner: + return scanPlanBinaryLtreeToTextScanner{} + } + } + + return nil +} + +type scanPlanBinaryLtreeToString struct{} + +func (scanPlanBinaryLtreeToString) Scan(src []byte, target any) error { + version := src[0] + if version != 1 { + return fmt.Errorf("unsupported ltree version %d", version) + } + + p := (target).(*string) + *p = string(src[1:]) + + return nil +} + +type scanPlanBinaryLtreeToTextScanner struct{} + +func (scanPlanBinaryLtreeToTextScanner) Scan(src []byte, target any) error { + version := src[0] + if version != 1 { + return fmt.Errorf("unsupported ltree version %d", version) + } + + scanner := (target).(TextScanner) + return scanner.ScanText(Text{String: string(src[1:]), Valid: true}) +} + +// DecodeDatabaseSQLValue returns src decoded into a value compatible with the sql.Scanner interface. +func (l LtreeCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return (TextCodec)(l).DecodeDatabaseSQLValue(m, oid, format, src) +} + +// DecodeValue returns src decoded into its default format. +func (l LtreeCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + return (TextCodec)(l).DecodeValue(m, oid, format, src) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/macaddr.go b/vendor/github.com/jackc/pgx/v5/pgtype/macaddr.go new file mode 100644 index 00000000..e913ec90 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/macaddr.go @@ -0,0 +1,162 @@ +package pgtype + +import ( + "database/sql/driver" + "net" +) + +type MacaddrCodec struct{} + +func (MacaddrCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (MacaddrCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (MacaddrCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case net.HardwareAddr: + return encodePlanMacaddrCodecBinaryHardwareAddr{} + case TextValuer: + return encodePlanMacAddrCodecTextValuer{} + + } + case TextFormatCode: + switch value.(type) { + case net.HardwareAddr: + return encodePlanMacaddrCodecTextHardwareAddr{} + case TextValuer: + return encodePlanTextCodecTextValuer{} + } + } + + return nil +} + +type encodePlanMacaddrCodecBinaryHardwareAddr struct{} + +func (encodePlanMacaddrCodecBinaryHardwareAddr) Encode(value any, buf []byte) (newBuf []byte, err error) { + addr := value.(net.HardwareAddr) + if addr == nil { + return nil, nil + } + + return append(buf, addr...), nil +} + +type encodePlanMacAddrCodecTextValuer struct{} + +func (encodePlanMacAddrCodecTextValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + t, err := value.(TextValuer).TextValue() + if err != nil { + return nil, err + } + if !t.Valid { + return nil, nil + } + + addr, err := net.ParseMAC(t.String) + if err != nil { + return nil, err + } + + return append(buf, addr...), nil +} + +type encodePlanMacaddrCodecTextHardwareAddr struct{} + +func (encodePlanMacaddrCodecTextHardwareAddr) Encode(value any, buf []byte) (newBuf []byte, err error) { + addr := value.(net.HardwareAddr) + if addr == nil { + return nil, nil + } + + return append(buf, addr.String()...), nil +} + +func (MacaddrCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch format { + case BinaryFormatCode: + switch target.(type) { + case *net.HardwareAddr: + return scanPlanBinaryMacaddrToHardwareAddr{} + case TextScanner: + return scanPlanBinaryMacaddrToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case *net.HardwareAddr: + return scanPlanTextMacaddrToHardwareAddr{} + case TextScanner: + return scanPlanTextAnyToTextScanner{} + } + } + + return nil +} + +type scanPlanBinaryMacaddrToHardwareAddr struct{} + +func (scanPlanBinaryMacaddrToHardwareAddr) Scan(src []byte, dst any) error { + dstBuf := dst.(*net.HardwareAddr) + if src == nil { + *dstBuf = nil + return nil + } + + *dstBuf = make([]byte, len(src)) + copy(*dstBuf, src) + return nil +} + +type scanPlanBinaryMacaddrToTextScanner struct{} + +func (scanPlanBinaryMacaddrToTextScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TextScanner) + if src == nil { + return scanner.ScanText(Text{}) + } + + return scanner.ScanText(Text{String: net.HardwareAddr(src).String(), Valid: true}) +} + +type scanPlanTextMacaddrToHardwareAddr struct{} + +func (scanPlanTextMacaddrToHardwareAddr) Scan(src []byte, dst any) error { + p := dst.(*net.HardwareAddr) + + if src == nil { + *p = nil + return nil + } + + addr, err := net.ParseMAC(string(src)) + if err != nil { + return err + } + + *p = addr + + return nil +} + +func (c MacaddrCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c MacaddrCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var addr net.HardwareAddr + err := codecScan(c, m, oid, format, src, &addr) + if err != nil { + return nil, err + } + return addr, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/multirange.go b/vendor/github.com/jackc/pgx/v5/pgtype/multirange.go new file mode 100644 index 00000000..e5763788 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/multirange.go @@ -0,0 +1,443 @@ +package pgtype + +import ( + "bytes" + "database/sql/driver" + "encoding/binary" + "fmt" + "reflect" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +// MultirangeGetter is a type that can be converted into a PostgreSQL multirange. +type MultirangeGetter interface { + // IsNull returns true if the value is SQL NULL. + IsNull() bool + + // Len returns the number of elements in the multirange. + Len() int + + // Index returns the element at i. + Index(i int) any + + // IndexType returns a non-nil scan target of the type Index will return. This is used by MultirangeCodec.PlanEncode. + IndexType() any +} + +// MultirangeSetter is a type can be set from a PostgreSQL multirange. +type MultirangeSetter interface { + // ScanNull sets the value to SQL NULL. + ScanNull() error + + // SetLen prepares the value such that ScanIndex can be called for each element. This will remove any existing + // elements. + SetLen(n int) error + + // ScanIndex returns a value usable as a scan target for i. SetLen must be called before ScanIndex. + ScanIndex(i int) any + + // ScanIndexType returns a non-nil scan target of the type ScanIndex will return. This is used by + // MultirangeCodec.PlanScan. + ScanIndexType() any +} + +// MultirangeCodec is a codec for any multirange type. +type MultirangeCodec struct { + ElementType *Type +} + +func (c *MultirangeCodec) FormatSupported(format int16) bool { + return c.ElementType.Codec.FormatSupported(format) +} + +func (c *MultirangeCodec) PreferredFormat() int16 { + return c.ElementType.Codec.PreferredFormat() +} + +func (c *MultirangeCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + multirangeValuer, ok := value.(MultirangeGetter) + if !ok { + return nil + } + + elementType := multirangeValuer.IndexType() + + elementEncodePlan := m.PlanEncode(c.ElementType.OID, format, elementType) + if elementEncodePlan == nil { + return nil + } + + switch format { + case BinaryFormatCode: + return &encodePlanMultirangeCodecBinary{ac: c, m: m, oid: oid} + case TextFormatCode: + return &encodePlanMultirangeCodecText{ac: c, m: m, oid: oid} + } + + return nil +} + +type encodePlanMultirangeCodecText struct { + ac *MultirangeCodec + m *Map + oid uint32 +} + +func (p *encodePlanMultirangeCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + multirange := value.(MultirangeGetter) + + if multirange.IsNull() { + return nil, nil + } + + elementCount := multirange.Len() + + buf = append(buf, '{') + + var encodePlan EncodePlan + var lastElemType reflect.Type + inElemBuf := make([]byte, 0, 32) + for i := 0; i < elementCount; i++ { + if i > 0 { + buf = append(buf, ',') + } + + elem := multirange.Index(i) + var elemBuf []byte + if elem != nil { + elemType := reflect.TypeOf(elem) + if lastElemType != elemType { + lastElemType = elemType + encodePlan = p.m.PlanEncode(p.ac.ElementType.OID, TextFormatCode, elem) + if encodePlan == nil { + return nil, fmt.Errorf("unable to encode %v", multirange.Index(i)) + } + } + elemBuf, err = encodePlan.Encode(elem, inElemBuf) + if err != nil { + return nil, err + } + } + + if elemBuf == nil { + return nil, fmt.Errorf("multirange cannot contain NULL element") + } else { + buf = append(buf, elemBuf...) + } + } + + buf = append(buf, '}') + + return buf, nil +} + +type encodePlanMultirangeCodecBinary struct { + ac *MultirangeCodec + m *Map + oid uint32 +} + +func (p *encodePlanMultirangeCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + multirange := value.(MultirangeGetter) + + if multirange.IsNull() { + return nil, nil + } + + elementCount := multirange.Len() + + buf = pgio.AppendInt32(buf, int32(elementCount)) + + var encodePlan EncodePlan + var lastElemType reflect.Type + for i := 0; i < elementCount; i++ { + sp := len(buf) + buf = pgio.AppendInt32(buf, -1) + + elem := multirange.Index(i) + var elemBuf []byte + if elem != nil { + elemType := reflect.TypeOf(elem) + if lastElemType != elemType { + lastElemType = elemType + encodePlan = p.m.PlanEncode(p.ac.ElementType.OID, BinaryFormatCode, elem) + if encodePlan == nil { + return nil, fmt.Errorf("unable to encode %v", multirange.Index(i)) + } + } + elemBuf, err = encodePlan.Encode(elem, buf) + if err != nil { + return nil, err + } + } + + if elemBuf == nil { + return nil, fmt.Errorf("multirange cannot contain NULL element") + } else { + buf = elemBuf + pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) + } + } + + return buf, nil +} + +func (c *MultirangeCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + multirangeScanner, ok := target.(MultirangeSetter) + if !ok { + return nil + } + + elementType := multirangeScanner.ScanIndexType() + + elementScanPlan := m.PlanScan(c.ElementType.OID, format, elementType) + if _, ok := elementScanPlan.(*scanPlanFail); ok { + return nil + } + + return &scanPlanMultirangeCodec{ + multirangeCodec: c, + m: m, + oid: oid, + formatCode: format, + } +} + +func (c *MultirangeCodec) decodeBinary(m *Map, multirangeOID uint32, src []byte, multirange MultirangeSetter) error { + rp := 0 + + elementCount := int(binary.BigEndian.Uint32(src[rp:])) + rp += 4 + + err := multirange.SetLen(elementCount) + if err != nil { + return err + } + + if elementCount == 0 { + return nil + } + + elementScanPlan := c.ElementType.Codec.PlanScan(m, c.ElementType.OID, BinaryFormatCode, multirange.ScanIndex(0)) + if elementScanPlan == nil { + elementScanPlan = m.PlanScan(c.ElementType.OID, BinaryFormatCode, multirange.ScanIndex(0)) + } + + for i := 0; i < elementCount; i++ { + elem := multirange.ScanIndex(i) + elemLen := int(int32(binary.BigEndian.Uint32(src[rp:]))) + rp += 4 + var elemSrc []byte + if elemLen >= 0 { + elemSrc = src[rp : rp+elemLen] + rp += elemLen + } + err = elementScanPlan.Scan(elemSrc, elem) + if err != nil { + return fmt.Errorf("failed to scan multirange element %d: %w", i, err) + } + } + + return nil +} + +func (c *MultirangeCodec) decodeText(m *Map, multirangeOID uint32, src []byte, multirange MultirangeSetter) error { + elements, err := parseUntypedTextMultirange(src) + if err != nil { + return err + } + + err = multirange.SetLen(len(elements)) + if err != nil { + return err + } + + if len(elements) == 0 { + return nil + } + + elementScanPlan := c.ElementType.Codec.PlanScan(m, c.ElementType.OID, TextFormatCode, multirange.ScanIndex(0)) + if elementScanPlan == nil { + elementScanPlan = m.PlanScan(c.ElementType.OID, TextFormatCode, multirange.ScanIndex(0)) + } + + for i, s := range elements { + elem := multirange.ScanIndex(i) + err = elementScanPlan.Scan([]byte(s), elem) + if err != nil { + return err + } + } + + return nil +} + +type scanPlanMultirangeCodec struct { + multirangeCodec *MultirangeCodec + m *Map + oid uint32 + formatCode int16 + elementScanPlan ScanPlan +} + +func (spac *scanPlanMultirangeCodec) Scan(src []byte, dst any) error { + c := spac.multirangeCodec + m := spac.m + oid := spac.oid + formatCode := spac.formatCode + + multirange := dst.(MultirangeSetter) + + if src == nil { + return multirange.ScanNull() + } + + switch formatCode { + case BinaryFormatCode: + return c.decodeBinary(m, oid, src, multirange) + case TextFormatCode: + return c.decodeText(m, oid, src, multirange) + default: + return fmt.Errorf("unknown format code %d", formatCode) + } +} + +func (c *MultirangeCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + switch format { + case TextFormatCode: + return string(src), nil + case BinaryFormatCode: + buf := make([]byte, len(src)) + copy(buf, src) + return buf, nil + default: + return nil, fmt.Errorf("unknown format code %d", format) + } +} + +func (c *MultirangeCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var multirange Multirange[Range[any]] + err := m.PlanScan(oid, format, &multirange).Scan(src, &multirange) + return multirange, err +} + +func parseUntypedTextMultirange(src []byte) ([]string, error) { + elements := make([]string, 0) + + buf := bytes.NewBuffer(src) + + skipWhitespace(buf) + + r, _, err := buf.ReadRune() + if err != nil { + return nil, fmt.Errorf("invalid array: %w", err) + } + + if r != '{' { + return nil, fmt.Errorf("invalid multirange, expected '{' got %v", r) + } + +parseValueLoop: + for { + r, _, err = buf.ReadRune() + if err != nil { + return nil, fmt.Errorf("invalid multirange: %w", err) + } + + switch r { + case ',': // skip range separator + case '}': + break parseValueLoop + default: + buf.UnreadRune() + value, err := parseRange(buf) + if err != nil { + return nil, fmt.Errorf("invalid multirange value: %w", err) + } + elements = append(elements, value) + } + } + + skipWhitespace(buf) + + if buf.Len() > 0 { + return nil, fmt.Errorf("unexpected trailing data: %v", buf.String()) + } + + return elements, nil + +} + +func parseRange(buf *bytes.Buffer) (string, error) { + s := &bytes.Buffer{} + + boundSepRead := false + for { + r, _, err := buf.ReadRune() + if err != nil { + return "", err + } + + switch r { + case ',', '}': + if r == ',' && !boundSepRead { + boundSepRead = true + break + } + buf.UnreadRune() + return s.String(), nil + } + + s.WriteRune(r) + } +} + +// Multirange is a generic multirange type. +// +// T should implement RangeValuer and *T should implement RangeScanner. However, there does not appear to be a way to +// enforce the RangeScanner constraint. +type Multirange[T RangeValuer] []T + +func (r Multirange[T]) IsNull() bool { + return r == nil +} + +func (r Multirange[T]) Len() int { + return len(r) +} + +func (r Multirange[T]) Index(i int) any { + return r[i] +} + +func (r Multirange[T]) IndexType() any { + var zero T + return zero +} + +func (r *Multirange[T]) ScanNull() error { + *r = nil + return nil +} + +func (r *Multirange[T]) SetLen(n int) error { + *r = make([]T, n) + return nil +} + +func (r Multirange[T]) ScanIndex(i int) any { + return &r[i] +} + +func (r Multirange[T]) ScanIndexType() any { + return new(T) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/numeric.go b/vendor/github.com/jackc/pgx/v5/pgtype/numeric.go new file mode 100644 index 00000000..4dbec786 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/numeric.go @@ -0,0 +1,823 @@ +package pgtype + +import ( + "bytes" + "database/sql/driver" + "encoding/binary" + "fmt" + "math" + "math/big" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +// PostgreSQL internal numeric storage uses 16-bit "digits" with base of 10,000 +const nbase = 10000 + +const ( + pgNumericNaN = 0x00000000c0000000 + pgNumericNaNSign = 0xc000 + + pgNumericPosInf = 0x00000000d0000000 + pgNumericPosInfSign = 0xd000 + + pgNumericNegInf = 0x00000000f0000000 + pgNumericNegInfSign = 0xf000 +) + +var big0 *big.Int = big.NewInt(0) +var big1 *big.Int = big.NewInt(1) +var big10 *big.Int = big.NewInt(10) +var big100 *big.Int = big.NewInt(100) +var big1000 *big.Int = big.NewInt(1000) + +var bigNBase *big.Int = big.NewInt(nbase) +var bigNBaseX2 *big.Int = big.NewInt(nbase * nbase) +var bigNBaseX3 *big.Int = big.NewInt(nbase * nbase * nbase) +var bigNBaseX4 *big.Int = big.NewInt(nbase * nbase * nbase * nbase) + +type NumericScanner interface { + ScanNumeric(v Numeric) error +} + +type NumericValuer interface { + NumericValue() (Numeric, error) +} + +type Numeric struct { + Int *big.Int + Exp int32 + NaN bool + InfinityModifier InfinityModifier + Valid bool +} + +func (n *Numeric) ScanNumeric(v Numeric) error { + *n = v + return nil +} + +func (n Numeric) NumericValue() (Numeric, error) { + return n, nil +} + +func (n Numeric) Float64Value() (Float8, error) { + if !n.Valid { + return Float8{}, nil + } else if n.NaN { + return Float8{Float64: math.NaN(), Valid: true}, nil + } else if n.InfinityModifier == Infinity { + return Float8{Float64: math.Inf(1), Valid: true}, nil + } else if n.InfinityModifier == NegativeInfinity { + return Float8{Float64: math.Inf(-1), Valid: true}, nil + } + + buf := make([]byte, 0, 32) + + if n.Int == nil { + buf = append(buf, '0') + } else { + buf = append(buf, n.Int.String()...) + } + buf = append(buf, 'e') + buf = append(buf, strconv.FormatInt(int64(n.Exp), 10)...) + + f, err := strconv.ParseFloat(string(buf), 64) + if err != nil { + return Float8{}, err + } + + return Float8{Float64: f, Valid: true}, nil +} + +func (n *Numeric) ScanInt64(v Int8) error { + if !v.Valid { + *n = Numeric{} + return nil + } + + *n = Numeric{Int: big.NewInt(v.Int64), Valid: true} + return nil +} + +func (n Numeric) Int64Value() (Int8, error) { + if !n.Valid { + return Int8{}, nil + } + + bi, err := n.toBigInt() + if err != nil { + return Int8{}, err + } + + if !bi.IsInt64() { + return Int8{}, fmt.Errorf("cannot convert %v to int64", n) + } + + return Int8{Int64: bi.Int64(), Valid: true}, nil +} + +func (n *Numeric) ScanScientific(src string) error { + if !strings.ContainsAny("eE", src) { + return scanPlanTextAnyToNumericScanner{}.Scan([]byte(src), n) + } + + if bigF, ok := new(big.Float).SetString(string(src)); ok { + smallF, _ := bigF.Float64() + src = strconv.FormatFloat(smallF, 'f', -1, 64) + } + + num, exp, err := parseNumericString(src) + if err != nil { + return err + } + + *n = Numeric{Int: num, Exp: exp, Valid: true} + + return nil +} + +func (n *Numeric) toBigInt() (*big.Int, error) { + if n.Exp == 0 { + return n.Int, nil + } + + num := &big.Int{} + num.Set(n.Int) + if n.Exp > 0 { + mul := &big.Int{} + mul.Exp(big10, big.NewInt(int64(n.Exp)), nil) + num.Mul(num, mul) + return num, nil + } + + div := &big.Int{} + div.Exp(big10, big.NewInt(int64(-n.Exp)), nil) + remainder := &big.Int{} + num.DivMod(num, div, remainder) + if remainder.Cmp(big0) != 0 { + return nil, fmt.Errorf("cannot convert %v to integer", n) + } + return num, nil +} + +func parseNumericString(str string) (n *big.Int, exp int32, err error) { + idx := strings.IndexByte(str, '.') + + if idx == -1 { + for len(str) > 1 && str[len(str)-1] == '0' && str[len(str)-2] != '-' { + str = str[:len(str)-1] + exp++ + } + } else { + exp = int32(-(len(str) - idx - 1)) + str = str[:idx] + str[idx+1:] + } + + accum := &big.Int{} + if _, ok := accum.SetString(str, 10); !ok { + return nil, 0, fmt.Errorf("%s is not a number", str) + } + + return accum, exp, nil +} + +func nbaseDigitsToInt64(src []byte) (accum int64, bytesRead, digitsRead int) { + digits := len(src) / 2 + if digits > 4 { + digits = 4 + } + + rp := 0 + + for i := 0; i < digits; i++ { + if i > 0 { + accum *= nbase + } + accum += int64(binary.BigEndian.Uint16(src[rp:])) + rp += 2 + } + + return accum, rp, digits +} + +// Scan implements the database/sql Scanner interface. +func (n *Numeric) Scan(src any) error { + if src == nil { + *n = Numeric{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToNumericScanner{}.Scan([]byte(src), n) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (n Numeric) Value() (driver.Value, error) { + if !n.Valid { + return nil, nil + } + + buf, err := NumericCodec{}.PlanEncode(nil, 0, TextFormatCode, n).Encode(n, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +func (n Numeric) MarshalJSON() ([]byte, error) { + if !n.Valid { + return []byte("null"), nil + } + + if n.NaN { + return []byte(`"NaN"`), nil + } + + return n.numberTextBytes(), nil +} + +func (n *Numeric) UnmarshalJSON(src []byte) error { + if bytes.Equal(src, []byte(`null`)) { + *n = Numeric{} + return nil + } + if bytes.Equal(src, []byte(`"NaN"`)) { + *n = Numeric{NaN: true, Valid: true} + return nil + } + return scanPlanTextAnyToNumericScanner{}.Scan(src, n) +} + +// numberString returns a string of the number. undefined if NaN, infinite, or NULL +func (n Numeric) numberTextBytes() []byte { + intStr := n.Int.String() + + buf := &bytes.Buffer{} + + if len(intStr) > 0 && intStr[:1] == "-" { + intStr = intStr[1:] + buf.WriteByte('-') + } + + exp := int(n.Exp) + if exp > 0 { + buf.WriteString(intStr) + for i := 0; i < exp; i++ { + buf.WriteByte('0') + } + } else if exp < 0 { + if len(intStr) <= -exp { + buf.WriteString("0.") + leadingZeros := -exp - len(intStr) + for i := 0; i < leadingZeros; i++ { + buf.WriteByte('0') + } + buf.WriteString(intStr) + } else if len(intStr) > -exp { + dpPos := len(intStr) + exp + buf.WriteString(intStr[:dpPos]) + buf.WriteByte('.') + buf.WriteString(intStr[dpPos:]) + } + } else { + buf.WriteString(intStr) + } + + return buf.Bytes() +} + +type NumericCodec struct{} + +func (NumericCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (NumericCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (NumericCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case NumericValuer: + return encodePlanNumericCodecBinaryNumericValuer{} + case Float64Valuer: + return encodePlanNumericCodecBinaryFloat64Valuer{} + case Int64Valuer: + return encodePlanNumericCodecBinaryInt64Valuer{} + } + case TextFormatCode: + switch value.(type) { + case NumericValuer: + return encodePlanNumericCodecTextNumericValuer{} + case Float64Valuer: + return encodePlanNumericCodecTextFloat64Valuer{} + case Int64Valuer: + return encodePlanNumericCodecTextInt64Valuer{} + } + } + + return nil +} + +type encodePlanNumericCodecBinaryNumericValuer struct{} + +func (encodePlanNumericCodecBinaryNumericValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(NumericValuer).NumericValue() + if err != nil { + return nil, err + } + + return encodeNumericBinary(n, buf) +} + +type encodePlanNumericCodecBinaryFloat64Valuer struct{} + +func (encodePlanNumericCodecBinaryFloat64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Float64Valuer).Float64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + if math.IsNaN(n.Float64) { + return encodeNumericBinary(Numeric{NaN: true, Valid: true}, buf) + } else if math.IsInf(n.Float64, 1) { + return encodeNumericBinary(Numeric{InfinityModifier: Infinity, Valid: true}, buf) + } else if math.IsInf(n.Float64, -1) { + return encodeNumericBinary(Numeric{InfinityModifier: NegativeInfinity, Valid: true}, buf) + } + num, exp, err := parseNumericString(strconv.FormatFloat(n.Float64, 'f', -1, 64)) + if err != nil { + return nil, err + } + + return encodeNumericBinary(Numeric{Int: num, Exp: exp, Valid: true}, buf) +} + +type encodePlanNumericCodecBinaryInt64Valuer struct{} + +func (encodePlanNumericCodecBinaryInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + return encodeNumericBinary(Numeric{Int: big.NewInt(n.Int64), Valid: true}, buf) +} + +func encodeNumericBinary(n Numeric, buf []byte) (newBuf []byte, err error) { + if !n.Valid { + return nil, nil + } + + if n.NaN { + buf = pgio.AppendUint64(buf, pgNumericNaN) + return buf, nil + } else if n.InfinityModifier == Infinity { + buf = pgio.AppendUint64(buf, pgNumericPosInf) + return buf, nil + } else if n.InfinityModifier == NegativeInfinity { + buf = pgio.AppendUint64(buf, pgNumericNegInf) + return buf, nil + } + + var sign int16 + if n.Int.Cmp(big0) < 0 { + sign = 16384 + } + + absInt := &big.Int{} + wholePart := &big.Int{} + fracPart := &big.Int{} + remainder := &big.Int{} + absInt.Abs(n.Int) + + // Normalize absInt and exp to where exp is always a multiple of 4. This makes + // converting to 16-bit base 10,000 digits easier. + var exp int32 + switch n.Exp % 4 { + case 1, -3: + exp = n.Exp - 1 + absInt.Mul(absInt, big10) + case 2, -2: + exp = n.Exp - 2 + absInt.Mul(absInt, big100) + case 3, -1: + exp = n.Exp - 3 + absInt.Mul(absInt, big1000) + default: + exp = n.Exp + } + + if exp < 0 { + divisor := &big.Int{} + divisor.Exp(big10, big.NewInt(int64(-exp)), nil) + wholePart.DivMod(absInt, divisor, fracPart) + fracPart.Add(fracPart, divisor) + } else { + wholePart = absInt + } + + var wholeDigits, fracDigits []int16 + + for wholePart.Cmp(big0) != 0 { + wholePart.DivMod(wholePart, bigNBase, remainder) + wholeDigits = append(wholeDigits, int16(remainder.Int64())) + } + + if fracPart.Cmp(big0) != 0 { + for fracPart.Cmp(big1) != 0 { + fracPart.DivMod(fracPart, bigNBase, remainder) + fracDigits = append(fracDigits, int16(remainder.Int64())) + } + } + + buf = pgio.AppendInt16(buf, int16(len(wholeDigits)+len(fracDigits))) + + var weight int16 + if len(wholeDigits) > 0 { + weight = int16(len(wholeDigits) - 1) + if exp > 0 { + weight += int16(exp / 4) + } + } else { + weight = int16(exp/4) - 1 + int16(len(fracDigits)) + } + buf = pgio.AppendInt16(buf, weight) + + buf = pgio.AppendInt16(buf, sign) + + var dscale int16 + if n.Exp < 0 { + dscale = int16(-n.Exp) + } + buf = pgio.AppendInt16(buf, dscale) + + for i := len(wholeDigits) - 1; i >= 0; i-- { + buf = pgio.AppendInt16(buf, wholeDigits[i]) + } + + for i := len(fracDigits) - 1; i >= 0; i-- { + buf = pgio.AppendInt16(buf, fracDigits[i]) + } + + return buf, nil +} + +type encodePlanNumericCodecTextNumericValuer struct{} + +func (encodePlanNumericCodecTextNumericValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(NumericValuer).NumericValue() + if err != nil { + return nil, err + } + + return encodeNumericText(n, buf) +} + +type encodePlanNumericCodecTextFloat64Valuer struct{} + +func (encodePlanNumericCodecTextFloat64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Float64Valuer).Float64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + if math.IsNaN(n.Float64) { + buf = append(buf, "NaN"...) + } else if math.IsInf(n.Float64, 1) { + buf = append(buf, "Infinity"...) + } else if math.IsInf(n.Float64, -1) { + buf = append(buf, "-Infinity"...) + } else { + buf = append(buf, strconv.FormatFloat(n.Float64, 'f', -1, 64)...) + } + return buf, nil +} + +type encodePlanNumericCodecTextInt64Valuer struct{} + +func (encodePlanNumericCodecTextInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + n, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !n.Valid { + return nil, nil + } + + buf = append(buf, strconv.FormatInt(n.Int64, 10)...) + return buf, nil +} + +func encodeNumericText(n Numeric, buf []byte) (newBuf []byte, err error) { + if !n.Valid { + return nil, nil + } + + if n.NaN { + buf = append(buf, "NaN"...) + return buf, nil + } else if n.InfinityModifier == Infinity { + buf = append(buf, "Infinity"...) + return buf, nil + } else if n.InfinityModifier == NegativeInfinity { + buf = append(buf, "-Infinity"...) + return buf, nil + } + + buf = append(buf, n.numberTextBytes()...) + + return buf, nil +} + +func (NumericCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case NumericScanner: + return scanPlanBinaryNumericToNumericScanner{} + case Float64Scanner: + return scanPlanBinaryNumericToFloat64Scanner{} + case Int64Scanner: + return scanPlanBinaryNumericToInt64Scanner{} + case TextScanner: + return scanPlanBinaryNumericToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case NumericScanner: + return scanPlanTextAnyToNumericScanner{} + case Float64Scanner: + return scanPlanTextAnyToFloat64Scanner{} + case Int64Scanner: + return scanPlanTextAnyToInt64Scanner{} + } + } + + return nil +} + +type scanPlanBinaryNumericToNumericScanner struct{} + +func (scanPlanBinaryNumericToNumericScanner) Scan(src []byte, dst any) error { + scanner := (dst).(NumericScanner) + + if src == nil { + return scanner.ScanNumeric(Numeric{}) + } + + if len(src) < 8 { + return fmt.Errorf("numeric incomplete %v", src) + } + + rp := 0 + ndigits := binary.BigEndian.Uint16(src[rp:]) + rp += 2 + weight := int16(binary.BigEndian.Uint16(src[rp:])) + rp += 2 + sign := binary.BigEndian.Uint16(src[rp:]) + rp += 2 + dscale := int16(binary.BigEndian.Uint16(src[rp:])) + rp += 2 + + if sign == pgNumericNaNSign { + return scanner.ScanNumeric(Numeric{NaN: true, Valid: true}) + } else if sign == pgNumericPosInfSign { + return scanner.ScanNumeric(Numeric{InfinityModifier: Infinity, Valid: true}) + } else if sign == pgNumericNegInfSign { + return scanner.ScanNumeric(Numeric{InfinityModifier: NegativeInfinity, Valid: true}) + } + + if ndigits == 0 { + return scanner.ScanNumeric(Numeric{Int: big.NewInt(0), Valid: true}) + } + + if len(src[rp:]) < int(ndigits)*2 { + return fmt.Errorf("numeric incomplete %v", src) + } + + accum := &big.Int{} + + for i := 0; i < int(ndigits+3)/4; i++ { + int64accum, bytesRead, digitsRead := nbaseDigitsToInt64(src[rp:]) + rp += bytesRead + + if i > 0 { + var mul *big.Int + switch digitsRead { + case 1: + mul = bigNBase + case 2: + mul = bigNBaseX2 + case 3: + mul = bigNBaseX3 + case 4: + mul = bigNBaseX4 + default: + return fmt.Errorf("invalid digitsRead: %d (this can't happen)", digitsRead) + } + accum.Mul(accum, mul) + } + + accum.Add(accum, big.NewInt(int64accum)) + } + + exp := (int32(weight) - int32(ndigits) + 1) * 4 + + if dscale > 0 { + fracNBaseDigits := int16(int32(ndigits) - int32(weight) - 1) + fracDecimalDigits := fracNBaseDigits * 4 + + if dscale > fracDecimalDigits { + multCount := int(dscale - fracDecimalDigits) + for i := 0; i < multCount; i++ { + accum.Mul(accum, big10) + exp-- + } + } else if dscale < fracDecimalDigits { + divCount := int(fracDecimalDigits - dscale) + for i := 0; i < divCount; i++ { + accum.Div(accum, big10) + exp++ + } + } + } + + reduced := &big.Int{} + remainder := &big.Int{} + if exp >= 0 { + for { + reduced.DivMod(accum, big10, remainder) + if remainder.Cmp(big0) != 0 { + break + } + accum.Set(reduced) + exp++ + } + } + + if sign != 0 { + accum.Neg(accum) + } + + return scanner.ScanNumeric(Numeric{Int: accum, Exp: exp, Valid: true}) +} + +type scanPlanBinaryNumericToFloat64Scanner struct{} + +func (scanPlanBinaryNumericToFloat64Scanner) Scan(src []byte, dst any) error { + scanner := (dst).(Float64Scanner) + + if src == nil { + return scanner.ScanFloat64(Float8{}) + } + + var n Numeric + + err := scanPlanBinaryNumericToNumericScanner{}.Scan(src, &n) + if err != nil { + return err + } + + f8, err := n.Float64Value() + if err != nil { + return err + } + + return scanner.ScanFloat64(f8) +} + +type scanPlanBinaryNumericToInt64Scanner struct{} + +func (scanPlanBinaryNumericToInt64Scanner) Scan(src []byte, dst any) error { + scanner := (dst).(Int64Scanner) + + if src == nil { + return scanner.ScanInt64(Int8{}) + } + + var n Numeric + + err := scanPlanBinaryNumericToNumericScanner{}.Scan(src, &n) + if err != nil { + return err + } + + bigInt, err := n.toBigInt() + if err != nil { + return err + } + + if !bigInt.IsInt64() { + return fmt.Errorf("%v is out of range for int64", bigInt) + } + + return scanner.ScanInt64(Int8{Int64: bigInt.Int64(), Valid: true}) +} + +type scanPlanBinaryNumericToTextScanner struct{} + +func (scanPlanBinaryNumericToTextScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TextScanner) + + if src == nil { + return scanner.ScanText(Text{}) + } + + var n Numeric + + err := scanPlanBinaryNumericToNumericScanner{}.Scan(src, &n) + if err != nil { + return err + } + + sbuf, err := encodeNumericText(n, nil) + if err != nil { + return err + } + + return scanner.ScanText(Text{String: string(sbuf), Valid: true}) +} + +type scanPlanTextAnyToNumericScanner struct{} + +func (scanPlanTextAnyToNumericScanner) Scan(src []byte, dst any) error { + scanner := (dst).(NumericScanner) + + if src == nil { + return scanner.ScanNumeric(Numeric{}) + } + + if string(src) == "NaN" { + return scanner.ScanNumeric(Numeric{NaN: true, Valid: true}) + } else if string(src) == "Infinity" { + return scanner.ScanNumeric(Numeric{InfinityModifier: Infinity, Valid: true}) + } else if string(src) == "-Infinity" { + return scanner.ScanNumeric(Numeric{InfinityModifier: NegativeInfinity, Valid: true}) + } + + num, exp, err := parseNumericString(string(src)) + if err != nil { + return err + } + + return scanner.ScanNumeric(Numeric{Int: num, Exp: exp, Valid: true}) +} + +func (c NumericCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + if format == TextFormatCode { + return string(src), nil + } + + var n Numeric + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + + buf, err := m.Encode(oid, TextFormatCode, n, nil) + if err != nil { + return nil, err + } + return string(buf), nil +} + +func (c NumericCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var n Numeric + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/path.go b/vendor/github.com/jackc/pgx/v5/pgtype/path.go new file mode 100644 index 00000000..73e0ec52 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/path.go @@ -0,0 +1,272 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "math" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type PathScanner interface { + ScanPath(v Path) error +} + +type PathValuer interface { + PathValue() (Path, error) +} + +type Path struct { + P []Vec2 + Closed bool + Valid bool +} + +func (path *Path) ScanPath(v Path) error { + *path = v + return nil +} + +func (path Path) PathValue() (Path, error) { + return path, nil +} + +// Scan implements the database/sql Scanner interface. +func (path *Path) Scan(src any) error { + if src == nil { + *path = Path{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToPathScanner{}.Scan([]byte(src), path) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (path Path) Value() (driver.Value, error) { + if !path.Valid { + return nil, nil + } + + buf, err := PathCodec{}.PlanEncode(nil, 0, TextFormatCode, path).Encode(path, nil) + if err != nil { + return nil, err + } + + return string(buf), err +} + +type PathCodec struct{} + +func (PathCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (PathCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (PathCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(PathValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanPathCodecBinary{} + case TextFormatCode: + return encodePlanPathCodecText{} + } + + return nil +} + +type encodePlanPathCodecBinary struct{} + +func (encodePlanPathCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + path, err := value.(PathValuer).PathValue() + if err != nil { + return nil, err + } + + if !path.Valid { + return nil, nil + } + + var closeByte byte + if path.Closed { + closeByte = 1 + } + buf = append(buf, closeByte) + + buf = pgio.AppendInt32(buf, int32(len(path.P))) + + for _, p := range path.P { + buf = pgio.AppendUint64(buf, math.Float64bits(p.X)) + buf = pgio.AppendUint64(buf, math.Float64bits(p.Y)) + } + + return buf, nil +} + +type encodePlanPathCodecText struct{} + +func (encodePlanPathCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + path, err := value.(PathValuer).PathValue() + if err != nil { + return nil, err + } + + if !path.Valid { + return nil, nil + } + + var startByte, endByte byte + if path.Closed { + startByte = '(' + endByte = ')' + } else { + startByte = '[' + endByte = ']' + } + buf = append(buf, startByte) + + for i, p := range path.P { + if i > 0 { + buf = append(buf, ',') + } + buf = append(buf, fmt.Sprintf(`(%s,%s)`, + strconv.FormatFloat(p.X, 'f', -1, 64), + strconv.FormatFloat(p.Y, 'f', -1, 64), + )...) + } + + buf = append(buf, endByte) + + return buf, nil +} + +func (PathCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case PathScanner: + return scanPlanBinaryPathToPathScanner{} + } + case TextFormatCode: + switch target.(type) { + case PathScanner: + return scanPlanTextAnyToPathScanner{} + } + } + + return nil +} + +type scanPlanBinaryPathToPathScanner struct{} + +func (scanPlanBinaryPathToPathScanner) Scan(src []byte, dst any) error { + scanner := (dst).(PathScanner) + + if src == nil { + return scanner.ScanPath(Path{}) + } + + if len(src) < 5 { + return fmt.Errorf("invalid length for Path: %v", len(src)) + } + + closed := src[0] == 1 + pointCount := int(binary.BigEndian.Uint32(src[1:])) + + rp := 5 + + if 5+pointCount*16 != len(src) { + return fmt.Errorf("invalid length for Path with %d points: %v", pointCount, len(src)) + } + + points := make([]Vec2, pointCount) + for i := 0; i < len(points); i++ { + x := binary.BigEndian.Uint64(src[rp:]) + rp += 8 + y := binary.BigEndian.Uint64(src[rp:]) + rp += 8 + points[i] = Vec2{math.Float64frombits(x), math.Float64frombits(y)} + } + + return scanner.ScanPath(Path{ + P: points, + Closed: closed, + Valid: true, + }) +} + +type scanPlanTextAnyToPathScanner struct{} + +func (scanPlanTextAnyToPathScanner) Scan(src []byte, dst any) error { + scanner := (dst).(PathScanner) + + if src == nil { + return scanner.ScanPath(Path{}) + } + + if len(src) < 7 { + return fmt.Errorf("invalid length for Path: %v", len(src)) + } + + closed := src[0] == '(' + points := make([]Vec2, 0) + + str := string(src[2:]) + + for { + end := strings.IndexByte(str, ',') + x, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + str = str[end+1:] + end = strings.IndexByte(str, ')') + + y, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + points = append(points, Vec2{x, y}) + + if end+3 < len(str) { + str = str[end+3:] + } else { + break + } + } + + return scanner.ScanPath(Path{P: points, Closed: closed, Valid: true}) +} + +func (c PathCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c PathCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var path Path + err := codecScan(c, m, oid, format, src, &path) + if err != nil { + return nil, err + } + return path, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/pgtype.go b/vendor/github.com/jackc/pgx/v5/pgtype/pgtype.go new file mode 100644 index 00000000..bdd9f05c --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/pgtype.go @@ -0,0 +1,2065 @@ +package pgtype + +import ( + "database/sql" + "database/sql/driver" + "errors" + "fmt" + "net" + "net/netip" + "reflect" + "time" +) + +// PostgreSQL oids for common types +const ( + BoolOID = 16 + ByteaOID = 17 + QCharOID = 18 + NameOID = 19 + Int8OID = 20 + Int2OID = 21 + Int4OID = 23 + TextOID = 25 + OIDOID = 26 + TIDOID = 27 + XIDOID = 28 + CIDOID = 29 + JSONOID = 114 + XMLOID = 142 + XMLArrayOID = 143 + JSONArrayOID = 199 + PointOID = 600 + LsegOID = 601 + PathOID = 602 + BoxOID = 603 + PolygonOID = 604 + LineOID = 628 + LineArrayOID = 629 + CIDROID = 650 + CIDRArrayOID = 651 + Float4OID = 700 + Float8OID = 701 + CircleOID = 718 + CircleArrayOID = 719 + UnknownOID = 705 + Macaddr8OID = 774 + MacaddrOID = 829 + InetOID = 869 + BoolArrayOID = 1000 + QCharArrayOID = 1002 + NameArrayOID = 1003 + Int2ArrayOID = 1005 + Int4ArrayOID = 1007 + TextArrayOID = 1009 + TIDArrayOID = 1010 + ByteaArrayOID = 1001 + XIDArrayOID = 1011 + CIDArrayOID = 1012 + BPCharArrayOID = 1014 + VarcharArrayOID = 1015 + Int8ArrayOID = 1016 + PointArrayOID = 1017 + LsegArrayOID = 1018 + PathArrayOID = 1019 + BoxArrayOID = 1020 + Float4ArrayOID = 1021 + Float8ArrayOID = 1022 + PolygonArrayOID = 1027 + OIDArrayOID = 1028 + ACLItemOID = 1033 + ACLItemArrayOID = 1034 + MacaddrArrayOID = 1040 + InetArrayOID = 1041 + BPCharOID = 1042 + VarcharOID = 1043 + DateOID = 1082 + TimeOID = 1083 + TimestampOID = 1114 + TimestampArrayOID = 1115 + DateArrayOID = 1182 + TimeArrayOID = 1183 + TimestamptzOID = 1184 + TimestamptzArrayOID = 1185 + IntervalOID = 1186 + IntervalArrayOID = 1187 + NumericArrayOID = 1231 + TimetzOID = 1266 + TimetzArrayOID = 1270 + BitOID = 1560 + BitArrayOID = 1561 + VarbitOID = 1562 + VarbitArrayOID = 1563 + NumericOID = 1700 + RecordOID = 2249 + RecordArrayOID = 2287 + UUIDOID = 2950 + UUIDArrayOID = 2951 + JSONBOID = 3802 + JSONBArrayOID = 3807 + DaterangeOID = 3912 + DaterangeArrayOID = 3913 + Int4rangeOID = 3904 + Int4rangeArrayOID = 3905 + NumrangeOID = 3906 + NumrangeArrayOID = 3907 + TsrangeOID = 3908 + TsrangeArrayOID = 3909 + TstzrangeOID = 3910 + TstzrangeArrayOID = 3911 + Int8rangeOID = 3926 + Int8rangeArrayOID = 3927 + JSONPathOID = 4072 + JSONPathArrayOID = 4073 + Int4multirangeOID = 4451 + NummultirangeOID = 4532 + TsmultirangeOID = 4533 + TstzmultirangeOID = 4534 + DatemultirangeOID = 4535 + Int8multirangeOID = 4536 + Int4multirangeArrayOID = 6150 + NummultirangeArrayOID = 6151 + TsmultirangeArrayOID = 6152 + TstzmultirangeArrayOID = 6153 + DatemultirangeArrayOID = 6155 + Int8multirangeArrayOID = 6157 +) + +type InfinityModifier int8 + +const ( + Infinity InfinityModifier = 1 + Finite InfinityModifier = 0 + NegativeInfinity InfinityModifier = -Infinity +) + +func (im InfinityModifier) String() string { + switch im { + case Finite: + return "finite" + case Infinity: + return "infinity" + case NegativeInfinity: + return "-infinity" + default: + return "invalid" + } +} + +// PostgreSQL format codes +const ( + TextFormatCode = 0 + BinaryFormatCode = 1 +) + +// A Codec converts between Go and PostgreSQL values. A Codec must not be mutated after it is registered with a Map. +type Codec interface { + // FormatSupported returns true if the format is supported. + FormatSupported(int16) bool + + // PreferredFormat returns the preferred format. + PreferredFormat() int16 + + // PlanEncode returns an EncodePlan for encoding value into PostgreSQL format for oid and format. If no plan can be + // found then nil is returned. + PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan + + // PlanScan returns a ScanPlan for scanning a PostgreSQL value into a destination with the same type as target. If + // no plan can be found then nil is returned. + PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan + + // DecodeDatabaseSQLValue returns src decoded into a value compatible with the sql.Scanner interface. + DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) + + // DecodeValue returns src decoded into its default format. + DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) +} + +type nullAssignmentError struct { + dst any +} + +func (e *nullAssignmentError) Error() string { + return fmt.Sprintf("cannot assign NULL to %T", e.dst) +} + +// Type represents a PostgreSQL data type. It must not be mutated after it is registered with a Map. +type Type struct { + Codec Codec + Name string + OID uint32 +} + +// Map is the mapping between PostgreSQL server types and Go type handling logic. It can encode values for +// transmission to a PostgreSQL server and scan received values. +type Map struct { + oidToType map[uint32]*Type + nameToType map[string]*Type + reflectTypeToName map[reflect.Type]string + oidToFormatCode map[uint32]int16 + + reflectTypeToType map[reflect.Type]*Type + + memoizedScanPlans map[uint32]map[reflect.Type][2]ScanPlan + memoizedEncodePlans map[uint32]map[reflect.Type][2]EncodePlan + + // TryWrapEncodePlanFuncs is a slice of functions that will wrap a value that cannot be encoded by the Codec. Every + // time a wrapper is found the PlanEncode method will be recursively called with the new value. This allows several layers of wrappers + // to be built up. There are default functions placed in this slice by NewMap(). In most cases these functions + // should run last. i.e. Additional functions should typically be prepended not appended. + TryWrapEncodePlanFuncs []TryWrapEncodePlanFunc + + // TryWrapScanPlanFuncs is a slice of functions that will wrap a target that cannot be scanned into by the Codec. Every + // time a wrapper is found the PlanScan method will be recursively called with the new target. This allows several layers of wrappers + // to be built up. There are default functions placed in this slice by NewMap(). In most cases these functions + // should run last. i.e. Additional functions should typically be prepended not appended. + TryWrapScanPlanFuncs []TryWrapScanPlanFunc +} + +// Copy returns a new Map containing the same registered types. +func (m *Map) Copy() *Map { + newMap := NewMap() + for _, type_ := range m.oidToType { + newMap.RegisterType(type_) + } + return newMap +} + +func NewMap() *Map { + defaultMapInitOnce.Do(initDefaultMap) + + return &Map{ + oidToType: make(map[uint32]*Type), + nameToType: make(map[string]*Type), + reflectTypeToName: make(map[reflect.Type]string), + oidToFormatCode: make(map[uint32]int16), + + memoizedScanPlans: make(map[uint32]map[reflect.Type][2]ScanPlan), + memoizedEncodePlans: make(map[uint32]map[reflect.Type][2]EncodePlan), + + TryWrapEncodePlanFuncs: []TryWrapEncodePlanFunc{ + TryWrapDerefPointerEncodePlan, + TryWrapBuiltinTypeEncodePlan, + TryWrapFindUnderlyingTypeEncodePlan, + TryWrapStructEncodePlan, + TryWrapSliceEncodePlan, + TryWrapMultiDimSliceEncodePlan, + TryWrapArrayEncodePlan, + }, + + TryWrapScanPlanFuncs: []TryWrapScanPlanFunc{ + TryPointerPointerScanPlan, + TryWrapBuiltinTypeScanPlan, + TryFindUnderlyingTypeScanPlan, + TryWrapStructScanPlan, + TryWrapPtrSliceScanPlan, + TryWrapPtrMultiDimSliceScanPlan, + TryWrapPtrArrayScanPlan, + }, + } +} + +// RegisterTypes registers multiple data types in the sequence they are provided. +func (m *Map) RegisterTypes(types []*Type) { + for _, t := range types { + m.RegisterType(t) + } +} + +// RegisterType registers a data type with the Map. t must not be mutated after it is registered. +func (m *Map) RegisterType(t *Type) { + m.oidToType[t.OID] = t + m.nameToType[t.Name] = t + m.oidToFormatCode[t.OID] = t.Codec.PreferredFormat() + + // Invalidated by type registration + m.reflectTypeToType = nil + for k := range m.memoizedScanPlans { + delete(m.memoizedScanPlans, k) + } + for k := range m.memoizedEncodePlans { + delete(m.memoizedEncodePlans, k) + } +} + +// RegisterDefaultPgType registers a mapping of a Go type to a PostgreSQL type name. Typically the data type to be +// encoded or decoded is determined by the PostgreSQL OID. But if the OID of a value to be encoded or decoded is +// unknown, this additional mapping will be used by TypeForValue to determine a suitable data type. +func (m *Map) RegisterDefaultPgType(value any, name string) { + m.reflectTypeToName[reflect.TypeOf(value)] = name + + // Invalidated by type registration + m.reflectTypeToType = nil + for k := range m.memoizedScanPlans { + delete(m.memoizedScanPlans, k) + } + for k := range m.memoizedEncodePlans { + delete(m.memoizedEncodePlans, k) + } +} + +// TypeForOID returns the Type registered for the given OID. The returned Type must not be mutated. +func (m *Map) TypeForOID(oid uint32) (*Type, bool) { + if dt, ok := m.oidToType[oid]; ok { + return dt, true + } + + dt, ok := defaultMap.oidToType[oid] + return dt, ok +} + +// TypeForName returns the Type registered for the given name. The returned Type must not be mutated. +func (m *Map) TypeForName(name string) (*Type, bool) { + if dt, ok := m.nameToType[name]; ok { + return dt, true + } + dt, ok := defaultMap.nameToType[name] + return dt, ok +} + +func (m *Map) buildReflectTypeToType() { + m.reflectTypeToType = make(map[reflect.Type]*Type) + + for reflectType, name := range m.reflectTypeToName { + if dt, ok := m.TypeForName(name); ok { + m.reflectTypeToType[reflectType] = dt + } + } +} + +// TypeForValue finds a data type suitable for v. Use RegisterType to register types that can encode and decode +// themselves. Use RegisterDefaultPgType to register that can be handled by a registered data type. The returned Type +// must not be mutated. +func (m *Map) TypeForValue(v any) (*Type, bool) { + if m.reflectTypeToType == nil { + m.buildReflectTypeToType() + } + + if dt, ok := m.reflectTypeToType[reflect.TypeOf(v)]; ok { + return dt, true + } + + dt, ok := defaultMap.reflectTypeToType[reflect.TypeOf(v)] + return dt, ok +} + +// FormatCodeForOID returns the preferred format code for type oid. If the type is not registered it returns the text +// format code. +func (m *Map) FormatCodeForOID(oid uint32) int16 { + if fc, ok := m.oidToFormatCode[oid]; ok { + return fc + } + + if fc, ok := defaultMap.oidToFormatCode[oid]; ok { + return fc + } + + return TextFormatCode +} + +// EncodePlan is a precompiled plan to encode a particular type into a particular OID and format. +type EncodePlan interface { + // Encode appends the encoded bytes of value to buf. If value is the SQL value NULL then append nothing and return + // (nil, nil). The caller of Encode is responsible for writing the correct NULL value or the length of the data + // written. + Encode(value any, buf []byte) (newBuf []byte, err error) +} + +// ScanPlan is a precompiled plan to scan into a type of destination. +type ScanPlan interface { + // Scan scans src into target. src is only valid during the call to Scan. The ScanPlan must not retain a reference to + // src. + Scan(src []byte, target any) error +} + +type scanPlanCodecSQLScanner struct { + c Codec + m *Map + oid uint32 + formatCode int16 +} + +func (plan *scanPlanCodecSQLScanner) Scan(src []byte, dst any) error { + value, err := plan.c.DecodeDatabaseSQLValue(plan.m, plan.oid, plan.formatCode, src) + if err != nil { + return err + } + + scanner := dst.(sql.Scanner) + return scanner.Scan(value) +} + +type scanPlanSQLScanner struct { + formatCode int16 +} + +func (plan *scanPlanSQLScanner) Scan(src []byte, dst any) error { + scanner := dst.(sql.Scanner) + if src == nil { + // This is necessary because interface value []byte:nil does not equal nil:nil for the binary format path and the + // text format path would be converted to empty string. + return scanner.Scan(nil) + } else if plan.formatCode == BinaryFormatCode { + return scanner.Scan(src) + } else { + return scanner.Scan(string(src)) + } +} + +type scanPlanString struct{} + +func (scanPlanString) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p := (dst).(*string) + *p = string(src) + return nil +} + +type scanPlanAnyTextToBytes struct{} + +func (scanPlanAnyTextToBytes) Scan(src []byte, dst any) error { + dstBuf := dst.(*[]byte) + if src == nil { + *dstBuf = nil + return nil + } + + *dstBuf = make([]byte, len(src)) + copy(*dstBuf, src) + return nil +} + +type scanPlanFail struct { + m *Map + oid uint32 + formatCode int16 +} + +func (plan *scanPlanFail) Scan(src []byte, dst any) error { + // If src is NULL it might be possible to scan into dst even though it is the types are not compatible. While this + // may seem to be a contrived case it can occur when selecting NULL directly. PostgreSQL assigns it the type of text. + // It would be surprising to the caller to have to cast the NULL (e.g. `select null::int`). So try to figure out a + // compatible data type for dst and scan with that. + // + // See https://github.com/jackc/pgx/issues/1326 + if src == nil { + // As a horrible hack try all types to find anything that can scan into dst. + for oid := range plan.m.oidToType { + // using planScan instead of Scan or PlanScan to avoid polluting the planned scan cache. + plan := plan.m.planScan(oid, plan.formatCode, dst) + if _, ok := plan.(*scanPlanFail); !ok { + return plan.Scan(src, dst) + } + } + for oid := range defaultMap.oidToType { + if _, ok := plan.m.oidToType[oid]; !ok { + plan := plan.m.planScan(oid, plan.formatCode, dst) + if _, ok := plan.(*scanPlanFail); !ok { + return plan.Scan(src, dst) + } + } + } + } + + var format string + switch plan.formatCode { + case TextFormatCode: + format = "text" + case BinaryFormatCode: + format = "binary" + default: + format = fmt.Sprintf("unknown %d", plan.formatCode) + } + + var dataTypeName string + if t, ok := plan.m.TypeForOID(plan.oid); ok { + dataTypeName = t.Name + } else { + dataTypeName = "unknown type" + } + + return fmt.Errorf("cannot scan %s (OID %d) in %v format into %T", dataTypeName, plan.oid, format, dst) +} + +// TryWrapScanPlanFunc is a function that tries to create a wrapper plan for target. If successful it returns a plan +// that will convert the target passed to Scan and then call the next plan. nextTarget is target as it will be converted +// by plan. It must be used to find another suitable ScanPlan. When it is found SetNext must be called on plan for it +// to be usabled. ok indicates if a suitable wrapper was found. +type TryWrapScanPlanFunc func(target any) (plan WrappedScanPlanNextSetter, nextTarget any, ok bool) + +type pointerPointerScanPlan struct { + dstType reflect.Type + next ScanPlan +} + +func (plan *pointerPointerScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *pointerPointerScanPlan) Scan(src []byte, dst any) error { + el := reflect.ValueOf(dst).Elem() + if src == nil { + el.Set(reflect.Zero(el.Type())) + return nil + } + + el.Set(reflect.New(el.Type().Elem())) + return plan.next.Scan(src, el.Interface()) +} + +// TryPointerPointerScanPlan handles a pointer to a pointer by setting the target to nil for SQL NULL and allocating and +// scanning for non-NULL. +func TryPointerPointerScanPlan(target any) (plan WrappedScanPlanNextSetter, nextTarget any, ok bool) { + if dstValue := reflect.ValueOf(target); dstValue.Kind() == reflect.Ptr { + elemValue := dstValue.Elem() + if elemValue.Kind() == reflect.Ptr { + plan = &pointerPointerScanPlan{dstType: dstValue.Type()} + return plan, reflect.Zero(elemValue.Type()).Interface(), true + } + } + + return nil, nil, false +} + +// SkipUnderlyingTypePlanner prevents PlanScan and PlanDecode from trying to use the underlying type. +type SkipUnderlyingTypePlanner interface { + SkipUnderlyingTypePlan() +} + +var elemKindToPointerTypes map[reflect.Kind]reflect.Type = map[reflect.Kind]reflect.Type{ + reflect.Int: reflect.TypeOf(new(int)), + reflect.Int8: reflect.TypeOf(new(int8)), + reflect.Int16: reflect.TypeOf(new(int16)), + reflect.Int32: reflect.TypeOf(new(int32)), + reflect.Int64: reflect.TypeOf(new(int64)), + reflect.Uint: reflect.TypeOf(new(uint)), + reflect.Uint8: reflect.TypeOf(new(uint8)), + reflect.Uint16: reflect.TypeOf(new(uint16)), + reflect.Uint32: reflect.TypeOf(new(uint32)), + reflect.Uint64: reflect.TypeOf(new(uint64)), + reflect.Float32: reflect.TypeOf(new(float32)), + reflect.Float64: reflect.TypeOf(new(float64)), + reflect.String: reflect.TypeOf(new(string)), + reflect.Bool: reflect.TypeOf(new(bool)), +} + +type underlyingTypeScanPlan struct { + dstType reflect.Type + nextDstType reflect.Type + next ScanPlan +} + +func (plan *underlyingTypeScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *underlyingTypeScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, reflect.ValueOf(dst).Convert(plan.nextDstType).Interface()) +} + +// TryFindUnderlyingTypeScanPlan tries to convert to a Go builtin type. e.g. If value was of type MyString and +// MyString was defined as a string then a wrapper plan would be returned that converts MyString to string. +func TryFindUnderlyingTypeScanPlan(dst any) (plan WrappedScanPlanNextSetter, nextDst any, ok bool) { + if _, ok := dst.(SkipUnderlyingTypePlanner); ok { + return nil, nil, false + } + + dstValue := reflect.ValueOf(dst) + + if dstValue.Kind() == reflect.Ptr { + var elemValue reflect.Value + if dstValue.IsNil() { + elemValue = reflect.New(dstValue.Type().Elem()).Elem() + } else { + elemValue = dstValue.Elem() + } + nextDstType := elemKindToPointerTypes[elemValue.Kind()] + if nextDstType == nil { + if elemValue.Kind() == reflect.Slice { + if elemValue.Type().Elem().Kind() == reflect.Uint8 { + var v *[]byte + nextDstType = reflect.TypeOf(v) + } + } + + // Get underlying type of any array. + // https://github.com/jackc/pgx/issues/2107 + if elemValue.Kind() == reflect.Array { + nextDstType = reflect.PointerTo(reflect.ArrayOf(elemValue.Len(), elemValue.Type().Elem())) + } + } + + if nextDstType != nil && dstValue.Type() != nextDstType && dstValue.CanConvert(nextDstType) { + return &underlyingTypeScanPlan{dstType: dstValue.Type(), nextDstType: nextDstType}, dstValue.Convert(nextDstType).Interface(), true + } + } + + return nil, nil, false +} + +type WrappedScanPlanNextSetter interface { + SetNext(ScanPlan) + ScanPlan +} + +// TryWrapBuiltinTypeScanPlan tries to wrap a builtin type with a wrapper that provides additional methods. e.g. If +// value was of type int32 then a wrapper plan would be returned that converts target to a value that implements +// Int64Scanner. +func TryWrapBuiltinTypeScanPlan(target any) (plan WrappedScanPlanNextSetter, nextDst any, ok bool) { + switch target := target.(type) { + case *int8: + return &wrapInt8ScanPlan{}, (*int8Wrapper)(target), true + case *int16: + return &wrapInt16ScanPlan{}, (*int16Wrapper)(target), true + case *int32: + return &wrapInt32ScanPlan{}, (*int32Wrapper)(target), true + case *int64: + return &wrapInt64ScanPlan{}, (*int64Wrapper)(target), true + case *int: + return &wrapIntScanPlan{}, (*intWrapper)(target), true + case *uint8: + return &wrapUint8ScanPlan{}, (*uint8Wrapper)(target), true + case *uint16: + return &wrapUint16ScanPlan{}, (*uint16Wrapper)(target), true + case *uint32: + return &wrapUint32ScanPlan{}, (*uint32Wrapper)(target), true + case *uint64: + return &wrapUint64ScanPlan{}, (*uint64Wrapper)(target), true + case *uint: + return &wrapUintScanPlan{}, (*uintWrapper)(target), true + case *float32: + return &wrapFloat32ScanPlan{}, (*float32Wrapper)(target), true + case *float64: + return &wrapFloat64ScanPlan{}, (*float64Wrapper)(target), true + case *string: + return &wrapStringScanPlan{}, (*stringWrapper)(target), true + case *time.Time: + return &wrapTimeScanPlan{}, (*timeWrapper)(target), true + case *time.Duration: + return &wrapDurationScanPlan{}, (*durationWrapper)(target), true + case *net.IPNet: + return &wrapNetIPNetScanPlan{}, (*netIPNetWrapper)(target), true + case *net.IP: + return &wrapNetIPScanPlan{}, (*netIPWrapper)(target), true + case *netip.Prefix: + return &wrapNetipPrefixScanPlan{}, (*netipPrefixWrapper)(target), true + case *netip.Addr: + return &wrapNetipAddrScanPlan{}, (*netipAddrWrapper)(target), true + case *map[string]*string: + return &wrapMapStringToPointerStringScanPlan{}, (*mapStringToPointerStringWrapper)(target), true + case *map[string]string: + return &wrapMapStringToStringScanPlan{}, (*mapStringToStringWrapper)(target), true + case *[16]byte: + return &wrapByte16ScanPlan{}, (*byte16Wrapper)(target), true + case *[]byte: + return &wrapByteSliceScanPlan{}, (*byteSliceWrapper)(target), true + } + + return nil, nil, false +} + +type wrapInt8ScanPlan struct { + next ScanPlan +} + +func (plan *wrapInt8ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapInt8ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*int8Wrapper)(dst.(*int8))) +} + +type wrapInt16ScanPlan struct { + next ScanPlan +} + +func (plan *wrapInt16ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapInt16ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*int16Wrapper)(dst.(*int16))) +} + +type wrapInt32ScanPlan struct { + next ScanPlan +} + +func (plan *wrapInt32ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapInt32ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*int32Wrapper)(dst.(*int32))) +} + +type wrapInt64ScanPlan struct { + next ScanPlan +} + +func (plan *wrapInt64ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapInt64ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*int64Wrapper)(dst.(*int64))) +} + +type wrapIntScanPlan struct { + next ScanPlan +} + +func (plan *wrapIntScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapIntScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*intWrapper)(dst.(*int))) +} + +type wrapUint8ScanPlan struct { + next ScanPlan +} + +func (plan *wrapUint8ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapUint8ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*uint8Wrapper)(dst.(*uint8))) +} + +type wrapUint16ScanPlan struct { + next ScanPlan +} + +func (plan *wrapUint16ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapUint16ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*uint16Wrapper)(dst.(*uint16))) +} + +type wrapUint32ScanPlan struct { + next ScanPlan +} + +func (plan *wrapUint32ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapUint32ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*uint32Wrapper)(dst.(*uint32))) +} + +type wrapUint64ScanPlan struct { + next ScanPlan +} + +func (plan *wrapUint64ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapUint64ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*uint64Wrapper)(dst.(*uint64))) +} + +type wrapUintScanPlan struct { + next ScanPlan +} + +func (plan *wrapUintScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapUintScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*uintWrapper)(dst.(*uint))) +} + +type wrapFloat32ScanPlan struct { + next ScanPlan +} + +func (plan *wrapFloat32ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapFloat32ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*float32Wrapper)(dst.(*float32))) +} + +type wrapFloat64ScanPlan struct { + next ScanPlan +} + +func (plan *wrapFloat64ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapFloat64ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*float64Wrapper)(dst.(*float64))) +} + +type wrapStringScanPlan struct { + next ScanPlan +} + +func (plan *wrapStringScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapStringScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*stringWrapper)(dst.(*string))) +} + +type wrapTimeScanPlan struct { + next ScanPlan +} + +func (plan *wrapTimeScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapTimeScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*timeWrapper)(dst.(*time.Time))) +} + +type wrapDurationScanPlan struct { + next ScanPlan +} + +func (plan *wrapDurationScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapDurationScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*durationWrapper)(dst.(*time.Duration))) +} + +type wrapNetIPNetScanPlan struct { + next ScanPlan +} + +func (plan *wrapNetIPNetScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapNetIPNetScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*netIPNetWrapper)(dst.(*net.IPNet))) +} + +type wrapNetIPScanPlan struct { + next ScanPlan +} + +func (plan *wrapNetIPScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapNetIPScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*netIPWrapper)(dst.(*net.IP))) +} + +type wrapNetipPrefixScanPlan struct { + next ScanPlan +} + +func (plan *wrapNetipPrefixScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapNetipPrefixScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*netipPrefixWrapper)(dst.(*netip.Prefix))) +} + +type wrapNetipAddrScanPlan struct { + next ScanPlan +} + +func (plan *wrapNetipAddrScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapNetipAddrScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*netipAddrWrapper)(dst.(*netip.Addr))) +} + +type wrapMapStringToPointerStringScanPlan struct { + next ScanPlan +} + +func (plan *wrapMapStringToPointerStringScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapMapStringToPointerStringScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*mapStringToPointerStringWrapper)(dst.(*map[string]*string))) +} + +type wrapMapStringToStringScanPlan struct { + next ScanPlan +} + +func (plan *wrapMapStringToStringScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapMapStringToStringScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*mapStringToStringWrapper)(dst.(*map[string]string))) +} + +type wrapByte16ScanPlan struct { + next ScanPlan +} + +func (plan *wrapByte16ScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapByte16ScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*byte16Wrapper)(dst.(*[16]byte))) +} + +type wrapByteSliceScanPlan struct { + next ScanPlan +} + +func (plan *wrapByteSliceScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapByteSliceScanPlan) Scan(src []byte, dst any) error { + return plan.next.Scan(src, (*byteSliceWrapper)(dst.(*[]byte))) +} + +type pointerEmptyInterfaceScanPlan struct { + codec Codec + m *Map + oid uint32 + formatCode int16 +} + +func (plan *pointerEmptyInterfaceScanPlan) Scan(src []byte, dst any) error { + value, err := plan.codec.DecodeValue(plan.m, plan.oid, plan.formatCode, src) + if err != nil { + return err + } + + ptrAny := dst.(*any) + *ptrAny = value + + return nil +} + +// TryWrapStructPlan tries to wrap a struct with a wrapper that implements CompositeIndexGetter. +func TryWrapStructScanPlan(target any) (plan WrappedScanPlanNextSetter, nextValue any, ok bool) { + targetValue := reflect.ValueOf(target) + if targetValue.Kind() != reflect.Ptr { + return nil, nil, false + } + + var targetElemValue reflect.Value + if targetValue.IsNil() { + targetElemValue = reflect.Zero(targetValue.Type().Elem()) + } else { + targetElemValue = targetValue.Elem() + } + targetElemType := targetElemValue.Type() + + if targetElemType.Kind() == reflect.Struct { + exportedFields := getExportedFieldValues(targetElemValue) + if len(exportedFields) == 0 { + return nil, nil, false + } + + w := ptrStructWrapper{ + s: target, + exportedFields: exportedFields, + } + return &wrapAnyPtrStructScanPlan{}, &w, true + } + + return nil, nil, false +} + +type wrapAnyPtrStructScanPlan struct { + next ScanPlan +} + +func (plan *wrapAnyPtrStructScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapAnyPtrStructScanPlan) Scan(src []byte, target any) error { + w := ptrStructWrapper{ + s: target, + exportedFields: getExportedFieldValues(reflect.ValueOf(target).Elem()), + } + + return plan.next.Scan(src, &w) +} + +// TryWrapPtrSliceScanPlan tries to wrap a pointer to a single dimension slice. +func TryWrapPtrSliceScanPlan(target any) (plan WrappedScanPlanNextSetter, nextValue any, ok bool) { + // Avoid using reflect path for common types. + switch target := target.(type) { + case *[]int16: + return &wrapPtrSliceScanPlan[int16]{}, (*FlatArray[int16])(target), true + case *[]int32: + return &wrapPtrSliceScanPlan[int32]{}, (*FlatArray[int32])(target), true + case *[]int64: + return &wrapPtrSliceScanPlan[int64]{}, (*FlatArray[int64])(target), true + case *[]float32: + return &wrapPtrSliceScanPlan[float32]{}, (*FlatArray[float32])(target), true + case *[]float64: + return &wrapPtrSliceScanPlan[float64]{}, (*FlatArray[float64])(target), true + case *[]string: + return &wrapPtrSliceScanPlan[string]{}, (*FlatArray[string])(target), true + case *[]time.Time: + return &wrapPtrSliceScanPlan[time.Time]{}, (*FlatArray[time.Time])(target), true + } + + targetType := reflect.TypeOf(target) + if targetType.Kind() != reflect.Ptr { + return nil, nil, false + } + + targetElemType := targetType.Elem() + + if targetElemType.Kind() == reflect.Slice { + slice := reflect.New(targetElemType).Elem() + return &wrapPtrSliceReflectScanPlan{}, &anySliceArrayReflect{slice: slice}, true + } + return nil, nil, false +} + +type wrapPtrSliceScanPlan[T any] struct { + next ScanPlan +} + +func (plan *wrapPtrSliceScanPlan[T]) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapPtrSliceScanPlan[T]) Scan(src []byte, target any) error { + return plan.next.Scan(src, (*FlatArray[T])(target.(*[]T))) +} + +type wrapPtrSliceReflectScanPlan struct { + next ScanPlan +} + +func (plan *wrapPtrSliceReflectScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapPtrSliceReflectScanPlan) Scan(src []byte, target any) error { + return plan.next.Scan(src, &anySliceArrayReflect{slice: reflect.ValueOf(target).Elem()}) +} + +// TryWrapPtrMultiDimSliceScanPlan tries to wrap a pointer to a multi-dimension slice. +func TryWrapPtrMultiDimSliceScanPlan(target any) (plan WrappedScanPlanNextSetter, nextValue any, ok bool) { + targetValue := reflect.ValueOf(target) + if targetValue.Kind() != reflect.Ptr { + return nil, nil, false + } + + targetElemValue := targetValue.Elem() + + if targetElemValue.Kind() == reflect.Slice { + elemElemKind := targetElemValue.Type().Elem().Kind() + if elemElemKind == reflect.Slice { + if !isRagged(targetElemValue) { + return &wrapPtrMultiDimSliceScanPlan{}, &anyMultiDimSliceArray{slice: targetValue.Elem()}, true + } + } + } + + return nil, nil, false +} + +type wrapPtrMultiDimSliceScanPlan struct { + next ScanPlan +} + +func (plan *wrapPtrMultiDimSliceScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapPtrMultiDimSliceScanPlan) Scan(src []byte, target any) error { + return plan.next.Scan(src, &anyMultiDimSliceArray{slice: reflect.ValueOf(target).Elem()}) +} + +// TryWrapPtrArrayScanPlan tries to wrap a pointer to a single dimension array. +func TryWrapPtrArrayScanPlan(target any) (plan WrappedScanPlanNextSetter, nextValue any, ok bool) { + targetValue := reflect.ValueOf(target) + if targetValue.Kind() != reflect.Ptr { + return nil, nil, false + } + + targetElemValue := targetValue.Elem() + + if targetElemValue.Kind() == reflect.Array { + return &wrapPtrArrayReflectScanPlan{}, &anyArrayArrayReflect{array: targetElemValue}, true + } + return nil, nil, false +} + +type wrapPtrArrayReflectScanPlan struct { + next ScanPlan +} + +func (plan *wrapPtrArrayReflectScanPlan) SetNext(next ScanPlan) { plan.next = next } + +func (plan *wrapPtrArrayReflectScanPlan) Scan(src []byte, target any) error { + return plan.next.Scan(src, &anyArrayArrayReflect{array: reflect.ValueOf(target).Elem()}) +} + +// PlanScan prepares a plan to scan a value into target. +func (m *Map) PlanScan(oid uint32, formatCode int16, target any) ScanPlan { + oidMemo := m.memoizedScanPlans[oid] + if oidMemo == nil { + oidMemo = make(map[reflect.Type][2]ScanPlan) + m.memoizedScanPlans[oid] = oidMemo + } + targetReflectType := reflect.TypeOf(target) + typeMemo := oidMemo[targetReflectType] + plan := typeMemo[formatCode] + if plan == nil { + plan = m.planScan(oid, formatCode, target) + typeMemo[formatCode] = plan + oidMemo[targetReflectType] = typeMemo + } + + return plan +} + +func (m *Map) planScan(oid uint32, formatCode int16, target any) ScanPlan { + if target == nil { + return &scanPlanFail{m: m, oid: oid, formatCode: formatCode} + } + + if _, ok := target.(*UndecodedBytes); ok { + return scanPlanAnyToUndecodedBytes{} + } + + switch formatCode { + case BinaryFormatCode: + switch target.(type) { + case *string: + switch oid { + case TextOID, VarcharOID: + return scanPlanString{} + } + } + case TextFormatCode: + switch target.(type) { + case *string: + return scanPlanString{} + case *[]byte: + if oid != ByteaOID { + return scanPlanAnyTextToBytes{} + } + case TextScanner: + return scanPlanTextAnyToTextScanner{} + } + } + + var dt *Type + + if dataType, ok := m.TypeForOID(oid); ok { + dt = dataType + } else if dataType, ok := m.TypeForValue(target); ok { + dt = dataType + oid = dt.OID // Preserve assumed OID in case we are recursively called below. + } + + if dt != nil { + if plan := dt.Codec.PlanScan(m, oid, formatCode, target); plan != nil { + return plan + } + } + + // This needs to happen before trying m.TryWrapScanPlanFuncs. Otherwise, a sql.Scanner would not get called if it was + // defined on a type that could be unwrapped such as `type myString string`. + // + // https://github.com/jackc/pgtype/issues/197 + if _, ok := target.(sql.Scanner); ok { + if dt == nil { + return &scanPlanSQLScanner{formatCode: formatCode} + } else { + return &scanPlanCodecSQLScanner{c: dt.Codec, m: m, oid: oid, formatCode: formatCode} + } + } + + for _, f := range m.TryWrapScanPlanFuncs { + if wrapperPlan, nextDst, ok := f(target); ok { + if nextPlan := m.planScan(oid, formatCode, nextDst); nextPlan != nil { + if _, failed := nextPlan.(*scanPlanFail); !failed { + wrapperPlan.SetNext(nextPlan) + return wrapperPlan + } + } + } + } + + if dt != nil { + if _, ok := target.(*any); ok { + return &pointerEmptyInterfaceScanPlan{codec: dt.Codec, m: m, oid: oid, formatCode: formatCode} + } + } + + return &scanPlanFail{m: m, oid: oid, formatCode: formatCode} +} + +func (m *Map) Scan(oid uint32, formatCode int16, src []byte, dst any) error { + if dst == nil { + return nil + } + + plan := m.PlanScan(oid, formatCode, dst) + return plan.Scan(src, dst) +} + +var ErrScanTargetTypeChanged = errors.New("scan target type changed") + +func codecScan(codec Codec, m *Map, oid uint32, format int16, src []byte, dst any) error { + scanPlan := codec.PlanScan(m, oid, format, dst) + if scanPlan == nil { + return fmt.Errorf("PlanScan did not find a plan") + } + return scanPlan.Scan(src, dst) +} + +func codecDecodeToTextFormat(codec Codec, m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + if format == TextFormatCode { + return string(src), nil + } else { + value, err := codec.DecodeValue(m, oid, format, src) + if err != nil { + return nil, err + } + buf, err := m.Encode(oid, TextFormatCode, value, nil) + if err != nil { + return nil, err + } + return string(buf), nil + } +} + +// PlanEncode returns an Encode plan for encoding value into PostgreSQL format for oid and format. If no plan can be +// found then nil is returned. +func (m *Map) PlanEncode(oid uint32, format int16, value any) EncodePlan { + oidMemo := m.memoizedEncodePlans[oid] + if oidMemo == nil { + oidMemo = make(map[reflect.Type][2]EncodePlan) + m.memoizedEncodePlans[oid] = oidMemo + } + targetReflectType := reflect.TypeOf(value) + typeMemo := oidMemo[targetReflectType] + plan := typeMemo[format] + if plan == nil { + plan = m.planEncode(oid, format, value) + typeMemo[format] = plan + oidMemo[targetReflectType] = typeMemo + } + + return plan +} + +func (m *Map) planEncode(oid uint32, format int16, value any) EncodePlan { + if format == TextFormatCode { + switch value.(type) { + case string: + return encodePlanStringToAnyTextFormat{} + case TextValuer: + return encodePlanTextValuerToAnyTextFormat{} + } + } + + var dt *Type + if dataType, ok := m.TypeForOID(oid); ok { + dt = dataType + } else { + // If no type for the OID was found, then either it is unknowable (e.g. the simple protocol) or it is an + // unregistered type. In either case try to find the type and OID that matches the value (e.g. a []byte would be + // registered to PostgreSQL bytea). + if dataType, ok := m.TypeForValue(value); ok { + dt = dataType + oid = dt.OID // Preserve assumed OID in case we are recursively called below. + } + } + + if dt != nil { + if plan := dt.Codec.PlanEncode(m, oid, format, value); plan != nil { + return plan + } + } + + for _, f := range m.TryWrapEncodePlanFuncs { + if wrapperPlan, nextValue, ok := f(value); ok { + if nextPlan := m.PlanEncode(oid, format, nextValue); nextPlan != nil { + wrapperPlan.SetNext(nextPlan) + return wrapperPlan + } + } + } + + if _, ok := value.(driver.Valuer); ok { + return &encodePlanDriverValuer{m: m, oid: oid, formatCode: format} + } + + return nil +} + +type encodePlanStringToAnyTextFormat struct{} + +func (encodePlanStringToAnyTextFormat) Encode(value any, buf []byte) (newBuf []byte, err error) { + s := value.(string) + return append(buf, s...), nil +} + +type encodePlanTextValuerToAnyTextFormat struct{} + +func (encodePlanTextValuerToAnyTextFormat) Encode(value any, buf []byte) (newBuf []byte, err error) { + t, err := value.(TextValuer).TextValue() + if err != nil { + return nil, err + } + if !t.Valid { + return nil, nil + } + + return append(buf, t.String...), nil +} + +type encodePlanDriverValuer struct { + m *Map + oid uint32 + formatCode int16 +} + +func (plan *encodePlanDriverValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + dv := value.(driver.Valuer) + if dv == nil { + return nil, nil + } + v, err := dv.Value() + if err != nil { + return nil, err + } + if v == nil { + return nil, nil + } + + newBuf, err = plan.m.Encode(plan.oid, plan.formatCode, v, buf) + if err == nil { + return newBuf, nil + } + + s, ok := v.(string) + if !ok { + return nil, err + } + + var scannedValue any + scanErr := plan.m.Scan(plan.oid, TextFormatCode, []byte(s), &scannedValue) + if scanErr != nil { + return nil, err + } + + // Prevent infinite loop. We can't encode this. See https://github.com/jackc/pgx/issues/1331. + if reflect.TypeOf(value) == reflect.TypeOf(scannedValue) { + return nil, fmt.Errorf("tried to encode %v via encoding to text and scanning but failed due to receiving same type back", value) + } + + var err2 error + newBuf, err2 = plan.m.Encode(plan.oid, BinaryFormatCode, scannedValue, buf) + if err2 != nil { + return nil, err + } + + return newBuf, nil +} + +// TryWrapEncodePlanFunc is a function that tries to create a wrapper plan for value. If successful it returns a plan +// that will convert the value passed to Encode and then call the next plan. nextValue is value as it will be converted +// by plan. It must be used to find another suitable EncodePlan. When it is found SetNext must be called on plan for it +// to be usabled. ok indicates if a suitable wrapper was found. +type TryWrapEncodePlanFunc func(value any) (plan WrappedEncodePlanNextSetter, nextValue any, ok bool) + +type derefPointerEncodePlan struct { + next EncodePlan +} + +func (plan *derefPointerEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *derefPointerEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + ptr := reflect.ValueOf(value) + + if ptr.IsNil() { + return nil, nil + } + + return plan.next.Encode(ptr.Elem().Interface(), buf) +} + +// TryWrapDerefPointerEncodePlan tries to dereference a pointer. e.g. If value was of type *string then a wrapper plan +// would be returned that dereferences the value. +func TryWrapDerefPointerEncodePlan(value any) (plan WrappedEncodePlanNextSetter, nextValue any, ok bool) { + if _, ok := value.(driver.Valuer); ok { + return nil, nil, false + } + + if valueType := reflect.TypeOf(value); valueType != nil && valueType.Kind() == reflect.Ptr { + return &derefPointerEncodePlan{}, reflect.New(valueType.Elem()).Elem().Interface(), true + } + + return nil, nil, false +} + +var kindToTypes map[reflect.Kind]reflect.Type = map[reflect.Kind]reflect.Type{ + reflect.Int: reflect.TypeOf(int(0)), + reflect.Int8: reflect.TypeOf(int8(0)), + reflect.Int16: reflect.TypeOf(int16(0)), + reflect.Int32: reflect.TypeOf(int32(0)), + reflect.Int64: reflect.TypeOf(int64(0)), + reflect.Uint: reflect.TypeOf(uint(0)), + reflect.Uint8: reflect.TypeOf(uint8(0)), + reflect.Uint16: reflect.TypeOf(uint16(0)), + reflect.Uint32: reflect.TypeOf(uint32(0)), + reflect.Uint64: reflect.TypeOf(uint64(0)), + reflect.Float32: reflect.TypeOf(float32(0)), + reflect.Float64: reflect.TypeOf(float64(0)), + reflect.String: reflect.TypeOf(""), + reflect.Bool: reflect.TypeOf(false), +} + +var byteSliceType = reflect.TypeOf([]byte{}) + +type underlyingTypeEncodePlan struct { + nextValueType reflect.Type + next EncodePlan +} + +func (plan *underlyingTypeEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *underlyingTypeEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(reflect.ValueOf(value).Convert(plan.nextValueType).Interface(), buf) +} + +// TryWrapFindUnderlyingTypeEncodePlan tries to convert to a Go builtin type. e.g. If value was of type MyString and +// MyString was defined as a string then a wrapper plan would be returned that converts MyString to string. +func TryWrapFindUnderlyingTypeEncodePlan(value any) (plan WrappedEncodePlanNextSetter, nextValue any, ok bool) { + if value == nil { + return nil, nil, false + } + + if _, ok := value.(driver.Valuer); ok { + return nil, nil, false + } + + if _, ok := value.(SkipUnderlyingTypePlanner); ok { + return nil, nil, false + } + + refValue := reflect.ValueOf(value) + + nextValueType := kindToTypes[refValue.Kind()] + if nextValueType != nil && refValue.Type() != nextValueType { + return &underlyingTypeEncodePlan{nextValueType: nextValueType}, refValue.Convert(nextValueType).Interface(), true + } + + // []byte is a special case. It is a slice but we treat it as a scalar type. In the case of a named type like + // json.RawMessage which is defined as []byte the underlying type should be considered as []byte. But any other slice + // does not have a special underlying type. + // + // https://github.com/jackc/pgx/issues/1763 + if refValue.Type() != byteSliceType && refValue.Type().AssignableTo(byteSliceType) { + return &underlyingTypeEncodePlan{nextValueType: byteSliceType}, refValue.Convert(byteSliceType).Interface(), true + } + + // Get underlying type of any array. + // https://github.com/jackc/pgx/issues/2107 + if refValue.Kind() == reflect.Array { + underlyingArrayType := reflect.ArrayOf(refValue.Len(), refValue.Type().Elem()) + if refValue.Type() != underlyingArrayType { + return &underlyingTypeEncodePlan{nextValueType: underlyingArrayType}, refValue.Convert(underlyingArrayType).Interface(), true + } + } + + return nil, nil, false +} + +type WrappedEncodePlanNextSetter interface { + SetNext(EncodePlan) + EncodePlan +} + +// TryWrapBuiltinTypeEncodePlan tries to wrap a builtin type with a wrapper that provides additional methods. e.g. If +// value was of type int32 then a wrapper plan would be returned that converts value to a type that implements +// Int64Valuer. +func TryWrapBuiltinTypeEncodePlan(value any) (plan WrappedEncodePlanNextSetter, nextValue any, ok bool) { + if _, ok := value.(driver.Valuer); ok { + return nil, nil, false + } + + switch value := value.(type) { + case int8: + return &wrapInt8EncodePlan{}, int8Wrapper(value), true + case int16: + return &wrapInt16EncodePlan{}, int16Wrapper(value), true + case int32: + return &wrapInt32EncodePlan{}, int32Wrapper(value), true + case int64: + return &wrapInt64EncodePlan{}, int64Wrapper(value), true + case int: + return &wrapIntEncodePlan{}, intWrapper(value), true + case uint8: + return &wrapUint8EncodePlan{}, uint8Wrapper(value), true + case uint16: + return &wrapUint16EncodePlan{}, uint16Wrapper(value), true + case uint32: + return &wrapUint32EncodePlan{}, uint32Wrapper(value), true + case uint64: + return &wrapUint64EncodePlan{}, uint64Wrapper(value), true + case uint: + return &wrapUintEncodePlan{}, uintWrapper(value), true + case float32: + return &wrapFloat32EncodePlan{}, float32Wrapper(value), true + case float64: + return &wrapFloat64EncodePlan{}, float64Wrapper(value), true + case string: + return &wrapStringEncodePlan{}, stringWrapper(value), true + case time.Time: + return &wrapTimeEncodePlan{}, timeWrapper(value), true + case time.Duration: + return &wrapDurationEncodePlan{}, durationWrapper(value), true + case net.IPNet: + return &wrapNetIPNetEncodePlan{}, netIPNetWrapper(value), true + case net.IP: + return &wrapNetIPEncodePlan{}, netIPWrapper(value), true + case netip.Prefix: + return &wrapNetipPrefixEncodePlan{}, netipPrefixWrapper(value), true + case netip.Addr: + return &wrapNetipAddrEncodePlan{}, netipAddrWrapper(value), true + case map[string]*string: + return &wrapMapStringToPointerStringEncodePlan{}, mapStringToPointerStringWrapper(value), true + case map[string]string: + return &wrapMapStringToStringEncodePlan{}, mapStringToStringWrapper(value), true + case [16]byte: + return &wrapByte16EncodePlan{}, byte16Wrapper(value), true + case []byte: + return &wrapByteSliceEncodePlan{}, byteSliceWrapper(value), true + case fmt.Stringer: + return &wrapFmtStringerEncodePlan{}, fmtStringerWrapper{value}, true + } + + return nil, nil, false +} + +type wrapInt8EncodePlan struct { + next EncodePlan +} + +func (plan *wrapInt8EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapInt8EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(int8Wrapper(value.(int8)), buf) +} + +type wrapInt16EncodePlan struct { + next EncodePlan +} + +func (plan *wrapInt16EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapInt16EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(int16Wrapper(value.(int16)), buf) +} + +type wrapInt32EncodePlan struct { + next EncodePlan +} + +func (plan *wrapInt32EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapInt32EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(int32Wrapper(value.(int32)), buf) +} + +type wrapInt64EncodePlan struct { + next EncodePlan +} + +func (plan *wrapInt64EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapInt64EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(int64Wrapper(value.(int64)), buf) +} + +type wrapIntEncodePlan struct { + next EncodePlan +} + +func (plan *wrapIntEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapIntEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(intWrapper(value.(int)), buf) +} + +type wrapUint8EncodePlan struct { + next EncodePlan +} + +func (plan *wrapUint8EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapUint8EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(uint8Wrapper(value.(uint8)), buf) +} + +type wrapUint16EncodePlan struct { + next EncodePlan +} + +func (plan *wrapUint16EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapUint16EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(uint16Wrapper(value.(uint16)), buf) +} + +type wrapUint32EncodePlan struct { + next EncodePlan +} + +func (plan *wrapUint32EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapUint32EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(uint32Wrapper(value.(uint32)), buf) +} + +type wrapUint64EncodePlan struct { + next EncodePlan +} + +func (plan *wrapUint64EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapUint64EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(uint64Wrapper(value.(uint64)), buf) +} + +type wrapUintEncodePlan struct { + next EncodePlan +} + +func (plan *wrapUintEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapUintEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(uintWrapper(value.(uint)), buf) +} + +type wrapFloat32EncodePlan struct { + next EncodePlan +} + +func (plan *wrapFloat32EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapFloat32EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(float32Wrapper(value.(float32)), buf) +} + +type wrapFloat64EncodePlan struct { + next EncodePlan +} + +func (plan *wrapFloat64EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapFloat64EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(float64Wrapper(value.(float64)), buf) +} + +type wrapStringEncodePlan struct { + next EncodePlan +} + +func (plan *wrapStringEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapStringEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(stringWrapper(value.(string)), buf) +} + +type wrapTimeEncodePlan struct { + next EncodePlan +} + +func (plan *wrapTimeEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapTimeEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(timeWrapper(value.(time.Time)), buf) +} + +type wrapDurationEncodePlan struct { + next EncodePlan +} + +func (plan *wrapDurationEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapDurationEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(durationWrapper(value.(time.Duration)), buf) +} + +type wrapNetIPNetEncodePlan struct { + next EncodePlan +} + +func (plan *wrapNetIPNetEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapNetIPNetEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(netIPNetWrapper(value.(net.IPNet)), buf) +} + +type wrapNetIPEncodePlan struct { + next EncodePlan +} + +func (plan *wrapNetIPEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapNetIPEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(netIPWrapper(value.(net.IP)), buf) +} + +type wrapNetipPrefixEncodePlan struct { + next EncodePlan +} + +func (plan *wrapNetipPrefixEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapNetipPrefixEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(netipPrefixWrapper(value.(netip.Prefix)), buf) +} + +type wrapNetipAddrEncodePlan struct { + next EncodePlan +} + +func (plan *wrapNetipAddrEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapNetipAddrEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(netipAddrWrapper(value.(netip.Addr)), buf) +} + +type wrapMapStringToPointerStringEncodePlan struct { + next EncodePlan +} + +func (plan *wrapMapStringToPointerStringEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapMapStringToPointerStringEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(mapStringToPointerStringWrapper(value.(map[string]*string)), buf) +} + +type wrapMapStringToStringEncodePlan struct { + next EncodePlan +} + +func (plan *wrapMapStringToStringEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapMapStringToStringEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(mapStringToStringWrapper(value.(map[string]string)), buf) +} + +type wrapByte16EncodePlan struct { + next EncodePlan +} + +func (plan *wrapByte16EncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapByte16EncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(byte16Wrapper(value.([16]byte)), buf) +} + +type wrapByteSliceEncodePlan struct { + next EncodePlan +} + +func (plan *wrapByteSliceEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapByteSliceEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(byteSliceWrapper(value.([]byte)), buf) +} + +type wrapFmtStringerEncodePlan struct { + next EncodePlan +} + +func (plan *wrapFmtStringerEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapFmtStringerEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode(fmtStringerWrapper{value.(fmt.Stringer)}, buf) +} + +// TryWrapStructPlan tries to wrap a struct with a wrapper that implements CompositeIndexGetter. +func TryWrapStructEncodePlan(value any) (plan WrappedEncodePlanNextSetter, nextValue any, ok bool) { + if _, ok := value.(driver.Valuer); ok { + return nil, nil, false + } + + if valueType := reflect.TypeOf(value); valueType != nil && valueType.Kind() == reflect.Struct { + exportedFields := getExportedFieldValues(reflect.ValueOf(value)) + if len(exportedFields) == 0 { + return nil, nil, false + } + + w := structWrapper{ + s: value, + exportedFields: exportedFields, + } + return &wrapAnyStructEncodePlan{}, w, true + } + + return nil, nil, false +} + +type wrapAnyStructEncodePlan struct { + next EncodePlan +} + +func (plan *wrapAnyStructEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapAnyStructEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + w := structWrapper{ + s: value, + exportedFields: getExportedFieldValues(reflect.ValueOf(value)), + } + + return plan.next.Encode(w, buf) +} + +func getExportedFieldValues(structValue reflect.Value) []reflect.Value { + structType := structValue.Type() + exportedFields := make([]reflect.Value, 0, structValue.NumField()) + for i := 0; i < structType.NumField(); i++ { + sf := structType.Field(i) + if sf.IsExported() { + exportedFields = append(exportedFields, structValue.Field(i)) + } + } + + return exportedFields +} + +func TryWrapSliceEncodePlan(value any) (plan WrappedEncodePlanNextSetter, nextValue any, ok bool) { + if _, ok := value.(driver.Valuer); ok { + return nil, nil, false + } + + // Avoid using reflect path for common types. + switch value := value.(type) { + case []int16: + return &wrapSliceEncodePlan[int16]{}, (FlatArray[int16])(value), true + case []int32: + return &wrapSliceEncodePlan[int32]{}, (FlatArray[int32])(value), true + case []int64: + return &wrapSliceEncodePlan[int64]{}, (FlatArray[int64])(value), true + case []float32: + return &wrapSliceEncodePlan[float32]{}, (FlatArray[float32])(value), true + case []float64: + return &wrapSliceEncodePlan[float64]{}, (FlatArray[float64])(value), true + case []string: + return &wrapSliceEncodePlan[string]{}, (FlatArray[string])(value), true + case []time.Time: + return &wrapSliceEncodePlan[time.Time]{}, (FlatArray[time.Time])(value), true + } + + if valueType := reflect.TypeOf(value); valueType != nil && valueType.Kind() == reflect.Slice { + w := anySliceArrayReflect{ + slice: reflect.ValueOf(value), + } + return &wrapSliceEncodeReflectPlan{}, w, true + } + + return nil, nil, false +} + +type wrapSliceEncodePlan[T any] struct { + next EncodePlan +} + +func (plan *wrapSliceEncodePlan[T]) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapSliceEncodePlan[T]) Encode(value any, buf []byte) (newBuf []byte, err error) { + return plan.next.Encode((FlatArray[T])(value.([]T)), buf) +} + +type wrapSliceEncodeReflectPlan struct { + next EncodePlan +} + +func (plan *wrapSliceEncodeReflectPlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapSliceEncodeReflectPlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + w := anySliceArrayReflect{ + slice: reflect.ValueOf(value), + } + + return plan.next.Encode(w, buf) +} + +func TryWrapMultiDimSliceEncodePlan(value any) (plan WrappedEncodePlanNextSetter, nextValue any, ok bool) { + if _, ok := value.(driver.Valuer); ok { + return nil, nil, false + } + + sliceValue := reflect.ValueOf(value) + if sliceValue.Kind() == reflect.Slice { + valueElemType := sliceValue.Type().Elem() + + if valueElemType.Kind() == reflect.Slice { + if !isRagged(sliceValue) { + w := anyMultiDimSliceArray{ + slice: reflect.ValueOf(value), + } + return &wrapMultiDimSliceEncodePlan{}, &w, true + } + } + } + + return nil, nil, false +} + +type wrapMultiDimSliceEncodePlan struct { + next EncodePlan +} + +func (plan *wrapMultiDimSliceEncodePlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapMultiDimSliceEncodePlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + w := anyMultiDimSliceArray{ + slice: reflect.ValueOf(value), + } + + return plan.next.Encode(&w, buf) +} + +func TryWrapArrayEncodePlan(value any) (plan WrappedEncodePlanNextSetter, nextValue any, ok bool) { + if _, ok := value.(driver.Valuer); ok { + return nil, nil, false + } + + if valueType := reflect.TypeOf(value); valueType != nil && valueType.Kind() == reflect.Array { + w := anyArrayArrayReflect{ + array: reflect.ValueOf(value), + } + return &wrapArrayEncodeReflectPlan{}, w, true + } + + return nil, nil, false +} + +type wrapArrayEncodeReflectPlan struct { + next EncodePlan +} + +func (plan *wrapArrayEncodeReflectPlan) SetNext(next EncodePlan) { plan.next = next } + +func (plan *wrapArrayEncodeReflectPlan) Encode(value any, buf []byte) (newBuf []byte, err error) { + w := anyArrayArrayReflect{ + array: reflect.ValueOf(value), + } + + return plan.next.Encode(w, buf) +} + +func newEncodeError(value any, m *Map, oid uint32, formatCode int16, err error) error { + var format string + switch formatCode { + case TextFormatCode: + format = "text" + case BinaryFormatCode: + format = "binary" + default: + format = fmt.Sprintf("unknown (%d)", formatCode) + } + + var dataTypeName string + if t, ok := m.TypeForOID(oid); ok { + dataTypeName = t.Name + } else { + dataTypeName = "unknown type" + } + + return fmt.Errorf("unable to encode %#v into %s format for %s (OID %d): %w", value, format, dataTypeName, oid, err) +} + +// Encode appends the encoded bytes of value to buf. If value is the SQL value NULL then append nothing and return +// (nil, nil). The caller of Encode is responsible for writing the correct NULL value or the length of the data +// written. +func (m *Map) Encode(oid uint32, formatCode int16, value any, buf []byte) (newBuf []byte, err error) { + if isNil, callNilDriverValuer := isNilDriverValuer(value); isNil { + if callNilDriverValuer { + newBuf, err = (&encodePlanDriverValuer{m: m, oid: oid, formatCode: formatCode}).Encode(value, buf) + if err != nil { + return nil, newEncodeError(value, m, oid, formatCode, err) + } + + return newBuf, nil + } else { + return nil, nil + } + } + + plan := m.PlanEncode(oid, formatCode, value) + if plan == nil { + return nil, newEncodeError(value, m, oid, formatCode, errors.New("cannot find encode plan")) + } + + newBuf, err = plan.Encode(value, buf) + if err != nil { + return nil, newEncodeError(value, m, oid, formatCode, err) + } + + return newBuf, nil +} + +// SQLScanner returns a database/sql.Scanner for v. This is necessary for types like Array[T] and Range[T] where the +// type needs assistance from Map to implement the sql.Scanner interface. It is not necessary for types like Box that +// implement sql.Scanner directly. +// +// This uses the type of v to look up the PostgreSQL OID that v presumably came from. This means v must be registered +// with m by calling RegisterDefaultPgType. +func (m *Map) SQLScanner(v any) sql.Scanner { + if s, ok := v.(sql.Scanner); ok { + return s + } + + return &sqlScannerWrapper{m: m, v: v} +} + +type sqlScannerWrapper struct { + m *Map + v any +} + +func (w *sqlScannerWrapper) Scan(src any) error { + t, ok := w.m.TypeForValue(w.v) + if !ok { + return fmt.Errorf("cannot convert to sql.Scanner: cannot find registered type for %T", w.v) + } + + var bufSrc []byte + if src != nil { + switch src := src.(type) { + case string: + bufSrc = []byte(src) + case []byte: + bufSrc = src + default: + bufSrc = []byte(fmt.Sprint(bufSrc)) + } + } + + return w.m.Scan(t.OID, TextFormatCode, bufSrc, w.v) +} + +// canBeNil returns true if value can be nil. +func canBeNil(value any) bool { + refVal := reflect.ValueOf(value) + kind := refVal.Kind() + switch kind { + case reflect.Chan, reflect.Func, reflect.Map, reflect.Ptr, reflect.UnsafePointer, reflect.Interface, reflect.Slice: + return true + default: + return false + } +} + +// valuerReflectType is a reflect.Type for driver.Valuer. It has confusing syntax because reflect.TypeOf returns nil +// when it's argument is a nil interface value. So we use a pointer to the interface and call Elem to get the actual +// type. Yuck. +// +// This can be simplified in Go 1.22 with reflect.TypeFor. +// +// var valuerReflectType = reflect.TypeFor[driver.Valuer]() +var valuerReflectType = reflect.TypeOf((*driver.Valuer)(nil)).Elem() + +// isNilDriverValuer returns true if value is any type of nil unless it implements driver.Valuer. *T is not considered to implement +// driver.Valuer if it is only implemented by T. +func isNilDriverValuer(value any) (isNil bool, callNilDriverValuer bool) { + if value == nil { + return true, false + } + + refVal := reflect.ValueOf(value) + kind := refVal.Kind() + switch kind { + case reflect.Chan, reflect.Func, reflect.Map, reflect.Ptr, reflect.UnsafePointer, reflect.Interface, reflect.Slice: + if !refVal.IsNil() { + return false, false + } + + if _, ok := value.(driver.Valuer); ok { + if kind == reflect.Ptr { + // The type assertion will succeed if driver.Valuer is implemented on T or *T. Check if it is implemented on *T + // by checking if it is not implemented on *T. + return true, !refVal.Type().Elem().Implements(valuerReflectType) + } else { + return true, true + } + } + + return true, false + default: + return false, false + } +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/pgtype_default.go b/vendor/github.com/jackc/pgx/v5/pgtype/pgtype_default.go new file mode 100644 index 00000000..c8125731 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/pgtype_default.go @@ -0,0 +1,229 @@ +package pgtype + +import ( + "encoding/json" + "encoding/xml" + "net" + "net/netip" + "reflect" + "sync" + "time" +) + +var ( + // defaultMap contains default mappings between PostgreSQL server types and Go type handling logic. + defaultMap *Map + defaultMapInitOnce = sync.Once{} +) + +func initDefaultMap() { + defaultMap = &Map{ + oidToType: make(map[uint32]*Type), + nameToType: make(map[string]*Type), + reflectTypeToName: make(map[reflect.Type]string), + oidToFormatCode: make(map[uint32]int16), + + memoizedScanPlans: make(map[uint32]map[reflect.Type][2]ScanPlan), + memoizedEncodePlans: make(map[uint32]map[reflect.Type][2]EncodePlan), + + TryWrapEncodePlanFuncs: []TryWrapEncodePlanFunc{ + TryWrapDerefPointerEncodePlan, + TryWrapBuiltinTypeEncodePlan, + TryWrapFindUnderlyingTypeEncodePlan, + TryWrapStructEncodePlan, + TryWrapSliceEncodePlan, + TryWrapMultiDimSliceEncodePlan, + TryWrapArrayEncodePlan, + }, + + TryWrapScanPlanFuncs: []TryWrapScanPlanFunc{ + TryPointerPointerScanPlan, + TryWrapBuiltinTypeScanPlan, + TryFindUnderlyingTypeScanPlan, + TryWrapStructScanPlan, + TryWrapPtrSliceScanPlan, + TryWrapPtrMultiDimSliceScanPlan, + TryWrapPtrArrayScanPlan, + }, + } + + // Base types + defaultMap.RegisterType(&Type{Name: "aclitem", OID: ACLItemOID, Codec: &TextFormatOnlyCodec{TextCodec{}}}) + defaultMap.RegisterType(&Type{Name: "bit", OID: BitOID, Codec: BitsCodec{}}) + defaultMap.RegisterType(&Type{Name: "bool", OID: BoolOID, Codec: BoolCodec{}}) + defaultMap.RegisterType(&Type{Name: "box", OID: BoxOID, Codec: BoxCodec{}}) + defaultMap.RegisterType(&Type{Name: "bpchar", OID: BPCharOID, Codec: TextCodec{}}) + defaultMap.RegisterType(&Type{Name: "bytea", OID: ByteaOID, Codec: ByteaCodec{}}) + defaultMap.RegisterType(&Type{Name: "char", OID: QCharOID, Codec: QCharCodec{}}) + defaultMap.RegisterType(&Type{Name: "cid", OID: CIDOID, Codec: Uint32Codec{}}) + defaultMap.RegisterType(&Type{Name: "cidr", OID: CIDROID, Codec: InetCodec{}}) + defaultMap.RegisterType(&Type{Name: "circle", OID: CircleOID, Codec: CircleCodec{}}) + defaultMap.RegisterType(&Type{Name: "date", OID: DateOID, Codec: DateCodec{}}) + defaultMap.RegisterType(&Type{Name: "float4", OID: Float4OID, Codec: Float4Codec{}}) + defaultMap.RegisterType(&Type{Name: "float8", OID: Float8OID, Codec: Float8Codec{}}) + defaultMap.RegisterType(&Type{Name: "inet", OID: InetOID, Codec: InetCodec{}}) + defaultMap.RegisterType(&Type{Name: "int2", OID: Int2OID, Codec: Int2Codec{}}) + defaultMap.RegisterType(&Type{Name: "int4", OID: Int4OID, Codec: Int4Codec{}}) + defaultMap.RegisterType(&Type{Name: "int8", OID: Int8OID, Codec: Int8Codec{}}) + defaultMap.RegisterType(&Type{Name: "interval", OID: IntervalOID, Codec: IntervalCodec{}}) + defaultMap.RegisterType(&Type{Name: "json", OID: JSONOID, Codec: &JSONCodec{Marshal: json.Marshal, Unmarshal: json.Unmarshal}}) + defaultMap.RegisterType(&Type{Name: "jsonb", OID: JSONBOID, Codec: &JSONBCodec{Marshal: json.Marshal, Unmarshal: json.Unmarshal}}) + defaultMap.RegisterType(&Type{Name: "jsonpath", OID: JSONPathOID, Codec: &TextFormatOnlyCodec{TextCodec{}}}) + defaultMap.RegisterType(&Type{Name: "line", OID: LineOID, Codec: LineCodec{}}) + defaultMap.RegisterType(&Type{Name: "lseg", OID: LsegOID, Codec: LsegCodec{}}) + defaultMap.RegisterType(&Type{Name: "macaddr8", OID: Macaddr8OID, Codec: MacaddrCodec{}}) + defaultMap.RegisterType(&Type{Name: "macaddr", OID: MacaddrOID, Codec: MacaddrCodec{}}) + defaultMap.RegisterType(&Type{Name: "name", OID: NameOID, Codec: TextCodec{}}) + defaultMap.RegisterType(&Type{Name: "numeric", OID: NumericOID, Codec: NumericCodec{}}) + defaultMap.RegisterType(&Type{Name: "oid", OID: OIDOID, Codec: Uint32Codec{}}) + defaultMap.RegisterType(&Type{Name: "path", OID: PathOID, Codec: PathCodec{}}) + defaultMap.RegisterType(&Type{Name: "point", OID: PointOID, Codec: PointCodec{}}) + defaultMap.RegisterType(&Type{Name: "polygon", OID: PolygonOID, Codec: PolygonCodec{}}) + defaultMap.RegisterType(&Type{Name: "record", OID: RecordOID, Codec: RecordCodec{}}) + defaultMap.RegisterType(&Type{Name: "text", OID: TextOID, Codec: TextCodec{}}) + defaultMap.RegisterType(&Type{Name: "tid", OID: TIDOID, Codec: TIDCodec{}}) + defaultMap.RegisterType(&Type{Name: "time", OID: TimeOID, Codec: TimeCodec{}}) + defaultMap.RegisterType(&Type{Name: "timestamp", OID: TimestampOID, Codec: &TimestampCodec{}}) + defaultMap.RegisterType(&Type{Name: "timestamptz", OID: TimestamptzOID, Codec: &TimestamptzCodec{}}) + defaultMap.RegisterType(&Type{Name: "unknown", OID: UnknownOID, Codec: TextCodec{}}) + defaultMap.RegisterType(&Type{Name: "uuid", OID: UUIDOID, Codec: UUIDCodec{}}) + defaultMap.RegisterType(&Type{Name: "varbit", OID: VarbitOID, Codec: BitsCodec{}}) + defaultMap.RegisterType(&Type{Name: "varchar", OID: VarcharOID, Codec: TextCodec{}}) + defaultMap.RegisterType(&Type{Name: "xid", OID: XIDOID, Codec: Uint32Codec{}}) + defaultMap.RegisterType(&Type{Name: "xml", OID: XMLOID, Codec: &XMLCodec{Marshal: xml.Marshal, Unmarshal: xml.Unmarshal}}) + + // Range types + defaultMap.RegisterType(&Type{Name: "daterange", OID: DaterangeOID, Codec: &RangeCodec{ElementType: defaultMap.oidToType[DateOID]}}) + defaultMap.RegisterType(&Type{Name: "int4range", OID: Int4rangeOID, Codec: &RangeCodec{ElementType: defaultMap.oidToType[Int4OID]}}) + defaultMap.RegisterType(&Type{Name: "int8range", OID: Int8rangeOID, Codec: &RangeCodec{ElementType: defaultMap.oidToType[Int8OID]}}) + defaultMap.RegisterType(&Type{Name: "numrange", OID: NumrangeOID, Codec: &RangeCodec{ElementType: defaultMap.oidToType[NumericOID]}}) + defaultMap.RegisterType(&Type{Name: "tsrange", OID: TsrangeOID, Codec: &RangeCodec{ElementType: defaultMap.oidToType[TimestampOID]}}) + defaultMap.RegisterType(&Type{Name: "tstzrange", OID: TstzrangeOID, Codec: &RangeCodec{ElementType: defaultMap.oidToType[TimestamptzOID]}}) + + // Multirange types + defaultMap.RegisterType(&Type{Name: "datemultirange", OID: DatemultirangeOID, Codec: &MultirangeCodec{ElementType: defaultMap.oidToType[DaterangeOID]}}) + defaultMap.RegisterType(&Type{Name: "int4multirange", OID: Int4multirangeOID, Codec: &MultirangeCodec{ElementType: defaultMap.oidToType[Int4rangeOID]}}) + defaultMap.RegisterType(&Type{Name: "int8multirange", OID: Int8multirangeOID, Codec: &MultirangeCodec{ElementType: defaultMap.oidToType[Int8rangeOID]}}) + defaultMap.RegisterType(&Type{Name: "nummultirange", OID: NummultirangeOID, Codec: &MultirangeCodec{ElementType: defaultMap.oidToType[NumrangeOID]}}) + defaultMap.RegisterType(&Type{Name: "tsmultirange", OID: TsmultirangeOID, Codec: &MultirangeCodec{ElementType: defaultMap.oidToType[TsrangeOID]}}) + defaultMap.RegisterType(&Type{Name: "tstzmultirange", OID: TstzmultirangeOID, Codec: &MultirangeCodec{ElementType: defaultMap.oidToType[TstzrangeOID]}}) + + // Array types + defaultMap.RegisterType(&Type{Name: "_aclitem", OID: ACLItemArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[ACLItemOID]}}) + defaultMap.RegisterType(&Type{Name: "_bit", OID: BitArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[BitOID]}}) + defaultMap.RegisterType(&Type{Name: "_bool", OID: BoolArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[BoolOID]}}) + defaultMap.RegisterType(&Type{Name: "_box", OID: BoxArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[BoxOID]}}) + defaultMap.RegisterType(&Type{Name: "_bpchar", OID: BPCharArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[BPCharOID]}}) + defaultMap.RegisterType(&Type{Name: "_bytea", OID: ByteaArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[ByteaOID]}}) + defaultMap.RegisterType(&Type{Name: "_char", OID: QCharArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[QCharOID]}}) + defaultMap.RegisterType(&Type{Name: "_cid", OID: CIDArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[CIDOID]}}) + defaultMap.RegisterType(&Type{Name: "_cidr", OID: CIDRArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[CIDROID]}}) + defaultMap.RegisterType(&Type{Name: "_circle", OID: CircleArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[CircleOID]}}) + defaultMap.RegisterType(&Type{Name: "_date", OID: DateArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[DateOID]}}) + defaultMap.RegisterType(&Type{Name: "_daterange", OID: DaterangeArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[DaterangeOID]}}) + defaultMap.RegisterType(&Type{Name: "_float4", OID: Float4ArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[Float4OID]}}) + defaultMap.RegisterType(&Type{Name: "_float8", OID: Float8ArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[Float8OID]}}) + defaultMap.RegisterType(&Type{Name: "_inet", OID: InetArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[InetOID]}}) + defaultMap.RegisterType(&Type{Name: "_int2", OID: Int2ArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[Int2OID]}}) + defaultMap.RegisterType(&Type{Name: "_int4", OID: Int4ArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[Int4OID]}}) + defaultMap.RegisterType(&Type{Name: "_int4range", OID: Int4rangeArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[Int4rangeOID]}}) + defaultMap.RegisterType(&Type{Name: "_int8", OID: Int8ArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[Int8OID]}}) + defaultMap.RegisterType(&Type{Name: "_int8range", OID: Int8rangeArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[Int8rangeOID]}}) + defaultMap.RegisterType(&Type{Name: "_interval", OID: IntervalArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[IntervalOID]}}) + defaultMap.RegisterType(&Type{Name: "_json", OID: JSONArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[JSONOID]}}) + defaultMap.RegisterType(&Type{Name: "_jsonb", OID: JSONBArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[JSONBOID]}}) + defaultMap.RegisterType(&Type{Name: "_jsonpath", OID: JSONPathArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[JSONPathOID]}}) + defaultMap.RegisterType(&Type{Name: "_line", OID: LineArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[LineOID]}}) + defaultMap.RegisterType(&Type{Name: "_lseg", OID: LsegArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[LsegOID]}}) + defaultMap.RegisterType(&Type{Name: "_macaddr", OID: MacaddrArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[MacaddrOID]}}) + defaultMap.RegisterType(&Type{Name: "_name", OID: NameArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[NameOID]}}) + defaultMap.RegisterType(&Type{Name: "_numeric", OID: NumericArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[NumericOID]}}) + defaultMap.RegisterType(&Type{Name: "_numrange", OID: NumrangeArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[NumrangeOID]}}) + defaultMap.RegisterType(&Type{Name: "_oid", OID: OIDArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[OIDOID]}}) + defaultMap.RegisterType(&Type{Name: "_path", OID: PathArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[PathOID]}}) + defaultMap.RegisterType(&Type{Name: "_point", OID: PointArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[PointOID]}}) + defaultMap.RegisterType(&Type{Name: "_polygon", OID: PolygonArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[PolygonOID]}}) + defaultMap.RegisterType(&Type{Name: "_record", OID: RecordArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[RecordOID]}}) + defaultMap.RegisterType(&Type{Name: "_text", OID: TextArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[TextOID]}}) + defaultMap.RegisterType(&Type{Name: "_tid", OID: TIDArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[TIDOID]}}) + defaultMap.RegisterType(&Type{Name: "_time", OID: TimeArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[TimeOID]}}) + defaultMap.RegisterType(&Type{Name: "_timestamp", OID: TimestampArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[TimestampOID]}}) + defaultMap.RegisterType(&Type{Name: "_timestamptz", OID: TimestamptzArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[TimestamptzOID]}}) + defaultMap.RegisterType(&Type{Name: "_tsrange", OID: TsrangeArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[TsrangeOID]}}) + defaultMap.RegisterType(&Type{Name: "_tstzrange", OID: TstzrangeArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[TstzrangeOID]}}) + defaultMap.RegisterType(&Type{Name: "_uuid", OID: UUIDArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[UUIDOID]}}) + defaultMap.RegisterType(&Type{Name: "_varbit", OID: VarbitArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[VarbitOID]}}) + defaultMap.RegisterType(&Type{Name: "_varchar", OID: VarcharArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[VarcharOID]}}) + defaultMap.RegisterType(&Type{Name: "_xid", OID: XIDArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[XIDOID]}}) + defaultMap.RegisterType(&Type{Name: "_xml", OID: XMLArrayOID, Codec: &ArrayCodec{ElementType: defaultMap.oidToType[XMLOID]}}) + + // Integer types that directly map to a PostgreSQL type + registerDefaultPgTypeVariants[int16](defaultMap, "int2") + registerDefaultPgTypeVariants[int32](defaultMap, "int4") + registerDefaultPgTypeVariants[int64](defaultMap, "int8") + + // Integer types that do not have a direct match to a PostgreSQL type + registerDefaultPgTypeVariants[int8](defaultMap, "int8") + registerDefaultPgTypeVariants[int](defaultMap, "int8") + registerDefaultPgTypeVariants[uint8](defaultMap, "int8") + registerDefaultPgTypeVariants[uint16](defaultMap, "int8") + registerDefaultPgTypeVariants[uint32](defaultMap, "int8") + registerDefaultPgTypeVariants[uint64](defaultMap, "numeric") + registerDefaultPgTypeVariants[uint](defaultMap, "numeric") + + registerDefaultPgTypeVariants[float32](defaultMap, "float4") + registerDefaultPgTypeVariants[float64](defaultMap, "float8") + + registerDefaultPgTypeVariants[bool](defaultMap, "bool") + registerDefaultPgTypeVariants[time.Time](defaultMap, "timestamptz") + registerDefaultPgTypeVariants[time.Duration](defaultMap, "interval") + registerDefaultPgTypeVariants[string](defaultMap, "text") + registerDefaultPgTypeVariants[json.RawMessage](defaultMap, "json") + registerDefaultPgTypeVariants[[]byte](defaultMap, "bytea") + + registerDefaultPgTypeVariants[net.IP](defaultMap, "inet") + registerDefaultPgTypeVariants[net.IPNet](defaultMap, "cidr") + registerDefaultPgTypeVariants[netip.Addr](defaultMap, "inet") + registerDefaultPgTypeVariants[netip.Prefix](defaultMap, "cidr") + + // pgtype provided structs + registerDefaultPgTypeVariants[Bits](defaultMap, "varbit") + registerDefaultPgTypeVariants[Bool](defaultMap, "bool") + registerDefaultPgTypeVariants[Box](defaultMap, "box") + registerDefaultPgTypeVariants[Circle](defaultMap, "circle") + registerDefaultPgTypeVariants[Date](defaultMap, "date") + registerDefaultPgTypeVariants[Range[Date]](defaultMap, "daterange") + registerDefaultPgTypeVariants[Multirange[Range[Date]]](defaultMap, "datemultirange") + registerDefaultPgTypeVariants[Float4](defaultMap, "float4") + registerDefaultPgTypeVariants[Float8](defaultMap, "float8") + registerDefaultPgTypeVariants[Range[Float8]](defaultMap, "numrange") // There is no PostgreSQL builtin float8range so map it to numrange. + registerDefaultPgTypeVariants[Multirange[Range[Float8]]](defaultMap, "nummultirange") // There is no PostgreSQL builtin float8multirange so map it to nummultirange. + registerDefaultPgTypeVariants[Int2](defaultMap, "int2") + registerDefaultPgTypeVariants[Int4](defaultMap, "int4") + registerDefaultPgTypeVariants[Range[Int4]](defaultMap, "int4range") + registerDefaultPgTypeVariants[Multirange[Range[Int4]]](defaultMap, "int4multirange") + registerDefaultPgTypeVariants[Int8](defaultMap, "int8") + registerDefaultPgTypeVariants[Range[Int8]](defaultMap, "int8range") + registerDefaultPgTypeVariants[Multirange[Range[Int8]]](defaultMap, "int8multirange") + registerDefaultPgTypeVariants[Interval](defaultMap, "interval") + registerDefaultPgTypeVariants[Line](defaultMap, "line") + registerDefaultPgTypeVariants[Lseg](defaultMap, "lseg") + registerDefaultPgTypeVariants[Numeric](defaultMap, "numeric") + registerDefaultPgTypeVariants[Range[Numeric]](defaultMap, "numrange") + registerDefaultPgTypeVariants[Multirange[Range[Numeric]]](defaultMap, "nummultirange") + registerDefaultPgTypeVariants[Path](defaultMap, "path") + registerDefaultPgTypeVariants[Point](defaultMap, "point") + registerDefaultPgTypeVariants[Polygon](defaultMap, "polygon") + registerDefaultPgTypeVariants[TID](defaultMap, "tid") + registerDefaultPgTypeVariants[Text](defaultMap, "text") + registerDefaultPgTypeVariants[Time](defaultMap, "time") + registerDefaultPgTypeVariants[Timestamp](defaultMap, "timestamp") + registerDefaultPgTypeVariants[Timestamptz](defaultMap, "timestamptz") + registerDefaultPgTypeVariants[Range[Timestamp]](defaultMap, "tsrange") + registerDefaultPgTypeVariants[Multirange[Range[Timestamp]]](defaultMap, "tsmultirange") + registerDefaultPgTypeVariants[Range[Timestamptz]](defaultMap, "tstzrange") + registerDefaultPgTypeVariants[Multirange[Range[Timestamptz]]](defaultMap, "tstzmultirange") + registerDefaultPgTypeVariants[UUID](defaultMap, "uuid") + + defaultMap.buildReflectTypeToType() +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/point.go b/vendor/github.com/jackc/pgx/v5/pgtype/point.go new file mode 100644 index 00000000..09b19bb5 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/point.go @@ -0,0 +1,266 @@ +package pgtype + +import ( + "bytes" + "database/sql/driver" + "encoding/binary" + "fmt" + "math" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type Vec2 struct { + X float64 + Y float64 +} + +type PointScanner interface { + ScanPoint(v Point) error +} + +type PointValuer interface { + PointValue() (Point, error) +} + +type Point struct { + P Vec2 + Valid bool +} + +func (p *Point) ScanPoint(v Point) error { + *p = v + return nil +} + +func (p Point) PointValue() (Point, error) { + return p, nil +} + +func parsePoint(src []byte) (*Point, error) { + if src == nil || bytes.Equal(src, []byte("null")) { + return &Point{}, nil + } + + if len(src) < 5 { + return nil, fmt.Errorf("invalid length for point: %v", len(src)) + } + if src[0] == '"' && src[len(src)-1] == '"' { + src = src[1 : len(src)-1] + } + sx, sy, found := strings.Cut(string(src[1:len(src)-1]), ",") + if !found { + return nil, fmt.Errorf("invalid format for point") + } + + x, err := strconv.ParseFloat(sx, 64) + if err != nil { + return nil, err + } + + y, err := strconv.ParseFloat(sy, 64) + if err != nil { + return nil, err + } + + return &Point{P: Vec2{x, y}, Valid: true}, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Point) Scan(src any) error { + if src == nil { + *dst = Point{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToPointScanner{}.Scan([]byte(src), dst) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (src Point) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + + buf, err := PointCodec{}.PlanEncode(nil, 0, TextFormatCode, src).Encode(src, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +func (src Point) MarshalJSON() ([]byte, error) { + if !src.Valid { + return []byte("null"), nil + } + + var buff bytes.Buffer + buff.WriteByte('"') + buff.WriteString(fmt.Sprintf("(%g,%g)", src.P.X, src.P.Y)) + buff.WriteByte('"') + return buff.Bytes(), nil +} + +func (dst *Point) UnmarshalJSON(point []byte) error { + p, err := parsePoint(point) + if err != nil { + return err + } + *dst = *p + return nil +} + +type PointCodec struct{} + +func (PointCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (PointCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (PointCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(PointValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanPointCodecBinary{} + case TextFormatCode: + return encodePlanPointCodecText{} + } + + return nil +} + +type encodePlanPointCodecBinary struct{} + +func (encodePlanPointCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + point, err := value.(PointValuer).PointValue() + if err != nil { + return nil, err + } + + if !point.Valid { + return nil, nil + } + + buf = pgio.AppendUint64(buf, math.Float64bits(point.P.X)) + buf = pgio.AppendUint64(buf, math.Float64bits(point.P.Y)) + return buf, nil +} + +type encodePlanPointCodecText struct{} + +func (encodePlanPointCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + point, err := value.(PointValuer).PointValue() + if err != nil { + return nil, err + } + + if !point.Valid { + return nil, nil + } + + return append(buf, fmt.Sprintf(`(%s,%s)`, + strconv.FormatFloat(point.P.X, 'f', -1, 64), + strconv.FormatFloat(point.P.Y, 'f', -1, 64), + )...), nil +} + +func (PointCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case PointScanner: + return scanPlanBinaryPointToPointScanner{} + } + case TextFormatCode: + switch target.(type) { + case PointScanner: + return scanPlanTextAnyToPointScanner{} + } + } + + return nil +} + +func (c PointCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c PointCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var point Point + err := codecScan(c, m, oid, format, src, &point) + if err != nil { + return nil, err + } + return point, nil +} + +type scanPlanBinaryPointToPointScanner struct{} + +func (scanPlanBinaryPointToPointScanner) Scan(src []byte, dst any) error { + scanner := (dst).(PointScanner) + + if src == nil { + return scanner.ScanPoint(Point{}) + } + + if len(src) != 16 { + return fmt.Errorf("invalid length for point: %v", len(src)) + } + + x := binary.BigEndian.Uint64(src) + y := binary.BigEndian.Uint64(src[8:]) + + return scanner.ScanPoint(Point{ + P: Vec2{math.Float64frombits(x), math.Float64frombits(y)}, + Valid: true, + }) +} + +type scanPlanTextAnyToPointScanner struct{} + +func (scanPlanTextAnyToPointScanner) Scan(src []byte, dst any) error { + scanner := (dst).(PointScanner) + + if src == nil { + return scanner.ScanPoint(Point{}) + } + + if len(src) < 5 { + return fmt.Errorf("invalid length for point: %v", len(src)) + } + + sx, sy, found := strings.Cut(string(src[1:len(src)-1]), ",") + if !found { + return fmt.Errorf("invalid format for point") + } + + x, err := strconv.ParseFloat(sx, 64) + if err != nil { + return err + } + + y, err := strconv.ParseFloat(sy, 64) + if err != nil { + return err + } + + return scanner.ScanPoint(Point{P: Vec2{x, y}, Valid: true}) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/polygon.go b/vendor/github.com/jackc/pgx/v5/pgtype/polygon.go new file mode 100644 index 00000000..04b0ba6b --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/polygon.go @@ -0,0 +1,253 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "math" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type PolygonScanner interface { + ScanPolygon(v Polygon) error +} + +type PolygonValuer interface { + PolygonValue() (Polygon, error) +} + +type Polygon struct { + P []Vec2 + Valid bool +} + +func (p *Polygon) ScanPolygon(v Polygon) error { + *p = v + return nil +} + +func (p Polygon) PolygonValue() (Polygon, error) { + return p, nil +} + +// Scan implements the database/sql Scanner interface. +func (p *Polygon) Scan(src any) error { + if src == nil { + *p = Polygon{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToPolygonScanner{}.Scan([]byte(src), p) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (p Polygon) Value() (driver.Value, error) { + if !p.Valid { + return nil, nil + } + + buf, err := PolygonCodec{}.PlanEncode(nil, 0, TextFormatCode, p).Encode(p, nil) + if err != nil { + return nil, err + } + + return string(buf), err +} + +type PolygonCodec struct{} + +func (PolygonCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (PolygonCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (PolygonCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(PolygonValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanPolygonCodecBinary{} + case TextFormatCode: + return encodePlanPolygonCodecText{} + } + + return nil +} + +type encodePlanPolygonCodecBinary struct{} + +func (encodePlanPolygonCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + polygon, err := value.(PolygonValuer).PolygonValue() + if err != nil { + return nil, err + } + + if !polygon.Valid { + return nil, nil + } + + buf = pgio.AppendInt32(buf, int32(len(polygon.P))) + + for _, p := range polygon.P { + buf = pgio.AppendUint64(buf, math.Float64bits(p.X)) + buf = pgio.AppendUint64(buf, math.Float64bits(p.Y)) + } + + return buf, nil +} + +type encodePlanPolygonCodecText struct{} + +func (encodePlanPolygonCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + polygon, err := value.(PolygonValuer).PolygonValue() + if err != nil { + return nil, err + } + + if !polygon.Valid { + return nil, nil + } + + buf = append(buf, '(') + + for i, p := range polygon.P { + if i > 0 { + buf = append(buf, ',') + } + buf = append(buf, fmt.Sprintf(`(%s,%s)`, + strconv.FormatFloat(p.X, 'f', -1, 64), + strconv.FormatFloat(p.Y, 'f', -1, 64), + )...) + } + + buf = append(buf, ')') + + return buf, nil +} + +func (PolygonCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case PolygonScanner: + return scanPlanBinaryPolygonToPolygonScanner{} + } + case TextFormatCode: + switch target.(type) { + case PolygonScanner: + return scanPlanTextAnyToPolygonScanner{} + } + } + + return nil +} + +type scanPlanBinaryPolygonToPolygonScanner struct{} + +func (scanPlanBinaryPolygonToPolygonScanner) Scan(src []byte, dst any) error { + scanner := (dst).(PolygonScanner) + + if src == nil { + return scanner.ScanPolygon(Polygon{}) + } + + if len(src) < 5 { + return fmt.Errorf("invalid length for polygon: %v", len(src)) + } + + pointCount := int(binary.BigEndian.Uint32(src)) + rp := 4 + + if 4+pointCount*16 != len(src) { + return fmt.Errorf("invalid length for Polygon with %d points: %v", pointCount, len(src)) + } + + points := make([]Vec2, pointCount) + for i := 0; i < len(points); i++ { + x := binary.BigEndian.Uint64(src[rp:]) + rp += 8 + y := binary.BigEndian.Uint64(src[rp:]) + rp += 8 + points[i] = Vec2{math.Float64frombits(x), math.Float64frombits(y)} + } + + return scanner.ScanPolygon(Polygon{ + P: points, + Valid: true, + }) +} + +type scanPlanTextAnyToPolygonScanner struct{} + +func (scanPlanTextAnyToPolygonScanner) Scan(src []byte, dst any) error { + scanner := (dst).(PolygonScanner) + + if src == nil { + return scanner.ScanPolygon(Polygon{}) + } + + if len(src) < 7 { + return fmt.Errorf("invalid length for Polygon: %v", len(src)) + } + + points := make([]Vec2, 0) + + str := string(src[2:]) + + for { + end := strings.IndexByte(str, ',') + x, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + str = str[end+1:] + end = strings.IndexByte(str, ')') + + y, err := strconv.ParseFloat(str[:end], 64) + if err != nil { + return err + } + + points = append(points, Vec2{x, y}) + + if end+3 < len(str) { + str = str[end+3:] + } else { + break + } + } + + return scanner.ScanPolygon(Polygon{P: points, Valid: true}) +} + +func (c PolygonCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c PolygonCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var polygon Polygon + err := codecScan(c, m, oid, format, src, &polygon) + if err != nil { + return nil, err + } + return polygon, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/qchar.go b/vendor/github.com/jackc/pgx/v5/pgtype/qchar.go new file mode 100644 index 00000000..fc40a5b2 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/qchar.go @@ -0,0 +1,141 @@ +package pgtype + +import ( + "database/sql/driver" + "fmt" + "math" +) + +// QCharCodec is for PostgreSQL's special 8-bit-only "char" type more akin to the C +// language's char type, or Go's byte type. (Note that the name in PostgreSQL +// itself is "char", in double-quotes, and not char.) It gets used a lot in +// PostgreSQL's system tables to hold a single ASCII character value (eg +// pg_class.relkind). It is named Qchar for quoted char to disambiguate from SQL +// standard type char. +type QCharCodec struct{} + +func (QCharCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (QCharCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (QCharCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case TextFormatCode, BinaryFormatCode: + switch value.(type) { + case byte: + return encodePlanQcharCodecByte{} + case rune: + return encodePlanQcharCodecRune{} + } + } + + return nil +} + +type encodePlanQcharCodecByte struct{} + +func (encodePlanQcharCodecByte) Encode(value any, buf []byte) (newBuf []byte, err error) { + b := value.(byte) + buf = append(buf, b) + return buf, nil +} + +type encodePlanQcharCodecRune struct{} + +func (encodePlanQcharCodecRune) Encode(value any, buf []byte) (newBuf []byte, err error) { + r := value.(rune) + if r > math.MaxUint8 { + return nil, fmt.Errorf(`%v cannot be encoded to "char"`, r) + } + b := byte(r) + buf = append(buf, b) + return buf, nil +} + +func (QCharCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch format { + case TextFormatCode, BinaryFormatCode: + switch target.(type) { + case *byte: + return scanPlanQcharCodecByte{} + case *rune: + return scanPlanQcharCodecRune{} + } + } + + return nil +} + +type scanPlanQcharCodecByte struct{} + +func (scanPlanQcharCodecByte) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) > 1 { + return fmt.Errorf(`invalid length for "char": %v`, len(src)) + } + + b := dst.(*byte) + // In the text format the zero value is returned as a zero byte value instead of 0 + if len(src) == 0 { + *b = 0 + } else { + *b = src[0] + } + + return nil +} + +type scanPlanQcharCodecRune struct{} + +func (scanPlanQcharCodecRune) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) > 1 { + return fmt.Errorf(`invalid length for "char": %v`, len(src)) + } + + r := dst.(*rune) + // In the text format the zero value is returned as a zero byte value instead of 0 + if len(src) == 0 { + *r = 0 + } else { + *r = rune(src[0]) + } + + return nil +} + +func (c QCharCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var r rune + err := codecScan(c, m, oid, format, src, &r) + if err != nil { + return nil, err + } + return string(r), nil +} + +func (c QCharCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var r rune + err := codecScan(c, m, oid, format, src, &r) + if err != nil { + return nil, err + } + return r, nil +} diff --git a/vendor/github.com/jackc/pgtype/range.go b/vendor/github.com/jackc/pgx/v5/pgtype/range.go similarity index 76% rename from vendor/github.com/jackc/pgtype/range.go rename to vendor/github.com/jackc/pgx/v5/pgtype/range.go index e999f6a9..16427ccc 100644 --- a/vendor/github.com/jackc/pgtype/range.go +++ b/vendor/github.com/jackc/pgx/v5/pgtype/range.go @@ -19,15 +19,15 @@ func (bt BoundType) String() string { return string(bt) } -type UntypedTextRange struct { +type untypedTextRange struct { Lower string Upper string LowerType BoundType UpperType BoundType } -func ParseUntypedTextRange(src string) (*UntypedTextRange, error) { - utr := &UntypedTextRange{} +func parseUntypedTextRange(src string) (*untypedTextRange, error) { + utr := &untypedTextRange{} if src == "empty" { utr.LowerType = Empty utr.UpperType = Empty @@ -40,7 +40,7 @@ func ParseUntypedTextRange(src string) (*UntypedTextRange, error) { r, _, err := buf.ReadRune() if err != nil { - return nil, fmt.Errorf("invalid lower bound: %v", err) + return nil, fmt.Errorf("invalid lower bound: %w", err) } switch r { case '(': @@ -53,7 +53,7 @@ func ParseUntypedTextRange(src string) (*UntypedTextRange, error) { r, _, err = buf.ReadRune() if err != nil { - return nil, fmt.Errorf("invalid lower value: %v", err) + return nil, fmt.Errorf("invalid lower value: %w", err) } buf.UnreadRune() @@ -62,13 +62,13 @@ func ParseUntypedTextRange(src string) (*UntypedTextRange, error) { } else { utr.Lower, err = rangeParseValue(buf) if err != nil { - return nil, fmt.Errorf("invalid lower value: %v", err) + return nil, fmt.Errorf("invalid lower value: %w", err) } } r, _, err = buf.ReadRune() if err != nil { - return nil, fmt.Errorf("missing range separator: %v", err) + return nil, fmt.Errorf("missing range separator: %w", err) } if r != ',' { return nil, fmt.Errorf("missing range separator: %v", r) @@ -76,7 +76,7 @@ func ParseUntypedTextRange(src string) (*UntypedTextRange, error) { r, _, err = buf.ReadRune() if err != nil { - return nil, fmt.Errorf("invalid upper value: %v", err) + return nil, fmt.Errorf("invalid upper value: %w", err) } if r == ')' || r == ']' { @@ -85,12 +85,12 @@ func ParseUntypedTextRange(src string) (*UntypedTextRange, error) { buf.UnreadRune() utr.Upper, err = rangeParseValue(buf) if err != nil { - return nil, fmt.Errorf("invalid upper value: %v", err) + return nil, fmt.Errorf("invalid upper value: %w", err) } r, _, err = buf.ReadRune() if err != nil { - return nil, fmt.Errorf("missing upper bound: %v", err) + return nil, fmt.Errorf("missing upper bound: %w", err) } switch r { case ')': @@ -173,7 +173,7 @@ func rangeParseQuotedValue(buf *bytes.Buffer) (string, error) { } } -type UntypedBinaryRange struct { +type untypedBinaryRange struct { Lower []byte Upper []byte LowerType BoundType @@ -197,8 +197,8 @@ const upperInclusiveMask = 4 const lowerUnboundedMask = 8 const upperUnboundedMask = 16 -func ParseUntypedBinaryRange(src []byte) (*UntypedBinaryRange, error) { - ubr := &UntypedBinaryRange{} +func parseUntypedBinaryRange(src []byte) (*untypedBinaryRange, error) { + ubr := &untypedBinaryRange{} if len(src) == 0 { return nil, fmt.Errorf("range too short: %v", len(src)) @@ -275,3 +275,48 @@ func ParseUntypedBinaryRange(src []byte) (*UntypedBinaryRange, error) { return ubr, nil } + +// Range is a generic range type. +type Range[T any] struct { + Lower T + Upper T + LowerType BoundType + UpperType BoundType + Valid bool +} + +func (r Range[T]) IsNull() bool { + return !r.Valid +} + +func (r Range[T]) BoundTypes() (lower, upper BoundType) { + return r.LowerType, r.UpperType +} + +func (r Range[T]) Bounds() (lower, upper any) { + return &r.Lower, &r.Upper +} + +func (r *Range[T]) ScanNull() error { + *r = Range[T]{} + return nil +} + +func (r *Range[T]) ScanBounds() (lowerTarget, upperTarget any) { + return &r.Lower, &r.Upper +} + +func (r *Range[T]) SetBoundTypes(lower, upper BoundType) error { + if lower == Unbounded || lower == Empty { + var zero T + r.Lower = zero + } + if upper == Unbounded || upper == Empty { + var zero T + r.Upper = zero + } + r.LowerType = lower + r.UpperType = upper + r.Valid = true + return nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/range_codec.go b/vendor/github.com/jackc/pgx/v5/pgtype/range_codec.go new file mode 100644 index 00000000..684f1bf7 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/range_codec.go @@ -0,0 +1,379 @@ +package pgtype + +import ( + "database/sql/driver" + "fmt" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +// RangeValuer is a type that can be converted into a PostgreSQL range. +type RangeValuer interface { + // IsNull returns true if the value is SQL NULL. + IsNull() bool + + // BoundTypes returns the lower and upper bound types. + BoundTypes() (lower, upper BoundType) + + // Bounds returns the lower and upper range values. + Bounds() (lower, upper any) +} + +// RangeScanner is a type can be scanned from a PostgreSQL range. +type RangeScanner interface { + // ScanNull sets the value to SQL NULL. + ScanNull() error + + // ScanBounds returns values usable as a scan target. The returned values may not be scanned if the range is empty or + // the bound type is unbounded. + ScanBounds() (lowerTarget, upperTarget any) + + // SetBoundTypes sets the lower and upper bound types. ScanBounds will be called and the returned values scanned + // (if appropriate) before SetBoundTypes is called. If the bound types are unbounded or empty this method must + // also set the bound values. + SetBoundTypes(lower, upper BoundType) error +} + +// RangeCodec is a codec for any range type. +type RangeCodec struct { + ElementType *Type +} + +func (c *RangeCodec) FormatSupported(format int16) bool { + return c.ElementType.Codec.FormatSupported(format) +} + +func (c *RangeCodec) PreferredFormat() int16 { + if c.FormatSupported(BinaryFormatCode) { + return BinaryFormatCode + } + return TextFormatCode +} + +func (c *RangeCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(RangeValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return &encodePlanRangeCodecRangeValuerToBinary{rc: c, m: m} + case TextFormatCode: + return &encodePlanRangeCodecRangeValuerToText{rc: c, m: m} + } + + return nil +} + +type encodePlanRangeCodecRangeValuerToBinary struct { + rc *RangeCodec + m *Map +} + +func (plan *encodePlanRangeCodecRangeValuerToBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + getter := value.(RangeValuer) + + if getter.IsNull() { + return nil, nil + } + + lowerType, upperType := getter.BoundTypes() + lower, upper := getter.Bounds() + + var rangeType byte + switch lowerType { + case Inclusive: + rangeType |= lowerInclusiveMask + case Unbounded: + rangeType |= lowerUnboundedMask + case Exclusive: + case Empty: + return append(buf, emptyMask), nil + default: + return nil, fmt.Errorf("unknown LowerType: %v", lowerType) + } + + switch upperType { + case Inclusive: + rangeType |= upperInclusiveMask + case Unbounded: + rangeType |= upperUnboundedMask + case Exclusive: + default: + return nil, fmt.Errorf("unknown UpperType: %v", upperType) + } + + buf = append(buf, rangeType) + + if lowerType != Unbounded { + if lower == nil { + return nil, fmt.Errorf("Lower cannot be NULL unless LowerType is Unbounded") + } + + sp := len(buf) + buf = pgio.AppendInt32(buf, -1) + + lowerPlan := plan.m.PlanEncode(plan.rc.ElementType.OID, BinaryFormatCode, lower) + if lowerPlan == nil { + return nil, fmt.Errorf("cannot encode %v as element of range", lower) + } + + buf, err = lowerPlan.Encode(lower, buf) + if err != nil { + return nil, fmt.Errorf("failed to encode %v as element of range: %w", lower, err) + } + if buf == nil { + return nil, fmt.Errorf("Lower cannot be NULL unless LowerType is Unbounded") + } + + pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) + } + + if upperType != Unbounded { + if upper == nil { + return nil, fmt.Errorf("Upper cannot be NULL unless UpperType is Unbounded") + } + + sp := len(buf) + buf = pgio.AppendInt32(buf, -1) + + upperPlan := plan.m.PlanEncode(plan.rc.ElementType.OID, BinaryFormatCode, upper) + if upperPlan == nil { + return nil, fmt.Errorf("cannot encode %v as element of range", upper) + } + + buf, err = upperPlan.Encode(upper, buf) + if err != nil { + return nil, fmt.Errorf("failed to encode %v as element of range: %w", upper, err) + } + if buf == nil { + return nil, fmt.Errorf("Upper cannot be NULL unless UpperType is Unbounded") + } + + pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) + } + + return buf, nil +} + +type encodePlanRangeCodecRangeValuerToText struct { + rc *RangeCodec + m *Map +} + +func (plan *encodePlanRangeCodecRangeValuerToText) Encode(value any, buf []byte) (newBuf []byte, err error) { + getter := value.(RangeValuer) + + if getter.IsNull() { + return nil, nil + } + + lowerType, upperType := getter.BoundTypes() + lower, upper := getter.Bounds() + + switch lowerType { + case Exclusive, Unbounded: + buf = append(buf, '(') + case Inclusive: + buf = append(buf, '[') + case Empty: + return append(buf, "empty"...), nil + default: + return nil, fmt.Errorf("unknown lower bound type %v", lowerType) + } + + if lowerType != Unbounded { + if lower == nil { + return nil, fmt.Errorf("Lower cannot be NULL unless LowerType is Unbounded") + } + + lowerPlan := plan.m.PlanEncode(plan.rc.ElementType.OID, TextFormatCode, lower) + if lowerPlan == nil { + return nil, fmt.Errorf("cannot encode %v as element of range", lower) + } + + buf, err = lowerPlan.Encode(lower, buf) + if err != nil { + return nil, fmt.Errorf("failed to encode %v as element of range: %w", lower, err) + } + if buf == nil { + return nil, fmt.Errorf("Lower cannot be NULL unless LowerType is Unbounded") + } + } + + buf = append(buf, ',') + + if upperType != Unbounded { + if upper == nil { + return nil, fmt.Errorf("Upper cannot be NULL unless UpperType is Unbounded") + } + + upperPlan := plan.m.PlanEncode(plan.rc.ElementType.OID, TextFormatCode, upper) + if upperPlan == nil { + return nil, fmt.Errorf("cannot encode %v as element of range", upper) + } + + buf, err = upperPlan.Encode(upper, buf) + if err != nil { + return nil, fmt.Errorf("failed to encode %v as element of range: %w", upper, err) + } + if buf == nil { + return nil, fmt.Errorf("Upper cannot be NULL unless UpperType is Unbounded") + } + } + + switch upperType { + case Exclusive, Unbounded: + buf = append(buf, ')') + case Inclusive: + buf = append(buf, ']') + default: + return nil, fmt.Errorf("unknown upper bound type %v", upperType) + } + + return buf, nil +} + +func (c *RangeCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch format { + case BinaryFormatCode: + switch target.(type) { + case RangeScanner: + return &scanPlanBinaryRangeToRangeScanner{rc: c, m: m} + } + case TextFormatCode: + switch target.(type) { + case RangeScanner: + return &scanPlanTextRangeToRangeScanner{rc: c, m: m} + } + } + + return nil +} + +type scanPlanBinaryRangeToRangeScanner struct { + rc *RangeCodec + m *Map +} + +func (plan *scanPlanBinaryRangeToRangeScanner) Scan(src []byte, target any) error { + rangeScanner := (target).(RangeScanner) + + if src == nil { + return rangeScanner.ScanNull() + } + + ubr, err := parseUntypedBinaryRange(src) + if err != nil { + return err + } + + if ubr.LowerType == Empty { + return rangeScanner.SetBoundTypes(ubr.LowerType, ubr.UpperType) + } + + lowerTarget, upperTarget := rangeScanner.ScanBounds() + + if ubr.LowerType == Inclusive || ubr.LowerType == Exclusive { + lowerPlan := plan.m.PlanScan(plan.rc.ElementType.OID, BinaryFormatCode, lowerTarget) + if lowerPlan == nil { + return fmt.Errorf("cannot scan into %v from range element", lowerTarget) + } + + err = lowerPlan.Scan(ubr.Lower, lowerTarget) + if err != nil { + return fmt.Errorf("cannot scan into %v from range element: %w", lowerTarget, err) + } + } + + if ubr.UpperType == Inclusive || ubr.UpperType == Exclusive { + upperPlan := plan.m.PlanScan(plan.rc.ElementType.OID, BinaryFormatCode, upperTarget) + if upperPlan == nil { + return fmt.Errorf("cannot scan into %v from range element", upperTarget) + } + + err = upperPlan.Scan(ubr.Upper, upperTarget) + if err != nil { + return fmt.Errorf("cannot scan into %v from range element: %w", upperTarget, err) + } + } + + return rangeScanner.SetBoundTypes(ubr.LowerType, ubr.UpperType) +} + +type scanPlanTextRangeToRangeScanner struct { + rc *RangeCodec + m *Map +} + +func (plan *scanPlanTextRangeToRangeScanner) Scan(src []byte, target any) error { + rangeScanner := (target).(RangeScanner) + + if src == nil { + return rangeScanner.ScanNull() + } + + utr, err := parseUntypedTextRange(string(src)) + if err != nil { + return err + } + + if utr.LowerType == Empty { + return rangeScanner.SetBoundTypes(utr.LowerType, utr.UpperType) + } + + lowerTarget, upperTarget := rangeScanner.ScanBounds() + + if utr.LowerType == Inclusive || utr.LowerType == Exclusive { + lowerPlan := plan.m.PlanScan(plan.rc.ElementType.OID, TextFormatCode, lowerTarget) + if lowerPlan == nil { + return fmt.Errorf("cannot scan into %v from range element", lowerTarget) + } + + err = lowerPlan.Scan([]byte(utr.Lower), lowerTarget) + if err != nil { + return fmt.Errorf("cannot scan into %v from range element: %w", lowerTarget, err) + } + } + + if utr.UpperType == Inclusive || utr.UpperType == Exclusive { + upperPlan := plan.m.PlanScan(plan.rc.ElementType.OID, TextFormatCode, upperTarget) + if upperPlan == nil { + return fmt.Errorf("cannot scan into %v from range element", upperTarget) + } + + err = upperPlan.Scan([]byte(utr.Upper), upperTarget) + if err != nil { + return fmt.Errorf("cannot scan into %v from range element: %w", upperTarget, err) + } + } + + return rangeScanner.SetBoundTypes(utr.LowerType, utr.UpperType) +} + +func (c *RangeCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + switch format { + case TextFormatCode: + return string(src), nil + case BinaryFormatCode: + buf := make([]byte, len(src)) + copy(buf, src) + return buf, nil + default: + return nil, fmt.Errorf("unknown format code %d", format) + } +} + +func (c *RangeCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var r Range[any] + err := c.PlanScan(m, oid, format, &r).Scan(src, &r) + return r, err +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/record_codec.go b/vendor/github.com/jackc/pgx/v5/pgtype/record_codec.go new file mode 100644 index 00000000..b3b16604 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/record_codec.go @@ -0,0 +1,125 @@ +package pgtype + +import ( + "database/sql/driver" + "fmt" +) + +// ArrayGetter is a type that can be converted into a PostgreSQL array. + +// RecordCodec is a codec for the generic PostgreSQL record type such as is created with the "row" function. Record can +// only decode the binary format. The text format output format from PostgreSQL does not include type information and +// is therefore impossible to decode. Encoding is impossible because PostgreSQL does not support input of generic +// records. +type RecordCodec struct{} + +func (RecordCodec) FormatSupported(format int16) bool { + return format == BinaryFormatCode +} + +func (RecordCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (RecordCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + return nil +} + +func (RecordCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + if format == BinaryFormatCode { + switch target.(type) { + case CompositeIndexScanner: + return &scanPlanBinaryRecordToCompositeIndexScanner{m: m} + } + } + + return nil +} + +type scanPlanBinaryRecordToCompositeIndexScanner struct { + m *Map +} + +func (plan *scanPlanBinaryRecordToCompositeIndexScanner) Scan(src []byte, target any) error { + targetScanner := (target).(CompositeIndexScanner) + + if src == nil { + return targetScanner.ScanNull() + } + + scanner := NewCompositeBinaryScanner(plan.m, src) + for i := 0; scanner.Next(); i++ { + fieldTarget := targetScanner.ScanIndex(i) + if fieldTarget != nil { + fieldPlan := plan.m.PlanScan(scanner.OID(), BinaryFormatCode, fieldTarget) + if fieldPlan == nil { + return fmt.Errorf("unable to scan OID %d in binary format into %v", scanner.OID(), fieldTarget) + } + + err := fieldPlan.Scan(scanner.Bytes(), fieldTarget) + if err != nil { + return err + } + } + } + + if err := scanner.Err(); err != nil { + return err + } + + return nil +} + +func (RecordCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + switch format { + case TextFormatCode: + return string(src), nil + case BinaryFormatCode: + buf := make([]byte, len(src)) + copy(buf, src) + return buf, nil + default: + return nil, fmt.Errorf("unknown format code %d", format) + } +} + +func (RecordCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + switch format { + case TextFormatCode: + return string(src), nil + case BinaryFormatCode: + scanner := NewCompositeBinaryScanner(m, src) + values := make([]any, scanner.FieldCount()) + for i := 0; scanner.Next(); i++ { + var v any + fieldPlan := m.PlanScan(scanner.OID(), BinaryFormatCode, &v) + if fieldPlan == nil { + return nil, fmt.Errorf("unable to scan OID %d in binary format into %v", scanner.OID(), v) + } + + err := fieldPlan.Scan(scanner.Bytes(), &v) + if err != nil { + return nil, err + } + + values[i] = v + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return values, nil + default: + return nil, fmt.Errorf("unknown format code %d", format) + } + +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/register_default_pg_types.go b/vendor/github.com/jackc/pgx/v5/pgtype/register_default_pg_types.go new file mode 100644 index 00000000..be1ca4a1 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/register_default_pg_types.go @@ -0,0 +1,35 @@ +//go:build !nopgxregisterdefaulttypes + +package pgtype + +func registerDefaultPgTypeVariants[T any](m *Map, name string) { + arrayName := "_" + name + + var value T + m.RegisterDefaultPgType(value, name) // T + m.RegisterDefaultPgType(&value, name) // *T + + var sliceT []T + m.RegisterDefaultPgType(sliceT, arrayName) // []T + m.RegisterDefaultPgType(&sliceT, arrayName) // *[]T + + var slicePtrT []*T + m.RegisterDefaultPgType(slicePtrT, arrayName) // []*T + m.RegisterDefaultPgType(&slicePtrT, arrayName) // *[]*T + + var arrayOfT Array[T] + m.RegisterDefaultPgType(arrayOfT, arrayName) // Array[T] + m.RegisterDefaultPgType(&arrayOfT, arrayName) // *Array[T] + + var arrayOfPtrT Array[*T] + m.RegisterDefaultPgType(arrayOfPtrT, arrayName) // Array[*T] + m.RegisterDefaultPgType(&arrayOfPtrT, arrayName) // *Array[*T] + + var flatArrayOfT FlatArray[T] + m.RegisterDefaultPgType(flatArrayOfT, arrayName) // FlatArray[T] + m.RegisterDefaultPgType(&flatArrayOfT, arrayName) // *FlatArray[T] + + var flatArrayOfPtrT FlatArray[*T] + m.RegisterDefaultPgType(flatArrayOfPtrT, arrayName) // FlatArray[*T] + m.RegisterDefaultPgType(&flatArrayOfPtrT, arrayName) // *FlatArray[*T] +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/register_default_pg_types_disabled.go b/vendor/github.com/jackc/pgx/v5/pgtype/register_default_pg_types_disabled.go new file mode 100644 index 00000000..56fe7c22 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/register_default_pg_types_disabled.go @@ -0,0 +1,6 @@ +//go:build nopgxregisterdefaulttypes + +package pgtype + +func registerDefaultPgTypeVariants[T any](m *Map, name string) { +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/text.go b/vendor/github.com/jackc/pgx/v5/pgtype/text.go new file mode 100644 index 00000000..021ee331 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/text.go @@ -0,0 +1,223 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/json" + "fmt" +) + +type TextScanner interface { + ScanText(v Text) error +} + +type TextValuer interface { + TextValue() (Text, error) +} + +type Text struct { + String string + Valid bool +} + +func (t *Text) ScanText(v Text) error { + *t = v + return nil +} + +func (t Text) TextValue() (Text, error) { + return t, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Text) Scan(src any) error { + if src == nil { + *dst = Text{} + return nil + } + + switch src := src.(type) { + case string: + *dst = Text{String: src, Valid: true} + return nil + case []byte: + *dst = Text{String: string(src), Valid: true} + return nil + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (src Text) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + return src.String, nil +} + +func (src Text) MarshalJSON() ([]byte, error) { + if !src.Valid { + return []byte("null"), nil + } + + return json.Marshal(src.String) +} + +func (dst *Text) UnmarshalJSON(b []byte) error { + var s *string + err := json.Unmarshal(b, &s) + if err != nil { + return err + } + + if s == nil { + *dst = Text{} + } else { + *dst = Text{String: *s, Valid: true} + } + + return nil +} + +type TextCodec struct{} + +func (TextCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (TextCodec) PreferredFormat() int16 { + return TextFormatCode +} + +func (TextCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case TextFormatCode, BinaryFormatCode: + switch value.(type) { + case string: + return encodePlanTextCodecString{} + case []byte: + return encodePlanTextCodecByteSlice{} + case TextValuer: + return encodePlanTextCodecTextValuer{} + } + } + + return nil +} + +type encodePlanTextCodecString struct{} + +func (encodePlanTextCodecString) Encode(value any, buf []byte) (newBuf []byte, err error) { + s := value.(string) + buf = append(buf, s...) + return buf, nil +} + +type encodePlanTextCodecByteSlice struct{} + +func (encodePlanTextCodecByteSlice) Encode(value any, buf []byte) (newBuf []byte, err error) { + s := value.([]byte) + buf = append(buf, s...) + return buf, nil +} + +type encodePlanTextCodecStringer struct{} + +func (encodePlanTextCodecStringer) Encode(value any, buf []byte) (newBuf []byte, err error) { + s := value.(fmt.Stringer) + buf = append(buf, s.String()...) + return buf, nil +} + +type encodePlanTextCodecTextValuer struct{} + +func (encodePlanTextCodecTextValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + text, err := value.(TextValuer).TextValue() + if err != nil { + return nil, err + } + + if !text.Valid { + return nil, nil + } + + buf = append(buf, text.String...) + return buf, nil +} + +func (TextCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case TextFormatCode, BinaryFormatCode: + switch target.(type) { + case *string: + return scanPlanTextAnyToString{} + case *[]byte: + return scanPlanAnyToNewByteSlice{} + case BytesScanner: + return scanPlanAnyToByteScanner{} + case TextScanner: + return scanPlanTextAnyToTextScanner{} + } + } + + return nil +} + +func (c TextCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return c.DecodeValue(m, oid, format, src) +} + +func (c TextCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + return string(src), nil +} + +type scanPlanTextAnyToString struct{} + +func (scanPlanTextAnyToString) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + p := (dst).(*string) + *p = string(src) + + return nil +} + +type scanPlanAnyToNewByteSlice struct{} + +func (scanPlanAnyToNewByteSlice) Scan(src []byte, dst any) error { + p := (dst).(*[]byte) + if src == nil { + *p = nil + } else { + *p = make([]byte, len(src)) + copy(*p, src) + } + + return nil +} + +type scanPlanAnyToByteScanner struct{} + +func (scanPlanAnyToByteScanner) Scan(src []byte, dst any) error { + p := (dst).(BytesScanner) + return p.ScanBytes(src) +} + +type scanPlanTextAnyToTextScanner struct{} + +func (scanPlanTextAnyToTextScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TextScanner) + + if src == nil { + return scanner.ScanText(Text{}) + } + + return scanner.ScanText(Text{String: string(src), Valid: true}) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/text_format_only_codec.go b/vendor/github.com/jackc/pgx/v5/pgtype/text_format_only_codec.go new file mode 100644 index 00000000..d5e4cdb3 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/text_format_only_codec.go @@ -0,0 +1,13 @@ +package pgtype + +type TextFormatOnlyCodec struct { + Codec +} + +func (c *TextFormatOnlyCodec) FormatSupported(format int16) bool { + return format == TextFormatCode && c.Codec.FormatSupported(format) +} + +func (TextFormatOnlyCodec) PreferredFormat() int16 { + return TextFormatCode +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/tid.go b/vendor/github.com/jackc/pgx/v5/pgtype/tid.go new file mode 100644 index 00000000..9bc2c2a1 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/tid.go @@ -0,0 +1,241 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "strconv" + "strings" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type TIDScanner interface { + ScanTID(v TID) error +} + +type TIDValuer interface { + TIDValue() (TID, error) +} + +// TID is PostgreSQL's Tuple Identifier type. +// +// When one does +// +// select ctid, * from some_table; +// +// it is the data type of the ctid hidden system column. +// +// It is currently implemented as a pair unsigned two byte integers. +// Its conversion functions can be found in src/backend/utils/adt/tid.c +// in the PostgreSQL sources. +type TID struct { + BlockNumber uint32 + OffsetNumber uint16 + Valid bool +} + +func (b *TID) ScanTID(v TID) error { + *b = v + return nil +} + +func (b TID) TIDValue() (TID, error) { + return b, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *TID) Scan(src any) error { + if src == nil { + *dst = TID{} + return nil + } + + switch src := src.(type) { + case string: + return scanPlanTextAnyToTIDScanner{}.Scan([]byte(src), dst) + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (src TID) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + + buf, err := TIDCodec{}.PlanEncode(nil, 0, TextFormatCode, src).Encode(src, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +type TIDCodec struct{} + +func (TIDCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (TIDCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (TIDCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(TIDValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanTIDCodecBinary{} + case TextFormatCode: + return encodePlanTIDCodecText{} + } + + return nil +} + +type encodePlanTIDCodecBinary struct{} + +func (encodePlanTIDCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + tid, err := value.(TIDValuer).TIDValue() + if err != nil { + return nil, err + } + + if !tid.Valid { + return nil, nil + } + + buf = pgio.AppendUint32(buf, tid.BlockNumber) + buf = pgio.AppendUint16(buf, tid.OffsetNumber) + return buf, nil +} + +type encodePlanTIDCodecText struct{} + +func (encodePlanTIDCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + tid, err := value.(TIDValuer).TIDValue() + if err != nil { + return nil, err + } + + if !tid.Valid { + return nil, nil + } + + buf = append(buf, fmt.Sprintf(`(%d,%d)`, tid.BlockNumber, tid.OffsetNumber)...) + return buf, nil +} + +func (TIDCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case TIDScanner: + return scanPlanBinaryTIDToTIDScanner{} + case TextScanner: + return scanPlanBinaryTIDToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case TIDScanner: + return scanPlanTextAnyToTIDScanner{} + } + } + + return nil +} + +type scanPlanBinaryTIDToTIDScanner struct{} + +func (scanPlanBinaryTIDToTIDScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TIDScanner) + + if src == nil { + return scanner.ScanTID(TID{}) + } + + if len(src) != 6 { + return fmt.Errorf("invalid length for tid: %v", len(src)) + } + + return scanner.ScanTID(TID{ + BlockNumber: binary.BigEndian.Uint32(src), + OffsetNumber: binary.BigEndian.Uint16(src[4:]), + Valid: true, + }) +} + +type scanPlanBinaryTIDToTextScanner struct{} + +func (scanPlanBinaryTIDToTextScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TextScanner) + + if src == nil { + return scanner.ScanText(Text{}) + } + + if len(src) != 6 { + return fmt.Errorf("invalid length for tid: %v", len(src)) + } + + blockNumber := binary.BigEndian.Uint32(src) + offsetNumber := binary.BigEndian.Uint16(src[4:]) + + return scanner.ScanText(Text{ + String: fmt.Sprintf(`(%d,%d)`, blockNumber, offsetNumber), + Valid: true, + }) +} + +type scanPlanTextAnyToTIDScanner struct{} + +func (scanPlanTextAnyToTIDScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TIDScanner) + + if src == nil { + return scanner.ScanTID(TID{}) + } + + if len(src) < 5 { + return fmt.Errorf("invalid length for tid: %v", len(src)) + } + + block, offset, found := strings.Cut(string(src[1:len(src)-1]), ",") + if !found { + return fmt.Errorf("invalid format for tid") + } + + blockNumber, err := strconv.ParseUint(block, 10, 32) + if err != nil { + return err + } + + offsetNumber, err := strconv.ParseUint(offset, 10, 16) + if err != nil { + return err + } + + return scanner.ScanTID(TID{BlockNumber: uint32(blockNumber), OffsetNumber: uint16(offsetNumber), Valid: true}) +} + +func (c TIDCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c TIDCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var tid TID + err := codecScan(c, m, oid, format, src, &tid) + if err != nil { + return nil, err + } + return tid, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/time.go b/vendor/github.com/jackc/pgx/v5/pgtype/time.go new file mode 100644 index 00000000..f8fd9489 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/time.go @@ -0,0 +1,274 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "strconv" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type TimeScanner interface { + ScanTime(v Time) error +} + +type TimeValuer interface { + TimeValue() (Time, error) +} + +// Time represents the PostgreSQL time type. The PostgreSQL time is a time of day without time zone. +// +// Time is represented as the number of microseconds since midnight in the same way that PostgreSQL does. Other time and +// date types in pgtype can use time.Time as the underlying representation. However, pgtype.Time type cannot due to +// needing to handle 24:00:00. time.Time converts that to 00:00:00 on the following day. +// +// The time with time zone type is not supported. Use of time with time zone is discouraged by the PostgreSQL documentation. +type Time struct { + Microseconds int64 // Number of microseconds since midnight + Valid bool +} + +func (t *Time) ScanTime(v Time) error { + *t = v + return nil +} + +func (t Time) TimeValue() (Time, error) { + return t, nil +} + +// Scan implements the database/sql Scanner interface. +func (t *Time) Scan(src any) error { + if src == nil { + *t = Time{} + return nil + } + + switch src := src.(type) { + case string: + err := scanPlanTextAnyToTimeScanner{}.Scan([]byte(src), t) + if err != nil { + t.Microseconds = 0 + t.Valid = false + } + return err + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (t Time) Value() (driver.Value, error) { + if !t.Valid { + return nil, nil + } + + buf, err := TimeCodec{}.PlanEncode(nil, 0, TextFormatCode, t).Encode(t, nil) + if err != nil { + return nil, err + } + return string(buf), err +} + +type TimeCodec struct{} + +func (TimeCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (TimeCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (TimeCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(TimeValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanTimeCodecBinary{} + case TextFormatCode: + return encodePlanTimeCodecText{} + } + + return nil +} + +type encodePlanTimeCodecBinary struct{} + +func (encodePlanTimeCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + t, err := value.(TimeValuer).TimeValue() + if err != nil { + return nil, err + } + + if !t.Valid { + return nil, nil + } + + return pgio.AppendInt64(buf, t.Microseconds), nil +} + +type encodePlanTimeCodecText struct{} + +func (encodePlanTimeCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + t, err := value.(TimeValuer).TimeValue() + if err != nil { + return nil, err + } + + if !t.Valid { + return nil, nil + } + + usec := t.Microseconds + hours := usec / microsecondsPerHour + usec -= hours * microsecondsPerHour + minutes := usec / microsecondsPerMinute + usec -= minutes * microsecondsPerMinute + seconds := usec / microsecondsPerSecond + usec -= seconds * microsecondsPerSecond + + s := fmt.Sprintf("%02d:%02d:%02d.%06d", hours, minutes, seconds, usec) + + return append(buf, s...), nil +} + +func (TimeCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case TimeScanner: + return scanPlanBinaryTimeToTimeScanner{} + case TextScanner: + return scanPlanBinaryTimeToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case TimeScanner: + return scanPlanTextAnyToTimeScanner{} + } + } + + return nil +} + +type scanPlanBinaryTimeToTimeScanner struct{} + +func (scanPlanBinaryTimeToTimeScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TimeScanner) + + if src == nil { + return scanner.ScanTime(Time{}) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for time: %v", len(src)) + } + + usec := int64(binary.BigEndian.Uint64(src)) + + return scanner.ScanTime(Time{Microseconds: usec, Valid: true}) +} + +type scanPlanBinaryTimeToTextScanner struct{} + +func (scanPlanBinaryTimeToTextScanner) Scan(src []byte, dst any) error { + ts, ok := (dst).(TextScanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return ts.ScanText(Text{}) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for time: %v", len(src)) + } + + usec := int64(binary.BigEndian.Uint64(src)) + + tim := Time{Microseconds: usec, Valid: true} + + buf, err := TimeCodec{}.PlanEncode(nil, 0, TextFormatCode, tim).Encode(tim, nil) + if err != nil { + return err + } + + return ts.ScanText(Text{String: string(buf), Valid: true}) +} + +type scanPlanTextAnyToTimeScanner struct{} + +func (scanPlanTextAnyToTimeScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TimeScanner) + + if src == nil { + return scanner.ScanTime(Time{}) + } + + s := string(src) + + if len(s) < 8 || s[2] != ':' || s[5] != ':' { + return fmt.Errorf("cannot decode %v into Time", s) + } + + hours, err := strconv.ParseInt(s[0:2], 10, 64) + if err != nil { + return fmt.Errorf("cannot decode %v into Time", s) + } + usec := hours * microsecondsPerHour + + minutes, err := strconv.ParseInt(s[3:5], 10, 64) + if err != nil { + return fmt.Errorf("cannot decode %v into Time", s) + } + usec += minutes * microsecondsPerMinute + + seconds, err := strconv.ParseInt(s[6:8], 10, 64) + if err != nil { + return fmt.Errorf("cannot decode %v into Time", s) + } + usec += seconds * microsecondsPerSecond + + if len(s) > 9 { + if s[8] != '.' || len(s) > 15 { + return fmt.Errorf("cannot decode %v into Time", s) + } + + fraction := s[9:] + n, err := strconv.ParseInt(fraction, 10, 64) + if err != nil { + return fmt.Errorf("cannot decode %v into Time", s) + } + + for i := len(fraction); i < 6; i++ { + n *= 10 + } + + usec += n + } + + return scanner.ScanTime(Time{Microseconds: usec, Valid: true}) +} + +func (c TimeCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + return codecDecodeToTextFormat(c, m, oid, format, src) +} + +func (c TimeCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var t Time + err := codecScan(c, m, oid, format, src, &t) + if err != nil { + return nil, err + } + return t, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/timestamp.go b/vendor/github.com/jackc/pgx/v5/pgtype/timestamp.go new file mode 100644 index 00000000..677a2c6e --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/timestamp.go @@ -0,0 +1,356 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "encoding/json" + "fmt" + "strings" + "time" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +const pgTimestampFormat = "2006-01-02 15:04:05.999999999" + +type TimestampScanner interface { + ScanTimestamp(v Timestamp) error +} + +type TimestampValuer interface { + TimestampValue() (Timestamp, error) +} + +// Timestamp represents the PostgreSQL timestamp type. +type Timestamp struct { + Time time.Time // Time zone will be ignored when encoding to PostgreSQL. + InfinityModifier InfinityModifier + Valid bool +} + +func (ts *Timestamp) ScanTimestamp(v Timestamp) error { + *ts = v + return nil +} + +func (ts Timestamp) TimestampValue() (Timestamp, error) { + return ts, nil +} + +// Scan implements the database/sql Scanner interface. +func (ts *Timestamp) Scan(src any) error { + if src == nil { + *ts = Timestamp{} + return nil + } + + switch src := src.(type) { + case string: + return (&scanPlanTextTimestampToTimestampScanner{}).Scan([]byte(src), ts) + case time.Time: + *ts = Timestamp{Time: src, Valid: true} + return nil + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (ts Timestamp) Value() (driver.Value, error) { + if !ts.Valid { + return nil, nil + } + + if ts.InfinityModifier != Finite { + return ts.InfinityModifier.String(), nil + } + return ts.Time, nil +} + +func (ts Timestamp) MarshalJSON() ([]byte, error) { + if !ts.Valid { + return []byte("null"), nil + } + + var s string + + switch ts.InfinityModifier { + case Finite: + s = ts.Time.Format(time.RFC3339Nano) + case Infinity: + s = "infinity" + case NegativeInfinity: + s = "-infinity" + } + + return json.Marshal(s) +} + +func (ts *Timestamp) UnmarshalJSON(b []byte) error { + var s *string + err := json.Unmarshal(b, &s) + if err != nil { + return err + } + + if s == nil { + *ts = Timestamp{} + return nil + } + + switch *s { + case "infinity": + *ts = Timestamp{Valid: true, InfinityModifier: Infinity} + case "-infinity": + *ts = Timestamp{Valid: true, InfinityModifier: -Infinity} + default: + // PostgreSQL uses ISO 8601 for to_json function and casting from a string to timestamptz + tim, err := time.Parse(time.RFC3339Nano, *s) + if err != nil { + return err + } + + *ts = Timestamp{Time: tim, Valid: true} + } + + return nil +} + +type TimestampCodec struct { + // ScanLocation is the location that the time is assumed to be in for scanning. This is different from + // TimestamptzCodec.ScanLocation in that this setting does change the instant in time that the timestamp represents. + ScanLocation *time.Location +} + +func (*TimestampCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (*TimestampCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (*TimestampCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(TimestampValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanTimestampCodecBinary{} + case TextFormatCode: + return encodePlanTimestampCodecText{} + } + + return nil +} + +type encodePlanTimestampCodecBinary struct{} + +func (encodePlanTimestampCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + ts, err := value.(TimestampValuer).TimestampValue() + if err != nil { + return nil, err + } + + if !ts.Valid { + return nil, nil + } + + var microsecSinceY2K int64 + switch ts.InfinityModifier { + case Finite: + t := discardTimeZone(ts.Time) + microsecSinceUnixEpoch := t.Unix()*1000000 + int64(t.Nanosecond())/1000 + microsecSinceY2K = microsecSinceUnixEpoch - microsecFromUnixEpochToY2K + case Infinity: + microsecSinceY2K = infinityMicrosecondOffset + case NegativeInfinity: + microsecSinceY2K = negativeInfinityMicrosecondOffset + } + + buf = pgio.AppendInt64(buf, microsecSinceY2K) + + return buf, nil +} + +type encodePlanTimestampCodecText struct{} + +func (encodePlanTimestampCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + ts, err := value.(TimestampValuer).TimestampValue() + if err != nil { + return nil, err + } + + if !ts.Valid { + return nil, nil + } + + var s string + + switch ts.InfinityModifier { + case Finite: + t := discardTimeZone(ts.Time) + + // Year 0000 is 1 BC + bc := false + if year := t.Year(); year <= 0 { + year = -year + 1 + t = time.Date(year, t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), time.UTC) + bc = true + } + + s = t.Truncate(time.Microsecond).Format(pgTimestampFormat) + + if bc { + s = s + " BC" + } + case Infinity: + s = "infinity" + case NegativeInfinity: + s = "-infinity" + } + + buf = append(buf, s...) + + return buf, nil +} + +func discardTimeZone(t time.Time) time.Time { + if t.Location() != time.UTC { + return time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), time.UTC) + } + + return t +} + +func (c *TimestampCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case TimestampScanner: + return &scanPlanBinaryTimestampToTimestampScanner{location: c.ScanLocation} + } + case TextFormatCode: + switch target.(type) { + case TimestampScanner: + return &scanPlanTextTimestampToTimestampScanner{location: c.ScanLocation} + } + } + + return nil +} + +type scanPlanBinaryTimestampToTimestampScanner struct{ location *time.Location } + +func (plan *scanPlanBinaryTimestampToTimestampScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TimestampScanner) + + if src == nil { + return scanner.ScanTimestamp(Timestamp{}) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for timestamp: %v", len(src)) + } + + var ts Timestamp + microsecSinceY2K := int64(binary.BigEndian.Uint64(src)) + + switch microsecSinceY2K { + case infinityMicrosecondOffset: + ts = Timestamp{Valid: true, InfinityModifier: Infinity} + case negativeInfinityMicrosecondOffset: + ts = Timestamp{Valid: true, InfinityModifier: -Infinity} + default: + tim := time.Unix( + microsecFromUnixEpochToY2K/1000000+microsecSinceY2K/1000000, + (microsecFromUnixEpochToY2K%1000000*1000)+(microsecSinceY2K%1000000*1000), + ).UTC() + if plan.location != nil { + tim = time.Date(tim.Year(), tim.Month(), tim.Day(), tim.Hour(), tim.Minute(), tim.Second(), tim.Nanosecond(), plan.location) + } + ts = Timestamp{Time: tim, Valid: true} + } + + return scanner.ScanTimestamp(ts) +} + +type scanPlanTextTimestampToTimestampScanner struct{ location *time.Location } + +func (plan *scanPlanTextTimestampToTimestampScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TimestampScanner) + + if src == nil { + return scanner.ScanTimestamp(Timestamp{}) + } + + var ts Timestamp + sbuf := string(src) + switch sbuf { + case "infinity": + ts = Timestamp{Valid: true, InfinityModifier: Infinity} + case "-infinity": + ts = Timestamp{Valid: true, InfinityModifier: -Infinity} + default: + bc := false + if strings.HasSuffix(sbuf, " BC") { + sbuf = sbuf[:len(sbuf)-3] + bc = true + } + tim, err := time.Parse(pgTimestampFormat, sbuf) + if err != nil { + return err + } + + if bc { + year := -tim.Year() + 1 + tim = time.Date(year, tim.Month(), tim.Day(), tim.Hour(), tim.Minute(), tim.Second(), tim.Nanosecond(), tim.Location()) + } + + if plan.location != nil { + tim = time.Date(tim.Year(), tim.Month(), tim.Day(), tim.Hour(), tim.Minute(), tim.Second(), tim.Nanosecond(), plan.location) + } + + ts = Timestamp{Time: tim, Valid: true} + } + + return scanner.ScanTimestamp(ts) +} + +func (c *TimestampCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var ts Timestamp + err := codecScan(c, m, oid, format, src, &ts) + if err != nil { + return nil, err + } + + if ts.InfinityModifier != Finite { + return ts.InfinityModifier.String(), nil + } + + return ts.Time, nil +} + +func (c *TimestampCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var ts Timestamp + err := codecScan(c, m, oid, format, src, &ts) + if err != nil { + return nil, err + } + + if ts.InfinityModifier != Finite { + return ts.InfinityModifier, nil + } + + return ts.Time, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/timestamptz.go b/vendor/github.com/jackc/pgx/v5/pgtype/timestamptz.go new file mode 100644 index 00000000..7efbcffd --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/timestamptz.go @@ -0,0 +1,366 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "encoding/json" + "fmt" + "strings" + "time" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +const pgTimestamptzHourFormat = "2006-01-02 15:04:05.999999999Z07" +const pgTimestamptzMinuteFormat = "2006-01-02 15:04:05.999999999Z07:00" +const pgTimestamptzSecondFormat = "2006-01-02 15:04:05.999999999Z07:00:00" +const microsecFromUnixEpochToY2K = 946684800 * 1000000 + +const ( + negativeInfinityMicrosecondOffset = -9223372036854775808 + infinityMicrosecondOffset = 9223372036854775807 +) + +type TimestamptzScanner interface { + ScanTimestamptz(v Timestamptz) error +} + +type TimestamptzValuer interface { + TimestamptzValue() (Timestamptz, error) +} + +// Timestamptz represents the PostgreSQL timestamptz type. +type Timestamptz struct { + Time time.Time + InfinityModifier InfinityModifier + Valid bool +} + +func (tstz *Timestamptz) ScanTimestamptz(v Timestamptz) error { + *tstz = v + return nil +} + +func (tstz Timestamptz) TimestamptzValue() (Timestamptz, error) { + return tstz, nil +} + +// Scan implements the database/sql Scanner interface. +func (tstz *Timestamptz) Scan(src any) error { + if src == nil { + *tstz = Timestamptz{} + return nil + } + + switch src := src.(type) { + case string: + return (&scanPlanTextTimestamptzToTimestamptzScanner{}).Scan([]byte(src), tstz) + case time.Time: + *tstz = Timestamptz{Time: src, Valid: true} + return nil + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (tstz Timestamptz) Value() (driver.Value, error) { + if !tstz.Valid { + return nil, nil + } + + if tstz.InfinityModifier != Finite { + return tstz.InfinityModifier.String(), nil + } + return tstz.Time, nil +} + +func (tstz Timestamptz) MarshalJSON() ([]byte, error) { + if !tstz.Valid { + return []byte("null"), nil + } + + var s string + + switch tstz.InfinityModifier { + case Finite: + s = tstz.Time.Format(time.RFC3339Nano) + case Infinity: + s = "infinity" + case NegativeInfinity: + s = "-infinity" + } + + return json.Marshal(s) +} + +func (tstz *Timestamptz) UnmarshalJSON(b []byte) error { + var s *string + err := json.Unmarshal(b, &s) + if err != nil { + return err + } + + if s == nil { + *tstz = Timestamptz{} + return nil + } + + switch *s { + case "infinity": + *tstz = Timestamptz{Valid: true, InfinityModifier: Infinity} + case "-infinity": + *tstz = Timestamptz{Valid: true, InfinityModifier: -Infinity} + default: + // PostgreSQL uses ISO 8601 for to_json function and casting from a string to timestamptz + tim, err := time.Parse(time.RFC3339Nano, *s) + if err != nil { + return err + } + + *tstz = Timestamptz{Time: tim, Valid: true} + } + + return nil +} + +type TimestamptzCodec struct { + // ScanLocation is the location to return scanned timestamptz values in. This does not change the instant in time that + // the timestamptz represents. + ScanLocation *time.Location +} + +func (*TimestamptzCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (*TimestamptzCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (*TimestamptzCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(TimestamptzValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanTimestamptzCodecBinary{} + case TextFormatCode: + return encodePlanTimestamptzCodecText{} + } + + return nil +} + +type encodePlanTimestamptzCodecBinary struct{} + +func (encodePlanTimestamptzCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) { + ts, err := value.(TimestamptzValuer).TimestamptzValue() + if err != nil { + return nil, err + } + + if !ts.Valid { + return nil, nil + } + + var microsecSinceY2K int64 + switch ts.InfinityModifier { + case Finite: + microsecSinceUnixEpoch := ts.Time.Unix()*1000000 + int64(ts.Time.Nanosecond())/1000 + microsecSinceY2K = microsecSinceUnixEpoch - microsecFromUnixEpochToY2K + case Infinity: + microsecSinceY2K = infinityMicrosecondOffset + case NegativeInfinity: + microsecSinceY2K = negativeInfinityMicrosecondOffset + } + + buf = pgio.AppendInt64(buf, microsecSinceY2K) + + return buf, nil +} + +type encodePlanTimestamptzCodecText struct{} + +func (encodePlanTimestamptzCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) { + ts, err := value.(TimestamptzValuer).TimestamptzValue() + if err != nil { + return nil, err + } + + if !ts.Valid { + return nil, nil + } + + var s string + + switch ts.InfinityModifier { + case Finite: + + t := ts.Time.UTC().Truncate(time.Microsecond) + + // Year 0000 is 1 BC + bc := false + if year := t.Year(); year <= 0 { + year = -year + 1 + t = time.Date(year, t.Month(), t.Day(), t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), time.UTC) + bc = true + } + + s = t.Format(pgTimestamptzSecondFormat) + + if bc { + s = s + " BC" + } + case Infinity: + s = "infinity" + case NegativeInfinity: + s = "-infinity" + } + + buf = append(buf, s...) + + return buf, nil +} + +func (c *TimestamptzCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case TimestamptzScanner: + return &scanPlanBinaryTimestamptzToTimestamptzScanner{location: c.ScanLocation} + } + case TextFormatCode: + switch target.(type) { + case TimestamptzScanner: + return &scanPlanTextTimestamptzToTimestamptzScanner{location: c.ScanLocation} + } + } + + return nil +} + +type scanPlanBinaryTimestamptzToTimestamptzScanner struct{ location *time.Location } + +func (plan *scanPlanBinaryTimestamptzToTimestamptzScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TimestamptzScanner) + + if src == nil { + return scanner.ScanTimestamptz(Timestamptz{}) + } + + if len(src) != 8 { + return fmt.Errorf("invalid length for timestamptz: %v", len(src)) + } + + var tstz Timestamptz + microsecSinceY2K := int64(binary.BigEndian.Uint64(src)) + + switch microsecSinceY2K { + case infinityMicrosecondOffset: + tstz = Timestamptz{Valid: true, InfinityModifier: Infinity} + case negativeInfinityMicrosecondOffset: + tstz = Timestamptz{Valid: true, InfinityModifier: -Infinity} + default: + tim := time.Unix( + microsecFromUnixEpochToY2K/1000000+microsecSinceY2K/1000000, + (microsecFromUnixEpochToY2K%1000000*1000)+(microsecSinceY2K%1000000*1000), + ) + if plan.location != nil { + tim = tim.In(plan.location) + } + tstz = Timestamptz{Time: tim, Valid: true} + } + + return scanner.ScanTimestamptz(tstz) +} + +type scanPlanTextTimestamptzToTimestamptzScanner struct{ location *time.Location } + +func (plan *scanPlanTextTimestamptzToTimestamptzScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TimestamptzScanner) + + if src == nil { + return scanner.ScanTimestamptz(Timestamptz{}) + } + + var tstz Timestamptz + sbuf := string(src) + switch sbuf { + case "infinity": + tstz = Timestamptz{Valid: true, InfinityModifier: Infinity} + case "-infinity": + tstz = Timestamptz{Valid: true, InfinityModifier: -Infinity} + default: + bc := false + if strings.HasSuffix(sbuf, " BC") { + sbuf = sbuf[:len(sbuf)-3] + bc = true + } + + var format string + if len(sbuf) >= 9 && (sbuf[len(sbuf)-9] == '-' || sbuf[len(sbuf)-9] == '+') { + format = pgTimestamptzSecondFormat + } else if len(sbuf) >= 6 && (sbuf[len(sbuf)-6] == '-' || sbuf[len(sbuf)-6] == '+') { + format = pgTimestamptzMinuteFormat + } else { + format = pgTimestamptzHourFormat + } + + tim, err := time.Parse(format, sbuf) + if err != nil { + return err + } + + if bc { + year := -tim.Year() + 1 + tim = time.Date(year, tim.Month(), tim.Day(), tim.Hour(), tim.Minute(), tim.Second(), tim.Nanosecond(), tim.Location()) + } + + if plan.location != nil { + tim = tim.In(plan.location) + } + + tstz = Timestamptz{Time: tim, Valid: true} + } + + return scanner.ScanTimestamptz(tstz) +} + +func (c *TimestamptzCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var tstz Timestamptz + err := codecScan(c, m, oid, format, src, &tstz) + if err != nil { + return nil, err + } + + if tstz.InfinityModifier != Finite { + return tstz.InfinityModifier.String(), nil + } + + return tstz.Time, nil +} + +func (c *TimestamptzCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var tstz Timestamptz + err := codecScan(c, m, oid, format, src, &tstz) + if err != nil { + return nil, err + } + + if tstz.InfinityModifier != Finite { + return tstz.InfinityModifier, nil + } + + return tstz.Time, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/uint32.go b/vendor/github.com/jackc/pgx/v5/pgtype/uint32.go new file mode 100644 index 00000000..f2b2fa6d --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/uint32.go @@ -0,0 +1,325 @@ +package pgtype + +import ( + "database/sql/driver" + "encoding/binary" + "fmt" + "math" + "strconv" + + "github.com/jackc/pgx/v5/internal/pgio" +) + +type Uint32Scanner interface { + ScanUint32(v Uint32) error +} + +type Uint32Valuer interface { + Uint32Value() (Uint32, error) +} + +// Uint32 is the core type that is used to represent PostgreSQL types such as OID, CID, and XID. +type Uint32 struct { + Uint32 uint32 + Valid bool +} + +func (n *Uint32) ScanUint32(v Uint32) error { + *n = v + return nil +} + +func (n Uint32) Uint32Value() (Uint32, error) { + return n, nil +} + +// Scan implements the database/sql Scanner interface. +func (dst *Uint32) Scan(src any) error { + if src == nil { + *dst = Uint32{} + return nil + } + + var n int64 + + switch src := src.(type) { + case int64: + n = src + case string: + un, err := strconv.ParseUint(src, 10, 32) + if err != nil { + return err + } + n = int64(un) + default: + return fmt.Errorf("cannot scan %T", src) + } + + if n < 0 { + return fmt.Errorf("%d is less than the minimum value for Uint32", n) + } + if n > math.MaxUint32 { + return fmt.Errorf("%d is greater than maximum value for Uint32", n) + } + + *dst = Uint32{Uint32: uint32(n), Valid: true} + + return nil +} + +// Value implements the database/sql/driver Valuer interface. +func (src Uint32) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + return int64(src.Uint32), nil +} + +type Uint32Codec struct{} + +func (Uint32Codec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (Uint32Codec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (Uint32Codec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch format { + case BinaryFormatCode: + switch value.(type) { + case uint32: + return encodePlanUint32CodecBinaryUint32{} + case Uint32Valuer: + return encodePlanUint32CodecBinaryUint32Valuer{} + case Int64Valuer: + return encodePlanUint32CodecBinaryInt64Valuer{} + } + case TextFormatCode: + switch value.(type) { + case uint32: + return encodePlanUint32CodecTextUint32{} + case Int64Valuer: + return encodePlanUint32CodecTextInt64Valuer{} + } + } + + return nil +} + +type encodePlanUint32CodecBinaryUint32 struct{} + +func (encodePlanUint32CodecBinaryUint32) Encode(value any, buf []byte) (newBuf []byte, err error) { + v := value.(uint32) + return pgio.AppendUint32(buf, v), nil +} + +type encodePlanUint32CodecBinaryUint32Valuer struct{} + +func (encodePlanUint32CodecBinaryUint32Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + v, err := value.(Uint32Valuer).Uint32Value() + if err != nil { + return nil, err + } + + if !v.Valid { + return nil, nil + } + + return pgio.AppendUint32(buf, v.Uint32), nil +} + +type encodePlanUint32CodecBinaryInt64Valuer struct{} + +func (encodePlanUint32CodecBinaryInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + v, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !v.Valid { + return nil, nil + } + + if v.Int64 < 0 { + return nil, fmt.Errorf("%d is less than minimum value for uint32", v.Int64) + } + if v.Int64 > math.MaxUint32 { + return nil, fmt.Errorf("%d is greater than maximum value for uint32", v.Int64) + } + + return pgio.AppendUint32(buf, uint32(v.Int64)), nil +} + +type encodePlanUint32CodecTextUint32 struct{} + +func (encodePlanUint32CodecTextUint32) Encode(value any, buf []byte) (newBuf []byte, err error) { + v := value.(uint32) + return append(buf, strconv.FormatUint(uint64(v), 10)...), nil +} + +type encodePlanUint32CodecTextUint32Valuer struct{} + +func (encodePlanUint32CodecTextUint32Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + v, err := value.(Uint32Valuer).Uint32Value() + if err != nil { + return nil, err + } + + if !v.Valid { + return nil, nil + } + + return append(buf, strconv.FormatUint(uint64(v.Uint32), 10)...), nil +} + +type encodePlanUint32CodecTextInt64Valuer struct{} + +func (encodePlanUint32CodecTextInt64Valuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + v, err := value.(Int64Valuer).Int64Value() + if err != nil { + return nil, err + } + + if !v.Valid { + return nil, nil + } + + if v.Int64 < 0 { + return nil, fmt.Errorf("%d is less than minimum value for uint32", v.Int64) + } + if v.Int64 > math.MaxUint32 { + return nil, fmt.Errorf("%d is greater than maximum value for uint32", v.Int64) + } + + return append(buf, strconv.FormatInt(v.Int64, 10)...), nil +} + +func (Uint32Codec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + + switch format { + case BinaryFormatCode: + switch target.(type) { + case *uint32: + return scanPlanBinaryUint32ToUint32{} + case Uint32Scanner: + return scanPlanBinaryUint32ToUint32Scanner{} + case TextScanner: + return scanPlanBinaryUint32ToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case *uint32: + return scanPlanTextAnyToUint32{} + case Uint32Scanner: + return scanPlanTextAnyToUint32Scanner{} + } + } + + return nil +} + +func (c Uint32Codec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var n uint32 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return int64(n), nil +} + +func (c Uint32Codec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var n uint32 + err := codecScan(c, m, oid, format, src, &n) + if err != nil { + return nil, err + } + return n, nil +} + +type scanPlanBinaryUint32ToUint32 struct{} + +func (scanPlanBinaryUint32ToUint32) Scan(src []byte, dst any) error { + if src == nil { + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for uint32: %v", len(src)) + } + + p := (dst).(*uint32) + *p = binary.BigEndian.Uint32(src) + + return nil +} + +type scanPlanBinaryUint32ToUint32Scanner struct{} + +func (scanPlanBinaryUint32ToUint32Scanner) Scan(src []byte, dst any) error { + s, ok := (dst).(Uint32Scanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanUint32(Uint32{}) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for uint32: %v", len(src)) + } + + n := binary.BigEndian.Uint32(src) + + return s.ScanUint32(Uint32{Uint32: n, Valid: true}) +} + +type scanPlanBinaryUint32ToTextScanner struct{} + +func (scanPlanBinaryUint32ToTextScanner) Scan(src []byte, dst any) error { + s, ok := (dst).(TextScanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanText(Text{}) + } + + if len(src) != 4 { + return fmt.Errorf("invalid length for uint32: %v", len(src)) + } + + n := uint64(binary.BigEndian.Uint32(src)) + return s.ScanText(Text{String: strconv.FormatUint(n, 10), Valid: true}) +} + +type scanPlanTextAnyToUint32Scanner struct{} + +func (scanPlanTextAnyToUint32Scanner) Scan(src []byte, dst any) error { + s, ok := (dst).(Uint32Scanner) + if !ok { + return ErrScanTargetTypeChanged + } + + if src == nil { + return s.ScanUint32(Uint32{}) + } + + n, err := strconv.ParseUint(string(src), 10, 32) + if err != nil { + return err + } + + return s.ScanUint32(Uint32{Uint32: uint32(n), Valid: true}) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/uuid.go b/vendor/github.com/jackc/pgx/v5/pgtype/uuid.go new file mode 100644 index 00000000..d57c0f2f --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/uuid.go @@ -0,0 +1,281 @@ +package pgtype + +import ( + "bytes" + "database/sql/driver" + "encoding/hex" + "fmt" +) + +type UUIDScanner interface { + ScanUUID(v UUID) error +} + +type UUIDValuer interface { + UUIDValue() (UUID, error) +} + +type UUID struct { + Bytes [16]byte + Valid bool +} + +func (b *UUID) ScanUUID(v UUID) error { + *b = v + return nil +} + +func (b UUID) UUIDValue() (UUID, error) { + return b, nil +} + +// parseUUID converts a string UUID in standard form to a byte array. +func parseUUID(src string) (dst [16]byte, err error) { + switch len(src) { + case 36: + src = src[0:8] + src[9:13] + src[14:18] + src[19:23] + src[24:] + case 32: + // dashes already stripped, assume valid + default: + // assume invalid. + return dst, fmt.Errorf("cannot parse UUID %v", src) + } + + buf, err := hex.DecodeString(src) + if err != nil { + return dst, err + } + + copy(dst[:], buf) + return dst, err +} + +// encodeUUID converts a uuid byte array to UUID standard string form. +func encodeUUID(src [16]byte) string { + var buf [36]byte + + hex.Encode(buf[0:8], src[:4]) + buf[8] = '-' + hex.Encode(buf[9:13], src[4:6]) + buf[13] = '-' + hex.Encode(buf[14:18], src[6:8]) + buf[18] = '-' + hex.Encode(buf[19:23], src[8:10]) + buf[23] = '-' + hex.Encode(buf[24:], src[10:]) + + return string(buf[:]) +} + +// Scan implements the database/sql Scanner interface. +func (dst *UUID) Scan(src any) error { + if src == nil { + *dst = UUID{} + return nil + } + + switch src := src.(type) { + case string: + buf, err := parseUUID(src) + if err != nil { + return err + } + *dst = UUID{Bytes: buf, Valid: true} + return nil + } + + return fmt.Errorf("cannot scan %T", src) +} + +// Value implements the database/sql/driver Valuer interface. +func (src UUID) Value() (driver.Value, error) { + if !src.Valid { + return nil, nil + } + + return encodeUUID(src.Bytes), nil +} + +func (src UUID) MarshalJSON() ([]byte, error) { + if !src.Valid { + return []byte("null"), nil + } + + var buff bytes.Buffer + buff.WriteByte('"') + buff.WriteString(encodeUUID(src.Bytes)) + buff.WriteByte('"') + return buff.Bytes(), nil +} + +func (dst *UUID) UnmarshalJSON(src []byte) error { + if bytes.Equal(src, []byte("null")) { + *dst = UUID{} + return nil + } + if len(src) != 38 { + return fmt.Errorf("invalid length for UUID: %v", len(src)) + } + buf, err := parseUUID(string(src[1 : len(src)-1])) + if err != nil { + return err + } + *dst = UUID{Bytes: buf, Valid: true} + return nil +} + +type UUIDCodec struct{} + +func (UUIDCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (UUIDCodec) PreferredFormat() int16 { + return BinaryFormatCode +} + +func (UUIDCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + if _, ok := value.(UUIDValuer); !ok { + return nil + } + + switch format { + case BinaryFormatCode: + return encodePlanUUIDCodecBinaryUUIDValuer{} + case TextFormatCode: + return encodePlanUUIDCodecTextUUIDValuer{} + } + + return nil +} + +type encodePlanUUIDCodecBinaryUUIDValuer struct{} + +func (encodePlanUUIDCodecBinaryUUIDValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + uuid, err := value.(UUIDValuer).UUIDValue() + if err != nil { + return nil, err + } + + if !uuid.Valid { + return nil, nil + } + + return append(buf, uuid.Bytes[:]...), nil +} + +type encodePlanUUIDCodecTextUUIDValuer struct{} + +func (encodePlanUUIDCodecTextUUIDValuer) Encode(value any, buf []byte) (newBuf []byte, err error) { + uuid, err := value.(UUIDValuer).UUIDValue() + if err != nil { + return nil, err + } + + if !uuid.Valid { + return nil, nil + } + + return append(buf, encodeUUID(uuid.Bytes)...), nil +} + +func (UUIDCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch format { + case BinaryFormatCode: + switch target.(type) { + case UUIDScanner: + return scanPlanBinaryUUIDToUUIDScanner{} + case TextScanner: + return scanPlanBinaryUUIDToTextScanner{} + } + case TextFormatCode: + switch target.(type) { + case UUIDScanner: + return scanPlanTextAnyToUUIDScanner{} + } + } + + return nil +} + +type scanPlanBinaryUUIDToUUIDScanner struct{} + +func (scanPlanBinaryUUIDToUUIDScanner) Scan(src []byte, dst any) error { + scanner := (dst).(UUIDScanner) + + if src == nil { + return scanner.ScanUUID(UUID{}) + } + + if len(src) != 16 { + return fmt.Errorf("invalid length for UUID: %v", len(src)) + } + + uuid := UUID{Valid: true} + copy(uuid.Bytes[:], src) + + return scanner.ScanUUID(uuid) +} + +type scanPlanBinaryUUIDToTextScanner struct{} + +func (scanPlanBinaryUUIDToTextScanner) Scan(src []byte, dst any) error { + scanner := (dst).(TextScanner) + + if src == nil { + return scanner.ScanText(Text{}) + } + + if len(src) != 16 { + return fmt.Errorf("invalid length for UUID: %v", len(src)) + } + + var buf [16]byte + copy(buf[:], src) + + return scanner.ScanText(Text{String: encodeUUID(buf), Valid: true}) +} + +type scanPlanTextAnyToUUIDScanner struct{} + +func (scanPlanTextAnyToUUIDScanner) Scan(src []byte, dst any) error { + scanner := (dst).(UUIDScanner) + + if src == nil { + return scanner.ScanUUID(UUID{}) + } + + buf, err := parseUUID(string(src)) + if err != nil { + return err + } + + return scanner.ScanUUID(UUID{Bytes: buf, Valid: true}) +} + +func (c UUIDCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + var uuid UUID + err := codecScan(c, m, oid, format, src, &uuid) + if err != nil { + return nil, err + } + + return encodeUUID(uuid.Bytes), nil +} + +func (c UUIDCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var uuid UUID + err := codecScan(c, m, oid, format, src, &uuid) + if err != nil { + return nil, err + } + return uuid.Bytes, nil +} diff --git a/vendor/github.com/jackc/pgx/v5/pgtype/xml.go b/vendor/github.com/jackc/pgx/v5/pgtype/xml.go new file mode 100644 index 00000000..fb4c49ad --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgtype/xml.go @@ -0,0 +1,198 @@ +package pgtype + +import ( + "database/sql" + "database/sql/driver" + "encoding/xml" + "fmt" + "reflect" +) + +type XMLCodec struct { + Marshal func(v any) ([]byte, error) + Unmarshal func(data []byte, v any) error +} + +func (*XMLCodec) FormatSupported(format int16) bool { + return format == TextFormatCode || format == BinaryFormatCode +} + +func (*XMLCodec) PreferredFormat() int16 { + return TextFormatCode +} + +func (c *XMLCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan { + switch value.(type) { + case string: + return encodePlanXMLCodecEitherFormatString{} + case []byte: + return encodePlanXMLCodecEitherFormatByteSlice{} + + // Cannot rely on driver.Valuer being handled later because anything can be marshalled. + // + // https://github.com/jackc/pgx/issues/1430 + // + // Check for driver.Valuer must come before xml.Marshaler so that it is guaranteed to be used + // when both are implemented https://github.com/jackc/pgx/issues/1805 + case driver.Valuer: + return &encodePlanDriverValuer{m: m, oid: oid, formatCode: format} + + // Must come before trying wrap encode plans because a pointer to a struct may be unwrapped to a struct that can be + // marshalled. + // + // https://github.com/jackc/pgx/issues/1681 + case xml.Marshaler: + return &encodePlanXMLCodecEitherFormatMarshal{ + marshal: c.Marshal, + } + } + + // Because anything can be marshalled the normal wrapping in Map.PlanScan doesn't get a chance to run. So try the + // appropriate wrappers here. + for _, f := range []TryWrapEncodePlanFunc{ + TryWrapDerefPointerEncodePlan, + TryWrapFindUnderlyingTypeEncodePlan, + } { + if wrapperPlan, nextValue, ok := f(value); ok { + if nextPlan := c.PlanEncode(m, oid, format, nextValue); nextPlan != nil { + wrapperPlan.SetNext(nextPlan) + return wrapperPlan + } + } + } + + return &encodePlanXMLCodecEitherFormatMarshal{ + marshal: c.Marshal, + } +} + +type encodePlanXMLCodecEitherFormatString struct{} + +func (encodePlanXMLCodecEitherFormatString) Encode(value any, buf []byte) (newBuf []byte, err error) { + xmlString := value.(string) + buf = append(buf, xmlString...) + return buf, nil +} + +type encodePlanXMLCodecEitherFormatByteSlice struct{} + +func (encodePlanXMLCodecEitherFormatByteSlice) Encode(value any, buf []byte) (newBuf []byte, err error) { + xmlBytes := value.([]byte) + if xmlBytes == nil { + return nil, nil + } + + buf = append(buf, xmlBytes...) + return buf, nil +} + +type encodePlanXMLCodecEitherFormatMarshal struct { + marshal func(v any) ([]byte, error) +} + +func (e *encodePlanXMLCodecEitherFormatMarshal) Encode(value any, buf []byte) (newBuf []byte, err error) { + xmlBytes, err := e.marshal(value) + if err != nil { + return nil, err + } + + buf = append(buf, xmlBytes...) + return buf, nil +} + +func (c *XMLCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan { + switch target.(type) { + case *string: + return scanPlanAnyToString{} + + case **string: + // This is to fix **string scanning. It seems wrong to special case **string, but it's not clear what a better + // solution would be. + // + // https://github.com/jackc/pgx/issues/1470 -- **string + // https://github.com/jackc/pgx/issues/1691 -- ** anything else + + if wrapperPlan, nextDst, ok := TryPointerPointerScanPlan(target); ok { + if nextPlan := m.planScan(oid, format, nextDst); nextPlan != nil { + if _, failed := nextPlan.(*scanPlanFail); !failed { + wrapperPlan.SetNext(nextPlan) + return wrapperPlan + } + } + } + + case *[]byte: + return scanPlanXMLToByteSlice{} + case BytesScanner: + return scanPlanBinaryBytesToBytesScanner{} + + // Cannot rely on sql.Scanner being handled later because scanPlanXMLToXMLUnmarshal will take precedence. + // + // https://github.com/jackc/pgx/issues/1418 + case sql.Scanner: + return &scanPlanSQLScanner{formatCode: format} + } + + return &scanPlanXMLToXMLUnmarshal{ + unmarshal: c.Unmarshal, + } +} + +type scanPlanXMLToByteSlice struct{} + +func (scanPlanXMLToByteSlice) Scan(src []byte, dst any) error { + dstBuf := dst.(*[]byte) + if src == nil { + *dstBuf = nil + return nil + } + + *dstBuf = make([]byte, len(src)) + copy(*dstBuf, src) + return nil +} + +type scanPlanXMLToXMLUnmarshal struct { + unmarshal func(data []byte, v any) error +} + +func (s *scanPlanXMLToXMLUnmarshal) Scan(src []byte, dst any) error { + if src == nil { + dstValue := reflect.ValueOf(dst) + if dstValue.Kind() == reflect.Ptr { + el := dstValue.Elem() + switch el.Kind() { + case reflect.Ptr, reflect.Slice, reflect.Map, reflect.Interface, reflect.Struct: + el.Set(reflect.Zero(el.Type())) + return nil + } + } + + return fmt.Errorf("cannot scan NULL into %T", dst) + } + + elem := reflect.ValueOf(dst).Elem() + elem.Set(reflect.Zero(elem.Type())) + + return s.unmarshal(src, dst) +} + +func (c *XMLCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) { + if src == nil { + return nil, nil + } + + dstBuf := make([]byte, len(src)) + copy(dstBuf, src) + return dstBuf, nil +} + +func (c *XMLCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) { + if src == nil { + return nil, nil + } + + var dst any + err := c.Unmarshal(src, &dst) + return dst, err +} diff --git a/vendor/github.com/jackc/pgx/v5/pgxpool/batch_results.go b/vendor/github.com/jackc/pgx/v5/pgxpool/batch_results.go new file mode 100644 index 00000000..5d5c681d --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgxpool/batch_results.go @@ -0,0 +1,52 @@ +package pgxpool + +import ( + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" +) + +type errBatchResults struct { + err error +} + +func (br errBatchResults) Exec() (pgconn.CommandTag, error) { + return pgconn.CommandTag{}, br.err +} + +func (br errBatchResults) Query() (pgx.Rows, error) { + return errRows{err: br.err}, br.err +} + +func (br errBatchResults) QueryRow() pgx.Row { + return errRow{err: br.err} +} + +func (br errBatchResults) Close() error { + return br.err +} + +type poolBatchResults struct { + br pgx.BatchResults + c *Conn +} + +func (br *poolBatchResults) Exec() (pgconn.CommandTag, error) { + return br.br.Exec() +} + +func (br *poolBatchResults) Query() (pgx.Rows, error) { + return br.br.Query() +} + +func (br *poolBatchResults) QueryRow() pgx.Row { + return br.br.QueryRow() +} + +func (br *poolBatchResults) Close() error { + err := br.br.Close() + if br.c != nil { + br.c.Release() + br.c = nil + } + return err +} diff --git a/vendor/github.com/jackc/pgx/v5/pgxpool/conn.go b/vendor/github.com/jackc/pgx/v5/pgxpool/conn.go new file mode 100644 index 00000000..38c90f3d --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgxpool/conn.go @@ -0,0 +1,134 @@ +package pgxpool + +import ( + "context" + "sync/atomic" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" + "github.com/jackc/puddle/v2" +) + +// Conn is an acquired *pgx.Conn from a Pool. +type Conn struct { + res *puddle.Resource[*connResource] + p *Pool +} + +// Release returns c to the pool it was acquired from. Once Release has been called, other methods must not be called. +// However, it is safe to call Release multiple times. Subsequent calls after the first will be ignored. +func (c *Conn) Release() { + if c.res == nil { + return + } + + conn := c.Conn() + res := c.res + c.res = nil + + if c.p.releaseTracer != nil { + c.p.releaseTracer.TraceRelease(c.p, TraceReleaseData{Conn: conn}) + } + + if conn.IsClosed() || conn.PgConn().IsBusy() || conn.PgConn().TxStatus() != 'I' { + res.Destroy() + // Signal to the health check to run since we just destroyed a connections + // and we might be below minConns now + c.p.triggerHealthCheck() + return + } + + // If the pool is consistently being used, we might never get to check the + // lifetime of a connection since we only check idle connections in checkConnsHealth + // so we also check the lifetime here and force a health check + if c.p.isExpired(res) { + atomic.AddInt64(&c.p.lifetimeDestroyCount, 1) + res.Destroy() + // Signal to the health check to run since we just destroyed a connections + // and we might be below minConns now + c.p.triggerHealthCheck() + return + } + + if c.p.afterRelease == nil { + res.Release() + return + } + + go func() { + if c.p.afterRelease(conn) { + res.Release() + } else { + res.Destroy() + // Signal to the health check to run since we just destroyed a connections + // and we might be below minConns now + c.p.triggerHealthCheck() + } + }() +} + +// Hijack assumes ownership of the connection from the pool. Caller is responsible for closing the connection. Hijack +// will panic if called on an already released or hijacked connection. +func (c *Conn) Hijack() *pgx.Conn { + if c.res == nil { + panic("cannot hijack already released or hijacked connection") + } + + conn := c.Conn() + res := c.res + c.res = nil + + res.Hijack() + + return conn +} + +func (c *Conn) Exec(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) { + return c.Conn().Exec(ctx, sql, arguments...) +} + +func (c *Conn) Query(ctx context.Context, sql string, args ...any) (pgx.Rows, error) { + return c.Conn().Query(ctx, sql, args...) +} + +func (c *Conn) QueryRow(ctx context.Context, sql string, args ...any) pgx.Row { + return c.Conn().QueryRow(ctx, sql, args...) +} + +func (c *Conn) SendBatch(ctx context.Context, b *pgx.Batch) pgx.BatchResults { + return c.Conn().SendBatch(ctx, b) +} + +func (c *Conn) CopyFrom(ctx context.Context, tableName pgx.Identifier, columnNames []string, rowSrc pgx.CopyFromSource) (int64, error) { + return c.Conn().CopyFrom(ctx, tableName, columnNames, rowSrc) +} + +// Begin starts a transaction block from the *Conn without explicitly setting a transaction mode (see BeginTx with TxOptions if transaction mode is required). +func (c *Conn) Begin(ctx context.Context) (pgx.Tx, error) { + return c.Conn().Begin(ctx) +} + +// BeginTx starts a transaction block from the *Conn with txOptions determining the transaction mode. +func (c *Conn) BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error) { + return c.Conn().BeginTx(ctx, txOptions) +} + +func (c *Conn) Ping(ctx context.Context) error { + return c.Conn().Ping(ctx) +} + +func (c *Conn) Conn() *pgx.Conn { + return c.connResource().conn +} + +func (c *Conn) connResource() *connResource { + return c.res.Value() +} + +func (c *Conn) getPoolRow(r pgx.Row) *poolRow { + return c.connResource().getPoolRow(c, r) +} + +func (c *Conn) getPoolRows(r pgx.Rows) *poolRows { + return c.connResource().getPoolRows(c, r) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgxpool/doc.go b/vendor/github.com/jackc/pgx/v5/pgxpool/doc.go new file mode 100644 index 00000000..099443bc --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgxpool/doc.go @@ -0,0 +1,27 @@ +// Package pgxpool is a concurrency-safe connection pool for pgx. +/* +pgxpool implements a nearly identical interface to pgx connections. + +Creating a Pool + +The primary way of creating a pool is with [pgxpool.New]: + + pool, err := pgxpool.New(context.Background(), os.Getenv("DATABASE_URL")) + +The database connection string can be in URL or keyword/value format. PostgreSQL settings, pgx settings, and pool settings can be +specified here. In addition, a config struct can be created by [ParseConfig]. + + config, err := pgxpool.ParseConfig(os.Getenv("DATABASE_URL")) + if err != nil { + // ... + } + config.AfterConnect = func(ctx context.Context, conn *pgx.Conn) error { + // do something with every new connection + } + + pool, err := pgxpool.NewWithConfig(context.Background(), config) + +A pool returns without waiting for any connections to be established. Acquire a connection immediately after creating +the pool to check if a connection can successfully be established. +*/ +package pgxpool diff --git a/vendor/github.com/jackc/pgx/v5/pgxpool/pool.go b/vendor/github.com/jackc/pgx/v5/pgxpool/pool.go new file mode 100644 index 00000000..fdcba724 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgxpool/pool.go @@ -0,0 +1,717 @@ +package pgxpool + +import ( + "context" + "fmt" + "math/rand" + "runtime" + "strconv" + "sync" + "sync/atomic" + "time" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" + "github.com/jackc/puddle/v2" +) + +var defaultMaxConns = int32(4) +var defaultMinConns = int32(0) +var defaultMaxConnLifetime = time.Hour +var defaultMaxConnIdleTime = time.Minute * 30 +var defaultHealthCheckPeriod = time.Minute + +type connResource struct { + conn *pgx.Conn + conns []Conn + poolRows []poolRow + poolRowss []poolRows + maxAgeTime time.Time +} + +func (cr *connResource) getConn(p *Pool, res *puddle.Resource[*connResource]) *Conn { + if len(cr.conns) == 0 { + cr.conns = make([]Conn, 128) + } + + c := &cr.conns[len(cr.conns)-1] + cr.conns = cr.conns[0 : len(cr.conns)-1] + + c.res = res + c.p = p + + return c +} + +func (cr *connResource) getPoolRow(c *Conn, r pgx.Row) *poolRow { + if len(cr.poolRows) == 0 { + cr.poolRows = make([]poolRow, 128) + } + + pr := &cr.poolRows[len(cr.poolRows)-1] + cr.poolRows = cr.poolRows[0 : len(cr.poolRows)-1] + + pr.c = c + pr.r = r + + return pr +} + +func (cr *connResource) getPoolRows(c *Conn, r pgx.Rows) *poolRows { + if len(cr.poolRowss) == 0 { + cr.poolRowss = make([]poolRows, 128) + } + + pr := &cr.poolRowss[len(cr.poolRowss)-1] + cr.poolRowss = cr.poolRowss[0 : len(cr.poolRowss)-1] + + pr.c = c + pr.r = r + + return pr +} + +// Pool allows for connection reuse. +type Pool struct { + // 64 bit fields accessed with atomics must be at beginning of struct to guarantee alignment for certain 32-bit + // architectures. See BUGS section of https://pkg.go.dev/sync/atomic and https://github.com/jackc/pgx/issues/1288. + newConnsCount int64 + lifetimeDestroyCount int64 + idleDestroyCount int64 + + p *puddle.Pool[*connResource] + config *Config + beforeConnect func(context.Context, *pgx.ConnConfig) error + afterConnect func(context.Context, *pgx.Conn) error + beforeAcquire func(context.Context, *pgx.Conn) bool + afterRelease func(*pgx.Conn) bool + beforeClose func(*pgx.Conn) + minConns int32 + maxConns int32 + maxConnLifetime time.Duration + maxConnLifetimeJitter time.Duration + maxConnIdleTime time.Duration + healthCheckPeriod time.Duration + + healthCheckChan chan struct{} + + acquireTracer AcquireTracer + releaseTracer ReleaseTracer + + closeOnce sync.Once + closeChan chan struct{} +} + +// Config is the configuration struct for creating a pool. It must be created by [ParseConfig] and then it can be +// modified. +type Config struct { + ConnConfig *pgx.ConnConfig + + // BeforeConnect is called before a new connection is made. It is passed a copy of the underlying pgx.ConnConfig and + // will not impact any existing open connections. + BeforeConnect func(context.Context, *pgx.ConnConfig) error + + // AfterConnect is called after a connection is established, but before it is added to the pool. + AfterConnect func(context.Context, *pgx.Conn) error + + // BeforeAcquire is called before a connection is acquired from the pool. It must return true to allow the + // acquisition or false to indicate that the connection should be destroyed and a different connection should be + // acquired. + BeforeAcquire func(context.Context, *pgx.Conn) bool + + // AfterRelease is called after a connection is released, but before it is returned to the pool. It must return true to + // return the connection to the pool or false to destroy the connection. + AfterRelease func(*pgx.Conn) bool + + // BeforeClose is called right before a connection is closed and removed from the pool. + BeforeClose func(*pgx.Conn) + + // MaxConnLifetime is the duration since creation after which a connection will be automatically closed. + MaxConnLifetime time.Duration + + // MaxConnLifetimeJitter is the duration after MaxConnLifetime to randomly decide to close a connection. + // This helps prevent all connections from being closed at the exact same time, starving the pool. + MaxConnLifetimeJitter time.Duration + + // MaxConnIdleTime is the duration after which an idle connection will be automatically closed by the health check. + MaxConnIdleTime time.Duration + + // MaxConns is the maximum size of the pool. The default is the greater of 4 or runtime.NumCPU(). + MaxConns int32 + + // MinConns is the minimum size of the pool. After connection closes, the pool might dip below MinConns. A low + // number of MinConns might mean the pool is empty after MaxConnLifetime until the health check has a chance + // to create new connections. + MinConns int32 + + // HealthCheckPeriod is the duration between checks of the health of idle connections. + HealthCheckPeriod time.Duration + + createdByParseConfig bool // Used to enforce created by ParseConfig rule. +} + +// Copy returns a deep copy of the config that is safe to use and modify. +// The only exception is the tls.Config: +// according to the tls.Config docs it must not be modified after creation. +func (c *Config) Copy() *Config { + newConfig := new(Config) + *newConfig = *c + newConfig.ConnConfig = c.ConnConfig.Copy() + return newConfig +} + +// ConnString returns the connection string as parsed by pgxpool.ParseConfig into pgxpool.Config. +func (c *Config) ConnString() string { return c.ConnConfig.ConnString() } + +// New creates a new Pool. See [ParseConfig] for information on connString format. +func New(ctx context.Context, connString string) (*Pool, error) { + config, err := ParseConfig(connString) + if err != nil { + return nil, err + } + + return NewWithConfig(ctx, config) +} + +// NewWithConfig creates a new Pool. config must have been created by [ParseConfig]. +func NewWithConfig(ctx context.Context, config *Config) (*Pool, error) { + // Default values are set in ParseConfig. Enforce initial creation by ParseConfig rather than setting defaults from + // zero values. + if !config.createdByParseConfig { + panic("config must be created by ParseConfig") + } + + p := &Pool{ + config: config, + beforeConnect: config.BeforeConnect, + afterConnect: config.AfterConnect, + beforeAcquire: config.BeforeAcquire, + afterRelease: config.AfterRelease, + beforeClose: config.BeforeClose, + minConns: config.MinConns, + maxConns: config.MaxConns, + maxConnLifetime: config.MaxConnLifetime, + maxConnLifetimeJitter: config.MaxConnLifetimeJitter, + maxConnIdleTime: config.MaxConnIdleTime, + healthCheckPeriod: config.HealthCheckPeriod, + healthCheckChan: make(chan struct{}, 1), + closeChan: make(chan struct{}), + } + + if t, ok := config.ConnConfig.Tracer.(AcquireTracer); ok { + p.acquireTracer = t + } + + if t, ok := config.ConnConfig.Tracer.(ReleaseTracer); ok { + p.releaseTracer = t + } + + var err error + p.p, err = puddle.NewPool( + &puddle.Config[*connResource]{ + Constructor: func(ctx context.Context) (*connResource, error) { + atomic.AddInt64(&p.newConnsCount, 1) + connConfig := p.config.ConnConfig.Copy() + + // Connection will continue in background even if Acquire is canceled. Ensure that a connect won't hang forever. + if connConfig.ConnectTimeout <= 0 { + connConfig.ConnectTimeout = 2 * time.Minute + } + + if p.beforeConnect != nil { + if err := p.beforeConnect(ctx, connConfig); err != nil { + return nil, err + } + } + + conn, err := pgx.ConnectConfig(ctx, connConfig) + if err != nil { + return nil, err + } + + if p.afterConnect != nil { + err = p.afterConnect(ctx, conn) + if err != nil { + conn.Close(ctx) + return nil, err + } + } + + jitterSecs := rand.Float64() * config.MaxConnLifetimeJitter.Seconds() + maxAgeTime := time.Now().Add(config.MaxConnLifetime).Add(time.Duration(jitterSecs) * time.Second) + + cr := &connResource{ + conn: conn, + conns: make([]Conn, 64), + poolRows: make([]poolRow, 64), + poolRowss: make([]poolRows, 64), + maxAgeTime: maxAgeTime, + } + + return cr, nil + }, + Destructor: func(value *connResource) { + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + conn := value.conn + if p.beforeClose != nil { + p.beforeClose(conn) + } + conn.Close(ctx) + select { + case <-conn.PgConn().CleanupDone(): + case <-ctx.Done(): + } + cancel() + }, + MaxSize: config.MaxConns, + }, + ) + if err != nil { + return nil, err + } + + go func() { + p.createIdleResources(ctx, int(p.minConns)) + p.backgroundHealthCheck() + }() + + return p, nil +} + +// ParseConfig builds a Config from connString. It parses connString with the same behavior as [pgx.ParseConfig] with the +// addition of the following variables: +// +// - pool_max_conns: integer greater than 0 +// - pool_min_conns: integer 0 or greater +// - pool_max_conn_lifetime: duration string +// - pool_max_conn_idle_time: duration string +// - pool_health_check_period: duration string +// - pool_max_conn_lifetime_jitter: duration string +// +// See Config for definitions of these arguments. +// +// # Example Keyword/Value +// user=jack password=secret host=pg.example.com port=5432 dbname=mydb sslmode=verify-ca pool_max_conns=10 +// +// # Example URL +// postgres://jack:secret@pg.example.com:5432/mydb?sslmode=verify-ca&pool_max_conns=10 +func ParseConfig(connString string) (*Config, error) { + connConfig, err := pgx.ParseConfig(connString) + if err != nil { + return nil, err + } + + config := &Config{ + ConnConfig: connConfig, + createdByParseConfig: true, + } + + if s, ok := config.ConnConfig.Config.RuntimeParams["pool_max_conns"]; ok { + delete(connConfig.Config.RuntimeParams, "pool_max_conns") + n, err := strconv.ParseInt(s, 10, 32) + if err != nil { + return nil, fmt.Errorf("cannot parse pool_max_conns: %w", err) + } + if n < 1 { + return nil, fmt.Errorf("pool_max_conns too small: %d", n) + } + config.MaxConns = int32(n) + } else { + config.MaxConns = defaultMaxConns + if numCPU := int32(runtime.NumCPU()); numCPU > config.MaxConns { + config.MaxConns = numCPU + } + } + + if s, ok := config.ConnConfig.Config.RuntimeParams["pool_min_conns"]; ok { + delete(connConfig.Config.RuntimeParams, "pool_min_conns") + n, err := strconv.ParseInt(s, 10, 32) + if err != nil { + return nil, fmt.Errorf("cannot parse pool_min_conns: %w", err) + } + config.MinConns = int32(n) + } else { + config.MinConns = defaultMinConns + } + + if s, ok := config.ConnConfig.Config.RuntimeParams["pool_max_conn_lifetime"]; ok { + delete(connConfig.Config.RuntimeParams, "pool_max_conn_lifetime") + d, err := time.ParseDuration(s) + if err != nil { + return nil, fmt.Errorf("invalid pool_max_conn_lifetime: %w", err) + } + config.MaxConnLifetime = d + } else { + config.MaxConnLifetime = defaultMaxConnLifetime + } + + if s, ok := config.ConnConfig.Config.RuntimeParams["pool_max_conn_idle_time"]; ok { + delete(connConfig.Config.RuntimeParams, "pool_max_conn_idle_time") + d, err := time.ParseDuration(s) + if err != nil { + return nil, fmt.Errorf("invalid pool_max_conn_idle_time: %w", err) + } + config.MaxConnIdleTime = d + } else { + config.MaxConnIdleTime = defaultMaxConnIdleTime + } + + if s, ok := config.ConnConfig.Config.RuntimeParams["pool_health_check_period"]; ok { + delete(connConfig.Config.RuntimeParams, "pool_health_check_period") + d, err := time.ParseDuration(s) + if err != nil { + return nil, fmt.Errorf("invalid pool_health_check_period: %w", err) + } + config.HealthCheckPeriod = d + } else { + config.HealthCheckPeriod = defaultHealthCheckPeriod + } + + if s, ok := config.ConnConfig.Config.RuntimeParams["pool_max_conn_lifetime_jitter"]; ok { + delete(connConfig.Config.RuntimeParams, "pool_max_conn_lifetime_jitter") + d, err := time.ParseDuration(s) + if err != nil { + return nil, fmt.Errorf("invalid pool_max_conn_lifetime_jitter: %w", err) + } + config.MaxConnLifetimeJitter = d + } + + return config, nil +} + +// Close closes all connections in the pool and rejects future Acquire calls. Blocks until all connections are returned +// to pool and closed. +func (p *Pool) Close() { + p.closeOnce.Do(func() { + close(p.closeChan) + p.p.Close() + }) +} + +func (p *Pool) isExpired(res *puddle.Resource[*connResource]) bool { + return time.Now().After(res.Value().maxAgeTime) +} + +func (p *Pool) triggerHealthCheck() { + go func() { + // Destroy is asynchronous so we give it time to actually remove itself from + // the pool otherwise we might try to check the pool size too soon + time.Sleep(500 * time.Millisecond) + select { + case p.healthCheckChan <- struct{}{}: + default: + } + }() +} + +func (p *Pool) backgroundHealthCheck() { + ticker := time.NewTicker(p.healthCheckPeriod) + defer ticker.Stop() + for { + select { + case <-p.closeChan: + return + case <-p.healthCheckChan: + p.checkHealth() + case <-ticker.C: + p.checkHealth() + } + } +} + +func (p *Pool) checkHealth() { + for { + // If checkMinConns failed we don't destroy any connections since we couldn't + // even get to minConns + if err := p.checkMinConns(); err != nil { + // Should we log this error somewhere? + break + } + if !p.checkConnsHealth() { + // Since we didn't destroy any connections we can stop looping + break + } + // Technically Destroy is asynchronous but 500ms should be enough for it to + // remove it from the underlying pool + select { + case <-p.closeChan: + return + case <-time.After(500 * time.Millisecond): + } + } +} + +// checkConnsHealth will check all idle connections, destroy a connection if +// it's idle or too old, and returns true if any were destroyed +func (p *Pool) checkConnsHealth() bool { + var destroyed bool + totalConns := p.Stat().TotalConns() + resources := p.p.AcquireAllIdle() + for _, res := range resources { + // We're okay going under minConns if the lifetime is up + if p.isExpired(res) && totalConns >= p.minConns { + atomic.AddInt64(&p.lifetimeDestroyCount, 1) + res.Destroy() + destroyed = true + // Since Destroy is async we manually decrement totalConns. + totalConns-- + } else if res.IdleDuration() > p.maxConnIdleTime && totalConns > p.minConns { + atomic.AddInt64(&p.idleDestroyCount, 1) + res.Destroy() + destroyed = true + // Since Destroy is async we manually decrement totalConns. + totalConns-- + } else { + res.ReleaseUnused() + } + } + return destroyed +} + +func (p *Pool) checkMinConns() error { + // TotalConns can include ones that are being destroyed but we should have + // sleep(500ms) around all of the destroys to help prevent that from throwing + // off this check + toCreate := p.minConns - p.Stat().TotalConns() + if toCreate > 0 { + return p.createIdleResources(context.Background(), int(toCreate)) + } + return nil +} + +func (p *Pool) createIdleResources(parentCtx context.Context, targetResources int) error { + ctx, cancel := context.WithCancel(parentCtx) + defer cancel() + + errs := make(chan error, targetResources) + + for i := 0; i < targetResources; i++ { + go func() { + err := p.p.CreateResource(ctx) + // Ignore ErrNotAvailable since it means that the pool has become full since we started creating resource. + if err == puddle.ErrNotAvailable { + err = nil + } + errs <- err + }() + } + + var firstError error + for i := 0; i < targetResources; i++ { + err := <-errs + if err != nil && firstError == nil { + cancel() + firstError = err + } + } + + return firstError +} + +// Acquire returns a connection (*Conn) from the Pool +func (p *Pool) Acquire(ctx context.Context) (c *Conn, err error) { + if p.acquireTracer != nil { + ctx = p.acquireTracer.TraceAcquireStart(ctx, p, TraceAcquireStartData{}) + defer func() { + var conn *pgx.Conn + if c != nil { + conn = c.Conn() + } + p.acquireTracer.TraceAcquireEnd(ctx, p, TraceAcquireEndData{Conn: conn, Err: err}) + }() + } + + for { + res, err := p.p.Acquire(ctx) + if err != nil { + return nil, err + } + + cr := res.Value() + + if res.IdleDuration() > time.Second { + err := cr.conn.Ping(ctx) + if err != nil { + res.Destroy() + continue + } + } + + if p.beforeAcquire == nil || p.beforeAcquire(ctx, cr.conn) { + return cr.getConn(p, res), nil + } + + res.Destroy() + } +} + +// AcquireFunc acquires a *Conn and calls f with that *Conn. ctx will only affect the Acquire. It has no effect on the +// call of f. The return value is either an error acquiring the *Conn or the return value of f. The *Conn is +// automatically released after the call of f. +func (p *Pool) AcquireFunc(ctx context.Context, f func(*Conn) error) error { + conn, err := p.Acquire(ctx) + if err != nil { + return err + } + defer conn.Release() + + return f(conn) +} + +// AcquireAllIdle atomically acquires all currently idle connections. Its intended use is for health check and +// keep-alive functionality. It does not update pool statistics. +func (p *Pool) AcquireAllIdle(ctx context.Context) []*Conn { + resources := p.p.AcquireAllIdle() + conns := make([]*Conn, 0, len(resources)) + for _, res := range resources { + cr := res.Value() + if p.beforeAcquire == nil || p.beforeAcquire(ctx, cr.conn) { + conns = append(conns, cr.getConn(p, res)) + } else { + res.Destroy() + } + } + + return conns +} + +// Reset closes all connections, but leaves the pool open. It is intended for use when an error is detected that would +// disrupt all connections (such as a network interruption or a server state change). +// +// It is safe to reset a pool while connections are checked out. Those connections will be closed when they are returned +// to the pool. +func (p *Pool) Reset() { + p.p.Reset() +} + +// Config returns a copy of config that was used to initialize this pool. +func (p *Pool) Config() *Config { return p.config.Copy() } + +// Stat returns a pgxpool.Stat struct with a snapshot of Pool statistics. +func (p *Pool) Stat() *Stat { + return &Stat{ + s: p.p.Stat(), + newConnsCount: atomic.LoadInt64(&p.newConnsCount), + lifetimeDestroyCount: atomic.LoadInt64(&p.lifetimeDestroyCount), + idleDestroyCount: atomic.LoadInt64(&p.idleDestroyCount), + } +} + +// Exec acquires a connection from the Pool and executes the given SQL. +// SQL can be either a prepared statement name or an SQL string. +// Arguments should be referenced positionally from the SQL string as $1, $2, etc. +// The acquired connection is returned to the pool when the Exec function returns. +func (p *Pool) Exec(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) { + c, err := p.Acquire(ctx) + if err != nil { + return pgconn.CommandTag{}, err + } + defer c.Release() + + return c.Exec(ctx, sql, arguments...) +} + +// Query acquires a connection and executes a query that returns pgx.Rows. +// Arguments should be referenced positionally from the SQL string as $1, $2, etc. +// See pgx.Rows documentation to close the returned Rows and return the acquired connection to the Pool. +// +// If there is an error, the returned pgx.Rows will be returned in an error state. +// If preferred, ignore the error returned from Query and handle errors using the returned pgx.Rows. +// +// For extra control over how the query is executed, the types QuerySimpleProtocol, QueryResultFormats, and +// QueryResultFormatsByOID may be used as the first args to control exactly how the query is executed. This is rarely +// needed. See the documentation for those types for details. +func (p *Pool) Query(ctx context.Context, sql string, args ...any) (pgx.Rows, error) { + c, err := p.Acquire(ctx) + if err != nil { + return errRows{err: err}, err + } + + rows, err := c.Query(ctx, sql, args...) + if err != nil { + c.Release() + return errRows{err: err}, err + } + + return c.getPoolRows(rows), nil +} + +// QueryRow acquires a connection and executes a query that is expected +// to return at most one row (pgx.Row). Errors are deferred until pgx.Row's +// Scan method is called. If the query selects no rows, pgx.Row's Scan will +// return ErrNoRows. Otherwise, pgx.Row's Scan scans the first selected row +// and discards the rest. The acquired connection is returned to the Pool when +// pgx.Row's Scan method is called. +// +// Arguments should be referenced positionally from the SQL string as $1, $2, etc. +// +// For extra control over how the query is executed, the types QuerySimpleProtocol, QueryResultFormats, and +// QueryResultFormatsByOID may be used as the first args to control exactly how the query is executed. This is rarely +// needed. See the documentation for those types for details. +func (p *Pool) QueryRow(ctx context.Context, sql string, args ...any) pgx.Row { + c, err := p.Acquire(ctx) + if err != nil { + return errRow{err: err} + } + + row := c.QueryRow(ctx, sql, args...) + return c.getPoolRow(row) +} + +func (p *Pool) SendBatch(ctx context.Context, b *pgx.Batch) pgx.BatchResults { + c, err := p.Acquire(ctx) + if err != nil { + return errBatchResults{err: err} + } + + br := c.SendBatch(ctx, b) + return &poolBatchResults{br: br, c: c} +} + +// Begin acquires a connection from the Pool and starts a transaction. Unlike database/sql, the context only affects the begin command. i.e. there is no +// auto-rollback on context cancellation. Begin initiates a transaction block without explicitly setting a transaction mode for the block (see BeginTx with TxOptions if transaction mode is required). +// *pgxpool.Tx is returned, which implements the pgx.Tx interface. +// Commit or Rollback must be called on the returned transaction to finalize the transaction block. +func (p *Pool) Begin(ctx context.Context) (pgx.Tx, error) { + return p.BeginTx(ctx, pgx.TxOptions{}) +} + +// BeginTx acquires a connection from the Pool and starts a transaction with pgx.TxOptions determining the transaction mode. +// Unlike database/sql, the context only affects the begin command. i.e. there is no auto-rollback on context cancellation. +// *pgxpool.Tx is returned, which implements the pgx.Tx interface. +// Commit or Rollback must be called on the returned transaction to finalize the transaction block. +func (p *Pool) BeginTx(ctx context.Context, txOptions pgx.TxOptions) (pgx.Tx, error) { + c, err := p.Acquire(ctx) + if err != nil { + return nil, err + } + + t, err := c.BeginTx(ctx, txOptions) + if err != nil { + c.Release() + return nil, err + } + + return &Tx{t: t, c: c}, nil +} + +func (p *Pool) CopyFrom(ctx context.Context, tableName pgx.Identifier, columnNames []string, rowSrc pgx.CopyFromSource) (int64, error) { + c, err := p.Acquire(ctx) + if err != nil { + return 0, err + } + defer c.Release() + + return c.Conn().CopyFrom(ctx, tableName, columnNames, rowSrc) +} + +// Ping acquires a connection from the Pool and executes an empty sql statement against it. +// If the sql returns without error, the database Ping is considered successful, otherwise, the error is returned. +func (p *Pool) Ping(ctx context.Context) error { + c, err := p.Acquire(ctx) + if err != nil { + return err + } + defer c.Release() + return c.Ping(ctx) +} diff --git a/vendor/github.com/jackc/pgx/v5/pgxpool/rows.go b/vendor/github.com/jackc/pgx/v5/pgxpool/rows.go new file mode 100644 index 00000000..f834b7ec --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgxpool/rows.go @@ -0,0 +1,116 @@ +package pgxpool + +import ( + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" +) + +type errRows struct { + err error +} + +func (errRows) Close() {} +func (e errRows) Err() error { return e.err } +func (errRows) CommandTag() pgconn.CommandTag { return pgconn.CommandTag{} } +func (errRows) FieldDescriptions() []pgconn.FieldDescription { return nil } +func (errRows) Next() bool { return false } +func (e errRows) Scan(dest ...any) error { return e.err } +func (e errRows) Values() ([]any, error) { return nil, e.err } +func (e errRows) RawValues() [][]byte { return nil } +func (e errRows) Conn() *pgx.Conn { return nil } + +type errRow struct { + err error +} + +func (e errRow) Scan(dest ...any) error { return e.err } + +type poolRows struct { + r pgx.Rows + c *Conn + err error +} + +func (rows *poolRows) Close() { + rows.r.Close() + if rows.c != nil { + rows.c.Release() + rows.c = nil + } +} + +func (rows *poolRows) Err() error { + if rows.err != nil { + return rows.err + } + return rows.r.Err() +} + +func (rows *poolRows) CommandTag() pgconn.CommandTag { + return rows.r.CommandTag() +} + +func (rows *poolRows) FieldDescriptions() []pgconn.FieldDescription { + return rows.r.FieldDescriptions() +} + +func (rows *poolRows) Next() bool { + if rows.err != nil { + return false + } + + n := rows.r.Next() + if !n { + rows.Close() + } + return n +} + +func (rows *poolRows) Scan(dest ...any) error { + err := rows.r.Scan(dest...) + if err != nil { + rows.Close() + } + return err +} + +func (rows *poolRows) Values() ([]any, error) { + values, err := rows.r.Values() + if err != nil { + rows.Close() + } + return values, err +} + +func (rows *poolRows) RawValues() [][]byte { + return rows.r.RawValues() +} + +func (rows *poolRows) Conn() *pgx.Conn { + return rows.r.Conn() +} + +type poolRow struct { + r pgx.Row + c *Conn + err error +} + +func (row *poolRow) Scan(dest ...any) error { + if row.err != nil { + return row.err + } + + panicked := true + defer func() { + if panicked && row.c != nil { + row.c.Release() + } + }() + err := row.r.Scan(dest...) + panicked = false + if row.c != nil { + row.c.Release() + } + return err +} diff --git a/vendor/github.com/jackc/pgx/v5/pgxpool/stat.go b/vendor/github.com/jackc/pgx/v5/pgxpool/stat.go new file mode 100644 index 00000000..cfa0c4c5 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgxpool/stat.go @@ -0,0 +1,84 @@ +package pgxpool + +import ( + "time" + + "github.com/jackc/puddle/v2" +) + +// Stat is a snapshot of Pool statistics. +type Stat struct { + s *puddle.Stat + newConnsCount int64 + lifetimeDestroyCount int64 + idleDestroyCount int64 +} + +// AcquireCount returns the cumulative count of successful acquires from the pool. +func (s *Stat) AcquireCount() int64 { + return s.s.AcquireCount() +} + +// AcquireDuration returns the total duration of all successful acquires from +// the pool. +func (s *Stat) AcquireDuration() time.Duration { + return s.s.AcquireDuration() +} + +// AcquiredConns returns the number of currently acquired connections in the pool. +func (s *Stat) AcquiredConns() int32 { + return s.s.AcquiredResources() +} + +// CanceledAcquireCount returns the cumulative count of acquires from the pool +// that were canceled by a context. +func (s *Stat) CanceledAcquireCount() int64 { + return s.s.CanceledAcquireCount() +} + +// ConstructingConns returns the number of conns with construction in progress in +// the pool. +func (s *Stat) ConstructingConns() int32 { + return s.s.ConstructingResources() +} + +// EmptyAcquireCount returns the cumulative count of successful acquires from the pool +// that waited for a resource to be released or constructed because the pool was +// empty. +func (s *Stat) EmptyAcquireCount() int64 { + return s.s.EmptyAcquireCount() +} + +// IdleConns returns the number of currently idle conns in the pool. +func (s *Stat) IdleConns() int32 { + return s.s.IdleResources() +} + +// MaxConns returns the maximum size of the pool. +func (s *Stat) MaxConns() int32 { + return s.s.MaxResources() +} + +// TotalConns returns the total number of resources currently in the pool. +// The value is the sum of ConstructingConns, AcquiredConns, and +// IdleConns. +func (s *Stat) TotalConns() int32 { + return s.s.TotalResources() +} + +// NewConnsCount returns the cumulative count of new connections opened. +func (s *Stat) NewConnsCount() int64 { + return s.newConnsCount +} + +// MaxLifetimeDestroyCount returns the cumulative count of connections destroyed +// because they exceeded MaxConnLifetime. +func (s *Stat) MaxLifetimeDestroyCount() int64 { + return s.lifetimeDestroyCount +} + +// MaxIdleDestroyCount returns the cumulative count of connections destroyed because +// they exceeded MaxConnIdleTime. +func (s *Stat) MaxIdleDestroyCount() int64 { + return s.idleDestroyCount +} diff --git a/vendor/github.com/jackc/pgx/v5/pgxpool/tracer.go b/vendor/github.com/jackc/pgx/v5/pgxpool/tracer.go new file mode 100644 index 00000000..78b9d15a --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgxpool/tracer.go @@ -0,0 +1,33 @@ +package pgxpool + +import ( + "context" + + "github.com/jackc/pgx/v5" +) + +// AcquireTracer traces Acquire. +type AcquireTracer interface { + // TraceAcquireStart is called at the beginning of Acquire. + // The returned context is used for the rest of the call and will be passed to the TraceAcquireEnd. + TraceAcquireStart(ctx context.Context, pool *Pool, data TraceAcquireStartData) context.Context + // TraceAcquireEnd is called when a connection has been acquired. + TraceAcquireEnd(ctx context.Context, pool *Pool, data TraceAcquireEndData) +} + +type TraceAcquireStartData struct{} + +type TraceAcquireEndData struct { + Conn *pgx.Conn + Err error +} + +// ReleaseTracer traces Release. +type ReleaseTracer interface { + // TraceRelease is called at the beginning of Release. + TraceRelease(pool *Pool, data TraceReleaseData) +} + +type TraceReleaseData struct { + Conn *pgx.Conn +} diff --git a/vendor/github.com/jackc/pgx/v5/pgxpool/tx.go b/vendor/github.com/jackc/pgx/v5/pgxpool/tx.go new file mode 100644 index 00000000..b49e7f4d --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/pgxpool/tx.go @@ -0,0 +1,83 @@ +package pgxpool + +import ( + "context" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" +) + +// Tx represents a database transaction acquired from a Pool. +type Tx struct { + t pgx.Tx + c *Conn +} + +// Begin starts a pseudo nested transaction implemented with a savepoint. +func (tx *Tx) Begin(ctx context.Context) (pgx.Tx, error) { + return tx.t.Begin(ctx) +} + +// Commit commits the transaction and returns the associated connection back to the Pool. Commit will return an error +// where errors.Is(ErrTxClosed) is true if the Tx is already closed, but is otherwise safe to call multiple times. If +// the commit fails with a rollback status (e.g. the transaction was already in a broken state) then ErrTxCommitRollback +// will be returned. +func (tx *Tx) Commit(ctx context.Context) error { + err := tx.t.Commit(ctx) + if tx.c != nil { + tx.c.Release() + tx.c = nil + } + return err +} + +// Rollback rolls back the transaction and returns the associated connection back to the Pool. Rollback will return +// where an error where errors.Is(ErrTxClosed) is true if the Tx is already closed, but is otherwise safe to call +// multiple times. Hence, defer tx.Rollback() is safe even if tx.Commit() will be called first in a non-error condition. +func (tx *Tx) Rollback(ctx context.Context) error { + err := tx.t.Rollback(ctx) + if tx.c != nil { + tx.c.Release() + tx.c = nil + } + return err +} + +func (tx *Tx) CopyFrom(ctx context.Context, tableName pgx.Identifier, columnNames []string, rowSrc pgx.CopyFromSource) (int64, error) { + return tx.t.CopyFrom(ctx, tableName, columnNames, rowSrc) +} + +func (tx *Tx) SendBatch(ctx context.Context, b *pgx.Batch) pgx.BatchResults { + return tx.t.SendBatch(ctx, b) +} + +func (tx *Tx) LargeObjects() pgx.LargeObjects { + return tx.t.LargeObjects() +} + +// Prepare creates a prepared statement with name and sql. If the name is empty, +// an anonymous prepared statement will be used. sql can contain placeholders +// for bound parameters. These placeholders are referenced positionally as $1, $2, etc. +// +// Prepare is idempotent; i.e. it is safe to call Prepare multiple times with the same +// name and sql arguments. This allows a code path to Prepare and Query/Exec without +// needing to first check whether the statement has already been prepared. +func (tx *Tx) Prepare(ctx context.Context, name, sql string) (*pgconn.StatementDescription, error) { + return tx.t.Prepare(ctx, name, sql) +} + +func (tx *Tx) Exec(ctx context.Context, sql string, arguments ...any) (pgconn.CommandTag, error) { + return tx.t.Exec(ctx, sql, arguments...) +} + +func (tx *Tx) Query(ctx context.Context, sql string, args ...any) (pgx.Rows, error) { + return tx.t.Query(ctx, sql, args...) +} + +func (tx *Tx) QueryRow(ctx context.Context, sql string, args ...any) pgx.Row { + return tx.t.QueryRow(ctx, sql, args...) +} + +func (tx *Tx) Conn() *pgx.Conn { + return tx.t.Conn() +} diff --git a/vendor/github.com/jackc/pgx/v5/rows.go b/vendor/github.com/jackc/pgx/v5/rows.go new file mode 100644 index 00000000..f23625d4 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/rows.go @@ -0,0 +1,856 @@ +package pgx + +import ( + "context" + "errors" + "fmt" + "reflect" + "strings" + "sync" + "time" + + "github.com/jackc/pgx/v5/pgconn" + "github.com/jackc/pgx/v5/pgtype" +) + +// Rows is the result set returned from *Conn.Query. Rows must be closed before +// the *Conn can be used again. Rows are closed by explicitly calling Close(), +// calling Next() until it returns false, or when a fatal error occurs. +// +// Once a Rows is closed the only methods that may be called are Close(), Err(), +// and CommandTag(). +// +// Rows is an interface instead of a struct to allow tests to mock Query. However, +// adding a method to an interface is technically a breaking change. Because of this +// the Rows interface is partially excluded from semantic version requirements. +// Methods will not be removed or changed, but new methods may be added. +type Rows interface { + // Close closes the rows, making the connection ready for use again. It is safe + // to call Close after rows is already closed. + Close() + + // Err returns any error that occurred while reading. Err must only be called after the Rows is closed (either by + // calling Close or by Next returning false). If it is called early it may return nil even if there was an error + // executing the query. + Err() error + + // CommandTag returns the command tag from this query. It is only available after Rows is closed. + CommandTag() pgconn.CommandTag + + // FieldDescriptions returns the field descriptions of the columns. It may return nil. In particular this can occur + // when there was an error executing the query. + FieldDescriptions() []pgconn.FieldDescription + + // Next prepares the next row for reading. It returns true if there is another + // row and false if no more rows are available or a fatal error has occurred. + // It automatically closes rows when all rows are read. + // + // Callers should check rows.Err() after rows.Next() returns false to detect + // whether result-set reading ended prematurely due to an error. See + // Conn.Query for details. + // + // For simpler error handling, consider using the higher-level pgx v5 + // CollectRows() and ForEachRow() helpers instead. + Next() bool + + // Scan reads the values from the current row into dest values positionally. + // dest can include pointers to core types, values implementing the Scanner + // interface, and nil. nil will skip the value entirely. It is an error to + // call Scan without first calling Next() and checking that it returned true. + Scan(dest ...any) error + + // Values returns the decoded row values. As with Scan(), it is an error to + // call Values without first calling Next() and checking that it returned + // true. + Values() ([]any, error) + + // RawValues returns the unparsed bytes of the row values. The returned data is only valid until the next Next + // call or the Rows is closed. + RawValues() [][]byte + + // Conn returns the underlying *Conn on which the query was executed. This may return nil if Rows did not come from a + // *Conn (e.g. if it was created by RowsFromResultReader) + Conn() *Conn +} + +// Row is a convenience wrapper over Rows that is returned by QueryRow. +// +// Row is an interface instead of a struct to allow tests to mock QueryRow. However, +// adding a method to an interface is technically a breaking change. Because of this +// the Row interface is partially excluded from semantic version requirements. +// Methods will not be removed or changed, but new methods may be added. +type Row interface { + // Scan works the same as Rows. with the following exceptions. If no + // rows were found it returns ErrNoRows. If multiple rows are returned it + // ignores all but the first. + Scan(dest ...any) error +} + +// RowScanner scans an entire row at a time into the RowScanner. +type RowScanner interface { + // ScanRows scans the row. + ScanRow(rows Rows) error +} + +// connRow implements the Row interface for Conn.QueryRow. +type connRow baseRows + +func (r *connRow) Scan(dest ...any) (err error) { + rows := (*baseRows)(r) + + if rows.Err() != nil { + return rows.Err() + } + + for _, d := range dest { + if _, ok := d.(*pgtype.DriverBytes); ok { + rows.Close() + return fmt.Errorf("cannot scan into *pgtype.DriverBytes from QueryRow") + } + } + + if !rows.Next() { + if rows.Err() == nil { + return ErrNoRows + } + return rows.Err() + } + + rows.Scan(dest...) + rows.Close() + return rows.Err() +} + +// baseRows implements the Rows interface for Conn.Query. +type baseRows struct { + typeMap *pgtype.Map + resultReader *pgconn.ResultReader + + values [][]byte + + commandTag pgconn.CommandTag + err error + closed bool + + scanPlans []pgtype.ScanPlan + scanTypes []reflect.Type + + conn *Conn + multiResultReader *pgconn.MultiResultReader + + queryTracer QueryTracer + batchTracer BatchTracer + ctx context.Context + startTime time.Time + sql string + args []any + rowCount int +} + +func (rows *baseRows) FieldDescriptions() []pgconn.FieldDescription { + return rows.resultReader.FieldDescriptions() +} + +func (rows *baseRows) Close() { + if rows.closed { + return + } + + rows.closed = true + + if rows.resultReader != nil { + var closeErr error + rows.commandTag, closeErr = rows.resultReader.Close() + if rows.err == nil { + rows.err = closeErr + } + } + + if rows.multiResultReader != nil { + closeErr := rows.multiResultReader.Close() + if rows.err == nil { + rows.err = closeErr + } + } + + if rows.err != nil && rows.conn != nil && rows.sql != "" { + if sc := rows.conn.statementCache; sc != nil { + sc.Invalidate(rows.sql) + } + + if sc := rows.conn.descriptionCache; sc != nil { + sc.Invalidate(rows.sql) + } + } + + if rows.batchTracer != nil { + rows.batchTracer.TraceBatchQuery(rows.ctx, rows.conn, TraceBatchQueryData{SQL: rows.sql, Args: rows.args, CommandTag: rows.commandTag, Err: rows.err}) + } else if rows.queryTracer != nil { + rows.queryTracer.TraceQueryEnd(rows.ctx, rows.conn, TraceQueryEndData{rows.commandTag, rows.err}) + } +} + +func (rows *baseRows) CommandTag() pgconn.CommandTag { + return rows.commandTag +} + +func (rows *baseRows) Err() error { + return rows.err +} + +// fatal signals an error occurred after the query was sent to the server. It +// closes the rows automatically. +func (rows *baseRows) fatal(err error) { + if rows.err != nil { + return + } + + rows.err = err + rows.Close() +} + +func (rows *baseRows) Next() bool { + if rows.closed { + return false + } + + if rows.resultReader.NextRow() { + rows.rowCount++ + rows.values = rows.resultReader.Values() + return true + } else { + rows.Close() + return false + } +} + +func (rows *baseRows) Scan(dest ...any) error { + m := rows.typeMap + fieldDescriptions := rows.FieldDescriptions() + values := rows.values + + if len(fieldDescriptions) != len(values) { + err := fmt.Errorf("number of field descriptions must equal number of values, got %d and %d", len(fieldDescriptions), len(values)) + rows.fatal(err) + return err + } + + if len(dest) == 1 { + if rc, ok := dest[0].(RowScanner); ok { + err := rc.ScanRow(rows) + if err != nil { + rows.fatal(err) + } + return err + } + } + + if len(fieldDescriptions) != len(dest) { + err := fmt.Errorf("number of field descriptions must equal number of destinations, got %d and %d", len(fieldDescriptions), len(dest)) + rows.fatal(err) + return err + } + + if rows.scanPlans == nil { + rows.scanPlans = make([]pgtype.ScanPlan, len(values)) + rows.scanTypes = make([]reflect.Type, len(values)) + for i := range dest { + rows.scanPlans[i] = m.PlanScan(fieldDescriptions[i].DataTypeOID, fieldDescriptions[i].Format, dest[i]) + rows.scanTypes[i] = reflect.TypeOf(dest[i]) + } + } + + for i, dst := range dest { + if dst == nil { + continue + } + + if rows.scanTypes[i] != reflect.TypeOf(dst) { + rows.scanPlans[i] = m.PlanScan(fieldDescriptions[i].DataTypeOID, fieldDescriptions[i].Format, dest[i]) + rows.scanTypes[i] = reflect.TypeOf(dest[i]) + } + + err := rows.scanPlans[i].Scan(values[i], dst) + if err != nil { + err = ScanArgError{ColumnIndex: i, Err: err} + rows.fatal(err) + return err + } + } + + return nil +} + +func (rows *baseRows) Values() ([]any, error) { + if rows.closed { + return nil, errors.New("rows is closed") + } + + values := make([]any, 0, len(rows.FieldDescriptions())) + + for i := range rows.FieldDescriptions() { + buf := rows.values[i] + fd := &rows.FieldDescriptions()[i] + + if buf == nil { + values = append(values, nil) + continue + } + + if dt, ok := rows.typeMap.TypeForOID(fd.DataTypeOID); ok { + value, err := dt.Codec.DecodeValue(rows.typeMap, fd.DataTypeOID, fd.Format, buf) + if err != nil { + rows.fatal(err) + } + values = append(values, value) + } else { + switch fd.Format { + case TextFormatCode: + values = append(values, string(buf)) + case BinaryFormatCode: + newBuf := make([]byte, len(buf)) + copy(newBuf, buf) + values = append(values, newBuf) + default: + rows.fatal(errors.New("unknown format code")) + } + } + + if rows.Err() != nil { + return nil, rows.Err() + } + } + + return values, rows.Err() +} + +func (rows *baseRows) RawValues() [][]byte { + return rows.values +} + +func (rows *baseRows) Conn() *Conn { + return rows.conn +} + +type ScanArgError struct { + ColumnIndex int + Err error +} + +func (e ScanArgError) Error() string { + return fmt.Sprintf("can't scan into dest[%d]: %v", e.ColumnIndex, e.Err) +} + +func (e ScanArgError) Unwrap() error { + return e.Err +} + +// ScanRow decodes raw row data into dest. It can be used to scan rows read from the lower level pgconn interface. +// +// typeMap - OID to Go type mapping. +// fieldDescriptions - OID and format of values +// values - the raw data as returned from the PostgreSQL server +// dest - the destination that values will be decoded into +func ScanRow(typeMap *pgtype.Map, fieldDescriptions []pgconn.FieldDescription, values [][]byte, dest ...any) error { + if len(fieldDescriptions) != len(values) { + return fmt.Errorf("number of field descriptions must equal number of values, got %d and %d", len(fieldDescriptions), len(values)) + } + if len(fieldDescriptions) != len(dest) { + return fmt.Errorf("number of field descriptions must equal number of destinations, got %d and %d", len(fieldDescriptions), len(dest)) + } + + for i, d := range dest { + if d == nil { + continue + } + + err := typeMap.Scan(fieldDescriptions[i].DataTypeOID, fieldDescriptions[i].Format, values[i], d) + if err != nil { + return ScanArgError{ColumnIndex: i, Err: err} + } + } + + return nil +} + +// RowsFromResultReader returns a Rows that will read from values resultReader and decode with typeMap. It can be used +// to read from the lower level pgconn interface. +func RowsFromResultReader(typeMap *pgtype.Map, resultReader *pgconn.ResultReader) Rows { + return &baseRows{ + typeMap: typeMap, + resultReader: resultReader, + } +} + +// ForEachRow iterates through rows. For each row it scans into the elements of scans and calls fn. If any row +// fails to scan or fn returns an error the query will be aborted and the error will be returned. Rows will be closed +// when ForEachRow returns. +func ForEachRow(rows Rows, scans []any, fn func() error) (pgconn.CommandTag, error) { + defer rows.Close() + + for rows.Next() { + err := rows.Scan(scans...) + if err != nil { + return pgconn.CommandTag{}, err + } + + err = fn() + if err != nil { + return pgconn.CommandTag{}, err + } + } + + if err := rows.Err(); err != nil { + return pgconn.CommandTag{}, err + } + + return rows.CommandTag(), nil +} + +// CollectableRow is the subset of Rows methods that a RowToFunc is allowed to call. +type CollectableRow interface { + FieldDescriptions() []pgconn.FieldDescription + Scan(dest ...any) error + Values() ([]any, error) + RawValues() [][]byte +} + +// RowToFunc is a function that scans or otherwise converts row to a T. +type RowToFunc[T any] func(row CollectableRow) (T, error) + +// AppendRows iterates through rows, calling fn for each row, and appending the results into a slice of T. +// +// This function closes the rows automatically on return. +func AppendRows[T any, S ~[]T](slice S, rows Rows, fn RowToFunc[T]) (S, error) { + defer rows.Close() + + for rows.Next() { + value, err := fn(rows) + if err != nil { + return nil, err + } + slice = append(slice, value) + } + + if err := rows.Err(); err != nil { + return nil, err + } + + return slice, nil +} + +// CollectRows iterates through rows, calling fn for each row, and collecting the results into a slice of T. +// +// This function closes the rows automatically on return. +func CollectRows[T any](rows Rows, fn RowToFunc[T]) ([]T, error) { + return AppendRows([]T{}, rows, fn) +} + +// CollectOneRow calls fn for the first row in rows and returns the result. If no rows are found returns an error where errors.Is(ErrNoRows) is true. +// CollectOneRow is to CollectRows as QueryRow is to Query. +// +// This function closes the rows automatically on return. +func CollectOneRow[T any](rows Rows, fn RowToFunc[T]) (T, error) { + defer rows.Close() + + var value T + var err error + + if !rows.Next() { + if err = rows.Err(); err != nil { + return value, err + } + return value, ErrNoRows + } + + value, err = fn(rows) + if err != nil { + return value, err + } + + rows.Close() + return value, rows.Err() +} + +// CollectExactlyOneRow calls fn for the first row in rows and returns the result. +// - If no rows are found returns an error where errors.Is(ErrNoRows) is true. +// - If more than 1 row is found returns an error where errors.Is(ErrTooManyRows) is true. +// +// This function closes the rows automatically on return. +func CollectExactlyOneRow[T any](rows Rows, fn RowToFunc[T]) (T, error) { + defer rows.Close() + + var ( + err error + value T + ) + + if !rows.Next() { + if err = rows.Err(); err != nil { + return value, err + } + + return value, ErrNoRows + } + + value, err = fn(rows) + if err != nil { + return value, err + } + + if rows.Next() { + var zero T + + return zero, ErrTooManyRows + } + + return value, rows.Err() +} + +// RowTo returns a T scanned from row. +func RowTo[T any](row CollectableRow) (T, error) { + var value T + err := row.Scan(&value) + return value, err +} + +// RowTo returns a the address of a T scanned from row. +func RowToAddrOf[T any](row CollectableRow) (*T, error) { + var value T + err := row.Scan(&value) + return &value, err +} + +// RowToMap returns a map scanned from row. +func RowToMap(row CollectableRow) (map[string]any, error) { + var value map[string]any + err := row.Scan((*mapRowScanner)(&value)) + return value, err +} + +type mapRowScanner map[string]any + +func (rs *mapRowScanner) ScanRow(rows Rows) error { + values, err := rows.Values() + if err != nil { + return err + } + + *rs = make(mapRowScanner, len(values)) + + for i := range values { + (*rs)[string(rows.FieldDescriptions()[i].Name)] = values[i] + } + + return nil +} + +// RowToStructByPos returns a T scanned from row. T must be a struct. T must have the same number a public fields as row +// has fields. The row and T fields will be matched by position. If the "db" struct tag is "-" then the field will be +// ignored. +func RowToStructByPos[T any](row CollectableRow) (T, error) { + var value T + err := (&positionalStructRowScanner{ptrToStruct: &value}).ScanRow(row) + return value, err +} + +// RowToAddrOfStructByPos returns the address of a T scanned from row. T must be a struct. T must have the same number a +// public fields as row has fields. The row and T fields will be matched by position. If the "db" struct tag is "-" then +// the field will be ignored. +func RowToAddrOfStructByPos[T any](row CollectableRow) (*T, error) { + var value T + err := (&positionalStructRowScanner{ptrToStruct: &value}).ScanRow(row) + return &value, err +} + +type positionalStructRowScanner struct { + ptrToStruct any +} + +func (rs *positionalStructRowScanner) ScanRow(rows CollectableRow) error { + typ := reflect.TypeOf(rs.ptrToStruct).Elem() + fields := lookupStructFields(typ) + if len(rows.RawValues()) > len(fields) { + return fmt.Errorf( + "got %d values, but dst struct has only %d fields", + len(rows.RawValues()), + len(fields), + ) + } + scanTargets := setupStructScanTargets(rs.ptrToStruct, fields) + return rows.Scan(scanTargets...) +} + +// Map from reflect.Type -> []structRowField +var positionalStructFieldMap sync.Map + +func lookupStructFields(t reflect.Type) []structRowField { + if cached, ok := positionalStructFieldMap.Load(t); ok { + return cached.([]structRowField) + } + + fieldStack := make([]int, 0, 1) + fields := computeStructFields(t, make([]structRowField, 0, t.NumField()), &fieldStack) + fieldsIface, _ := positionalStructFieldMap.LoadOrStore(t, fields) + return fieldsIface.([]structRowField) +} + +func computeStructFields( + t reflect.Type, + fields []structRowField, + fieldStack *[]int, +) []structRowField { + tail := len(*fieldStack) + *fieldStack = append(*fieldStack, 0) + for i := 0; i < t.NumField(); i++ { + sf := t.Field(i) + (*fieldStack)[tail] = i + // Handle anonymous struct embedding, but do not try to handle embedded pointers. + if sf.Anonymous && sf.Type.Kind() == reflect.Struct { + fields = computeStructFields(sf.Type, fields, fieldStack) + } else if sf.PkgPath == "" { + dbTag, _ := sf.Tag.Lookup(structTagKey) + if dbTag == "-" { + // Field is ignored, skip it. + continue + } + fields = append(fields, structRowField{ + path: append([]int(nil), *fieldStack...), + }) + } + } + *fieldStack = (*fieldStack)[:tail] + return fields +} + +// RowToStructByName returns a T scanned from row. T must be a struct. T must have the same number of named public +// fields as row has fields. The row and T fields will be matched by name. The match is case-insensitive. The database +// column name can be overridden with a "db" struct tag. If the "db" struct tag is "-" then the field will be ignored. +func RowToStructByName[T any](row CollectableRow) (T, error) { + var value T + err := (&namedStructRowScanner{ptrToStruct: &value}).ScanRow(row) + return value, err +} + +// RowToAddrOfStructByName returns the address of a T scanned from row. T must be a struct. T must have the same number +// of named public fields as row has fields. The row and T fields will be matched by name. The match is +// case-insensitive. The database column name can be overridden with a "db" struct tag. If the "db" struct tag is "-" +// then the field will be ignored. +func RowToAddrOfStructByName[T any](row CollectableRow) (*T, error) { + var value T + err := (&namedStructRowScanner{ptrToStruct: &value}).ScanRow(row) + return &value, err +} + +// RowToStructByNameLax returns a T scanned from row. T must be a struct. T must have greater than or equal number of named public +// fields as row has fields. The row and T fields will be matched by name. The match is case-insensitive. The database +// column name can be overridden with a "db" struct tag. If the "db" struct tag is "-" then the field will be ignored. +func RowToStructByNameLax[T any](row CollectableRow) (T, error) { + var value T + err := (&namedStructRowScanner{ptrToStruct: &value, lax: true}).ScanRow(row) + return value, err +} + +// RowToAddrOfStructByNameLax returns the address of a T scanned from row. T must be a struct. T must have greater than or +// equal number of named public fields as row has fields. The row and T fields will be matched by name. The match is +// case-insensitive. The database column name can be overridden with a "db" struct tag. If the "db" struct tag is "-" +// then the field will be ignored. +func RowToAddrOfStructByNameLax[T any](row CollectableRow) (*T, error) { + var value T + err := (&namedStructRowScanner{ptrToStruct: &value, lax: true}).ScanRow(row) + return &value, err +} + +type namedStructRowScanner struct { + ptrToStruct any + lax bool +} + +func (rs *namedStructRowScanner) ScanRow(rows CollectableRow) error { + typ := reflect.TypeOf(rs.ptrToStruct).Elem() + fldDescs := rows.FieldDescriptions() + namedStructFields, err := lookupNamedStructFields(typ, fldDescs) + if err != nil { + return err + } + if !rs.lax && namedStructFields.missingField != "" { + return fmt.Errorf("cannot find field %s in returned row", namedStructFields.missingField) + } + fields := namedStructFields.fields + scanTargets := setupStructScanTargets(rs.ptrToStruct, fields) + return rows.Scan(scanTargets...) +} + +// Map from namedStructFieldMap -> *namedStructFields +var namedStructFieldMap sync.Map + +type namedStructFieldsKey struct { + t reflect.Type + colNames string +} + +type namedStructFields struct { + fields []structRowField + // missingField is the first field from the struct without a corresponding row field. + // This is used to construct the correct error message for non-lax queries. + missingField string +} + +func lookupNamedStructFields( + t reflect.Type, + fldDescs []pgconn.FieldDescription, +) (*namedStructFields, error) { + key := namedStructFieldsKey{ + t: t, + colNames: joinFieldNames(fldDescs), + } + if cached, ok := namedStructFieldMap.Load(key); ok { + return cached.(*namedStructFields), nil + } + + // We could probably do two-levels of caching, where we compute the key -> fields mapping + // for a type only once, cache it by type, then use that to compute the column -> fields + // mapping for a given set of columns. + fieldStack := make([]int, 0, 1) + fields, missingField := computeNamedStructFields( + fldDescs, + t, + make([]structRowField, len(fldDescs)), + &fieldStack, + ) + for i, f := range fields { + if f.path == nil { + return nil, fmt.Errorf( + "struct doesn't have corresponding row field %s", + fldDescs[i].Name, + ) + } + } + + fieldsIface, _ := namedStructFieldMap.LoadOrStore( + key, + &namedStructFields{fields: fields, missingField: missingField}, + ) + return fieldsIface.(*namedStructFields), nil +} + +func joinFieldNames(fldDescs []pgconn.FieldDescription) string { + switch len(fldDescs) { + case 0: + return "" + case 1: + return fldDescs[0].Name + } + + totalSize := len(fldDescs) - 1 // Space for separator bytes. + for _, d := range fldDescs { + totalSize += len(d.Name) + } + var b strings.Builder + b.Grow(totalSize) + b.WriteString(fldDescs[0].Name) + for _, d := range fldDescs[1:] { + b.WriteByte(0) // Join with NUL byte as it's (presumably) not a valid column character. + b.WriteString(d.Name) + } + return b.String() +} + +func computeNamedStructFields( + fldDescs []pgconn.FieldDescription, + t reflect.Type, + fields []structRowField, + fieldStack *[]int, +) ([]structRowField, string) { + var missingField string + tail := len(*fieldStack) + *fieldStack = append(*fieldStack, 0) + for i := 0; i < t.NumField(); i++ { + sf := t.Field(i) + (*fieldStack)[tail] = i + if sf.PkgPath != "" && !sf.Anonymous { + // Field is unexported, skip it. + continue + } + // Handle anonymous struct embedding, but do not try to handle embedded pointers. + if sf.Anonymous && sf.Type.Kind() == reflect.Struct { + var missingSubField string + fields, missingSubField = computeNamedStructFields( + fldDescs, + sf.Type, + fields, + fieldStack, + ) + if missingField == "" { + missingField = missingSubField + } + } else { + dbTag, dbTagPresent := sf.Tag.Lookup(structTagKey) + if dbTagPresent { + dbTag, _, _ = strings.Cut(dbTag, ",") + } + if dbTag == "-" { + // Field is ignored, skip it. + continue + } + colName := dbTag + if !dbTagPresent { + colName = sf.Name + } + fpos := fieldPosByName(fldDescs, colName, !dbTagPresent) + if fpos == -1 { + if missingField == "" { + missingField = colName + } + continue + } + fields[fpos] = structRowField{ + path: append([]int(nil), *fieldStack...), + } + } + } + *fieldStack = (*fieldStack)[:tail] + + return fields, missingField +} + +const structTagKey = "db" + +func fieldPosByName(fldDescs []pgconn.FieldDescription, field string, normalize bool) (i int) { + i = -1 + + if normalize { + field = strings.ReplaceAll(field, "_", "") + } + for i, desc := range fldDescs { + if normalize { + if strings.EqualFold(strings.ReplaceAll(desc.Name, "_", ""), field) { + return i + } + } else { + if desc.Name == field { + return i + } + } + } + return +} + +// structRowField describes a field of a struct. +// +// TODO: It would be a bit more efficient to track the path using the pointer +// offset within the (outermost) struct and use unsafe.Pointer arithmetic to +// construct references when scanning rows. However, it's not clear it's worth +// using unsafe for this. +type structRowField struct { + path []int +} + +func setupStructScanTargets(receiver any, fields []structRowField) []any { + scanTargets := make([]any, len(fields)) + v := reflect.ValueOf(receiver).Elem() + for i, f := range fields { + scanTargets[i] = v.FieldByIndex(f.path).Addr().Interface() + } + return scanTargets +} diff --git a/vendor/github.com/jackc/pgx/v4/stdlib/sql.go b/vendor/github.com/jackc/pgx/v5/stdlib/sql.go similarity index 70% rename from vendor/github.com/jackc/pgx/v4/stdlib/sql.go rename to vendor/github.com/jackc/pgx/v5/stdlib/sql.go index f43ae324..c1d00ab4 100644 --- a/vendor/github.com/jackc/pgx/v4/stdlib/sql.go +++ b/vendor/github.com/jackc/pgx/v5/stdlib/sql.go @@ -4,48 +4,65 @@ // // db, err := sql.Open("pgx", "postgres://pgx_md5:secret@localhost:5432/pgx_test?sslmode=disable") // if err != nil { -// return err +// return err // } // -// Or from a DSN string. +// Or from a keyword/value string. // // db, err := sql.Open("pgx", "user=postgres password=secret host=localhost port=5432 database=pgx_test sslmode=disable") // if err != nil { -// return err +// return err // } // +// Or from a *pgxpool.Pool. +// +// pool, err := pgxpool.New(context.Background(), os.Getenv("DATABASE_URL")) +// if err != nil { +// return err +// } +// +// db := stdlib.OpenDBFromPool(pool) +// // Or a pgx.ConnConfig can be used to set configuration not accessible via connection string. In this case the // pgx.ConnConfig must first be registered with the driver. This registration returns a connection string which is used // with sql.Open. // // connConfig, _ := pgx.ParseConfig(os.Getenv("DATABASE_URL")) -// connConfig.Logger = myLogger +// connConfig.Tracer = &tracelog.TraceLog{Logger: myLogger, LogLevel: tracelog.LogLevelInfo} // connStr := stdlib.RegisterConnConfig(connConfig) // db, _ := sql.Open("pgx", connStr) // -// pgx uses standard PostgreSQL positional parameters in queries. e.g. $1, $2. -// It does not support named parameters. +// pgx uses standard PostgreSQL positional parameters in queries. e.g. $1, $2. It does not support named parameters. // // db.QueryRow("select * from users where id=$1", userID) // -// In Go 1.13 and above (*sql.Conn) Raw() can be used to get a *pgx.Conn from the standard -// database/sql.DB connection pool. This allows operations that use pgx specific functionality. +// (*sql.Conn) Raw() can be used to get a *pgx.Conn from the standard database/sql.DB connection pool. This allows +// operations that use pgx specific functionality. // // // Given db is a *sql.DB // conn, err := db.Conn(context.Background()) // if err != nil { -// // handle error from acquiring connection from DB pool +// // handle error from acquiring connection from DB pool // } // -// err = conn.Raw(func(driverConn interface{}) error { -// conn := driverConn.(*stdlib.Conn).Conn() // conn is a *pgx.Conn -// // Do pgx specific stuff with conn -// conn.CopyFrom(...) -// return nil +// err = conn.Raw(func(driverConn any) error { +// conn := driverConn.(*stdlib.Conn).Conn() // conn is a *pgx.Conn +// // Do pgx specific stuff with conn +// conn.CopyFrom(...) +// return nil // }) // if err != nil { -// // handle error that occurred while using *pgx.Conn +// // handle error that occurred while using *pgx.Conn // } +// +// # PostgreSQL Specific Data Types +// +// The pgtype package provides support for PostgreSQL specific types. *pgtype.Map.SQLScanner is an adapter that makes +// these types usable as a sql.Scanner. +// +// m := pgtype.NewMap() +// var a []int64 +// err := db.QueryRow("select '{1,2,3}'::bigint[]").Scan(m.SQLScanner(&a)) package stdlib import ( @@ -58,14 +75,16 @@ import ( "math" "math/rand" "reflect" + "slices" "strconv" "strings" "sync" "time" - "github.com/jackc/pgconn" - "github.com/jackc/pgtype" - "github.com/jackc/pgx/v4" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" + "github.com/jackc/pgx/v5/pgtype" + "github.com/jackc/pgx/v5/pgxpool" ) // Only intrinsic types should be binary format with database/sql. @@ -73,24 +92,17 @@ var databaseSQLResultFormats pgx.QueryResultFormatsByOID var pgxDriver *Driver -type ctxKey int - -var ctxKeyFakeTx ctxKey = 0 - -var ErrNotPgx = errors.New("not pgx *sql.DB") - func init() { pgxDriver = &Driver{ configs: make(map[string]*pgx.ConnConfig), } - fakeTxConns = make(map[*pgx.Conn]*sql.Tx) // if pgx driver was already registered by different pgx major version then we // skip registration under the default name. - if !contains(sql.Drivers(), "pgx") { + if !slices.Contains(sql.Drivers(), "pgx") { sql.Register("pgx", pgxDriver) } - sql.Register("pgx/v4", pgxDriver) + sql.Register("pgx/v5", pgxDriver) databaseSQLResultFormats = pgx.QueryResultFormatsByOID{ pgtype.BoolOID: 1, @@ -109,34 +121,18 @@ func init() { } } -// TODO replace by slices.Contains when experimental package will be merged to stdlib -// https://pkg.go.dev/golang.org/x/exp/slices#Contains -func contains(list []string, y string) bool { - for _, x := range list { - if x == y { - return true - } - } - return false -} - -var ( - fakeTxMutex sync.Mutex - fakeTxConns map[*pgx.Conn]*sql.Tx -) - // OptionOpenDB options for configuring the driver when opening a new db pool. type OptionOpenDB func(*connector) // OptionBeforeConnect provides a callback for before connect. It is passed a shallow copy of the ConnConfig that will -// be used to connect, so only its immediate members should be modified. +// be used to connect, so only its immediate members should be modified. Used only if db is opened with *pgx.ConnConfig. func OptionBeforeConnect(bc func(context.Context, *pgx.ConnConfig) error) OptionOpenDB { return func(dc *connector) { dc.BeforeConnect = bc } } -// OptionAfterConnect provides a callback for after connect. +// OptionAfterConnect provides a callback for after connect. Used only if db is opened with *pgx.ConnConfig. func OptionAfterConnect(ac func(context.Context, *pgx.Conn) error) OptionOpenDB { return func(dc *connector) { dc.AfterConnect = ac @@ -161,7 +157,7 @@ func RandomizeHostOrderFunc(ctx context.Context, connConfig *pgx.ConnConfig) err return nil } - newFallbacks := append([]*pgconn.FallbackConfig{&pgconn.FallbackConfig{ + newFallbacks := append([]*pgconn.FallbackConfig{{ Host: connConfig.Host, Port: connConfig.Port, TLSConfig: connConfig.TLSConfig, @@ -195,13 +191,42 @@ func GetConnector(config pgx.ConnConfig, opts ...OptionOpenDB) driver.Connector return c } +// GetPoolConnector creates a new driver.Connector from the given *pgxpool.Pool. By using this be sure to set the +// maximum idle connections of the *sql.DB created with this connector to zero since they must be managed from the +// *pgxpool.Pool. This is required to avoid acquiring all the connections from the pgxpool and starving any direct +// users of the pgxpool. +func GetPoolConnector(pool *pgxpool.Pool, opts ...OptionOpenDB) driver.Connector { + c := connector{ + pool: pool, + ResetSession: func(context.Context, *pgx.Conn) error { return nil }, // noop reset session by default + driver: pgxDriver, + } + + for _, opt := range opts { + opt(&c) + } + + return c +} + func OpenDB(config pgx.ConnConfig, opts ...OptionOpenDB) *sql.DB { c := GetConnector(config, opts...) return sql.OpenDB(c) } +// OpenDBFromPool creates a new *sql.DB from the given *pgxpool.Pool. Note that this method automatically sets the +// maximum number of idle connections in *sql.DB to zero, since they must be managed from the *pgxpool.Pool. This is +// required to avoid acquiring all the connections from the pgxpool and starving any direct users of the pgxpool. +func OpenDBFromPool(pool *pgxpool.Pool, opts ...OptionOpenDB) *sql.DB { + c := GetPoolConnector(pool, opts...) + db := sql.OpenDB(c) + db.SetMaxIdleConns(0) + return db +} + type connector struct { pgx.ConnConfig + pool *pgxpool.Pool BeforeConnect func(context.Context, *pgx.ConnConfig) error // function to call before creation of every new connection AfterConnect func(context.Context, *pgx.Conn) error // function to call after creation of every new connection ResetSession func(context.Context, *pgx.Conn) error // function is called before a connection is reused @@ -211,25 +236,53 @@ type connector struct { // Connect implement driver.Connector interface func (c connector) Connect(ctx context.Context) (driver.Conn, error) { var ( - err error - conn *pgx.Conn + connConfig pgx.ConnConfig + conn *pgx.Conn + close func(context.Context) error + err error ) - // Create a shallow copy of the config, so that BeforeConnect can safely modify it - connConfig := c.ConnConfig - if err = c.BeforeConnect(ctx, &connConfig); err != nil { - return nil, err - } + if c.pool == nil { + // Create a shallow copy of the config, so that BeforeConnect can safely modify it + connConfig = c.ConnConfig - if conn, err = pgx.ConnectConfig(ctx, &connConfig); err != nil { - return nil, err - } + if err = c.BeforeConnect(ctx, &connConfig); err != nil { + return nil, err + } - if err = c.AfterConnect(ctx, conn); err != nil { - return nil, err + if conn, err = pgx.ConnectConfig(ctx, &connConfig); err != nil { + return nil, err + } + + if err = c.AfterConnect(ctx, conn); err != nil { + return nil, err + } + + close = conn.Close + } else { + var pconn *pgxpool.Conn + + pconn, err = c.pool.Acquire(ctx) + if err != nil { + return nil, err + } + + conn = pconn.Conn() + + close = func(_ context.Context) error { + pconn.Release() + return nil + } } - return &Conn{conn: conn, driver: c.driver, connConfig: connConfig, resetSessionFunc: c.ResetSession}, nil + return &Conn{ + conn: conn, + close: close, + driver: c.driver, + connConfig: connConfig, + resetSessionFunc: c.ResetSession, + psRefCounts: make(map[*pgconn.StatementDescription]int), + }, nil } // Driver implement driver.Connector interface @@ -306,9 +359,11 @@ func (dc *driverConnector) Connect(ctx context.Context) (driver.Conn, error) { c := &Conn{ conn: conn, + close: conn.Close, driver: dc.driver, connConfig: *connConfig, resetSessionFunc: func(context.Context, *pgx.Conn) error { return nil }, + psRefCounts: make(map[*pgconn.StatementDescription]int), } return c, nil @@ -329,11 +384,20 @@ func UnregisterConnConfig(connStr string) { } type Conn struct { - conn *pgx.Conn - psCount int64 // Counter used for creating unique prepared statement names - driver *Driver - connConfig pgx.ConnConfig - resetSessionFunc func(context.Context, *pgx.Conn) error // Function is called before a connection is reused + conn *pgx.Conn + close func(context.Context) error + driver *Driver + connConfig pgx.ConnConfig + resetSessionFunc func(context.Context, *pgx.Conn) error // Function is called before a connection is reused + lastResetSessionTime time.Time + + // psRefCounts contains reference counts for prepared statements. Prepare uses the underlying pgx logic to generate + // deterministic statement names from the statement text. If this query has already been prepared then the existing + // *pgconn.StatementDescription will be returned. However, this means that if Close is called on the returned Stmt + // then the underlying prepared statement will be closed even when the underlying prepared statement is still in use + // by another database/sql Stmt. To prevent this psRefCounts keeps track of how many database/sql statements are using + // the same underlying statement and only closes the underlying statement when the reference count reaches 0. + psRefCounts map[*pgconn.StatementDescription]int } // Conn returns the underlying *pgx.Conn @@ -350,13 +414,11 @@ func (c *Conn) PrepareContext(ctx context.Context, query string) (driver.Stmt, e return nil, driver.ErrBadConn } - name := fmt.Sprintf("pgx_%d", c.psCount) - c.psCount++ - - sd, err := c.conn.Prepare(ctx, name, query) + sd, err := c.conn.Prepare(ctx, query, query) if err != nil { return nil, err } + c.psRefCounts[sd]++ return &Stmt{sd: sd, conn: c}, nil } @@ -364,7 +426,7 @@ func (c *Conn) PrepareContext(ctx context.Context, query string) (driver.Stmt, e func (c *Conn) Close() error { ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel() - return c.conn.Close(ctx) + return c.close(ctx) } func (c *Conn) Begin() (driver.Tx, error) { @@ -376,11 +438,6 @@ func (c *Conn) BeginTx(ctx context.Context, opts driver.TxOptions) (driver.Tx, e return nil, driver.ErrBadConn } - if pconn, ok := ctx.Value(ctxKeyFakeTx).(**pgx.Conn); ok { - *pconn = c.conn - return fakeTx{}, nil - } - var pgxOpts pgx.TxOptions switch sql.IsolationLevel(opts.Isolation) { case sql.LevelDefault: @@ -430,7 +487,7 @@ func (c *Conn) QueryContext(ctx context.Context, query string, argsV []driver.Na return nil, driver.ErrBadConn } - args := []interface{}{databaseSQLResultFormats} + args := []any{databaseSQLResultFormats} args = append(args, namedValueToInterface(argsV)...) rows, err := c.conn.Query(ctx, query, args...) @@ -476,6 +533,14 @@ func (c *Conn) ResetSession(ctx context.Context) error { return driver.ErrBadConn } + now := time.Now() + if now.Sub(c.lastResetSessionTime) > time.Second { + if err := c.conn.PgConn().Ping(ctx); err != nil { + return driver.ErrBadConn + } + } + c.lastResetSessionTime = now + return c.resetSessionFunc(ctx, c.conn) } @@ -487,7 +552,16 @@ type Stmt struct { func (s *Stmt) Close() error { ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) defer cancel() - return s.conn.conn.Deallocate(ctx, s.sd.Name) + + refCount := s.conn.psRefCounts[s.sd] + if refCount == 1 { + delete(s.conn.psRefCounts, s.sd) + } else { + s.conn.psRefCounts[s.sd]-- + return nil + } + + return s.conn.conn.Deallocate(ctx, s.sd.SQL) } func (s *Stmt) NumInput() int { @@ -499,7 +573,7 @@ func (s *Stmt) Exec(argsV []driver.Value) (driver.Result, error) { } func (s *Stmt) ExecContext(ctx context.Context, argsV []driver.NamedValue) (driver.Result, error) { - return s.conn.ExecContext(ctx, s.sd.Name, argsV) + return s.conn.ExecContext(ctx, s.sd.SQL, argsV) } func (s *Stmt) Query(argsV []driver.Value) (driver.Rows, error) { @@ -507,7 +581,7 @@ func (s *Stmt) Query(argsV []driver.Value) (driver.Rows, error) { } func (s *Stmt) QueryContext(ctx context.Context, argsV []driver.NamedValue) (driver.Rows, error) { - return s.conn.QueryContext(ctx, s.sd.Name, argsV) + return s.conn.QueryContext(ctx, s.sd.SQL, argsV) } type rowValueFunc func(src []byte) (driver.Value, error) @@ -536,7 +610,7 @@ func (r *Rows) Columns() []string { // ColumnTypeDatabaseTypeName returns the database system type name. If the name is unknown the OID is returned. func (r *Rows) ColumnTypeDatabaseTypeName(index int) string { - if dt, ok := r.conn.conn.ConnInfo().DataTypeForOID(r.rows.FieldDescriptions()[index].DataTypeOID); ok { + if dt, ok := r.conn.conn.TypeMap().TypeForOID(r.rows.FieldDescriptions()[index].DataTypeOID); ok { return strings.ToUpper(dt.Name) } @@ -611,7 +685,7 @@ func (r *Rows) Close() error { } func (r *Rows) Next(dest []driver.Value) error { - ci := r.conn.conn.ConnInfo() + m := r.conn.conn.TypeMap() fieldDescriptions := r.rows.FieldDescriptions() if r.valueFuncs == nil { @@ -624,23 +698,23 @@ func (r *Rows) Next(dest []driver.Value) error { switch fd.DataTypeOID { case pgtype.BoolOID: var d bool - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) return d, err } case pgtype.ByteaOID: var d []byte - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) return d, err } - case pgtype.CIDOID: - var d pgtype.CID - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + case pgtype.CIDOID, pgtype.OIDOID, pgtype.XIDOID: + var d pgtype.Uint32 + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) if err != nil { return nil, err } @@ -648,9 +722,9 @@ func (r *Rows) Next(dest []driver.Value) error { } case pgtype.DateOID: var d pgtype.Date - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) if err != nil { return nil, err } @@ -658,74 +732,54 @@ func (r *Rows) Next(dest []driver.Value) error { } case pgtype.Float4OID: var d float32 - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) return float64(d), err } case pgtype.Float8OID: var d float64 - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) return d, err } case pgtype.Int2OID: var d int16 - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) return int64(d), err } case pgtype.Int4OID: var d int32 - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) return int64(d), err } case pgtype.Int8OID: var d int64 - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) return d, err } - case pgtype.JSONOID: - var d pgtype.JSON - scanPlan := ci.PlanScan(dataTypeOID, format, &d) - r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) - if err != nil { - return nil, err - } - return d.Value() - } - case pgtype.JSONBOID: - var d pgtype.JSONB - scanPlan := ci.PlanScan(dataTypeOID, format, &d) - r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) - if err != nil { - return nil, err - } - return d.Value() - } - case pgtype.OIDOID: - var d pgtype.OIDValue - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + case pgtype.JSONOID, pgtype.JSONBOID: + var d []byte + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) if err != nil { return nil, err } - return d.Value() + return d, nil } case pgtype.TimestampOID: var d pgtype.Timestamp - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) if err != nil { return nil, err } @@ -733,29 +787,29 @@ func (r *Rows) Next(dest []driver.Value) error { } case pgtype.TimestamptzOID: var d pgtype.Timestamptz - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) if err != nil { return nil, err } return d.Value() } - case pgtype.XIDOID: - var d pgtype.XID - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + case pgtype.XMLOID: + var d []byte + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) if err != nil { return nil, err } - return d.Value() + return d, nil } default: var d string - scanPlan := ci.PlanScan(dataTypeOID, format, &d) + scanPlan := m.PlanScan(dataTypeOID, format, &d) r.valueFuncs[i] = func(src []byte) (driver.Value, error) { - err := scanPlan.Scan(ci, dataTypeOID, format, src, &d) + err := scanPlan.Scan(src, &d) return d, err } } @@ -783,7 +837,7 @@ func (r *Rows) Next(dest []driver.Value) error { var err error dest[i], err = r.valueFuncs[i](rv) if err != nil { - return fmt.Errorf("convert field %d failed: %v", i, err) + return fmt.Errorf("convert field %d failed: %w", i, err) } } else { dest[i] = nil @@ -793,11 +847,11 @@ func (r *Rows) Next(dest []driver.Value) error { return nil } -func valueToInterface(argsV []driver.Value) []interface{} { - args := make([]interface{}, 0, len(argsV)) +func valueToInterface(argsV []driver.Value) []any { + args := make([]any, 0, len(argsV)) for _, v := range argsV { if v != nil { - args = append(args, v.(interface{})) + args = append(args, v.(any)) } else { args = append(args, nil) } @@ -805,11 +859,11 @@ func valueToInterface(argsV []driver.Value) []interface{} { return args } -func namedValueToInterface(argsV []driver.NamedValue) []interface{} { - args := make([]interface{}, 0, len(argsV)) +func namedValueToInterface(argsV []driver.NamedValue) []any { + args := make([]any, 0, len(argsV)) for _, v := range argsV { if v.Value != nil { - args = append(args, v.Value.(interface{})) + args = append(args, v.Value.(any)) } else { args = append(args, nil) } @@ -825,55 +879,3 @@ type wrapTx struct { func (wtx wrapTx) Commit() error { return wtx.tx.Commit(wtx.ctx) } func (wtx wrapTx) Rollback() error { return wtx.tx.Rollback(wtx.ctx) } - -type fakeTx struct{} - -func (fakeTx) Commit() error { return nil } - -func (fakeTx) Rollback() error { return nil } - -// AcquireConn acquires a *pgx.Conn from database/sql connection pool. It must be released with ReleaseConn. -// -// In Go 1.13 this functionality has been incorporated into the standard library in the db.Conn.Raw() method. -func AcquireConn(db *sql.DB) (*pgx.Conn, error) { - var conn *pgx.Conn - ctx := context.WithValue(context.Background(), ctxKeyFakeTx, &conn) - tx, err := db.BeginTx(ctx, nil) - if err != nil { - return nil, err - } - if conn == nil { - tx.Rollback() - return nil, ErrNotPgx - } - - fakeTxMutex.Lock() - fakeTxConns[conn] = tx - fakeTxMutex.Unlock() - - return conn, nil -} - -// ReleaseConn releases a *pgx.Conn acquired with AcquireConn. -func ReleaseConn(db *sql.DB, conn *pgx.Conn) error { - var tx *sql.Tx - var ok bool - - if conn.PgConn().IsBusy() || conn.PgConn().TxStatus() != 'I' { - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - defer cancel() - conn.Close(ctx) - } - - fakeTxMutex.Lock() - tx, ok = fakeTxConns[conn] - if ok { - delete(fakeTxConns, conn) - fakeTxMutex.Unlock() - } else { - fakeTxMutex.Unlock() - return fmt.Errorf("can't release conn that is not acquired") - } - - return tx.Rollback() -} diff --git a/vendor/github.com/jackc/pgx/v5/tracer.go b/vendor/github.com/jackc/pgx/v5/tracer.go new file mode 100644 index 00000000..58ca99f7 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/tracer.go @@ -0,0 +1,107 @@ +package pgx + +import ( + "context" + + "github.com/jackc/pgx/v5/pgconn" +) + +// QueryTracer traces Query, QueryRow, and Exec. +type QueryTracer interface { + // TraceQueryStart is called at the beginning of Query, QueryRow, and Exec calls. The returned context is used for the + // rest of the call and will be passed to TraceQueryEnd. + TraceQueryStart(ctx context.Context, conn *Conn, data TraceQueryStartData) context.Context + + TraceQueryEnd(ctx context.Context, conn *Conn, data TraceQueryEndData) +} + +type TraceQueryStartData struct { + SQL string + Args []any +} + +type TraceQueryEndData struct { + CommandTag pgconn.CommandTag + Err error +} + +// BatchTracer traces SendBatch. +type BatchTracer interface { + // TraceBatchStart is called at the beginning of SendBatch calls. The returned context is used for the + // rest of the call and will be passed to TraceBatchQuery and TraceBatchEnd. + TraceBatchStart(ctx context.Context, conn *Conn, data TraceBatchStartData) context.Context + + TraceBatchQuery(ctx context.Context, conn *Conn, data TraceBatchQueryData) + TraceBatchEnd(ctx context.Context, conn *Conn, data TraceBatchEndData) +} + +type TraceBatchStartData struct { + Batch *Batch +} + +type TraceBatchQueryData struct { + SQL string + Args []any + CommandTag pgconn.CommandTag + Err error +} + +type TraceBatchEndData struct { + Err error +} + +// CopyFromTracer traces CopyFrom. +type CopyFromTracer interface { + // TraceCopyFromStart is called at the beginning of CopyFrom calls. The returned context is used for the + // rest of the call and will be passed to TraceCopyFromEnd. + TraceCopyFromStart(ctx context.Context, conn *Conn, data TraceCopyFromStartData) context.Context + + TraceCopyFromEnd(ctx context.Context, conn *Conn, data TraceCopyFromEndData) +} + +type TraceCopyFromStartData struct { + TableName Identifier + ColumnNames []string +} + +type TraceCopyFromEndData struct { + CommandTag pgconn.CommandTag + Err error +} + +// PrepareTracer traces Prepare. +type PrepareTracer interface { + // TracePrepareStart is called at the beginning of Prepare calls. The returned context is used for the + // rest of the call and will be passed to TracePrepareEnd. + TracePrepareStart(ctx context.Context, conn *Conn, data TracePrepareStartData) context.Context + + TracePrepareEnd(ctx context.Context, conn *Conn, data TracePrepareEndData) +} + +type TracePrepareStartData struct { + Name string + SQL string +} + +type TracePrepareEndData struct { + AlreadyPrepared bool + Err error +} + +// ConnectTracer traces Connect and ConnectConfig. +type ConnectTracer interface { + // TraceConnectStart is called at the beginning of Connect and ConnectConfig calls. The returned context is used for + // the rest of the call and will be passed to TraceConnectEnd. + TraceConnectStart(ctx context.Context, data TraceConnectStartData) context.Context + + TraceConnectEnd(ctx context.Context, data TraceConnectEndData) +} + +type TraceConnectStartData struct { + ConnConfig *ConnConfig +} + +type TraceConnectEndData struct { + Conn *Conn + Err error +} diff --git a/vendor/github.com/jackc/pgx/v4/tx.go b/vendor/github.com/jackc/pgx/v5/tx.go similarity index 70% rename from vendor/github.com/jackc/pgx/v4/tx.go rename to vendor/github.com/jackc/pgx/v5/tx.go index 9ecaa17e..8feeb512 100644 --- a/vendor/github.com/jackc/pgx/v4/tx.go +++ b/vendor/github.com/jackc/pgx/v5/tx.go @@ -1,13 +1,13 @@ package pgx import ( - "bytes" "context" "errors" "fmt" "strconv" + "strings" - "github.com/jackc/pgconn" + "github.com/jackc/pgx/v5/pgconn" ) // TxIsoLevel is the transaction isolation level (serializable, repeatable read, read committed or read uncommitted) @@ -44,6 +44,10 @@ type TxOptions struct { IsoLevel TxIsoLevel AccessMode TxAccessMode DeferrableMode TxDeferrableMode + + // BeginQuery is the SQL query that will be executed to begin the transaction. This allows using non-standard syntax + // such as BEGIN PRIORITY HIGH with CockroachDB. If set this will override the other settings. + BeginQuery string } var emptyTxOptions TxOptions @@ -52,16 +56,26 @@ func (txOptions TxOptions) beginSQL() string { if txOptions == emptyTxOptions { return "begin" } - buf := &bytes.Buffer{} + + if txOptions.BeginQuery != "" { + return txOptions.BeginQuery + } + + var buf strings.Builder + buf.Grow(64) // 64 - maximum length of string with available options buf.WriteString("begin") + if txOptions.IsoLevel != "" { - fmt.Fprintf(buf, " isolation level %s", txOptions.IsoLevel) + buf.WriteString(" isolation level ") + buf.WriteString(string(txOptions.IsoLevel)) } if txOptions.AccessMode != "" { - fmt.Fprintf(buf, " %s", txOptions.AccessMode) + buf.WriteByte(' ') + buf.WriteString(string(txOptions.AccessMode)) } if txOptions.DeferrableMode != "" { - fmt.Fprintf(buf, " %s", txOptions.DeferrableMode) + buf.WriteByte(' ') + buf.WriteString(string(txOptions.DeferrableMode)) } return buf.String() @@ -94,39 +108,6 @@ func (c *Conn) BeginTx(ctx context.Context, txOptions TxOptions) (Tx, error) { return &dbTx{conn: c}, nil } -// BeginFunc starts a transaction and calls f. If f does not return an error the transaction is committed. If f returns -// an error the transaction is rolled back. The context will be used when executing the transaction control statements -// (BEGIN, ROLLBACK, and COMMIT) but does not otherwise affect the execution of f. -func (c *Conn) BeginFunc(ctx context.Context, f func(Tx) error) (err error) { - return c.BeginTxFunc(ctx, TxOptions{}, f) -} - -// BeginTxFunc starts a transaction with txOptions determining the transaction mode and calls f. If f does not return -// an error the transaction is committed. If f returns an error the transaction is rolled back. The context will be -// used when executing the transaction control statements (BEGIN, ROLLBACK, and COMMIT) but does not otherwise affect -// the execution of f. -func (c *Conn) BeginTxFunc(ctx context.Context, txOptions TxOptions, f func(Tx) error) (err error) { - var tx Tx - tx, err = c.BeginTx(ctx, txOptions) - if err != nil { - return err - } - defer func() { - rollbackErr := tx.Rollback(ctx) - if rollbackErr != nil && !errors.Is(rollbackErr, ErrTxClosed) { - err = rollbackErr - } - }() - - fErr := f(tx) - if fErr != nil { - _ = tx.Rollback(ctx) // ignore rollback error as there is already an error to return - return fErr - } - - return tx.Commit(ctx) -} - // Tx represents a database transaction. // // Tx is an interface instead of a struct to enable connection pools to be implemented without relying on internal pgx @@ -138,20 +119,17 @@ type Tx interface { // Begin starts a pseudo nested transaction. Begin(ctx context.Context) (Tx, error) - // BeginFunc starts a pseudo nested transaction and executes f. If f does not return an err the pseudo nested - // transaction will be committed. If it does then it will be rolled back. - BeginFunc(ctx context.Context, f func(Tx) error) (err error) - // Commit commits the transaction if this is a real transaction or releases the savepoint if this is a pseudo nested - // transaction. Commit will return ErrTxClosed if the Tx is already closed, but is otherwise safe to call multiple - // times. If the commit fails with a rollback status (e.g. the transaction was already in a broken state) then - // ErrTxCommitRollback will be returned. + // transaction. Commit will return an error where errors.Is(ErrTxClosed) is true if the Tx is already closed, but is + // otherwise safe to call multiple times. If the commit fails with a rollback status (e.g. the transaction was already + // in a broken state) then an error where errors.Is(ErrTxCommitRollback) is true will be returned. Commit(ctx context.Context) error // Rollback rolls back the transaction if this is a real transaction or rolls back to the savepoint if this is a - // pseudo nested transaction. Rollback will return ErrTxClosed if the Tx is already closed, but is otherwise safe to - // call multiple times. Hence, a defer tx.Rollback() is safe even if tx.Commit() will be called first in a non-error - // condition. Any other failure of a real transaction will result in the connection being closed. + // pseudo nested transaction. Rollback will return an error where errors.Is(ErrTxClosed) is true if the Tx is already + // closed, but is otherwise safe to call multiple times. Hence, a defer tx.Rollback() is safe even if tx.Commit() will + // be called first in a non-error condition. Any other failure of a real transaction will result in the connection + // being closed. Rollback(ctx context.Context) error CopyFrom(ctx context.Context, tableName Identifier, columnNames []string, rowSrc CopyFromSource) (int64, error) @@ -160,10 +138,9 @@ type Tx interface { Prepare(ctx context.Context, name, sql string) (*pgconn.StatementDescription, error) - Exec(ctx context.Context, sql string, arguments ...interface{}) (commandTag pgconn.CommandTag, err error) - Query(ctx context.Context, sql string, args ...interface{}) (Rows, error) - QueryRow(ctx context.Context, sql string, args ...interface{}) Row - QueryFunc(ctx context.Context, sql string, args []interface{}, scans []interface{}, f func(QueryFuncRow) error) (pgconn.CommandTag, error) + Exec(ctx context.Context, sql string, arguments ...any) (commandTag pgconn.CommandTag, err error) + Query(ctx context.Context, sql string, args ...any) (Rows, error) + QueryRow(ctx context.Context, sql string, args ...any) Row // Conn returns the underlying *Conn that on which this transaction is executing. Conn() *Conn @@ -175,7 +152,6 @@ type Tx interface { // called on the dbTx. type dbTx struct { conn *Conn - err error savepointNum int64 closed bool } @@ -195,32 +171,6 @@ func (tx *dbTx) Begin(ctx context.Context) (Tx, error) { return &dbSimulatedNestedTx{tx: tx, savepointNum: tx.savepointNum}, nil } -func (tx *dbTx) BeginFunc(ctx context.Context, f func(Tx) error) (err error) { - if tx.closed { - return ErrTxClosed - } - - var savepoint Tx - savepoint, err = tx.Begin(ctx) - if err != nil { - return err - } - defer func() { - rollbackErr := savepoint.Rollback(ctx) - if rollbackErr != nil && !errors.Is(rollbackErr, ErrTxClosed) { - err = rollbackErr - } - }() - - fErr := f(savepoint) - if fErr != nil { - _ = savepoint.Rollback(ctx) // ignore rollback error as there is already an error to return - return fErr - } - - return savepoint.Commit(ctx) -} - // Commit commits the transaction. func (tx *dbTx) Commit(ctx context.Context) error { if tx.closed { @@ -235,7 +185,7 @@ func (tx *dbTx) Commit(ctx context.Context) error { } return err } - if string(commandTag) == "ROLLBACK" { + if commandTag.String() == "ROLLBACK" { return ErrTxCommitRollback } @@ -263,7 +213,7 @@ func (tx *dbTx) Rollback(ctx context.Context) error { } // Exec delegates to the underlying *Conn -func (tx *dbTx) Exec(ctx context.Context, sql string, arguments ...interface{}) (commandTag pgconn.CommandTag, err error) { +func (tx *dbTx) Exec(ctx context.Context, sql string, arguments ...any) (commandTag pgconn.CommandTag, err error) { if tx.closed { return pgconn.CommandTag{}, ErrTxClosed } @@ -281,29 +231,20 @@ func (tx *dbTx) Prepare(ctx context.Context, name, sql string) (*pgconn.Statemen } // Query delegates to the underlying *Conn -func (tx *dbTx) Query(ctx context.Context, sql string, args ...interface{}) (Rows, error) { +func (tx *dbTx) Query(ctx context.Context, sql string, args ...any) (Rows, error) { if tx.closed { // Because checking for errors can be deferred to the *Rows, build one with the error err := ErrTxClosed - return &connRows{closed: true, err: err}, err + return &baseRows{closed: true, err: err}, err } return tx.conn.Query(ctx, sql, args...) } // QueryRow delegates to the underlying *Conn -func (tx *dbTx) QueryRow(ctx context.Context, sql string, args ...interface{}) Row { +func (tx *dbTx) QueryRow(ctx context.Context, sql string, args ...any) Row { rows, _ := tx.Query(ctx, sql, args...) - return (*connRow)(rows.(*connRows)) -} - -// QueryFunc delegates to the underlying *Conn. -func (tx *dbTx) QueryFunc(ctx context.Context, sql string, args []interface{}, scans []interface{}, f func(QueryFuncRow) error) (pgconn.CommandTag, error) { - if tx.closed { - return nil, ErrTxClosed - } - - return tx.conn.QueryFunc(ctx, sql, args, scans, f) + return (*connRow)(rows.(*baseRows)) } // CopyFrom delegates to the underlying *Conn @@ -349,14 +290,6 @@ func (sp *dbSimulatedNestedTx) Begin(ctx context.Context) (Tx, error) { return sp.tx.Begin(ctx) } -func (sp *dbSimulatedNestedTx) BeginFunc(ctx context.Context, f func(Tx) error) (err error) { - if sp.closed { - return ErrTxClosed - } - - return sp.tx.BeginFunc(ctx, f) -} - // Commit releases the savepoint essentially committing the pseudo nested transaction. func (sp *dbSimulatedNestedTx) Commit(ctx context.Context) error { if sp.closed { @@ -382,9 +315,9 @@ func (sp *dbSimulatedNestedTx) Rollback(ctx context.Context) error { } // Exec delegates to the underlying Tx -func (sp *dbSimulatedNestedTx) Exec(ctx context.Context, sql string, arguments ...interface{}) (commandTag pgconn.CommandTag, err error) { +func (sp *dbSimulatedNestedTx) Exec(ctx context.Context, sql string, arguments ...any) (commandTag pgconn.CommandTag, err error) { if sp.closed { - return nil, ErrTxClosed + return pgconn.CommandTag{}, ErrTxClosed } return sp.tx.Exec(ctx, sql, arguments...) @@ -400,29 +333,20 @@ func (sp *dbSimulatedNestedTx) Prepare(ctx context.Context, name, sql string) (* } // Query delegates to the underlying Tx -func (sp *dbSimulatedNestedTx) Query(ctx context.Context, sql string, args ...interface{}) (Rows, error) { +func (sp *dbSimulatedNestedTx) Query(ctx context.Context, sql string, args ...any) (Rows, error) { if sp.closed { // Because checking for errors can be deferred to the *Rows, build one with the error err := ErrTxClosed - return &connRows{closed: true, err: err}, err + return &baseRows{closed: true, err: err}, err } return sp.tx.Query(ctx, sql, args...) } // QueryRow delegates to the underlying Tx -func (sp *dbSimulatedNestedTx) QueryRow(ctx context.Context, sql string, args ...interface{}) Row { +func (sp *dbSimulatedNestedTx) QueryRow(ctx context.Context, sql string, args ...any) Row { rows, _ := sp.Query(ctx, sql, args...) - return (*connRow)(rows.(*connRows)) -} - -// QueryFunc delegates to the underlying Tx. -func (sp *dbSimulatedNestedTx) QueryFunc(ctx context.Context, sql string, args []interface{}, scans []interface{}, f func(QueryFuncRow) error) (pgconn.CommandTag, error) { - if sp.closed { - return nil, ErrTxClosed - } - - return sp.tx.QueryFunc(ctx, sql, args, scans, f) + return (*connRow)(rows.(*baseRows)) } // CopyFrom delegates to the underlying *Conn @@ -450,3 +374,59 @@ func (sp *dbSimulatedNestedTx) LargeObjects() LargeObjects { func (sp *dbSimulatedNestedTx) Conn() *Conn { return sp.tx.Conn() } + +// BeginFunc calls Begin on db and then calls fn. If fn does not return an error then it calls Commit on db. If fn +// returns an error it calls Rollback on db. The context will be used when executing the transaction control statements +// (BEGIN, ROLLBACK, and COMMIT) but does not otherwise affect the execution of fn. +func BeginFunc( + ctx context.Context, + db interface { + Begin(ctx context.Context) (Tx, error) + }, + fn func(Tx) error, +) (err error) { + var tx Tx + tx, err = db.Begin(ctx) + if err != nil { + return err + } + + return beginFuncExec(ctx, tx, fn) +} + +// BeginTxFunc calls BeginTx on db and then calls fn. If fn does not return an error then it calls Commit on db. If fn +// returns an error it calls Rollback on db. The context will be used when executing the transaction control statements +// (BEGIN, ROLLBACK, and COMMIT) but does not otherwise affect the execution of fn. +func BeginTxFunc( + ctx context.Context, + db interface { + BeginTx(ctx context.Context, txOptions TxOptions) (Tx, error) + }, + txOptions TxOptions, + fn func(Tx) error, +) (err error) { + var tx Tx + tx, err = db.BeginTx(ctx, txOptions) + if err != nil { + return err + } + + return beginFuncExec(ctx, tx, fn) +} + +func beginFuncExec(ctx context.Context, tx Tx, fn func(Tx) error) (err error) { + defer func() { + rollbackErr := tx.Rollback(ctx) + if rollbackErr != nil && !errors.Is(rollbackErr, ErrTxClosed) { + err = rollbackErr + } + }() + + fErr := fn(tx) + if fErr != nil { + _ = tx.Rollback(ctx) // ignore rollback error as there is already an error to return + return fErr + } + + return tx.Commit(ctx) +} diff --git a/vendor/github.com/jackc/pgx/v5/values.go b/vendor/github.com/jackc/pgx/v5/values.go new file mode 100644 index 00000000..6e2ff300 --- /dev/null +++ b/vendor/github.com/jackc/pgx/v5/values.go @@ -0,0 +1,63 @@ +package pgx + +import ( + "errors" + + "github.com/jackc/pgx/v5/internal/pgio" + "github.com/jackc/pgx/v5/pgtype" +) + +// PostgreSQL format codes +const ( + TextFormatCode = 0 + BinaryFormatCode = 1 +) + +func convertSimpleArgument(m *pgtype.Map, arg any) (any, error) { + buf, err := m.Encode(0, TextFormatCode, arg, []byte{}) + if err != nil { + return nil, err + } + if buf == nil { + return nil, nil + } + return string(buf), nil +} + +func encodeCopyValue(m *pgtype.Map, buf []byte, oid uint32, arg any) ([]byte, error) { + sp := len(buf) + buf = pgio.AppendInt32(buf, -1) + argBuf, err := m.Encode(oid, BinaryFormatCode, arg, buf) + if err != nil { + if argBuf2, err2 := tryScanStringCopyValueThenEncode(m, buf, oid, arg); err2 == nil { + argBuf = argBuf2 + } else { + return nil, err + } + } + + if argBuf != nil { + buf = argBuf + pgio.SetInt32(buf[sp:], int32(len(buf[sp:])-4)) + } + return buf, nil +} + +func tryScanStringCopyValueThenEncode(m *pgtype.Map, buf []byte, oid uint32, arg any) ([]byte, error) { + s, ok := arg.(string) + if !ok { + textBuf, err := m.Encode(oid, TextFormatCode, arg, nil) + if err != nil { + return nil, errors.New("not a string and cannot be encoded as text") + } + s = string(textBuf) + } + + var v any + err := m.Scan(oid, TextFormatCode, []byte(s), &v) + if err != nil { + return nil, err + } + + return m.Encode(oid, BinaryFormatCode, v, buf) +} diff --git a/vendor/github.com/jackc/puddle/v2/CHANGELOG.md b/vendor/github.com/jackc/puddle/v2/CHANGELOG.md new file mode 100644 index 00000000..d0d202c7 --- /dev/null +++ b/vendor/github.com/jackc/puddle/v2/CHANGELOG.md @@ -0,0 +1,79 @@ +# 2.2.2 (September 10, 2024) + +* Add empty acquire time to stats (Maxim Ivanov) +* Stop importing nanotime from runtime via linkname (maypok86) + +# 2.2.1 (July 15, 2023) + +* Fix: CreateResource cannot overflow pool. This changes documented behavior of CreateResource. Previously, + CreateResource could create a resource even if the pool was full. This could cause the pool to overflow. While this + was documented, it was documenting incorrect behavior. CreateResource now returns an error if the pool is full. + +# 2.2.0 (February 11, 2023) + +* Use Go 1.19 atomics and drop go.uber.org/atomic dependency + +# 2.1.2 (November 12, 2022) + +* Restore support to Go 1.18 via go.uber.org/atomic + +# 2.1.1 (November 11, 2022) + +* Fix create resource concurrently with Stat call race + +# 2.1.0 (October 28, 2022) + +* Concurrency control is now implemented with a semaphore. This simplifies some internal logic, resolves a few error conditions (including a deadlock), and improves performance. (Jan Dubsky) +* Go 1.19 is now required for the improved atomic support. + +# 2.0.1 (October 28, 2022) + +* Fix race condition when Close is called concurrently with multiple constructors + +# 2.0.0 (September 17, 2022) + +* Use generics instead of interface{} (Столяров Владимир Алексеевич) +* Add Reset +* Do not cancel resource construction when Acquire is canceled +* NewPool takes Config + +# 1.3.0 (August 27, 2022) + +* Acquire creates resources in background to allow creation to continue after Acquire is canceled (James Hartig) + +# 1.2.1 (December 2, 2021) + +* TryAcquire now does not block when background constructing resource + +# 1.2.0 (November 20, 2021) + +* Add TryAcquire (A. Jensen) +* Fix: remove memory leak / unintentionally pinned memory when shrinking slices (Alexander Staubo) +* Fix: Do not leave pool locked after panic from nil context + +# 1.1.4 (September 11, 2021) + +* Fix: Deadlock in CreateResource if pool was closed during resource acquisition (Dmitriy Matrenichev) + +# 1.1.3 (December 3, 2020) + +* Fix: Failed resource creation could cause concurrent Acquire to hang. (Evgeny Vanslov) + +# 1.1.2 (September 26, 2020) + +* Fix: Resource.Destroy no longer removes itself from the pool before its destructor has completed. +* Fix: Prevent crash when pool is closed while resource is being created. + +# 1.1.1 (April 2, 2020) + +* Pool.Close can be safely called multiple times +* AcquireAllIDle immediately returns nil if pool is closed +* CreateResource checks if pool is closed before taking any action +* Fix potential race condition when CreateResource and Close are called concurrently. CreateResource now checks if pool is closed before adding newly created resource to pool. + +# 1.1.0 (February 5, 2020) + +* Use runtime.nanotime for faster tracking of acquire time and last usage time. +* Track resource idle time to enable client health check logic. (Patrick Ellul) +* Add CreateResource to construct a new resource without acquiring it. (Patrick Ellul) +* Fix deadlock race when acquire is cancelled. (Michael Tharp) diff --git a/vendor/github.com/jackc/chunkreader/v2/LICENSE b/vendor/github.com/jackc/puddle/v2/LICENSE similarity index 96% rename from vendor/github.com/jackc/chunkreader/v2/LICENSE rename to vendor/github.com/jackc/puddle/v2/LICENSE index c1c4f50f..bcc286c5 100644 --- a/vendor/github.com/jackc/chunkreader/v2/LICENSE +++ b/vendor/github.com/jackc/puddle/v2/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2019 Jack Christensen +Copyright (c) 2018 Jack Christensen MIT License diff --git a/vendor/github.com/jackc/puddle/v2/README.md b/vendor/github.com/jackc/puddle/v2/README.md new file mode 100644 index 00000000..fa82a9d4 --- /dev/null +++ b/vendor/github.com/jackc/puddle/v2/README.md @@ -0,0 +1,80 @@ +[![Go Reference](https://pkg.go.dev/badge/github.com/jackc/puddle/v2.svg)](https://pkg.go.dev/github.com/jackc/puddle/v2) +![Build Status](https://github.com/jackc/puddle/actions/workflows/ci.yml/badge.svg) + +# Puddle + +Puddle is a tiny generic resource pool library for Go that uses the standard +context library to signal cancellation of acquires. It is designed to contain +the minimum functionality required for a resource pool. It can be used directly +or it can be used as the base for a domain specific resource pool. For example, +a database connection pool may use puddle internally and implement health checks +and keep-alive behavior without needing to implement any concurrent code of its +own. + +## Features + +* Acquire cancellation via context standard library +* Statistics API for monitoring pool pressure +* No dependencies outside of standard library and golang.org/x/sync +* High performance +* 100% test coverage of reachable code + +## Example Usage + +```go +package main + +import ( + "context" + "log" + "net" + + "github.com/jackc/puddle/v2" +) + +func main() { + constructor := func(context.Context) (net.Conn, error) { + return net.Dial("tcp", "127.0.0.1:8080") + } + destructor := func(value net.Conn) { + value.Close() + } + maxPoolSize := int32(10) + + pool, err := puddle.NewPool(&puddle.Config[net.Conn]{Constructor: constructor, Destructor: destructor, MaxSize: maxPoolSize}) + if err != nil { + log.Fatal(err) + } + + // Acquire resource from the pool. + res, err := pool.Acquire(context.Background()) + if err != nil { + log.Fatal(err) + } + + // Use resource. + _, err = res.Value().Write([]byte{1}) + if err != nil { + log.Fatal(err) + } + + // Release when done. + res.Release() +} +``` + +## Status + +Puddle is stable and feature complete. + +* Bug reports and fixes are welcome. +* New features will usually not be accepted if they can be feasibly implemented in a wrapper. +* Performance optimizations will usually not be accepted unless the performance issue rises to the level of a bug. + +## Supported Go Versions + +puddle supports the same versions of Go that are supported by the Go project. For [Go](https://golang.org/doc/devel/release.html#policy) that is the two most recent major releases. This means puddle supports Go 1.19 and higher. + +## License + +MIT diff --git a/vendor/github.com/jackc/puddle/v2/context.go b/vendor/github.com/jackc/puddle/v2/context.go new file mode 100644 index 00000000..e19d2a60 --- /dev/null +++ b/vendor/github.com/jackc/puddle/v2/context.go @@ -0,0 +1,24 @@ +package puddle + +import ( + "context" + "time" +) + +// valueCancelCtx combines two contexts into one. One context is used for values and the other is used for cancellation. +type valueCancelCtx struct { + valueCtx context.Context + cancelCtx context.Context +} + +func (ctx *valueCancelCtx) Deadline() (time.Time, bool) { return ctx.cancelCtx.Deadline() } +func (ctx *valueCancelCtx) Done() <-chan struct{} { return ctx.cancelCtx.Done() } +func (ctx *valueCancelCtx) Err() error { return ctx.cancelCtx.Err() } +func (ctx *valueCancelCtx) Value(key any) any { return ctx.valueCtx.Value(key) } + +func newValueCancelCtx(valueCtx, cancelContext context.Context) context.Context { + return &valueCancelCtx{ + valueCtx: valueCtx, + cancelCtx: cancelContext, + } +} diff --git a/vendor/github.com/jackc/puddle/v2/doc.go b/vendor/github.com/jackc/puddle/v2/doc.go new file mode 100644 index 00000000..818e4a69 --- /dev/null +++ b/vendor/github.com/jackc/puddle/v2/doc.go @@ -0,0 +1,11 @@ +// Package puddle is a generic resource pool with type-parametrized api. +/* + +Puddle is a tiny generic resource pool library for Go that uses the standard +context library to signal cancellation of acquires. It is designed to contain +the minimum functionality a resource pool needs that cannot be implemented +without concurrency concerns. For example, a database connection pool may use +puddle internally and implement health checks and keep-alive behavior without +needing to implement any concurrent code of its own. +*/ +package puddle diff --git a/vendor/github.com/jackc/puddle/v2/internal/genstack/gen_stack.go b/vendor/github.com/jackc/puddle/v2/internal/genstack/gen_stack.go new file mode 100644 index 00000000..7e4660c8 --- /dev/null +++ b/vendor/github.com/jackc/puddle/v2/internal/genstack/gen_stack.go @@ -0,0 +1,85 @@ +package genstack + +// GenStack implements a generational stack. +// +// GenStack works as common stack except for the fact that all elements in the +// older generation are guaranteed to be popped before any element in the newer +// generation. New elements are always pushed to the current (newest) +// generation. +// +// We could also say that GenStack behaves as a stack in case of a single +// generation, but it behaves as a queue of individual generation stacks. +type GenStack[T any] struct { + // We can represent arbitrary number of generations using 2 stacks. The + // new stack stores all new pushes and the old stack serves all reads. + // Old stack can represent multiple generations. If old == new, then all + // elements pushed in previous (not current) generations have already + // been popped. + + old *stack[T] + new *stack[T] +} + +// NewGenStack creates a new empty GenStack. +func NewGenStack[T any]() *GenStack[T] { + s := &stack[T]{} + return &GenStack[T]{ + old: s, + new: s, + } +} + +func (s *GenStack[T]) Pop() (T, bool) { + // Pushes always append to the new stack, so if the old once becomes + // empty, it will remail empty forever. + if s.old.len() == 0 && s.old != s.new { + s.old = s.new + } + + if s.old.len() == 0 { + var zero T + return zero, false + } + + return s.old.pop(), true +} + +// Push pushes a new element at the top of the stack. +func (s *GenStack[T]) Push(v T) { s.new.push(v) } + +// NextGen starts a new stack generation. +func (s *GenStack[T]) NextGen() { + if s.old == s.new { + s.new = &stack[T]{} + return + } + + // We need to pop from the old stack to the top of the new stack. Let's + // have an example: + // + // Old: 4 3 2 1 + // New: 8 7 6 5 + // PopOrder: 1 2 3 4 5 6 7 8 + // + // + // To preserve pop order, we have to take all elements from the old + // stack and push them to the top of new stack: + // + // New: 8 7 6 5 4 3 2 1 + // + s.new.push(s.old.takeAll()...) + + // We have the old stack allocated and empty, so why not to reuse it as + // new new stack. + s.old, s.new = s.new, s.old +} + +// Len returns number of elements in the stack. +func (s *GenStack[T]) Len() int { + l := s.old.len() + if s.old != s.new { + l += s.new.len() + } + + return l +} diff --git a/vendor/github.com/jackc/puddle/v2/internal/genstack/stack.go b/vendor/github.com/jackc/puddle/v2/internal/genstack/stack.go new file mode 100644 index 00000000..dbced0c7 --- /dev/null +++ b/vendor/github.com/jackc/puddle/v2/internal/genstack/stack.go @@ -0,0 +1,39 @@ +package genstack + +// stack is a wrapper around an array implementing a stack. +// +// We cannot use slice to represent the stack because append might change the +// pointer value of the slice. That would be an issue in GenStack +// implementation. +type stack[T any] struct { + arr []T +} + +// push pushes a new element at the top of a stack. +func (s *stack[T]) push(vs ...T) { s.arr = append(s.arr, vs...) } + +// pop pops the stack top-most element. +// +// If stack length is zero, this method panics. +func (s *stack[T]) pop() T { + idx := s.len() - 1 + val := s.arr[idx] + + // Avoid memory leak + var zero T + s.arr[idx] = zero + + s.arr = s.arr[:idx] + return val +} + +// takeAll returns all elements in the stack in order as they are stored - i.e. +// the top-most stack element is the last one. +func (s *stack[T]) takeAll() []T { + arr := s.arr + s.arr = nil + return arr +} + +// len returns number of elements in the stack. +func (s *stack[T]) len() int { return len(s.arr) } diff --git a/vendor/github.com/jackc/puddle/v2/log.go b/vendor/github.com/jackc/puddle/v2/log.go new file mode 100644 index 00000000..b21b9463 --- /dev/null +++ b/vendor/github.com/jackc/puddle/v2/log.go @@ -0,0 +1,32 @@ +package puddle + +import "unsafe" + +type ints interface { + int | int8 | int16 | int32 | int64 | uint | uint8 | uint16 | uint32 | uint64 +} + +// log2Int returns log2 of an integer. This function panics if val < 0. For val +// == 0, returns 0. +func log2Int[T ints](val T) uint8 { + if val <= 0 { + panic("log2 of non-positive number does not exist") + } + + return log2IntRange(val, 0, uint8(8*unsafe.Sizeof(val))) +} + +func log2IntRange[T ints](val T, begin, end uint8) uint8 { + length := end - begin + if length == 1 { + return begin + } + + delim := begin + length/2 + mask := T(1) << delim + if mask > val { + return log2IntRange(val, begin, delim) + } else { + return log2IntRange(val, delim, end) + } +} diff --git a/vendor/github.com/jackc/puddle/v2/nanotime.go b/vendor/github.com/jackc/puddle/v2/nanotime.go new file mode 100644 index 00000000..8a5351a0 --- /dev/null +++ b/vendor/github.com/jackc/puddle/v2/nanotime.go @@ -0,0 +1,16 @@ +package puddle + +import "time" + +// nanotime returns the time in nanoseconds since process start. +// +// This approach, described at +// https://github.com/golang/go/issues/61765#issuecomment-1672090302, +// is fast, monotonic, and portable, and avoids the previous +// dependence on runtime.nanotime using the (unsafe) linkname hack. +// In particular, time.Since does less work than time.Now. +func nanotime() int64 { + return time.Since(globalStart).Nanoseconds() +} + +var globalStart = time.Now() diff --git a/vendor/github.com/jackc/puddle/v2/pool.go b/vendor/github.com/jackc/puddle/v2/pool.go new file mode 100644 index 00000000..c411d2f6 --- /dev/null +++ b/vendor/github.com/jackc/puddle/v2/pool.go @@ -0,0 +1,710 @@ +package puddle + +import ( + "context" + "errors" + "sync" + "sync/atomic" + "time" + + "github.com/jackc/puddle/v2/internal/genstack" + "golang.org/x/sync/semaphore" +) + +const ( + resourceStatusConstructing = 0 + resourceStatusIdle = iota + resourceStatusAcquired = iota + resourceStatusHijacked = iota +) + +// ErrClosedPool occurs on an attempt to acquire a connection from a closed pool +// or a pool that is closed while the acquire is waiting. +var ErrClosedPool = errors.New("closed pool") + +// ErrNotAvailable occurs on an attempt to acquire a resource from a pool +// that is at maximum capacity and has no available resources. +var ErrNotAvailable = errors.New("resource not available") + +// Constructor is a function called by the pool to construct a resource. +type Constructor[T any] func(ctx context.Context) (res T, err error) + +// Destructor is a function called by the pool to destroy a resource. +type Destructor[T any] func(res T) + +// Resource is the resource handle returned by acquiring from the pool. +type Resource[T any] struct { + value T + pool *Pool[T] + creationTime time.Time + lastUsedNano int64 + poolResetCount int + status byte +} + +// Value returns the resource value. +func (res *Resource[T]) Value() T { + if !(res.status == resourceStatusAcquired || res.status == resourceStatusHijacked) { + panic("tried to access resource that is not acquired or hijacked") + } + return res.value +} + +// Release returns the resource to the pool. res must not be subsequently used. +func (res *Resource[T]) Release() { + if res.status != resourceStatusAcquired { + panic("tried to release resource that is not acquired") + } + res.pool.releaseAcquiredResource(res, nanotime()) +} + +// ReleaseUnused returns the resource to the pool without updating when it was last used used. i.e. LastUsedNanotime +// will not change. res must not be subsequently used. +func (res *Resource[T]) ReleaseUnused() { + if res.status != resourceStatusAcquired { + panic("tried to release resource that is not acquired") + } + res.pool.releaseAcquiredResource(res, res.lastUsedNano) +} + +// Destroy returns the resource to the pool for destruction. res must not be +// subsequently used. +func (res *Resource[T]) Destroy() { + if res.status != resourceStatusAcquired { + panic("tried to destroy resource that is not acquired") + } + go res.pool.destroyAcquiredResource(res) +} + +// Hijack assumes ownership of the resource from the pool. Caller is responsible +// for cleanup of resource value. +func (res *Resource[T]) Hijack() { + if res.status != resourceStatusAcquired { + panic("tried to hijack resource that is not acquired") + } + res.pool.hijackAcquiredResource(res) +} + +// CreationTime returns when the resource was created by the pool. +func (res *Resource[T]) CreationTime() time.Time { + if !(res.status == resourceStatusAcquired || res.status == resourceStatusHijacked) { + panic("tried to access resource that is not acquired or hijacked") + } + return res.creationTime +} + +// LastUsedNanotime returns when Release was last called on the resource measured in nanoseconds from an arbitrary time +// (a monotonic time). Returns creation time if Release has never been called. This is only useful to compare with +// other calls to LastUsedNanotime. In almost all cases, IdleDuration should be used instead. +func (res *Resource[T]) LastUsedNanotime() int64 { + if !(res.status == resourceStatusAcquired || res.status == resourceStatusHijacked) { + panic("tried to access resource that is not acquired or hijacked") + } + + return res.lastUsedNano +} + +// IdleDuration returns the duration since Release was last called on the resource. This is equivalent to subtracting +// LastUsedNanotime to the current nanotime. +func (res *Resource[T]) IdleDuration() time.Duration { + if !(res.status == resourceStatusAcquired || res.status == resourceStatusHijacked) { + panic("tried to access resource that is not acquired or hijacked") + } + + return time.Duration(nanotime() - res.lastUsedNano) +} + +// Pool is a concurrency-safe resource pool. +type Pool[T any] struct { + // mux is the pool internal lock. Any modification of shared state of + // the pool (but Acquires of acquireSem) must be performed only by + // holder of the lock. Long running operations are not allowed when mux + // is held. + mux sync.Mutex + // acquireSem provides an allowance to acquire a resource. + // + // Releases are allowed only when caller holds mux. Acquires have to + // happen before mux is locked (doesn't apply to semaphore.TryAcquire in + // AcquireAllIdle). + acquireSem *semaphore.Weighted + destructWG sync.WaitGroup + + allResources resList[T] + idleResources *genstack.GenStack[*Resource[T]] + + constructor Constructor[T] + destructor Destructor[T] + maxSize int32 + + acquireCount int64 + acquireDuration time.Duration + emptyAcquireCount int64 + emptyAcquireWaitTime time.Duration + canceledAcquireCount atomic.Int64 + + resetCount int + + baseAcquireCtx context.Context + cancelBaseAcquireCtx context.CancelFunc + closed bool +} + +type Config[T any] struct { + Constructor Constructor[T] + Destructor Destructor[T] + MaxSize int32 +} + +// NewPool creates a new pool. Returns an error iff MaxSize is less than 1. +func NewPool[T any](config *Config[T]) (*Pool[T], error) { + if config.MaxSize < 1 { + return nil, errors.New("MaxSize must be >= 1") + } + + baseAcquireCtx, cancelBaseAcquireCtx := context.WithCancel(context.Background()) + + return &Pool[T]{ + acquireSem: semaphore.NewWeighted(int64(config.MaxSize)), + idleResources: genstack.NewGenStack[*Resource[T]](), + maxSize: config.MaxSize, + constructor: config.Constructor, + destructor: config.Destructor, + baseAcquireCtx: baseAcquireCtx, + cancelBaseAcquireCtx: cancelBaseAcquireCtx, + }, nil +} + +// Close destroys all resources in the pool and rejects future Acquire calls. +// Blocks until all resources are returned to pool and destroyed. +func (p *Pool[T]) Close() { + defer p.destructWG.Wait() + + p.mux.Lock() + defer p.mux.Unlock() + + if p.closed { + return + } + p.closed = true + p.cancelBaseAcquireCtx() + + for res, ok := p.idleResources.Pop(); ok; res, ok = p.idleResources.Pop() { + p.allResources.remove(res) + go p.destructResourceValue(res.value) + } +} + +// Stat is a snapshot of Pool statistics. +type Stat struct { + constructingResources int32 + acquiredResources int32 + idleResources int32 + maxResources int32 + acquireCount int64 + acquireDuration time.Duration + emptyAcquireCount int64 + emptyAcquireWaitTime time.Duration + canceledAcquireCount int64 +} + +// TotalResources returns the total number of resources currently in the pool. +// The value is the sum of ConstructingResources, AcquiredResources, and +// IdleResources. +func (s *Stat) TotalResources() int32 { + return s.constructingResources + s.acquiredResources + s.idleResources +} + +// ConstructingResources returns the number of resources with construction in progress in +// the pool. +func (s *Stat) ConstructingResources() int32 { + return s.constructingResources +} + +// AcquiredResources returns the number of currently acquired resources in the pool. +func (s *Stat) AcquiredResources() int32 { + return s.acquiredResources +} + +// IdleResources returns the number of currently idle resources in the pool. +func (s *Stat) IdleResources() int32 { + return s.idleResources +} + +// MaxResources returns the maximum size of the pool. +func (s *Stat) MaxResources() int32 { + return s.maxResources +} + +// AcquireCount returns the cumulative count of successful acquires from the pool. +func (s *Stat) AcquireCount() int64 { + return s.acquireCount +} + +// AcquireDuration returns the total duration of all successful acquires from +// the pool. +func (s *Stat) AcquireDuration() time.Duration { + return s.acquireDuration +} + +// EmptyAcquireCount returns the cumulative count of successful acquires from the pool +// that waited for a resource to be released or constructed because the pool was +// empty. +func (s *Stat) EmptyAcquireCount() int64 { + return s.emptyAcquireCount +} + +// EmptyAcquireWaitTime returns the cumulative time waited for successful acquires +// from the pool for a resource to be released or constructed because the pool was +// empty. +func (s *Stat) EmptyAcquireWaitTime() time.Duration { + return s.emptyAcquireWaitTime +} + +// CanceledAcquireCount returns the cumulative count of acquires from the pool +// that were canceled by a context. +func (s *Stat) CanceledAcquireCount() int64 { + return s.canceledAcquireCount +} + +// Stat returns the current pool statistics. +func (p *Pool[T]) Stat() *Stat { + p.mux.Lock() + defer p.mux.Unlock() + + s := &Stat{ + maxResources: p.maxSize, + acquireCount: p.acquireCount, + emptyAcquireCount: p.emptyAcquireCount, + emptyAcquireWaitTime: p.emptyAcquireWaitTime, + canceledAcquireCount: p.canceledAcquireCount.Load(), + acquireDuration: p.acquireDuration, + } + + for _, res := range p.allResources { + switch res.status { + case resourceStatusConstructing: + s.constructingResources += 1 + case resourceStatusIdle: + s.idleResources += 1 + case resourceStatusAcquired: + s.acquiredResources += 1 + } + } + + return s +} + +// tryAcquireIdleResource checks if there is any idle resource. If there is +// some, this method removes it from idle list and returns it. If the idle pool +// is empty, this method returns nil and doesn't modify the idleResources slice. +// +// WARNING: Caller of this method must hold the pool mutex! +func (p *Pool[T]) tryAcquireIdleResource() *Resource[T] { + res, ok := p.idleResources.Pop() + if !ok { + return nil + } + + res.status = resourceStatusAcquired + return res +} + +// createNewResource creates a new resource and inserts it into list of pool +// resources. +// +// WARNING: Caller of this method must hold the pool mutex! +func (p *Pool[T]) createNewResource() *Resource[T] { + res := &Resource[T]{ + pool: p, + creationTime: time.Now(), + lastUsedNano: nanotime(), + poolResetCount: p.resetCount, + status: resourceStatusConstructing, + } + + p.allResources.append(res) + p.destructWG.Add(1) + + return res +} + +// Acquire gets a resource from the pool. If no resources are available and the pool is not at maximum capacity it will +// create a new resource. If the pool is at maximum capacity it will block until a resource is available. ctx can be +// used to cancel the Acquire. +// +// If Acquire creates a new resource the resource constructor function will receive a context that delegates Value() to +// ctx. Canceling ctx will cause Acquire to return immediately but it will not cancel the resource creation. This avoids +// the problem of it being impossible to create resources when the time to create a resource is greater than any one +// caller of Acquire is willing to wait. +func (p *Pool[T]) Acquire(ctx context.Context) (_ *Resource[T], err error) { + select { + case <-ctx.Done(): + p.canceledAcquireCount.Add(1) + return nil, ctx.Err() + default: + } + + return p.acquire(ctx) +} + +// acquire is a continuation of Acquire function that doesn't check context +// validity. +// +// This function exists solely only for benchmarking purposes. +func (p *Pool[T]) acquire(ctx context.Context) (*Resource[T], error) { + startNano := nanotime() + + var waitedForLock bool + if !p.acquireSem.TryAcquire(1) { + waitedForLock = true + err := p.acquireSem.Acquire(ctx, 1) + if err != nil { + p.canceledAcquireCount.Add(1) + return nil, err + } + } + + p.mux.Lock() + if p.closed { + p.acquireSem.Release(1) + p.mux.Unlock() + return nil, ErrClosedPool + } + + // If a resource is available in the pool. + if res := p.tryAcquireIdleResource(); res != nil { + waitTime := time.Duration(nanotime() - startNano) + if waitedForLock { + p.emptyAcquireCount += 1 + p.emptyAcquireWaitTime += waitTime + } + p.acquireCount += 1 + p.acquireDuration += waitTime + p.mux.Unlock() + return res, nil + } + + if len(p.allResources) >= int(p.maxSize) { + // Unreachable code. + panic("bug: semaphore allowed more acquires than pool allows") + } + + // The resource is not idle, but there is enough space to create one. + res := p.createNewResource() + p.mux.Unlock() + + res, err := p.initResourceValue(ctx, res) + if err != nil { + return nil, err + } + + p.mux.Lock() + defer p.mux.Unlock() + + p.emptyAcquireCount += 1 + p.acquireCount += 1 + waitTime := time.Duration(nanotime() - startNano) + p.acquireDuration += waitTime + p.emptyAcquireWaitTime += waitTime + + return res, nil +} + +func (p *Pool[T]) initResourceValue(ctx context.Context, res *Resource[T]) (*Resource[T], error) { + // Create the resource in a goroutine to immediately return from Acquire + // if ctx is canceled without also canceling the constructor. + // + // See: + // - https://github.com/jackc/pgx/issues/1287 + // - https://github.com/jackc/pgx/issues/1259 + constructErrChan := make(chan error) + go func() { + constructorCtx := newValueCancelCtx(ctx, p.baseAcquireCtx) + value, err := p.constructor(constructorCtx) + if err != nil { + p.mux.Lock() + p.allResources.remove(res) + p.destructWG.Done() + + // The resource won't be acquired because its + // construction failed. We have to allow someone else to + // take that resouce. + p.acquireSem.Release(1) + p.mux.Unlock() + + select { + case constructErrChan <- err: + case <-ctx.Done(): + // The caller is cancelled, so no-one awaits the + // error. This branch avoid goroutine leak. + } + return + } + + // The resource is already in p.allResources where it might be read. So we need to acquire the lock to update its + // status. + p.mux.Lock() + res.value = value + res.status = resourceStatusAcquired + p.mux.Unlock() + + // This select works because the channel is unbuffered. + select { + case constructErrChan <- nil: + case <-ctx.Done(): + p.releaseAcquiredResource(res, res.lastUsedNano) + } + }() + + select { + case <-ctx.Done(): + p.canceledAcquireCount.Add(1) + return nil, ctx.Err() + case err := <-constructErrChan: + if err != nil { + return nil, err + } + return res, nil + } +} + +// TryAcquire gets a resource from the pool if one is immediately available. If not, it returns ErrNotAvailable. If no +// resources are available but the pool has room to grow, a resource will be created in the background. ctx is only +// used to cancel the background creation. +func (p *Pool[T]) TryAcquire(ctx context.Context) (*Resource[T], error) { + if !p.acquireSem.TryAcquire(1) { + return nil, ErrNotAvailable + } + + p.mux.Lock() + defer p.mux.Unlock() + + if p.closed { + p.acquireSem.Release(1) + return nil, ErrClosedPool + } + + // If a resource is available now + if res := p.tryAcquireIdleResource(); res != nil { + p.acquireCount += 1 + return res, nil + } + + if len(p.allResources) >= int(p.maxSize) { + // Unreachable code. + panic("bug: semaphore allowed more acquires than pool allows") + } + + res := p.createNewResource() + go func() { + value, err := p.constructor(ctx) + + p.mux.Lock() + defer p.mux.Unlock() + // We have to create the resource and only then release the + // semaphore - For the time being there is no resource that + // someone could acquire. + defer p.acquireSem.Release(1) + + if err != nil { + p.allResources.remove(res) + p.destructWG.Done() + return + } + + res.value = value + res.status = resourceStatusIdle + p.idleResources.Push(res) + }() + + return nil, ErrNotAvailable +} + +// acquireSemAll tries to acquire num free tokens from sem. This function is +// guaranteed to acquire at least the lowest number of tokens that has been +// available in the semaphore during runtime of this function. +// +// For the time being, semaphore doesn't allow to acquire all tokens atomically +// (see https://github.com/golang/sync/pull/19). We simulate this by trying all +// powers of 2 that are less or equal to num. +// +// For example, let's immagine we have 19 free tokens in the semaphore which in +// total has 24 tokens (i.e. the maxSize of the pool is 24 resources). Then if +// num is 24, the log2Uint(24) is 4 and we try to acquire 16, 8, 4, 2 and 1 +// tokens. Out of those, the acquire of 16, 2 and 1 tokens will succeed. +// +// Naturally, Acquires and Releases of the semaphore might take place +// concurrently. For this reason, it's not guaranteed that absolutely all free +// tokens in the semaphore will be acquired. But it's guaranteed that at least +// the minimal number of tokens that has been present over the whole process +// will be acquired. This is sufficient for the use-case we have in this +// package. +// +// TODO: Replace this with acquireSem.TryAcquireAll() if it gets to +// upstream. https://github.com/golang/sync/pull/19 +func acquireSemAll(sem *semaphore.Weighted, num int) int { + if sem.TryAcquire(int64(num)) { + return num + } + + var acquired int + for i := int(log2Int(num)); i >= 0; i-- { + val := 1 << i + if sem.TryAcquire(int64(val)) { + acquired += val + } + } + + return acquired +} + +// AcquireAllIdle acquires all currently idle resources. Its intended use is for +// health check and keep-alive functionality. It does not update pool +// statistics. +func (p *Pool[T]) AcquireAllIdle() []*Resource[T] { + p.mux.Lock() + defer p.mux.Unlock() + + if p.closed { + return nil + } + + numIdle := p.idleResources.Len() + if numIdle == 0 { + return nil + } + + // In acquireSemAll we use only TryAcquire and not Acquire. Because + // TryAcquire cannot block, the fact that we hold mutex locked and try + // to acquire semaphore cannot result in dead-lock. + // + // Because the mutex is locked, no parallel Release can run. This + // implies that the number of tokens can only decrease because some + // Acquire/TryAcquire call can consume the semaphore token. Consequently + // acquired is always less or equal to numIdle. Moreover if acquired < + // numIdle, then there are some parallel Acquire/TryAcquire calls that + // will take the remaining idle connections. + acquired := acquireSemAll(p.acquireSem, numIdle) + + idle := make([]*Resource[T], acquired) + for i := range idle { + res, _ := p.idleResources.Pop() + res.status = resourceStatusAcquired + idle[i] = res + } + + // We have to bump the generation to ensure that Acquire/TryAcquire + // calls running in parallel (those which caused acquired < numIdle) + // will consume old connections and not freshly released connections + // instead. + p.idleResources.NextGen() + + return idle +} + +// CreateResource constructs a new resource without acquiring it. It goes straight in the IdlePool. If the pool is full +// it returns an error. It can be useful to maintain warm resources under little load. +func (p *Pool[T]) CreateResource(ctx context.Context) error { + if !p.acquireSem.TryAcquire(1) { + return ErrNotAvailable + } + + p.mux.Lock() + if p.closed { + p.acquireSem.Release(1) + p.mux.Unlock() + return ErrClosedPool + } + + if len(p.allResources) >= int(p.maxSize) { + p.acquireSem.Release(1) + p.mux.Unlock() + return ErrNotAvailable + } + + res := p.createNewResource() + p.mux.Unlock() + + value, err := p.constructor(ctx) + p.mux.Lock() + defer p.mux.Unlock() + defer p.acquireSem.Release(1) + if err != nil { + p.allResources.remove(res) + p.destructWG.Done() + return err + } + + res.value = value + res.status = resourceStatusIdle + + // If closed while constructing resource then destroy it and return an error + if p.closed { + go p.destructResourceValue(res.value) + return ErrClosedPool + } + + p.idleResources.Push(res) + + return nil +} + +// Reset destroys all resources, but leaves the pool open. It is intended for use when an error is detected that would +// disrupt all resources (such as a network interruption or a server state change). +// +// It is safe to reset a pool while resources are checked out. Those resources will be destroyed when they are returned +// to the pool. +func (p *Pool[T]) Reset() { + p.mux.Lock() + defer p.mux.Unlock() + + p.resetCount++ + + for res, ok := p.idleResources.Pop(); ok; res, ok = p.idleResources.Pop() { + p.allResources.remove(res) + go p.destructResourceValue(res.value) + } +} + +// releaseAcquiredResource returns res to the the pool. +func (p *Pool[T]) releaseAcquiredResource(res *Resource[T], lastUsedNano int64) { + p.mux.Lock() + defer p.mux.Unlock() + defer p.acquireSem.Release(1) + + if p.closed || res.poolResetCount != p.resetCount { + p.allResources.remove(res) + go p.destructResourceValue(res.value) + } else { + res.lastUsedNano = lastUsedNano + res.status = resourceStatusIdle + p.idleResources.Push(res) + } +} + +// Remove removes res from the pool and closes it. If res is not part of the +// pool Remove will panic. +func (p *Pool[T]) destroyAcquiredResource(res *Resource[T]) { + p.destructResourceValue(res.value) + + p.mux.Lock() + defer p.mux.Unlock() + defer p.acquireSem.Release(1) + + p.allResources.remove(res) +} + +func (p *Pool[T]) hijackAcquiredResource(res *Resource[T]) { + p.mux.Lock() + defer p.mux.Unlock() + defer p.acquireSem.Release(1) + + p.allResources.remove(res) + res.status = resourceStatusHijacked + p.destructWG.Done() // not responsible for destructing hijacked resources +} + +func (p *Pool[T]) destructResourceValue(value T) { + p.destructor(value) + p.destructWG.Done() +} diff --git a/vendor/github.com/jackc/puddle/v2/resource_list.go b/vendor/github.com/jackc/puddle/v2/resource_list.go new file mode 100644 index 00000000..b2430959 --- /dev/null +++ b/vendor/github.com/jackc/puddle/v2/resource_list.go @@ -0,0 +1,28 @@ +package puddle + +type resList[T any] []*Resource[T] + +func (l *resList[T]) append(val *Resource[T]) { *l = append(*l, val) } + +func (l *resList[T]) popBack() *Resource[T] { + idx := len(*l) - 1 + val := (*l)[idx] + (*l)[idx] = nil // Avoid memory leak + *l = (*l)[:idx] + + return val +} + +func (l *resList[T]) remove(val *Resource[T]) { + for i, elem := range *l { + if elem == val { + lastIdx := len(*l) - 1 + (*l)[i] = (*l)[lastIdx] + (*l)[lastIdx] = nil // Avoid memory leak + (*l) = (*l)[:lastIdx] + return + } + } + + panic("BUG: removeResource could not find res in slice") +} diff --git a/vendor/github.com/jaypipes/ghw/Dockerfile b/vendor/github.com/jaypipes/ghw/Dockerfile index cbd587d6..ee5d2761 100644 --- a/vendor/github.com/jaypipes/ghw/Dockerfile +++ b/vendor/github.com/jaypipes/ghw/Dockerfile @@ -1,8 +1,6 @@ -FROM golang:1.15-buster as builder +FROM golang:1.21-buster as builder WORKDIR /go/src/github.com/jaypipes/ghw -# Force the go compiler to use modules. -ENV GO111MODULE=on ENV GOPROXY=direct # go.mod and go.sum go into their own layers. @@ -16,7 +14,7 @@ COPY . . RUN CGO_ENABLED=0 go build -o ghwc ./cmd/ghwc/ -FROM alpine:3.7 +FROM alpine:3.7@sha256:8421d9a84432575381bfabd248f1eb56f3aa21d9d7cd2511583c68c9b7511d10 RUN apk add --no-cache ethtool WORKDIR /bin diff --git a/vendor/github.com/jaypipes/ghw/Makefile b/vendor/github.com/jaypipes/ghw/Makefile index c7e0db40..75d2bcc8 100644 --- a/vendor/github.com/jaypipes/ghw/Makefile +++ b/vendor/github.com/jaypipes/ghw/Makefile @@ -1,39 +1,16 @@ -VENDOR := vendor -PKGS := $(shell go list ./... | grep -v /$(VENDOR)/) -SRC = $(shell find . -type f -name '*.go' -not -path "*/$(VENDOR)/*") -BIN_DIR := $(GOPATH)/bin -GOMETALINTER := $(BIN_DIR)/gometalinter - .PHONY: test test: vet - go test $(PKGS) - -$(GOMETALINTER): - go get -u github.com/alecthomas/gometalinter - $(GOMETALINTER) --install &> /dev/null - -.PHONY: lint -lint: $(GOMETALINTER) - $(GOMETALINTER) ./... --vendor + go test -v ./... .PHONY: fmt fmt: @echo "Running gofmt on all sources..." - @gofmt -s -l -w $(SRC) + @gofmt -s -l -w . .PHONY: fmtcheck fmtcheck: - @bash -c "diff -u <(echo -n) <(gofmt -d $(SRC))" + @bash -c "diff -u <(echo -n) <(gofmt -d .)" .PHONY: vet vet: - go vet $(PKGS) - -.PHONY: cover -cover: - $(shell [ -e coverage.out ] && rm coverage.out) - @echo "mode: count" > coverage-all.out - $(foreach pkg,$(PKGS),\ - go test -coverprofile=coverage.out -covermode=count $(pkg);\ - tail -n +2 coverage.out >> coverage-all.out;) - go tool cover -html=coverage-all.out -o=coverage-all.html + go vet ./... diff --git a/vendor/github.com/jaypipes/ghw/README.md b/vendor/github.com/jaypipes/ghw/README.md index 1f3f04a7..c1cdb106 100644 --- a/vendor/github.com/jaypipes/ghw/README.md +++ b/vendor/github.com/jaypipes/ghw/README.md @@ -1,13 +1,14 @@ -# `ghw` - Golang HardWare discovery/inspection library +# `ghw` - Go HardWare discovery/inspection library -[![Build Status](https://github.com/jaypipes/ghw/actions/workflows/go.yml/badge.svg?branch=main)](https://github.com/jaypipes/ghw/actions) +[![Go Reference](https://pkg.go.dev/badge/github.com/jaypipes/ghw.svg)](https://pkg.go.dev/github.com/jaypipes/ghw) [![Go Report Card](https://goreportcard.com/badge/github.com/jaypipes/ghw)](https://goreportcard.com/report/github.com/jaypipes/ghw) +[![Build Status](https://github.com/jaypipes/ghw/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/jaypipes/ghw/actions) [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](CODE_OF_CONDUCT.md) ![ghw mascot](images/ghw-gopher.png) -`ghw` is a small Golang library providing hardware inspection and discovery -for Linux and Windows. There currently exists partial support for MacOSX. +`ghw` is a Go library providing hardware inspection and discovery for Linux and +Windows. There currently exists partial support for MacOSX. ## Design Principles @@ -20,13 +21,12 @@ for Linux and Windows. There currently exists partial support for MacOSX. Elevated privileges are indeed required to query for some information, but `ghw` will never error out if blocked from reading that information. Instead, `ghw` will print a warning message about the information that could not be - retrieved. You may disable these warning messages with `GHW_DISABLE_WARNINGS` - environment variable. + retrieved. You may disable these warning messages with the + `GHW_DISABLE_WARNINGS` environment variable. * Well-documented code and plenty of example code - The code itself should be well-documented with lots of usage - examples. + The code itself should be well-documented with lots of usage examples. * Interfaces should be consistent across modules @@ -42,250 +42,158 @@ and **capabilities**. It is important to point out that `ghw` does **NOT** report information that is temporary or variable. It is **NOT** a system monitor nor is it an appropriate tool for gathering data points for metrics that change over time. If you are -looking for a system that tracks usage of CPU, memory, network I/O or disk I/O, -there are plenty of great open source tools that do this! Check out the +looking for a system that tracks **usage** of CPU, memory, network I/O or disk +I/O, there are plenty of great open source tools that do this! Check out the [Prometheus project](https://prometheus.io/) for a great example. ## Usage -You can use the functions in `ghw` to determine various hardware-related -information about the host computer: - -* [Memory](#memory) -* [CPU](#cpu) -* [Block storage](#block-storage) -* [Topology](#topology) -* [Network](#network) -* [PCI](#pci) -* [GPU](#gpu) -* [Chassis](#chassis) -* [BIOS](#bios) -* [Baseboard](#baseboard) -* [Product](#product) -* [YAML and JSON serialization](#serialization) +`ghw` has functions that return an `Info` object about a particular hardware +domain (e.g. CPU, Memory, Block storage, etc). + +Use the following functions in `ghw` to inspect information about the host +hardware: + +* [`ghw.CPU()`](#cpu) +* [`ghw.Memory()`](#memory) +* [`ghw.Block()`](#block-storage) (block storage) +* [`ghw.Topology()`](#topology) (processor architecture, NUMA topology and + memory cache hierarchy) +* [`ghw.Network()`](#network) +* [`ghw.PCI()`](#pci) +* [`ghw.GPU()`](#gpu) (graphical processing unit) +* [`ghw.Chassis()`](#chassis) +* [`ghw.BIOS()`](#bios) +* [`ghw.Baseboard()`](#baseboard) +* [`ghw.Product()`](#product) -### Overriding the root mountpoint `ghw` uses - -The default root mountpoint that `ghw` uses when looking for information about -the host system is `/`. So, for example, when looking up CPU information on a -Linux system, `ghw.CPU()` will use the path `/proc/cpuinfo`. - -If you are calling `ghw` from a system that has an alternate root mountpoint, -you can either set the `GHW_CHROOT` environment variable to that alternate -path, or call the module constructor function with the `ghw.WithChroot()` -modifier. - -For example, if you are executing from within an application container that has -bind-mounted the root host filesystem to the mount point `/host`, you would set -`GHW_CHROOT` to `/host` so that `ghw` can find `/proc/cpuinfo` at -`/host/proc/cpuinfo`. - -Alternately, you can use the `ghw.WithChroot()` function like so: - -```go -cpu, err := ghw.CPU(ghw.WithChroot("/host")) -``` - -### Overriding the per-mountpoint `ghw` uses - -When running inside containers, it could be a bit cumbersome to just override -the root mountpoint. Inside containers, when granting access to the host -file systems, is more common to bind-mount them in non standard location, -like `/sys` on `/host-sys` or `/proc` on `/host-proc`. -Is rarer to mount them in a common subtree (e.g. `/sys` on `/host/sys` and - `/proc` on /host/proc...) - -To better cover this use case, `ghw` allows to *programmatically* override -the initial component of filesystems subtrees, allowing to access `sysfs` -(or `procfs` or...) mounted on non-standard locations. - - -```go -cpu, err := ghw.CPU(ghw.WithPathOverrides(ghw.PathOverrides{ - "/proc": "/host-proc", - "/sys": "/host-sys", -})) -``` - -Please note -- this feature works in addition and is composable with the - `WithChroot`/`GHW_CHROOT` feature. -- `ghw` doesn't support yet environs variable to override individual - mountpoints, because this could lead to significant environs variables - proliferation. - -### Consuming snapshots - -You can make `ghw` read from snapshots (created with `ghw-snapshot`) using -environment variables or programmatically. -Please check `SNAPSHOT.md` to learn more about how ghw creates and consumes -snapshots. - -The environment variable `GHW_SNAPSHOT_PATH` let users specify a snapshot -that `ghw` will automatically consume. All the needed chroot changes will be -automatically performed. By default, the snapshot is unpacked on a temporary -directory managed by `ghw`, and cleaned up when no longer needed, avoiding -leftovers. - -The rest of the environment variables are relevant iff `GHW_SNAPSHOT_PATH` is given. -`GHW_SNAPSHOT_ROOT` let users specify the directory -on which the snapshot should be unpacked. This moves the ownership of that -directory from `ghw` to users. For this reason, `ghw` will *not* clean up automatically -the content unpacked in `GHW_SNAPSHOT_ROOT`. - -`GHW_SNAPSHOT_EXCLUSIVE` is relevant iff `GHW_SNAPSHOT_ROOT` is given. -Set it to any value to toggle it on. This tells `ghw` that the directory is meant -only to contain the given snapshot, thus `ghw` will *not* attempt to unpack it -(and will go ahead silently!) unless the directory is empty. -You can use both `GHW_SNAPSHOT_ROOT` and `GHW_SNAPSHOT_EXCLUSIVE` to make sure -`ghw` unpacks the snapshot only once regardless of how many `ghw` packages -(e.g. cpu, memory) access it. - -Set `GHW_SNAPSHOT_PRESERVE` to any value to enable it. If set, `ghw` will *not* -clean up the unpacked snapshot once done, leaving it into the system. +### CPU -```go -cpu, err := ghw.CPU(ghw.WithSnapshot(ghw.SnapshotOptions{ - Path: "/path/to/linux-amd64-d4771ed3300339bc75f856be09fc6430.tar.gz", -})) +The `ghw.CPU()` function returns a `ghw.CPUInfo` struct that contains +information about the CPUs on the host system. +`ghw.CPUInfo` contains the following fields: -myRoot := "/my/safe/directory" -cpu, err := ghw.CPU(ghw.WithSnapshot(ghw.SnapshotOptions{ - Path: "/path/to/linux-amd64-d4771ed3300339bc75f856be09fc6430.tar.gz", - Root: &myRoot, -})) +* `ghw.CPUInfo.TotalCores` has the total number of physical cores the host + system contains +* `ghw.CPUInfo.TotalThreads` has the total number of hardware threads the + host system contains +* `ghw.CPUInfo.Processors` is an array of `ghw.Processor` structs, one for each + physical processor package contained in the host -myOtherRoot := "/my/other/safe/directory" -cpu, err := ghw.CPU(ghw.WithSnapshot(ghw.SnapshotOptions{ - Path: "/path/to/linux-amd64-d4771ed3300339bc75f856be09fc6430.tar.gz", - Root: &myOtherRoot, - Exclusive: true, -})) -``` +Each `ghw.Processor` struct contains a number of fields: -### Creating snapshots +* `ghw.Processor.ID` is the physical processor `uint32` ID according to the + system +* `ghw.Processor.NumCores` is the number of physical cores in the processor + package +* `ghw.Processor.NumThreads` is the number of hardware threads in the processor + package +* `ghw.Processor.Vendor` is a string containing the vendor name +* `ghw.Processor.Model` is a string containing the vendor's model name +* `ghw.Processor.Capabilities` (Linux only) is an array of strings indicating + the features the processor has enabled +* `ghw.Processor.Cores` (Linux only) is an array of `ghw.ProcessorCore` structs + that are packed onto this physical processor -You can create ghw snapshots in two ways. -You can just consume the `ghw-snapshot` tool, or you can create them programmatically -from your golang code. We explore now the latter case. +A `ghw.ProcessorCore` has the following fields: -Snapshotting takes two phases: -1. clone the relevant pseudofiles/pseudodirectories into a temporary tree - This tree is usually deleted once the packing is successful. -2. pack the cloned tree into a tar.gz +* `ghw.ProcessorCore.ID` is the `uint32` identifier that the host gave this + core. Note that this does *not* necessarily equate to a zero-based index of + the core within a physical package. For example, the core IDs for an Intel Core + i7 are 0, 1, 2, 8, 9, and 10 +* `ghw.ProcessorCore.NumThreads` is the number of hardware threads associated + with the core +* `ghw.ProcessorCore.LogicalProcessors` is an array of ints representing the + logical processor IDs assigned to any processing unit for the core. These are + sometimes called the "thread siblings". Logical processor IDs are the + *zero-based* index of the processor on the host and are *not* related to the + core ID. ```go +package main import ( "fmt" - "io/ioutil" - "os" + "math" + "strings" - "github.com/jaypipes/ghw/pkg/snapshot" + "github.com/jaypipes/ghw" ) -// ... - -scratchDir, err := ioutil.TempDir("", "ghw-snapshot-*") -if err != nil { - fmt.Printf("Error creating clone directory: %v", err) -} -defer os.RemoveAll(scratchDir) - -// this step clones all the files and directories ghw cares about -if err := snapshot.CloneTreeInto(scratchDir); err != nil { - fmt.Printf("error cloning into %q: %v", scratchDir, err) -} - -// optionally, you may add extra content into your snapshot. -// ghw will ignore the extra content. -// Glob patterns like `filepath.Glob` are supported. -fileSpecs := []string{ - "/proc/cmdline", -} +func main() { + cpu, err := ghw.CPU() + if err != nil { + fmt.Printf("Error getting CPU info: %v", err) + } -// options allows the client code to optionally deference symlinks, or copy -// them into the cloned tree as symlinks -var opts *snapshot.CopyFileOptions -if err := snapshot.CopyFilesInto(fileSpecs, scratchDir, opts); err != nil { - fmt.Printf("error cloning extra files into %q: %v", scratchDir, err) -} + fmt.Printf("%v\n", cpu) -// automates the creation of the gzipped tarball out of the given tree. -if err := snapshot.PackFrom("my-snapshot.tgz", scratchDir); err != nil { - fmt.Printf("error packing %q into %q: %v", scratchDir, *output, err) + for _, proc := range cpu.Processors { + fmt.Printf(" %v\n", proc) + for _, core := range proc.Cores { + fmt.Printf(" %v\n", core) + } + if len(proc.Capabilities) > 0 { + // pretty-print the (large) block of capability strings into rows + // of 6 capability strings + rows := int(math.Ceil(float64(len(proc.Capabilities)) / float64(6))) + for row := 1; row < rows; row = row + 1 { + rowStart := (row * 6) - 1 + rowEnd := int(math.Min(float64(rowStart+6), float64(len(proc.Capabilities)))) + rowElems := proc.Capabilities[rowStart:rowEnd] + capStr := strings.Join(rowElems, " ") + if row == 1 { + fmt.Printf(" capabilities: [%s\n", capStr) + } else if rowEnd < len(proc.Capabilities) { + fmt.Printf(" %s\n", capStr) + } else { + fmt.Printf(" %s]\n", capStr) + } + } + } + } } ``` -### Disabling warning messages - -When `ghw` isn't able to retrieve some information, it may print certain -warning messages to `stderr`. To disable these warnings, simply set the -`GHW_DISABLE_WARNINGS` environs variable: - -``` -$ ghwc memory -WARNING: -Could not determine total physical bytes of memory. This may -be due to the host being a virtual machine or container with no -/var/log/syslog file, or the current user may not have necessary -privileges to read the syslog. We are falling back to setting the -total physical amount of memory to the total usable amount of memory -memory (24GB physical, 24GB usable) -``` +Example output from my personal workstation: ``` -$ GHW_DISABLE_WARNINGS=1 ghwc memory -memory (24GB physical, 24GB usable) -``` - -You can disable warning programmatically using the `WithDisableWarnings` option: - -```go - -import ( - "github.com/jaypipes/ghw" -) - -mem, err := ghw.Memory(ghw.WithDisableWarnings()) +cpu (1 physical package, 6 cores, 12 hardware threads) + physical package #0 (6 cores, 12 hardware threads) + processor core #0 (2 threads), logical processors [0 6] + processor core #1 (2 threads), logical processors [1 7] + processor core #2 (2 threads), logical processors [2 8] + processor core #3 (2 threads), logical processors [3 9] + processor core #4 (2 threads), logical processors [4 10] + processor core #5 (2 threads), logical processors [5 11] + capabilities: [msr pae mce cx8 apic sep + mtrr pge mca cmov pat pse36 + clflush dts acpi mmx fxsr sse + sse2 ss ht tm pbe syscall + nx pdpe1gb rdtscp lm constant_tsc arch_perfmon + pebs bts rep_good nopl xtopology nonstop_tsc + cpuid aperfmperf pni pclmulqdq dtes64 monitor + ds_cpl vmx est tm2 ssse3 cx16 + xtpr pdcm pcid sse4_1 sse4_2 popcnt + aes lahf_lm pti retpoline tpr_shadow vnmi + flexpriority ept vpid dtherm ida arat] ``` -`WithDisableWarnings` is a alias for the `WithNullAlerter` option, which in turn -leverages the more general `Alerter` feature of ghw. - -You may supply a `Alerter` to ghw to redirect all the warnings there, like -logger objects (see for example golang's stdlib `log.Logger`). -`Alerter` is in fact the minimal logging interface `ghw needs. -To learn more, please check the `option.Alerter` interface and the `ghw.WithAlerter()` -function. - ### Memory -The basic building block of the memory support in ghw is the `ghw.MemoryArea` struct. -A "memory area" is a block of memory which share common properties. In the simplest -case, the whole system memory fits in a single memory area; in more complex scenarios, -like multi-NUMA systems, many memory areas may be present in the system (e.g. one for -each NUMA cell). +The `ghw.Memory()` function returns a `ghw.MemoryInfo` struct that contains +information about the RAM on the host system. -The `ghw.MemoryArea` struct contains the following fields: +`ghw.MemoryInfo` contains the following fields: * `ghw.MemoryInfo.TotalPhysicalBytes` contains the amount of physical memory on the host * `ghw.MemoryInfo.TotalUsableBytes` contains the amount of memory the system can actually use. Usable memory accounts for things like the kernel's - resident memory size and some reserved system bits - -Information about the host computer's memory can be retrieved using the -`ghw.Memory()` function which returns a pointer to a `ghw.MemoryInfo` struct. -`ghw.MemoryInfo` is a superset of `ghw.MemoryArea`. Thus, it contains all the -fields found in the `ghw.MemoryArea` (replicated for clarity) plus some: - -* `ghw.MemoryInfo.TotalPhysicalBytes` contains the amount of physical memory on - the host -* `ghw.MemoryInfo.TotalUsableBytes` contains the amount of memory the - system can actually use. Usable memory accounts for things like the kernel's - resident memory size and some reserved system bits + resident memory size and some reserved system bits. Please note this value is + **NOT** the amount of memory currently in use by processes in the system. See + [the discussion][#physical-versus-usage-memory] about the difference. * `ghw.MemoryInfo.SupportedPageSizes` is an array of integers representing the size, in bytes, of memory pages the system supports * `ghw.MemoryInfo.Modules` is an array of pointers to `ghw.MemoryModule` @@ -373,153 +281,43 @@ system with a Linux GRUB bootloader: The bootloader consumes 3832720 bytes of RAM ``` -### CPU - -The `ghw.CPU()` function returns a `ghw.CPUInfo` struct that contains -information about the CPUs on the host system. - -`ghw.CPUInfo` contains the following fields: - -* `ghw.CPUInfo.TotalCores` has the total number of physical cores the host - system contains -* `ghw.CPUInfo.TotalThreads` has the total number of hardware threads the - host system contains -* `ghw.CPUInfo.Processors` is an array of `ghw.Processor` structs, one for each - physical processor package contained in the host - -Each `ghw.Processor` struct contains a number of fields: - -* `ghw.Processor.ID` is the physical processor `uint32` ID according to the - system -* `ghw.Processor.NumCores` is the number of physical cores in the processor - package -* `ghw.Processor.NumThreads` is the number of hardware threads in the processor - package -* `ghw.Processor.Vendor` is a string containing the vendor name -* `ghw.Processor.Model` is a string containing the vendor's model name -* `ghw.Processor.Capabilities` is an array of strings indicating the features - the processor has enabled -* `ghw.Processor.Cores` is an array of `ghw.ProcessorCore` structs that are - packed onto this physical processor - -A `ghw.ProcessorCore` has the following fields: - -* `ghw.ProcessorCore.ID` is the `uint32` identifier that the host gave this - core. Note that this does *not* necessarily equate to a zero-based index of - the core within a physical package. For example, the core IDs for an Intel Core - i7 are 0, 1, 2, 8, 9, and 10 -* `ghw.ProcessorCore.NumThreads` is the number of hardware threads associated - with the core -* `ghw.ProcessorCore.LogicalProcessors` is an array of ints representing the - logical processor IDs assigned to any processing unit for the core. These are - sometimes called the "thread siblings". Logical processor IDs are the - *zero-based* index of the processor on the host and are *not* related to the - core ID. - -```go -package main - -import ( - "fmt" - "math" - "strings" - - "github.com/jaypipes/ghw" -) - -func main() { - cpu, err := ghw.CPU() - if err != nil { - fmt.Printf("Error getting CPU info: %v", err) - } - - fmt.Printf("%v\n", cpu) - - for _, proc := range cpu.Processors { - fmt.Printf(" %v\n", proc) - for _, core := range proc.Cores { - fmt.Printf(" %v\n", core) - } - if len(proc.Capabilities) > 0 { - // pretty-print the (large) block of capability strings into rows - // of 6 capability strings - rows := int(math.Ceil(float64(len(proc.Capabilities)) / float64(6))) - for row := 1; row < rows; row = row + 1 { - rowStart := (row * 6) - 1 - rowEnd := int(math.Min(float64(rowStart+6), float64(len(proc.Capabilities)))) - rowElems := proc.Capabilities[rowStart:rowEnd] - capStr := strings.Join(rowElems, " ") - if row == 1 { - fmt.Printf(" capabilities: [%s\n", capStr) - } else if rowEnd < len(proc.Capabilities) { - fmt.Printf(" %s\n", capStr) - } else { - fmt.Printf(" %s]\n", capStr) - } - } - } - } -} -``` - -Example output from my personal workstation: - -``` -cpu (1 physical package, 6 cores, 12 hardware threads) - physical package #0 (6 cores, 12 hardware threads) - processor core #0 (2 threads), logical processors [0 6] - processor core #1 (2 threads), logical processors [1 7] - processor core #2 (2 threads), logical processors [2 8] - processor core #3 (2 threads), logical processors [3 9] - processor core #4 (2 threads), logical processors [4 10] - processor core #5 (2 threads), logical processors [5 11] - capabilities: [msr pae mce cx8 apic sep - mtrr pge mca cmov pat pse36 - clflush dts acpi mmx fxsr sse - sse2 ss ht tm pbe syscall - nx pdpe1gb rdtscp lm constant_tsc arch_perfmon - pebs bts rep_good nopl xtopology nonstop_tsc - cpuid aperfmperf pni pclmulqdq dtes64 monitor - ds_cpl vmx est tm2 ssse3 cx16 - xtpr pdcm pcid sse4_1 sse4_2 popcnt - aes lahf_lm pti retpoline tpr_shadow vnmi - flexpriority ept vpid dtherm ida arat] -``` - ### Block storage -Information about the host computer's local block storage is returned from the -`ghw.Block()` function. This function returns a pointer to a `ghw.BlockInfo` -struct. +The `ghw.Block()` function returns a `ghw.BlockInfo` struct that contains +information about the block storage on the host system. -The `ghw.BlockInfo` struct contains two fields: +`ghw.BlockInfo` contains the following fields: -* `ghw.BlockInfo.TotalPhysicalBytes` contains the amount of physical block - storage on the host +* `ghw.BlockInfo.TotalSizeBytes` contains the amount of physical block storage + on the host. * `ghw.BlockInfo.Disks` is an array of pointers to `ghw.Disk` structs, one for - each disk drive found by the system + each disk found by the system Each `ghw.Disk` struct contains the following fields: * `ghw.Disk.Name` contains a string with the short name of the disk, e.g. "sda" * `ghw.Disk.SizeBytes` contains the amount of storage the disk provides * `ghw.Disk.PhysicalBlockSizeBytes` contains the size of the physical blocks - used on the disk, in bytes + used on the disk, in bytes. This is typically the minimum amount of data that + will be written in a single write operation for the disk. * `ghw.Disk.IsRemovable` contains a boolean indicating if the disk drive is removable * `ghw.Disk.DriveType` is the type of drive. It is of type `ghw.DriveType` which has a `ghw.DriveType.String()` method that can be called to return a - string representation of the bus. This string will be "HDD", "FDD", "ODD", - or "SSD", which correspond to a hard disk drive (rotational), floppy drive, + string representation of the bus. This string will be `HDD`, `FDD`, `ODD`, + or `SSD`, which correspond to a hard disk drive (rotational), floppy drive, optical (CD/DVD) drive and solid-state drive. -* `ghw.Disk.StorageController` is the type of storage controller/drive. It is - of type `ghw.StorageController` which has a `ghw.StorageController.String()` - method that can be called to return a string representation of the bus. This - string will be "SCSI", "IDE", "virtio", "MMC", or "NVMe" -* `ghw.Disk.NUMANodeID` is the numeric index of the NUMA node this disk is - local to, or -1 +* `ghw.Disk.StorageController` is the type of storage controller. It is of type + `ghw.StorageController` which has a `ghw.StorageController.String()` method + that can be called to return a string representation of the bus. This string + will be `SCSI`, `IDE`, `virtio`, `MMC`, or `NVMe` +* `ghw.Disk.BusPath` (Linux, Darwin only) is the filepath to the bus used by + the disk. +* `ghw.Disk.NUMANodeID` (Linux only) is the numeric index of the NUMA node this + disk is local to, or -1 if the host system is not a NUMA system or is not + Linux. * `ghw.Disk.Vendor` contains a string with the name of the hardware vendor for - the disk drive + the disk * `ghw.Disk.Model` contains a string with the vendor-assigned disk model name * `ghw.Disk.SerialNumber` contains a string with the disk's serial number * `ghw.Disk.WWN` contains a string with the disk's @@ -530,27 +328,27 @@ Each `ghw.Disk` struct contains the following fields: Each `ghw.Partition` struct contains these fields: * `ghw.Partition.Name` contains a string with the short name of the partition, - e.g. "sda1" + e.g. `sda1` * `ghw.Partition.Label` contains the label for the partition itself. On Linux - systems, this is derived from the `ID_PART_ENTRY_NAME` udev entry for the - partition. + systems, this is derived from the `ID_PART_ENTRY_NAME` [udev][udev] entry for + the partition. * `ghw.Partition.FilesystemLabel` contains the label for the filesystem housed on the partition. On Linux systems, this is derived from the `ID_FS_NAME` - udev entry for the partition. + [udev][udev] entry for the partition. * `ghw.Partition.SizeBytes` contains the amount of storage the partition provides * `ghw.Partition.MountPoint` contains a string with the partition's mount - point, or "" if no mount point was discovered + point, or `""` if no mount point was discovered * `ghw.Partition.Type` contains a string indicated the filesystem type for the - partition, or "" if the system could not determine the type + partition, or `""` if the system could not determine the type * `ghw.Partition.IsReadOnly` is a bool indicating the partition is read-only * `ghw.Partition.Disk` is a pointer to the `ghw.Disk` object associated with - the partition. This will be `nil` if the `ghw.Partition` struct was returned - by the `ghw.DiskPartitions()` library function. + the partition. * `ghw.Partition.UUID` is a string containing the partition UUID on Linux, the - partition UUID on MacOS and nothing on Windows. On Linux - systems, this is derived from the `ID_PART_ENTRY_UUID` udev entry for the - partition. + partition UUID on MacOS and nothing on Windows. On Linux systems, this is + derived from the `ID_PART_ENTRY_UUID` [udev][udev] entry for the partition. + +[udev]: https://en.wikipedia.org/wiki/Udev ```go package main @@ -591,7 +389,7 @@ block storage (1 disk, 2TB physical storage) /dev/sda6 (2TB) [ext4] mounted@/ ``` -> Note that `ghw` looks in the udev runtime database for some information. If +> **NOTE**: `ghw` looks in the udev runtime database for some information. If > you are using `ghw` in a container, remember to bind mount `/dev/disk` and > `/run` into your container, otherwise `ghw` won't be able to query the udev > DB or sysfs paths for information. @@ -601,9 +399,9 @@ block storage (1 disk, 2TB physical storage) > **NOTE**: Topology support is currently Linux-only. Windows support is > [planned](https://github.com/jaypipes/ghw/issues/166). -Information about the host computer's architecture (NUMA vs. SMP), the host's -node layout and processor caches can be retrieved from the `ghw.Topology()` -function. This function returns a pointer to a `ghw.TopologyInfo` struct. +The `ghw.Topology()` function returns a `ghw.TopologyInfo` struct that contains +information about the host computer's architecture (NUMA vs. SMP), the host's +NUMA node layout and processor-specific memory caches. The `ghw.TopologyInfo` struct contains two fields: @@ -616,6 +414,8 @@ The `ghw.TopologyInfo` struct contains two fields: Each `ghw.TopologyNode` struct contains the following fields: * `ghw.TopologyNode.ID` is the system's `uint32` identifier for the node +* `ghw.TopologyNode.Memory` is a `ghw.MemoryArea` struct describing the memory + attached to this node. * `ghw.TopologyNode.Cores` is an array of pointers to `ghw.ProcessorCore` structs that are contained in this node * `ghw.TopologyNode.Caches` is an array of pointers to `ghw.MemoryCache` structs that @@ -623,8 +423,25 @@ Each `ghw.TopologyNode` struct contains the following fields: system * `ghw.TopologyNode.Distance` is an array of distances between NUMA nodes as reported by the system. -* `ghw.TopologyNode.Memory` is a struct describing the memory attached to this node. - Please refer to the documentation of `ghw.MemoryArea`. + +`ghw.MemoryArea` describes a collection of *physical* RAM on the host. + +In the simplest and most common case, all system memory fits in a single memory +area. In more complex host systems, like [NUMA systems][numa], many memory +areas may be present in the host system (e.g. one for each NUMA cell). + +[numa]: https://en.wikipedia.org/wiki/Non-uniform_memory_access + +The `ghw.MemoryArea` struct contains the following fields: + +* `ghw.MemoryArea.TotalPhysicalBytes` contains the amount of physical memory + associated with this memory area. +* `ghw.MemoryArea.TotalUsableBytes` contains the amount of memory of this + memory area the system can actually use. Usable memory accounts for things + like the kernel's resident memory size and some reserved system bits. Please + note this value is **NOT** the amount of memory currently in use by processes + in the system. See [the discussion][#physical-versus-usage-memory] about + the difference. See above in the [CPU](#cpu) section for information about the `ghw.ProcessorCore` struct and how to use and query it. @@ -635,7 +452,8 @@ Each `ghw.MemoryCache` struct contains the following fields: `ghw.INSTRUCTION` or `ghw.UNIFIED` depending on whether the cache stores CPU instructions, program data, or both * `ghw.MemoryCache.Level` is a positive integer indicating how close the cache - is to the processor + is to the processor. The lower the number, the closer the cache is to the + processor and the faster the processor can access its contents * `ghw.MemoryCache.SizeBytes` is an integer containing the number of bytes the cache can contain * `ghw.MemoryCache.LogicalProcessors` is an array of integers representing the @@ -695,9 +513,8 @@ topology SMP (1 nodes) ### Network -Information about the host computer's networking hardware is returned from the -`ghw.Network()` function. This function returns a pointer to a -`ghw.NetworkInfo` struct. +The `ghw.Network()` function returns a `ghw.NetworkInfo` struct that contains +information about the host computer's networking hardware. The `ghw.NetworkInfo` struct contains one field: @@ -707,30 +524,32 @@ The `ghw.NetworkInfo` struct contains one field: Each `ghw.NIC` struct contains the following fields: * `ghw.NIC.Name` is the system's identifier for the NIC -* `ghw.NIC.MacAddress` is the MAC address for the NIC, if any +* `ghw.NIC.MACAddress` is the Media Access Control (MAC) address for the NIC, + if any * `ghw.NIC.IsVirtual` is a boolean indicating if the NIC is a virtualized device -* `ghw.NIC.Capabilities` is an array of pointers to `ghw.NICCapability` structs - that can describe the things the NIC supports. These capabilities match the - returned values from the `ethtool -k ` call on Linux as well as the - AutoNegotiation and PauseFrameUse capabilities from `ethtool`. -* `ghw.NIC.PCIAddress` is the PCI device address of the device backing the NIC. - this is not-nil only if the backing device is indeed a PCI device; more backing - devices (e.g. USB) will be added in future versions. -* `ghw.NIC.Speed` is a string showing the current link speed. On Linux, this - field will be present even if `ethtool` is not available. -* `ghw.NIC.Duplex` is a string showing the current link duplex. On Linux, this - field will be present even if `ethtool` is not available. -* `ghw.NIC.SupportedLinkModes` is a string slice containing a list of - supported link modes -* `ghw.NIC.SupportedPorts` is a string slice containing the list of - supported port types (MII, TP, FIBRE) -* `ghw.NIC.SupportedFECModes` is a string slice containing a list of - supported FEC Modes. -* `ghw.NIC.AdvertisedLinkModes` is a string slice containing the +* `ghw.NIC.Capabilities` (Linux only) is an array of pointers to + `ghw.NICCapability` structs that can describe the things the NIC supports. + These capabilities match the returned values from the `ethtool -k ` + call on Linux as well as the AutoNegotiation and PauseFrameUse capabilities + from `ethtool`. +* `ghw.NIC.PCIAddress` (Linux only) is the PCI device address of the device + backing the NIC. this is not-nil only if the backing device is indeed a PCI + device; more backing devices (e.g. USB) will be added in future versions. +* `ghw.NIC.Speed` (Linux only) is a string showing the current link speed. On + Linux, this field will be present even if `ethtool` is not available. +* `ghw.NIC.Duplex` (Linux only) is a string showing the current link duplex. On + Linux, this field will be present even if `ethtool` is not available. +* `ghw.NIC.SupportedLinkModes` (Linux only) is a string slice containing a list + of supported link modes, e.g. "10baseT/Half", "1000baseT/Full". +* `ghw.NIC.SupportedPorts` (Linux only) is a string slice containing the list + of supported port types, e.g. "MII", "TP", "FIBRE", "Twisted Pair". +* `ghw.NIC.SupportedFECModes` (Linux only) is a string slice containing a list + of supported Forward Error Correction (FEC) Modes. +* `ghw.NIC.AdvertisedLinkModes` (Linux only) is a string slice containing the link modes being advertised during auto negotiation. -* `ghw.NIC.AdvertisedFECModes` is a string slice containing the FEC - modes advertised during auto negotiation. +* `ghw.NIC.AdvertisedFECModes` (Linux only) is a string slice containing the + Forward Error Correction (FEC) modes advertised during auto negotiation. The `ghw.NICCapability` struct contains the following fields: @@ -837,35 +656,22 @@ developers to not only gather information about devices on a local PCI bus but also query for information about hardware device classes, vendor and product information. -**NOTE**: Parsing of the PCI-IDS file database is provided by the separate -[github.com/jaypipes/pcidb library](http://github.com/jaypipes/pcidb). You can -read that library's README for more information about the various structs that -are exposed on the `ghw.PCIInfo` struct. +> **NOTE**: Parsing of the PCI-IDS file database is provided by the separate +> [github.com/jaypipes/pcidb library](http://github.com/jaypipes/pcidb). You +> can read that library's README for more information about the various structs +> that are exposed on the `ghw.PCIInfo` struct. + +The `ghw.PCI()` function returns a `ghw.PCIInfo` struct that contains +information about the host computer's PCI devices. -The `ghw.PCI()` function returns a `ghw.PCIInfo` struct. The `ghw.PCIInfo` -struct contains a number of fields that may be queried for PCI information: +The `ghw.PCIInfo` struct contains one field: * `ghw.PCIInfo.Devices` is a slice of pointers to `ghw.PCIDevice` structs that describe the PCI devices on the host system -* `ghw.PCIInfo.Classes` is a map, keyed by the PCI class ID (a hex-encoded - string) of pointers to `pcidb.Class` structs, one for each class of PCI - device known to `ghw` - (**DEPRECATED**, will be removed in `ghw` `v1.0`. Use the - `github.com/jaypipes/pcidb` library for exploring PCI database information) -* `ghw.PCIInfo.Vendors` is a map, keyed by the PCI vendor ID (a hex-encoded - string) of pointers to `pcidb.Vendor` structs, one for each PCI vendor - known to `ghw` - (**DEPRECATED**, will be removed in `ghw` `v1.0`. Use the - `github.com/jaypipes/pcidb` library for exploring PCI database information) -* `ghw.PCIInfo.Products` is a map, keyed by the PCI product ID (a hex-encoded - string) of pointers to `pcidb.Product` structs, one for each PCI product - known to `ghw` - (**DEPRECATED**, will be removed in `ghw` `v1.0`. Use the - `github.com/jaypipes/pcidb` library for exploring PCI database information) - -**NOTE**: PCI products are often referred to by their "device ID". We use -the term "product ID" in `ghw` because it more accurately reflects what the -identifier is for: a specific product line produced by the vendor. + +> **NOTE**: PCI products are often referred to by their "device ID". We use the +> term "product ID" in `ghw` because it more accurately reflects what the +> identifier is for: a specific product line produced by the vendor. The `ghw.PCIDevice` struct has the following fields: @@ -884,9 +690,9 @@ The `ghw.PCIDevice` struct has the following fields: programming interface. This will always be non-nil. * `ghw.PCIDevice.Driver` is a string representing the device driver the system is using to handle this device. Can be empty string if this - information is not available. If the information is not available, - this doesn't mean at all the device is not functioning, but only the - fact `ghw` was not able to retrieve this information. + information is not available. If the information is not available, this does + not mean the device is not functioning, but rather that `ghw` was not able to + retrieve driver information. The `ghw.PCIAddress` (which is an alias for the `ghw.pci.address.Address` struct) contains the PCI address fields. It has a `ghw.PCIAddress.String()` @@ -904,19 +710,12 @@ The `ghw.PCIAddress` struct has the following fields: * `ghw.PCIAddress.Function` is a string representing the PCI function component of the address. -**NOTE**: Older versions (pre-`v0.9.0`) erroneously referred to the `Device` -field as the `Slot` field. As noted by [@pearsonk](https://github.com/pearsonk) -in [#220](https://github.com/jaypipes/ghw/issues/220), this was a misnomer. - -#### Finding a PCI device by PCI address - -In addition to the above information, the `ghw.PCIInfo` struct has the -following method: - -* `ghw.PCIInfo.GetDevice(address string)` +> **NOTE**: Older versions (pre-`v0.9.0`) erroneously referred to the `Device` +> field as the `Slot` field. As noted by [@pearsonk](https://github.com/pearsonk) +> in [#220](https://github.com/jaypipes/ghw/issues/220), this was a misnomer. -The following code snippet shows how to call the `ghw.PCIInfo.ListDevices()` -method and output a simple list of PCI address and vendor/product information: +The following code snippet shows how to list the PCI devices on the host system +and output a simple list of PCI address and vendor/product information: ```go package main @@ -1015,6 +814,13 @@ host PCI devices: 0000:3f:06.3 Intel Corporation Xeon 5600 Series Integrated Memory Co... ``` +#### Finding a PCI device by PCI address + +In addition to the above information, the `ghw.PCIInfo` struct has the +following method: + +* `ghw.PCIInfo.GetDevice(address string)` + The following code snippet shows how to call the `ghw.PCIInfo.GetDevice()` method and use its returned `ghw.PCIDevice` struct pointer: @@ -1081,9 +887,8 @@ Programming Interface: VGA controller [00] ### GPU -Information about the host computer's graphics hardware is returned from the -`ghw.GPU()` function. This function returns a pointer to a `ghw.GPUInfo` -struct. +The `ghw.GPU()` function returns a `ghw.GPUInfo` struct that contains +information about the host computer's graphics hardware. The `ghw.GPUInfo` struct contains one field: @@ -1142,20 +947,21 @@ subsystem ### Chassis -The host's chassis information is accessible with the `ghw.Chassis()` function. This -function returns a pointer to a `ghw.ChassisInfo` struct. +The `ghw.Chassis()` function returns a `ghw.ChassisInfo` struct that contains +information about the host computer's hardware chassis. The `ghw.ChassisInfo` struct contains multiple fields: * `ghw.ChassisInfo.AssetTag` is a string with the chassis asset tag * `ghw.ChassisInfo.SerialNumber` is a string with the chassis serial number * `ghw.ChassisInfo.Type` is a string with the chassis type *code* -* `ghw.ChassisInfo.TypeDescription` is a string with a description of the chassis type +* `ghw.ChassisInfo.TypeDescription` is a string with a description of the + chassis type * `ghw.ChassisInfo.Vendor` is a string with the chassis vendor * `ghw.ChassisInfo.Version` is a string with the chassis version -**NOTE**: These fields are often missing for non-server hardware. Don't be -surprised to see empty string or "None" values. +> **NOTE**: These fields are often missing for non-server hardware. Don't be +> surprised to see empty string or "None" values. ```go package main @@ -1182,10 +988,10 @@ Example output from my personal workstation: chassis type=Desktop vendor=System76 version=thelio-r1 ``` -**NOTE**: Some of the values such as serial numbers are shown as unknown because -the Linux kernel by default disallows access to those fields if you're not running -as root. They will be populated if it runs as root or otherwise you may see warnings -like the following: +> **NOTE**: Some of the values such as serial numbers are shown as unknown +> because the Linux kernel by default disallows access to those fields if +> you're not running as root. They will be populated if it runs as root or +> otherwise you may see warnings like the following: ``` WARNING: Unable to read chassis_serial: open /sys/class/dmi/id/chassis_serial: permission denied @@ -1196,8 +1002,8 @@ feature to quiet things down. ### BIOS -The host's basis input/output system (BIOS) information is accessible with the `ghw.BIOS()` function. This -function returns a pointer to a `ghw.BIOSInfo` struct. +The `ghw.BIOS()` function returns a `ghw.BIOSInfo` struct that contains +information about the host computer's basis input/output system (BIOS). The `ghw.BIOSInfo` struct contains multiple fields: @@ -1232,8 +1038,8 @@ bios vendor=System76 version=F2 Z5 date=11/14/2018 ### Baseboard -The host's baseboard information is accessible with the `ghw.Baseboard()` function. This -function returns a pointer to a `ghw.BaseboardInfo` struct. +The `ghw.Baseboard()` function returns a `ghw.BaseboardInfo` struct that +contains information about the host computer's hardware baseboard. The `ghw.BaseboardInfo` struct contains multiple fields: @@ -1244,8 +1050,8 @@ The `ghw.BaseboardInfo` struct contains multiple fields: Product on Windows * `ghw.BaseboardInfo.Version` is a string with the baseboard version -**NOTE**: These fields are often missing for non-server hardware. Don't be -surprised to see empty string or "None" values. +> **NOTE**: These fields are often missing for non-server hardware. Don't be +> surprised to see empty string or "None" values. ```go package main @@ -1272,10 +1078,10 @@ Example output from my personal workstation: baseboard vendor=System76 version=thelio-r1 ``` -**NOTE**: Some of the values such as serial numbers are shown as unknown because -the Linux kernel by default disallows access to those fields if you're not running -as root. They will be populated if it runs as root or otherwise you may see warnings -like the following: +> **NOTE**: Some of the values such as serial numbers are shown as unknown +> because the Linux kernel by default disallows access to those fields if +> you're not running as root. They will be populated if it runs as root or +> otherwise you may see warnings like the following: ``` WARNING: Unable to read board_serial: open /sys/class/dmi/id/board_serial: permission denied @@ -1286,8 +1092,8 @@ feature to quiet things down. ### Product -The host's product information is accessible with the `ghw.Product()` function. This -function returns a pointer to a `ghw.ProductInfo` struct. +The `ghw.Product()` function returns a `ghw.ProductInfo` struct that +contains information about the host computer's hardware product line. The `ghw.ProductInfo` struct contains multiple fields: @@ -1295,12 +1101,13 @@ The `ghw.ProductInfo` struct contains multiple fields: * `ghw.ProductInfo.Name` is a string with the product name * `ghw.ProductInfo.SerialNumber` is a string with the product serial number * `ghw.ProductInfo.UUID` is a string with the product UUID -* `ghw.ProductInfo.SKU` is a string with the product stock unit identifier (SKU) +* `ghw.ProductInfo.SKU` is a string with the product stock unit identifier + (SKU) * `ghw.ProductInfo.Vendor` is a string with the product vendor * `ghw.ProductInfo.Version` is a string with the product version -**NOTE**: These fields are often missing for non-server hardware. Don't be -surprised to see empty string, "Default string" or "None" values. +> **NOTE**: These fields are often missing for non-server hardware. Don't be +> surprised to see empty string, "Default string" or "None" values. ```go package main @@ -1327,10 +1134,10 @@ Example output from my personal workstation: product family=Default string name=Thelio vendor=System76 sku=Default string version=thelio-r1 ``` -**NOTE**: Some of the values such as serial numbers are shown as unknown because -the Linux kernel by default disallows access to those fields if you're not running -as root. They will be populated if it runs as root or otherwise you may see warnings -like the following: +> **NOTE**: Some of the values such as serial numbers are shown as unknown +> because the Linux kernel by default disallows access to those fields if +> you're not running as root. They will be populated if it runs as root or +> otherwise you may see warnings like the following: ``` WARNING: Unable to read product_serial: open /sys/class/dmi/id/product_serial: permission denied @@ -1339,7 +1146,73 @@ WARNING: Unable to read product_serial: open /sys/class/dmi/id/product_serial: p You can ignore them or use the [Disabling warning messages](#disabling-warning-messages) feature to quiet things down. -## Serialization +## Advanced Usage + +### Disabling warning messages + +When `ghw` isn't able to retrieve some information, it may print certain +warning messages to `stderr`. To disable these warnings, simply set the +`GHW_DISABLE_WARNINGS` environs variable: + +``` +$ ghwc memory +WARNING: +Could not determine total physical bytes of memory. This may +be due to the host being a virtual machine or container with no +/var/log/syslog file, or the current user may not have necessary +privileges to read the syslog. We are falling back to setting the +total physical amount of memory to the total usable amount of memory +memory (24GB physical, 24GB usable) +``` + +``` +$ GHW_DISABLE_WARNINGS=1 ghwc memory +memory (24GB physical, 24GB usable) +``` + +You can disable warning programmatically using the `WithDisableWarnings` option: + +```go + +import ( + "github.com/jaypipes/ghw" +) + +mem, err := ghw.Memory(ghw.WithDisableWarnings()) +``` + +`WithDisableWarnings` is a alias for the `WithNullAlerter` option, which in turn +leverages the more general `Alerter` feature of ghw. + +You may supply a `Alerter` to ghw to redirect all the warnings there, like +logger objects (see for example golang's stdlib `log.Logger`). +`Alerter` is in fact the minimal logging interface `ghw needs. +To learn more, please check the `option.Alerter` interface and the `ghw.WithAlerter()` +function. + +### Overriding the root mountpoint `ghw` uses + +When `ghw` looks for information about the host system, it considers `/` as its +root mountpoint. So, for example, when looking up CPU information on a Linux +system, `ghw.CPU()` will use the path `/proc/cpuinfo`. + +If you are calling `ghw` from a system that has an alternate root mountpoint, +you can either set the `GHW_CHROOT` environment variable to that alternate +path, or call one of the functions like `ghw.CPU()` or `ghw.Memory()` with the +`ghw.WithChroot()` modifier. + +For example, if you are executing from within an application container that has +bind-mounted the root host filesystem to the mount point `/host`, you would set +`GHW_CHROOT` to `/host` so that `ghw` can find `/proc/cpuinfo` at +`/host/proc/cpuinfo`. + +Alternately, you can use the `ghw.WithChroot()` function like so: + +```go +cpu, err := ghw.CPU(ghw.WithChroot("/host")) +``` + +### Serialization to JSON or YAML All of the `ghw` `XXXInfo` structs -- e.g. `ghw.CPUInfo` -- have two methods for producing a serialized JSON or YAML string representation of the contained @@ -1381,36 +1254,153 @@ memory: total_usable_bytes: 25263415296 ``` -## Calling external programs +### Overriding a specific mountpoint (Linux only) -By default ghw may call external programs, for example `ethtool`, to learn about hardware capabilities. -In some rare circumstances it may be useful to opt out from this behaviour and rely only on the data -provided by pseudo-filesystems, like sysfs. -The most common use case is when we want to consume a snapshot from ghw. In these cases the information -provided by tools will be most likely inconsistent with the data from the snapshot - they will run on -a different host! -To prevent ghw from calling external tools, set the environs variable `GHW_DISABLE_TOOLS` to any value, -or, programmatically, check the `WithDisableTools` function. -The default behaviour of ghw is to call external tools when available. - -**WARNING**: -- on all platforms, disabling external tools make ghw return less data. - Unless noted otherwise, there is _no fallback flow_ if external tools are disabled. -- on darwin, disabling external tools disable block support entirely +When running inside containers, it can be cumbersome to only override the root +mountpoint. Inside containers, when granting access to the host file systems, +it is common to bind-mount them to a non-standard location, like `/sys` on +`/host-sys` or `/proc` to `/host-proc`. It is rarer to mount them to a common +subtree (e.g. `/sys` to `/host/sys` and `/proc` to `/host/proc`...) -## Developers +To better cover this use case, `ghw.WithPathOverrides()` can be used to supply +a mapping of directories to mountpoints, like this example shows: -[Contributions](CONTRIBUTING.md) to `ghw` are welcomed! Fork the repo on GitHub -and submit a pull request with your proposed changes. Or, feel free to log an -issue for a feature request or bug report. +```go +cpu, err := ghw.CPU(ghw.WithPathOverrides(ghw.PathOverrides{ + "/proc": "/host-proc", + "/sys": "/host-sys", +})) +``` + +**NOTE**: This feature works in addition and is composable with the +`ghw.WithChroot()` function and `GHW_CHROOT` environment variable. + +### Reading hardware information from a `ghw` snapshot (Linux only) + +The `ghw-snapshot` tool can create a snapshot of a host's hardware information. + +Please read [`SNAPSHOT.md`](SNAPSHOT.md) to learn about creating snapshots with +the `ghw-snapshot` tool. + +You can make `ghw` read hardware information from a snapshot created with +`ghw-snapshot` using environment variables or programmatically. -### Running tests +Use the `GHW_SNAPSHOT_PATH` environment variable to specify the filepath to a +snapshot that `ghw` will read to determine hardware information. All the needed +chroot changes will be automatically performed. By default, the snapshot is +unpacked into a temporary directory managed by `ghw`. This temporary directory +is automatically deleted when `ghw` is finished reading the snapshot. -You can run unit tests easily using the `make test` command, like so: +Three other environment variables are relevant if and only if `GHW_SNAPSHOT_PATH` +is not empty: +* `GHW_SNAPSHOT_ROOT` let users specify the directory on which the snapshot + should be unpacked. This moves the ownership of that directory from `ghw` to + users. For this reason, `ghw` will *not* automatically clean up the content + unpacked into `GHW_SNAPSHOT_ROOT`. +* `GHW_SNAPSHOT_EXCLUSIVE` tells `ghw` that the directory is meant only to + contain the given snapshot, thus `ghw` will *not* attempt to unpack it unless + the directory is empty. You can use both `GHW_SNAPSHOT_ROOT` and + `GHW_SNAPSHOT_EXCLUSIVE` to make sure `ghw` unpacks the snapshot only once + regardless of how many `ghw` packages (e.g. cpu, memory) access it. Set the + value of this environment variable to any non-empty string. +* `GHW_SNAPSHOT_PRESERVE` tells `ghw` not to clean up the unpacked snapshot. + Set the value of this environment variable to any non-empty string. + +```go +cpu, err := ghw.CPU(ghw.WithSnapshot(ghw.SnapshotOptions{ + Path: "/path/to/linux-amd64-d4771ed3300339bc75f856be09fc6430.tar.gz", +})) + + +myRoot := "/my/safe/directory" +cpu, err := ghw.CPU(ghw.WithSnapshot(ghw.SnapshotOptions{ + Path: "/path/to/linux-amd64-d4771ed3300339bc75f856be09fc6430.tar.gz", + Root: &myRoot, +})) + +myOtherRoot := "/my/other/safe/directory" +cpu, err := ghw.CPU(ghw.WithSnapshot(ghw.SnapshotOptions{ + Path: "/path/to/linux-amd64-d4771ed3300339bc75f856be09fc6430.tar.gz", + Root: &myOtherRoot, + Exclusive: true, +})) ``` -[jaypipes@uberbox ghw]$ make test -go test github.com/jaypipes/ghw github.com/jaypipes/ghw/cmd/ghwc -ok github.com/jaypipes/ghw 0.084s -? github.com/jaypipes/ghw/cmd/ghwc [no test files] + +### Creating snapshots + +You can create `ghw` snapshots using the `ghw-snapshot` tool or +programmatically using the `pkg/snapshot` package. + +Below is an example of creating a `ghw` snapshot using the `pkg/snapshot` +package. + +```go + +import ( + "fmt" + "os" + + "github.com/jaypipes/ghw/pkg/snapshot" +) + +// ... + +scratchDir, err := os.MkdirTemp("", "ghw-snapshot-*") +if err != nil { + fmt.Printf("Error creating clone directory: %v", err) +} +defer os.RemoveAll(scratchDir) + +// this step clones all the files and directories ghw cares about +if err := snapshot.CloneTreeInto(scratchDir); err != nil { + fmt.Printf("error cloning into %q: %v", scratchDir, err) +} + +// optionally, you may add extra content into your snapshot. +// ghw will ignore the extra content. +// Glob patterns like `filepath.Glob` are supported. +fileSpecs := []string{ + "/proc/cmdline", +} + +// options allows the client code to optionally deference symlinks, or copy +// them into the cloned tree as symlinks +var opts *snapshot.CopyFileOptions +if err := snapshot.CopyFilesInto(fileSpecs, scratchDir, opts); err != nil { + fmt.Printf("error cloning extra files into %q: %v", scratchDir, err) +} + +// automates the creation of the gzipped tarball out of the given tree. +if err := snapshot.PackFrom("my-snapshot.tgz", scratchDir); err != nil { + fmt.Printf("error packing %q into %q: %v", scratchDir, *output, err) +} ``` + +## Calling external programs + +By default `ghw` may call external programs, for example `ethtool`, to learn +about hardware capabilities. In some rare circumstances it may be useful to +opt out from this behaviour and rely only on the data provided by +pseudo-filesystems, like sysfs. + +The most common use case is when we want to read a snapshot from `ghw`. In +these cases the information provided by tools will be inconsistent with the +data from the snapshot - since they will be run on a different host than the +host the snapshot was created for. + +To prevent `ghw` from calling external tools, set the `GHW_DISABLE_TOOLS` +environment variable to any value, or, programmatically, use the +`ghw.WithDisableTools()` function. The default behaviour of ghw is to call +external tools when available. + +> **WARNING**: on all platforms, disabling external tools make ghw return less +> data. Unless noted otherwise, there is _no fallback flow_ if external tools +> are disabled. On MacOSX/Darwin, disabling external tools disables block +> support entirely + +## Developers + +[Contributions](CONTRIBUTING.md) to `ghw` are welcomed! Fork the repo on GitHub +and submit a pull request with your proposed changes. Or, feel free to log an +issue for a feature request or bug report. diff --git a/vendor/github.com/jaypipes/ghw/SECURITY.md b/vendor/github.com/jaypipes/ghw/SECURITY.md new file mode 100644 index 00000000..660f2e63 --- /dev/null +++ b/vendor/github.com/jaypipes/ghw/SECURITY.md @@ -0,0 +1,23 @@ +# Security Policy + +We take security vulnerabilities seriously (and so should you!) + +Our policy on reported vulnerabilities (see below on how to report) is that we will +respond to the reporter of a vulnerability within two (2) business days of receiving +the report and notify the reporter whether and when a remediation will be committed. + +When a remediation for a security vulnerability is committed, we will cut a tagged +release of `ghw` and include in the release notes for that tagged release a description +of the vulnerability and a discussion of how it was remediated, along with a note +urging users to update to that fixed version. + +## Reporting a Vulnerability + +While `ghw` does have automated Github Dependabot alerts about security vulnerabilities +in `ghw`'s dependencies, there is always a chance that a vulnerability in a dependency +goes undetected by Dependabot. If you are aware of a vulnerability either in `ghw` or +one of its dependencies, please do not hesitate to reach out to `ghw` maintainers via +email or Slack. **Do not discuss vulnerabilities in a public forum**. + +`ghw`'s primary maintainer is Jay Pipes, who can be found on the Kubernetes Slack +community as `@jaypipes` and reached via email at jaypipes at gmail dot com. diff --git a/vendor/github.com/jaypipes/ghw/alias.go b/vendor/github.com/jaypipes/ghw/alias.go index 2e679a96..83157d00 100644 --- a/vendor/github.com/jaypipes/ghw/alias.go +++ b/vendor/github.com/jaypipes/ghw/alias.go @@ -47,13 +47,20 @@ var ( type MemoryArea = memory.Area type MemoryInfo = memory.Info +type MemoryCache = memory.Cache type MemoryCacheType = memory.CacheType type MemoryModule = memory.Module const ( - MEMORY_CACHE_TYPE_UNIFIED = memory.CACHE_TYPE_UNIFIED + MemoryCacheTypeUnified = memory.CacheTypeUnified + // DEPRECATED: Please use MemoryCacheTypeUnified + MEMORY_CACHE_TYPE_UNIFIED = memory.CACHE_TYPE_UNIFIED + MemoryCacheTypeInstruction = memory.CacheTypeInstruction + // DEPRECATED: Please use MemoryCacheTypeInstruction MEMORY_CACHE_TYPE_INSTRUCTION = memory.CACHE_TYPE_INSTRUCTION - MEMORY_CACHE_TYPE_DATA = memory.CACHE_TYPE_DATA + MemoryCacheTypeData = memory.CacheTypeData + // DEPRECATED: Please use MemoryCacheTypeData + MEMORY_CACHE_TYPE_DATA = memory.CACHE_TYPE_DATA ) var ( @@ -71,22 +78,44 @@ var ( type DriveType = block.DriveType const ( + DriveTypeUnknown = block.DriveTypeUnknown + // DEPRECATED: Please use DriveTypeUnknown DRIVE_TYPE_UNKNOWN = block.DRIVE_TYPE_UNKNOWN - DRIVE_TYPE_HDD = block.DRIVE_TYPE_HDD - DRIVE_TYPE_FDD = block.DRIVE_TYPE_FDD - DRIVE_TYPE_ODD = block.DRIVE_TYPE_ODD - DRIVE_TYPE_SSD = block.DRIVE_TYPE_SSD + DriveTypeHDD = block.DriveTypeHDD + // DEPRECATED: Please use DriveTypeHDD + DRIVE_TYPE_HDD = block.DRIVE_TYPE_HDD + DriveTypeFDD = block.DriveTypeFDD + // DEPRECATED: Please use DriveTypeFDD + DRIVE_TYPE_FDD = block.DRIVE_TYPE_FDD + DriveTypeODD = block.DriveTypeODD + // DEPRECATED: Please use DriveTypeODD + DRIVE_TYPE_ODD = block.DRIVE_TYPE_ODD + DriveTypeSSD = block.DriveTypeSSD + // DEPRECATED: Please use DriveTypeSSD + DRIVE_TYPE_SSD = block.DRIVE_TYPE_SSD ) type StorageController = block.StorageController const ( + StorageControllerUnknown = block.StorageControllerUnknown + // DEPRECATED: Please use StorageControllerUnknown STORAGE_CONTROLLER_UNKNOWN = block.STORAGE_CONTROLLER_UNKNOWN - STORAGE_CONTROLLER_IDE = block.STORAGE_CONTROLLER_IDE - STORAGE_CONTROLLER_SCSI = block.STORAGE_CONTROLLER_SCSI - STORAGE_CONTROLLER_NVME = block.STORAGE_CONTROLLER_NVME - STORAGE_CONTROLLER_VIRTIO = block.STORAGE_CONTROLLER_VIRTIO - STORAGE_CONTROLLER_MMC = block.STORAGE_CONTROLLER_MMC + StorageControllerIDE = block.StorageControllerIDE + // DEPRECATED: Please use StorageControllerIDE + STORAGE_CONTROLLER_IDE = block.STORAGE_CONTROLLER_IDE + StorageControllerSCSI = block.StorageControllerSCSI + // DEPRECATED: Please use StorageControllerSCSI + STORAGE_CONTROLLER_SCSI = block.STORAGE_CONTROLLER_SCSI + StorageControllerNVMe = block.StorageControllerNVMe + // DEPRECATED: Please use StorageControllerNVMe + STORAGE_CONTROLLER_NVME = block.STORAGE_CONTROLLER_NVME + StorageControllerVirtIO = block.StorageControllerVirtIO + // DEPRECATED: Please use StorageControllerVirtIO + STORAGE_CONTROLLER_VIRTIO = block.STORAGE_CONTROLLER_VIRTIO + StorageControllerMMC = block.StorageControllerMMC + // DEPRECATED: Please use StorageControllerMMC + STORAGE_CONTROLLER_MMC = block.STORAGE_CONTROLLER_MMC ) type NetworkInfo = net.Info diff --git a/vendor/github.com/jaypipes/ghw/doc.go b/vendor/github.com/jaypipes/ghw/doc.go index 6722cda7..826ed7b3 100644 --- a/vendor/github.com/jaypipes/ghw/doc.go +++ b/vendor/github.com/jaypipes/ghw/doc.go @@ -5,310 +5,10 @@ // /* -package ghw can determine various hardware-related -information about the host computer: +package ghw discovers hardware-related information about the host computer, +including CPU, memory, block storage, NUMA topology, network devices, PCI, GPU, +and baseboard/BIOS/chassis/product information. -* Memory -* CPU -* Block storage -* Topology -* Network -* PCI -* GPU - -# Memory - -Information about the host computer's memory can be retrieved using the -Memory function which returns a pointer to a MemoryInfo struct. - - package main - - import ( - "fmt" - - "github.com/jaypipes/ghw" - ) - - func main() { - memory, err := ghw.Memory() - if err != nil { - fmt.Printf("Error getting memory info: %v", err) - } - - fmt.Println(memory.String()) - } - -# CPU - -The CPU function returns a CPUInfo struct that contains information about -the CPUs on the host system. - - package main - - import ( - "fmt" - "math" - "strings" - - "github.com/jaypipes/ghw" - ) - - func main() { - cpu, err := ghw.CPU() - if err != nil { - fmt.Printf("Error getting CPU info: %v", err) - } - - fmt.Printf("%v\n", cpu) - - for _, proc := range cpu.Processors { - fmt.Printf(" %v\n", proc) - for _, core := range proc.Cores { - fmt.Printf(" %v\n", core) - } - if len(proc.Capabilities) > 0 { - // pretty-print the (large) block of capability strings into rows - // of 6 capability strings - rows := int(math.Ceil(float64(len(proc.Capabilities)) / float64(6))) - for row := 1; row < rows; row = row + 1 { - rowStart := (row * 6) - 1 - rowEnd := int(math.Min(float64(rowStart+6), float64(len(proc.Capabilities)))) - rowElems := proc.Capabilities[rowStart:rowEnd] - capStr := strings.Join(rowElems, " ") - if row == 1 { - fmt.Printf(" capabilities: [%s\n", capStr) - } else if rowEnd < len(proc.Capabilities) { - fmt.Printf(" %s\n", capStr) - } else { - fmt.Printf(" %s]\n", capStr) - } - } - } - } - } - -# Block storage - -Information about the host computer's local block storage is returned from -the Block function. This function returns a pointer to a BlockInfo struct. - - package main - - import ( - "fmt" - - "github.com/jaypipes/ghw" - ) - - func main() { - block, err := ghw.Block() - if err != nil { - fmt.Printf("Error getting block storage info: %v", err) - } - - fmt.Printf("%v\n", block) - - for _, disk := range block.Disks { - fmt.Printf(" %v\n", disk) - for _, part := range disk.Partitions { - fmt.Printf(" %v\n", part) - } - } - } - -# Topology - -Information about the host computer's architecture (NUMA vs. SMP), the -host's node layout and processor caches can be retrieved from the Topology -function. This function returns a pointer to a TopologyInfo struct. - - package main - - import ( - "fmt" - - "github.com/jaypipes/ghw" - ) - - func main() { - topology, err := ghw.Topology() - if err != nil { - fmt.Printf("Error getting topology info: %v", err) - } - - fmt.Printf("%v\n", topology) - - for _, node := range topology.Nodes { - fmt.Printf(" %v\n", node) - for _, cache := range node.Caches { - fmt.Printf(" %v\n", cache) - } - } - } - -# Network - -Information about the host computer's networking hardware is returned from -the Network function. This function returns a pointer to a NetworkInfo -struct. - - package main - - import ( - "fmt" - - "github.com/jaypipes/ghw" - ) - - func main() { - net, err := ghw.Network() - if err != nil { - fmt.Printf("Error getting network info: %v", err) - } - - fmt.Printf("%v\n", net) - - for _, nic := range net.NICs { - fmt.Printf(" %v\n", nic) - - enabledCaps := make([]int, 0) - for x, cap := range nic.Capabilities { - if cap.IsEnabled { - enabledCaps = append(enabledCaps, x) - } - } - if len(enabledCaps) > 0 { - fmt.Printf(" enabled capabilities:\n") - for _, x := range enabledCaps { - fmt.Printf(" - %s\n", nic.Capabilities[x].Name) - } - } - } - } - -# PCI - -ghw contains a PCI database inspection and querying facility that allows -developers to not only gather information about devices on a local PCI bus -but also query for information about hardware device classes, vendor and -product information. - -**NOTE**: Parsing of the PCI-IDS file database is provided by the separate -http://github.com/jaypipes/pcidb library. You can read that library's -README for more information about the various structs that are exposed on -the PCIInfo struct. - -PCIInfo.ListDevices is used to iterate over a host's PCI devices: - - package main - - import ( - "fmt" - - "github.com/jaypipes/ghw" - ) - - func main() { - pci, err := ghw.PCI() - if err != nil { - fmt.Printf("Error getting PCI info: %v", err) - } - fmt.Printf("host PCI devices:\n") - fmt.Println("====================================================") - devices := pci.ListDevices() - if len(devices) == 0 { - fmt.Printf("error: could not retrieve PCI devices\n") - return - } - - for _, device := range devices { - vendor := device.Vendor - vendorName := vendor.Name - if len(vendor.Name) > 20 { - vendorName = string([]byte(vendorName)[0:17]) + "..." - } - product := device.Product - productName := product.Name - if len(product.Name) > 40 { - productName = string([]byte(productName)[0:37]) + "..." - } - fmt.Printf("%-12s\t%-20s\t%-40s\n", device.Address, vendorName, productName) - } - } - -The following code snippet shows how to call the PCIInfo.GetDevice method -and use its returned PCIDevice struct pointer: - - package main - - import ( - "fmt" - "os" - - "github.com/jaypipes/ghw" - ) - - func main() { - pci, err := ghw.PCI() - if err != nil { - fmt.Printf("Error getting PCI info: %v", err) - } - - addr := "0000:00:00.0" - if len(os.Args) == 2 { - addr = os.Args[1] - } - fmt.Printf("PCI device information for %s\n", addr) - fmt.Println("====================================================") - deviceInfo := pci.GetDevice(addr) - if deviceInfo == nil { - fmt.Printf("could not retrieve PCI device information for %s\n", addr) - return - } - - vendor := deviceInfo.Vendor - fmt.Printf("Vendor: %s [%s]\n", vendor.Name, vendor.ID) - product := deviceInfo.Product - fmt.Printf("Product: %s [%s]\n", product.Name, product.ID) - subsystem := deviceInfo.Subsystem - subvendor := pci.Vendors[subsystem.VendorID] - subvendorName := "UNKNOWN" - if subvendor != nil { - subvendorName = subvendor.Name - } - fmt.Printf("Subsystem: %s [%s] (Subvendor: %s)\n", subsystem.Name, subsystem.ID, subvendorName) - class := deviceInfo.Class - fmt.Printf("Class: %s [%s]\n", class.Name, class.ID) - subclass := deviceInfo.Subclass - fmt.Printf("Subclass: %s [%s]\n", subclass.Name, subclass.ID) - progIface := deviceInfo.ProgrammingInterface - fmt.Printf("Programming Interface: %s [%s]\n", progIface.Name, progIface.ID) - } - -# GPU - -Information about the host computer's graphics hardware is returned from -the GPU function. This function returns a pointer to a GPUInfo struct. - - package main - - import ( - "fmt" - - "github.com/jaypipes/ghw" - ) - - func main() { - gpu, err := ghw.GPU() - if err != nil { - fmt.Printf("Error getting GPU info: %v", err) - } - - fmt.Printf("%v\n", gpu) - - for _, card := range gpu.GraphicsCards { - fmt.Printf(" %v\n", card) - } - } +Please see the extensive README.md document for examples of usage. */ package ghw diff --git a/vendor/github.com/jaypipes/ghw/pkg/block/block.go b/vendor/github.com/jaypipes/ghw/pkg/block/block.go index a495f69c..5e75eea6 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/block/block.go +++ b/vendor/github.com/jaypipes/ghw/pkg/block/block.go @@ -24,22 +24,43 @@ import ( type DriveType int const ( - DRIVE_TYPE_UNKNOWN DriveType = iota - DRIVE_TYPE_HDD // Hard disk drive - DRIVE_TYPE_FDD // Floppy disk drive - DRIVE_TYPE_ODD // Optical disk drive - DRIVE_TYPE_SSD // Solid-state drive - DRIVE_TYPE_VIRTUAL // virtual drive i.e. loop devices + // DriveTypeUnknown means we could not determine the drive type of the disk + DriveTypeUnknown DriveType = iota + // DriveTypeHDD indicates a hard disk drive + DriveTypeHDD + // DriveTypeFDD indicates a floppy disk drive + DriveTypeFDD + // DriveTypeODD indicates an optical disk drive + DriveTypeODD + // DriveTypeSSD indicates a solid-state drive + DriveTypeSSD + // DriveTypeVirtual indicates a virtual drive i.e. loop devices + DriveTypeVirtual +) + +const ( + // DEPRECATED: Please use DriveTypeUnknown + DRIVE_TYPE_UNKNOWN = DriveTypeUnknown + // DEPRECATED: Please use DriveTypeHDD + DRIVE_TYPE_HDD = DriveTypeHDD + // DEPRECATED: Please use DriveTypeFDD + DRIVE_TYPE_FDD = DriveTypeFDD + // DEPRECATED: Please use DriveTypeODD + DRIVE_TYPE_ODD = DriveTypeODD + // DEPRECATED: Please use DriveTypeSSD + DRIVE_TYPE_SSD = DriveTypeSSD + // DEPRECATED: Please use DriveTypeVirtual + DRIVE_TYPE_VIRTUAL = DriveTypeVirtual ) var ( driveTypeString = map[DriveType]string{ - DRIVE_TYPE_UNKNOWN: "Unknown", - DRIVE_TYPE_HDD: "HDD", - DRIVE_TYPE_FDD: "FDD", - DRIVE_TYPE_ODD: "ODD", - DRIVE_TYPE_SSD: "SSD", - DRIVE_TYPE_VIRTUAL: "virtual", + DriveTypeUnknown: "Unknown", + DriveTypeHDD: "HDD", + DriveTypeFDD: "FDD", + DriveTypeODD: "ODD", + DriveTypeSSD: "SSD", + DriveTypeVirtual: "virtual", } // NOTE(fromani): the keys are all lowercase and do not match @@ -48,12 +69,12 @@ var ( // DriveType::MarshalJSON. // We use this table only in UnmarshalJSON, so it should be OK. stringDriveType = map[string]DriveType{ - "unknown": DRIVE_TYPE_UNKNOWN, - "hdd": DRIVE_TYPE_HDD, - "fdd": DRIVE_TYPE_FDD, - "odd": DRIVE_TYPE_ODD, - "ssd": DRIVE_TYPE_SSD, - "virtual": DRIVE_TYPE_VIRTUAL, + "unknown": DriveTypeUnknown, + "hdd": DriveTypeHDD, + "fdd": DriveTypeFDD, + "odd": DriveTypeODD, + "ssd": DriveTypeSSD, + "virtual": DriveTypeVirtual, } ) @@ -90,24 +111,54 @@ func (dt *DriveType) UnmarshalJSON(b []byte) error { type StorageController int const ( - STORAGE_CONTROLLER_UNKNOWN StorageController = iota - STORAGE_CONTROLLER_IDE // Integrated Drive Electronics - STORAGE_CONTROLLER_SCSI // Small computer system interface - STORAGE_CONTROLLER_NVME // Non-volatile Memory Express - STORAGE_CONTROLLER_VIRTIO // Virtualized storage controller/driver - STORAGE_CONTROLLER_MMC // Multi-media controller (used for mobile phone storage devices) - STORAGE_CONTROLLER_LOOP // loop device + // StorageControllerUnknown indicates we could not determine the storage + // controller for the disk + StorageControllerUnknown StorageController = iota + // StorageControllerIDE indicates a Integrated Drive Electronics (IDE) + // controller + StorageControllerIDE + // StorageControllerSCSI indicates a Small computer system interface + // (SCSI) controller + StorageControllerSCSI + // StorageControllerNVMe indicates a Non-volatile Memory Express (NVMe) + // controller + StorageControllerNVMe + // StorageControllerVirtIO indicates a virtualized storage + // controller/driver + StorageControllerVirtIO + // StorageControllerMMC indicates a Multi-media controller (used for mobile + // phone storage devices) + StorageControllerMMC + // StorageControllerLoop indicates a loopback storage controller + StorageControllerLoop +) + +const ( + // DEPRECATED: Please use StorageControllerUnknown + STORAGE_CONTROLLER_UNKNOWN = StorageControllerUnknown + // DEPRECATED: Please use StorageControllerIDE + STORAGE_CONTROLLER_IDE = StorageControllerIDE + // DEPRECATED: Please use StorageControllerSCSI + STORAGE_CONTROLLER_SCSI = StorageControllerSCSI + // DEPRECATED: Please use StorageControllerNVMe + STORAGE_CONTROLLER_NVME = StorageControllerNVMe + // DEPRECATED: Please use StorageControllerVirtIO + STORAGE_CONTROLLER_VIRTIO = StorageControllerVirtIO + // DEPRECATED: Please use StorageControllerMMC + STORAGE_CONTROLLER_MMC = StorageControllerMMC + // DEPRECATED: Please use StorageControllerLoop + STORAGE_CONTROLLER_LOOP = StorageControllerLoop ) var ( storageControllerString = map[StorageController]string{ - STORAGE_CONTROLLER_UNKNOWN: "Unknown", - STORAGE_CONTROLLER_IDE: "IDE", - STORAGE_CONTROLLER_SCSI: "SCSI", - STORAGE_CONTROLLER_NVME: "NVMe", - STORAGE_CONTROLLER_VIRTIO: "virtio", - STORAGE_CONTROLLER_MMC: "MMC", - STORAGE_CONTROLLER_LOOP: "loop", + StorageControllerUnknown: "Unknown", + StorageControllerIDE: "IDE", + StorageControllerSCSI: "SCSI", + StorageControllerNVMe: "NVMe", + StorageControllerVirtIO: "virtio", + StorageControllerMMC: "MMC", + StorageControllerLoop: "loop", } // NOTE(fromani): the keys are all lowercase and do not match @@ -116,13 +167,13 @@ var ( // StorageController::MarshalJSON. // We use this table only in UnmarshalJSON, so it should be OK. stringStorageController = map[string]StorageController{ - "unknown": STORAGE_CONTROLLER_UNKNOWN, - "ide": STORAGE_CONTROLLER_IDE, - "scsi": STORAGE_CONTROLLER_SCSI, - "nvme": STORAGE_CONTROLLER_NVME, - "virtio": STORAGE_CONTROLLER_VIRTIO, - "mmc": STORAGE_CONTROLLER_MMC, - "loop": STORAGE_CONTROLLER_LOOP, + "unknown": StorageControllerUnknown, + "ide": StorageControllerIDE, + "scsi": StorageControllerSCSI, + "nvme": StorageControllerNVMe, + "virtio": StorageControllerVirtIO, + "mmc": StorageControllerMMC, + "loop": StorageControllerLoop, } ) @@ -154,45 +205,88 @@ func (sc StorageController) MarshalJSON() ([]byte, error) { // Disk describes a single disk drive on the host system. Disk drives provide // raw block storage resources. type Disk struct { - Name string `json:"name"` - SizeBytes uint64 `json:"size_bytes"` - PhysicalBlockSizeBytes uint64 `json:"physical_block_size_bytes"` - DriveType DriveType `json:"drive_type"` - IsRemovable bool `json:"removable"` - StorageController StorageController `json:"storage_controller"` - BusPath string `json:"bus_path"` + // Name contains a short name for the disk, e.g. `sda` + Name string `json:"name"` + // SizeBytes contains the total amount of storage, in bytes, for this disk + SizeBytes uint64 `json:"size_bytes"` + // PhysicalBlockSizeBytes is the size, in bytes, of the physical blocks in + // this disk. This is typically the minimum amount of data that can be + // written to a disk in a single write operation. + PhysicalBlockSizeBytes uint64 `json:"physical_block_size_bytes"` + // DriveType is the category of disk drive for this disk. + DriveType DriveType `json:"drive_type"` + // IsRemovable indicates if the disk drive is removable. + IsRemovable bool `json:"removable"` + // StorageController is the category of storage controller used by the + // disk. + StorageController StorageController `json:"storage_controller"` + // BusPath is the filepath to the bus for this disk. + BusPath string `json:"bus_path"` + // NUMANodeID contains the numeric index (0-based) of the NUMA Node this + // disk is affined to, or -1 if the host system is non-NUMA. // TODO(jaypipes): Convert this to a TopologyNode struct pointer and then // add to serialized output as "numa_node,omitempty" - NUMANodeID int `json:"-"` - Vendor string `json:"vendor"` - Model string `json:"model"` - SerialNumber string `json:"serial_number"` - WWN string `json:"wwn"` - Partitions []*Partition `json:"partitions"` + NUMANodeID int `json:"-"` + // Vendor is the manufacturer of the disk. + Vendor string `json:"vendor"` + // Model is the model number of the disk. + Model string `json:"model"` + // SerialNumber is the serial number of the disk. + SerialNumber string `json:"serial_number"` + // WWN is the World-wide Name of the disk. + // See: https://en.wikipedia.org/wiki/World_Wide_Name + WWN string `json:"wwn"` + // WWNNoExtension is the World-wide Name of the disk with any vendor + // extensions excluded. + // See: https://en.wikipedia.org/wiki/World_Wide_Name + WWNNoExtension string `json:"wwnNoExtension"` + // Partitions contains an array of pointers to `Partition` structs, one for + // each partition on the disk. + Partitions []*Partition `json:"partitions"` // TODO(jaypipes): Add PCI field for accessing PCI device information // PCI *PCIDevice `json:"pci"` } // Partition describes a logical division of a Disk. type Partition struct { - Disk *Disk `json:"-"` - Name string `json:"name"` - Label string `json:"label"` - MountPoint string `json:"mount_point"` - SizeBytes uint64 `json:"size_bytes"` - Type string `json:"type"` - IsReadOnly bool `json:"read_only"` - UUID string `json:"uuid"` // This would be volume UUID on macOS, PartUUID on linux, empty on Windows + // Disk is a pointer to the `Disk` struct that houses this partition. + Disk *Disk `json:"-"` + // Name is the system name given to the partition, e.g. "sda1". + Name string `json:"name"` + // Label is the human-readable label given to the partition. On Linux, this + // is derived from the `ID_PART_ENTRY_NAME` udev entry. + Label string `json:"label"` + // MountPoint is the path where this partition is mounted. + MountPoint string `json:"mount_point"` + // SizeBytes contains the total amount of storage, in bytes, this partition + // can consume. + SizeBytes uint64 `json:"size_bytes"` + // Type contains the type of the partition. + Type string `json:"type"` + // IsReadOnly indicates if the partition is marked read-only. + IsReadOnly bool `json:"read_only"` + // UUID is the universally-unique identifier (UUID) for the partition. + // This will be volume UUID on Darwin, PartUUID on linux, empty on Windows. + UUID string `json:"uuid"` + // FilesystemLabel is the label of the filesystem contained on the + // partition. On Linux, this is derived from the `ID_FS_NAME` udev entry. FilesystemLabel string `json:"filesystem_label"` } // Info describes all disk drives and partitions in the host system. type Info struct { ctx *context.Context - // TODO(jaypipes): Deprecate this field and replace with TotalSizeBytes - TotalPhysicalBytes uint64 `json:"total_size_bytes"` - Disks []*Disk `json:"disks"` - Partitions []*Partition `json:"-"` + // TotalSizeBytes contains the total amount of storage, in bytes, on the + // host system. + TotalSizeBytes uint64 `json:"total_size_bytes"` + // DEPRECATED: Please use TotalSizeBytes + TotalPhysicalBytes uint64 `json:"-"` + // Disks contains an array of pointers to `Disk` structs, one for each disk + // drive on the host system. + Disks []*Disk `json:"disks"` + // Partitions contains an array of pointers to `Partition` structs, one for + // each partition on any disk drive on the host system. + Partitions []*Partition `json:"-"` } // New returns a pointer to an Info struct that describes the block storage @@ -206,6 +300,8 @@ func New(opts ...*option.Option) (*Info, error) { return info, nil } +// String returns a short string indicating important information about the +// block storage on the host system. func (i *Info) String() string { tpbs := util.UNKNOWN if i.TotalPhysicalBytes > 0 { @@ -222,6 +318,8 @@ func (i *Info) String() string { len(i.Disks), dplural, tpbs) } +// String returns a short string indicating important information about the +// disk. func (d *Disk) String() string { sizeStr := util.UNKNOWN if d.SizeBytes > 0 { @@ -272,6 +370,8 @@ func (d *Disk) String() string { ) } +// String returns a short string indicating important information about the +// partition. func (p *Partition) String() string { typeStr := "" if p.Type != "" { diff --git a/vendor/github.com/jaypipes/ghw/pkg/block/block_darwin.go b/vendor/github.com/jaypipes/ghw/pkg/block/block_darwin.go index 5115d404..c6b6c266 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/block/block_darwin.go +++ b/vendor/github.com/jaypipes/ghw/pkg/block/block_darwin.go @@ -185,9 +185,9 @@ func makePartition(disk, s diskOrPartitionPlistNode, isAPFS bool) (*Partition, e // driveTypeFromPlist looks at the supplied property list struct and attempts to // determine the disk type func driveTypeFromPlist(infoPlist *diskUtilInfoPlist) DriveType { - dt := DRIVE_TYPE_HDD + dt := DriveTypeHDD if infoPlist.SolidState { - dt = DRIVE_TYPE_SSD + dt = DriveTypeSSD } // TODO(jaypipes): Figure out how to determine floppy and/or CD/optical // drive type on Mac @@ -197,9 +197,9 @@ func driveTypeFromPlist(infoPlist *diskUtilInfoPlist) DriveType { // storageControllerFromPlist looks at the supplied property list struct and // attempts to determine the storage controller in use for the device func storageControllerFromPlist(infoPlist *diskUtilInfoPlist) StorageController { - sc := STORAGE_CONTROLLER_SCSI + sc := StorageControllerSCSI if strings.HasSuffix(infoPlist.DeviceTreePath, "IONVMeController") { - sc = STORAGE_CONTROLLER_NVME + sc = StorageControllerNVMe } // TODO(jaypipes): I don't know if Mac even supports IDE controllers and // the "virtio" controller is libvirt-specific @@ -217,7 +217,7 @@ func (info *Info) load() error { return err } - info.TotalPhysicalBytes = 0 + var tsb uint64 info.Disks = make([]*Disk, 0, len(listPlist.AllDisksAndPartitions)) info.Partitions = []*Partition{} @@ -258,6 +258,7 @@ func (info *Info) load() error { Model: ioregPlist.ModelNumber, SerialNumber: ioregPlist.SerialNumber, WWN: "", + WWNNoExtension: "", Partitions: make([]*Partition, 0, len(disk.Partitions)+len(disk.APFSVolumes)), } @@ -278,10 +279,12 @@ func (info *Info) load() error { diskReport.Partitions = append(diskReport.Partitions, part) } - info.TotalPhysicalBytes += uint64(disk.Size) + tsb += uint64(disk.Size) info.Disks = append(info.Disks, diskReport) info.Partitions = append(info.Partitions, diskReport.Partitions...) } + info.TotalSizeBytes = tsb + info.TotalPhysicalBytes = tsb return nil } diff --git a/vendor/github.com/jaypipes/ghw/pkg/block/block_linux.go b/vendor/github.com/jaypipes/ghw/pkg/block/block_linux.go index ce164132..376b5ff5 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/block/block_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/block/block_linux.go @@ -8,7 +8,6 @@ package block import ( "bufio" "io" - "io/ioutil" "os" "path/filepath" "strconv" @@ -26,11 +25,12 @@ const ( func (i *Info) load() error { paths := linuxpath.New(i.ctx) i.Disks = disks(i.ctx, paths) - var tpb uint64 + var tsb uint64 for _, d := range i.Disks { - tpb += d.SizeBytes + tsb += d.SizeBytes } - i.TotalPhysicalBytes = tpb + i.TotalSizeBytes = tsb + i.TotalPhysicalBytes = tsb return nil } @@ -38,7 +38,7 @@ func diskPhysicalBlockSizeBytes(paths *linuxpath.Paths, disk string) uint64 { // We can find the sector size in Linux by looking at the // /sys/block/$DEVICE/queue/physical_block_size file in sysfs path := filepath.Join(paths.SysBlock, disk, "queue", "physical_block_size") - contents, err := ioutil.ReadFile(path) + contents, err := os.ReadFile(path) if err != nil { return 0 } @@ -53,7 +53,7 @@ func diskSizeBytes(paths *linuxpath.Paths, disk string) uint64 { // We can find the number of 512-byte sectors by examining the contents of // /sys/block/$DEVICE/size and calculate the physical bytes accordingly. path := filepath.Join(paths.SysBlock, disk, "size") - contents, err := ioutil.ReadFile(path) + contents, err := os.ReadFile(path) if err != nil { return 0 } @@ -70,7 +70,7 @@ func diskNUMANodeID(paths *linuxpath.Paths, disk string) int { return -1 } for partial := link; strings.HasPrefix(partial, "../devices/"); partial = filepath.Base(partial) { - if nodeContents, err := ioutil.ReadFile(filepath.Join(paths.SysBlock, partial, "numa_node")); err != nil { + if nodeContents, err := os.ReadFile(filepath.Join(paths.SysBlock, partial, "numa_node")); err != nil { if nodeInt, err := strconv.Atoi(string(nodeContents)); err != nil { return nodeInt } @@ -83,7 +83,7 @@ func diskVendor(paths *linuxpath.Paths, disk string) string { // In Linux, the vendor for a disk device is found in the // /sys/block/$DEVICE/device/vendor file in sysfs path := filepath.Join(paths.SysBlock, disk, "device", "vendor") - contents, err := ioutil.ReadFile(path) + contents, err := os.ReadFile(path) if err != nil { return util.UNKNOWN } @@ -93,7 +93,7 @@ func diskVendor(paths *linuxpath.Paths, disk string) string { // udevInfoDisk gets the udev info for a disk func udevInfoDisk(paths *linuxpath.Paths, disk string) (map[string]string, error) { // Get device major:minor numbers - devNo, err := ioutil.ReadFile(filepath.Join(paths.SysBlock, disk, "dev")) + devNo, err := os.ReadFile(filepath.Join(paths.SysBlock, disk, "dev")) if err != nil { return nil, err } @@ -103,7 +103,7 @@ func udevInfoDisk(paths *linuxpath.Paths, disk string) (map[string]string, error // udevInfoPartition gets the udev info for a partition func udevInfoPartition(paths *linuxpath.Paths, disk string, partition string) (map[string]string, error) { // Get device major:minor numbers - devNo, err := ioutil.ReadFile(filepath.Join(paths.SysBlock, disk, partition, "dev")) + devNo, err := os.ReadFile(filepath.Join(paths.SysBlock, disk, partition, "dev")) if err != nil { return nil, err } @@ -113,7 +113,7 @@ func udevInfoPartition(paths *linuxpath.Paths, disk string, partition string) (m func udevInfo(paths *linuxpath.Paths, devNo string) (map[string]string, error) { // Look up block device in udev runtime database udevID := "b" + strings.TrimSpace(devNo) - udevBytes, err := ioutil.ReadFile(filepath.Join(paths.RunUdevData, udevID)) + udevBytes, err := os.ReadFile(filepath.Join(paths.RunUdevData, udevID)) if err != nil { return nil, err } @@ -147,6 +147,16 @@ func diskSerialNumber(paths *linuxpath.Paths, disk string) string { return util.UNKNOWN } + // First try to use the serial from sg3_utils + if serial, ok := info["SCSI_IDENT_SERIAL"]; ok { + return serial + } + + // Fall back to ID_SCSI_SERIAL + if serial, ok := info["ID_SCSI_SERIAL"]; ok { + return serial + } + // There are two serial number keys, ID_SERIAL and ID_SERIAL_SHORT The // non-_SHORT version often duplicates vendor information collected // elsewhere, so use _SHORT and fall back to ID_SERIAL if missing... @@ -173,6 +183,18 @@ func diskBusPath(paths *linuxpath.Paths, disk string) string { return util.UNKNOWN } +func diskWWNNoExtension(paths *linuxpath.Paths, disk string) string { + info, err := udevInfoDisk(paths, disk) + if err != nil { + return util.UNKNOWN + } + + if wwn, ok := info["ID_WWN"]; ok { + return wwn + } + return util.UNKNOWN +} + func diskWWN(paths *linuxpath.Paths, disk string) string { info, err := udevInfoDisk(paths, disk) if err != nil { @@ -196,7 +218,7 @@ func diskWWN(paths *linuxpath.Paths, disk string) string { func diskPartitions(ctx *context.Context, paths *linuxpath.Paths, disk string) []*Partition { out := make([]*Partition, 0) path := filepath.Join(paths.SysBlock, disk) - files, err := ioutil.ReadDir(path) + files, err := os.ReadDir(path) if err != nil { ctx.Warn("failed to read disk partitions: %s\n", err) return out @@ -281,7 +303,7 @@ func diskPartUUID(paths *linuxpath.Paths, disk string, partition string) string func diskIsRemovable(paths *linuxpath.Paths, disk string) bool { path := filepath.Join(paths.SysBlock, disk, "removable") - contents, err := ioutil.ReadFile(path) + contents, err := os.ReadFile(path) if err != nil { return false } @@ -295,7 +317,7 @@ func disks(ctx *context.Context, paths *linuxpath.Paths) []*Disk { // run. We can get all of this information by examining the /sys/block // and /sys/class/block files disks := make([]*Disk, 0) - files, err := ioutil.ReadDir(paths.SysBlock) + files, err := os.ReadDir(paths.SysBlock) if err != nil { return nil } @@ -316,6 +338,7 @@ func disks(ctx *context.Context, paths *linuxpath.Paths) []*Disk { model := diskModel(paths, dname) serialNo := diskSerialNumber(paths, dname) wwn := diskWWN(paths, dname) + wwnNoExtension := diskWWNNoExtension(paths, dname) removable := diskIsRemovable(paths, dname) if storageController == STORAGE_CONTROLLER_LOOP && size == 0 { @@ -335,6 +358,7 @@ func disks(ctx *context.Context, paths *linuxpath.Paths) []*Disk { Model: model, SerialNumber: serialNo, WWN: wwn, + WWNNoExtension: wwnNoExtension, } parts := diskPartitions(ctx, paths, dname) @@ -358,34 +382,34 @@ func diskTypes(dname string) ( // The conditionals below which set the controller and drive type are // based on information listed here: // https://en.wikipedia.org/wiki/Device_file - driveType := DRIVE_TYPE_UNKNOWN - storageController := STORAGE_CONTROLLER_UNKNOWN + driveType := DriveTypeUnknown + storageController := StorageControllerUnknown if strings.HasPrefix(dname, "fd") { - driveType = DRIVE_TYPE_FDD + driveType = DriveTypeFDD } else if strings.HasPrefix(dname, "sd") { - driveType = DRIVE_TYPE_HDD - storageController = STORAGE_CONTROLLER_SCSI + driveType = DriveTypeHDD + storageController = StorageControllerSCSI } else if strings.HasPrefix(dname, "hd") { - driveType = DRIVE_TYPE_HDD - storageController = STORAGE_CONTROLLER_IDE + driveType = DriveTypeHDD + storageController = StorageControllerIDE } else if strings.HasPrefix(dname, "vd") { - driveType = DRIVE_TYPE_HDD - storageController = STORAGE_CONTROLLER_VIRTIO + driveType = DriveTypeHDD + storageController = StorageControllerVirtIO } else if strings.HasPrefix(dname, "nvme") { - driveType = DRIVE_TYPE_SSD - storageController = STORAGE_CONTROLLER_NVME + driveType = DriveTypeSSD + storageController = StorageControllerNVMe } else if strings.HasPrefix(dname, "sr") { - driveType = DRIVE_TYPE_ODD - storageController = STORAGE_CONTROLLER_SCSI + driveType = DriveTypeODD + storageController = StorageControllerSCSI } else if strings.HasPrefix(dname, "xvd") { - driveType = DRIVE_TYPE_HDD - storageController = STORAGE_CONTROLLER_SCSI + driveType = DriveTypeHDD + storageController = StorageControllerSCSI } else if strings.HasPrefix(dname, "mmc") { - driveType = DRIVE_TYPE_SSD - storageController = STORAGE_CONTROLLER_MMC + driveType = DriveTypeSSD + storageController = StorageControllerMMC } else if strings.HasPrefix(dname, "loop") { - driveType = DRIVE_TYPE_VIRTUAL - storageController = STORAGE_CONTROLLER_LOOP + driveType = DriveTypeVirtual + storageController = StorageControllerLoop } return driveType, storageController @@ -403,7 +427,7 @@ func diskIsRotational(ctx *context.Context, paths *linuxpath.Paths, devName stri // paths. func partitionSizeBytes(paths *linuxpath.Paths, disk string, part string) uint64 { path := filepath.Join(paths.SysBlock, disk, part, "size") - contents, err := ioutil.ReadFile(path) + contents, err := os.ReadFile(path) if err != nil { return 0 } diff --git a/vendor/github.com/jaypipes/ghw/pkg/block/block_windows.go b/vendor/github.com/jaypipes/ghw/pkg/block/block_windows.go index 574f5612..270e19f9 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/block/block_windows.go +++ b/vendor/github.com/jaypipes/ghw/pkg/block/block_windows.go @@ -17,24 +17,24 @@ import ( type physicalDiskMediaType int const ( - PHYSICAL_DISK_MEDIA_TYPE_UNSPECIFIED physicalDiskMediaType = 0 - PHYSICAL_DISK_MEDIA_TYPE_HDD physicalDiskMediaType = 3 - PHYSICAL_DISK_MEDIA_TYPE_SSD physicalDiskMediaType = 4 - PHYSICAL_DISK_MEDIA_TYPE_SCM physicalDiskMediaType = 5 + physicalDiskMediaTypeUnspecified physicalDiskMediaType = 0 + physicalDiskMediaTypeHDD physicalDiskMediaType = 3 + physicalDiskMediaTypeSSD physicalDiskMediaType = 4 + physicalDiskMediaTypeSCM physicalDiskMediaType = 5 ) func (dt physicalDiskMediaType) ToDriveType() DriveType { switch dt { - case PHYSICAL_DISK_MEDIA_TYPE_UNSPECIFIED: - return DRIVE_TYPE_UNKNOWN - case PHYSICAL_DISK_MEDIA_TYPE_HDD: - return DRIVE_TYPE_HDD - case PHYSICAL_DISK_MEDIA_TYPE_SSD: - return DRIVE_TYPE_SSD - case PHYSICAL_DISK_MEDIA_TYPE_SCM: - return DRIVE_TYPE_UNKNOWN + case physicalDiskMediaTypeUnspecified: + return DriveTypeUnknown + case physicalDiskMediaTypeHDD: + return DriveTypeHDD + case physicalDiskMediaTypeSSD: + return DriveTypeSSD + case physicalDiskMediaTypeSCM: + return DriveTypeUnknown } - return DRIVE_TYPE_UNKNOWN + return DriveTypeUnknown } const wqlDiskDrive = "SELECT Caption, CreationClassName, DefaultBlockSize, Description, DeviceID, Index, InterfaceType, Manufacturer, MediaType, Model, Name, Partitions, SerialNumber, Size, TotalCylinders, TotalHeads, TotalSectors, TotalTracks, TracksPerCylinder FROM Win32_DiskDrive" @@ -155,6 +155,7 @@ func (i *Info) load() error { Model: strings.TrimSpace(*diskdrive.Caption), SerialNumber: strings.TrimSpace(*diskdrive.SerialNumber), WWN: util.UNKNOWN, // TODO: add information + WWNNoExtension: util.UNKNOWN, // TODO: add information Partitions: make([]*Partition, 0), } for _, diskpartition := range win32DiskPartitionDescriptions { @@ -191,11 +192,12 @@ func (i *Info) load() error { } i.Disks = disks - var tpb uint64 + var tsb uint64 for _, d := range i.Disks { - tpb += d.SizeBytes + tsb += d.SizeBytes } - i.TotalPhysicalBytes = tpb + i.TotalSizeBytes = tsb + i.TotalPhysicalBytes = tsb return nil } @@ -245,18 +247,18 @@ func getPhysicalDisks() ([]win32PhysicalDisk, error) { } func toDriveType(physicalDiskMediaType physicalDiskMediaType, mediaType string, caption string) DriveType { - if driveType := physicalDiskMediaType.ToDriveType(); driveType != DRIVE_TYPE_UNKNOWN { + if driveType := physicalDiskMediaType.ToDriveType(); driveType != DriveTypeUnknown { return driveType } mediaType = strings.ToLower(mediaType) caption = strings.ToLower(caption) if strings.Contains(mediaType, "fixed") || strings.Contains(mediaType, "ssd") || strings.Contains(caption, "ssd") { - return DRIVE_TYPE_SSD + return DriveTypeSSD } else if strings.ContainsAny(mediaType, "hdd") { - return DRIVE_TYPE_HDD + return DriveTypeHDD } - return DRIVE_TYPE_UNKNOWN + return DriveTypeUnknown } // TODO: improve @@ -264,11 +266,11 @@ func toStorageController(interfaceType string) StorageController { var storageController StorageController switch interfaceType { case "SCSI": - storageController = STORAGE_CONTROLLER_SCSI + storageController = StorageControllerSCSI case "IDE": - storageController = STORAGE_CONTROLLER_IDE + storageController = StorageControllerIDE default: - storageController = STORAGE_CONTROLLER_UNKNOWN + storageController = StorageControllerUnknown } return storageController } diff --git a/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_darwin.go b/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_darwin.go new file mode 100644 index 00000000..7e9e8fc7 --- /dev/null +++ b/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_darwin.go @@ -0,0 +1,135 @@ +package cpu + +import ( + "fmt" + "github.com/pkg/errors" + "os/exec" + "strconv" + "strings" +) + +var ( + hasARMArchitecture = false // determine if ARM + sysctlOutput = make(map[string]string) // store all the sysctl output +) + +func (i *Info) load() error { + err := populateSysctlOutput() + if err != nil { + return errors.Wrap(err, "unable to populate sysctl map") + } + + i.TotalCores = getTotalCores() + i.TotalThreads = getTotalThreads() + i.Processors = getProcessors() + + return nil +} + +// getProcessors some more info https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_system_capabilities +func getProcessors() []*Processor { + p := make([]*Processor, getProcTopCount()) + for i, _ := range p { + p[i] = new(Processor) + p[i].Vendor = sysctlOutput[fmt.Sprintf("hw.perflevel%s.name", strconv.Itoa(i))] + p[i].Model = getVendor() + p[i].NumCores = getNumberCoresFromPerfLevel(i) + p[i].Capabilities = getCapabilities() + p[i].Cores = make([]*ProcessorCore, getTotalCores()) + } + return p +} + +// getCapabilities valid for ARM, see https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics +func getCapabilities() []string { + var caps []string + + // add ARM capabilities + if hasARMArchitecture { + for cap, isEnabled := range sysctlOutput { + if isEnabled == "1" { + // capabilities with keys with a common prefix + commonPrefix := "hw.optional.arm." + if strings.HasPrefix(cap, commonPrefix) { + caps = append(caps, strings.TrimPrefix(cap, commonPrefix)) + } + // not following prefix convention but are important + if cap == "hw.optional.AdvSIMD_HPFPCvt" { + caps = append(caps, "AdvSIMD_HPFPCvt") + } + if cap == "hw.optional.armv8_crc32" { + caps = append(caps, "armv8_crc32") + } + } + } + + // hw.optional.AdvSIMD and hw.optional.floatingpoint are always enabled (see linked doc) + caps = append(caps, "AdvSIMD") + caps = append(caps, "floatingpoint") + } + + return caps +} + +// populateSysctlOutput to populate a map to quickly retrieve values later +func populateSysctlOutput() error { + // get sysctl output + o, err := exec.Command("sysctl", "-a").CombinedOutput() + if err != nil { + return err + } + + // clean up and store sysctl output + oS := strings.Split(string(o), "\n") + for _, l := range oS { + if l != "" { + s := strings.SplitN(l, ":", 2) + k, v := strings.TrimSpace(s[0]), strings.TrimSpace(s[1]) + sysctlOutput[k] = v + + // see if it's possible to determine if ARM + if k == "hw.optional.arm64" && v == "1" { + hasARMArchitecture = true + } + } + } + + return nil +} + +func getNumberCoresFromPerfLevel(i int) uint32 { + key := fmt.Sprintf("hw.perflevel%s.physicalcpu_max", strconv.Itoa(i)) + nCores := sysctlOutput[key] + return stringToUint32(nCores) +} + +func getVendor() string { + v := sysctlOutput["machdep.cpu.brand_string"] + return v +} + +func getProcTopCount() int { + pC, ok := sysctlOutput["hw.nperflevels"] + if !ok { + // most likely intel so no performance/efficiency core seperation + return 1 + } + i, _ := strconv.Atoi(pC) + return i +} + +// num of physical cores +func getTotalCores() uint32 { + nCores := sysctlOutput["hw.physicalcpu_max"] + return stringToUint32(nCores) +} + +func getTotalThreads() uint32 { + nThreads := sysctlOutput["machdep.cpu.thread_count"] + return stringToUint32(nThreads) +} + +func stringToUint32(s string) uint32 { + o, _ := strconv.ParseUint(s, 10, 0) + return uint32(o) +} diff --git a/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_linux.go b/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_linux.go index 00e70194..3ec2e847 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_linux.go @@ -8,10 +8,10 @@ package cpu import ( "bufio" "fmt" - "io/ioutil" "os" "path/filepath" "regexp" + "sort" "strconv" "strings" @@ -22,6 +22,7 @@ import ( var ( regexForCpulCore = regexp.MustCompile("^cpu([0-9]+)$") + onlineFile = "online" ) func (i *Info) load() error { @@ -47,7 +48,7 @@ func processorsGet(ctx *context.Context) []*Processor { // /sys/devices/system/cpu pseudodir contains N number of pseudodirs with // information about the logical processors on the host. These logical // processor pseudodirs are of the pattern /sys/devices/system/cpu/cpu{N} - fnames, err := ioutil.ReadDir(paths.SysDevicesSystemCPU) + fnames, err := os.ReadDir(paths.SysDevicesSystemCPU) if err != nil { ctx.Warn("failed to read /sys/devices/system/cpu: %s", err) return []*Processor{} @@ -64,6 +65,10 @@ func processorsGet(ctx *context.Context) []*Processor { continue } + onlineFilePath := filepath.Join(paths.SysDevicesSystemCPU, fmt.Sprintf("cpu%d", lpID), onlineFile) + if util.SafeIntFromFile(ctx, onlineFilePath) == 0 { + continue + } procID := processorIDFromLogicalProcessorID(ctx, lpID) proc, found := procs[procID] if !found { @@ -113,6 +118,9 @@ func processorsGet(ctx *context.Context) []*Processor { } res := []*Processor{} for _, p := range procs { + for _, c := range p.Cores { + sort.Ints(c.LogicalProcessors) + } res = append(res, p) } return res @@ -172,7 +180,7 @@ func CoresForNode(ctx *context.Context, nodeID int) ([]*ProcessorCore, error) { return c } - files, err := ioutil.ReadDir(path) + files, err := os.ReadDir(path) if err != nil { return nil, err } @@ -198,6 +206,10 @@ func CoresForNode(ctx *context.Context, nodeID int) ([]*ProcessorCore, error) { ) continue } + onlineFilePath := filepath.Join(cpuPath, onlineFile) + if util.SafeIntFromFile(ctx, onlineFilePath) == 0 { + continue + } coreIDPath := filepath.Join(cpuPath, "topology", "core_id") coreID := util.SafeIntFromFile(ctx, coreIDPath) core := findCoreByID(coreID) diff --git a/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_stub.go b/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_stub.go index 5d07ee43..85156069 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_stub.go +++ b/vendor/github.com/jaypipes/ghw/pkg/cpu/cpu_stub.go @@ -1,5 +1,5 @@ -//go:build !linux && !windows -// +build !linux,!windows +//go:build !linux && !windows && !darwin +// +build !linux,!windows,!darwin // Use and distribution licensed under the Apache license version 2. // diff --git a/vendor/github.com/jaypipes/ghw/pkg/gpu/gpu_linux.go b/vendor/github.com/jaypipes/ghw/pkg/gpu/gpu_linux.go index a2791e86..8f9c9b8c 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/gpu/gpu_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/gpu/gpu_linux.go @@ -6,7 +6,6 @@ package gpu import ( - "io/ioutil" "os" "path/filepath" "strconv" @@ -55,7 +54,7 @@ func (i *Info) load() error { // subsystem (we query the modalias file of the PCI device's sysfs // directory using the `ghw.PCIInfo.GetDevice()` function. paths := linuxpath.New(i.ctx) - links, err := ioutil.ReadDir(paths.SysClassDRM) + links, err := os.ReadDir(paths.SysClassDRM) if err != nil { i.ctx.Warn(_WARN_NO_SYS_CLASS_DRM) return nil diff --git a/vendor/github.com/jaypipes/ghw/pkg/linuxdmi/dmi_linux.go b/vendor/github.com/jaypipes/ghw/pkg/linuxdmi/dmi_linux.go index 09398d36..8e6d8302 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/linuxdmi/dmi_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/linuxdmi/dmi_linux.go @@ -6,7 +6,7 @@ package linuxdmi import ( - "io/ioutil" + "os" "path/filepath" "strings" @@ -19,7 +19,7 @@ func Item(ctx *context.Context, value string) string { paths := linuxpath.New(ctx) path := filepath.Join(paths.SysClassDMI, "id", value) - b, err := ioutil.ReadFile(path) + b, err := os.ReadFile(path) if err != nil { ctx.Warn("Unable to read %s: %s\n", value, err) return util.UNKNOWN diff --git a/vendor/github.com/jaypipes/ghw/pkg/marshal/marshal.go b/vendor/github.com/jaypipes/ghw/pkg/marshal/marshal.go index e8f1bbea..e442d6af 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/marshal/marshal.go +++ b/vendor/github.com/jaypipes/ghw/pkg/marshal/marshal.go @@ -9,28 +9,36 @@ package marshal import ( "encoding/json" - "github.com/ghodss/yaml" "github.com/jaypipes/ghw/pkg/context" + yaml "gopkg.in/yaml.v3" ) -// safeYAML returns a string after marshalling the supplied parameter into YAML +// SafeYAML returns a string after marshalling the supplied parameter into YAML. func SafeYAML(ctx *context.Context, p interface{}) string { b, err := json.Marshal(p) if err != nil { ctx.Warn("error marshalling JSON: %s", err) return "" } - yb, err := yaml.JSONToYAML(b) - if err != nil { + + var jsonObj interface{} + if err := yaml.Unmarshal(b, &jsonObj); err != nil { ctx.Warn("error converting JSON to YAML: %s", err) return "" } + + yb, err := yaml.Marshal(jsonObj) + if err != nil { + ctx.Warn("error marshalling YAML: %s", err) + return "" + } + return string(yb) } -// safeJSON returns a string after marshalling the supplied parameter into +// SafeJSON returns a string after marshalling the supplied parameter into // JSON. Accepts an optional argument to trigger pretty/indented formatting of -// the JSON string +// the JSON string. func SafeJSON(ctx *context.Context, p interface{}, indent bool) string { var b []byte var err error diff --git a/vendor/github.com/jaypipes/ghw/pkg/memory/memory.go b/vendor/github.com/jaypipes/ghw/pkg/memory/memory.go index bdf1ab1a..81e0dc66 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/memory/memory.go +++ b/vendor/github.com/jaypipes/ghw/pkg/memory/memory.go @@ -17,6 +17,10 @@ import ( "github.com/jaypipes/ghw/pkg/util" ) +// Module describes a single physical memory module for a host system. Pretty +// much all modern systems contain dual in-line memory modules (DIMMs). +// +// See https://en.wikipedia.org/wiki/DIMM type Module struct { Label string `json:"label"` Location string `json:"location"` @@ -25,6 +29,10 @@ type Module struct { Vendor string `json:"vendor"` } +// Area describes a set of physical memory on a host system. Non-NUMA systems +// will almost always have a single memory area containing all memory the +// system can use. NUMA systems will have multiple memory areas, one or more +// for each NUMA node/cell in the system. type Area struct { TotalPhysicalBytes int64 `json:"total_physical_bytes"` TotalUsableBytes int64 `json:"total_usable_bytes"` @@ -33,6 +41,8 @@ type Area struct { Modules []*Module `json:"modules"` } +// String returns a short string with a summary of information for this memory +// area func (a *Area) String() string { tpbs := util.UNKNOWN if a.TotalPhysicalBytes > 0 { @@ -51,11 +61,13 @@ func (a *Area) String() string { return fmt.Sprintf("memory (%s physical, %s usable)", tpbs, tubs) } +// Info contains information about the memory on a host system. type Info struct { ctx *context.Context Area } +// New returns an Info struct that describes the memory on a host system. func New(opts ...*option.Option) (*Info, error) { ctx := context.New(opts...) info := &Info{ctx: ctx} @@ -65,6 +77,7 @@ func New(opts ...*option.Option) (*Info, error) { return info, nil } +// String returns a short string with a summary of memory information func (i *Info) String() string { return i.Area.String() } diff --git a/vendor/github.com/jaypipes/ghw/pkg/memory/memory_cache.go b/vendor/github.com/jaypipes/ghw/pkg/memory/memory_cache.go index 8bc4074d..2d8f88d7 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/memory/memory_cache.go +++ b/vendor/github.com/jaypipes/ghw/pkg/memory/memory_cache.go @@ -15,19 +15,35 @@ import ( "github.com/jaypipes/ghw/pkg/unitutil" ) +// CacheType indicates the type of memory stored in a memory cache. type CacheType int const ( - CACHE_TYPE_UNIFIED CacheType = iota - CACHE_TYPE_INSTRUCTION - CACHE_TYPE_DATA + // CacheTypeUnified indicates the memory cache stores both instructions and + // data. + CacheTypeUnified CacheType = iota + // CacheTypeInstruction indicates the memory cache stores only instructions + // (executable bytecode). + CacheTypeInstruction + // CacheTypeData indicates the memory cache stores only data + // (non-executable bytecode). + CacheTypeData +) + +const ( + // DEPRECATED: Please use CacheTypeUnified + CACHE_TYPE_UNIFIED = CacheTypeUnified + // DEPRECATED: Please use CacheTypeUnified + CACHE_TYPE_INSTRUCTION = CacheTypeInstruction + // DEPRECATED: Please use CacheTypeUnified + CACHE_TYPE_DATA = CacheTypeData ) var ( memoryCacheTypeString = map[CacheType]string{ - CACHE_TYPE_UNIFIED: "Unified", - CACHE_TYPE_INSTRUCTION: "Instruction", - CACHE_TYPE_DATA: "Data", + CacheTypeUnified: "Unified", + CacheTypeInstruction: "Instruction", + CacheTypeData: "Data", } // NOTE(fromani): the keys are all lowercase and do not match @@ -36,9 +52,9 @@ var ( // CacheType:MarshalJSON. // We use this table only in UnmarshalJSON, so it should be OK. stringMemoryCacheType = map[string]CacheType{ - "unified": CACHE_TYPE_UNIFIED, - "instruction": CACHE_TYPE_INSTRUCTION, - "data": CACHE_TYPE_DATA, + "unified": CacheTypeUnified, + "instruction": CacheTypeInstruction, + "data": CacheTypeData, } ) @@ -92,21 +108,33 @@ func (a SortByLogicalProcessorId) Len() int { return len(a) } func (a SortByLogicalProcessorId) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a SortByLogicalProcessorId) Less(i, j int) bool { return a[i] < a[j] } +// Cache contains information about a single memory cache on a physical CPU +// package. Caches have a 1-based numeric level, with lower numbers indicating +// the cache is "closer" to the processing cores and reading memory from the +// cache will be faster relative to caches with higher levels. Note that this +// has nothing to do with RAM or memory modules like DIMMs. type Cache struct { - Level uint8 `json:"level"` - Type CacheType `json:"type"` - SizeBytes uint64 `json:"size_bytes"` - // The set of logical processors (hardware threads) that have access to the - // cache + // Level is a 1-based numeric level that indicates the relative closeness + // of this cache to processing cores on the physical package. Lower numbers + // are "closer" to the processing cores and therefore have faster access + // times. + Level uint8 `json:"level"` + // Type indicates what type of memory is stored in the cache. Can be + // instruction (executable bytecodes), data or both. + Type CacheType `json:"type"` + // SizeBytes indicates the size of the cache in bytes. + SizeBytes uint64 `json:"size_bytes"` + // The set of logical processors (hardware threads) that have access to + // this cache. LogicalProcessors []uint32 `json:"logical_processors"` } func (c *Cache) String() string { sizeKb := c.SizeBytes / uint64(unitutil.KB) typeStr := "" - if c.Type == CACHE_TYPE_INSTRUCTION { + if c.Type == CacheTypeInstruction { typeStr = "i" - } else if c.Type == CACHE_TYPE_DATA { + } else if c.Type == CacheTypeData { typeStr = "d" } cacheIDStr := fmt.Sprintf("L%d%s", c.Level, typeStr) diff --git a/vendor/github.com/jaypipes/ghw/pkg/memory/memory_cache_linux.go b/vendor/github.com/jaypipes/ghw/pkg/memory/memory_cache_linux.go index dfb5c1f1..12258ea4 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/memory/memory_cache_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/memory/memory_cache_linux.go @@ -8,7 +8,6 @@ package memory import ( "errors" "fmt" - "io/ioutil" "os" "path/filepath" "sort" @@ -35,7 +34,7 @@ func CachesForNode(ctx *context.Context, nodeID int) ([]*Cache, error) { ) caches := make(map[string]*Cache) - files, err := ioutil.ReadDir(path) + files, err := os.ReadDir(path) if err != nil { return nil, err } @@ -66,7 +65,7 @@ func CachesForNode(ctx *context.Context, nodeID int) ([]*Cache, error) { if _, err = os.Stat(cachePath); errors.Is(err, os.ErrNotExist) { continue } - cacheDirFiles, err := ioutil.ReadDir(cachePath) + cacheDirFiles, err := os.ReadDir(cachePath) if err != nil { return nil, err } @@ -120,7 +119,7 @@ func memoryCacheLevel(ctx *context.Context, paths *linuxpath.Paths, nodeID int, paths.NodeCPUCacheIndex(nodeID, lpID, cacheIndex), "level", ) - levelContents, err := ioutil.ReadFile(levelPath) + levelContents, err := os.ReadFile(levelPath) if err != nil { ctx.Warn("%s", err) return -1 @@ -140,7 +139,7 @@ func memoryCacheSize(ctx *context.Context, paths *linuxpath.Paths, nodeID int, l paths.NodeCPUCacheIndex(nodeID, lpID, cacheIndex), "size", ) - sizeContents, err := ioutil.ReadFile(sizePath) + sizeContents, err := os.ReadFile(sizePath) if err != nil { ctx.Warn("%s", err) return -1 @@ -159,18 +158,18 @@ func memoryCacheType(ctx *context.Context, paths *linuxpath.Paths, nodeID int, l paths.NodeCPUCacheIndex(nodeID, lpID, cacheIndex), "type", ) - cacheTypeContents, err := ioutil.ReadFile(typePath) + cacheTypeContents, err := os.ReadFile(typePath) if err != nil { ctx.Warn("%s", err) - return CACHE_TYPE_UNIFIED + return CacheTypeUnified } switch string(cacheTypeContents[:len(cacheTypeContents)-1]) { case "Data": - return CACHE_TYPE_DATA + return CacheTypeData case "Instruction": - return CACHE_TYPE_INSTRUCTION + return CacheTypeInstruction default: - return CACHE_TYPE_UNIFIED + return CacheTypeUnified } } @@ -179,7 +178,7 @@ func memoryCacheSharedCPUMap(ctx *context.Context, paths *linuxpath.Paths, nodeI paths.NodeCPUCacheIndex(nodeID, lpID, cacheIndex), "shared_cpu_map", ) - sharedCpuMap, err := ioutil.ReadFile(scpuPath) + sharedCpuMap, err := os.ReadFile(scpuPath) if err != nil { ctx.Warn("%s", err) return "" diff --git a/vendor/github.com/jaypipes/ghw/pkg/memory/memory_linux.go b/vendor/github.com/jaypipes/ghw/pkg/memory/memory_linux.go index 21d10f2f..d5a54101 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/memory/memory_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/memory/memory_linux.go @@ -10,7 +10,6 @@ import ( "compress/gzip" "fmt" "io" - "io/ioutil" "os" "path/filepath" "regexp" @@ -24,7 +23,7 @@ import ( ) const ( - _WARN_CANNOT_DETERMINE_PHYSICAL_MEMORY = ` + warnCannotDeterminePhysicalMemory = ` Could not determine total physical bytes of memory. This may be due to the host being a virtual machine or container with no /var/log/syslog file or /sys/devices/system/memory directory, or @@ -37,7 +36,7 @@ the total usable amount of memory var ( // System log lines will look similar to the following: // ... kernel: [0.000000] Memory: 24633272K/25155024K ... - _REGEX_SYSLOG_MEMLINE = regexp.MustCompile(`Memory:\s+\d+K\/(\d+)K`) + regexSyslogMemline = regexp.MustCompile(`Memory:\s+\d+K\/(\d+)K`) // regexMemoryBlockDirname matches a subdirectory in either // /sys/devices/system/memory or /sys/devices/system/node/nodeX that // represents information on a specific memory cell/block @@ -54,7 +53,7 @@ func (i *Info) load() error { tpb := memTotalPhysicalBytes(paths) i.TotalPhysicalBytes = tpb if tpb < 1 { - i.ctx.Warn(_WARN_CANNOT_DETERMINE_PHYSICAL_MEMORY) + i.ctx.Warn(warnCannotDeterminePhysicalMemory) i.TotalPhysicalBytes = tub } i.SupportedPageSizes, _ = memorySupportedPageSizes(paths.SysKernelMMHugepages) @@ -111,7 +110,7 @@ func memoryBlockSizeBytes(dir string) (uint64, error) { // get the memory block size in byte in hexadecimal notation blockSize := filepath.Join(dir, "block_size_bytes") - d, err := ioutil.ReadFile(blockSize) + d, err := os.ReadFile(blockSize) if err != nil { return 0, err } @@ -149,7 +148,7 @@ func memTotalPhysicalBytes(paths *linuxpath.Paths) (total int64) { // size in bytes func memoryTotalPhysicalBytesFromPath(dir string, blockSizeBytes uint64) (int64, error) { var total int64 - files, err := ioutil.ReadDir(dir) + files, err := os.ReadDir(dir) if err != nil { return -1, err } @@ -165,7 +164,7 @@ func memoryTotalPhysicalBytesFromPath(dir string, blockSizeBytes uint64) (int64, if !regexMemoryBlockDirname.MatchString(fname) { continue } - s, err := ioutil.ReadFile(filepath.Join(dir, fname, "state")) + s, err := os.ReadFile(filepath.Join(dir, fname, "state")) if err != nil { return -1, err } @@ -186,7 +185,7 @@ func memTotalPhysicalBytesFromSyslog(paths *linuxpath.Paths) int64 { // so instead we examine the system logs for startup information containing // total physical memory and cache the results of this. findPhysicalKb := func(line string) int64 { - matches := _REGEX_SYSLOG_MEMLINE.FindStringSubmatch(line) + matches := regexSyslogMemline.FindStringSubmatch(line) if len(matches) == 2 { i, err := strconv.Atoi(matches[1]) if err != nil { @@ -202,7 +201,7 @@ func memTotalPhysicalBytesFromSyslog(paths *linuxpath.Paths) int64 { // search each, stopping when we match a system log record line that // contains physical memory information. logDir := paths.VarLog - logFiles, err := ioutil.ReadDir(logDir) + logFiles, err := os.ReadDir(logDir) if err != nil { return -1 } @@ -304,7 +303,7 @@ func memorySupportedPageSizes(hpDir string) ([]uint64, error) { // 'hugepages-{pagesize}kb' out := make([]uint64, 0) - files, err := ioutil.ReadDir(hpDir) + files, err := os.ReadDir(hpDir) if err != nil { return out, err } diff --git a/vendor/github.com/jaypipes/ghw/pkg/net/net.go b/vendor/github.com/jaypipes/ghw/pkg/net/net.go index 82d3226a..e26dab70 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/net/net.go +++ b/vendor/github.com/jaypipes/ghw/pkg/net/net.go @@ -14,28 +14,63 @@ import ( "github.com/jaypipes/ghw/pkg/option" ) +// NICCapability is a feature/capability of a Network Interface Controller +// (NIC) type NICCapability struct { - Name string `json:"name"` - IsEnabled bool `json:"is_enabled"` - CanEnable bool `json:"can_enable"` + // Name is the string name for the capability, e.g. + // "tcp-segmentation-offload" + Name string `json:"name"` + // IsEnabled is true if the capability is currently enabled on the NIC, + // false otherwise. + IsEnabled bool `json:"is_enabled"` + // CanEnable is true if the capability can be enabled on the NIC, false + // otherwise. + CanEnable bool `json:"can_enable"` } +// NIC contains information about a single Network Interface Controller (NIC). type NIC struct { - Name string `json:"name"` - MacAddress string `json:"mac_address"` - IsVirtual bool `json:"is_virtual"` - Capabilities []*NICCapability `json:"capabilities"` - PCIAddress *string `json:"pci_address,omitempty"` - Speed string `json:"speed"` - Duplex string `json:"duplex"` - SupportedLinkModes []string `json:"supported_link_modes,omitempty"` - SupportedPorts []string `json:"supported_ports,omitempty"` - SupportedFECModes []string `json:"supported_fec_modes,omitempty"` - AdvertisedLinkModes []string `json:"advertised_link_modes,omitempty"` - AdvertisedFECModes []string `json:"advertised_fec_modes,omitempty"` + // Name is the string identifier the system gave this NIC. + Name string `json:"name"` + // MACAddress is the Media Access Control (MAC) address of this NIC. + MACAddress string `json:"mac_address"` + // DEPRECATED: Please use MACAddress instead. + MacAddress string `json:"-"` + // IsVirtual is true if the NIC is entirely virtual/emulated, false + // otherwise. + IsVirtual bool `json:"is_virtual"` + // Capabilities is a slice of pointers to `NICCapability` structs + // describing a feature/capability of this NIC. + Capabilities []*NICCapability `json:"capabilities"` + // PCIAddress is a pointer to the PCI address for this NIC, or nil if there + // is no PCI address for this NIC. + PCIAddress *string `json:"pci_address,omitempty"` + // Speed is a string describing the link speed of this NIC, e.g. "1000Mb/s" + Speed string `json:"speed"` + // Duplex is a string indicating the current duplex setting of this NIC, + // e.g. "Full" + Duplex string `json:"duplex"` + // SupportedLinkModes is a slice of strings containing the supported link + // modes of this NIC, e.g. "10baseT/Half", "1000baseT/Full", etc. + SupportedLinkModes []string `json:"supported_link_modes,omitempty"` + // SupportedPorts is a slice of strings containing the supported physical + // ports on this NIC, e.g. "Twisted Pair" + SupportedPorts []string `json:"supported_ports,omitempty"` + // SupportedFECModes is a slice of strings containing the supported Forward + // Error Correction (FEC) modes for this NIC. + SupportedFECModes []string `json:"supported_fec_modes,omitempty"` + // AdvertiseLinkModes is a slice of strings containing the advertised + // (during auto-negotiation) link modes of this NIC, e.g. "10baseT/Half", + // "1000baseT/Full", etc. + AdvertisedLinkModes []string `json:"advertised_link_modes,omitempty"` + // AvertisedFECModes is a slice of strings containing the advertised + // (during auto-negotiation) Forward Error Correction (FEC) modes for this + // NIC. + AdvertisedFECModes []string `json:"advertised_fec_modes,omitempty"` // TODO(fromani): add other hw addresses (USB) when we support them } +// String returns a short string with information about the NIC capability. func (nc *NICCapability) String() string { return fmt.Sprintf( "{Name:%s IsEnabled:%t CanEnable:%t}", @@ -45,6 +80,7 @@ func (nc *NICCapability) String() string { ) } +// String returns a short string with information about the NIC. func (n *NIC) String() string { isVirtualStr := "" if n.IsVirtual { @@ -57,8 +93,11 @@ func (n *NIC) String() string { ) } +// Info describes all network interface controllers (NICs) in the host system. type Info struct { - ctx *context.Context + ctx *context.Context + // NICs is a slice of pointers to `NIC` structs describing the network + // interface controllers (NICs) on the host system. NICs []*NIC `json:"nics"` } @@ -73,6 +112,8 @@ func New(opts ...*option.Option) (*Info, error) { return info, nil } +// String returns a short string with information about the networking on the +// host system. func (i *Info) String() string { return fmt.Sprintf( "net (%d NICs)", diff --git a/vendor/github.com/jaypipes/ghw/pkg/net/net_linux.go b/vendor/github.com/jaypipes/ghw/pkg/net/net_linux.go index cbdea304..d7d7e8ca 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/net/net_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/net/net_linux.go @@ -9,7 +9,6 @@ import ( "bufio" "bytes" "fmt" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -21,7 +20,7 @@ import ( ) const ( - _WARN_ETHTOOL_NOT_INSTALLED = `ethtool not installed. Cannot grab NIC capabilities` + warnEthtoolNotInstalled = `ethtool not installed. Cannot grab NIC capabilities` ) func (i *Info) load() error { @@ -33,7 +32,7 @@ func nics(ctx *context.Context) []*NIC { nics := make([]*NIC, 0) paths := linuxpath.New(ctx) - files, err := ioutil.ReadDir(paths.SysClassNet) + files, err := os.ReadDir(paths.SysClassNet) if err != nil { return nics } @@ -41,7 +40,7 @@ func nics(ctx *context.Context) []*NIC { etAvailable := ctx.EnableTools if etAvailable { if etInstalled := ethtoolInstalled(); !etInstalled { - ctx.Warn(_WARN_ETHTOOL_NOT_INSTALLED) + ctx.Warn(warnEthtoolNotInstalled) etAvailable = false } } @@ -67,6 +66,7 @@ func nics(ctx *context.Context) []*NIC { mac := netDeviceMacAddress(paths, filename) nic.MacAddress = mac + nic.MACAddress = mac if etAvailable { nic.netDeviceParseEthtool(ctx, filename) } else { @@ -88,7 +88,7 @@ func netDeviceMacAddress(paths *linuxpath.Paths, dev string) string { // that have addr_assign_type != 0, return None since the MAC address is // random. aatPath := filepath.Join(paths.SysClassNet, dev, "addr_assign_type") - contents, err := ioutil.ReadFile(aatPath) + contents, err := os.ReadFile(aatPath) if err != nil { return "" } @@ -96,7 +96,7 @@ func netDeviceMacAddress(paths *linuxpath.Paths, dev string) string { return "" } addrPath := filepath.Join(paths.SysClassNet, dev, "address") - contents, err = ioutil.ReadFile(addrPath) + contents, err = os.ReadFile(addrPath) if err != nil { return "" } @@ -256,7 +256,7 @@ func (nic *NIC) setNicAttrSysFs(paths *linuxpath.Paths, dev string) { } func readFile(path string) string { - contents, err := ioutil.ReadFile(path) + contents, err := os.ReadFile(path) if err != nil { return "" } diff --git a/vendor/github.com/jaypipes/ghw/pkg/net/net_windows.go b/vendor/github.com/jaypipes/ghw/pkg/net/net_windows.go index 0b46aa56..7efc0946 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/net/net_windows.go +++ b/vendor/github.com/jaypipes/ghw/pkg/net/net_windows.go @@ -45,10 +45,10 @@ func nics(win32NetDescriptions []win32NetworkAdapter) []*NIC { nic := &NIC{ Name: netDeviceName(nicDescription), MacAddress: *nicDescription.MACAddress, + MACAddress: *nicDescription.MACAddress, IsVirtual: netIsVirtual(nicDescription), Capabilities: []*NICCapability{}, } - // Appenging NIC to NICs nics = append(nics, nic) } diff --git a/vendor/github.com/jaypipes/ghw/pkg/option/option.go b/vendor/github.com/jaypipes/ghw/pkg/option/option.go index 6cd231de..7ce14016 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/option/option.go +++ b/vendor/github.com/jaypipes/ghw/pkg/option/option.go @@ -8,7 +8,6 @@ package option import ( "io" - "io/ioutil" "log" "os" ) @@ -35,7 +34,7 @@ type Alerter interface { } var ( - NullAlerter = log.New(ioutil.Discard, "", 0) + NullAlerter = log.New(io.Discard, "", 0) ) // EnvOrDefaultAlerter returns the default instance ghw will use to emit @@ -45,7 +44,7 @@ var ( func EnvOrDefaultAlerter() Alerter { var dest io.Writer if _, exists := os.LookupEnv(envKeyDisableWarnings); exists { - dest = ioutil.Discard + dest = io.Discard } else { // default dest = os.Stderr diff --git a/vendor/github.com/jaypipes/ghw/pkg/pci/address/address.go b/vendor/github.com/jaypipes/ghw/pkg/pci/address/address.go index 6a8a4e45..660238c2 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/pci/address/address.go +++ b/vendor/github.com/jaypipes/ghw/pkg/pci/address/address.go @@ -13,7 +13,7 @@ import ( var ( regexAddress *regexp.Regexp = regexp.MustCompile( - `^(([0-9a-f]{0,4}):)?([0-9a-f]{2}):([0-9a-f]{2})\.([0-9a-f]{1})$`, + `^((1?[0-9a-f]{0,4}):)?([0-9a-f]{2}):([0-9a-f]{2})\.([0-9a-f]{1})$`, ) ) @@ -30,12 +30,11 @@ func (addr *Address) String() string { return addr.Domain + ":" + addr.Bus + ":" + addr.Device + "." + addr.Function } -// FromString returns an Address struct from an ddress string in either -// $BUS:$DEVICE.$FUNCTION (BDF) format or it can be a full PCI address that -// includes the 4-digit $DOMAIN information as well: -// $DOMAIN:$BUS:$DEVICE.$FUNCTION. +// FromString returns [Address] from an address string in either +// $BUS:$DEVICE.$FUNCTION (BDF) format or a full PCI address that +// includes the domain: $DOMAIN:$BUS:$DEVICE.$FUNCTION. // -// Returns "" if the address string wasn't a valid PCI address. +// If the address string isn't a valid PCI address, then nil is returned. func FromString(address string) *Address { addrLowered := strings.ToLower(address) matches := regexAddress.FindStringSubmatch(addrLowered) diff --git a/vendor/github.com/jaypipes/ghw/pkg/pci/pci.go b/vendor/github.com/jaypipes/ghw/pkg/pci/pci.go index 86cc7b25..49adde62 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/pci/pci.go +++ b/vendor/github.com/jaypipes/ghw/pkg/pci/pci.go @@ -15,17 +15,10 @@ import ( "github.com/jaypipes/ghw/pkg/context" "github.com/jaypipes/ghw/pkg/marshal" "github.com/jaypipes/ghw/pkg/option" - pciaddr "github.com/jaypipes/ghw/pkg/pci/address" "github.com/jaypipes/ghw/pkg/topology" "github.com/jaypipes/ghw/pkg/util" ) -// backward compatibility, to be removed in 1.0.0 -type Address pciaddr.Address - -// backward compatibility, to be removed in 1.0.0 -var AddressFromString = pciaddr.FromString - type Device struct { // The PCI address of the device Address string `json:"address"` @@ -123,22 +116,11 @@ func (d *Device) String() string { } type Info struct { + db *pcidb.PCIDB arch topology.Architecture ctx *context.Context // All PCI devices on the host system Devices []*Device - // hash of class ID -> class information - // DEPRECATED. Will be removed in v1.0. Please use - // github.com/jaypipes/pcidb to explore PCIDB information - Classes map[string]*pcidb.Class `json:"-"` - // hash of vendor ID -> vendor information - // DEPRECATED. Will be removed in v1.0. Please use - // github.com/jaypipes/pcidb to explore PCIDB information - Vendors map[string]*pcidb.Vendor `json:"-"` - // hash of vendor ID + product/device ID -> product information - // DEPRECATED. Will be removed in v1.0. Please use - // github.com/jaypipes/pcidb to explore PCIDB information - Products map[string]*pcidb.Product `json:"-"` } func (i *Info) String() string { diff --git a/vendor/github.com/jaypipes/ghw/pkg/pci/pci_linux.go b/vendor/github.com/jaypipes/ghw/pkg/pci/pci_linux.go index 087da33d..538e77f3 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/pci/pci_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/pci/pci_linux.go @@ -6,7 +6,6 @@ package pci import ( - "io/ioutil" "os" "path/filepath" "strings" @@ -44,10 +43,8 @@ func (i *Info) load() error { if err != nil { return err } - i.Classes = db.Classes - i.Vendors = db.Vendors - i.Products = db.Products - i.Devices = i.ListDevices() + i.db = db + i.Devices = i.getDevices() return nil } @@ -71,7 +68,7 @@ func getDeviceRevision(ctx *context.Context, pciAddr *pciaddr.Address) string { if _, err := os.Stat(revisionPath); err != nil { return "" } - revision, err := ioutil.ReadFile(revisionPath) + revision, err := os.ReadFile(revisionPath) if err != nil { return "" } @@ -125,7 +122,7 @@ func parseModaliasFile(fp string) *deviceModaliasInfo { if _, err := os.Stat(fp); err != nil { return nil } - data, err := ioutil.ReadFile(fp) + data, err := os.ReadFile(fp) if err != nil { return nil } @@ -179,7 +176,7 @@ func parseModaliasData(data string) *deviceModaliasInfo { // pcidb.Vendor struct populated with "unknown" vendor Name attribute and // empty Products attribute. func findPCIVendor(info *Info, vendorID string) *pcidb.Vendor { - vendor := info.Vendors[vendorID] + vendor := info.db.Vendors[vendorID] if vendor == nil { return &pcidb.Vendor{ ID: vendorID, @@ -199,7 +196,7 @@ func findPCIProduct( vendorID string, productID string, ) *pcidb.Product { - product := info.Products[vendorID+productID] + product := info.db.Products[vendorID+productID] if product == nil { return &pcidb.Product{ ID: productID, @@ -221,8 +218,8 @@ func findPCISubsystem( subvendorID string, subproductID string, ) *pcidb.Product { - product := info.Products[vendorID+productID] - subvendor := info.Vendors[subvendorID] + product := info.db.Products[vendorID+productID] + subvendor := info.db.Vendors[subvendorID] if subvendor != nil && product != nil { for _, p := range product.Subsystems { if p.ID == subproductID { @@ -242,7 +239,7 @@ func findPCISubsystem( // pcidb.Class struct populated with "unknown" class Name attribute and // empty Subclasses attribute. func findPCIClass(info *Info, classID string) *pcidb.Class { - class := info.Classes[classID] + class := info.db.Classes[classID] if class == nil { return &pcidb.Class{ ID: classID, @@ -262,7 +259,7 @@ func findPCISubclass( classID string, subclassID string, ) *pcidb.Subclass { - class := info.Classes[classID] + class := info.db.Classes[classID] if class != nil { for _, sc := range class.Subclasses { if sc.ID == subclassID { @@ -346,7 +343,10 @@ func (info *Info) ParseDevice(address, modalias string) *Device { return info.getDeviceFromModaliasInfo(address, modaliasInfo) } -func (info *Info) getDeviceFromModaliasInfo(address string, modaliasInfo *deviceModaliasInfo) *Device { +func (info *Info) getDeviceFromModaliasInfo( + address string, + modaliasInfo *deviceModaliasInfo, +) *Device { vendor := findPCIVendor(info, modaliasInfo.vendorID) product := findPCIProduct( info, @@ -384,18 +384,16 @@ func (info *Info) getDeviceFromModaliasInfo(address string, modaliasInfo *device } } -// ListDevices returns a list of pointers to Device structs present on the +// getDevices returns a list of pointers to Device structs present on the // host system -// DEPRECATED. Will be removed in v1.0. Please use -// github.com/jaypipes/pcidb to explore PCIDB information -func (info *Info) ListDevices() []*Device { +func (info *Info) getDevices() []*Device { paths := linuxpath.New(info.ctx) devs := make([]*Device, 0) // We scan the /sys/bus/pci/devices directory which contains a collection // of symlinks. The names of the symlinks are all the known PCI addresses // for the host. For each address, we grab a *Device matching the // address and append to the returned array. - links, err := ioutil.ReadDir(paths.SysBusPciDevices) + links, err := os.ReadDir(paths.SysBusPciDevices) if err != nil { info.ctx.Warn("failed to read /sys/bus/pci/devices") return nil diff --git a/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree.go b/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree.go index 519a874d..020e7e67 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree.go +++ b/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree.go @@ -8,7 +8,6 @@ package snapshot import ( "errors" - "io/ioutil" "os" "path/filepath" "strings" @@ -182,7 +181,7 @@ func copyLink(path, targetPath string) error { } func copyPseudoFile(path, targetPath string) error { - buf, err := ioutil.ReadFile(path) + buf, err := os.ReadFile(path) if err != nil { return err } diff --git a/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_block_linux.go b/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_block_linux.go index 18e2161a..f692d413 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_block_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_block_linux.go @@ -8,7 +8,6 @@ package snapshot import ( "errors" - "io/ioutil" "os" "path/filepath" "strings" @@ -18,7 +17,7 @@ func createBlockDevices(buildDir string) error { // Grab all the block device pseudo-directories from /sys/block symlinks // (excluding loopback devices) and inject them into our build filesystem // with all but the circular symlink'd subsystem directories - devLinks, err := ioutil.ReadDir("/sys/block") + devLinks, err := os.ReadDir("/sys/block") if err != nil { return err } @@ -78,7 +77,7 @@ func createBlockDeviceDir(buildDeviceDir string, srcDeviceDir string) error { // Populate the supplied directory (in our build filesystem) with all the // appropriate information pseudofile contents for the block device. devName := filepath.Base(srcDeviceDir) - devFiles, err := ioutil.ReadDir(srcDeviceDir) + devFiles, err := os.ReadDir(srcDeviceDir) if err != nil { return err } @@ -119,7 +118,7 @@ func createBlockDeviceDir(buildDeviceDir string, srcDeviceDir string) error { // Regular files in the block device directory are both regular and // pseudofiles containing information such as the size (in sectors) // and whether the device is read-only - buf, err := ioutil.ReadFile(fp) + buf, err := os.ReadFile(fp) if err != nil { if errors.Is(err, os.ErrPermission) { // example: /sys/devices/virtual/block/zram0/compact is 0400 @@ -156,7 +155,7 @@ func createBlockDeviceDir(buildDeviceDir string, srcDeviceDir string) error { return err } fp := filepath.Join(srcQueueDir, "rotational") - buf, err := ioutil.ReadFile(fp) + buf, err := os.ReadFile(fp) if err != nil { return err } @@ -177,7 +176,7 @@ func createBlockDeviceDir(buildDeviceDir string, srcDeviceDir string) error { func createPartitionDir(buildPartitionDir string, srcPartitionDir string) error { // Populate the supplied directory (in our build filesystem) with all the // appropriate information pseudofile contents for the partition. - partFiles, err := ioutil.ReadDir(srcPartitionDir) + partFiles, err := os.ReadDir(srcPartitionDir) if err != nil { return err } @@ -201,7 +200,7 @@ func createPartitionDir(buildPartitionDir string, srcPartitionDir string) error // Regular files in the block device directory are both regular and // pseudofiles containing information such as the size (in sectors) // and whether the device is read-only - buf, err := ioutil.ReadFile(fp) + buf, err := os.ReadFile(fp) if err != nil { return err } diff --git a/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_linux.go b/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_linux.go index 0ccd6935..68fdeceb 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_linux.go @@ -7,7 +7,6 @@ package snapshot import ( - "io/ioutil" "os" "path/filepath" ) @@ -67,7 +66,7 @@ func cloneContentByClass(devClass string, subEntries []string, filterName filter // warning: don't use the context package here, this means not even the linuxpath package. // TODO(fromani) remove the path duplication sysClass := filepath.Join("sys", "class", devClass) - entries, err := ioutil.ReadDir(sysClass) + entries, err := os.ReadDir(sysClass) if err != nil { // we should not import context, hence we can't Warn() return fileSpecs diff --git a/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_pci_linux.go b/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_pci_linux.go index dbc3fc83..e7aa7d26 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_pci_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/snapshot/clonetree_pci_linux.go @@ -8,7 +8,6 @@ package snapshot import ( "fmt" - "io/ioutil" "os" "path/filepath" @@ -70,7 +69,7 @@ func scanPCIDeviceRoot(root string) (fileSpecs []string, pciRoots []string) { "revision", "vendor", } - entries, err := ioutil.ReadDir(root) + entries, err := os.ReadDir(root) if err != nil { return []string{}, []string{} } @@ -124,7 +123,7 @@ func findPCIEntryFromPath(root, entryName string) (string, error) { } func isPCIBridge(entryPath string) bool { - subNodes, err := ioutil.ReadDir(entryPath) + subNodes, err := os.ReadDir(entryPath) if err != nil { // this is so unlikely we don't even return error. But we trace just in case. trace("error scanning device entry path %q: %v", entryPath, err) diff --git a/vendor/github.com/jaypipes/ghw/pkg/snapshot/unpack.go b/vendor/github.com/jaypipes/ghw/pkg/snapshot/unpack.go index 3df395e2..f05f8f79 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/snapshot/unpack.go +++ b/vendor/github.com/jaypipes/ghw/pkg/snapshot/unpack.go @@ -10,7 +10,6 @@ import ( "archive/tar" "compress/gzip" "io" - "io/ioutil" "os" "path/filepath" @@ -39,7 +38,7 @@ func Cleanup(targetRoot string) error { // Unpack expands the given snapshot in a temporary directory managed by `ghw`. Returns the path of that directory. func Unpack(snapshotName string) (string, error) { - targetRoot, err := ioutil.TempDir("", TargetRoot) + targetRoot, err := os.MkdirTemp("", TargetRoot) if err != nil { return "", err } @@ -121,7 +120,7 @@ func Untar(root string, r io.Reader) error { } func isEmptyDir(name string) bool { - entries, err := ioutil.ReadDir(name) + entries, err := os.ReadDir(name) if err != nil { return false } diff --git a/vendor/github.com/jaypipes/ghw/pkg/topology/topology_linux.go b/vendor/github.com/jaypipes/ghw/pkg/topology/topology_linux.go index 6844dd96..9d8434cb 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/topology/topology_linux.go +++ b/vendor/github.com/jaypipes/ghw/pkg/topology/topology_linux.go @@ -7,7 +7,7 @@ package topology import ( "fmt" - "io/ioutil" + "os" "path/filepath" "strconv" "strings" @@ -32,7 +32,7 @@ func topologyNodes(ctx *context.Context) []*Node { paths := linuxpath.New(ctx) nodes := make([]*Node, 0) - files, err := ioutil.ReadDir(paths.SysDevicesSystemNode) + files, err := os.ReadDir(paths.SysDevicesSystemNode) if err != nil { ctx.Warn("failed to determine nodes: %s\n", err) return nodes @@ -89,7 +89,7 @@ func distancesForNode(ctx *context.Context, nodeID int) ([]int, error) { "distance", ) - data, err := ioutil.ReadFile(path) + data, err := os.ReadFile(path) if err != nil { return nil, err } diff --git a/vendor/github.com/jaypipes/ghw/pkg/util/util.go b/vendor/github.com/jaypipes/ghw/pkg/util/util.go index 5d57bda2..816aeb1b 100644 --- a/vendor/github.com/jaypipes/ghw/pkg/util/util.go +++ b/vendor/github.com/jaypipes/ghw/pkg/util/util.go @@ -8,7 +8,6 @@ package util import ( "fmt" - "io/ioutil" "os" "strconv" "strings" @@ -37,7 +36,7 @@ func SafeClose(c closer) { // message is printed to STDERR and -1 is returned. func SafeIntFromFile(ctx *context.Context, path string) int { msg := "failed to read int from file: %s\n" - buf, err := ioutil.ReadFile(path) + buf, err := os.ReadFile(path) if err != nil { ctx.Warn(msg, err) return -1 diff --git a/vendor/github.com/jaypipes/pcidb/README.md b/vendor/github.com/jaypipes/pcidb/README.md index ddfcde6b..27485dca 100644 --- a/vendor/github.com/jaypipes/pcidb/README.md +++ b/vendor/github.com/jaypipes/pcidb/README.md @@ -1,6 +1,6 @@ # `pcidb` - the Golang PCI DB library -[![Build Status](https://github.com/jaypipes/pcidb/actions/workflows/go.yml/badge.svg?branch=main)](https://github.com/jaypipes/pcidb/actions) +[![Build Status](https://github.com/jaypipes/pcidb/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/jaypipes/pcidb/actions) [![Go Report Card](https://goreportcard.com/badge/github.com/jaypipes/pcidb)](https://goreportcard.com/report/github.com/jaypipes/pcidb) [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](CODE_OF_CONDUCT.md) diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml index 7a008a4d..4528059c 100644 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -1,9 +1,8 @@ -# This is an example goreleaser.yaml file with some sane defaults. -# Make sure to check the documentation at http://goreleaser.com +version: 2 + before: hooks: - ./gen.sh - - go install mvdan.cc/garble@v0.9.3 builds: - @@ -32,7 +31,6 @@ builds: - mips64le goarm: - 7 - gobinary: garble - id: "s2d" binary: s2d @@ -59,7 +57,6 @@ builds: - mips64le goarm: - 7 - gobinary: garble - id: "s2sx" binary: s2sx @@ -87,21 +84,11 @@ builds: - mips64le goarm: - 7 - gobinary: garble archives: - id: s2-binaries - name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}" - replacements: - aix: AIX - darwin: OSX - linux: Linux - windows: Windows - 386: i386 - amd64: x86_64 - freebsd: FreeBSD - netbsd: NetBSD + name_template: "s2-{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" format_overrides: - goos: windows format: zip @@ -112,7 +99,7 @@ archives: checksum: name_template: 'checksums.txt' snapshot: - name_template: "{{ .Tag }}-next" + version_template: "{{ .Tag }}-next" changelog: sort: asc filters: @@ -125,7 +112,7 @@ changelog: nfpms: - - file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}" + file_name_template: "s2_package__{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" vendor: Klaus Post homepage: https://github.com/klauspost/compress maintainer: Klaus Post @@ -134,8 +121,3 @@ nfpms: formats: - deb - rpm - replacements: - darwin: Darwin - linux: Linux - freebsd: FreeBSD - amd64: x86_64 diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 4002a16a..de264c85 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -16,6 +16,75 @@ This package provides various compression algorithms. # changelog +* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10) + * gzhttp: Add TransportAlwaysDecompress option. https://github.com/klauspost/compress/pull/978 + * gzhttp: Add supported decompress request body by @mirecl in https://github.com/klauspost/compress/pull/1002 + * s2: Add EncodeBuffer buffer recycling callback https://github.com/klauspost/compress/pull/982 + * zstd: Improve memory usage on small streaming encodes https://github.com/klauspost/compress/pull/1007 + * flate: read data written with partial flush by @vajexal in https://github.com/klauspost/compress/pull/996 + +* Jun 12th, 2024 - [1.17.9](https://github.com/klauspost/compress/releases/tag/v1.17.9) + * s2: Reduce ReadFrom temporary allocations https://github.com/klauspost/compress/pull/949 + * flate, zstd: Shave some bytes off amd64 matchLen by @greatroar in https://github.com/klauspost/compress/pull/963 + * Upgrade zip/zlib to 1.22.4 upstream https://github.com/klauspost/compress/pull/970 https://github.com/klauspost/compress/pull/971 + * zstd: BuildDict fails with RLE table https://github.com/klauspost/compress/pull/951 + +* Apr 9th, 2024 - [1.17.8](https://github.com/klauspost/compress/releases/tag/v1.17.8) + * zstd: Reject blocks where reserved values are not 0 https://github.com/klauspost/compress/pull/885 + * zstd: Add RLE detection+encoding https://github.com/klauspost/compress/pull/938 + +* Feb 21st, 2024 - [1.17.7](https://github.com/klauspost/compress/releases/tag/v1.17.7) + * s2: Add AsyncFlush method: Complete the block without flushing by @Jille in https://github.com/klauspost/compress/pull/927 + * s2: Fix literal+repeat exceeds dst crash https://github.com/klauspost/compress/pull/930 + +* Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6) + * zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923 + * s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925 + +* Jan 26th, 2024 - [v1.17.5](https://github.com/klauspost/compress/releases/tag/v1.17.5) + * flate: Fix reset with dictionary on custom window encodes https://github.com/klauspost/compress/pull/912 + * zstd: Add Frame header encoding and stripping https://github.com/klauspost/compress/pull/908 + * zstd: Limit better/best default window to 8MB https://github.com/klauspost/compress/pull/913 + * zstd: Speed improvements by @greatroar in https://github.com/klauspost/compress/pull/896 https://github.com/klauspost/compress/pull/910 + * s2: Fix callbacks for skippable blocks and disallow 0xfe (Padding) by @Jille in https://github.com/klauspost/compress/pull/916 https://github.com/klauspost/compress/pull/917 +https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/compress/pull/918 + +* Dec 1st, 2023 - [v1.17.4](https://github.com/klauspost/compress/releases/tag/v1.17.4) + * huff0: Speed up symbol counting by @greatroar in https://github.com/klauspost/compress/pull/887 + * huff0: Remove byteReader by @greatroar in https://github.com/klauspost/compress/pull/886 + * gzhttp: Allow overriding decompression on transport https://github.com/klauspost/compress/pull/892 + * gzhttp: Clamp compression level https://github.com/klauspost/compress/pull/890 + * gzip: Error out if reserved bits are set https://github.com/klauspost/compress/pull/891 + +* Nov 15th, 2023 - [v1.17.3](https://github.com/klauspost/compress/releases/tag/v1.17.3) + * fse: Fix max header size https://github.com/klauspost/compress/pull/881 + * zstd: Improve better/best compression https://github.com/klauspost/compress/pull/877 + * gzhttp: Fix missing content type on Close https://github.com/klauspost/compress/pull/883 + +* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2) + * zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876 + +* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1) + * s2: Fix S2 "best" dictionary wrong encoding by @klauspost in https://github.com/klauspost/compress/pull/871 + * flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869 + * s2: Fix EstimateBlockSize on 6&7 length input by @klauspost in https://github.com/klauspost/compress/pull/867 + +* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0) + * Add experimental dictionary builder https://github.com/klauspost/compress/pull/853 + * Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838 + * flate: Add limited window compression https://github.com/klauspost/compress/pull/843 + * s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839 + * flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837 + * gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860 + +
+ See changes to v1.16.x + + +* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7) + * zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829 + * s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832 + * June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6) * zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806 * zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824 @@ -33,7 +102,7 @@ This package provides various compression algorithms. * zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795 * s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779 * s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780 - * gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 + * gzhttp: Support ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 * Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1) * zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776 @@ -49,7 +118,11 @@ This package provides various compression algorithms. * s2: Add LZ4 block converter. https://github.com/klauspost/compress/pull/748 * s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747 * s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746 +
+
+ See changes to v1.15.x + * Jan 21st, 2023 (v1.15.15) * deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739 * zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728 @@ -84,7 +157,7 @@ This package provides various compression algorithms. * zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649 * Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651 * flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656 - * zstd: Improve "better" compresssion https://github.com/klauspost/compress/pull/657 + * zstd: Improve "better" compression https://github.com/klauspost/compress/pull/657 * s2: Improve "best" compression https://github.com/klauspost/compress/pull/658 * s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635 * s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646 @@ -176,6 +249,8 @@ Stream decompression is now faster on asynchronous, since the goroutine allocati While the release has been extensively tested, it is recommended to testing when upgrading. +
+
See changes to v1.14.x @@ -285,7 +360,7 @@ While the release has been extensively tested, it is recommended to testing when * s2: Fix binaries. * Feb 25, 2021 (v1.11.8) - * s2: Fixed occational out-of-bounds write on amd64. Upgrade recommended. + * s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended. * s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315) * s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322) * zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314) @@ -464,7 +539,7 @@ While the release has been extensively tested, it is recommended to testing when * Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster. * Feb 19, 2016: Handle small payloads faster in level 1-3. * Feb 19, 2016: Added faster level 2 + 3 compression modes. -* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progresssion in terms of compression. New default level is 5. +* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5. * Feb 14, 2016: Snappy: Merge upstream changes. * Feb 14, 2016: Snappy: Fix aggressive skipping. * Feb 14, 2016: Snappy: Update benchmark. @@ -511,6 +586,8 @@ the stateless compress described below. For compression performance, see: [this spreadsheet](https://docs.google.com/spreadsheets/d/1nuNE2nPfuINCZJRMt6wFWhKpToF95I47XjSsc-1rbPQ/edit?usp=sharing). +To disable all assembly add `-tags=noasm`. This works across all packages. + # Stateless compression This package offers stateless compression as a special option for gzip/deflate. @@ -529,7 +606,7 @@ For direct deflate use, NewStatelessWriter and StatelessDeflate are available. S A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer: -``` +```go // replace 'ioutil.Discard' with your output. gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression) if err != nil { @@ -636,6 +713,8 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv * [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer. * [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression. * [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression. +* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index. +* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor. # license diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go deleted file mode 100644 index 5faea0b2..00000000 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ /dev/null @@ -1,988 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Copyright (c) 2015 Klaus Post -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "encoding/binary" - "fmt" - "io" - "math" -) - -const ( - NoCompression = 0 - BestSpeed = 1 - BestCompression = 9 - DefaultCompression = -1 - - // HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman - // entropy encoding. This mode is useful in compressing data that has - // already been compressed with an LZ style algorithm (e.g. Snappy or LZ4) - // that lacks an entropy encoder. Compression gains are achieved when - // certain bytes in the input stream occur more frequently than others. - // - // Note that HuffmanOnly produces a compressed output that is - // RFC 1951 compliant. That is, any valid DEFLATE decompressor will - // continue to be able to decompress this output. - HuffmanOnly = -2 - ConstantCompression = HuffmanOnly // compatibility alias. - - logWindowSize = 15 - windowSize = 1 << logWindowSize - windowMask = windowSize - 1 - logMaxOffsetSize = 15 // Standard DEFLATE - minMatchLength = 4 // The smallest match that the compressor looks for - maxMatchLength = 258 // The longest match for the compressor - minOffsetSize = 1 // The shortest offset that makes any sense - - // The maximum number of tokens we will encode at the time. - // Smaller sizes usually creates less optimal blocks. - // Bigger can make context switching slow. - // We use this for levels 7-9, so we make it big. - maxFlateBlockTokens = 1 << 15 - maxStoreBlockSize = 65535 - hashBits = 17 // After 17 performance degrades - hashSize = 1 << hashBits - hashMask = (1 << hashBits) - 1 - hashShift = (hashBits + minMatchLength - 1) / minMatchLength - maxHashOffset = 1 << 28 - - skipNever = math.MaxInt32 - - debugDeflate = false -) - -type compressionLevel struct { - good, lazy, nice, chain, fastSkipHashing, level int -} - -// Compression levels have been rebalanced from zlib deflate defaults -// to give a bigger spread in speed and compression. -// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/ -var levels = []compressionLevel{ - {}, // 0 - // Level 1-6 uses specialized algorithm - values not used - {0, 0, 0, 0, 0, 1}, - {0, 0, 0, 0, 0, 2}, - {0, 0, 0, 0, 0, 3}, - {0, 0, 0, 0, 0, 4}, - {0, 0, 0, 0, 0, 5}, - {0, 0, 0, 0, 0, 6}, - // Levels 7-9 use increasingly more lazy matching - // and increasingly stringent conditions for "good enough". - {8, 12, 16, 24, skipNever, 7}, - {16, 30, 40, 64, skipNever, 8}, - {32, 258, 258, 1024, skipNever, 9}, -} - -// advancedState contains state for the advanced levels, with bigger hash tables, etc. -type advancedState struct { - // deflate state - length int - offset int - maxInsertIndex int - chainHead int - hashOffset int - - ii uint16 // position of last match, intended to overflow to reset. - - // input window: unprocessed data is window[index:windowEnd] - index int - hashMatch [maxMatchLength + minMatchLength]uint32 - - // Input hash chains - // hashHead[hashValue] contains the largest inputIndex with the specified hash value - // If hashHead[hashValue] is within the current window, then - // hashPrev[hashHead[hashValue] & windowMask] contains the previous index - // with the same hash value. - hashHead [hashSize]uint32 - hashPrev [windowSize]uint32 -} - -type compressor struct { - compressionLevel - - h *huffmanEncoder - w *huffmanBitWriter - - // compression algorithm - fill func(*compressor, []byte) int // copy data to window - step func(*compressor) // process window - - window []byte - windowEnd int - blockStart int // window index where current tokens start - err error - - // queued output tokens - tokens tokens - fast fastEnc - state *advancedState - - sync bool // requesting flush - byteAvailable bool // if true, still need to process window[index-1]. -} - -func (d *compressor) fillDeflate(b []byte) int { - s := d.state - if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) { - // shift the window by windowSize - //copy(d.window[:], d.window[windowSize:2*windowSize]) - *(*[windowSize]byte)(d.window) = *(*[windowSize]byte)(d.window[windowSize:]) - s.index -= windowSize - d.windowEnd -= windowSize - if d.blockStart >= windowSize { - d.blockStart -= windowSize - } else { - d.blockStart = math.MaxInt32 - } - s.hashOffset += windowSize - if s.hashOffset > maxHashOffset { - delta := s.hashOffset - 1 - s.hashOffset -= delta - s.chainHead -= delta - // Iterate over slices instead of arrays to avoid copying - // the entire table onto the stack (Issue #18625). - for i, v := range s.hashPrev[:] { - if int(v) > delta { - s.hashPrev[i] = uint32(int(v) - delta) - } else { - s.hashPrev[i] = 0 - } - } - for i, v := range s.hashHead[:] { - if int(v) > delta { - s.hashHead[i] = uint32(int(v) - delta) - } else { - s.hashHead[i] = 0 - } - } - } - } - n := copy(d.window[d.windowEnd:], b) - d.windowEnd += n - return n -} - -func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error { - if index > 0 || eof { - var window []byte - if d.blockStart <= index { - window = d.window[d.blockStart:index] - } - d.blockStart = index - //d.w.writeBlock(tok, eof, window) - d.w.writeBlockDynamic(tok, eof, window, d.sync) - return d.w.err - } - return nil -} - -// writeBlockSkip writes the current block and uses the number of tokens -// to determine if the block should be stored on no matches, or -// only huffman encoded. -func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error { - if index > 0 || eof { - if d.blockStart <= index { - window := d.window[d.blockStart:index] - // If we removed less than a 64th of all literals - // we huffman compress the block. - if int(tok.n) > len(window)-int(tok.n>>6) { - d.w.writeBlockHuff(eof, window, d.sync) - } else { - // Write a dynamic huffman block. - d.w.writeBlockDynamic(tok, eof, window, d.sync) - } - } else { - d.w.writeBlock(tok, eof, nil) - } - d.blockStart = index - return d.w.err - } - return nil -} - -// fillWindow will fill the current window with the supplied -// dictionary and calculate all hashes. -// This is much faster than doing a full encode. -// Should only be used after a start/reset. -func (d *compressor) fillWindow(b []byte) { - // Do not fill window if we are in store-only or huffman mode. - if d.level <= 0 { - return - } - if d.fast != nil { - // encode the last data, but discard the result - if len(b) > maxMatchOffset { - b = b[len(b)-maxMatchOffset:] - } - d.fast.Encode(&d.tokens, b) - d.tokens.Reset() - return - } - s := d.state - // If we are given too much, cut it. - if len(b) > windowSize { - b = b[len(b)-windowSize:] - } - // Add all to window. - n := copy(d.window[d.windowEnd:], b) - - // Calculate 256 hashes at the time (more L1 cache hits) - loops := (n + 256 - minMatchLength) / 256 - for j := 0; j < loops; j++ { - startindex := j * 256 - end := startindex + 256 + minMatchLength - 1 - if end > n { - end = n - } - tocheck := d.window[startindex:end] - dstSize := len(tocheck) - minMatchLength + 1 - - if dstSize <= 0 { - continue - } - - dst := s.hashMatch[:dstSize] - bulkHash4(tocheck, dst) - var newH uint32 - for i, val := range dst { - di := i + startindex - newH = val & hashMask - // Get previous value with the same hash. - // Our chain should point to the previous value. - s.hashPrev[di&windowMask] = s.hashHead[newH] - // Set the head of the hash chain to us. - s.hashHead[newH] = uint32(di + s.hashOffset) - } - } - // Update window information. - d.windowEnd += n - s.index = n -} - -// Try to find a match starting at index whose length is greater than prevSize. -// We only look at chainCount possibilities before giving up. -// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead -func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) { - minMatchLook := maxMatchLength - if lookahead < minMatchLook { - minMatchLook = lookahead - } - - win := d.window[0 : pos+minMatchLook] - - // We quit when we get a match that's at least nice long - nice := len(win) - pos - if d.nice < nice { - nice = d.nice - } - - // If we've got a match that's good enough, only look in 1/4 the chain. - tries := d.chain - length = minMatchLength - 1 - - wEnd := win[pos+length] - wPos := win[pos:] - minIndex := pos - windowSize - if minIndex < 0 { - minIndex = 0 - } - offset = 0 - - if d.chain < 100 { - for i := prevHead; tries > 0; tries-- { - if wEnd == win[i+length] { - n := matchLen(win[i:i+minMatchLook], wPos) - if n > length { - length = n - offset = pos - i - ok = true - if n >= nice { - // The match is good enough that we don't try to find a better one. - break - } - wEnd = win[pos+n] - } - } - if i <= minIndex { - // hashPrev[i & windowMask] has already been overwritten, so stop now. - break - } - i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset - if i < minIndex { - break - } - } - return - } - - // Minimum gain to accept a match. - cGain := 4 - - // Some like it higher (CSV), some like it lower (JSON) - const baseCost = 3 - // Base is 4 bytes at with an additional cost. - // Matches must be better than this. - - for i := prevHead; tries > 0; tries-- { - if wEnd == win[i+length] { - n := matchLen(win[i:i+minMatchLook], wPos) - if n > length { - // Calculate gain. Estimate - newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]]) - - //fmt.Println("gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n]), "this-len:", n, "prev-len:", length) - if newGain > cGain { - length = n - offset = pos - i - cGain = newGain - ok = true - if n >= nice { - // The match is good enough that we don't try to find a better one. - break - } - wEnd = win[pos+n] - } - } - } - if i <= minIndex { - // hashPrev[i & windowMask] has already been overwritten, so stop now. - break - } - i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset - if i < minIndex { - break - } - } - return -} - -func (d *compressor) writeStoredBlock(buf []byte) error { - if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { - return d.w.err - } - d.w.writeBytes(buf) - return d.w.err -} - -// hash4 returns a hash representation of the first 4 bytes -// of the supplied slice. -// The caller must ensure that len(b) >= 4. -func hash4(b []byte) uint32 { - return hash4u(binary.LittleEndian.Uint32(b), hashBits) -} - -// hash4 returns the hash of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <32. -func hash4u(u uint32, h uint8) uint32 { - return (u * prime4bytes) >> (32 - h) -} - -// bulkHash4 will compute hashes using the same -// algorithm as hash4 -func bulkHash4(b []byte, dst []uint32) { - if len(b) < 4 { - return - } - hb := binary.LittleEndian.Uint32(b) - - dst[0] = hash4u(hb, hashBits) - end := len(b) - 4 + 1 - for i := 1; i < end; i++ { - hb = (hb >> 8) | uint32(b[i+3])<<24 - dst[i] = hash4u(hb, hashBits) - } -} - -func (d *compressor) initDeflate() { - d.window = make([]byte, 2*windowSize) - d.byteAvailable = false - d.err = nil - if d.state == nil { - return - } - s := d.state - s.index = 0 - s.hashOffset = 1 - s.length = minMatchLength - 1 - s.offset = 0 - s.chainHead = -1 -} - -// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever, -// meaning it always has lazy matching on. -func (d *compressor) deflateLazy() { - s := d.state - // Sanity enables additional runtime tests. - // It's intended to be used during development - // to supplement the currently ad-hoc unit tests. - const sanity = debugDeflate - - if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { - return - } - if d.windowEnd != s.index && d.chain > 100 { - // Get literal huffman coder. - if d.h == nil { - d.h = newHuffmanEncoder(maxFlateBlockTokens) - } - var tmp [256]uint16 - for _, v := range d.window[s.index:d.windowEnd] { - tmp[v]++ - } - d.h.generate(tmp[:], 15) - } - - s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) - - for { - if sanity && s.index > d.windowEnd { - panic("index > windowEnd") - } - lookahead := d.windowEnd - s.index - if lookahead < minMatchLength+maxMatchLength { - if !d.sync { - return - } - if sanity && s.index > d.windowEnd { - panic("index > windowEnd") - } - if lookahead == 0 { - // Flush current output block if any. - if d.byteAvailable { - // There is still one pending token that needs to be flushed - d.tokens.AddLiteral(d.window[s.index-1]) - d.byteAvailable = false - } - if d.tokens.n > 0 { - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - return - } - } - if s.index < s.maxInsertIndex { - // Update the hash - hash := hash4(d.window[s.index:]) - ch := s.hashHead[hash] - s.chainHead = int(ch) - s.hashPrev[s.index&windowMask] = ch - s.hashHead[hash] = uint32(s.index + s.hashOffset) - } - prevLength := s.length - prevOffset := s.offset - s.length = minMatchLength - 1 - s.offset = 0 - minIndex := s.index - windowSize - if minIndex < 0 { - minIndex = 0 - } - - if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { - if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok { - s.length = newLength - s.offset = newOffset - } - } - - if prevLength >= minMatchLength && s.length <= prevLength { - // No better match, but check for better match at end... - // - // Skip forward a number of bytes. - // Offset of 2 seems to yield best results. 3 is sometimes better. - const checkOff = 2 - - // Check all, except full length - if prevLength < maxMatchLength-checkOff { - prevIndex := s.index - 1 - if prevIndex+prevLength < s.maxInsertIndex { - end := lookahead - if lookahead > maxMatchLength+checkOff { - end = maxMatchLength + checkOff - } - end += prevIndex - - // Hash at match end. - h := hash4(d.window[prevIndex+prevLength:]) - ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength - if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff { - length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:]) - // It seems like a pure length metric is best. - if length > prevLength { - prevLength = length - prevOffset = prevIndex - ch2 - - // Extend back... - for i := checkOff - 1; i >= 0; i-- { - if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i] { - // Emit tokens we "owe" - for j := 0; j <= i; j++ { - d.tokens.AddLiteral(d.window[prevIndex+j]) - if d.tokens.n == maxFlateBlockTokens { - // The block includes the current character - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - s.index++ - if s.index < s.maxInsertIndex { - h := hash4(d.window[s.index:]) - ch := s.hashHead[h] - s.chainHead = int(ch) - s.hashPrev[s.index&windowMask] = ch - s.hashHead[h] = uint32(s.index + s.hashOffset) - } - } - break - } else { - prevLength++ - } - } - } else if false { - // Check one further ahead. - // Only rarely better, disabled for now. - prevIndex++ - h := hash4(d.window[prevIndex+prevLength:]) - ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength - if prevIndex-ch2 != prevOffset && ch2 > minIndex+checkOff { - length := matchLen(d.window[prevIndex+checkOff:end], d.window[ch2+checkOff:]) - // It seems like a pure length metric is best. - if length > prevLength+checkOff { - prevLength = length - prevOffset = prevIndex - ch2 - prevIndex-- - - // Extend back... - for i := checkOff; i >= 0; i-- { - if prevLength >= maxMatchLength || d.window[prevIndex+i] != d.window[ch2+i-1] { - // Emit tokens we "owe" - for j := 0; j <= i; j++ { - d.tokens.AddLiteral(d.window[prevIndex+j]) - if d.tokens.n == maxFlateBlockTokens { - // The block includes the current character - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - s.index++ - if s.index < s.maxInsertIndex { - h := hash4(d.window[s.index:]) - ch := s.hashHead[h] - s.chainHead = int(ch) - s.hashPrev[s.index&windowMask] = ch - s.hashHead[h] = uint32(s.index + s.hashOffset) - } - } - break - } else { - prevLength++ - } - } - } - } - } - } - } - } - // There was a match at the previous step, and the current match is - // not better. Output the previous match. - d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) - - // Insert in the hash table all strings up to the end of the match. - // index and index-1 are already inserted. If there is not enough - // lookahead, the last two strings are not inserted into the hash - // table. - newIndex := s.index + prevLength - 1 - // Calculate missing hashes - end := newIndex - if end > s.maxInsertIndex { - end = s.maxInsertIndex - } - end += minMatchLength - 1 - startindex := s.index + 1 - if startindex > s.maxInsertIndex { - startindex = s.maxInsertIndex - } - tocheck := d.window[startindex:end] - dstSize := len(tocheck) - minMatchLength + 1 - if dstSize > 0 { - dst := s.hashMatch[:dstSize] - bulkHash4(tocheck, dst) - var newH uint32 - for i, val := range dst { - di := i + startindex - newH = val & hashMask - // Get previous value with the same hash. - // Our chain should point to the previous value. - s.hashPrev[di&windowMask] = s.hashHead[newH] - // Set the head of the hash chain to us. - s.hashHead[newH] = uint32(di + s.hashOffset) - } - } - - s.index = newIndex - d.byteAvailable = false - s.length = minMatchLength - 1 - if d.tokens.n == maxFlateBlockTokens { - // The block includes the current character - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - s.ii = 0 - } else { - // Reset, if we got a match this run. - if s.length >= minMatchLength { - s.ii = 0 - } - // We have a byte waiting. Emit it. - if d.byteAvailable { - s.ii++ - d.tokens.AddLiteral(d.window[s.index-1]) - if d.tokens.n == maxFlateBlockTokens { - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - s.index++ - - // If we have a long run of no matches, skip additional bytes - // Resets when s.ii overflows after 64KB. - if n := int(s.ii) - d.chain; n > 0 { - n = 1 + int(n>>6) - for j := 0; j < n; j++ { - if s.index >= d.windowEnd-1 { - break - } - d.tokens.AddLiteral(d.window[s.index-1]) - if d.tokens.n == maxFlateBlockTokens { - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - // Index... - if s.index < s.maxInsertIndex { - h := hash4(d.window[s.index:]) - ch := s.hashHead[h] - s.chainHead = int(ch) - s.hashPrev[s.index&windowMask] = ch - s.hashHead[h] = uint32(s.index + s.hashOffset) - } - s.index++ - } - // Flush last byte - d.tokens.AddLiteral(d.window[s.index-1]) - d.byteAvailable = false - // s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength - if d.tokens.n == maxFlateBlockTokens { - if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { - return - } - d.tokens.Reset() - } - } - } else { - s.index++ - d.byteAvailable = true - } - } - } -} - -func (d *compressor) store() { - if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) { - d.err = d.writeStoredBlock(d.window[:d.windowEnd]) - d.windowEnd = 0 - } -} - -// fillWindow will fill the buffer with data for huffman-only compression. -// The number of bytes copied is returned. -func (d *compressor) fillBlock(b []byte) int { - n := copy(d.window[d.windowEnd:], b) - d.windowEnd += n - return n -} - -// storeHuff will compress and store the currently added data, -// if enough has been accumulated or we at the end of the stream. -// Any error that occurred will be in d.err -func (d *compressor) storeHuff() { - if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 { - return - } - d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) - d.err = d.w.err - d.windowEnd = 0 -} - -// storeFast will compress and store the currently added data, -// if enough has been accumulated or we at the end of the stream. -// Any error that occurred will be in d.err -func (d *compressor) storeFast() { - // We only compress if we have maxStoreBlockSize. - if d.windowEnd < len(d.window) { - if !d.sync { - return - } - // Handle extremely small sizes. - if d.windowEnd < 128 { - if d.windowEnd == 0 { - return - } - if d.windowEnd <= 32 { - d.err = d.writeStoredBlock(d.window[:d.windowEnd]) - } else { - d.w.writeBlockHuff(false, d.window[:d.windowEnd], true) - d.err = d.w.err - } - d.tokens.Reset() - d.windowEnd = 0 - d.fast.Reset() - return - } - } - - d.fast.Encode(&d.tokens, d.window[:d.windowEnd]) - // If we made zero matches, store the block as is. - if d.tokens.n == 0 { - d.err = d.writeStoredBlock(d.window[:d.windowEnd]) - // If we removed less than 1/16th, huffman compress the block. - } else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) { - d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) - d.err = d.w.err - } else { - d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync) - d.err = d.w.err - } - d.tokens.Reset() - d.windowEnd = 0 -} - -// write will add input byte to the stream. -// Unless an error occurs all bytes will be consumed. -func (d *compressor) write(b []byte) (n int, err error) { - if d.err != nil { - return 0, d.err - } - n = len(b) - for len(b) > 0 { - if d.windowEnd == len(d.window) || d.sync { - d.step(d) - } - b = b[d.fill(d, b):] - if d.err != nil { - return 0, d.err - } - } - return n, d.err -} - -func (d *compressor) syncFlush() error { - d.sync = true - if d.err != nil { - return d.err - } - d.step(d) - if d.err == nil { - d.w.writeStoredHeader(0, false) - d.w.flush() - d.err = d.w.err - } - d.sync = false - return d.err -} - -func (d *compressor) init(w io.Writer, level int) (err error) { - d.w = newHuffmanBitWriter(w) - - switch { - case level == NoCompression: - d.window = make([]byte, maxStoreBlockSize) - d.fill = (*compressor).fillBlock - d.step = (*compressor).store - case level == ConstantCompression: - d.w.logNewTablePenalty = 10 - d.window = make([]byte, 32<<10) - d.fill = (*compressor).fillBlock - d.step = (*compressor).storeHuff - case level == DefaultCompression: - level = 5 - fallthrough - case level >= 1 && level <= 6: - d.w.logNewTablePenalty = 7 - d.fast = newFastEnc(level) - d.window = make([]byte, maxStoreBlockSize) - d.fill = (*compressor).fillBlock - d.step = (*compressor).storeFast - case 7 <= level && level <= 9: - d.w.logNewTablePenalty = 8 - d.state = &advancedState{} - d.compressionLevel = levels[level] - d.initDeflate() - d.fill = (*compressor).fillDeflate - d.step = (*compressor).deflateLazy - default: - return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) - } - d.level = level - return nil -} - -// reset the state of the compressor. -func (d *compressor) reset(w io.Writer) { - d.w.reset(w) - d.sync = false - d.err = nil - // We only need to reset a few things for Snappy. - if d.fast != nil { - d.fast.Reset() - d.windowEnd = 0 - d.tokens.Reset() - return - } - switch d.compressionLevel.chain { - case 0: - // level was NoCompression or ConstantCompresssion. - d.windowEnd = 0 - default: - s := d.state - s.chainHead = -1 - for i := range s.hashHead { - s.hashHead[i] = 0 - } - for i := range s.hashPrev { - s.hashPrev[i] = 0 - } - s.hashOffset = 1 - s.index, d.windowEnd = 0, 0 - d.blockStart, d.byteAvailable = 0, false - d.tokens.Reset() - s.length = minMatchLength - 1 - s.offset = 0 - s.ii = 0 - s.maxInsertIndex = 0 - } -} - -func (d *compressor) close() error { - if d.err != nil { - return d.err - } - d.sync = true - d.step(d) - if d.err != nil { - return d.err - } - if d.w.writeStoredHeader(0, true); d.w.err != nil { - return d.w.err - } - d.w.flush() - d.w.reset(nil) - return d.w.err -} - -// NewWriter returns a new Writer compressing data at the given level. -// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); -// higher levels typically run slower but compress more. -// Level 0 (NoCompression) does not attempt any compression; it only adds the -// necessary DEFLATE framing. -// Level -1 (DefaultCompression) uses the default compression level. -// Level -2 (ConstantCompression) will use Huffman compression only, giving -// a very fast compression for all types of input, but sacrificing considerable -// compression efficiency. -// -// If level is in the range [-2, 9] then the error returned will be nil. -// Otherwise the error returned will be non-nil. -func NewWriter(w io.Writer, level int) (*Writer, error) { - var dw Writer - if err := dw.d.init(w, level); err != nil { - return nil, err - } - return &dw, nil -} - -// NewWriterDict is like NewWriter but initializes the new -// Writer with a preset dictionary. The returned Writer behaves -// as if the dictionary had been written to it without producing -// any compressed output. The compressed data written to w -// can only be decompressed by a Reader initialized with the -// same dictionary. -func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { - zw, err := NewWriter(w, level) - if err != nil { - return nil, err - } - zw.d.fillWindow(dict) - zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method. - return zw, err -} - -// A Writer takes data written to it and writes the compressed -// form of that data to an underlying writer (see NewWriter). -type Writer struct { - d compressor - dict []byte -} - -// Write writes data to w, which will eventually write the -// compressed form of data to its underlying writer. -func (w *Writer) Write(data []byte) (n int, err error) { - return w.d.write(data) -} - -// Flush flushes any pending data to the underlying writer. -// It is useful mainly in compressed network protocols, to ensure that -// a remote reader has enough data to reconstruct a packet. -// Flush does not return until the data has been written. -// Calling Flush when there is no pending data still causes the Writer -// to emit a sync marker of at least 4 bytes. -// If the underlying writer returns an error, Flush returns that error. -// -// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. -func (w *Writer) Flush() error { - // For more about flushing: - // http://www.bolet.org/~pornin/deflate-flush.html - return w.d.syncFlush() -} - -// Close flushes and closes the writer. -func (w *Writer) Close() error { - return w.d.close() -} - -// Reset discards the writer's state and makes it equivalent to -// the result of NewWriter or NewWriterDict called with dst -// and w's level and dictionary. -func (w *Writer) Reset(dst io.Writer) { - if len(w.dict) > 0 { - // w was created with NewWriterDict - w.d.reset(dst) - if dst != nil { - w.d.fillWindow(w.dict) - } - } else { - // w was created with NewWriter - w.d.reset(dst) - } -} - -// ResetDict discards the writer's state and makes it equivalent to -// the result of NewWriter or NewWriterDict called with dst -// and w's level, but sets a specific dictionary. -func (w *Writer) ResetDict(dst io.Writer, dict []byte) { - w.dict = dict - w.d.reset(dst) - w.d.fillWindow(w.dict) -} diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go deleted file mode 100644 index bb36351a..00000000 --- a/vendor/github.com/klauspost/compress/flate/dict_decoder.go +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// dictDecoder implements the LZ77 sliding dictionary as used in decompression. -// LZ77 decompresses data through sequences of two forms of commands: -// -// - Literal insertions: Runs of one or more symbols are inserted into the data -// stream as is. This is accomplished through the writeByte method for a -// single symbol, or combinations of writeSlice/writeMark for multiple symbols. -// Any valid stream must start with a literal insertion if no preset dictionary -// is used. -// -// - Backward copies: Runs of one or more symbols are copied from previously -// emitted data. Backward copies come as the tuple (dist, length) where dist -// determines how far back in the stream to copy from and length determines how -// many bytes to copy. Note that it is valid for the length to be greater than -// the distance. Since LZ77 uses forward copies, that situation is used to -// perform a form of run-length encoding on repeated runs of symbols. -// The writeCopy and tryWriteCopy are used to implement this command. -// -// For performance reasons, this implementation performs little to no sanity -// checks about the arguments. As such, the invariants documented for each -// method call must be respected. -type dictDecoder struct { - hist []byte // Sliding window history - - // Invariant: 0 <= rdPos <= wrPos <= len(hist) - wrPos int // Current output position in buffer - rdPos int // Have emitted hist[:rdPos] already - full bool // Has a full window length been written yet? -} - -// init initializes dictDecoder to have a sliding window dictionary of the given -// size. If a preset dict is provided, it will initialize the dictionary with -// the contents of dict. -func (dd *dictDecoder) init(size int, dict []byte) { - *dd = dictDecoder{hist: dd.hist} - - if cap(dd.hist) < size { - dd.hist = make([]byte, size) - } - dd.hist = dd.hist[:size] - - if len(dict) > len(dd.hist) { - dict = dict[len(dict)-len(dd.hist):] - } - dd.wrPos = copy(dd.hist, dict) - if dd.wrPos == len(dd.hist) { - dd.wrPos = 0 - dd.full = true - } - dd.rdPos = dd.wrPos -} - -// histSize reports the total amount of historical data in the dictionary. -func (dd *dictDecoder) histSize() int { - if dd.full { - return len(dd.hist) - } - return dd.wrPos -} - -// availRead reports the number of bytes that can be flushed by readFlush. -func (dd *dictDecoder) availRead() int { - return dd.wrPos - dd.rdPos -} - -// availWrite reports the available amount of output buffer space. -func (dd *dictDecoder) availWrite() int { - return len(dd.hist) - dd.wrPos -} - -// writeSlice returns a slice of the available buffer to write data to. -// -// This invariant will be kept: len(s) <= availWrite() -func (dd *dictDecoder) writeSlice() []byte { - return dd.hist[dd.wrPos:] -} - -// writeMark advances the writer pointer by cnt. -// -// This invariant must be kept: 0 <= cnt <= availWrite() -func (dd *dictDecoder) writeMark(cnt int) { - dd.wrPos += cnt -} - -// writeByte writes a single byte to the dictionary. -// -// This invariant must be kept: 0 < availWrite() -func (dd *dictDecoder) writeByte(c byte) { - dd.hist[dd.wrPos] = c - dd.wrPos++ -} - -// writeCopy copies a string at a given (dist, length) to the output. -// This returns the number of bytes copied and may be less than the requested -// length if the available space in the output buffer is too small. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) writeCopy(dist, length int) int { - dstBase := dd.wrPos - dstPos := dstBase - srcPos := dstPos - dist - endPos := dstPos + length - if endPos > len(dd.hist) { - endPos = len(dd.hist) - } - - // Copy non-overlapping section after destination position. - // - // This section is non-overlapping in that the copy length for this section - // is always less than or equal to the backwards distance. This can occur - // if a distance refers to data that wraps-around in the buffer. - // Thus, a backwards copy is performed here; that is, the exact bytes in - // the source prior to the copy is placed in the destination. - if srcPos < 0 { - srcPos += len(dd.hist) - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) - srcPos = 0 - } - - // Copy possibly overlapping section before destination position. - // - // This section can overlap if the copy length for this section is larger - // than the backwards distance. This is allowed by LZ77 so that repeated - // strings can be succinctly represented using (dist, length) pairs. - // Thus, a forwards copy is performed here; that is, the bytes copied is - // possibly dependent on the resulting bytes in the destination as the copy - // progresses along. This is functionally equivalent to the following: - // - // for i := 0; i < endPos-dstPos; i++ { - // dd.hist[dstPos+i] = dd.hist[srcPos+i] - // } - // dstPos = endPos - // - for dstPos < endPos { - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// tryWriteCopy tries to copy a string at a given (distance, length) to the -// output. This specialized version is optimized for short distances. -// -// This method is designed to be inlined for performance reasons. -// -// This invariant must be kept: 0 < dist <= histSize() -func (dd *dictDecoder) tryWriteCopy(dist, length int) int { - dstPos := dd.wrPos - endPos := dstPos + length - if dstPos < dist || endPos > len(dd.hist) { - return 0 - } - dstBase := dstPos - srcPos := dstPos - dist - - // Copy possibly overlapping section before destination position. -loop: - dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) - if dstPos < endPos { - goto loop // Avoid for-loop so that this function can be inlined - } - - dd.wrPos = dstPos - return dstPos - dstBase -} - -// readFlush returns a slice of the historical buffer that is ready to be -// emitted to the user. The data returned by readFlush must be fully consumed -// before calling any other dictDecoder methods. -func (dd *dictDecoder) readFlush() []byte { - toRead := dd.hist[dd.rdPos:dd.wrPos] - dd.rdPos = dd.wrPos - if dd.wrPos == len(dd.hist) { - dd.wrPos, dd.rdPos = 0, 0 - dd.full = true - } - return toRead -} diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go deleted file mode 100644 index 24caf5f7..00000000 --- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go +++ /dev/null @@ -1,216 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Modified for deflate by Klaus Post (c) 2015. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "encoding/binary" - "fmt" - "math/bits" -) - -type fastEnc interface { - Encode(dst *tokens, src []byte) - Reset() -} - -func newFastEnc(level int) fastEnc { - switch level { - case 1: - return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}} - case 2: - return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}} - case 3: - return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}} - case 4: - return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}} - case 5: - return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}} - case 6: - return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}} - default: - panic("invalid level specified") - } -} - -const ( - tableBits = 15 // Bits used in the table - tableSize = 1 << tableBits // Size of the table - tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. - baseMatchOffset = 1 // The smallest match offset - baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 - maxMatchOffset = 1 << 15 // The largest match offset - - bTableBits = 17 // Bits used in the big tables - bTableSize = 1 << bTableBits // Size of the table - allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history. - bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. -) - -const ( - prime3bytes = 506832829 - prime4bytes = 2654435761 - prime5bytes = 889523592379 - prime6bytes = 227718039650203 - prime7bytes = 58295818150454627 - prime8bytes = 0xcf1bbcdcb7a56463 -) - -func load3232(b []byte, i int32) uint32 { - return binary.LittleEndian.Uint32(b[i:]) -} - -func load6432(b []byte, i int32) uint64 { - return binary.LittleEndian.Uint64(b[i:]) -} - -type tableEntry struct { - offset int32 -} - -// fastGen maintains the table for matches, -// and the previous byte block for level 2. -// This is the generic implementation. -type fastGen struct { - hist []byte - cur int32 -} - -func (e *fastGen) addBlock(src []byte) int32 { - // check if we have space already - if len(e.hist)+len(src) > cap(e.hist) { - if cap(e.hist) == 0 { - e.hist = make([]byte, 0, allocHistory) - } else { - if cap(e.hist) < maxMatchOffset*2 { - panic("unexpected buffer size") - } - // Move down - offset := int32(len(e.hist)) - maxMatchOffset - // copy(e.hist[0:maxMatchOffset], e.hist[offset:]) - *(*[maxMatchOffset]byte)(e.hist) = *(*[maxMatchOffset]byte)(e.hist[offset:]) - e.cur += offset - e.hist = e.hist[:maxMatchOffset] - } - } - s := int32(len(e.hist)) - e.hist = append(e.hist, src...) - return s -} - -type tableEntryPrev struct { - Cur tableEntry - Prev tableEntry -} - -// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. -// Preferably h should be a constant and should always be <64. -func hash7(u uint64, h uint8) uint32 { - return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64)) -} - -// hashLen returns a hash of the lowest mls bytes of with length output bits. -// mls must be >=3 and <=8. Any other value will return hash for 4 bytes. -// length should always be < 32. -// Preferably length and mls should be a constant for inlining. -func hashLen(u uint64, length, mls uint8) uint32 { - switch mls { - case 3: - return (uint32(u<<8) * prime3bytes) >> (32 - length) - case 5: - return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length)) - case 6: - return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length)) - case 7: - return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length)) - case 8: - return uint32((u * prime8bytes) >> (64 - length)) - default: - return (uint32(u) * prime4bytes) >> (32 - length) - } -} - -// matchlen will return the match length between offsets and t in src. -// The maximum length returned is maxMatchLength - 4. -// It is assumed that s > t, that t >=0 and s < len(src). -func (e *fastGen) matchlen(s, t int32, src []byte) int32 { - if debugDecode { - if t >= s { - panic(fmt.Sprint("t >=s:", t, s)) - } - if int(s) >= len(src) { - panic(fmt.Sprint("s >= len(src):", s, len(src))) - } - if t < 0 { - panic(fmt.Sprint("t < 0:", t)) - } - if s-t > maxMatchOffset { - panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) - } - } - s1 := int(s) + maxMatchLength - 4 - if s1 > len(src) { - s1 = len(src) - } - - // Extend the match to be as long as possible. - return int32(matchLen(src[s:s1], src[t:])) -} - -// matchlenLong will return the match length between offsets and t in src. -// It is assumed that s > t, that t >=0 and s < len(src). -func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 { - if debugDeflate { - if t >= s { - panic(fmt.Sprint("t >=s:", t, s)) - } - if int(s) >= len(src) { - panic(fmt.Sprint("s >= len(src):", s, len(src))) - } - if t < 0 { - panic(fmt.Sprint("t < 0:", t)) - } - if s-t > maxMatchOffset { - panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) - } - } - // Extend the match to be as long as possible. - return int32(matchLen(src[s:], src[t:])) -} - -// Reset the encoding table. -func (e *fastGen) Reset() { - if cap(e.hist) < allocHistory { - e.hist = make([]byte, 0, allocHistory) - } - // We offset current position so everything will be out of reach. - // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. - if e.cur <= bufferReset { - e.cur += maxMatchOffset + int32(len(e.hist)) - } - e.hist = e.hist[:0] -} - -// matchLen returns the maximum length. -// 'a' must be the shortest of the two. -func matchLen(a, b []byte) int { - var checked int - - for len(a) >= 8 { - if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { - return checked + (bits.TrailingZeros64(diff) >> 3) - } - checked += 8 - a = a[8:] - b = b[8:] - } - b = b[:len(a)] - for i := range a { - if a[i] != b[i] { - return i + checked - } - } - return len(a) + checked -} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go deleted file mode 100644 index f70594c3..00000000 --- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go +++ /dev/null @@ -1,1182 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "encoding/binary" - "fmt" - "io" - "math" -) - -const ( - // The largest offset code. - offsetCodeCount = 30 - - // The special code used to mark the end of a block. - endBlockMarker = 256 - - // The first length code. - lengthCodesStart = 257 - - // The number of codegen codes. - codegenCodeCount = 19 - badCode = 255 - - // maxPredefinedTokens is the maximum number of tokens - // where we check if fixed size is smaller. - maxPredefinedTokens = 250 - - // bufferFlushSize indicates the buffer size - // after which bytes are flushed to the writer. - // Should preferably be a multiple of 6, since - // we accumulate 6 bytes between writes to the buffer. - bufferFlushSize = 246 -) - -// Minimum length code that emits bits. -const lengthExtraBitsMinCode = 8 - -// The number of extra bits needed by length code X - LENGTH_CODES_START. -var lengthExtraBits = [32]uint8{ - /* 257 */ 0, 0, 0, - /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, - /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, - /* 280 */ 4, 5, 5, 5, 5, 0, -} - -// The length indicated by length code X - LENGTH_CODES_START. -var lengthBase = [32]uint8{ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, - 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, - 64, 80, 96, 112, 128, 160, 192, 224, 255, -} - -// Minimum offset code that emits bits. -const offsetExtraBitsMinCode = 4 - -// offset code word extra bits. -var offsetExtraBits = [32]int8{ - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, - 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, - 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, - /* extended window */ - 14, 14, -} - -var offsetCombined = [32]uint32{} - -func init() { - var offsetBase = [32]uint32{ - /* normal deflate */ - 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, - 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, - 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, - 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, - 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, - 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, - - /* extended window */ - 0x008000, 0x00c000, - } - - for i := range offsetCombined[:] { - // Don't use extended window values... - if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 { - continue - } - offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8) - } -} - -// The odd order in which the codegen code sizes are written. -var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} - -type huffmanBitWriter struct { - // writer is the underlying writer. - // Do not use it directly; use the write method, which ensures - // that Write errors are sticky. - writer io.Writer - - // Data waiting to be written is bytes[0:nbytes] - // and then the low nbits of bits. - bits uint64 - nbits uint8 - nbytes uint8 - lastHuffMan bool - literalEncoding *huffmanEncoder - tmpLitEncoding *huffmanEncoder - offsetEncoding *huffmanEncoder - codegenEncoding *huffmanEncoder - err error - lastHeader int - // Set between 0 (reused block can be up to 2x the size) - logNewTablePenalty uint - bytes [256 + 8]byte - literalFreq [lengthCodesStart + 32]uint16 - offsetFreq [32]uint16 - codegenFreq [codegenCodeCount]uint16 - - // codegen must have an extra space for the final symbol. - codegen [literalCount + offsetCodeCount + 1]uint8 -} - -// Huffman reuse. -// -// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections. -// -// This is controlled by several variables: -// -// If lastHeader is non-zero the Huffman table can be reused. -// This also indicates that a Huffman table has been generated that can output all -// possible symbols. -// It also indicates that an EOB has not yet been emitted, so if a new tabel is generated -// an EOB with the previous table must be written. -// -// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid. -// -// An incoming block estimates the output size of a new table using a 'fresh' by calculating the -// optimal size and adding a penalty in 'logNewTablePenalty'. -// A Huffman table is not optimal, which is why we add a penalty, and generating a new table -// is slower both for compression and decompression. - -func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { - return &huffmanBitWriter{ - writer: w, - literalEncoding: newHuffmanEncoder(literalCount), - tmpLitEncoding: newHuffmanEncoder(literalCount), - codegenEncoding: newHuffmanEncoder(codegenCodeCount), - offsetEncoding: newHuffmanEncoder(offsetCodeCount), - } -} - -func (w *huffmanBitWriter) reset(writer io.Writer) { - w.writer = writer - w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil - w.lastHeader = 0 - w.lastHuffMan = false -} - -func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) { - a := t.offHist[:offsetCodeCount] - b := w.offsetEncoding.codes - b = b[:len(a)] - for i, v := range a { - if v != 0 && b[i].zero() { - return false - } - } - - a = t.extraHist[:literalCount-256] - b = w.literalEncoding.codes[256:literalCount] - b = b[:len(a)] - for i, v := range a { - if v != 0 && b[i].zero() { - return false - } - } - - a = t.litHist[:256] - b = w.literalEncoding.codes[:len(a)] - for i, v := range a { - if v != 0 && b[i].zero() { - return false - } - } - return true -} - -func (w *huffmanBitWriter) flush() { - if w.err != nil { - w.nbits = 0 - return - } - if w.lastHeader > 0 { - // We owe an EOB - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } - n := w.nbytes - for w.nbits != 0 { - w.bytes[n] = byte(w.bits) - w.bits >>= 8 - if w.nbits > 8 { // Avoid underflow - w.nbits -= 8 - } else { - w.nbits = 0 - } - n++ - } - w.bits = 0 - w.write(w.bytes[:n]) - w.nbytes = 0 -} - -func (w *huffmanBitWriter) write(b []byte) { - if w.err != nil { - return - } - _, w.err = w.writer.Write(b) -} - -func (w *huffmanBitWriter) writeBits(b int32, nb uint8) { - w.bits |= uint64(b) << (w.nbits & 63) - w.nbits += nb - if w.nbits >= 48 { - w.writeOutBits() - } -} - -func (w *huffmanBitWriter) writeBytes(bytes []byte) { - if w.err != nil { - return - } - n := w.nbytes - if w.nbits&7 != 0 { - w.err = InternalError("writeBytes with unfinished bits") - return - } - for w.nbits != 0 { - w.bytes[n] = byte(w.bits) - w.bits >>= 8 - w.nbits -= 8 - n++ - } - if n != 0 { - w.write(w.bytes[:n]) - } - w.nbytes = 0 - w.write(bytes) -} - -// RFC 1951 3.2.7 specifies a special run-length encoding for specifying -// the literal and offset lengths arrays (which are concatenated into a single -// array). This method generates that run-length encoding. -// -// The result is written into the codegen array, and the frequencies -// of each code is written into the codegenFreq array. -// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional -// information. Code badCode is an end marker -// -// numLiterals The number of literals in literalEncoding -// numOffsets The number of offsets in offsetEncoding -// litenc, offenc The literal and offset encoder to use -func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { - for i := range w.codegenFreq { - w.codegenFreq[i] = 0 - } - // Note that we are using codegen both as a temporary variable for holding - // a copy of the frequencies, and as the place where we put the result. - // This is fine because the output is always shorter than the input used - // so far. - codegen := w.codegen[:] // cache - // Copy the concatenated code sizes to codegen. Put a marker at the end. - cgnl := codegen[:numLiterals] - for i := range cgnl { - cgnl[i] = litEnc.codes[i].len() - } - - cgnl = codegen[numLiterals : numLiterals+numOffsets] - for i := range cgnl { - cgnl[i] = offEnc.codes[i].len() - } - codegen[numLiterals+numOffsets] = badCode - - size := codegen[0] - count := 1 - outIndex := 0 - for inIndex := 1; size != badCode; inIndex++ { - // INVARIANT: We have seen "count" copies of size that have not yet - // had output generated for them. - nextSize := codegen[inIndex] - if nextSize == size { - count++ - continue - } - // We need to generate codegen indicating "count" of size. - if size != 0 { - codegen[outIndex] = size - outIndex++ - w.codegenFreq[size]++ - count-- - for count >= 3 { - n := 6 - if n > count { - n = count - } - codegen[outIndex] = 16 - outIndex++ - codegen[outIndex] = uint8(n - 3) - outIndex++ - w.codegenFreq[16]++ - count -= n - } - } else { - for count >= 11 { - n := 138 - if n > count { - n = count - } - codegen[outIndex] = 18 - outIndex++ - codegen[outIndex] = uint8(n - 11) - outIndex++ - w.codegenFreq[18]++ - count -= n - } - if count >= 3 { - // count >= 3 && count <= 10 - codegen[outIndex] = 17 - outIndex++ - codegen[outIndex] = uint8(count - 3) - outIndex++ - w.codegenFreq[17]++ - count = 0 - } - } - count-- - for ; count >= 0; count-- { - codegen[outIndex] = size - outIndex++ - w.codegenFreq[size]++ - } - // Set up invariant for next time through the loop. - size = nextSize - count = 1 - } - // Marker indicating the end of the codegen. - codegen[outIndex] = badCode -} - -func (w *huffmanBitWriter) codegens() int { - numCodegens := len(w.codegenFreq) - for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { - numCodegens-- - } - return numCodegens -} - -func (w *huffmanBitWriter) headerSize() (size, numCodegens int) { - numCodegens = len(w.codegenFreq) - for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { - numCodegens-- - } - return 3 + 5 + 5 + 4 + (3 * numCodegens) + - w.codegenEncoding.bitLength(w.codegenFreq[:]) + - int(w.codegenFreq[16])*2 + - int(w.codegenFreq[17])*3 + - int(w.codegenFreq[18])*7, numCodegens -} - -// dynamicSize returns the size of dynamically encoded data in bits. -func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) { - size = litEnc.bitLength(w.literalFreq[:]) + - offEnc.bitLength(w.offsetFreq[:]) - return size -} - -// dynamicSize returns the size of dynamically encoded data in bits. -func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { - header, numCodegens := w.headerSize() - size = header + - litEnc.bitLength(w.literalFreq[:]) + - offEnc.bitLength(w.offsetFreq[:]) + - extraBits - return size, numCodegens -} - -// extraBitSize will return the number of bits that will be written -// as "extra" bits on matches. -func (w *huffmanBitWriter) extraBitSize() int { - total := 0 - for i, n := range w.literalFreq[257:literalCount] { - total += int(n) * int(lengthExtraBits[i&31]) - } - for i, n := range w.offsetFreq[:offsetCodeCount] { - total += int(n) * int(offsetExtraBits[i&31]) - } - return total -} - -// fixedSize returns the size of dynamically encoded data in bits. -func (w *huffmanBitWriter) fixedSize(extraBits int) int { - return 3 + - fixedLiteralEncoding.bitLength(w.literalFreq[:]) + - fixedOffsetEncoding.bitLength(w.offsetFreq[:]) + - extraBits -} - -// storedSize calculates the stored size, including header. -// The function returns the size in bits and whether the block -// fits inside a single block. -func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { - if in == nil { - return 0, false - } - if len(in) <= maxStoreBlockSize { - return (len(in) + 5) * 8, true - } - return 0, false -} - -func (w *huffmanBitWriter) writeCode(c hcode) { - // The function does not get inlined if we "& 63" the shift. - w.bits |= c.code64() << (w.nbits & 63) - w.nbits += c.len() - if w.nbits >= 48 { - w.writeOutBits() - } -} - -// writeOutBits will write bits to the buffer. -func (w *huffmanBitWriter) writeOutBits() { - bits := w.bits - w.bits >>= 48 - w.nbits -= 48 - n := w.nbytes - - // We over-write, but faster... - binary.LittleEndian.PutUint64(w.bytes[n:], bits) - n += 6 - - if n >= bufferFlushSize { - if w.err != nil { - n = 0 - return - } - w.write(w.bytes[:n]) - n = 0 - } - - w.nbytes = n -} - -// Write the header of a dynamic Huffman block to the output stream. -// -// numLiterals The number of literals specified in codegen -// numOffsets The number of offsets specified in codegen -// numCodegens The number of codegens used in codegen -func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { - if w.err != nil { - return - } - var firstBits int32 = 4 - if isEof { - firstBits = 5 - } - w.writeBits(firstBits, 3) - w.writeBits(int32(numLiterals-257), 5) - w.writeBits(int32(numOffsets-1), 5) - w.writeBits(int32(numCodegens-4), 4) - - for i := 0; i < numCodegens; i++ { - value := uint(w.codegenEncoding.codes[codegenOrder[i]].len()) - w.writeBits(int32(value), 3) - } - - i := 0 - for { - var codeWord = uint32(w.codegen[i]) - i++ - if codeWord == badCode { - break - } - w.writeCode(w.codegenEncoding.codes[codeWord]) - - switch codeWord { - case 16: - w.writeBits(int32(w.codegen[i]), 2) - i++ - case 17: - w.writeBits(int32(w.codegen[i]), 3) - i++ - case 18: - w.writeBits(int32(w.codegen[i]), 7) - i++ - } - } -} - -// writeStoredHeader will write a stored header. -// If the stored block is only used for EOF, -// it is replaced with a fixed huffman block. -func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { - if w.err != nil { - return - } - if w.lastHeader > 0 { - // We owe an EOB - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } - - // To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes. - if length == 0 && isEof { - w.writeFixedHeader(isEof) - // EOB: 7 bits, value: 0 - w.writeBits(0, 7) - w.flush() - return - } - - var flag int32 - if isEof { - flag = 1 - } - w.writeBits(flag, 3) - w.flush() - w.writeBits(int32(length), 16) - w.writeBits(int32(^uint16(length)), 16) -} - -func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { - if w.err != nil { - return - } - if w.lastHeader > 0 { - // We owe an EOB - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } - - // Indicate that we are a fixed Huffman block - var value int32 = 2 - if isEof { - value = 3 - } - w.writeBits(value, 3) -} - -// writeBlock will write a block of tokens with the smallest encoding. -// The original input can be supplied, and if the huffman encoded data -// is larger than the original bytes, the data will be written as a -// stored block. -// If the input is nil, the tokens will always be Huffman encoded. -func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { - if w.err != nil { - return - } - - tokens.AddEOB() - if w.lastHeader > 0 { - // We owe an EOB - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } - numLiterals, numOffsets := w.indexTokens(tokens, false) - w.generate() - var extraBits int - storedSize, storable := w.storedSize(input) - if storable { - extraBits = w.extraBitSize() - } - - // Figure out smallest code. - // Fixed Huffman baseline. - var literalEncoding = fixedLiteralEncoding - var offsetEncoding = fixedOffsetEncoding - var size = math.MaxInt32 - if tokens.n < maxPredefinedTokens { - size = w.fixedSize(extraBits) - } - - // Dynamic Huffman? - var numCodegens int - - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literalEncoding and the offsetEncoding. - w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) - w.codegenEncoding.generate(w.codegenFreq[:], 7) - dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) - - if dynamicSize < size { - size = dynamicSize - literalEncoding = w.literalEncoding - offsetEncoding = w.offsetEncoding - } - - // Stored bytes? - if storable && storedSize <= size { - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - - // Huffman. - if literalEncoding == fixedLiteralEncoding { - w.writeFixedHeader(eof) - } else { - w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) - } - - // Write the tokens. - w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes) -} - -// writeBlockDynamic encodes a block using a dynamic Huffman table. -// This should be used if the symbols used have a disproportionate -// histogram distribution. -// If input is supplied and the compression savings are below 1/16th of the -// input size the block is stored. -func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) { - if w.err != nil { - return - } - - sync = sync || eof - if sync { - tokens.AddEOB() - } - - // We cannot reuse pure huffman table, and must mark as EOF. - if (w.lastHuffMan || eof) && w.lastHeader > 0 { - // We will not try to reuse. - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - w.lastHuffMan = false - } - - // fillReuse enables filling of empty values. - // This will make encodings always reusable without testing. - // However, this does not appear to benefit on most cases. - const fillReuse = false - - // Check if we can reuse... - if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) { - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } - - numLiterals, numOffsets := w.indexTokens(tokens, !sync) - extraBits := 0 - ssize, storable := w.storedSize(input) - - const usePrefs = true - if storable || w.lastHeader > 0 { - extraBits = w.extraBitSize() - } - - var size int - - // Check if we should reuse. - if w.lastHeader > 0 { - // Estimate size for using a new table. - // Use the previous header size as the best estimate. - newSize := w.lastHeader + tokens.EstimatedBits() - newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty - - // The estimated size is calculated as an optimal table. - // We add a penalty to make it more realistic and re-use a bit more. - reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits - - // Check if a new table is better. - if newSize < reuseSize { - // Write the EOB we owe. - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - size = newSize - w.lastHeader = 0 - } else { - size = reuseSize - } - - if tokens.n < maxPredefinedTokens { - if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size { - // Check if we get a reasonable size decrease. - if storable && ssize <= size { - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - w.writeFixedHeader(eof) - if !sync { - tokens.AddEOB() - } - w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) - return - } - } - // Check if we get a reasonable size decrease. - if storable && ssize <= size { - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - } - - // We want a new block/table - if w.lastHeader == 0 { - if fillReuse && !sync { - w.fillTokens() - numLiterals, numOffsets = maxNumLit, maxNumDist - } else { - w.literalFreq[endBlockMarker] = 1 - } - - w.generate() - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literalEncoding and the offsetEncoding. - w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) - w.codegenEncoding.generate(w.codegenFreq[:], 7) - - var numCodegens int - if fillReuse && !sync { - // Reindex for accurate size... - w.indexTokens(tokens, true) - } - size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) - - // Store predefined, if we don't get a reasonable improvement. - if tokens.n < maxPredefinedTokens { - if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size { - // Store bytes, if we don't get an improvement. - if storable && ssize <= preSize { - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - w.writeFixedHeader(eof) - if !sync { - tokens.AddEOB() - } - w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) - return - } - } - - if storable && ssize <= size { - // Store bytes, if we don't get an improvement. - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - - // Write Huffman table. - w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) - if !sync { - w.lastHeader, _ = w.headerSize() - } - w.lastHuffMan = false - } - - if sync { - w.lastHeader = 0 - } - // Write the tokens. - w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes) -} - -func (w *huffmanBitWriter) fillTokens() { - for i, v := range w.literalFreq[:literalCount] { - if v == 0 { - w.literalFreq[i] = 1 - } - } - for i, v := range w.offsetFreq[:offsetCodeCount] { - if v == 0 { - w.offsetFreq[i] = 1 - } - } -} - -// indexTokens indexes a slice of tokens, and updates -// literalFreq and offsetFreq, and generates literalEncoding -// and offsetEncoding. -// The number of literal and offset tokens is returned. -func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) { - //copy(w.literalFreq[:], t.litHist[:]) - *(*[256]uint16)(w.literalFreq[:]) = t.litHist - //copy(w.literalFreq[256:], t.extraHist[:]) - *(*[32]uint16)(w.literalFreq[256:]) = t.extraHist - w.offsetFreq = t.offHist - - if t.n == 0 { - return - } - if filled { - return maxNumLit, maxNumDist - } - // get the number of literals - numLiterals = len(w.literalFreq) - for w.literalFreq[numLiterals-1] == 0 { - numLiterals-- - } - // get the number of offsets - numOffsets = len(w.offsetFreq) - for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { - numOffsets-- - } - if numOffsets == 0 { - // We haven't found a single match. If we want to go with the dynamic encoding, - // we should count at least one offset to be sure that the offset huffman tree could be encoded. - w.offsetFreq[0] = 1 - numOffsets = 1 - } - return -} - -func (w *huffmanBitWriter) generate() { - w.literalEncoding.generate(w.literalFreq[:literalCount], 15) - w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15) -} - -// writeTokens writes a slice of tokens to the output. -// codes for literal and offset encoding must be supplied. -func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) { - if w.err != nil { - return - } - if len(tokens) == 0 { - return - } - - // Only last token should be endBlockMarker. - var deferEOB bool - if tokens[len(tokens)-1] == endBlockMarker { - tokens = tokens[:len(tokens)-1] - deferEOB = true - } - - // Create slices up to the next power of two to avoid bounds checks. - lits := leCodes[:256] - offs := oeCodes[:32] - lengths := leCodes[lengthCodesStart:] - lengths = lengths[:32] - - // Go 1.16 LOVES having these on stack. - bits, nbits, nbytes := w.bits, w.nbits, w.nbytes - - for _, t := range tokens { - if t < 256 { - //w.writeCode(lits[t.literal()]) - c := lits[t] - bits |= c.code64() << (nbits & 63) - nbits += c.len() - if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) - //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - continue - } - - // Write the length - length := t.length() - lengthCode := lengthCode(length) & 31 - if false { - w.writeCode(lengths[lengthCode]) - } else { - // inlined - c := lengths[lengthCode] - bits |= c.code64() << (nbits & 63) - nbits += c.len() - if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) - //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - } - - if lengthCode >= lengthExtraBitsMinCode { - extraLengthBits := lengthExtraBits[lengthCode] - //w.writeBits(extraLength, extraLengthBits) - extraLength := int32(length - lengthBase[lengthCode]) - bits |= uint64(extraLength) << (nbits & 63) - nbits += extraLengthBits - if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) - //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - } - // Write the offset - offset := t.offset() - offsetCode := (offset >> 16) & 31 - if false { - w.writeCode(offs[offsetCode]) - } else { - // inlined - c := offs[offsetCode] - bits |= c.code64() << (nbits & 63) - nbits += c.len() - if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) - //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - } - - if offsetCode >= offsetExtraBitsMinCode { - offsetComb := offsetCombined[offsetCode] - //w.writeBits(extraOffset, extraOffsetBits) - bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63) - nbits += uint8(offsetComb) - if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) - //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - } - } - // Restore... - w.bits, w.nbits, w.nbytes = bits, nbits, nbytes - - if deferEOB { - w.writeCode(leCodes[endBlockMarker]) - } -} - -// huffOffset is a static offset encoder used for huffman only encoding. -// It can be reused since we will not be encoding offset values. -var huffOffset *huffmanEncoder - -func init() { - w := newHuffmanBitWriter(nil) - w.offsetFreq[0] = 1 - huffOffset = newHuffmanEncoder(offsetCodeCount) - huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15) -} - -// writeBlockHuff encodes a block of bytes as either -// Huffman encoded literals or uncompressed bytes if the -// results only gains very little from compression. -func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { - if w.err != nil { - return - } - - // Clear histogram - for i := range w.literalFreq[:] { - w.literalFreq[i] = 0 - } - if !w.lastHuffMan { - for i := range w.offsetFreq[:] { - w.offsetFreq[i] = 0 - } - } - - const numLiterals = endBlockMarker + 1 - const numOffsets = 1 - - // Add everything as literals - // We have to estimate the header size. - // Assume header is around 70 bytes: - // https://stackoverflow.com/a/25454430 - const guessHeaderSizeBits = 70 * 8 - histogram(input, w.literalFreq[:numLiterals]) - ssize, storable := w.storedSize(input) - if storable && len(input) > 1024 { - // Quick check for incompressible content. - abs := float64(0) - avg := float64(len(input)) / 256 - max := float64(len(input) * 2) - for _, v := range w.literalFreq[:256] { - diff := float64(v) - avg - abs += diff * diff - if abs > max { - break - } - } - if abs < max { - if debugDeflate { - fmt.Println("stored", abs, "<", max) - } - // No chance we can compress this... - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - } - w.literalFreq[endBlockMarker] = 1 - w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15) - estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals]) - if estBits < math.MaxInt32 { - estBits += w.lastHeader - if w.lastHeader == 0 { - estBits += guessHeaderSizeBits - } - estBits += estBits >> w.logNewTablePenalty - } - - // Store bytes, if we don't get a reasonable improvement. - if storable && ssize <= estBits { - if debugDeflate { - fmt.Println("stored,", ssize, "<=", estBits) - } - w.writeStoredHeader(len(input), eof) - w.writeBytes(input) - return - } - - if w.lastHeader > 0 { - reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256]) - - if estBits < reuseSize { - if debugDeflate { - fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes") - } - // We owe an EOB - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - } else if debugDeflate { - fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8) - } - } - - count := 0 - if w.lastHeader == 0 { - // Use the temp encoding, so swap. - w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literalEncoding and the offsetEncoding. - w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) - w.codegenEncoding.generate(w.codegenFreq[:], 7) - numCodegens := w.codegens() - - // Huffman. - w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) - w.lastHuffMan = true - w.lastHeader, _ = w.headerSize() - if debugDeflate { - count += w.lastHeader - fmt.Println("header:", count/8) - } - } - - encoding := w.literalEncoding.codes[:256] - // Go 1.16 LOVES having these on stack. At least 1.5x the speed. - bits, nbits, nbytes := w.bits, w.nbits, w.nbytes - - if debugDeflate { - count -= int(nbytes)*8 + int(nbits) - } - // Unroll, write 3 codes/loop. - // Fastest number of unrolls. - for len(input) > 3 { - // We must have at least 48 bits free. - if nbits >= 8 { - n := nbits >> 3 - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) - bits >>= (n * 8) & 63 - nbits -= n * 8 - nbytes += n - } - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - if debugDeflate { - count += int(nbytes) * 8 - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - a, b := encoding[input[0]], encoding[input[1]] - bits |= a.code64() << (nbits & 63) - bits |= b.code64() << ((nbits + a.len()) & 63) - c := encoding[input[2]] - nbits += b.len() + a.len() - bits |= c.code64() << (nbits & 63) - nbits += c.len() - input = input[3:] - } - - // Remaining... - for _, t := range input { - if nbits >= 48 { - binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) - //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits - bits >>= 48 - nbits -= 48 - nbytes += 6 - if nbytes >= bufferFlushSize { - if w.err != nil { - nbytes = 0 - return - } - if debugDeflate { - count += int(nbytes) * 8 - } - _, w.err = w.writer.Write(w.bytes[:nbytes]) - nbytes = 0 - } - } - // Bitwriting inlined, ~30% speedup - c := encoding[t] - bits |= c.code64() << (nbits & 63) - - nbits += c.len() - if debugDeflate { - count += int(c.len()) - } - } - // Restore... - w.bits, w.nbits, w.nbytes = bits, nbits, nbytes - - if debugDeflate { - nb := count + int(nbytes)*8 + int(nbits) - fmt.Println("wrote", nb, "bits,", nb/8, "bytes.") - } - // Flush if needed to have space. - if w.nbits >= 48 { - w.writeOutBits() - } - - if eof || sync { - w.writeCode(w.literalEncoding.codes[endBlockMarker]) - w.lastHeader = 0 - w.lastHuffMan = false - } -} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go deleted file mode 100644 index be7b58b4..00000000 --- a/vendor/github.com/klauspost/compress/flate/huffman_code.go +++ /dev/null @@ -1,417 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "math" - "math/bits" -) - -const ( - maxBitsLimit = 16 - // number of valid literals - literalCount = 286 -) - -// hcode is a huffman code with a bit code and bit length. -type hcode uint32 - -func (h hcode) len() uint8 { - return uint8(h) -} - -func (h hcode) code64() uint64 { - return uint64(h >> 8) -} - -func (h hcode) zero() bool { - return h == 0 -} - -type huffmanEncoder struct { - codes []hcode - bitCount [17]int32 - - // Allocate a reusable buffer with the longest possible frequency table. - // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. - // The largest of these is literalCount, so we allocate for that case. - freqcache [literalCount + 1]literalNode -} - -type literalNode struct { - literal uint16 - freq uint16 -} - -// A levelInfo describes the state of the constructed tree for a given depth. -type levelInfo struct { - // Our level. for better printing - level int32 - - // The frequency of the last node at this level - lastFreq int32 - - // The frequency of the next character to add to this level - nextCharFreq int32 - - // The frequency of the next pair (from level below) to add to this level. - // Only valid if the "needed" value of the next lower level is 0. - nextPairFreq int32 - - // The number of chains remaining to generate for this level before moving - // up to the next level - needed int32 -} - -// set sets the code and length of an hcode. -func (h *hcode) set(code uint16, length uint8) { - *h = hcode(length) | (hcode(code) << 8) -} - -func newhcode(code uint16, length uint8) hcode { - return hcode(length) | (hcode(code) << 8) -} - -func reverseBits(number uint16, bitLength byte) uint16 { - return bits.Reverse16(number << ((16 - bitLength) & 15)) -} - -func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} } - -func newHuffmanEncoder(size int) *huffmanEncoder { - // Make capacity to next power of two. - c := uint(bits.Len32(uint32(size - 1))) - return &huffmanEncoder{codes: make([]hcode, size, 1<= 3 -// The cases of 0, 1, and 2 literals are handled by special case code. -// -// list An array of the literals with non-zero frequencies -// -// and their associated frequencies. The array is in order of increasing -// frequency, and has as its last element a special element with frequency -// MaxInt32 -// -// maxBits The maximum number of bits that should be used to encode any literal. -// -// Must be less than 16. -// -// return An integer array in which array[i] indicates the number of literals -// -// that should be encoded in i bits. -func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { - if maxBits >= maxBitsLimit { - panic("flate: maxBits too large") - } - n := int32(len(list)) - list = list[0 : n+1] - list[n] = maxNode() - - // The tree can't have greater depth than n - 1, no matter what. This - // saves a little bit of work in some small cases - if maxBits > n-1 { - maxBits = n - 1 - } - - // Create information about each of the levels. - // A bogus "Level 0" whose sole purpose is so that - // level1.prev.needed==0. This makes level1.nextPairFreq - // be a legitimate value that never gets chosen. - var levels [maxBitsLimit]levelInfo - // leafCounts[i] counts the number of literals at the left - // of ancestors of the rightmost node at level i. - // leafCounts[i][j] is the number of literals at the left - // of the level j ancestor. - var leafCounts [maxBitsLimit][maxBitsLimit]int32 - - // Descending to only have 1 bounds check. - l2f := int32(list[2].freq) - l1f := int32(list[1].freq) - l0f := int32(list[0].freq) + int32(list[1].freq) - - for level := int32(1); level <= maxBits; level++ { - // For every level, the first two items are the first two characters. - // We initialize the levels as if we had already figured this out. - levels[level] = levelInfo{ - level: level, - lastFreq: l1f, - nextCharFreq: l2f, - nextPairFreq: l0f, - } - leafCounts[level][level] = 2 - if level == 1 { - levels[level].nextPairFreq = math.MaxInt32 - } - } - - // We need a total of 2*n - 2 items at top level and have already generated 2. - levels[maxBits].needed = 2*n - 4 - - level := uint32(maxBits) - for level < 16 { - l := &levels[level] - if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { - // We've run out of both leafs and pairs. - // End all calculations for this level. - // To make sure we never come back to this level or any lower level, - // set nextPairFreq impossibly large. - l.needed = 0 - levels[level+1].nextPairFreq = math.MaxInt32 - level++ - continue - } - - prevFreq := l.lastFreq - if l.nextCharFreq < l.nextPairFreq { - // The next item on this row is a leaf node. - n := leafCounts[level][level] + 1 - l.lastFreq = l.nextCharFreq - // Lower leafCounts are the same of the previous node. - leafCounts[level][level] = n - e := list[n] - if e.literal < math.MaxUint16 { - l.nextCharFreq = int32(e.freq) - } else { - l.nextCharFreq = math.MaxInt32 - } - } else { - // The next item on this row is a pair from the previous row. - // nextPairFreq isn't valid until we generate two - // more values in the level below - l.lastFreq = l.nextPairFreq - // Take leaf counts from the lower level, except counts[level] remains the same. - if true { - save := leafCounts[level][level] - leafCounts[level] = leafCounts[level-1] - leafCounts[level][level] = save - } else { - copy(leafCounts[level][:level], leafCounts[level-1][:level]) - } - levels[l.level-1].needed = 2 - } - - if l.needed--; l.needed == 0 { - // We've done everything we need to do for this level. - // Continue calculating one level up. Fill in nextPairFreq - // of that level with the sum of the two nodes we've just calculated on - // this level. - if l.level == maxBits { - // All done! - break - } - levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq - level++ - } else { - // If we stole from below, move down temporarily to replenish it. - for levels[level-1].needed > 0 { - level-- - } - } - } - - // Somethings is wrong if at the end, the top level is null or hasn't used - // all of the leaves. - if leafCounts[maxBits][maxBits] != n { - panic("leafCounts[maxBits][maxBits] != n") - } - - bitCount := h.bitCount[:maxBits+1] - bits := 1 - counts := &leafCounts[maxBits] - for level := maxBits; level > 0; level-- { - // chain.leafCount gives the number of literals requiring at least "bits" - // bits to encode. - bitCount[bits] = counts[level] - counts[level-1] - bits++ - } - return bitCount -} - -// Look at the leaves and assign them a bit count and an encoding as specified -// in RFC 1951 3.2.2 -func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { - code := uint16(0) - for n, bits := range bitCount { - code <<= 1 - if n == 0 || bits == 0 { - continue - } - // The literals list[len(list)-bits] .. list[len(list)-bits] - // are encoded using "bits" bits, and get the values - // code, code + 1, .... The code values are - // assigned in literal order (not frequency order). - chunk := list[len(list)-int(bits):] - - sortByLiteral(chunk) - for _, node := range chunk { - h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n)) - code++ - } - list = list[0 : len(list)-int(bits)] - } -} - -// Update this Huffman Code object to be the minimum code for the specified frequency count. -// -// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. -// maxBits The maximum number of bits to use for any literal. -func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { - list := h.freqcache[:len(freq)+1] - codes := h.codes[:len(freq)] - // Number of non-zero literals - count := 0 - // Set list to be the set of all non-zero literals and their frequencies - for i, f := range freq { - if f != 0 { - list[count] = literalNode{uint16(i), f} - count++ - } else { - codes[i] = 0 - } - } - list[count] = literalNode{} - - list = list[:count] - if count <= 2 { - // Handle the small cases here, because they are awkward for the general case code. With - // two or fewer literals, everything has bit length 1. - for i, node := range list { - // "list" is in order of increasing literal value. - h.codes[node.literal].set(uint16(i), 1) - } - return - } - sortByFreq(list) - - // Get the number of literals for each bit count - bitCount := h.bitCounts(list, maxBits) - // And do the assignment - h.assignEncodingAndSize(bitCount, list) -} - -// atLeastOne clamps the result between 1 and 15. -func atLeastOne(v float32) float32 { - if v < 1 { - return 1 - } - if v > 15 { - return 15 - } - return v -} - -func histogram(b []byte, h []uint16) { - if true && len(b) >= 8<<10 { - // Split for bigger inputs - histogramSplit(b, h) - } else { - h = h[:256] - for _, t := range b { - h[t]++ - } - } -} - -func histogramSplit(b []byte, h []uint16) { - // Tested, and slightly faster than 2-way. - // Writing to separate arrays and combining is also slightly slower. - h = h[:256] - for len(b)&3 != 0 { - h[b[0]]++ - b = b[1:] - } - n := len(b) / 4 - x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:] - y, z, w = y[:len(x)], z[:len(x)], w[:len(x)] - for i, t := range x { - v0 := &h[t] - v1 := &h[y[i]] - v3 := &h[w[i]] - v2 := &h[z[i]] - *v0++ - *v1++ - *v2++ - *v3++ - } -} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go deleted file mode 100644 index 6c05ba8c..00000000 --- a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// Sort sorts data. -// It makes one call to data.Len to determine n, and O(n*log(n)) calls to -// data.Less and data.Swap. The sort is not guaranteed to be stable. -func sortByFreq(data []literalNode) { - n := len(data) - quickSortByFreq(data, 0, n, maxDepth(n)) -} - -func quickSortByFreq(data []literalNode, a, b, maxDepth int) { - for b-a > 12 { // Use ShellSort for slices <= 12 elements - if maxDepth == 0 { - heapSort(data, a, b) - return - } - maxDepth-- - mlo, mhi := doPivotByFreq(data, a, b) - // Avoiding recursion on the larger subproblem guarantees - // a stack depth of at most lg(b-a). - if mlo-a < b-mhi { - quickSortByFreq(data, a, mlo, maxDepth) - a = mhi // i.e., quickSortByFreq(data, mhi, b) - } else { - quickSortByFreq(data, mhi, b, maxDepth) - b = mlo // i.e., quickSortByFreq(data, a, mlo) - } - } - if b-a > 1 { - // Do ShellSort pass with gap 6 - // It could be written in this simplified form cause b-a <= 12 - for i := a + 6; i < b; i++ { - if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { - data[i], data[i-6] = data[i-6], data[i] - } - } - insertionSortByFreq(data, a, b) - } -} - -func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { - m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. - if hi-lo > 40 { - // Tukey's ``Ninther,'' median of three medians of three. - s := (hi - lo) / 8 - medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) - medianOfThreeSortByFreq(data, m, m-s, m+s) - medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) - } - medianOfThreeSortByFreq(data, lo, m, hi-1) - - // Invariants are: - // data[lo] = pivot (set up by ChoosePivot) - // data[lo < i < a] < pivot - // data[a <= i < b] <= pivot - // data[b <= i < c] unexamined - // data[c <= i < hi-1] > pivot - // data[hi-1] >= pivot - pivot := lo - a, c := lo+1, hi-1 - - for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { - } - b := a - for { - for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot - } - for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot - } - if b >= c { - break - } - // data[b] > pivot; data[c-1] <= pivot - data[b], data[c-1] = data[c-1], data[b] - b++ - c-- - } - // If hi-c<3 then there are duplicates (by property of median of nine). - // Let's be a bit more conservative, and set border to 5. - protect := hi-c < 5 - if !protect && hi-c < (hi-lo)/4 { - // Lets test some points for equality to pivot - dups := 0 - if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot - data[c], data[hi-1] = data[hi-1], data[c] - c++ - dups++ - } - if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot - b-- - dups++ - } - // m-lo = (hi-lo)/2 > 6 - // b-lo > (hi-lo)*3/4-1 > 8 - // ==> m < b ==> data[m] <= pivot - if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot - data[m], data[b-1] = data[b-1], data[m] - b-- - dups++ - } - // if at least 2 points are equal to pivot, assume skewed distribution - protect = dups > 1 - } - if protect { - // Protect against a lot of duplicates - // Add invariant: - // data[a <= i < b] unexamined - // data[b <= i < c] = pivot - for { - for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot - } - for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot - } - if a >= b { - break - } - // data[a] == pivot; data[b-1] < pivot - data[a], data[b-1] = data[b-1], data[a] - a++ - b-- - } - } - // Swap pivot into middle - data[pivot], data[b-1] = data[b-1], data[pivot] - return b - 1, c -} - -// Insertion sort -func insertionSortByFreq(data []literalNode, a, b int) { - for i := a + 1; i < b; i++ { - for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { - data[j], data[j-1] = data[j-1], data[j] - } - } -} - -// quickSortByFreq, loosely following Bentley and McIlroy, -// ``Engineering a Sort Function,'' SP&E November 1993. - -// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. -func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { - // sort 3 elements - if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { - data[m1], data[m0] = data[m0], data[m1] - } - // data[m0] <= data[m1] - if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { - data[m2], data[m1] = data[m1], data[m2] - // data[m0] <= data[m2] && data[m1] < data[m2] - if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { - data[m1], data[m0] = data[m0], data[m1] - } - } - // now data[m0] <= data[m1] <= data[m2] -} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go deleted file mode 100644 index 93f1aea1..00000000 --- a/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -// Sort sorts data. -// It makes one call to data.Len to determine n, and O(n*log(n)) calls to -// data.Less and data.Swap. The sort is not guaranteed to be stable. -func sortByLiteral(data []literalNode) { - n := len(data) - quickSort(data, 0, n, maxDepth(n)) -} - -func quickSort(data []literalNode, a, b, maxDepth int) { - for b-a > 12 { // Use ShellSort for slices <= 12 elements - if maxDepth == 0 { - heapSort(data, a, b) - return - } - maxDepth-- - mlo, mhi := doPivot(data, a, b) - // Avoiding recursion on the larger subproblem guarantees - // a stack depth of at most lg(b-a). - if mlo-a < b-mhi { - quickSort(data, a, mlo, maxDepth) - a = mhi // i.e., quickSort(data, mhi, b) - } else { - quickSort(data, mhi, b, maxDepth) - b = mlo // i.e., quickSort(data, a, mlo) - } - } - if b-a > 1 { - // Do ShellSort pass with gap 6 - // It could be written in this simplified form cause b-a <= 12 - for i := a + 6; i < b; i++ { - if data[i].literal < data[i-6].literal { - data[i], data[i-6] = data[i-6], data[i] - } - } - insertionSort(data, a, b) - } -} -func heapSort(data []literalNode, a, b int) { - first := a - lo := 0 - hi := b - a - - // Build heap with greatest element at top. - for i := (hi - 1) / 2; i >= 0; i-- { - siftDown(data, i, hi, first) - } - - // Pop elements, largest first, into end of data. - for i := hi - 1; i >= 0; i-- { - data[first], data[first+i] = data[first+i], data[first] - siftDown(data, lo, i, first) - } -} - -// siftDown implements the heap property on data[lo, hi). -// first is an offset into the array where the root of the heap lies. -func siftDown(data []literalNode, lo, hi, first int) { - root := lo - for { - child := 2*root + 1 - if child >= hi { - break - } - if child+1 < hi && data[first+child].literal < data[first+child+1].literal { - child++ - } - if data[first+root].literal > data[first+child].literal { - return - } - data[first+root], data[first+child] = data[first+child], data[first+root] - root = child - } -} -func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { - m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. - if hi-lo > 40 { - // Tukey's ``Ninther,'' median of three medians of three. - s := (hi - lo) / 8 - medianOfThree(data, lo, lo+s, lo+2*s) - medianOfThree(data, m, m-s, m+s) - medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) - } - medianOfThree(data, lo, m, hi-1) - - // Invariants are: - // data[lo] = pivot (set up by ChoosePivot) - // data[lo < i < a] < pivot - // data[a <= i < b] <= pivot - // data[b <= i < c] unexamined - // data[c <= i < hi-1] > pivot - // data[hi-1] >= pivot - pivot := lo - a, c := lo+1, hi-1 - - for ; a < c && data[a].literal < data[pivot].literal; a++ { - } - b := a - for { - for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot - } - for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot - } - if b >= c { - break - } - // data[b] > pivot; data[c-1] <= pivot - data[b], data[c-1] = data[c-1], data[b] - b++ - c-- - } - // If hi-c<3 then there are duplicates (by property of median of nine). - // Let's be a bit more conservative, and set border to 5. - protect := hi-c < 5 - if !protect && hi-c < (hi-lo)/4 { - // Lets test some points for equality to pivot - dups := 0 - if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot - data[c], data[hi-1] = data[hi-1], data[c] - c++ - dups++ - } - if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot - b-- - dups++ - } - // m-lo = (hi-lo)/2 > 6 - // b-lo > (hi-lo)*3/4-1 > 8 - // ==> m < b ==> data[m] <= pivot - if data[m].literal > data[pivot].literal { // data[m] = pivot - data[m], data[b-1] = data[b-1], data[m] - b-- - dups++ - } - // if at least 2 points are equal to pivot, assume skewed distribution - protect = dups > 1 - } - if protect { - // Protect against a lot of duplicates - // Add invariant: - // data[a <= i < b] unexamined - // data[b <= i < c] = pivot - for { - for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot - } - for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot - } - if a >= b { - break - } - // data[a] == pivot; data[b-1] < pivot - data[a], data[b-1] = data[b-1], data[a] - a++ - b-- - } - } - // Swap pivot into middle - data[pivot], data[b-1] = data[b-1], data[pivot] - return b - 1, c -} - -// Insertion sort -func insertionSort(data []literalNode, a, b int) { - for i := a + 1; i < b; i++ { - for j := i; j > a && data[j].literal < data[j-1].literal; j-- { - data[j], data[j-1] = data[j-1], data[j] - } - } -} - -// maxDepth returns a threshold at which quicksort should switch -// to heapsort. It returns 2*ceil(lg(n+1)). -func maxDepth(n int) int { - var depth int - for i := n; i > 0; i >>= 1 { - depth++ - } - return depth * 2 -} - -// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. -func medianOfThree(data []literalNode, m1, m0, m2 int) { - // sort 3 elements - if data[m1].literal < data[m0].literal { - data[m1], data[m0] = data[m0], data[m1] - } - // data[m0] <= data[m1] - if data[m2].literal < data[m1].literal { - data[m2], data[m1] = data[m1], data[m2] - // data[m0] <= data[m2] && data[m1] < data[m2] - if data[m1].literal < data[m0].literal { - data[m1], data[m0] = data[m0], data[m1] - } - } - // now data[m0] <= data[m1] <= data[m2] -} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go deleted file mode 100644 index 414c0bea..00000000 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ /dev/null @@ -1,793 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package flate implements the DEFLATE compressed data format, described in -// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file -// formats. -package flate - -import ( - "bufio" - "compress/flate" - "fmt" - "io" - "math/bits" - "sync" -) - -const ( - maxCodeLen = 16 // max length of Huffman code - maxCodeLenMask = 15 // mask for max length of Huffman code - // The next three numbers come from the RFC section 3.2.7, with the - // additional proviso in section 3.2.5 which implies that distance codes - // 30 and 31 should never occur in compressed data. - maxNumLit = 286 - maxNumDist = 30 - numCodes = 19 // number of codes in Huffman meta-code - - debugDecode = false -) - -// Value of length - 3 and extra bits. -type lengthExtra struct { - length, extra uint8 -} - -var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}} - -var bitMask32 = [32]uint32{ - 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, - 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, - 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF, - 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, -} // up to 32 bits - -// Initialize the fixedHuffmanDecoder only once upon first use. -var fixedOnce sync.Once -var fixedHuffmanDecoder huffmanDecoder - -// A CorruptInputError reports the presence of corrupt input at a given offset. -type CorruptInputError = flate.CorruptInputError - -// An InternalError reports an error in the flate code itself. -type InternalError string - -func (e InternalError) Error() string { return "flate: internal error: " + string(e) } - -// A ReadError reports an error encountered while reading input. -// -// Deprecated: No longer returned. -type ReadError = flate.ReadError - -// A WriteError reports an error encountered while writing output. -// -// Deprecated: No longer returned. -type WriteError = flate.WriteError - -// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to -// to switch to a new underlying Reader. This permits reusing a ReadCloser -// instead of allocating a new one. -type Resetter interface { - // Reset discards any buffered data and resets the Resetter as if it was - // newly initialized with the given reader. - Reset(r io.Reader, dict []byte) error -} - -// The data structure for decoding Huffman tables is based on that of -// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), -// For codes smaller than the table width, there are multiple entries -// (each combination of trailing bits has the same value). For codes -// larger than the table width, the table contains a link to an overflow -// table. The width of each entry in the link table is the maximum code -// size minus the chunk width. -// -// Note that you can do a lookup in the table even without all bits -// filled. Since the extra bits are zero, and the DEFLATE Huffman codes -// have the property that shorter codes come before longer ones, the -// bit length estimate in the result is a lower bound on the actual -// number of bits. -// -// See the following: -// http://www.gzip.org/algorithm.txt - -// chunk & 15 is number of bits -// chunk >> 4 is value, including table link - -const ( - huffmanChunkBits = 9 - huffmanNumChunks = 1 << huffmanChunkBits - huffmanCountMask = 15 - huffmanValueShift = 4 -) - -type huffmanDecoder struct { - maxRead int // the maximum number of bits we can read and not overread - chunks *[huffmanNumChunks]uint16 // chunks as described above - links [][]uint16 // overflow links - linkMask uint32 // mask the width of the link table -} - -// Initialize Huffman decoding tables from array of code lengths. -// Following this function, h is guaranteed to be initialized into a complete -// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a -// degenerate case where the tree has only a single symbol with length 1. Empty -// trees are permitted. -func (h *huffmanDecoder) init(lengths []int) bool { - // Sanity enables additional runtime tests during Huffman - // table construction. It's intended to be used during - // development to supplement the currently ad-hoc unit tests. - const sanity = false - - if h.chunks == nil { - h.chunks = &[huffmanNumChunks]uint16{} - } - if h.maxRead != 0 { - *h = huffmanDecoder{chunks: h.chunks, links: h.links} - } - - // Count number of codes of each length, - // compute maxRead and max length. - var count [maxCodeLen]int - var min, max int - for _, n := range lengths { - if n == 0 { - continue - } - if min == 0 || n < min { - min = n - } - if n > max { - max = n - } - count[n&maxCodeLenMask]++ - } - - // Empty tree. The decompressor.huffSym function will fail later if the tree - // is used. Technically, an empty tree is only valid for the HDIST tree and - // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree - // is guaranteed to fail since it will attempt to use the tree to decode the - // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is - // guaranteed to fail later since the compressed data section must be - // composed of at least one symbol (the end-of-block marker). - if max == 0 { - return true - } - - code := 0 - var nextcode [maxCodeLen]int - for i := min; i <= max; i++ { - code <<= 1 - nextcode[i&maxCodeLenMask] = code - code += count[i&maxCodeLenMask] - } - - // Check that the coding is complete (i.e., that we've - // assigned all 2-to-the-max possible bit sequences). - // Exception: To be compatible with zlib, we also need to - // accept degenerate single-code codings. See also - // TestDegenerateHuffmanCoding. - if code != 1< huffmanChunkBits { - numLinks := 1 << (uint(max) - huffmanChunkBits) - h.linkMask = uint32(numLinks - 1) - - // create link tables - link := nextcode[huffmanChunkBits+1] >> 1 - if cap(h.links) < huffmanNumChunks-link { - h.links = make([][]uint16, huffmanNumChunks-link) - } else { - h.links = h.links[:huffmanNumChunks-link] - } - for j := uint(link); j < huffmanNumChunks; j++ { - reverse := int(bits.Reverse16(uint16(j))) - reverse >>= uint(16 - huffmanChunkBits) - off := j - uint(link) - if sanity && h.chunks[reverse] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[reverse] = uint16(off<>= uint(16 - n) - if n <= huffmanChunkBits { - for off := reverse; off < len(h.chunks); off += 1 << uint(n) { - // We should never need to overwrite - // an existing chunk. Also, 0 is - // never a valid chunk, because the - // lower 4 "count" bits should be - // between 1 and 15. - if sanity && h.chunks[off] != 0 { - panic("impossible: overwriting existing chunk") - } - h.chunks[off] = chunk - } - } else { - j := reverse & (huffmanNumChunks - 1) - if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { - // Longer codes should have been - // associated with a link table above. - panic("impossible: not an indirect chunk") - } - value := h.chunks[j] >> huffmanValueShift - linktab := h.links[value] - reverse >>= huffmanChunkBits - for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { - if sanity && linktab[off] != 0 { - panic("impossible: overwriting existing chunk") - } - linktab[off] = chunk - } - } - } - - if sanity { - // Above we've sanity checked that we never overwrote - // an existing entry. Here we additionally check that - // we filled the tables completely. - for i, chunk := range h.chunks { - if chunk == 0 { - // As an exception, in the degenerate - // single-code case, we allow odd - // chunks to be missing. - if code == 1 && i%2 == 1 { - continue - } - panic("impossible: missing chunk") - } - } - for _, linktab := range h.links { - for _, chunk := range linktab { - if chunk == 0 { - panic("impossible: missing chunk") - } - } - } - } - - return true -} - -// The actual read interface needed by NewReader. -// If the passed in io.Reader does not also have ReadByte, -// the NewReader will introduce its own buffering. -type Reader interface { - io.Reader - io.ByteReader -} - -// Decompress state. -type decompressor struct { - // Input source. - r Reader - roffset int64 - - // Huffman decoders for literal/length, distance. - h1, h2 huffmanDecoder - - // Length arrays used to define Huffman codes. - bits *[maxNumLit + maxNumDist]int - codebits *[numCodes]int - - // Output history, buffer. - dict dictDecoder - - // Next step in the decompression, - // and decompression state. - step func(*decompressor) - stepState int - err error - toRead []byte - hl, hd *huffmanDecoder - copyLen int - copyDist int - - // Temporary buffer (avoids repeated allocation). - buf [4]byte - - // Input bits, in top of b. - b uint32 - - nb uint - final bool -} - -func (f *decompressor) nextBlock() { - for f.nb < 1+2 { - if f.err = f.moreBits(); f.err != nil { - return - } - } - f.final = f.b&1 == 1 - f.b >>= 1 - typ := f.b & 3 - f.b >>= 2 - f.nb -= 1 + 2 - switch typ { - case 0: - f.dataBlock() - if debugDecode { - fmt.Println("stored block") - } - case 1: - // compressed, fixed Huffman tables - f.hl = &fixedHuffmanDecoder - f.hd = nil - f.huffmanBlockDecoder()() - if debugDecode { - fmt.Println("predefinied huffman block") - } - case 2: - // compressed, dynamic Huffman tables - if f.err = f.readHuffman(); f.err != nil { - break - } - f.hl = &f.h1 - f.hd = &f.h2 - f.huffmanBlockDecoder()() - if debugDecode { - fmt.Println("dynamic huffman block") - } - default: - // 3 is reserved. - if debugDecode { - fmt.Println("reserved data block encountered") - } - f.err = CorruptInputError(f.roffset) - } -} - -func (f *decompressor) Read(b []byte) (int, error) { - for { - if len(f.toRead) > 0 { - n := copy(b, f.toRead) - f.toRead = f.toRead[n:] - if len(f.toRead) == 0 { - return n, f.err - } - return n, nil - } - if f.err != nil { - return 0, f.err - } - f.step(f) - if f.err != nil && len(f.toRead) == 0 { - f.toRead = f.dict.readFlush() // Flush what's left in case of error - } - } -} - -// Support the io.WriteTo interface for io.Copy and friends. -func (f *decompressor) WriteTo(w io.Writer) (int64, error) { - total := int64(0) - flushed := false - for { - if len(f.toRead) > 0 { - n, err := w.Write(f.toRead) - total += int64(n) - if err != nil { - f.err = err - return total, err - } - if n != len(f.toRead) { - return total, io.ErrShortWrite - } - f.toRead = f.toRead[:0] - } - if f.err != nil && flushed { - if f.err == io.EOF { - return total, nil - } - return total, f.err - } - if f.err == nil { - f.step(f) - } - if len(f.toRead) == 0 && f.err != nil && !flushed { - f.toRead = f.dict.readFlush() // Flush what's left in case of error - flushed = true - } - } -} - -func (f *decompressor) Close() error { - if f.err == io.EOF { - return nil - } - return f.err -} - -// RFC 1951 section 3.2.7. -// Compression with dynamic Huffman codes - -var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} - -func (f *decompressor) readHuffman() error { - // HLIT[5], HDIST[5], HCLEN[4]. - for f.nb < 5+5+4 { - if err := f.moreBits(); err != nil { - return err - } - } - nlit := int(f.b&0x1F) + 257 - if nlit > maxNumLit { - if debugDecode { - fmt.Println("nlit > maxNumLit", nlit) - } - return CorruptInputError(f.roffset) - } - f.b >>= 5 - ndist := int(f.b&0x1F) + 1 - if ndist > maxNumDist { - if debugDecode { - fmt.Println("ndist > maxNumDist", ndist) - } - return CorruptInputError(f.roffset) - } - f.b >>= 5 - nclen := int(f.b&0xF) + 4 - // numCodes is 19, so nclen is always valid. - f.b >>= 4 - f.nb -= 5 + 5 + 4 - - // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. - for i := 0; i < nclen; i++ { - for f.nb < 3 { - if err := f.moreBits(); err != nil { - return err - } - } - f.codebits[codeOrder[i]] = int(f.b & 0x7) - f.b >>= 3 - f.nb -= 3 - } - for i := nclen; i < len(codeOrder); i++ { - f.codebits[codeOrder[i]] = 0 - } - if !f.h1.init(f.codebits[0:]) { - if debugDecode { - fmt.Println("init codebits failed") - } - return CorruptInputError(f.roffset) - } - - // HLIT + 257 code lengths, HDIST + 1 code lengths, - // using the code length Huffman code. - for i, n := 0, nlit+ndist; i < n; { - x, err := f.huffSym(&f.h1) - if err != nil { - return err - } - if x < 16 { - // Actual length. - f.bits[i] = x - i++ - continue - } - // Repeat previous length or zero. - var rep int - var nb uint - var b int - switch x { - default: - return InternalError("unexpected length code") - case 16: - rep = 3 - nb = 2 - if i == 0 { - if debugDecode { - fmt.Println("i==0") - } - return CorruptInputError(f.roffset) - } - b = f.bits[i-1] - case 17: - rep = 3 - nb = 3 - b = 0 - case 18: - rep = 11 - nb = 7 - b = 0 - } - for f.nb < nb { - if err := f.moreBits(); err != nil { - if debugDecode { - fmt.Println("morebits:", err) - } - return err - } - } - rep += int(f.b & uint32(1<<(nb®SizeMaskUint32)-1)) - f.b >>= nb & regSizeMaskUint32 - f.nb -= nb - if i+rep > n { - if debugDecode { - fmt.Println("i+rep > n", i, rep, n) - } - return CorruptInputError(f.roffset) - } - for j := 0; j < rep; j++ { - f.bits[i] = b - i++ - } - } - - if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { - if debugDecode { - fmt.Println("init2 failed") - } - return CorruptInputError(f.roffset) - } - - // As an optimization, we can initialize the maxRead bits to read at a time - // for the HLIT tree to the length of the EOB marker since we know that - // every block must terminate with one. This preserves the property that - // we never read any extra bytes after the end of the DEFLATE stream. - if f.h1.maxRead < f.bits[endBlockMarker] { - f.h1.maxRead = f.bits[endBlockMarker] - } - if !f.final { - // If not the final block, the smallest block possible is - // a predefined table, BTYPE=01, with a single EOB marker. - // This will take up 3 + 7 bits. - f.h1.maxRead += 10 - } - - return nil -} - -// Copy a single uncompressed data block from input to output. -func (f *decompressor) dataBlock() { - // Uncompressed. - // Discard current half-byte. - left := (f.nb) & 7 - f.nb -= left - f.b >>= left - - offBytes := f.nb >> 3 - // Unfilled values will be overwritten. - f.buf[0] = uint8(f.b) - f.buf[1] = uint8(f.b >> 8) - f.buf[2] = uint8(f.b >> 16) - f.buf[3] = uint8(f.b >> 24) - - f.roffset += int64(offBytes) - f.nb, f.b = 0, 0 - - // Length then ones-complement of length. - nr, err := io.ReadFull(f.r, f.buf[offBytes:4]) - f.roffset += int64(nr) - if err != nil { - f.err = noEOF(err) - return - } - n := uint16(f.buf[0]) | uint16(f.buf[1])<<8 - nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8 - if nn != ^n { - if debugDecode { - ncomp := ^n - fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp) - } - f.err = CorruptInputError(f.roffset) - return - } - - if n == 0 { - f.toRead = f.dict.readFlush() - f.finishBlock() - return - } - - f.copyLen = int(n) - f.copyData() -} - -// copyData copies f.copyLen bytes from the underlying reader into f.hist. -// It pauses for reads when f.hist is full. -func (f *decompressor) copyData() { - buf := f.dict.writeSlice() - if len(buf) > f.copyLen { - buf = buf[:f.copyLen] - } - - cnt, err := io.ReadFull(f.r, buf) - f.roffset += int64(cnt) - f.copyLen -= cnt - f.dict.writeMark(cnt) - if err != nil { - f.err = noEOF(err) - return - } - - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() - f.step = (*decompressor).copyData - return - } - f.finishBlock() -} - -func (f *decompressor) finishBlock() { - if f.final { - if f.dict.availRead() > 0 { - f.toRead = f.dict.readFlush() - } - f.err = io.EOF - } - f.step = (*decompressor).nextBlock -} - -// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. -func noEOF(e error) error { - if e == io.EOF { - return io.ErrUnexpectedEOF - } - return e -} - -func (f *decompressor) moreBits() error { - c, err := f.r.ReadByte() - if err != nil { - return noEOF(err) - } - f.roffset++ - f.b |= uint32(c) << (f.nb & regSizeMaskUint32) - f.nb += 8 - return nil -} - -// Read the next Huffman-encoded symbol from f according to h. -func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(h.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - nb, b := f.nb, f.b - for { - for nb < n { - c, err := f.r.ReadByte() - if err != nil { - f.b = b - f.nb = nb - return 0, noEOF(err) - } - f.roffset++ - b |= uint32(c) << (nb & regSizeMaskUint32) - nb += 8 - } - chunk := h.chunks[b&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= nb { - if n == 0 { - f.b = b - f.nb = nb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return 0, f.err - } - f.b = b >> (n & regSizeMaskUint32) - f.nb = nb - n - return int(chunk >> huffmanValueShift), nil - } - } -} - -func makeReader(r io.Reader) Reader { - if rr, ok := r.(Reader); ok { - return rr - } - return bufio.NewReader(r) -} - -func fixedHuffmanDecoderInit() { - fixedOnce.Do(func() { - // These come from the RFC section 3.2.6. - var bits [288]int - for i := 0; i < 144; i++ { - bits[i] = 8 - } - for i := 144; i < 256; i++ { - bits[i] = 9 - } - for i := 256; i < 280; i++ { - bits[i] = 7 - } - for i := 280; i < 288; i++ { - bits[i] = 8 - } - fixedHuffmanDecoder.init(bits[:]) - }) -} - -func (f *decompressor) Reset(r io.Reader, dict []byte) error { - *f = decompressor{ - r: makeReader(r), - bits: f.bits, - codebits: f.codebits, - h1: f.h1, - h2: f.h2, - dict: f.dict, - step: (*decompressor).nextBlock, - } - f.dict.init(maxMatchOffset, dict) - return nil -} - -// NewReader returns a new ReadCloser that can be used -// to read the uncompressed version of r. -// If r does not also implement io.ByteReader, -// the decompressor may read more data than necessary from r. -// It is the caller's responsibility to call Close on the ReadCloser -// when finished reading. -// -// The ReadCloser returned by NewReader also implements Resetter. -func NewReader(r io.Reader) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.r = makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock - f.dict.init(maxMatchOffset, nil) - return &f -} - -// NewReaderDict is like NewReader but initializes the reader -// with a preset dictionary. The returned Reader behaves as if -// the uncompressed data stream started with the given dictionary, -// which has already been read. NewReaderDict is typically used -// to read data compressed by NewWriterDict. -// -// The ReadCloser returned by NewReader also implements Resetter. -func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.r = makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = (*decompressor).nextBlock - f.dict.init(maxMatchOffset, dict) - return &f -} diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go deleted file mode 100644 index 61342b6b..00000000 --- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go +++ /dev/null @@ -1,1283 +0,0 @@ -// Code generated by go generate gen_inflate.go. DO NOT EDIT. - -package flate - -import ( - "bufio" - "bytes" - "fmt" - "math/bits" - "strings" -) - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBytesBuffer() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.(*bytes.Buffer) - - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesBuffer - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := uint(val.extra) - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBytesReader() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.(*bytes.Reader) - - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesReader - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := uint(val.extra) - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBytesReader // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanBufioReader() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.(*bufio.Reader) - - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBufioReader - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := uint(val.extra) - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanBufioReader // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanStringsReader() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.(*strings.Reader) - - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanStringsReader - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := uint(val.extra) - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanStringsReader // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -// Decode a single Huffman block from f. -// hl and hd are the Huffman states for the lit/length values -// and the distance values, respectively. If hd == nil, using the -// fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) huffmanGenericReader() { - const ( - stateInit = iota // Zero value must be stateInit - stateDict - ) - fr := f.r.(Reader) - - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb, dict := f.nb, f.b, &f.dict - - switch f.stepState { - case stateInit: - goto readLiteral - case stateDict: - goto copyHistory - } - -readLiteral: - // Read literal and/or (length, distance) according to RFC section 3.2.3. - { - var v int - { - // Inlined v, err := f.huffSym(f.hl) - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hl.maxRead) - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - v = int(chunk >> huffmanValueShift) - break - } - } - } - - var length int - switch { - case v < 256: - dict.writeByte(byte(v)) - if dict.availWrite() == 0 { - f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanGenericReader - f.stepState = stateInit - f.b, f.nb = fb, fnb - return - } - goto readLiteral - case v == 256: - f.b, f.nb = fb, fnb - f.finishBlock() - return - // otherwise, reference to older data - case v < 265: - length = v - (257 - 3) - case v < maxNumLit: - val := decCodeToLen[(v - 257)] - length = int(val.length) + 3 - n := uint(val.extra) - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits n>0:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - length += int(fb & bitMask32[n]) - fb >>= n & regSizeMaskUint32 - fnb -= n - default: - if debugDecode { - fmt.Println(v, ">= maxNumLit") - } - f.err = CorruptInputError(f.roffset) - f.b, f.nb = fb, fnb - return - } - - var dist uint32 - if f.hd == nil { - for fnb < 5 { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb<5:", err) - } - f.err = err - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) - fb >>= 5 - fnb -= 5 - } else { - // Since a huffmanDecoder can be empty or be composed of a degenerate tree - // with single element, huffSym must error on these two edge cases. In both - // cases, the chunks slice will be 0 for the invalid sequence, leading it - // satisfy the n == 0 check below. - n := uint(f.hd.maxRead) - // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, - // but is smart enough to keep local variables in registers, so use nb and b, - // inline call to moreBits and reassign b,nb back to f on return. - for { - for fnb < n { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - f.err = noEOF(err) - return - } - f.roffset++ - fb |= uint32(c) << (fnb & regSizeMaskUint32) - fnb += 8 - } - chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] - n = uint(chunk & huffmanCountMask) - if n > huffmanChunkBits { - chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] - n = uint(chunk & huffmanCountMask) - } - if n <= fnb { - if n == 0 { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("huffsym: n==0") - } - f.err = CorruptInputError(f.roffset) - return - } - fb = fb >> (n & regSizeMaskUint32) - fnb = fnb - n - dist = uint32(chunk >> huffmanValueShift) - break - } - } - } - - switch { - case dist < 4: - dist++ - case dist < maxNumDist: - nb := uint(dist-2) >> 1 - // have 1 bit in bottom of dist, need nb more. - extra := (dist & 1) << (nb & regSizeMaskUint32) - for fnb < nb { - c, err := fr.ReadByte() - if err != nil { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("morebits f.nb>= nb & regSizeMaskUint32 - fnb -= nb - dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra - // slower: dist = bitMask32[nb+1] + 2 + extra - default: - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist too big:", dist, maxNumDist) - } - f.err = CorruptInputError(f.roffset) - return - } - - // No check on length; encoding can be prescient. - if dist > uint32(dict.histSize()) { - f.b, f.nb = fb, fnb - if debugDecode { - fmt.Println("dist > dict.histSize():", dist, dict.histSize()) - } - f.err = CorruptInputError(f.roffset) - return - } - - f.copyLen, f.copyDist = length, int(dist) - goto copyHistory - } - -copyHistory: - // Perform a backwards copy according to RFC section 3.2.3. - { - cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) - if cnt == 0 { - cnt = dict.writeCopy(f.copyDist, f.copyLen) - } - f.copyLen -= cnt - - if dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = dict.readFlush() - f.step = (*decompressor).huffmanGenericReader // We need to continue this work - f.stepState = stateDict - f.b, f.nb = fb, fnb - return - } - goto readLiteral - } - // Not reached -} - -func (f *decompressor) huffmanBlockDecoder() func() { - switch f.r.(type) { - case *bytes.Buffer: - return f.huffmanBytesBuffer - case *bytes.Reader: - return f.huffmanBytesReader - case *bufio.Reader: - return f.huffmanBufioReader - case *strings.Reader: - return f.huffmanStringsReader - case Reader: - return f.huffmanGenericReader - default: - return f.huffmanGenericReader - } -} diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go deleted file mode 100644 index 703b9a89..00000000 --- a/vendor/github.com/klauspost/compress/flate/level1.go +++ /dev/null @@ -1,241 +0,0 @@ -package flate - -import ( - "encoding/binary" - "fmt" - "math/bits" -) - -// fastGen maintains the table for matches, -// and the previous byte block for level 2. -// This is the generic implementation. -type fastEncL1 struct { - fastGen - table [tableSize]tableEntry -} - -// EncodeL1 uses a similar algorithm to level 1 -func (e *fastEncL1) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashBytes = 5 - ) - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - - for { - const skipLog = 5 - const doEvery = 2 - - nextS := s - var candidate tableEntry - for { - nextHash := hashLen(cv, tableBits, hashBytes) - candidate = e.table[nextHash] - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - - now := load6432(src, nextS) - e.table[nextHash] = tableEntry{offset: s + e.cur} - nextHash = hashLen(now, tableBits, hashBytes) - - offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { - e.table[nextHash] = tableEntry{offset: nextS + e.cur} - break - } - - // Do one right away... - cv = now - s = nextS - nextS++ - candidate = e.table[nextHash] - now >>= 8 - e.table[nextHash] = tableEntry{offset: s + e.cur} - - offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { - e.table[nextHash] = tableEntry{offset: nextS + e.cur} - break - } - cv = now - s = nextS - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - - // Extend the 4-byte match as long as possible. - t := candidate.offset - e.cur - var l = int32(4) - if false { - l = e.matchlenLong(s+4, t+4, src) + 4 - } else { - // inlined: - a := src[s+4:] - b := src[t+4:] - for len(a) >= 8 { - if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { - l += int32(bits.TrailingZeros64(diff) >> 3) - break - } - l += 8 - a = a[8:] - b = b[8:] - } - if len(a) < 8 { - b = b[:len(a)] - for i := range a { - if a[i] != b[i] { - break - } - l++ - } - } - } - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - - // Save the match found - if false { - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - } else { - // Inlined... - xoffset := uint32(s - t - baseMatchOffset) - xlength := l - oc := offsetCode(xoffset) - xoffset |= oc << 16 - for xlength > 0 { - xl := xlength - if xl > 258 { - if xl > 258+baseMatchLength { - xl = 258 - } else { - xl = 258 - baseMatchLength - } - } - xlength -= xl - xl -= baseMatchLength - dst.extraHist[lengthCodes1[uint8(xl)]]++ - dst.offHist[oc]++ - dst.tokens[dst.n] = token(matchType | uint32(xl)<= s { - s = nextS + 1 - } - if s >= sLimit { - // Index first pair after match end. - if int(s+l+8) < len(src) { - cv := load6432(src, s) - e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur} - } - goto emitRemainder - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-2 and at s. If - // another emitCopy is not our next move, also calculate nextHash - // at s+1. At least on GOARCH=amd64, these three hash calculations - // are faster as one load64 call (with some shifts) instead of - // three load32 calls. - x := load6432(src, s-2) - o := e.cur + s - 2 - prevHash := hashLen(x, tableBits, hashBytes) - e.table[prevHash] = tableEntry{offset: o} - x >>= 16 - currHash := hashLen(x, tableBits, hashBytes) - candidate = e.table[currHash] - e.table[currHash] = tableEntry{offset: o + 2} - - offset := s - (candidate.offset - e.cur) - if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) { - cv = x >> 8 - s++ - break - } - } - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go deleted file mode 100644 index 876dfbe3..00000000 --- a/vendor/github.com/klauspost/compress/flate/level2.go +++ /dev/null @@ -1,214 +0,0 @@ -package flate - -import "fmt" - -// fastGen maintains the table for matches, -// and the previous byte block for level 2. -// This is the generic implementation. -type fastEncL2 struct { - fastGen - table [bTableSize]tableEntry -} - -// EncodeL2 uses a similar algorithm to level 1, but is capable -// of matching across blocks giving better compression at a small slowdown. -func (e *fastEncL2) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashBytes = 5 - ) - - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - for { - // When should we start skipping if we haven't found matches in a long while. - const skipLog = 5 - const doEvery = 2 - - nextS := s - var candidate tableEntry - for { - nextHash := hashLen(cv, bTableBits, hashBytes) - s = nextS - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - candidate = e.table[nextHash] - now := load6432(src, nextS) - e.table[nextHash] = tableEntry{offset: s + e.cur} - nextHash = hashLen(now, bTableBits, hashBytes) - - offset := s - (candidate.offset - e.cur) - if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { - e.table[nextHash] = tableEntry{offset: nextS + e.cur} - break - } - - // Do one right away... - cv = now - s = nextS - nextS++ - candidate = e.table[nextHash] - now >>= 8 - e.table[nextHash] = tableEntry{offset: s + e.cur} - - offset = s - (candidate.offset - e.cur) - if offset < maxMatchOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { - break - } - cv = now - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - - // Call emitCopy, and then see if another emitCopy could be our next - // move. Repeat until we find no match for the input immediately after - // what was consumed by the last emitCopy call. - // - // If we exit this loop normally then we need to call emitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can - // exit this loop via goto if we get close to exhausting the input. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - - // Extend the 4-byte match as long as possible. - t := candidate.offset - e.cur - l := e.matchlenLong(s+4, t+4, src) + 4 - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - // Index first pair after match end. - if int(s+l+8) < len(src) { - cv := load6432(src, s) - e.table[hashLen(cv, bTableBits, hashBytes)] = tableEntry{offset: s + e.cur} - } - goto emitRemainder - } - - // Store every second hash in-between, but offset by 1. - for i := s - l + 2; i < s-5; i += 7 { - x := load6432(src, i) - nextHash := hashLen(x, bTableBits, hashBytes) - e.table[nextHash] = tableEntry{offset: e.cur + i} - // Skip one - x >>= 16 - nextHash = hashLen(x, bTableBits, hashBytes) - e.table[nextHash] = tableEntry{offset: e.cur + i + 2} - // Skip one - x >>= 16 - nextHash = hashLen(x, bTableBits, hashBytes) - e.table[nextHash] = tableEntry{offset: e.cur + i + 4} - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-2 to s. If - // another emitCopy is not our next move, also calculate nextHash - // at s+1. At least on GOARCH=amd64, these three hash calculations - // are faster as one load64 call (with some shifts) instead of - // three load32 calls. - x := load6432(src, s-2) - o := e.cur + s - 2 - prevHash := hashLen(x, bTableBits, hashBytes) - prevHash2 := hashLen(x>>8, bTableBits, hashBytes) - e.table[prevHash] = tableEntry{offset: o} - e.table[prevHash2] = tableEntry{offset: o + 1} - currHash := hashLen(x>>16, bTableBits, hashBytes) - candidate = e.table[currHash] - e.table[currHash] = tableEntry{offset: o + 2} - - offset := s - (candidate.offset - e.cur) - if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { - cv = x >> 24 - s++ - break - } - } - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go deleted file mode 100644 index 7aa2b72a..00000000 --- a/vendor/github.com/klauspost/compress/flate/level3.go +++ /dev/null @@ -1,241 +0,0 @@ -package flate - -import "fmt" - -// fastEncL3 -type fastEncL3 struct { - fastGen - table [1 << 16]tableEntryPrev -} - -// Encode uses a similar algorithm to level 2, will check up to two candidates. -func (e *fastEncL3) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - tableBits = 16 - tableSize = 1 << tableBits - hashBytes = 5 - ) - - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntryPrev{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i] - if v.Cur.offset <= minOff { - v.Cur.offset = 0 - } else { - v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset - } - if v.Prev.offset <= minOff { - v.Prev.offset = 0 - } else { - v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset - } - e.table[i] = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // Skip if too small. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - for { - const skipLog = 7 - nextS := s - var candidate tableEntry - for { - nextHash := hashLen(cv, tableBits, hashBytes) - s = nextS - nextS = s + 1 + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - candidates := e.table[nextHash] - now := load6432(src, nextS) - - // Safe offset distance until s + 4... - minOffset := e.cur + s - (maxMatchOffset - 4) - e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}} - - // Check both candidates - candidate = candidates.Cur - if candidate.offset < minOffset { - cv = now - // Previous will also be invalid, we have nothing. - continue - } - - if uint32(cv) == load3232(src, candidate.offset-e.cur) { - if candidates.Prev.offset < minOffset || uint32(cv) != load3232(src, candidates.Prev.offset-e.cur) { - break - } - // Both match and are valid, pick longest. - offset := s - (candidate.offset - e.cur) - o2 := s - (candidates.Prev.offset - e.cur) - l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) - if l2 > l1 { - candidate = candidates.Prev - } - break - } else { - // We only check if value mismatches. - // Offset will always be invalid in other cases. - candidate = candidates.Prev - if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { - break - } - } - cv = now - } - - // Call emitCopy, and then see if another emitCopy could be our next - // move. Repeat until we find no match for the input immediately after - // what was consumed by the last emitCopy call. - // - // If we exit this loop normally then we need to call emitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can - // exit this loop via goto if we get close to exhausting the input. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - - // Extend the 4-byte match as long as possible. - // - t := candidate.offset - e.cur - l := e.matchlenLong(s+4, t+4, src) + 4 - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - t += l - // Index first pair after match end. - if int(t+8) < len(src) && t > 0 { - cv = load6432(src, t) - nextHash := hashLen(cv, tableBits, hashBytes) - e.table[nextHash] = tableEntryPrev{ - Prev: e.table[nextHash].Cur, - Cur: tableEntry{offset: e.cur + t}, - } - } - goto emitRemainder - } - - // Store every 5th hash in-between. - for i := s - l + 2; i < s-5; i += 6 { - nextHash := hashLen(load6432(src, i), tableBits, hashBytes) - e.table[nextHash] = tableEntryPrev{ - Prev: e.table[nextHash].Cur, - Cur: tableEntry{offset: e.cur + i}} - } - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-2 to s. - x := load6432(src, s-2) - prevHash := hashLen(x, tableBits, hashBytes) - - e.table[prevHash] = tableEntryPrev{ - Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 2}, - } - x >>= 8 - prevHash = hashLen(x, tableBits, hashBytes) - - e.table[prevHash] = tableEntryPrev{ - Prev: e.table[prevHash].Cur, - Cur: tableEntry{offset: e.cur + s - 1}, - } - x >>= 8 - currHash := hashLen(x, tableBits, hashBytes) - candidates := e.table[currHash] - cv = x - e.table[currHash] = tableEntryPrev{ - Prev: candidates.Cur, - Cur: tableEntry{offset: s + e.cur}, - } - - // Check both candidates - candidate = candidates.Cur - minOffset := e.cur + s - (maxMatchOffset - 4) - - if candidate.offset > minOffset { - if uint32(cv) == load3232(src, candidate.offset-e.cur) { - // Found a match... - continue - } - candidate = candidates.Prev - if candidate.offset > minOffset && uint32(cv) == load3232(src, candidate.offset-e.cur) { - // Match at prev... - continue - } - } - cv = x >> 8 - s++ - break - } - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go deleted file mode 100644 index 23c08b32..00000000 --- a/vendor/github.com/klauspost/compress/flate/level4.go +++ /dev/null @@ -1,221 +0,0 @@ -package flate - -import "fmt" - -type fastEncL4 struct { - fastGen - table [tableSize]tableEntry - bTable [tableSize]tableEntry -} - -func (e *fastEncL4) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashShortBytes = 4 - ) - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.bTable[:] { - e.bTable[i] = tableEntry{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - for i := range e.bTable[:] { - v := e.bTable[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.bTable[i].offset = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - for { - const skipLog = 6 - const doEvery = 1 - - nextS := s - var t int32 - for { - nextHashS := hashLen(cv, tableBits, hashShortBytes) - nextHashL := hash7(cv, tableBits) - - s = nextS - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - // Fetch a short+long candidate - sCandidate := e.table[nextHashS] - lCandidate := e.bTable[nextHashL] - next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur} - e.table[nextHashS] = entry - e.bTable[nextHashL] = entry - - t = lCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) { - // We got a long match. Use that. - break - } - - t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { - // Found a 4 match... - lCandidate = e.bTable[hash7(next, tableBits)] - - // If the next long is a candidate, check if we should use that instead... - lOff := nextS - (lCandidate.offset - e.cur) - if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) { - l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:]) - if l2 > l1 { - s = nextS - t = lCandidate.offset - e.cur - } - } - break - } - cv = next - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - - // Extend the 4-byte match as long as possible. - l := e.matchlenLong(s+4, t+4, src) + 4 - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - if debugDeflate { - if t >= s { - panic("s-t") - } - if (s - t) > maxMatchOffset { - panic(fmt.Sprintln("mmo", t)) - } - if l < baseMatchLength { - panic("bml") - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - // Index first pair after match end. - if int(s+8) < len(src) { - cv := load6432(src, s) - e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur} - e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} - } - goto emitRemainder - } - - // Store every 3rd hash in-between - if true { - i := nextS - if i < s-1 { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - t2 := tableEntry{offset: t.offset + 1} - e.bTable[hash7(cv, tableBits)] = t - e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 - - i += 3 - for ; i < s-1; i += 3 { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - t2 := tableEntry{offset: t.offset + 1} - e.bTable[hash7(cv, tableBits)] = t - e.bTable[hash7(cv>>8, tableBits)] = t2 - e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 - } - } - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. - x := load6432(src, s-1) - o := e.cur + s - 1 - prevHashS := hashLen(x, tableBits, hashShortBytes) - prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o} - e.bTable[prevHashL] = tableEntry{offset: o} - cv = x >> 8 - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go deleted file mode 100644 index 83ef50ba..00000000 --- a/vendor/github.com/klauspost/compress/flate/level5.go +++ /dev/null @@ -1,310 +0,0 @@ -package flate - -import "fmt" - -type fastEncL5 struct { - fastGen - table [tableSize]tableEntry - bTable [tableSize]tableEntryPrev -} - -func (e *fastEncL5) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashShortBytes = 4 - ) - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.bTable[:] { - e.bTable[i] = tableEntryPrev{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - for i := range e.bTable[:] { - v := e.bTable[i] - if v.Cur.offset <= minOff { - v.Cur.offset = 0 - v.Prev.offset = 0 - } else { - v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset - if v.Prev.offset <= minOff { - v.Prev.offset = 0 - } else { - v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset - } - } - e.bTable[i] = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - for { - const skipLog = 6 - const doEvery = 1 - - nextS := s - var l int32 - var t int32 - for { - nextHashS := hashLen(cv, tableBits, hashShortBytes) - nextHashL := hash7(cv, tableBits) - - s = nextS - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - // Fetch a short+long candidate - sCandidate := e.table[nextHashS] - lCandidate := e.bTable[nextHashL] - next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur} - e.table[nextHashS] = entry - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = entry, eLong.Cur - - nextHashS = hashLen(next, tableBits, hashShortBytes) - nextHashL = hash7(next, tableBits) - - t = lCandidate.Cur.offset - e.cur - if s-t < maxMatchOffset { - if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - - t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { - l = e.matchlen(s+4, t+4, src) + 4 - ml1 := e.matchlen(s+4, t2+4, src) + 4 - if ml1 > l { - t = t2 - l = ml1 - break - } - } - break - } - t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - break - } - } - - t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { - // Found a 4 match... - l = e.matchlen(s+4, t+4, src) + 4 - lCandidate = e.bTable[nextHashL] - // Store the next match - - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - - // If the next long is a candidate, use that... - t2 := lCandidate.Cur.offset - e.cur - if nextS-t2 < maxMatchOffset { - if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { - ml := e.matchlen(nextS+4, t2+4, src) + 4 - if ml > l { - t = t2 - s = nextS - l = ml - break - } - } - // If the previous long is a candidate, use that... - t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { - ml := e.matchlen(nextS+4, t2+4, src) + 4 - if ml > l { - t = t2 - s = nextS - l = ml - break - } - } - } - break - } - cv = next - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - - if l == 0 { - // Extend the 4-byte match as long as possible. - l = e.matchlenLong(s+4, t+4, src) + 4 - } else if l == maxMatchLength { - l += e.matchlenLong(s+l, t+l, src) - } - - // Try to locate a better match by checking the end of best match... - if sAt := s + l; l < 30 && sAt < sLimit { - // Allow some bytes at the beginning to mismatch. - // Sweet spot is 2/3 bytes depending on input. - // 3 is only a little better when it is but sometimes a lot worse. - // The skipped bytes are tested in Extend backwards, - // and still picked up as part of the match if they do. - const skipBeginning = 2 - eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset - t2 := eLong - e.cur - l + skipBeginning - s2 := s + skipBeginning - off := s2 - t2 - if t2 >= 0 && off < maxMatchOffset && off > 0 { - if l2 := e.matchlenLong(s2, t2, src); l2 > l { - t = t2 - l = l2 - s = s2 - } - } - } - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - if debugDeflate { - if t >= s { - panic(fmt.Sprintln("s-t", s, t)) - } - if (s - t) > maxMatchOffset { - panic(fmt.Sprintln("mmo", s-t)) - } - if l < baseMatchLength { - panic("bml") - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - goto emitRemainder - } - - // Store every 3rd hash in-between. - if true { - const hashEvery = 3 - i := s - l + 1 - if i < s-1 { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - e.table[hashLen(cv, tableBits, hashShortBytes)] = t - eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = t, eLong.Cur - - // Do an long at i+1 - cv >>= 8 - t = tableEntry{offset: t.offset + 1} - eLong = &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = t, eLong.Cur - - // We only have enough bits for a short entry at i+2 - cv >>= 8 - t = tableEntry{offset: t.offset + 1} - e.table[hashLen(cv, tableBits, hashShortBytes)] = t - - // Skip one - otherwise we risk hitting 's' - i += 4 - for ; i < s-1; i += hashEvery { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - t2 := tableEntry{offset: t.offset + 1} - eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = t, eLong.Cur - e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2 - } - } - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. - x := load6432(src, s-1) - o := e.cur + s - 1 - prevHashS := hashLen(x, tableBits, hashShortBytes) - prevHashL := hash7(x, tableBits) - e.table[prevHashS] = tableEntry{offset: o} - eLong := &e.bTable[prevHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur - cv = x >> 8 - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go deleted file mode 100644 index f1e9d98f..00000000 --- a/vendor/github.com/klauspost/compress/flate/level6.go +++ /dev/null @@ -1,325 +0,0 @@ -package flate - -import "fmt" - -type fastEncL6 struct { - fastGen - table [tableSize]tableEntry - bTable [tableSize]tableEntryPrev -} - -func (e *fastEncL6) Encode(dst *tokens, src []byte) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - hashShortBytes = 4 - ) - if debugDeflate && e.cur < 0 { - panic(fmt.Sprint("e.cur < 0: ", e.cur)) - } - - // Protect against e.cur wraparound. - for e.cur >= bufferReset { - if len(e.hist) == 0 { - for i := range e.table[:] { - e.table[i] = tableEntry{} - } - for i := range e.bTable[:] { - e.bTable[i] = tableEntryPrev{} - } - e.cur = maxMatchOffset - break - } - // Shift down everything in the table that isn't already too far away. - minOff := e.cur + int32(len(e.hist)) - maxMatchOffset - for i := range e.table[:] { - v := e.table[i].offset - if v <= minOff { - v = 0 - } else { - v = v - e.cur + maxMatchOffset - } - e.table[i].offset = v - } - for i := range e.bTable[:] { - v := e.bTable[i] - if v.Cur.offset <= minOff { - v.Cur.offset = 0 - v.Prev.offset = 0 - } else { - v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset - if v.Prev.offset <= minOff { - v.Prev.offset = 0 - } else { - v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset - } - } - e.bTable[i] = v - } - e.cur = maxMatchOffset - } - - s := e.addBlock(src) - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = uint16(len(src)) - return - } - - // Override src - src = e.hist - nextEmit := s - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int32(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load6432(src, s) - // Repeat MUST be > 1 and within range - repeat := int32(1) - for { - const skipLog = 7 - const doEvery = 1 - - nextS := s - var l int32 - var t int32 - for { - nextHashS := hashLen(cv, tableBits, hashShortBytes) - nextHashL := hash7(cv, tableBits) - s = nextS - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit { - goto emitRemainder - } - // Fetch a short+long candidate - sCandidate := e.table[nextHashS] - lCandidate := e.bTable[nextHashL] - next := load6432(src, nextS) - entry := tableEntry{offset: s + e.cur} - e.table[nextHashS] = entry - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = entry, eLong.Cur - - // Calculate hashes of 'next' - nextHashS = hashLen(next, tableBits, hashShortBytes) - nextHashL = hash7(next, tableBits) - - t = lCandidate.Cur.offset - e.cur - if s-t < maxMatchOffset { - if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { - // Long candidate matches at least 4 bytes. - - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - - // Check the previous long candidate as well. - t2 := lCandidate.Prev.offset - e.cur - if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { - l = e.matchlen(s+4, t+4, src) + 4 - ml1 := e.matchlen(s+4, t2+4, src) + 4 - if ml1 > l { - t = t2 - l = ml1 - break - } - } - break - } - // Current value did not match, but check if previous long value does. - t = lCandidate.Prev.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - break - } - } - - t = sCandidate.offset - e.cur - if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { - // Found a 4 match... - l = e.matchlen(s+4, t+4, src) + 4 - - // Look up next long candidate (at nextS) - lCandidate = e.bTable[nextHashL] - - // Store the next match - e.table[nextHashS] = tableEntry{offset: nextS + e.cur} - eLong := &e.bTable[nextHashL] - eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur - - // Check repeat at s + repOff - const repOff = 1 - t2 := s - repeat + repOff - if load3232(src, t2) == uint32(cv>>(8*repOff)) { - ml := e.matchlen(s+4+repOff, t2+4, src) + 4 - if ml > l { - t = t2 - l = ml - s += repOff - // Not worth checking more. - break - } - } - - // If the next long is a candidate, use that... - t2 = lCandidate.Cur.offset - e.cur - if nextS-t2 < maxMatchOffset { - if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { - ml := e.matchlen(nextS+4, t2+4, src) + 4 - if ml > l { - t = t2 - s = nextS - l = ml - // This is ok, but check previous as well. - } - } - // If the previous long is a candidate, use that... - t2 = lCandidate.Prev.offset - e.cur - if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { - ml := e.matchlen(nextS+4, t2+4, src) + 4 - if ml > l { - t = t2 - s = nextS - l = ml - break - } - } - } - break - } - cv = next - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - - // Extend the 4-byte match as long as possible. - if l == 0 { - l = e.matchlenLong(s+4, t+4, src) + 4 - } else if l == maxMatchLength { - l += e.matchlenLong(s+l, t+l, src) - } - - // Try to locate a better match by checking the end-of-match... - if sAt := s + l; sAt < sLimit { - // Allow some bytes at the beginning to mismatch. - // Sweet spot is 2/3 bytes depending on input. - // 3 is only a little better when it is but sometimes a lot worse. - // The skipped bytes are tested in Extend backwards, - // and still picked up as part of the match if they do. - const skipBeginning = 2 - eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)] - // Test current - t2 := eLong.Cur.offset - e.cur - l + skipBeginning - s2 := s + skipBeginning - off := s2 - t2 - if off < maxMatchOffset { - if off > 0 && t2 >= 0 { - if l2 := e.matchlenLong(s2, t2, src); l2 > l { - t = t2 - l = l2 - s = s2 - } - } - // Test next: - t2 = eLong.Prev.offset - e.cur - l + skipBeginning - off := s2 - t2 - if off > 0 && off < maxMatchOffset && t2 >= 0 { - if l2 := e.matchlenLong(s2, t2, src); l2 > l { - t = t2 - l = l2 - s = s2 - } - } - } - } - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - if false { - if t >= s { - panic(fmt.Sprintln("s-t", s, t)) - } - if (s - t) > maxMatchOffset { - panic(fmt.Sprintln("mmo", s-t)) - } - if l < baseMatchLength { - panic("bml") - } - } - - dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) - repeat = s - t - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - - if s >= sLimit { - // Index after match end. - for i := nextS + 1; i < int32(len(src))-8; i += 2 { - cv := load6432(src, i) - e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur} - eLong := &e.bTable[hash7(cv, tableBits)] - eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur - } - goto emitRemainder - } - - // Store every long hash in-between and every second short. - if true { - for i := nextS + 1; i < s-1; i += 2 { - cv := load6432(src, i) - t := tableEntry{offset: i + e.cur} - t2 := tableEntry{offset: t.offset + 1} - eLong := &e.bTable[hash7(cv, tableBits)] - eLong2 := &e.bTable[hash7(cv>>8, tableBits)] - e.table[hashLen(cv, tableBits, hashShortBytes)] = t - eLong.Cur, eLong.Prev = t, eLong.Cur - eLong2.Cur, eLong2.Prev = t2, eLong2.Cur - } - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. - cv = load6432(src, s) - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/vendor/github.com/klauspost/compress/flate/regmask_amd64.go b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go deleted file mode 100644 index 6ed28061..00000000 --- a/vendor/github.com/klauspost/compress/flate/regmask_amd64.go +++ /dev/null @@ -1,37 +0,0 @@ -package flate - -const ( - // Masks for shifts with register sizes of the shift value. - // This can be used to work around the x86 design of shifting by mod register size. - // It can be used when a variable shift is always smaller than the register size. - - // reg8SizeMaskX - shift value is 8 bits, shifted is X - reg8SizeMask8 = 7 - reg8SizeMask16 = 15 - reg8SizeMask32 = 31 - reg8SizeMask64 = 63 - - // reg16SizeMaskX - shift value is 16 bits, shifted is X - reg16SizeMask8 = reg8SizeMask8 - reg16SizeMask16 = reg8SizeMask16 - reg16SizeMask32 = reg8SizeMask32 - reg16SizeMask64 = reg8SizeMask64 - - // reg32SizeMaskX - shift value is 32 bits, shifted is X - reg32SizeMask8 = reg8SizeMask8 - reg32SizeMask16 = reg8SizeMask16 - reg32SizeMask32 = reg8SizeMask32 - reg32SizeMask64 = reg8SizeMask64 - - // reg64SizeMaskX - shift value is 64 bits, shifted is X - reg64SizeMask8 = reg8SizeMask8 - reg64SizeMask16 = reg8SizeMask16 - reg64SizeMask32 = reg8SizeMask32 - reg64SizeMask64 = reg8SizeMask64 - - // regSizeMaskUintX - shift value is uint, shifted is X - regSizeMaskUint8 = reg8SizeMask8 - regSizeMaskUint16 = reg8SizeMask16 - regSizeMaskUint32 = reg8SizeMask32 - regSizeMaskUint64 = reg8SizeMask64 -) diff --git a/vendor/github.com/klauspost/compress/flate/regmask_other.go b/vendor/github.com/klauspost/compress/flate/regmask_other.go deleted file mode 100644 index 1b7a2cbd..00000000 --- a/vendor/github.com/klauspost/compress/flate/regmask_other.go +++ /dev/null @@ -1,40 +0,0 @@ -//go:build !amd64 -// +build !amd64 - -package flate - -const ( - // Masks for shifts with register sizes of the shift value. - // This can be used to work around the x86 design of shifting by mod register size. - // It can be used when a variable shift is always smaller than the register size. - - // reg8SizeMaskX - shift value is 8 bits, shifted is X - reg8SizeMask8 = 0xff - reg8SizeMask16 = 0xff - reg8SizeMask32 = 0xff - reg8SizeMask64 = 0xff - - // reg16SizeMaskX - shift value is 16 bits, shifted is X - reg16SizeMask8 = 0xffff - reg16SizeMask16 = 0xffff - reg16SizeMask32 = 0xffff - reg16SizeMask64 = 0xffff - - // reg32SizeMaskX - shift value is 32 bits, shifted is X - reg32SizeMask8 = 0xffffffff - reg32SizeMask16 = 0xffffffff - reg32SizeMask32 = 0xffffffff - reg32SizeMask64 = 0xffffffff - - // reg64SizeMaskX - shift value is 64 bits, shifted is X - reg64SizeMask8 = 0xffffffffffffffff - reg64SizeMask16 = 0xffffffffffffffff - reg64SizeMask32 = 0xffffffffffffffff - reg64SizeMask64 = 0xffffffffffffffff - - // regSizeMaskUintX - shift value is uint, shifted is X - regSizeMaskUint8 = ^uint(0) - regSizeMaskUint16 = ^uint(0) - regSizeMaskUint32 = ^uint(0) - regSizeMaskUint64 = ^uint(0) -) diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go deleted file mode 100644 index f3d4139e..00000000 --- a/vendor/github.com/klauspost/compress/flate/stateless.go +++ /dev/null @@ -1,318 +0,0 @@ -package flate - -import ( - "io" - "math" - "sync" -) - -const ( - maxStatelessBlock = math.MaxInt16 - // dictionary will be taken from maxStatelessBlock, so limit it. - maxStatelessDict = 8 << 10 - - slTableBits = 13 - slTableSize = 1 << slTableBits - slTableShift = 32 - slTableBits -) - -type statelessWriter struct { - dst io.Writer - closed bool -} - -func (s *statelessWriter) Close() error { - if s.closed { - return nil - } - s.closed = true - // Emit EOF block - return StatelessDeflate(s.dst, nil, true, nil) -} - -func (s *statelessWriter) Write(p []byte) (n int, err error) { - err = StatelessDeflate(s.dst, p, false, nil) - if err != nil { - return 0, err - } - return len(p), nil -} - -func (s *statelessWriter) Reset(w io.Writer) { - s.dst = w - s.closed = false -} - -// NewStatelessWriter will do compression but without maintaining any state -// between Write calls. -// There will be no memory kept between Write calls, -// but compression and speed will be suboptimal. -// Because of this, the size of actual Write calls will affect output size. -func NewStatelessWriter(dst io.Writer) io.WriteCloser { - return &statelessWriter{dst: dst} -} - -// bitWriterPool contains bit writers that can be reused. -var bitWriterPool = sync.Pool{ - New: func() interface{} { - return newHuffmanBitWriter(nil) - }, -} - -// StatelessDeflate allows compressing directly to a Writer without retaining state. -// When returning everything will be flushed. -// Up to 8KB of an optional dictionary can be given which is presumed to precede the block. -// Longer dictionaries will be truncated and will still produce valid output. -// Sending nil dictionary is perfectly fine. -func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { - var dst tokens - bw := bitWriterPool.Get().(*huffmanBitWriter) - bw.reset(out) - defer func() { - // don't keep a reference to our output - bw.reset(nil) - bitWriterPool.Put(bw) - }() - if eof && len(in) == 0 { - // Just write an EOF block. - // Could be faster... - bw.writeStoredHeader(0, true) - bw.flush() - return bw.err - } - - // Truncate dict - if len(dict) > maxStatelessDict { - dict = dict[len(dict)-maxStatelessDict:] - } - - // For subsequent loops, keep shallow dict reference to avoid alloc+copy. - var inDict []byte - - for len(in) > 0 { - todo := in - if len(inDict) > 0 { - if len(todo) > maxStatelessBlock-maxStatelessDict { - todo = todo[:maxStatelessBlock-maxStatelessDict] - } - } else if len(todo) > maxStatelessBlock-len(dict) { - todo = todo[:maxStatelessBlock-len(dict)] - } - inOrg := in - in = in[len(todo):] - uncompressed := todo - if len(dict) > 0 { - // combine dict and source - bufLen := len(todo) + len(dict) - combined := make([]byte, bufLen) - copy(combined, dict) - copy(combined[len(dict):], todo) - todo = combined - } - // Compress - if len(inDict) == 0 { - statelessEnc(&dst, todo, int16(len(dict))) - } else { - statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict) - } - isEof := eof && len(in) == 0 - - if dst.n == 0 { - bw.writeStoredHeader(len(uncompressed), isEof) - if bw.err != nil { - return bw.err - } - bw.writeBytes(uncompressed) - } else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 { - // If we removed less than 1/16th, huffman compress the block. - bw.writeBlockHuff(isEof, uncompressed, len(in) == 0) - } else { - bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0) - } - if len(in) > 0 { - // Retain a dict if we have more - inDict = inOrg[len(uncompressed)-maxStatelessDict:] - dict = nil - dst.Reset() - } - if bw.err != nil { - return bw.err - } - } - if !eof { - // Align, only a stored block can do that. - bw.writeStoredHeader(0, false) - } - bw.flush() - return bw.err -} - -func hashSL(u uint32) uint32 { - return (u * 0x1e35a7bd) >> slTableShift -} - -func load3216(b []byte, i int16) uint32 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:4] - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 -} - -func load6416(b []byte, i int16) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 -} - -func statelessEnc(dst *tokens, src []byte, startAt int16) { - const ( - inputMargin = 12 - 1 - minNonLiteralBlockSize = 1 + 1 + inputMargin - ) - - type tableEntry struct { - offset int16 - } - - var table [slTableSize]tableEntry - - // This check isn't in the Snappy implementation, but there, the caller - // instead of the callee handles this case. - if len(src)-int(startAt) < minNonLiteralBlockSize { - // We do not fill the token table. - // This will be picked up by caller. - dst.n = 0 - return - } - // Index until startAt - if startAt > 0 { - cv := load3232(src, 0) - for i := int16(0); i < startAt; i++ { - table[hashSL(cv)] = tableEntry{offset: i} - cv = (cv >> 8) | (uint32(src[i+4]) << 24) - } - } - - s := startAt + 1 - nextEmit := startAt - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := int16(len(src) - inputMargin) - - // nextEmit is where in src the next emitLiteral should start from. - cv := load3216(src, s) - - for { - const skipLog = 5 - const doEvery = 2 - - nextS := s - var candidate tableEntry - for { - nextHash := hashSL(cv) - candidate = table[nextHash] - nextS = s + doEvery + (s-nextEmit)>>skipLog - if nextS > sLimit || nextS <= 0 { - goto emitRemainder - } - - now := load6416(src, nextS) - table[nextHash] = tableEntry{offset: s} - nextHash = hashSL(uint32(now)) - - if cv == load3216(src, candidate.offset) { - table[nextHash] = tableEntry{offset: nextS} - break - } - - // Do one right away... - cv = uint32(now) - s = nextS - nextS++ - candidate = table[nextHash] - now >>= 8 - table[nextHash] = tableEntry{offset: s} - - if cv == load3216(src, candidate.offset) { - table[nextHash] = tableEntry{offset: nextS} - break - } - cv = uint32(now) - s = nextS - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - - // Extend the 4-byte match as long as possible. - t := candidate.offset - l := int16(matchLen(src[s+4:], src[t+4:]) + 4) - - // Extend backwards - for t > 0 && s > nextEmit && src[t-1] == src[s-1] { - s-- - t-- - l++ - } - if nextEmit < s { - if false { - emitLiteral(dst, src[nextEmit:s]) - } else { - for _, v := range src[nextEmit:s] { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } - } - } - - // Save the match found - dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset)) - s += l - nextEmit = s - if nextS >= s { - s = nextS + 1 - } - if s >= sLimit { - goto emitRemainder - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-2 and at s. If - // another emitCopy is not our next move, also calculate nextHash - // at s+1. At least on GOARCH=amd64, these three hash calculations - // are faster as one load64 call (with some shifts) instead of - // three load32 calls. - x := load6416(src, s-2) - o := s - 2 - prevHash := hashSL(uint32(x)) - table[prevHash] = tableEntry{offset: o} - x >>= 16 - currHash := hashSL(uint32(x)) - candidate = table[currHash] - table[currHash] = tableEntry{offset: o + 2} - - if uint32(x) != load3216(src, candidate.offset) { - cv = uint32(x >> 8) - s++ - break - } - } - } - -emitRemainder: - if int(nextEmit) < len(src) { - // If nothing was added, don't encode literals. - if dst.n == 0 { - return - } - emitLiteral(dst, src[nextEmit:]) - } -} diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go deleted file mode 100644 index d818790c..00000000 --- a/vendor/github.com/klauspost/compress/flate/token.go +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package flate - -import ( - "bytes" - "encoding/binary" - "fmt" - "io" - "math" -) - -const ( - // bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits - // bits 16-22 offsetcode - 5 bits - // bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits - // bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits - lengthShift = 22 - offsetMask = 1<maxnumlit - offHist [32]uint16 // offset codes - litHist [256]uint16 // codes 0->255 - nFilled int - n uint16 // Must be able to contain maxStoreBlockSize - tokens [maxStoreBlockSize + 1]token -} - -func (t *tokens) Reset() { - if t.n == 0 { - return - } - t.n = 0 - t.nFilled = 0 - for i := range t.litHist[:] { - t.litHist[i] = 0 - } - for i := range t.extraHist[:] { - t.extraHist[i] = 0 - } - for i := range t.offHist[:] { - t.offHist[i] = 0 - } -} - -func (t *tokens) Fill() { - if t.n == 0 { - return - } - for i, v := range t.litHist[:] { - if v == 0 { - t.litHist[i] = 1 - t.nFilled++ - } - } - for i, v := range t.extraHist[:literalCount-256] { - if v == 0 { - t.nFilled++ - t.extraHist[i] = 1 - } - } - for i, v := range t.offHist[:offsetCodeCount] { - if v == 0 { - t.offHist[i] = 1 - } - } -} - -func indexTokens(in []token) tokens { - var t tokens - t.indexTokens(in) - return t -} - -func (t *tokens) indexTokens(in []token) { - t.Reset() - for _, tok := range in { - if tok < matchType { - t.AddLiteral(tok.literal()) - continue - } - t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask) - } -} - -// emitLiteral writes a literal chunk and returns the number of bytes written. -func emitLiteral(dst *tokens, lit []byte) { - for _, v := range lit { - dst.tokens[dst.n] = token(v) - dst.litHist[v]++ - dst.n++ - } -} - -func (t *tokens) AddLiteral(lit byte) { - t.tokens[t.n] = token(lit) - t.litHist[lit]++ - t.n++ -} - -// from https://stackoverflow.com/a/28730362 -func mFastLog2(val float32) float32 { - ux := int32(math.Float32bits(val)) - log2 := (float32)(((ux >> 23) & 255) - 128) - ux &= -0x7f800001 - ux += 127 << 23 - uval := math.Float32frombits(uint32(ux)) - log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 - return log2 -} - -// EstimatedBits will return an minimum size estimated by an *optimal* -// compression of the block. -// The size of the block -func (t *tokens) EstimatedBits() int { - shannon := float32(0) - bits := int(0) - nMatches := 0 - total := int(t.n) + t.nFilled - if total > 0 { - invTotal := 1.0 / float32(total) - for _, v := range t.litHist[:] { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - } - } - // Just add 15 for EOB - shannon += 15 - for i, v := range t.extraHist[1 : literalCount-256] { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - bits += int(lengthExtraBits[i&31]) * int(v) - nMatches += int(v) - } - } - } - if nMatches > 0 { - invTotal := 1.0 / float32(nMatches) - for i, v := range t.offHist[:offsetCodeCount] { - if v > 0 { - n := float32(v) - shannon += atLeastOne(-mFastLog2(n*invTotal)) * n - bits += int(offsetExtraBits[i&31]) * int(v) - } - } - } - return int(shannon) + bits -} - -// AddMatch adds a match to the tokens. -// This function is very sensitive to inlining and right on the border. -func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { - if debugDeflate { - if xlength >= maxMatchLength+baseMatchLength { - panic(fmt.Errorf("invalid length: %v", xlength)) - } - if xoffset >= maxMatchOffset+baseMatchOffset { - panic(fmt.Errorf("invalid offset: %v", xoffset)) - } - } - oCode := offsetCode(xoffset) - xoffset |= oCode << 16 - - t.extraHist[lengthCodes1[uint8(xlength)]]++ - t.offHist[oCode&31]++ - t.tokens[t.n] = token(matchType | xlength<= maxMatchOffset+baseMatchOffset { - panic(fmt.Errorf("invalid offset: %v", xoffset)) - } - } - oc := offsetCode(xoffset) - xoffset |= oc << 16 - for xlength > 0 { - xl := xlength - if xl > 258 { - // We need to have at least baseMatchLength left over for next loop. - if xl > 258+baseMatchLength { - xl = 258 - } else { - xl = 258 - baseMatchLength - } - } - xlength -= xl - xl -= baseMatchLength - t.extraHist[lengthCodes1[uint8(xl)]]++ - t.offHist[oc&31]++ - t.tokens[t.n] = token(matchType | uint32(xl)<> lengthShift) } - -// Convert length to code. -func lengthCode(len uint8) uint8 { return lengthCodes[len] } - -// Returns the offset code corresponding to a specific offset -func offsetCode(off uint32) uint32 { - if false { - if off < uint32(len(offsetCodes)) { - return offsetCodes[off&255] - } else if off>>7 < uint32(len(offsetCodes)) { - return offsetCodes[(off>>7)&255] + 14 - } else { - return offsetCodes[(off>>14)&255] + 28 - } - } - if off < uint32(len(offsetCodes)) { - return offsetCodes[uint8(off)] - } - return offsetCodes14[uint8(off>>7)] -} diff --git a/vendor/github.com/klauspost/compress/fse/bitwriter.go b/vendor/github.com/klauspost/compress/fse/bitwriter.go index 43e46361..e82fa3bb 100644 --- a/vendor/github.com/klauspost/compress/fse/bitwriter.go +++ b/vendor/github.com/klauspost/compress/fse/bitwriter.go @@ -152,12 +152,11 @@ func (b *bitWriter) flushAlign() { // close will write the alignment bit and write the final byte(s) // to the output. -func (b *bitWriter) close() error { +func (b *bitWriter) close() { // End mark b.addBits16Clean(1, 1) // flush until next byte. b.flushAlign() - return nil } // reset and continue writing by appending to out. diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go index dac97e58..074018d8 100644 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -199,7 +199,8 @@ func (s *Scratch) compress(src []byte) error { c2.flush(s.actualTableLog) c1.flush(s.actualTableLog) - return s.bw.close() + s.bw.close() + return nil } // writeCount will write the normalized histogram count to header. @@ -211,7 +212,7 @@ func (s *Scratch) writeCount() error { previous0 bool charnum uint16 - maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3 // Write Table Size bitStream = uint32(tableLog - minTablelog) diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go index cc05d0f7..0c7dd4ff 100644 --- a/vendor/github.com/klauspost/compress/fse/decompress.go +++ b/vendor/github.com/klauspost/compress/fse/decompress.go @@ -15,7 +15,7 @@ const ( // It is possible, but by no way guaranteed that corrupt data will // return an error. // It is up to the caller to verify integrity of the returned data. -// Use a predefined Scrach to set maximum acceptable output size. +// Use a predefined Scratch to set maximum acceptable output size. func Decompress(b []byte, s *Scratch) ([]byte, error) { s, err := s.prepare(b) if err != nil { diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go index b4d7164e..0ebc9aaa 100644 --- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go +++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go @@ -94,10 +94,9 @@ func (b *bitWriter) flushAlign() { // close will write the alignment bit and write the final byte(s) // to the output. -func (b *bitWriter) close() error { +func (b *bitWriter) close() { // End mark b.addBits16Clean(1, 1) // flush until next byte. b.flushAlign() - return nil } diff --git a/vendor/github.com/klauspost/compress/huff0/bytereader.go b/vendor/github.com/klauspost/compress/huff0/bytereader.go deleted file mode 100644 index 4dcab8d2..00000000 --- a/vendor/github.com/klauspost/compress/huff0/bytereader.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2018 Klaus Post. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. - -package huff0 - -// byteReader provides a byte reader that reads -// little endian values from a byte stream. -// The input stream is manually advanced. -// The reader performs no bounds checks. -type byteReader struct { - b []byte - off int -} - -// init will initialize the reader and set the input. -func (b *byteReader) init(in []byte) { - b.b = in - b.off = 0 -} - -// Int32 returns a little endian int32 starting at current offset. -func (b byteReader) Int32() int32 { - v3 := int32(b.b[b.off+3]) - v2 := int32(b.b[b.off+2]) - v1 := int32(b.b[b.off+1]) - v0 := int32(b.b[b.off]) - return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0 -} - -// Uint32 returns a little endian uint32 starting at current offset. -func (b byteReader) Uint32() uint32 { - v3 := uint32(b.b[b.off+3]) - v2 := uint32(b.b[b.off+2]) - v1 := uint32(b.b[b.off+1]) - v0 := uint32(b.b[b.off]) - return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0 -} - -// remain will return the number of bytes remaining. -func (b byteReader) remain() int { - return len(b.b) - b.off -} diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index 4ee4fa18..84aa3d12 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -227,10 +227,10 @@ func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err err } func (s *Scratch) compress1X(src []byte) ([]byte, error) { - return s.compress1xDo(s.Out, src) + return s.compress1xDo(s.Out, src), nil } -func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { +func (s *Scratch) compress1xDo(dst, src []byte) []byte { var bw = bitWriter{out: dst} // N is length divisible by 4. @@ -260,8 +260,8 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { bw.encTwoSymbols(cTable, tmp[1], tmp[0]) } } - err := bw.close() - return bw.out, err + bw.close() + return bw.out } var sixZeros [6]byte @@ -283,12 +283,8 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) { } src = src[len(toDo):] - var err error idx := len(s.Out) - s.Out, err = s.compress1xDo(s.Out, toDo) - if err != nil { - return nil, err - } + s.Out = s.compress1xDo(s.Out, toDo) if len(s.Out)-idx > math.MaxUint16 { // We cannot store the size in the jump table return nil, ErrIncompressible @@ -315,7 +311,6 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { segmentSize := (len(src) + 3) / 4 var wg sync.WaitGroup - var errs [4]error wg.Add(4) for i := 0; i < 4; i++ { toDo := src @@ -326,15 +321,12 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { // Separate goroutine for each block. go func(i int) { - s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo) + s.tmpOut[i] = s.compress1xDo(s.tmpOut[i][:0], toDo) wg.Done() }(i) } wg.Wait() for i := 0; i < 4; i++ { - if errs[i] != nil { - return nil, errs[i] - } o := s.tmpOut[i] if len(o) > math.MaxUint16 { // We cannot store the size in the jump table @@ -358,6 +350,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { // Does not update s.clearCount. func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { reuse = true + _ = s.count // Assert that s != nil to speed up the following loop. for _, v := range in { s.count[v]++ } @@ -423,7 +416,7 @@ func (s *Scratch) validateTable(c cTable) bool { // minTableLog provides the minimum logSize to safely represent a distribution. func (s *Scratch) minTableLog() uint8 { - minBitsSrc := highBit32(uint32(s.br.remain())) + 1 + minBitsSrc := highBit32(uint32(s.srcLen)) + 1 minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2 if minBitsSrc < minBitsSymbols { return uint8(minBitsSrc) @@ -435,7 +428,7 @@ func (s *Scratch) minTableLog() uint8 { func (s *Scratch) optimalTableLog() { tableLog := s.TableLog minBits := s.minTableLog() - maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1 + maxBitsSrc := uint8(highBit32(uint32(s.srcLen-1))) - 1 if maxBitsSrc < tableLog { // Accuracy can be reduced tableLog = maxBitsSrc diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go index 54bd08b2..0f56b02d 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -1136,7 +1136,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) { errs++ } if errs > 0 { - fmt.Fprintf(w, "%d errros in base, stopping\n", errs) + fmt.Fprintf(w, "%d errors in base, stopping\n", errs) continue } // Ensure that all combinations are covered. @@ -1152,7 +1152,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) { errs++ } if errs > 20 { - fmt.Fprintf(w, "%d errros, stopping\n", errs) + fmt.Fprintf(w, "%d errors, stopping\n", errs) break } } diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go index e8ad17ad..77ecd68e 100644 --- a/vendor/github.com/klauspost/compress/huff0/huff0.go +++ b/vendor/github.com/klauspost/compress/huff0/huff0.go @@ -88,7 +88,7 @@ type Scratch struct { // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. MaxDecodedSize int - br byteReader + srcLen int // MaxSymbolValue will override the maximum symbol value of the next block. MaxSymbolValue uint8 @@ -170,7 +170,7 @@ func (s *Scratch) prepare(in []byte) (*Scratch, error) { if s.fse == nil { s.fse = &fse.Scratch{} } - s.br.init(in) + s.srcLen = len(in) return s, nil } diff --git a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go index 2aa6a95a..2754bac6 100644 --- a/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go +++ b/vendor/github.com/klauspost/compress/internal/snapref/encode_other.go @@ -51,7 +51,7 @@ func emitCopy(dst []byte, offset, length int) int { i := 0 // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The // threshold for this loop is a little higher (at 68 = 64 + 4), and the - // length emitted down below is is a little lower (at 60 = 64 - 4), because + // length emitted down below is a little lower (at 60 = 64 - 4), because // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as diff --git a/vendor/github.com/klauspost/compress/s2sx.mod b/vendor/github.com/klauspost/compress/s2sx.mod index 2263853f..5a4412f9 100644 --- a/vendor/github.com/klauspost/compress/s2sx.mod +++ b/vendor/github.com/klauspost/compress/s2sx.mod @@ -1,4 +1,4 @@ module github.com/klauspost/compress -go 1.16 +go 1.19 diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index bdd49c8b..92e2347b 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -259,7 +259,7 @@ nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68 ## Decompressor -Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested. +Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested. This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz), kindly supplied by [fuzzit.dev](https://fuzzit.dev/). diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go index 97299d49..25ca9839 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitreader.go +++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go @@ -17,7 +17,6 @@ import ( // for aligning the input. type bitReader struct { in []byte - off uint // next byte to read is at in[off - 1] value uint64 // Maybe use [16]byte, but shifting is awkward. bitsRead uint8 } @@ -28,7 +27,6 @@ func (b *bitReader) init(in []byte) error { return errors.New("corrupt stream: too short") } b.in = in - b.off = uint(len(in)) // The highest bit of the last byte indicates where to start v := in[len(in)-1] if v == 0 { @@ -69,21 +67,19 @@ func (b *bitReader) fillFast() { if b.bitsRead < 32 { return } - // 2 bounds checks. - v := b.in[b.off-4:] - v = v[:4] + v := b.in[len(b.in)-4:] + b.in = b.in[:len(b.in)-4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 - b.off -= 4 } // fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read. func (b *bitReader) fillFastStart() { - // Do single re-slice to avoid bounds checks. - b.value = binary.LittleEndian.Uint64(b.in[b.off-8:]) + v := b.in[len(b.in)-8:] + b.in = b.in[:len(b.in)-8] + b.value = binary.LittleEndian.Uint64(v) b.bitsRead = 0 - b.off -= 8 } // fill() will make sure at least 32 bits are available. @@ -91,25 +87,25 @@ func (b *bitReader) fill() { if b.bitsRead < 32 { return } - if b.off >= 4 { - v := b.in[b.off-4:] - v = v[:4] + if len(b.in) >= 4 { + v := b.in[len(b.in)-4:] + b.in = b.in[:len(b.in)-4] low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) b.value = (b.value << 32) | uint64(low) b.bitsRead -= 32 - b.off -= 4 return } - for b.off > 0 { - b.value = (b.value << 8) | uint64(b.in[b.off-1]) - b.bitsRead -= 8 - b.off-- + + b.bitsRead -= uint8(8 * len(b.in)) + for len(b.in) > 0 { + b.value = (b.value << 8) | uint64(b.in[len(b.in)-1]) + b.in = b.in[:len(b.in)-1] } } // finished returns true if all bits have been read from the bit stream. func (b *bitReader) finished() bool { - return b.off == 0 && b.bitsRead >= 64 + return len(b.in) == 0 && b.bitsRead >= 64 } // overread returns true if more bits have been requested than is on the stream. @@ -119,7 +115,7 @@ func (b *bitReader) overread() bool { // remain returns the number of bits remaining. func (b *bitReader) remain() uint { - return b.off*8 + 64 - uint(b.bitsRead) + return 8*uint(len(b.in)) + 64 - uint(b.bitsRead) } // close the bitstream and returns an error if out-of-buffer reads occurred. diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go index 78b3c61b..1952f175 100644 --- a/vendor/github.com/klauspost/compress/zstd/bitwriter.go +++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go @@ -97,12 +97,11 @@ func (b *bitWriter) flushAlign() { // close will write the alignment bit and write the final byte(s) // to the output. -func (b *bitWriter) close() error { +func (b *bitWriter) close() { // End mark b.addBits16Clean(1, 1) // flush until next byte. b.flushAlign() - return nil } // reset and continue writing by appending to out. diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 9f17ce60..9c28840c 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -554,6 +554,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { if debugDecoder { printf("Compression modes: 0b%b", compMode) } + if compMode&3 != 0 { + return errors.New("corrupt block: reserved bits not zero") + } for i := uint(0); i < 3; i++ { mode := seqCompMode((compMode >> (6 - i*2)) & 3) if debugDecoder { @@ -595,7 +598,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { printf("RLE set to 0x%x, code: %v", symb, v) } case compModeFSE: - println("Reading table for", tableIndex(i)) + if debugDecoder { + println("Reading table for", tableIndex(i)) + } if seq.fse == nil || seq.fse.preDefined { seq.fse = fseDecoderPool.Get().(*fseDecoder) } diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index fd4a36f7..32a7f401 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -361,14 +361,21 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { if len(lits) >= 1024 { // Use 4 Streams. out, reUsed, err = huff0.Compress4X(lits, b.litEnc) - } else if len(lits) > 32 { + } else if len(lits) > 16 { // Use 1 stream single = true out, reUsed, err = huff0.Compress1X(lits, b.litEnc) } else { err = huff0.ErrIncompressible } - + if err == nil && len(out)+5 > len(lits) { + // If we are close, we may still be worse or equal to raw. + var lh literalsHeader + lh.setSizes(len(out), len(lits), single) + if len(out)+lh.size() >= len(lits) { + err = huff0.ErrIncompressible + } + } switch err { case huff0.ErrIncompressible: if debugEncoder { @@ -420,6 +427,16 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { return nil } +// encodeRLE will encode an RLE block. +func (b *blockEnc) encodeRLE(val byte, length uint32) { + var bh blockHeader + bh.setLast(b.last) + bh.setSize(length) + bh.setType(blockTypeRLE) + b.output = bh.appendTo(b.output) + b.output = append(b.output, val) +} + // fuzzFseEncoder can be used to fuzz the FSE encoder. func fuzzFseEncoder(data []byte) int { if len(data) > maxSequences || len(data) < 2 { @@ -472,6 +489,16 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if len(b.sequences) == 0 { return b.encodeLits(b.literals, rawAllLits) } + if len(b.sequences) == 1 && len(org) > 0 && len(b.literals) <= 1 { + // Check common RLE cases. + seq := b.sequences[0] + if seq.litLen == uint32(len(b.literals)) && seq.offset-3 == 1 { + // Offset == 1 and 0 or 1 literals. + b.encodeRLE(org[0], b.sequences[0].matchLen+zstdMinMatch+seq.litLen) + return nil + } + } + // We want some difference to at least account for the headers. saved := b.size - len(b.literals) - (b.size >> 6) if saved < 16 { @@ -503,7 +530,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if len(b.literals) >= 1024 && !raw { // Use 4 Streams. out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) - } else if len(b.literals) > 32 && !raw { + } else if len(b.literals) > 16 && !raw { // Use 1 stream single = true out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) @@ -511,6 +538,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { err = huff0.ErrIncompressible } + if err == nil && len(out)+5 > len(b.literals) { + // If we are close, we may still be worse or equal to raw. + var lh literalsHeader + lh.setSize(len(b.literals)) + szRaw := lh.size() + lh.setSizes(len(out), len(b.literals), single) + szComp := lh.size() + if len(out)+szComp >= len(b.literals)+szRaw { + err = huff0.ErrIncompressible + } + } switch err { case huff0.ErrIncompressible: lh.setType(literalsBlockRaw) @@ -773,10 +811,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { ml.flush(mlEnc.actualTableLog) of.flush(ofEnc.actualTableLog) ll.flush(llEnc.actualTableLog) - err = wr.close() - if err != nil { - return err - } + wr.close() b.output = wr.out // Maybe even add a bigger margin. diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go index f6a24097..6a5a2988 100644 --- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go +++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go @@ -95,42 +95,54 @@ type Header struct { // If there isn't enough input, io.ErrUnexpectedEOF is returned. // The FirstBlock.OK will indicate if enough information was available to decode the first block header. func (h *Header) Decode(in []byte) error { + _, err := h.DecodeAndStrip(in) + return err +} + +// DecodeAndStrip will decode the header from the beginning of the stream +// and on success return the remaining bytes. +// This will decode the frame header and the first block header if enough bytes are provided. +// It is recommended to provide at least HeaderMaxSize bytes. +// If the frame header cannot be read an error will be returned. +// If there isn't enough input, io.ErrUnexpectedEOF is returned. +// The FirstBlock.OK will indicate if enough information was available to decode the first block header. +func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) { *h = Header{} if len(in) < 4 { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } h.HeaderSize += 4 b, in := in[:4], in[4:] if string(b) != frameMagic { if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 { - return ErrMagicMismatch + return nil, ErrMagicMismatch } if len(in) < 4 { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } h.HeaderSize += 4 h.Skippable = true h.SkippableID = int(b[0] & 0xf) h.SkippableSize = binary.LittleEndian.Uint32(in) - return nil + return in[4:], nil } // Read Window_Descriptor // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor if len(in) < 1 { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } fhd, in := in[0], in[1:] h.HeaderSize++ h.SingleSegment = fhd&(1<<5) != 0 h.HasCheckSum = fhd&(1<<2) != 0 if fhd&(1<<3) != 0 { - return errors.New("reserved bit set on frame header") + return nil, errors.New("reserved bit set on frame header") } if !h.SingleSegment { if len(in) < 1 { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } var wd byte wd, in = in[0], in[1:] @@ -148,7 +160,7 @@ func (h *Header) Decode(in []byte) error { size = 4 } if len(in) < int(size) { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } b, in = in[:size], in[size:] h.HeaderSize += int(size) @@ -178,7 +190,7 @@ func (h *Header) Decode(in []byte) error { if fcsSize > 0 { h.HasFCS = true if len(in) < fcsSize { - return io.ErrUnexpectedEOF + return nil, io.ErrUnexpectedEOF } b, in = in[:fcsSize], in[fcsSize:] h.HeaderSize += int(fcsSize) @@ -199,7 +211,7 @@ func (h *Header) Decode(in []byte) error { // Frame Header done, we will not fail from now on. if len(in) < 3 { - return nil + return in, nil } tmp := in[:3] bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) @@ -209,7 +221,7 @@ func (h *Header) Decode(in []byte) error { cSize := int(bh >> 3) switch blockType { case blockTypeReserved: - return nil + return in, nil case blockTypeRLE: h.FirstBlock.Compressed = true h.FirstBlock.DecompressedSize = cSize @@ -225,5 +237,25 @@ func (h *Header) Decode(in []byte) error { } h.FirstBlock.OK = true - return nil + return in, nil +} + +// AppendTo will append the encoded header to the dst slice. +// There is no error checking performed on the header values. +func (h *Header) AppendTo(dst []byte) ([]byte, error) { + if h.Skippable { + magic := [4]byte{0x50, 0x2a, 0x4d, 0x18} + magic[0] |= byte(h.SkippableID & 0xf) + dst = append(dst, magic[:]...) + f := h.SkippableSize + return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil + } + f := frameHeader{ + ContentSize: h.FrameContentSize, + WindowSize: uint32(h.WindowSize), + SingleSegment: h.SingleSegment, + Checksum: h.HasCheckSum, + DictID: h.DictionaryID, + } + return f.appendTo(dst), nil } diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index f04aaa21..bbca1723 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -82,7 +82,7 @@ var ( // can run multiple concurrent stateless decodes. It is even possible to // use stateless decodes while a stream is being decoded. // -// The Reset function can be used to initiate a new stream, which is will considerably +// The Reset function can be used to initiate a new stream, which will considerably // reduce the allocations normally caused by NewReader. func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { initPredefined() diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go index ca095145..b7b83164 100644 --- a/vendor/github.com/klauspost/compress/zstd/dict.go +++ b/vendor/github.com/klauspost/compress/zstd/dict.go @@ -1,10 +1,13 @@ package zstd import ( + "bytes" "encoding/binary" "errors" "fmt" "io" + "math" + "sort" "github.com/klauspost/compress/huff0" ) @@ -14,9 +17,8 @@ type dict struct { litEnc *huff0.Scratch llDec, ofDec, mlDec sequenceDec - //llEnc, ofEnc, mlEnc []*fseEncoder - offsets [3]int - content []byte + offsets [3]int + content []byte } const dictMagic = "\x37\xa4\x30\xec" @@ -159,3 +161,405 @@ func InspectDictionary(b []byte) (interface { d, err := loadDict(b) return d, err } + +type BuildDictOptions struct { + // Dictionary ID. + ID uint32 + + // Content to use to create dictionary tables. + Contents [][]byte + + // History to use for all blocks. + History []byte + + // Offsets to use. + Offsets [3]int + + // CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier. + // See https://github.com/facebook/zstd/issues/3724 + CompatV155 bool + + // Use the specified encoder level. + // The dictionary will be built using the specified encoder level, + // which will reflect speed and make the dictionary tailored for that level. + // If not set SpeedBestCompression will be used. + Level EncoderLevel + + // DebugOut will write stats and other details here if set. + DebugOut io.Writer +} + +func BuildDict(o BuildDictOptions) ([]byte, error) { + initPredefined() + hist := o.History + contents := o.Contents + debug := o.DebugOut != nil + println := func(args ...interface{}) { + if o.DebugOut != nil { + fmt.Fprintln(o.DebugOut, args...) + } + } + printf := func(s string, args ...interface{}) { + if o.DebugOut != nil { + fmt.Fprintf(o.DebugOut, s, args...) + } + } + print := func(args ...interface{}) { + if o.DebugOut != nil { + fmt.Fprint(o.DebugOut, args...) + } + } + + if int64(len(hist)) > dictMaxLength { + return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength)) + } + if len(hist) < 8 { + return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8) + } + if len(contents) == 0 { + return nil, errors.New("no content provided") + } + d := dict{ + id: o.ID, + litEnc: nil, + llDec: sequenceDec{}, + ofDec: sequenceDec{}, + mlDec: sequenceDec{}, + offsets: o.Offsets, + content: hist, + } + block := blockEnc{lowMem: false} + block.init() + enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}}) + if o.Level != 0 { + eOpts := encoderOptions{ + level: o.Level, + blockSize: maxMatchLen, + windowSize: maxMatchLen, + dict: &d, + lowMem: false, + } + enc = eOpts.encoder() + } else { + o.Level = SpeedBestCompression + } + var ( + remain [256]int + ll [256]int + ml [256]int + of [256]int + ) + addValues := func(dst *[256]int, src []byte) { + for _, v := range src { + dst[v]++ + } + } + addHist := func(dst *[256]int, src *[256]uint32) { + for i, v := range src { + dst[i] += int(v) + } + } + seqs := 0 + nUsed := 0 + litTotal := 0 + newOffsets := make(map[uint32]int, 1000) + for _, b := range contents { + block.reset(nil) + if len(b) < 8 { + continue + } + nUsed++ + enc.Reset(&d, true) + enc.Encode(&block, b) + addValues(&remain, block.literals) + litTotal += len(block.literals) + if len(block.sequences) == 0 { + continue + } + seqs += len(block.sequences) + block.genCodes() + addHist(&ll, block.coders.llEnc.Histogram()) + addHist(&ml, block.coders.mlEnc.Histogram()) + addHist(&of, block.coders.ofEnc.Histogram()) + for i, seq := range block.sequences { + if i > 3 { + break + } + offset := seq.offset + if offset == 0 { + continue + } + if int(offset) >= len(o.History) { + continue + } + if offset > 3 { + newOffsets[offset-3]++ + } else { + newOffsets[uint32(o.Offsets[offset-1])]++ + } + } + } + // Find most used offsets. + var sortedOffsets []uint32 + for k := range newOffsets { + sortedOffsets = append(sortedOffsets, k) + } + sort.Slice(sortedOffsets, func(i, j int) bool { + a, b := sortedOffsets[i], sortedOffsets[j] + if a == b { + // Prefer the longer offset + return sortedOffsets[i] > sortedOffsets[j] + } + return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]] + }) + if len(sortedOffsets) > 3 { + if debug { + print("Offsets:") + for i, v := range sortedOffsets { + if i > 20 { + break + } + printf("[%d: %d],", v, newOffsets[v]) + } + println("") + } + + sortedOffsets = sortedOffsets[:3] + } + for i, v := range sortedOffsets { + o.Offsets[i] = int(v) + } + if debug { + println("New repeat offsets", o.Offsets) + } + + if nUsed == 0 || seqs == 0 { + return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs) + } + if debug { + println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal) + } + if seqs/nUsed < 512 { + // Use 512 as minimum. + nUsed = seqs / 512 + if nUsed == 0 { + nUsed = 1 + } + } + copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) { + hist := dst.Histogram() + var maxSym uint8 + var maxCount int + var fakeLength int + for i, v := range src { + if v > 0 { + v = v / nUsed + if v == 0 { + v = 1 + } + } + if v > maxCount { + maxCount = v + } + if v != 0 { + maxSym = uint8(i) + } + fakeLength += v + hist[i] = uint32(v) + } + + // Ensure we aren't trying to represent RLE. + if maxCount == fakeLength { + for i := range hist { + if uint8(i) == maxSym { + fakeLength++ + maxSym++ + hist[i+1] = 1 + if maxSym > 1 { + break + } + } + if hist[0] == 0 { + fakeLength++ + hist[i] = 1 + if maxSym > 1 { + break + } + } + } + } + + dst.HistogramFinished(maxSym, maxCount) + dst.reUsed = false + dst.useRLE = false + err := dst.normalizeCount(fakeLength) + if err != nil { + return nil, err + } + if debug { + println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength) + } + return dst.writeCount(nil) + } + if debug { + print("Literal lengths: ") + } + llTable, err := copyHist(block.coders.llEnc, &ll) + if err != nil { + return nil, err + } + if debug { + print("Match lengths: ") + } + mlTable, err := copyHist(block.coders.mlEnc, &ml) + if err != nil { + return nil, err + } + if debug { + print("Offsets: ") + } + ofTable, err := copyHist(block.coders.ofEnc, &of) + if err != nil { + return nil, err + } + + // Literal table + avgSize := litTotal + if avgSize > huff0.BlockSizeMax/2 { + avgSize = huff0.BlockSizeMax / 2 + } + huffBuff := make([]byte, 0, avgSize) + // Target size + div := litTotal / avgSize + if div < 1 { + div = 1 + } + if debug { + println("Huffman weights:") + } + for i, n := range remain[:] { + if n > 0 { + n = n / div + // Allow all entries to be represented. + if n == 0 { + n = 1 + } + huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) + if debug { + printf("[%d: %d], ", i, n) + } + } + } + if o.CompatV155 && remain[255]/div == 0 { + huffBuff = append(huffBuff, 255) + } + scratch := &huff0.Scratch{TableLog: 11} + for tries := 0; tries < 255; tries++ { + scratch = &huff0.Scratch{TableLog: 11} + _, _, err = huff0.Compress1X(huffBuff, scratch) + if err == nil { + break + } + if debug { + printf("Try %d: Huffman error: %v\n", tries+1, err) + } + huffBuff = huffBuff[:0] + if tries == 250 { + if debug { + println("Huffman: Bailing out with predefined table") + } + + // Bail out.... Just generate something + huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...) + for i := 0; i < 128; i++ { + huffBuff = append(huffBuff, byte(i)) + } + continue + } + if errors.Is(err, huff0.ErrIncompressible) { + // Try truncating least common. + for i, n := range remain[:] { + if n > 0 { + n = n / (div * (i + 1)) + if n > 0 { + huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) + } + } + } + if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 { + huffBuff = append(huffBuff, 255) + } + if len(huffBuff) == 0 { + huffBuff = append(huffBuff, 0, 255) + } + } + if errors.Is(err, huff0.ErrUseRLE) { + for i, n := range remain[:] { + n = n / (div * (i + 1)) + // Allow all entries to be represented. + if n == 0 { + n = 1 + } + huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...) + } + } + } + + var out bytes.Buffer + out.Write([]byte(dictMagic)) + out.Write(binary.LittleEndian.AppendUint32(nil, o.ID)) + out.Write(scratch.OutTable) + if debug { + println("huff table:", len(scratch.OutTable), "bytes") + println("of table:", len(ofTable), "bytes") + println("ml table:", len(mlTable), "bytes") + println("ll table:", len(llTable), "bytes") + } + out.Write(ofTable) + out.Write(mlTable) + out.Write(llTable) + out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0]))) + out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1]))) + out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2]))) + out.Write(hist) + if debug { + _, err := loadDict(out.Bytes()) + if err != nil { + panic(err) + } + i, err := InspectDictionary(out.Bytes()) + if err != nil { + panic(err) + } + println("ID:", i.ID()) + println("Content size:", i.ContentSize()) + println("Encoder:", i.LitEncoder() != nil) + println("Offsets:", i.Offsets()) + var totalSize int + for _, b := range contents { + totalSize += len(b) + } + + encWith := func(opts ...EOption) int { + enc, err := NewWriter(nil, opts...) + if err != nil { + panic(err) + } + defer enc.Close() + var dst []byte + var totalSize int + for _, b := range contents { + dst = enc.EncodeAll(b, dst[:0]) + totalSize += len(dst) + } + return totalSize + } + plain := encWith(WithEncoderLevel(o.Level)) + withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes())) + println("Input size:", totalSize) + println("Plain Compressed:", plain) + println("Dict Compressed:", withDict) + println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)") + } + return out.Bytes(), nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go index 9819d414..4613724e 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_best.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go @@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) { if m.rep < 0 { ofc = ofCode(uint32(m.s-m.offset) + 3) } else { - ofc = ofCode(uint32(m.rep)) + ofc = ofCode(uint32(m.rep) & 3) } // Cost, excluding ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc] @@ -135,8 +135,20 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) { break } + // Add block to history s := e.addBlock(src) blk.size = len(src) + + // Check RLE first + if len(src) > zstdMinMatch { + ml := matchLen(src[1:], src) + if ml == len(src)-1 { + blk.literals = append(blk.literals, src[0]) + blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3}) + return + } + } + if len(src) < minNonLiteralBlockSize { blk.extraLits = len(src) blk.literals = blk.literals[:len(src)] @@ -197,17 +209,10 @@ encodeLoop: // Set m to a match at offset if it looks like that will improve compression. improve := func(m *match, offset int32, s int32, first uint32, rep int32) { - if s-offset >= e.maxMatchOff || load3232(src, offset) != first { + delta := s - offset + if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first { return } - if debugAsserts { - if offset <= 0 { - panic(offset) - } - if !bytes.Equal(src[s:s+4], src[offset:offset+4]) { - panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) - } - } // Try to quick reject if we already have a long match. if m.length > 16 { left := len(src) - int(m.s+m.length) @@ -226,8 +231,10 @@ encodeLoop: } } l := 4 + e.matchlen(s+4, offset+4, src) - if rep < 0 { + if m.rep <= 0 { // Extend candidate match backwards as far as possible. + // Do not extend repeats as we can assume they are optimal + // and offsets change if s == nextEmit. tMin := s - e.maxMatchOff if tMin < 0 { tMin = 0 @@ -238,7 +245,14 @@ encodeLoop: l++ } } - + if debugAsserts { + if offset >= s { + panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff)) + } + if !bytes.Equal(src[s:s+l], src[offset:offset+l]) { + panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first)) + } + } cand := match{offset: offset, s: s, length: l, rep: rep} cand.estBits(bitsPerByte) if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 { @@ -281,6 +295,7 @@ encodeLoop: // Load next and check... e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset} e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset} + index0 := s + 1 // Look far ahead, unless we have a really long match already... if best.length < goodEnough { @@ -334,41 +349,45 @@ encodeLoop: } if debugAsserts { + if best.offset >= best.s { + panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s)) + } + if best.s < nextEmit { + panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit)) + } + if best.offset < s-e.maxMatchOff { + panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff)) + } if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) { panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length])) } } // We have a match, we can store the forward value + s = best.s if best.rep > 0 { var seq seq seq.matchLen = uint32(best.length - zstdMinMatch) - if debugAsserts && s <= nextEmit { - panic("s <= nextEmit") - } addLiterals(&seq, best.s) // Repeat. If bit 4 is set, this is a non-lit repeat. seq.offset = uint32(best.rep & 3) if debugSequences { - println("repeat sequence", seq, "next s:", s) + println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset) } blk.sequences = append(blk.sequences, seq) // Index old s + 1 -> s - 1 - index0 := s + 1 s = best.s + best.length - nextEmit = s - if s >= sLimit { - if debugEncoder { - println("repeat ended", s, best.length) - } - break encodeLoop - } + // Index skipped... + end := s + if s > sLimit+4 { + end = sLimit + 4 + } off := index0 + e.cur - for index0 < s { + for index0 < end { cv0 := load6432(src, index0) h0 := hashLen(cv0, bestLongTableBits, bestLongLen) h1 := hashLen(cv0, bestShortTableBits, bestShortLen) @@ -377,6 +396,7 @@ encodeLoop: off++ index0++ } + switch best.rep { case 2, 4 | 1: offset1, offset2 = offset2, offset1 @@ -385,13 +405,17 @@ encodeLoop: case 4 | 3: offset1, offset2, offset3 = offset1-1, offset1, offset2 } + if s >= sLimit { + if debugEncoder { + println("repeat ended", s, best.length) + } + break encodeLoop + } continue } // A 4-byte match has been found. Update recent offsets. // We'll later see if more than 4 bytes. - index0 := s + 1 - s = best.s t := best.offset offset1, offset2, offset3 = s-t, offset1, offset2 @@ -418,19 +442,25 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) nextEmit = s - if s >= sLimit { - break encodeLoop + + // Index old s + 1 -> s - 1 or sLimit + end := s + if s > sLimit-4 { + end = sLimit - 4 } - // Index old s + 1 -> s - 1 - for index0 < s { + off := index0 + e.cur + for index0 < end { cv0 := load6432(src, index0) h0 := hashLen(cv0, bestLongTableBits, bestLongLen) h1 := hashLen(cv0, bestShortTableBits, bestShortLen) - off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} index0++ + off++ + } + if s >= sLimit { + break encodeLoop } } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index 8582f31a..84a79fde 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -102,9 +102,20 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { e.cur = e.maxMatchOff break } - + // Add block to history s := e.addBlock(src) blk.size = len(src) + + // Check RLE first + if len(src) > zstdMinMatch { + ml := matchLen(src[1:], src) + if ml == len(src)-1 { + blk.literals = append(blk.literals, src[0]) + blk.sequences = append(blk.sequences, seq{litLen: 1, matchLen: uint32(len(src)-1) - zstdMinMatch, offset: 1 + 3}) + return + } + } + if len(src) < minNonLiteralBlockSize { blk.extraLits = len(src) blk.literals = blk.literals[:len(src)] @@ -145,7 +156,7 @@ encodeLoop: var t int32 // We allow the encoder to optionally turn off repeat offsets across blocks canRepeat := len(blk.sequences) > 2 - var matched int32 + var matched, index0 int32 for { if debugAsserts && canRepeat && offset1 == 0 { @@ -162,14 +173,15 @@ encodeLoop: off := s + e.cur e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + index0 = s + 1 if canRepeat { if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -198,12 +210,12 @@ encodeLoop: // Index match start+1 (long) -> s - 1 index0 := s + repOff - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -229,9 +241,9 @@ encodeLoop: if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { // Consider history as well. var seq seq - lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -258,12 +270,11 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - index0 := s + repOff2 - s += lenght + repOff2 + s += length + repOff2 nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -498,15 +509,15 @@ encodeLoop: } // Index match start+1 (long) -> s - 1 - index0 := s - l + 1 + off := index0 + e.cur for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 h0 := hashLen(cv0, betterLongTableBits, betterLongLen) - off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} index0 += 2 + off += 2 } cv = load6432(src, s) @@ -672,7 +683,7 @@ encodeLoop: var t int32 // We allow the encoder to optionally turn off repeat offsets across blocks canRepeat := len(blk.sequences) > 2 - var matched int32 + var matched, index0 int32 for { if debugAsserts && canRepeat && offset1 == 0 { @@ -691,14 +702,15 @@ encodeLoop: e.markLongShardDirty(nextHashL) e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} e.markShortShardDirty(nextHashS) + index0 = s + 1 if canRepeat { if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -726,13 +738,12 @@ encodeLoop: blk.sequences = append(blk.sequences, seq) // Index match start+1 (long) -> s - 1 - index0 := s + repOff - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -761,9 +772,9 @@ encodeLoop: if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { // Consider history as well. var seq seq - lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -790,12 +801,11 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - index0 := s + repOff2 - s += lenght + repOff2 + s += length + repOff2 nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -1024,18 +1034,18 @@ encodeLoop: } // Index match start+1 (long) -> s - 1 - index0 := s - l + 1 + off := index0 + e.cur for index0 < s-1 { cv0 := load6432(src, index0) cv1 := cv0 >> 8 h0 := hashLen(cv0, betterLongTableBits, betterLongLen) - off := index0 + e.cur e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.markLongShardDirty(h0) h1 := hashLen(cv1, betterShortTableBits, betterShortLen) e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} e.markShortShardDirty(h1) index0 += 2 + off += 2 } cv = load6432(src, s) diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index a154c18f..d36be7bd 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -138,9 +138,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -166,11 +166,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -798,9 +798,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -826,11 +826,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 4de0aed0..8f8223cd 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -6,6 +6,7 @@ package zstd import ( "crypto/rand" + "errors" "fmt" "io" "math" @@ -149,6 +150,9 @@ func (e *Encoder) ResetContentSize(w io.Writer, size int64) { // and write CRC if requested. func (e *Encoder) Write(p []byte) (n int, err error) { s := &e.state + if s.eofWritten { + return 0, ErrEncoderClosed + } for len(p) > 0 { if len(p)+len(s.filling) < e.o.blockSize { if e.o.crc { @@ -202,7 +206,7 @@ func (e *Encoder) nextBlock(final bool) error { return nil } if final && len(s.filling) > 0 { - s.current = e.EncodeAll(s.filling, s.current[:0]) + s.current = e.encodeAll(s.encoder, s.filling, s.current[:0]) var n2 int n2, s.err = s.w.Write(s.current) if s.err != nil { @@ -227,10 +231,7 @@ func (e *Encoder) nextBlock(final bool) error { DictID: e.o.dict.ID(), } - dst, err := fh.appendTo(tmp[:0]) - if err != nil { - return err - } + dst := fh.appendTo(tmp[:0]) s.headerWritten = true s.wWg.Wait() var n2 int @@ -291,6 +292,9 @@ func (e *Encoder) nextBlock(final bool) error { s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current s.nInput += int64(len(s.current)) s.wg.Add(1) + if final { + s.eofWritten = true + } go func(src []byte) { if debugEncoder { println("Adding block,", len(src), "bytes, final:", final) @@ -306,9 +310,6 @@ func (e *Encoder) nextBlock(final bool) error { blk := enc.Block() enc.Encode(blk, src) blk.last = final - if final { - s.eofWritten = true - } // Wait for pending writes. s.wWg.Wait() if s.writeErr != nil { @@ -404,12 +405,20 @@ func (e *Encoder) Flush() error { if len(s.filling) > 0 { err := e.nextBlock(false) if err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } } s.wg.Wait() s.wWg.Wait() if s.err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return s.err } return s.writeErr @@ -425,6 +434,9 @@ func (e *Encoder) Close() error { } err := e.nextBlock(true) if err != nil { + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } if s.frameContentSize > 0 { @@ -462,6 +474,11 @@ func (e *Encoder) Close() error { } _, s.err = s.w.Write(frame) } + if s.err == nil { + s.err = ErrEncoderClosed + return nil + } + return s.err } @@ -472,6 +489,15 @@ func (e *Encoder) Close() error { // Data compressed with EncodeAll can be decoded with the Decoder, // using either a stream or DecodeAll. func (e *Encoder) EncodeAll(src, dst []byte) []byte { + e.init.Do(e.initialize) + enc := <-e.encoders + defer func() { + e.encoders <- enc + }() + return e.encodeAll(enc, src, dst) +} + +func (e *Encoder) encodeAll(enc encoder, src, dst []byte) []byte { if len(src) == 0 { if e.o.fullZero { // Add frame header. @@ -483,7 +509,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { Checksum: false, DictID: 0, } - dst, _ = fh.appendTo(dst) + dst = fh.appendTo(dst) // Write raw block as last one only. var blk blockHeader @@ -494,13 +520,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } - e.init.Do(e.initialize) - enc := <-e.encoders - defer func() { - // Release encoder reference to last block. - // If a non-single block is needed the encoder will reset again. - e.encoders <- enc - }() + // Use single segments when above minimum window and below window size. single := len(src) <= e.o.windowSize && len(src) > MinWindowSize if e.o.single != nil { @@ -518,10 +538,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem { dst = make([]byte, 0, len(src)) } - dst, err := fh.appendTo(dst) - if err != nil { - panic(err) - } + dst = fh.appendTo(dst) // If we can do everything in one block, prefer that. if len(src) <= e.o.blockSize { @@ -581,6 +598,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { // Add padding with content from crypto/rand.Reader if e.o.pad > 0 { add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad)) + var err error dst, err = skippableFrame(dst, add, rand.Reader) if err != nil { panic(err) diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index faaf8192..20671dcb 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -94,7 +94,7 @@ func WithEncoderConcurrency(n int) EOption { // The value must be a power of two between MinWindowSize and MaxWindowSize. // A larger value will enable better compression but allocate more memory and, // for above-default values, take considerably longer. -// The default value is determined by the compression level. +// The default value is determined by the compression level and max 8MB. func WithWindowSize(n int) EOption { return func(o *encoderOptions) error { switch { @@ -232,9 +232,9 @@ func WithEncoderLevel(l EncoderLevel) EOption { case SpeedDefault: o.windowSize = 8 << 20 case SpeedBetterCompression: - o.windowSize = 16 << 20 + o.windowSize = 8 << 20 case SpeedBestCompression: - o.windowSize = 32 << 20 + o.windowSize = 8 << 20 } } if !o.customALEntropy { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 53e160f7..e47af66e 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -146,7 +146,9 @@ func (d *frameDec) reset(br byteBuffer) error { } return err } - printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + if debugDecoder { + printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + } windowLog := 10 + (wd >> 3) windowBase := uint64(1) << windowLog windowAdd := (windowBase / 8) * uint64(wd&0x7) diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go index 4ef7f5a3..667ca067 100644 --- a/vendor/github.com/klauspost/compress/zstd/frameenc.go +++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go @@ -22,7 +22,7 @@ type frameHeader struct { const maxHeaderSize = 14 -func (f frameHeader) appendTo(dst []byte) ([]byte, error) { +func (f frameHeader) appendTo(dst []byte) []byte { dst = append(dst, frameMagic...) var fhd uint8 if f.Checksum { @@ -76,7 +76,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) { if f.SingleSegment { dst = append(dst, uint8(f.ContentSize)) } - // Unless SingleSegment is set, framessizes < 256 are nto stored. + // Unless SingleSegment is set, framessizes < 256 are not stored. case 1: f.ContentSize -= 256 dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8)) @@ -88,7 +88,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) { default: panic("invalid fcs") } - return dst, nil + return dst } const skippableFrameHeader = 4 + 4 diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go index 332e51fe..8adfebb0 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go @@ -20,10 +20,9 @@ func (s *fseDecoder) buildDtable() error { if v == -1 { s.dt[highThreshold].setAddBits(uint8(i)) highThreshold-- - symbolNext[i] = 1 - } else { - symbolNext[i] = uint16(v) + v = 1 } + symbolNext[i] = uint16(v) } } @@ -35,10 +34,12 @@ func (s *fseDecoder) buildDtable() error { for ss, v := range s.norm[:s.symbolLen] { for i := 0; i < int(v); i++ { s.dt[position].setAddBits(uint8(ss)) - position = (position + step) & tableMask - for position > highThreshold { + for { // lowprob area position = (position + step) & tableMask + if position <= highThreshold { + break + } } } } diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s index 17901e08..ae7d4d32 100644 --- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_arm64.s @@ -162,12 +162,12 @@ finalize: MOVD h, ret+24(FP) RET -// func writeBlocks(d *Digest, b []byte) int +// func writeBlocks(s *Digest, b []byte) int TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 LDP ·primes+0(SB), (prime1, prime2) // Load state. Assume v[1-4] are stored contiguously. - MOVD d+0(FP), digest + MOVD s+0(FP), digest LDP 0(digest), (v1, v2) LDP 16(digest), (v3, v4) diff --git a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s index 9a7655c0..0782b86e 100644 --- a/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/matchlen_amd64.s @@ -5,7 +5,6 @@ #include "textflag.h" // func matchLen(a []byte, b []byte) int -// Requires: BMI TEXT ·matchLen(SB), NOSPLIT, $0-56 MOVQ a_base+0(FP), AX MOVQ b_base+24(FP), CX @@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 JB matchlen_match4_standalone matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - TESTQ BX, BX - JZ matchlen_loop_standalone + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JZ matchlen_loop_standalone #ifdef GOAMD64_v3 TZCNTQ BX, BX #else BSFQ BX, BX #endif - SARQ $0x03, BX + SHRL $0x03, BX LEAL (SI)(BX*1), SI JMP gen_match_len_end diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index 9405fcf1..d7fe6d82 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -245,7 +245,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error { return io.ErrUnexpectedEOF } var ll, mo, ml int - if br.off > 4+((maxOffsetBits+16+16)>>3) { + if len(br.in) > 4+((maxOffsetBits+16+16)>>3) { // inlined function: // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) @@ -452,18 +452,13 @@ func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) // extra bits are stored in reverse order. br.fill() - if s.maxBits <= 32 { - mo += br.getBits(moB) - ml += br.getBits(mlB) - ll += br.getBits(llB) - } else { - mo += br.getBits(moB) + mo += br.getBits(moB) + if s.maxBits > 32 { br.fill() - // matchlength+literal length, max 32 bits - ml += br.getBits(mlB) - ll += br.getBits(llB) - } + // matchlength+literal length, max 32 bits + ml += br.getBits(mlB) + ll += br.getBits(llB) mo = s.adjustOffset(mo, ll, moB) return } diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go index 8adabd82..c59f17e0 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -146,7 +146,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) default: - return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode) + return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode) } s.seqSize += ctx.litRemain @@ -292,7 +292,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { return io.ErrUnexpectedEOF } - return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode) + return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode) } if ctx.litRemain < 0 { diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index b6f4ba6f..f5591fa1 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -5,11 +5,11 @@ // func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: CMOV TEXT ·sequenceDecs_decode_amd64(SB), $8-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -157,8 +157,7 @@ sequenceDecs_decode_amd64_ll_update_zero: // Update Literal Length State MOVBQZX DI, R14 - SHRQ $0x10, DI - MOVWQZX DI, DI + SHRL $0x10, DI LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -177,8 +176,7 @@ sequenceDecs_decode_amd64_ll_update_zero: // Update Match Length State MOVBQZX R8, R14 - SHRQ $0x10, R8 - MOVWQZX R8, R8 + SHRL $0x10, R8 LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -197,8 +195,7 @@ sequenceDecs_decode_amd64_ll_update_zero: // Update Offset State MOVBQZX R9, R14 - SHRQ $0x10, R9 - MOVWQZX R9, R9 + SHRL $0x10, R9 LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -301,9 +298,9 @@ sequenceDecs_decode_amd64_match_len_ofs_ok: MOVQ R12, 152(AX) MOVQ R13, 160(AX) MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -336,11 +333,11 @@ error_overread: // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: CMOV TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -459,8 +456,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero: // Update Literal Length State MOVBQZX DI, R14 - SHRQ $0x10, DI - MOVWQZX DI, DI + SHRL $0x10, DI LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -479,8 +475,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero: // Update Match Length State MOVBQZX R8, R14 - SHRQ $0x10, R8 - MOVWQZX R8, R8 + SHRL $0x10, R8 LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -499,8 +494,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero: // Update Offset State MOVBQZX R9, R14 - SHRQ $0x10, R9 - MOVWQZX R9, R9 + SHRL $0x10, R9 LEAQ (BX)(R14*1), CX MOVQ DX, R15 MOVQ CX, BX @@ -603,9 +597,9 @@ sequenceDecs_decode_56_amd64_match_len_ofs_ok: MOVQ R12, 152(AX) MOVQ R13, 160(AX) MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -638,11 +632,11 @@ error_overread: // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -772,11 +766,10 @@ sequenceDecs_decode_bmi2_fill_2_end: BZHIQ R14, R15, R15 // Update Offset State - BZHIQ R8, R15, CX - SHRXQ R8, R15, R15 - MOVQ $0x00001010, R14 - BEXTRQ R14, R8, R8 - ADDQ CX, R8 + BZHIQ R8, R15, CX + SHRXQ R8, R15, R15 + SHRL $0x10, R8 + ADDQ CX, R8 // Load ctx.ofTable MOVQ ctx+16(FP), CX @@ -784,11 +777,10 @@ sequenceDecs_decode_bmi2_fill_2_end: MOVQ (CX)(R8*8), R8 // Update Match Length State - BZHIQ DI, R15, CX - SHRXQ DI, R15, R15 - MOVQ $0x00001010, R14 - BEXTRQ R14, DI, DI - ADDQ CX, DI + BZHIQ DI, R15, CX + SHRXQ DI, R15, R15 + SHRL $0x10, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX @@ -796,10 +788,9 @@ sequenceDecs_decode_bmi2_fill_2_end: MOVQ (CX)(DI*8), DI // Update Literal Length State - BZHIQ SI, R15, CX - MOVQ $0x00001010, R14 - BEXTRQ R14, SI, SI - ADDQ CX, SI + BZHIQ SI, R15, CX + SHRL $0x10, SI + ADDQ CX, SI // Load ctx.llTable MOVQ ctx+16(FP), CX @@ -892,9 +883,9 @@ sequenceDecs_decode_bmi2_match_len_ofs_ok: MOVQ R11, 152(CX) MOVQ R12, 160(CX) MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -927,11 +918,11 @@ error_overread: // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -1032,11 +1023,10 @@ sequenceDecs_decode_56_bmi2_fill_end: BZHIQ R14, R15, R15 // Update Offset State - BZHIQ R8, R15, CX - SHRXQ R8, R15, R15 - MOVQ $0x00001010, R14 - BEXTRQ R14, R8, R8 - ADDQ CX, R8 + BZHIQ R8, R15, CX + SHRXQ R8, R15, R15 + SHRL $0x10, R8 + ADDQ CX, R8 // Load ctx.ofTable MOVQ ctx+16(FP), CX @@ -1044,11 +1034,10 @@ sequenceDecs_decode_56_bmi2_fill_end: MOVQ (CX)(R8*8), R8 // Update Match Length State - BZHIQ DI, R15, CX - SHRXQ DI, R15, R15 - MOVQ $0x00001010, R14 - BEXTRQ R14, DI, DI - ADDQ CX, DI + BZHIQ DI, R15, CX + SHRXQ DI, R15, R15 + SHRL $0x10, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX @@ -1056,10 +1045,9 @@ sequenceDecs_decode_56_bmi2_fill_end: MOVQ (CX)(DI*8), DI // Update Literal Length State - BZHIQ SI, R15, CX - MOVQ $0x00001010, R14 - BEXTRQ R14, SI, SI - ADDQ CX, SI + BZHIQ SI, R15, CX + SHRL $0x10, SI + ADDQ CX, SI // Load ctx.llTable MOVQ ctx+16(FP), CX @@ -1152,9 +1140,9 @@ sequenceDecs_decode_56_bmi2_match_len_ofs_ok: MOVQ R11, 152(CX) MOVQ R12, 160(CX) MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Return success MOVQ $0x00000000, ret+24(FP) @@ -1797,11 +1785,11 @@ empty_seqs: // func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: CMOV, SSE TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -1826,7 +1814,7 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 MOVQ 40(SP), AX ADDQ AX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R10, 32(SP) // outBase += outPosition @@ -1967,8 +1955,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero: // Update Literal Length State MOVBQZX DI, R13 - SHRQ $0x10, DI - MOVWQZX DI, DI + SHRL $0x10, DI LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -1987,8 +1974,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero: // Update Match Length State MOVBQZX R8, R13 - SHRQ $0x10, R8 - MOVWQZX R8, R8 + SHRL $0x10, R8 LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -2007,8 +1993,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero: // Update Offset State MOVBQZX R9, R13 - SHRQ $0x10, R9 - MOVWQZX R9, R9 + SHRL $0x10, R9 LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -2295,9 +2280,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Update the context MOVQ ctx+16(FP), AX @@ -2362,11 +2347,11 @@ error_not_enough_space: // func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: BMI, BMI2, CMOV, SSE TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -2391,7 +2376,7 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 MOVQ 40(SP), CX ADDQ CX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R9, 32(SP) // outBase += outPosition @@ -2514,11 +2499,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end: BZHIQ R13, R14, R14 // Update Offset State - BZHIQ R8, R14, CX - SHRXQ R8, R14, R14 - MOVQ $0x00001010, R13 - BEXTRQ R13, R8, R8 - ADDQ CX, R8 + BZHIQ R8, R14, CX + SHRXQ R8, R14, R14 + SHRL $0x10, R8 + ADDQ CX, R8 // Load ctx.ofTable MOVQ ctx+16(FP), CX @@ -2526,11 +2510,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end: MOVQ (CX)(R8*8), R8 // Update Match Length State - BZHIQ DI, R14, CX - SHRXQ DI, R14, R14 - MOVQ $0x00001010, R13 - BEXTRQ R13, DI, DI - ADDQ CX, DI + BZHIQ DI, R14, CX + SHRXQ DI, R14, R14 + SHRL $0x10, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX @@ -2538,10 +2521,9 @@ sequenceDecs_decodeSync_bmi2_fill_2_end: MOVQ (CX)(DI*8), DI // Update Literal Length State - BZHIQ SI, R14, CX - MOVQ $0x00001010, R13 - BEXTRQ R13, SI, SI - ADDQ CX, SI + BZHIQ SI, R14, CX + SHRL $0x10, SI + ADDQ CX, SI // Load ctx.llTable MOVQ ctx+16(FP), CX @@ -2818,9 +2800,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Update the context MOVQ ctx+16(FP), AX @@ -2885,11 +2867,11 @@ error_not_enough_space: // func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: CMOV, SSE TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 - MOVQ br+8(FP), AX - MOVQ 32(AX), DX - MOVBQZX 40(AX), BX - MOVQ 24(AX), SI - MOVQ (AX), AX + MOVQ br+8(FP), CX + MOVQ 24(CX), DX + MOVBQZX 32(CX), BX + MOVQ (CX), AX + MOVQ 8(CX), SI ADDQ SI, AX MOVQ AX, (SP) MOVQ ctx+16(FP), AX @@ -2914,7 +2896,7 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 MOVQ 40(SP), AX ADDQ AX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R10, 32(SP) // outBase += outPosition @@ -3055,8 +3037,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero: // Update Literal Length State MOVBQZX DI, R13 - SHRQ $0x10, DI - MOVWQZX DI, DI + SHRL $0x10, DI LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -3075,8 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero: // Update Match Length State MOVBQZX R8, R13 - SHRQ $0x10, R8 - MOVWQZX R8, R8 + SHRL $0x10, R8 LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -3095,8 +3075,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero: // Update Offset State MOVBQZX R9, R13 - SHRQ $0x10, R9 - MOVWQZX R9, R9 + SHRL $0x10, R9 LEAQ (BX)(R13*1), CX MOVQ DX, R14 MOVQ CX, BX @@ -3485,9 +3464,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), AX - MOVQ DX, 32(AX) - MOVB BL, 40(AX) - MOVQ SI, 24(AX) + MOVQ DX, 24(AX) + MOVB BL, 32(AX) + MOVQ SI, 8(AX) // Update the context MOVQ ctx+16(FP), AX @@ -3552,11 +3531,11 @@ error_not_enough_space: // func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // Requires: BMI, BMI2, CMOV, SSE TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 - MOVQ br+8(FP), CX - MOVQ 32(CX), AX - MOVBQZX 40(CX), DX - MOVQ 24(CX), BX - MOVQ (CX), CX + MOVQ br+8(FP), BX + MOVQ 24(BX), AX + MOVBQZX 32(BX), DX + MOVQ (BX), CX + MOVQ 8(BX), BX ADDQ BX, CX MOVQ CX, (SP) MOVQ ctx+16(FP), CX @@ -3581,7 +3560,7 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 MOVQ 40(SP), CX ADDQ CX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R9, 32(SP) // outBase += outPosition @@ -3704,11 +3683,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end: BZHIQ R13, R14, R14 // Update Offset State - BZHIQ R8, R14, CX - SHRXQ R8, R14, R14 - MOVQ $0x00001010, R13 - BEXTRQ R13, R8, R8 - ADDQ CX, R8 + BZHIQ R8, R14, CX + SHRXQ R8, R14, R14 + SHRL $0x10, R8 + ADDQ CX, R8 // Load ctx.ofTable MOVQ ctx+16(FP), CX @@ -3716,11 +3694,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end: MOVQ (CX)(R8*8), R8 // Update Match Length State - BZHIQ DI, R14, CX - SHRXQ DI, R14, R14 - MOVQ $0x00001010, R13 - BEXTRQ R13, DI, DI - ADDQ CX, DI + BZHIQ DI, R14, CX + SHRXQ DI, R14, R14 + SHRL $0x10, DI + ADDQ CX, DI // Load ctx.mlTable MOVQ ctx+16(FP), CX @@ -3728,10 +3705,9 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end: MOVQ (CX)(DI*8), DI // Update Literal Length State - BZHIQ SI, R14, CX - MOVQ $0x00001010, R13 - BEXTRQ R13, SI, SI - ADDQ CX, SI + BZHIQ SI, R14, CX + SHRL $0x10, SI + ADDQ CX, SI // Load ctx.llTable MOVQ ctx+16(FP), CX @@ -4110,9 +4086,9 @@ handle_loop: loop_finished: MOVQ br+8(FP), CX - MOVQ AX, 32(CX) - MOVB DL, 40(CX) - MOVQ BX, 24(CX) + MOVQ AX, 24(CX) + MOVB DL, 32(CX) + MOVQ BX, 8(CX) // Update the context MOVQ ctx+16(FP), AX diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go index ac2a80d2..2fb35b78 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go @@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { } for i := range seqs { var ll, mo, ml int - if br.off > 4+((maxOffsetBits+16+16)>>3) { + if len(br.in) > 4+((maxOffsetBits+16+16)>>3) { // inlined function: // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index 9e1baad7..ec13594e 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -95,10 +95,9 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { var written int64 var readHeader bool { - var header []byte - var n int - header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0]) + header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0]) + var n int n, r.err = w.Write(header) if r.err != nil { return written, r.err diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 4be7cc73..066bef2a 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -88,6 +88,10 @@ var ( // Close has been called. ErrDecoderClosed = errors.New("decoder used after Close") + // ErrEncoderClosed will be returned if the Encoder was used after + // Close has been called. + ErrEncoderClosed = errors.New("encoder used after Close") + // ErrDecoderNilInput is returned when a nil Reader was provided // and an operation other than Reset/DecodeAll/Close was attempted. ErrDecoderNilInput = errors.New("nil input provided as reader") diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md index accd7aba..21508edb 100644 --- a/vendor/github.com/klauspost/cpuid/v2/README.md +++ b/vendor/github.com/klauspost/cpuid/v2/README.md @@ -9,10 +9,7 @@ You can access the CPU information by accessing the shared CPU variable of the c Package home: https://github.com/klauspost/cpuid [![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2) -[![Build Status][3]][4] - -[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master -[4]: https://travis-ci.org/klauspost/cpuid +[![Go](https://github.com/klauspost/cpuid/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/cpuid/actions/workflows/go.yml) ## installing @@ -285,7 +282,12 @@ Exit Code 1 | AMXINT8 | Tile computational operations on 8-bit integers | | AMXFP16 | Tile computational operations on FP16 numbers | | AMXTILE | Tile architecture | +| APX_F | Intel APX | | AVX | AVX functions | +| AVX10 | If set the Intel AVX10 Converged Vector ISA is supported | +| AVX10_128 | If set indicates that AVX10 128-bit vector support is present | +| AVX10_256 | If set indicates that AVX10 256-bit vector support is present | +| AVX10_512 | If set indicates that AVX10 512-bit vector support is present | | AVX2 | AVX2 functions | | AVX512BF16 | AVX-512 BFLOAT16 Instructions | | AVX512BITALG | AVX-512 Bit Algorithms | @@ -308,6 +310,7 @@ Exit Code 1 | AVXSLOW | Indicates the CPU performs 2 128 bit operations instead of one | | AVXVNNI | AVX (VEX encoded) VNNI neural network instructions | | AVXVNNIINT8 | AVX-VNNI-INT8 instructions | +| AVXVNNIINT16 | AVX-VNNI-INT16 instructions | | BHI_CTRL | Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 | | BMI1 | Bit Manipulation Instruction Set 1 | | BMI2 | Bit Manipulation Instruction Set 2 | @@ -365,6 +368,8 @@ Exit Code 1 | IDPRED_CTRL | IPRED_DIS | | INT_WBINVD | WBINVD/WBNOINVD are interruptible. | | INVLPGB | NVLPGB and TLBSYNC instruction supported | +| KEYLOCKER | Key locker | +| KEYLOCKERW | Key locker wide | | LAHF | LAHF/SAHF in long mode | | LAM | If set, CPU supports Linear Address Masking | | LBRVIRT | LBR virtualization | @@ -380,7 +385,7 @@ Exit Code 1 | MOVDIRI | Move Doubleword as Direct Store | | MOVSB_ZL | Fast Zero-Length MOVSB | | MPX | Intel MPX (Memory Protection Extensions) | -| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD | +| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD | | MSRIRC | Instruction Retired Counter MSR available | | MSRLIST | Read/Write List of Model Specific Registers | | MSR_PAGEFLUSH | Page Flush MSR available | diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go index d015c744..53bc18ca 100644 --- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go @@ -67,188 +67,201 @@ const ( // Keep index -1 as unknown UNKNOWN = -1 - // Add features - ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) - AESNI // Advanced Encryption Standard New Instructions - AMD3DNOW // AMD 3DNOW - AMD3DNOWEXT // AMD 3DNowExt - AMXBF16 // Tile computational operations on BFLOAT16 numbers - AMXFP16 // Tile computational operations on FP16 numbers - AMXINT8 // Tile computational operations on 8-bit integers - AMXTILE // Tile architecture - AVX // AVX functions - AVX2 // AVX2 functions - AVX512BF16 // AVX-512 BFLOAT16 Instructions - AVX512BITALG // AVX-512 Bit Algorithms - AVX512BW // AVX-512 Byte and Word Instructions - AVX512CD // AVX-512 Conflict Detection Instructions - AVX512DQ // AVX-512 Doubleword and Quadword Instructions - AVX512ER // AVX-512 Exponential and Reciprocal Instructions - AVX512F // AVX-512 Foundation - AVX512FP16 // AVX-512 FP16 Instructions - AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions - AVX512PF // AVX-512 Prefetch Instructions - AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions - AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 - AVX512VL // AVX-512 Vector Length Extensions - AVX512VNNI // AVX-512 Vector Neural Network Instructions - AVX512VP2INTERSECT // AVX-512 Intersect for D/Q - AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword - AVXIFMA // AVX-IFMA instructions - AVXNECONVERT // AVX-NE-CONVERT instructions - AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one - AVXVNNI // AVX (VEX encoded) VNNI neural network instructions - AVXVNNIINT8 // AVX-VNNI-INT8 instructions - BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 - BMI1 // Bit Manipulation Instruction Set 1 - BMI2 // Bit Manipulation Instruction Set 2 - CETIBT // Intel CET Indirect Branch Tracking - CETSS // Intel CET Shadow Stack - CLDEMOTE // Cache Line Demote - CLMUL // Carry-less Multiplication - CLZERO // CLZERO instruction supported - CMOV // i686 CMOV - CMPCCXADD // CMPCCXADD instructions - CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB - CMPXCHG8 // CMPXCHG8 instruction - CPBOOST // Core Performance Boost - CPPC // AMD: Collaborative Processor Performance Control - CX16 // CMPXCHG16B Instruction - EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ - ENQCMD // Enqueue Command - ERMS // Enhanced REP MOVSB/STOSB - F16C // Half-precision floating-point conversion - FLUSH_L1D // Flush L1D cache - FMA3 // Intel FMA 3. Does not imply AVX. - FMA4 // Bulldozer FMA4 functions - FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide - FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide - FSRM // Fast Short Rep Mov - FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9 - FXSROPT // FXSAVE/FXRSTOR optimizations - GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. - HLE // Hardware Lock Elision - HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR - HTT // Hyperthreading (enabled) - HWA // Hardware assert supported. Indicates support for MSRC001_10 - HYBRID_CPU // This part has CPUs of more than one type. - HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors - IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) - IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR - IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) - IBRS // AMD: Indirect Branch Restricted Speculation - IBRS_PREFERRED // AMD: IBRS is preferred over software solution - IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection - IBS // Instruction Based Sampling (AMD) - IBSBRNTRGT // Instruction Based Sampling Feature (AMD) - IBSFETCHSAM // Instruction Based Sampling Feature (AMD) - IBSFFV // Instruction Based Sampling Feature (AMD) - IBSOPCNT // Instruction Based Sampling Feature (AMD) - IBSOPCNTEXT // Instruction Based Sampling Feature (AMD) - IBSOPSAM // Instruction Based Sampling Feature (AMD) - IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) - IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) - IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported - IBS_OPDATA4 // AMD: IBS op data 4 MSR supported - IBS_OPFUSE // AMD: Indicates support for IbsOpFuse - IBS_PREVENTHOST // Disallowing IBS use by the host supported - IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 - IDPRED_CTRL // IPRED_DIS - INT_WBINVD // WBINVD/WBNOINVD are interruptible. - INVLPGB // NVLPGB and TLBSYNC instruction supported - LAHF // LAHF/SAHF in long mode - LAM // If set, CPU supports Linear Address Masking - LBRVIRT // LBR virtualization - LZCNT // LZCNT instruction - MCAOVERFLOW // MCA overflow recovery support. - MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. - MCOMMIT // MCOMMIT instruction supported - MD_CLEAR // VERW clears CPU buffers - MMX // standard MMX - MMXEXT // SSE integer functions or AMD MMX ext - MOVBE // MOVBE instruction (big-endian) - MOVDIR64B // Move 64 Bytes as Direct Store - MOVDIRI // Move Doubleword as Direct Store - MOVSB_ZL // Fast Zero-Length MOVSB - MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD - MPX // Intel MPX (Memory Protection Extensions) - MSRIRC // Instruction Retired Counter MSR available - MSRLIST // Read/Write List of Model Specific Registers - MSR_PAGEFLUSH // Page Flush MSR available - NRIPS // Indicates support for NRIP save on VMEXIT - NX // NX (No-Execute) bit - OSXSAVE // XSAVE enabled by OS - PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption - POPCNT // POPCNT instruction - PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled - PREFETCHI // PREFETCHIT0/1 instructions - PSFD // Predictive Store Forward Disable - RDPRU // RDPRU instruction supported - RDRAND // RDRAND instruction is available - RDSEED // RDSEED instruction is available - RDTSCP // RDTSCP Instruction - RRSBA_CTRL // Restricted RSB Alternate - RTM // Restricted Transactional Memory - RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. - SERIALIZE // Serialize Instruction Execution - SEV // AMD Secure Encrypted Virtualization supported - SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host - SEV_ALTERNATIVE // AMD SEV Alternate Injection supported - SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests - SEV_ES // AMD SEV Encrypted State supported - SEV_RESTRICTED // AMD SEV Restricted Injection supported - SEV_SNP // AMD SEV Secure Nested Paging supported - SGX // Software Guard Extensions - SGXLC // Software Guard Extensions Launch Control - SHA // Intel SHA Extensions - SME // AMD Secure Memory Encryption supported - SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced - SPEC_CTRL_SSBD // Speculative Store Bypass Disable - SRBDS_CTRL // SRBDS mitigation MSR available - SSE // SSE functions - SSE2 // P4 SSE functions - SSE3 // Prescott SSE3 functions - SSE4 // Penryn SSE4.1 functions - SSE42 // Nehalem SSE4.2 functions - SSE4A // AMD Barcelona microarchitecture SSE4a instructions - SSSE3 // Conroe SSSE3 functions - STIBP // Single Thread Indirect Branch Predictors - STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On - STOSB_SHORT // Fast short STOSB - SUCCOR // Software uncorrectable error containment and recovery capability. - SVM // AMD Secure Virtual Machine - SVMDA // Indicates support for the SVM decode assists. - SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control - SVML // AMD SVM lock. Indicates support for SVM-Lock. - SVMNP // AMD SVM nested paging - SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter - SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold - SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. - SYSEE // SYSENTER and SYSEXIT instructions - TBM // AMD Trailing Bit Manipulation - TDX_GUEST // Intel Trust Domain Extensions Guest - TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations - TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. - TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. - TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 - TSXLDTRK // Intel TSX Suspend Load Address Tracking - VAES // Vector AES. AVX(512) versions requires additional checks. - VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits. - VMPL // AMD VM Permission Levels supported - VMSA_REGPROT // AMD VMSA Register Protection supported - VMX // Virtual Machine Extensions - VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. - VTE // AMD Virtual Transparent Encryption supported - WAITPKG // TPAUSE, UMONITOR, UMWAIT - WBNOINVD // Write Back and Do Not Invalidate Cache - WRMSRNS // Non-Serializing Write to Model Specific Register - X87 // FPU - XGETBV1 // Supports XGETBV with ECX = 1 - XOP // Bulldozer XOP functions - XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV - XSAVEC // Supports XSAVEC and the compacted form of XRSTOR. - XSAVEOPT // XSAVEOPT available - XSAVES // Supports XSAVES/XRSTORS and IA32_XSS + // x86 features + ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + AESNI // Advanced Encryption Standard New Instructions + AMD3DNOW // AMD 3DNOW + AMD3DNOWEXT // AMD 3DNowExt + AMXBF16 // Tile computational operations on BFLOAT16 numbers + AMXFP16 // Tile computational operations on FP16 numbers + AMXINT8 // Tile computational operations on 8-bit integers + AMXTILE // Tile architecture + APX_F // Intel APX + AVX // AVX functions + AVX10 // If set the Intel AVX10 Converged Vector ISA is supported + AVX10_128 // If set indicates that AVX10 128-bit vector support is present + AVX10_256 // If set indicates that AVX10 256-bit vector support is present + AVX10_512 // If set indicates that AVX10 512-bit vector support is present + AVX2 // AVX2 functions + AVX512BF16 // AVX-512 BFLOAT16 Instructions + AVX512BITALG // AVX-512 Bit Algorithms + AVX512BW // AVX-512 Byte and Word Instructions + AVX512CD // AVX-512 Conflict Detection Instructions + AVX512DQ // AVX-512 Doubleword and Quadword Instructions + AVX512ER // AVX-512 Exponential and Reciprocal Instructions + AVX512F // AVX-512 Foundation + AVX512FP16 // AVX-512 FP16 Instructions + AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions + AVX512PF // AVX-512 Prefetch Instructions + AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions + AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 + AVX512VL // AVX-512 Vector Length Extensions + AVX512VNNI // AVX-512 Vector Neural Network Instructions + AVX512VP2INTERSECT // AVX-512 Intersect for D/Q + AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword + AVXIFMA // AVX-IFMA instructions + AVXNECONVERT // AVX-NE-CONVERT instructions + AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one + AVXVNNI // AVX (VEX encoded) VNNI neural network instructions + AVXVNNIINT8 // AVX-VNNI-INT8 instructions + AVXVNNIINT16 // AVX-VNNI-INT16 instructions + BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 + BMI1 // Bit Manipulation Instruction Set 1 + BMI2 // Bit Manipulation Instruction Set 2 + CETIBT // Intel CET Indirect Branch Tracking + CETSS // Intel CET Shadow Stack + CLDEMOTE // Cache Line Demote + CLMUL // Carry-less Multiplication + CLZERO // CLZERO instruction supported + CMOV // i686 CMOV + CMPCCXADD // CMPCCXADD instructions + CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB + CMPXCHG8 // CMPXCHG8 instruction + CPBOOST // Core Performance Boost + CPPC // AMD: Collaborative Processor Performance Control + CX16 // CMPXCHG16B Instruction + EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ + ENQCMD // Enqueue Command + ERMS // Enhanced REP MOVSB/STOSB + F16C // Half-precision floating-point conversion + FLUSH_L1D // Flush L1D cache + FMA3 // Intel FMA 3. Does not imply AVX. + FMA4 // Bulldozer FMA4 functions + FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide + FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide + FSRM // Fast Short Rep Mov + FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9 + FXSROPT // FXSAVE/FXRSTOR optimizations + GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. + HLE // Hardware Lock Elision + HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR + HTT // Hyperthreading (enabled) + HWA // Hardware assert supported. Indicates support for MSRC001_10 + HYBRID_CPU // This part has CPUs of more than one type. + HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors + IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) + IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR + IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) + IBPB_BRTYPE // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes all branch type predictions from the CPU branch predictor + IBRS // AMD: Indirect Branch Restricted Speculation + IBRS_PREFERRED // AMD: IBRS is preferred over software solution + IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection + IBS // Instruction Based Sampling (AMD) + IBSBRNTRGT // Instruction Based Sampling Feature (AMD) + IBSFETCHSAM // Instruction Based Sampling Feature (AMD) + IBSFFV // Instruction Based Sampling Feature (AMD) + IBSOPCNT // Instruction Based Sampling Feature (AMD) + IBSOPCNTEXT // Instruction Based Sampling Feature (AMD) + IBSOPSAM // Instruction Based Sampling Feature (AMD) + IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) + IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) + IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported + IBS_OPDATA4 // AMD: IBS op data 4 MSR supported + IBS_OPFUSE // AMD: Indicates support for IbsOpFuse + IBS_PREVENTHOST // Disallowing IBS use by the host supported + IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 + IDPRED_CTRL // IPRED_DIS + INT_WBINVD // WBINVD/WBNOINVD are interruptible. + INVLPGB // NVLPGB and TLBSYNC instruction supported + KEYLOCKER // Key locker + KEYLOCKERW // Key locker wide + LAHF // LAHF/SAHF in long mode + LAM // If set, CPU supports Linear Address Masking + LBRVIRT // LBR virtualization + LZCNT // LZCNT instruction + MCAOVERFLOW // MCA overflow recovery support. + MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. + MCOMMIT // MCOMMIT instruction supported + MD_CLEAR // VERW clears CPU buffers + MMX // standard MMX + MMXEXT // SSE integer functions or AMD MMX ext + MOVBE // MOVBE instruction (big-endian) + MOVDIR64B // Move 64 Bytes as Direct Store + MOVDIRI // Move Doubleword as Direct Store + MOVSB_ZL // Fast Zero-Length MOVSB + MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD + MPX // Intel MPX (Memory Protection Extensions) + MSRIRC // Instruction Retired Counter MSR available + MSRLIST // Read/Write List of Model Specific Registers + MSR_PAGEFLUSH // Page Flush MSR available + NRIPS // Indicates support for NRIP save on VMEXIT + NX // NX (No-Execute) bit + OSXSAVE // XSAVE enabled by OS + PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption + POPCNT // POPCNT instruction + PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled + PREFETCHI // PREFETCHIT0/1 instructions + PSFD // Predictive Store Forward Disable + RDPRU // RDPRU instruction supported + RDRAND // RDRAND instruction is available + RDSEED // RDSEED instruction is available + RDTSCP // RDTSCP Instruction + RRSBA_CTRL // Restricted RSB Alternate + RTM // Restricted Transactional Memory + RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. + SBPB // Indicates support for the Selective Branch Predictor Barrier + SERIALIZE // Serialize Instruction Execution + SEV // AMD Secure Encrypted Virtualization supported + SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host + SEV_ALTERNATIVE // AMD SEV Alternate Injection supported + SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests + SEV_ES // AMD SEV Encrypted State supported + SEV_RESTRICTED // AMD SEV Restricted Injection supported + SEV_SNP // AMD SEV Secure Nested Paging supported + SGX // Software Guard Extensions + SGXLC // Software Guard Extensions Launch Control + SHA // Intel SHA Extensions + SME // AMD Secure Memory Encryption supported + SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced + SPEC_CTRL_SSBD // Speculative Store Bypass Disable + SRBDS_CTRL // SRBDS mitigation MSR available + SRSO_MSR_FIX // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO. + SRSO_NO // Indicates the CPU is not subject to the SRSO vulnerability + SRSO_USER_KERNEL_NO // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries + SSE // SSE functions + SSE2 // P4 SSE functions + SSE3 // Prescott SSE3 functions + SSE4 // Penryn SSE4.1 functions + SSE42 // Nehalem SSE4.2 functions + SSE4A // AMD Barcelona microarchitecture SSE4a instructions + SSSE3 // Conroe SSSE3 functions + STIBP // Single Thread Indirect Branch Predictors + STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On + STOSB_SHORT // Fast short STOSB + SUCCOR // Software uncorrectable error containment and recovery capability. + SVM // AMD Secure Virtual Machine + SVMDA // Indicates support for the SVM decode assists. + SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control + SVML // AMD SVM lock. Indicates support for SVM-Lock. + SVMNP // AMD SVM nested paging + SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter + SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold + SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. + SYSEE // SYSENTER and SYSEXIT instructions + TBM // AMD Trailing Bit Manipulation + TDX_GUEST // Intel Trust Domain Extensions Guest + TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations + TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. + TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. + TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 + TSXLDTRK // Intel TSX Suspend Load Address Tracking + VAES // Vector AES. AVX(512) versions requires additional checks. + VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits. + VMPL // AMD VM Permission Levels supported + VMSA_REGPROT // AMD VMSA Register Protection supported + VMX // Virtual Machine Extensions + VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. + VTE // AMD Virtual Transparent Encryption supported + WAITPKG // TPAUSE, UMONITOR, UMWAIT + WBNOINVD // Write Back and Do Not Invalidate Cache + WRMSRNS // Non-Serializing Write to Model Specific Register + X87 // FPU + XGETBV1 // Supports XGETBV with ECX = 1 + XOP // Bulldozer XOP functions + XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV + XSAVEC // Supports XSAVEC and the compacted form of XRSTOR. + XSAVEOPT // XSAVEOPT available + XSAVES // Supports XSAVES/XRSTORS and IA32_XSS // ARM features: AESARM // AES instructions @@ -302,9 +315,11 @@ type CPUInfo struct { L2 int // L2 Cache (per core or shared). Will be -1 if undetected L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected } - SGX SGXSupport - maxFunc uint32 - maxExFunc uint32 + SGX SGXSupport + AMDMemEncryption AMDMemEncryptionSupport + AVX10Level uint8 + maxFunc uint32 + maxExFunc uint32 } var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) @@ -1071,6 +1086,32 @@ func hasSGX(available, lc bool) (rval SGXSupport) { return } +type AMDMemEncryptionSupport struct { + Available bool + CBitPossition uint32 + NumVMPL uint32 + PhysAddrReduction uint32 + NumEntryptedGuests uint32 + MinSevNoEsAsid uint32 +} + +func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) { + rval.Available = available + if !available { + return + } + + _, b, c, d := cpuidex(0x8000001f, 0) + + rval.CBitPossition = b & 0x3f + rval.PhysAddrReduction = (b >> 6) & 0x3F + rval.NumVMPL = (b >> 12) & 0xf + rval.NumEntryptedGuests = c + rval.MinSevNoEsAsid = d + + return +} + func support() flagSet { var fs flagSet mfi := maxFunctionID() @@ -1165,6 +1206,7 @@ func support() flagSet { fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) fs.setIf(ecx&(1<<13) != 0, TME) fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) + fs.setIf(ecx&(1<<23) != 0, KEYLOCKER) fs.setIf(ecx&(1<<27) != 0, MOVDIRI) fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) fs.setIf(ecx&(1<<29) != 0, ENQCMD) @@ -1201,7 +1243,10 @@ func support() flagSet { // CPUID.(EAX=7, ECX=1).EDX fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8) fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT) + fs.setIf(edx1&(1<<10) != 0, AVXVNNIINT16) fs.setIf(edx1&(1<<14) != 0, PREFETCHI) + fs.setIf(edx1&(1<<19) != 0, AVX10) + fs.setIf(edx1&(1<<21) != 0, APX_F) // Only detect AVX-512 features if XGETBV is supported if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { @@ -1252,6 +1297,19 @@ func support() flagSet { fs.setIf(edx&(1<<4) != 0, BHI_CTRL) fs.setIf(edx&(1<<5) != 0, MCDT_NO) + // Add keylocker features. + if fs.inSet(KEYLOCKER) && mfi >= 0x19 { + _, ebx, _, _ := cpuidex(0x19, 0) + fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4) + } + + // Add AVX10 features. + if fs.inSet(AVX10) && mfi >= 0x24 { + _, ebx, _, _ := cpuidex(0x24, 0) + fs.setIf(ebx&(1<<16) != 0, AVX10_128) + fs.setIf(ebx&(1<<17) != 0, AVX10_256) + fs.setIf(ebx&(1<<18) != 0, AVX10_512) + } } // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) @@ -1394,6 +1452,29 @@ func support() flagSet { fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) } + if maxExtendedFunction() >= 0x80000021 && vend == AMD { + a, _, _, _ := cpuid(0x80000021) + fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX) + fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO) + fs.setIf((a>>29)&1 == 1, SRSO_NO) + fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE) + fs.setIf((a>>27)&1 == 1, SBPB) + } + + if mfi >= 0x20 { + // Microsoft has decided to purposefully hide the information + // of the guest TEE when VMs are being created using Hyper-V. + // + // This leads us to check for the Hyper-V cpuid features + // (0x4000000C), and then for the `ebx` value set. + // + // For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part + // we're mostly interested about,according to: + // https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174 + _, ebx, _, _ := cpuid(0x4000000C) + fs.setIf(ebx == 0xbe3, TDX_GUEST) + } + if mfi >= 0x21 { // Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21). _, ebx, ecx, edx := cpuid(0x21) @@ -1404,6 +1485,14 @@ func support() flagSet { return fs } +func (c *CPUInfo) supportAVX10() uint8 { + if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) { + _, ebx, _, _ := cpuidex(0x24, 0) + return uint8(ebx) + } + return 0 +} + func valAsString(values ...uint32) []byte { r := make([]byte, 4*len(values)) for i, v := range values { diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go index c946824e..799b400c 100644 --- a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go +++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go @@ -27,10 +27,12 @@ func addInfo(c *CPUInfo, safe bool) { c.Family, c.Model, c.Stepping = familyModel() c.featureSet = support() c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC)) + c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV)) c.ThreadsPerCore = threadsPerCore() c.LogicalCores = logicalCores() c.PhysicalCores = physicalCores() c.VendorID, c.VendorString = vendorID() + c.AVX10Level = c.supportAVX10() c.cacheSize() c.frequencies() } diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go index 024c706a..3a256031 100644 --- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go +++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go @@ -16,210 +16,223 @@ func _() { _ = x[AMXFP16-6] _ = x[AMXINT8-7] _ = x[AMXTILE-8] - _ = x[AVX-9] - _ = x[AVX2-10] - _ = x[AVX512BF16-11] - _ = x[AVX512BITALG-12] - _ = x[AVX512BW-13] - _ = x[AVX512CD-14] - _ = x[AVX512DQ-15] - _ = x[AVX512ER-16] - _ = x[AVX512F-17] - _ = x[AVX512FP16-18] - _ = x[AVX512IFMA-19] - _ = x[AVX512PF-20] - _ = x[AVX512VBMI-21] - _ = x[AVX512VBMI2-22] - _ = x[AVX512VL-23] - _ = x[AVX512VNNI-24] - _ = x[AVX512VP2INTERSECT-25] - _ = x[AVX512VPOPCNTDQ-26] - _ = x[AVXIFMA-27] - _ = x[AVXNECONVERT-28] - _ = x[AVXSLOW-29] - _ = x[AVXVNNI-30] - _ = x[AVXVNNIINT8-31] - _ = x[BHI_CTRL-32] - _ = x[BMI1-33] - _ = x[BMI2-34] - _ = x[CETIBT-35] - _ = x[CETSS-36] - _ = x[CLDEMOTE-37] - _ = x[CLMUL-38] - _ = x[CLZERO-39] - _ = x[CMOV-40] - _ = x[CMPCCXADD-41] - _ = x[CMPSB_SCADBS_SHORT-42] - _ = x[CMPXCHG8-43] - _ = x[CPBOOST-44] - _ = x[CPPC-45] - _ = x[CX16-46] - _ = x[EFER_LMSLE_UNS-47] - _ = x[ENQCMD-48] - _ = x[ERMS-49] - _ = x[F16C-50] - _ = x[FLUSH_L1D-51] - _ = x[FMA3-52] - _ = x[FMA4-53] - _ = x[FP128-54] - _ = x[FP256-55] - _ = x[FSRM-56] - _ = x[FXSR-57] - _ = x[FXSROPT-58] - _ = x[GFNI-59] - _ = x[HLE-60] - _ = x[HRESET-61] - _ = x[HTT-62] - _ = x[HWA-63] - _ = x[HYBRID_CPU-64] - _ = x[HYPERVISOR-65] - _ = x[IA32_ARCH_CAP-66] - _ = x[IA32_CORE_CAP-67] - _ = x[IBPB-68] - _ = x[IBRS-69] - _ = x[IBRS_PREFERRED-70] - _ = x[IBRS_PROVIDES_SMP-71] - _ = x[IBS-72] - _ = x[IBSBRNTRGT-73] - _ = x[IBSFETCHSAM-74] - _ = x[IBSFFV-75] - _ = x[IBSOPCNT-76] - _ = x[IBSOPCNTEXT-77] - _ = x[IBSOPSAM-78] - _ = x[IBSRDWROPCNT-79] - _ = x[IBSRIPINVALIDCHK-80] - _ = x[IBS_FETCH_CTLX-81] - _ = x[IBS_OPDATA4-82] - _ = x[IBS_OPFUSE-83] - _ = x[IBS_PREVENTHOST-84] - _ = x[IBS_ZEN4-85] - _ = x[IDPRED_CTRL-86] - _ = x[INT_WBINVD-87] - _ = x[INVLPGB-88] - _ = x[LAHF-89] - _ = x[LAM-90] - _ = x[LBRVIRT-91] - _ = x[LZCNT-92] - _ = x[MCAOVERFLOW-93] - _ = x[MCDT_NO-94] - _ = x[MCOMMIT-95] - _ = x[MD_CLEAR-96] - _ = x[MMX-97] - _ = x[MMXEXT-98] - _ = x[MOVBE-99] - _ = x[MOVDIR64B-100] - _ = x[MOVDIRI-101] - _ = x[MOVSB_ZL-102] - _ = x[MOVU-103] - _ = x[MPX-104] - _ = x[MSRIRC-105] - _ = x[MSRLIST-106] - _ = x[MSR_PAGEFLUSH-107] - _ = x[NRIPS-108] - _ = x[NX-109] - _ = x[OSXSAVE-110] - _ = x[PCONFIG-111] - _ = x[POPCNT-112] - _ = x[PPIN-113] - _ = x[PREFETCHI-114] - _ = x[PSFD-115] - _ = x[RDPRU-116] - _ = x[RDRAND-117] - _ = x[RDSEED-118] - _ = x[RDTSCP-119] - _ = x[RRSBA_CTRL-120] - _ = x[RTM-121] - _ = x[RTM_ALWAYS_ABORT-122] - _ = x[SERIALIZE-123] - _ = x[SEV-124] - _ = x[SEV_64BIT-125] - _ = x[SEV_ALTERNATIVE-126] - _ = x[SEV_DEBUGSWAP-127] - _ = x[SEV_ES-128] - _ = x[SEV_RESTRICTED-129] - _ = x[SEV_SNP-130] - _ = x[SGX-131] - _ = x[SGXLC-132] - _ = x[SHA-133] - _ = x[SME-134] - _ = x[SME_COHERENT-135] - _ = x[SPEC_CTRL_SSBD-136] - _ = x[SRBDS_CTRL-137] - _ = x[SSE-138] - _ = x[SSE2-139] - _ = x[SSE3-140] - _ = x[SSE4-141] - _ = x[SSE42-142] - _ = x[SSE4A-143] - _ = x[SSSE3-144] - _ = x[STIBP-145] - _ = x[STIBP_ALWAYSON-146] - _ = x[STOSB_SHORT-147] - _ = x[SUCCOR-148] - _ = x[SVM-149] - _ = x[SVMDA-150] - _ = x[SVMFBASID-151] - _ = x[SVML-152] - _ = x[SVMNP-153] - _ = x[SVMPF-154] - _ = x[SVMPFT-155] - _ = x[SYSCALL-156] - _ = x[SYSEE-157] - _ = x[TBM-158] - _ = x[TDX_GUEST-159] - _ = x[TLB_FLUSH_NESTED-160] - _ = x[TME-161] - _ = x[TOPEXT-162] - _ = x[TSCRATEMSR-163] - _ = x[TSXLDTRK-164] - _ = x[VAES-165] - _ = x[VMCBCLEAN-166] - _ = x[VMPL-167] - _ = x[VMSA_REGPROT-168] - _ = x[VMX-169] - _ = x[VPCLMULQDQ-170] - _ = x[VTE-171] - _ = x[WAITPKG-172] - _ = x[WBNOINVD-173] - _ = x[WRMSRNS-174] - _ = x[X87-175] - _ = x[XGETBV1-176] - _ = x[XOP-177] - _ = x[XSAVE-178] - _ = x[XSAVEC-179] - _ = x[XSAVEOPT-180] - _ = x[XSAVES-181] - _ = x[AESARM-182] - _ = x[ARMCPUID-183] - _ = x[ASIMD-184] - _ = x[ASIMDDP-185] - _ = x[ASIMDHP-186] - _ = x[ASIMDRDM-187] - _ = x[ATOMICS-188] - _ = x[CRC32-189] - _ = x[DCPOP-190] - _ = x[EVTSTRM-191] - _ = x[FCMA-192] - _ = x[FP-193] - _ = x[FPHP-194] - _ = x[GPA-195] - _ = x[JSCVT-196] - _ = x[LRCPC-197] - _ = x[PMULL-198] - _ = x[SHA1-199] - _ = x[SHA2-200] - _ = x[SHA3-201] - _ = x[SHA512-202] - _ = x[SM3-203] - _ = x[SM4-204] - _ = x[SVE-205] - _ = x[lastID-206] + _ = x[APX_F-9] + _ = x[AVX-10] + _ = x[AVX10-11] + _ = x[AVX10_128-12] + _ = x[AVX10_256-13] + _ = x[AVX10_512-14] + _ = x[AVX2-15] + _ = x[AVX512BF16-16] + _ = x[AVX512BITALG-17] + _ = x[AVX512BW-18] + _ = x[AVX512CD-19] + _ = x[AVX512DQ-20] + _ = x[AVX512ER-21] + _ = x[AVX512F-22] + _ = x[AVX512FP16-23] + _ = x[AVX512IFMA-24] + _ = x[AVX512PF-25] + _ = x[AVX512VBMI-26] + _ = x[AVX512VBMI2-27] + _ = x[AVX512VL-28] + _ = x[AVX512VNNI-29] + _ = x[AVX512VP2INTERSECT-30] + _ = x[AVX512VPOPCNTDQ-31] + _ = x[AVXIFMA-32] + _ = x[AVXNECONVERT-33] + _ = x[AVXSLOW-34] + _ = x[AVXVNNI-35] + _ = x[AVXVNNIINT8-36] + _ = x[AVXVNNIINT16-37] + _ = x[BHI_CTRL-38] + _ = x[BMI1-39] + _ = x[BMI2-40] + _ = x[CETIBT-41] + _ = x[CETSS-42] + _ = x[CLDEMOTE-43] + _ = x[CLMUL-44] + _ = x[CLZERO-45] + _ = x[CMOV-46] + _ = x[CMPCCXADD-47] + _ = x[CMPSB_SCADBS_SHORT-48] + _ = x[CMPXCHG8-49] + _ = x[CPBOOST-50] + _ = x[CPPC-51] + _ = x[CX16-52] + _ = x[EFER_LMSLE_UNS-53] + _ = x[ENQCMD-54] + _ = x[ERMS-55] + _ = x[F16C-56] + _ = x[FLUSH_L1D-57] + _ = x[FMA3-58] + _ = x[FMA4-59] + _ = x[FP128-60] + _ = x[FP256-61] + _ = x[FSRM-62] + _ = x[FXSR-63] + _ = x[FXSROPT-64] + _ = x[GFNI-65] + _ = x[HLE-66] + _ = x[HRESET-67] + _ = x[HTT-68] + _ = x[HWA-69] + _ = x[HYBRID_CPU-70] + _ = x[HYPERVISOR-71] + _ = x[IA32_ARCH_CAP-72] + _ = x[IA32_CORE_CAP-73] + _ = x[IBPB-74] + _ = x[IBPB_BRTYPE-75] + _ = x[IBRS-76] + _ = x[IBRS_PREFERRED-77] + _ = x[IBRS_PROVIDES_SMP-78] + _ = x[IBS-79] + _ = x[IBSBRNTRGT-80] + _ = x[IBSFETCHSAM-81] + _ = x[IBSFFV-82] + _ = x[IBSOPCNT-83] + _ = x[IBSOPCNTEXT-84] + _ = x[IBSOPSAM-85] + _ = x[IBSRDWROPCNT-86] + _ = x[IBSRIPINVALIDCHK-87] + _ = x[IBS_FETCH_CTLX-88] + _ = x[IBS_OPDATA4-89] + _ = x[IBS_OPFUSE-90] + _ = x[IBS_PREVENTHOST-91] + _ = x[IBS_ZEN4-92] + _ = x[IDPRED_CTRL-93] + _ = x[INT_WBINVD-94] + _ = x[INVLPGB-95] + _ = x[KEYLOCKER-96] + _ = x[KEYLOCKERW-97] + _ = x[LAHF-98] + _ = x[LAM-99] + _ = x[LBRVIRT-100] + _ = x[LZCNT-101] + _ = x[MCAOVERFLOW-102] + _ = x[MCDT_NO-103] + _ = x[MCOMMIT-104] + _ = x[MD_CLEAR-105] + _ = x[MMX-106] + _ = x[MMXEXT-107] + _ = x[MOVBE-108] + _ = x[MOVDIR64B-109] + _ = x[MOVDIRI-110] + _ = x[MOVSB_ZL-111] + _ = x[MOVU-112] + _ = x[MPX-113] + _ = x[MSRIRC-114] + _ = x[MSRLIST-115] + _ = x[MSR_PAGEFLUSH-116] + _ = x[NRIPS-117] + _ = x[NX-118] + _ = x[OSXSAVE-119] + _ = x[PCONFIG-120] + _ = x[POPCNT-121] + _ = x[PPIN-122] + _ = x[PREFETCHI-123] + _ = x[PSFD-124] + _ = x[RDPRU-125] + _ = x[RDRAND-126] + _ = x[RDSEED-127] + _ = x[RDTSCP-128] + _ = x[RRSBA_CTRL-129] + _ = x[RTM-130] + _ = x[RTM_ALWAYS_ABORT-131] + _ = x[SBPB-132] + _ = x[SERIALIZE-133] + _ = x[SEV-134] + _ = x[SEV_64BIT-135] + _ = x[SEV_ALTERNATIVE-136] + _ = x[SEV_DEBUGSWAP-137] + _ = x[SEV_ES-138] + _ = x[SEV_RESTRICTED-139] + _ = x[SEV_SNP-140] + _ = x[SGX-141] + _ = x[SGXLC-142] + _ = x[SHA-143] + _ = x[SME-144] + _ = x[SME_COHERENT-145] + _ = x[SPEC_CTRL_SSBD-146] + _ = x[SRBDS_CTRL-147] + _ = x[SRSO_MSR_FIX-148] + _ = x[SRSO_NO-149] + _ = x[SRSO_USER_KERNEL_NO-150] + _ = x[SSE-151] + _ = x[SSE2-152] + _ = x[SSE3-153] + _ = x[SSE4-154] + _ = x[SSE42-155] + _ = x[SSE4A-156] + _ = x[SSSE3-157] + _ = x[STIBP-158] + _ = x[STIBP_ALWAYSON-159] + _ = x[STOSB_SHORT-160] + _ = x[SUCCOR-161] + _ = x[SVM-162] + _ = x[SVMDA-163] + _ = x[SVMFBASID-164] + _ = x[SVML-165] + _ = x[SVMNP-166] + _ = x[SVMPF-167] + _ = x[SVMPFT-168] + _ = x[SYSCALL-169] + _ = x[SYSEE-170] + _ = x[TBM-171] + _ = x[TDX_GUEST-172] + _ = x[TLB_FLUSH_NESTED-173] + _ = x[TME-174] + _ = x[TOPEXT-175] + _ = x[TSCRATEMSR-176] + _ = x[TSXLDTRK-177] + _ = x[VAES-178] + _ = x[VMCBCLEAN-179] + _ = x[VMPL-180] + _ = x[VMSA_REGPROT-181] + _ = x[VMX-182] + _ = x[VPCLMULQDQ-183] + _ = x[VTE-184] + _ = x[WAITPKG-185] + _ = x[WBNOINVD-186] + _ = x[WRMSRNS-187] + _ = x[X87-188] + _ = x[XGETBV1-189] + _ = x[XOP-190] + _ = x[XSAVE-191] + _ = x[XSAVEC-192] + _ = x[XSAVEOPT-193] + _ = x[XSAVES-194] + _ = x[AESARM-195] + _ = x[ARMCPUID-196] + _ = x[ASIMD-197] + _ = x[ASIMDDP-198] + _ = x[ASIMDHP-199] + _ = x[ASIMDRDM-200] + _ = x[ATOMICS-201] + _ = x[CRC32-202] + _ = x[DCPOP-203] + _ = x[EVTSTRM-204] + _ = x[FCMA-205] + _ = x[FP-206] + _ = x[FPHP-207] + _ = x[GPA-208] + _ = x[JSCVT-209] + _ = x[LRCPC-210] + _ = x[PMULL-211] + _ = x[SHA1-212] + _ = x[SHA2-213] + _ = x[SHA3-214] + _ = x[SHA512-215] + _ = x[SM3-216] + _ = x[SM4-217] + _ = x[SVE-218] + _ = x[lastID-219] _ = x[firstID-0] } -const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" +const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8AVXVNNIINT16BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" -var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 282, 286, 290, 296, 301, 309, 314, 320, 324, 333, 351, 359, 366, 370, 374, 388, 394, 398, 402, 411, 415, 419, 424, 429, 433, 437, 444, 448, 451, 457, 460, 463, 473, 483, 496, 509, 513, 517, 531, 548, 551, 561, 572, 578, 586, 597, 605, 617, 633, 647, 658, 668, 683, 691, 702, 712, 719, 723, 726, 733, 738, 749, 756, 763, 771, 774, 780, 785, 794, 801, 809, 813, 816, 822, 829, 842, 847, 849, 856, 863, 869, 873, 882, 886, 891, 897, 903, 909, 919, 922, 938, 947, 950, 959, 974, 987, 993, 1007, 1014, 1017, 1022, 1025, 1028, 1040, 1054, 1064, 1067, 1071, 1075, 1079, 1084, 1089, 1094, 1099, 1113, 1124, 1130, 1133, 1138, 1147, 1151, 1156, 1161, 1167, 1174, 1179, 1182, 1191, 1207, 1210, 1216, 1226, 1234, 1238, 1247, 1251, 1263, 1266, 1276, 1279, 1286, 1294, 1301, 1304, 1311, 1314, 1319, 1325, 1333, 1339, 1345, 1353, 1358, 1365, 1372, 1380, 1387, 1392, 1397, 1404, 1408, 1410, 1414, 1417, 1422, 1427, 1432, 1436, 1440, 1444, 1450, 1453, 1456, 1459, 1465} +var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 323, 331, 335, 339, 345, 350, 358, 363, 369, 373, 382, 400, 408, 415, 419, 423, 437, 443, 447, 451, 460, 464, 468, 473, 478, 482, 486, 493, 497, 500, 506, 509, 512, 522, 532, 545, 558, 562, 573, 577, 591, 608, 611, 621, 632, 638, 646, 657, 665, 677, 693, 707, 718, 728, 743, 751, 762, 772, 779, 788, 798, 802, 805, 812, 817, 828, 835, 842, 850, 853, 859, 864, 873, 880, 888, 892, 895, 901, 908, 921, 926, 928, 935, 942, 948, 952, 961, 965, 970, 976, 982, 988, 998, 1001, 1017, 1021, 1030, 1033, 1042, 1057, 1070, 1076, 1090, 1097, 1100, 1105, 1108, 1111, 1123, 1137, 1147, 1159, 1166, 1185, 1188, 1192, 1196, 1200, 1205, 1210, 1215, 1220, 1234, 1245, 1251, 1254, 1259, 1268, 1272, 1277, 1282, 1288, 1295, 1300, 1303, 1312, 1328, 1331, 1337, 1347, 1355, 1359, 1368, 1372, 1384, 1387, 1397, 1400, 1407, 1415, 1422, 1425, 1432, 1435, 1440, 1446, 1454, 1460, 1466, 1474, 1479, 1486, 1493, 1501, 1508, 1513, 1518, 1525, 1529, 1531, 1535, 1538, 1543, 1548, 1553, 1557, 1561, 1565, 1571, 1574, 1577, 1580, 1586} func (i FeatureID) String() string { if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) { diff --git a/vendor/github.com/klauspost/reedsolomon/README.md b/vendor/github.com/klauspost/reedsolomon/README.md index e9c148ff..d94512a3 100644 --- a/vendor/github.com/klauspost/reedsolomon/README.md +++ b/vendor/github.com/klauspost/reedsolomon/README.md @@ -25,6 +25,10 @@ Using Go modules is recommended. # Changes +## 2024 + + * Auto-generation of SVE and NEON routines for ARM based on AVX2 code. This results in a speedup of 2x for SVE (as measured using Graviton 3 on AWS) and a speedup of 1.5x as compared to the existing NEON-accelerated code. + ## 2022 * [GFNI](https://github.com/klauspost/reedsolomon/pull/224) support for amd64, for up to 3x faster processing. @@ -534,6 +538,21 @@ BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x ``` +# Legal + +> None of section below is legal advice. Seek your own legal counsel. +> As stated by the [LICENSE](LICENSE) the authors will not be held reliable for any use of this library. +> Users are encouraged to independently verify they comply with all legal requirements. + +As can be seen in [recent news](https://www.datanami.com/2023/10/16/cloudera-hit-with-240-million-judgement-over-erasure-coding/) +there has been lawsuits related to possible patents of aspects of erasure coding functionality. + +As a possible mitigation it is possible to use the tag `nopshufb` when compiling any code which includes this package. +This will remove all inclusion and use of `PSHUFB` and equivalent on other platforms. + +This is done by adding `-tags=nopshufb` to `go build` and similar commands that produce binary output. + +The removed code may not be infringing and even after `-tags=nopshufb` there may still be infringing code left. # Links * [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/). @@ -543,8 +562,9 @@ BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x * [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance. * [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation. * [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests. -* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations. +* [Screaming Fast Galois Field Arithmetic](https://www.snia.org/sites/default/files/files2/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations. * [Leopard-RS](https://github.com/catid/leopard) C library used as basis for GF16 implementation. +* [reed-solomon-simd](https://github.com/AndersTrier/reed-solomon-simd) Leopard-RS Rust implementation. # License diff --git a/vendor/github.com/klauspost/reedsolomon/galois.go b/vendor/github.com/klauspost/reedsolomon/galois.go index 479fa447..bbc521f4 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois.go +++ b/vendor/github.com/klauspost/reedsolomon/galois.go @@ -62,21 +62,17 @@ var logTable = [fieldSize]byte{ /** * Inverse of the logarithm table. Maps integer logarithms - * to members of the field. There is no entry for 255 - * because the highest log is 254. + * to members of the field. Entry 255 is the same as entry 0 sue to mod 255. * * This table was generated by `go run gentables.go` + * Table has been truncated to 256 bytes, since no lookups are bigger. */ -var expTable = []byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e} +var expTable = [256]byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x1} func galAdd(a, b byte) byte { return a ^ b } -func galSub(a, b byte) byte { - return a ^ b -} - // Table from https://github.com/templexxx/reedsolomon var invTable = [256]byte{0x0, 0x1, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba, 0xad, 0x9d, 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96, 0xd8, 0x72, 0xc0, 0x58, 0xe0, 0x3e, 0x4c, 0x66, 0x90, 0xde, 0x55, 0x80, 0xa0, 0x83, 0x4b, 0x2a, 0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a, 0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e, 0x48, 0x89, 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e, 0x50, 0x22, 0xcf, 0xa9, 0xab, 0xc, 0x15, 0xe1, 0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e, 0xa6, 0x4, 0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93, 0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61, 0x13, 0xc1, 0xcb, 0x63, 0x97, 0xe, 0x37, 0x41, 0x24, 0x57, 0xca, 0x5b, 0xb9, 0xc4, 0x17, 0x4d, 0x52, 0x8d, 0xef, 0xb3, 0x20, 0xec, 0x2f, 0x32, 0x28, 0xd1, 0x11, 0xd9, 0xe9, 0xfb, 0xda, 0x79, 0xdb, 0x77, 0x6, 0xbb, 0x84, 0xcd, 0xfe, 0xfc, 0x1b, 0x54, 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0, 0x49, 0x31, 0x27, 0x2d, 0x53, 0x69, 0x2, 0xf5, 0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc, 0xf, 0x5c, 0xb, 0xdc, 0xbd, 0x94, 0xac, 0x9, 0xc7, 0xa2, 0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x5, 0xce, 0x3b, 0xd, 0x3c, 0x9c, 0x8, 0xbe, 0xb7, 0x87, 0xe5, 0xee, 0x6b, 0xeb, 0xf2, 0xbf, 0xaf, 0xc5, 0x64, 0x7, 0x7b, 0x95, 0x9a, 0xae, 0xb6, 0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e, 0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a, 0x29, 0x71, 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6, 0x10, 0x73, 0x76, 0x78, 0x99, 0xa, 0x19, 0x91, 0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1, 0xe2, 0xf1, 0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a, 0xe3, 0xe7, 0xb5, 0xea, 0x3, 0x8f, 0xd3, 0xc9, 0x42, 0xd4, 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd} @@ -883,6 +879,15 @@ func galDivide(a, b byte) byte { if logResult < 0 { logResult += 255 } + return expTable[uint8(logResult)] +} + +// galOneOver is the same as galDivide(1, a). +func galOneOver(a byte) byte { + if a == 0 { + panic("Argument 'divisor' is 0") + } + logResult := logTable[a] ^ 255 return expTable[logResult] } @@ -902,17 +907,17 @@ func galExp(a byte, n int) byte { for logResult >= 255 { logResult -= 255 } - return expTable[logResult] + return expTable[uint8(logResult)] } -func genAvx2Matrix(matrixRows [][]byte, inputs, inIdx, outputs int, dst []byte) []byte { - if !avx2CodeGen { +func genCodeGenMatrix(matrixRows [][]byte, inputs, inIdx, outputs, vectorLength int, dst []byte) []byte { + if !codeGen { panic("codegen not enabled") } total := inputs * outputs // Duplicated in+out - wantBytes := total * 32 * 2 + wantBytes := total * vectorLength * 2 if cap(dst) < wantBytes { dst = AllocAligned(1, wantBytes)[0] } else { @@ -920,15 +925,16 @@ func genAvx2Matrix(matrixRows [][]byte, inputs, inIdx, outputs int, dst []byte) } for i, row := range matrixRows[:outputs] { for j, idx := range row[inIdx : inIdx+inputs] { - dstIdx := (j*outputs + i) * 64 + dstIdx := (j*outputs + i) * vectorLength * 2 dstPart := dst[dstIdx:] - dstPart = dstPart[:64] + dstPart = dstPart[:vectorLength*2] lo := mulTableLow[idx][:] hi := mulTableHigh[idx][:] - copy(dstPart[:16], lo) - copy(dstPart[16:32], lo) - copy(dstPart[32:48], hi) - copy(dstPart[48:64], hi) + + for k := 0; k < vectorLength; k += 16 { + copy(dstPart[k:k+16], lo) + copy(dstPart[vectorLength*2-(k+16):vectorLength*2-k], hi) + } } } return dst @@ -937,7 +943,7 @@ func genAvx2Matrix(matrixRows [][]byte, inputs, inIdx, outputs int, dst []byte) var gf2p811dMulMatrices = [256]uint64{0, 0x102040810204080, 0x8001828488102040, 0x8103868c983060c0, 0x408041c2c4881020, 0x418245cad4a850a0, 0xc081c3464c983060, 0xc183c74e5cb870e0, 0x2040a061e2c48810, 0x2142a469f2e4c890, 0xa04122e56ad4a850, 0xa14326ed7af4e8d0, 0x60c0e1a3264c9830, 0x61c2e5ab366cd8b0, 0xe0c16327ae5cb870, 0xe1c3672fbe7cf8f0, 0x102050b071e2c488, 0x112254b861c28408, 0x9021d234f9f2e4c8, 0x9123d63ce9d2a448, 0x50a01172b56ad4a8, 0x51a2157aa54a9428, 0xd0a193f63d7af4e8, 0xd1a397fe2d5ab468, 0x3060f0d193264c98, 0x3162f4d983060c18, 0xb06172551b366cd8, 0xb163765d0b162c58, 0x70e0b11357ae5cb8, 0x71e2b51b478e1c38, 0xf0e13397dfbe7cf8, 0xf1e3379fcf9e3c78, 0x8810a8d83871e2c4, 0x8912acd02851a244, 0x8112a5cb061c284, 0x9132e54a0418204, 0xc890e91afcf9f2e4, 0xc992ed12ecd9b264, 0x48916b9e74e9d2a4, 0x49936f9664c99224, 0xa85008b9dab56ad4, 0xa9520cb1ca952a54, 0x28518a3d52a54a94, 0x29538e3542850a14, 0xe8d0497b1e3d7af4, 0xe9d24d730e1d3a74, 0x68d1cbff962d5ab4, 0x69d3cff7860d1a34, 0x9830f8684993264c, 0x9932fc6059b366cc, 0x18317aecc183060c, 0x19337ee4d1a3468c, 0xd8b0b9aa8d1b366c, 0xd9b2bda29d3b76ec, 0x58b13b2e050b162c, 0x59b33f26152b56ac, 0xb8705809ab57ae5c, 0xb9725c01bb77eedc, 0x3871da8d23478e1c, 0x3973de853367ce9c, 0xf8f019cb6fdfbe7c, 0xf9f21dc37ffffefc, 0x78f19b4fe7cf9e3c, 0x79f39f47f7efdebc, 0xc488d46c1c3871e2, 0xc58ad0640c183162, 0x448956e8942851a2, 0x458b52e084081122, 0x840895aed8b061c2, 0x850a91a6c8902142, 0x409172a50a04182, 0x50b132240800102, 0xe4c8740dfefcf9f2, 0xe5ca7005eedcb972, 0x64c9f68976ecd9b2, 0x65cbf28166cc9932, 0xa44835cf3a74e9d2, 0xa54a31c72a54a952, 0x2449b74bb264c992, 0x254bb343a2448912, 0xd4a884dc6ddab56a, 0xd5aa80d47dfaf5ea, 0x54a90658e5ca952a, 0x55ab0250f5ead5aa, 0x9428c51ea952a54a, 0x952ac116b972e5ca, 0x1429479a2142850a, 0x152b43923162c58a, 0xf4e824bd8f1e3d7a, 0xf5ea20b59f3e7dfa, 0x74e9a639070e1d3a, 0x75eba231172e5dba, 0xb468657f4b962d5a, 0xb56a61775bb66dda, 0x3469e7fbc3860d1a, 0x356be3f3d3a64d9a, 0x4c987cb424499326, 0x4d9a78bc3469d3a6, 0xcc99fe30ac59b366, 0xcd9bfa38bc79f3e6, 0xc183d76e0c18306, 0xd1a397ef0e1c386, 0x8c19bff268d1a346, 0x8d1bbbfa78f1e3c6, 0x6cd8dcd5c68d1b36, 0x6ddad8ddd6ad5bb6, 0xecd95e514e9d3b76, 0xeddb5a595ebd7bf6, 0x2c589d1702050b16, 0x2d5a991f12254b96, 0xac591f938a152b56, 0xad5b1b9b9a356bd6, 0x5cb82c0455ab57ae, 0x5dba280c458b172e, 0xdcb9ae80ddbb77ee, 0xddbbaa88cd9b376e, 0x1c386dc69123478e, 0x1d3a69ce8103070e, 0x9c39ef42193367ce, 0x9d3beb4a0913274e, 0x7cf88c65b76fdfbe, 0x7dfa886da74f9f3e, 0xfcf90ee13f7ffffe, 0xfdfb0ae92f5fbf7e, 0x3c78cda773e7cf9e, 0x3d7ac9af63c78f1e, 0xbc794f23fbf7efde, 0xbd7b4b2bebd7af5e, 0xe2c46a368e1c3871, 0xe3c66e3e9e3c78f1, 0x62c5e8b2060c1831, 0x63c7ecba162c58b1, 0xa2442bf44a942851, 0xa3462ffc5ab468d1, 0x2245a970c2840811, 0x2347ad78d2a44891, 0xc284ca576cd8b061, 0xc386ce5f7cf8f0e1, 0x428548d3e4c89021, 0x43874cdbf4e8d0a1, 0x82048b95a850a041, 0x83068f9db870e0c1, 0x205091120408001, 0x3070d193060c081, 0xf2e43a86fffefcf9, 0xf3e63e8eefdebc79, 0x72e5b80277eedcb9, 0x73e7bc0a67ce9c39, 0xb2647b443b76ecd9, 0xb3667f4c2b56ac59, 0x3265f9c0b366cc99, 0x3367fdc8a3468c19, 0xd2a49ae71d3a74e9, 0xd3a69eef0d1a3469, 0x52a51863952a54a9, 0x53a71c6b850a1429, 0x9224db25d9b264c9, 0x9326df2dc9922449, 0x122559a151a24489, 0x13275da941820409, 0x6ad4c2eeb66ddab5, 0x6bd6c6e6a64d9a35, 0xead5406a3e7dfaf5, 0xebd744622e5dba75, 0x2a54832c72e5ca95, 0x2b56872462c58a15, 0xaa5501a8faf5ead5, 0xab5705a0ead5aa55, 0x4a94628f54a952a5, 0x4b96668744891225, 0xca95e00bdcb972e5, 0xcb97e403cc993265, 0xa14234d90214285, 0xb16274580010205, 0x8a15a1c9183162c5, 0x8b17a5c108112245, 0x7af4925ec78f1e3d, 0x7bf69656d7af5ebd, 0xfaf510da4f9f3e7d, 0xfbf714d25fbf7efd, 0x3a74d39c03070e1d, 0x3b76d79413274e9d, 0xba7551188b172e5d, 0xbb7755109b376edd, 0x5ab4323f254b962d, 0x5bb63637356bd6ad, 0xdab5b0bbad5bb66d, 0xdbb7b4b3bd7bf6ed, 0x1a3473fde1c3860d, 0x1b3677f5f1e3c68d, 0x9a35f17969d3a64d, 0x9b37f57179f3e6cd, 0x264cbe5a92244993, 0x274eba5282040913, 0xa64d3cde1a3469d3, 0xa74f38d60a142953, 0x66ccff9856ac59b3, 0x67cefb90468c1933, 0xe6cd7d1cdebc79f3, 0xe7cf7914ce9c3973, 0x60c1e3b70e0c183, 0x70e1a3360c08103, 0x860d9cbff8f0e1c3, 0x870f98b7e8d0a143, 0x468c5ff9b468d1a3, 0x478e5bf1a4489123, 0xc68ddd7d3c78f1e3, 0xc78fd9752c58b163, 0x366ceeeae3c68d1b, 0x376eeae2f3e6cd9b, 0xb66d6c6e6bd6ad5b, 0xb76f68667bf6eddb, 0x76ecaf28274e9d3b, 0x77eeab20376eddbb, 0xf6ed2dacaf5ebd7b, 0xf7ef29a4bf7efdfb, 0x162c4e8b0102050b, 0x172e4a831122458b, 0x962dcc0f8912254b, 0x972fc807993265cb, 0x56ac0f49c58a152b, 0x57ae0b41d5aa55ab, 0xd6ad8dcd4d9a356b, 0xd7af89c55dba75eb, 0xae5c1682aa55ab57, 0xaf5e128aba75ebd7, 0x2e5d940622458b17, 0x2f5f900e3265cb97, 0xeedc57406eddbb77, 0xefde53487efdfbf7, 0x6eddd5c4e6cd9b37, 0x6fdfd1ccf6eddbb7, 0x8e1cb6e348912347, 0x8f1eb2eb58b163c7, 0xe1d3467c0810307, 0xf1f306fd0a14387, 0xce9cf7218c193367, 0xcf9ef3299c3973e7, 0x4e9d75a504091327, 0x4f9f71ad142953a7, 0xbe7c4632dbb76fdf, 0xbf7e423acb972f5f, 0x3e7dc4b653a74f9f, 0x3f7fc0be43870f1f, 0xfefc07f01f3f7fff, 0xfffe03f80f1f3f7f, 0x7efd8574972f5fbf, 0x7fff817c870f1f3f, 0x9e3ce6533973e7cf, 0x9f3ee25b2953a74f, 0x1e3d64d7b163c78f, 0x1f3f60dfa143870f, 0xdebca791fdfbf7ef, 0xdfbea399eddbb76f, 0x5ebd251575ebd7af, 0x5fbf211d65cb972f} func genGFNIMatrix(matrixRows [][]byte, inputs, inIdx, outputs int, dst []uint64) []uint64 { - if !avx2CodeGen { + if !codeGen { panic("codegen not enabled") } total := inputs * outputs diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go index 9f84276b..8025560f 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go +++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go @@ -1,10 +1,11 @@ -//go:build !noasm && !appengine && !gccgo -// +build !noasm,!appengine,!gccgo +//go:build !noasm && !appengine && !gccgo && !nopshufb // Copyright 2015, Klaus Post, see LICENSE for details. package reedsolomon +const pshufb = true + //go:noescape func galMulSSSE3(low, high, in, out []byte) @@ -17,21 +18,12 @@ func galMulAVX2Xor(low, high, in, out []byte) //go:noescape func galMulAVX2(low, high, in, out []byte) -//go:noescape -func sSE2XorSlice(in, out []byte) - //go:noescape func galMulAVX2Xor_64(low, high, in, out []byte) //go:noescape func galMulAVX2_64(low, high, in, out []byte) -//go:noescape -func sSE2XorSlice_64(in, out []byte) - -//go:noescape -func avx2XorSlice_64(in, out []byte) - // This is what the assembler routines do in blocks of 16 bytes: /* func galMulSSSE3(low, high, in, out []byte) { @@ -61,20 +53,32 @@ func galMulSlice(c byte, in, out []byte, o *options) { } if o.useAVX2 { if len(in) >= bigSwitchover { - galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } if len(in) > 32 { - galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 5) << 5 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } } else if o.useSSSE3 { - galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } @@ -93,20 +97,32 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { if o.useAVX2 { if len(in) >= bigSwitchover { - galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } if len(in) >= 32 { - galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 5) << 5 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } } else if o.useSSSE3 { - galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) in = in[done:] out = out[done:] } @@ -125,20 +141,32 @@ func sliceXor(in, out []byte, o *options) { if o.useSSE2 { if len(in) >= bigSwitchover { if o.useAVX2 { - avx2XorSlice_64(in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + avx2XorSlice_64(in, out) in = in[done:] out = out[done:] } else { - sSE2XorSlice_64(in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + sSE2XorSlice_64(in, out) in = in[done:] out = out[done:] } } if len(in) >= 16 { - sSE2XorSlice(in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + sSE2XorSlice(in, out) in = in[done:] out = out[done:] } @@ -233,7 +261,7 @@ func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *optio return } - if false && o.useGFNI { + if false && o.useAvx512GFNI { // Note that these currently require that length is multiple of 64. t01 := gf2p811dMulMatrices[log_m01] t23 := gf2p811dMulMatrices[log_m23] @@ -388,7 +416,7 @@ func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *option return } - if false && o.useGFNI { + if false && o.useAvx512GFNI { t01 := gf2p811dMulMatrices[log_m01] t23 := gf2p811dMulMatrices[log_m23] t02 := gf2p811dMulMatrices[log_m02] @@ -470,9 +498,17 @@ func fftDIT2(x, y []byte, log_m ffe, o *options) { } if o.useAVX2 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } fftDIT2_avx2(x, y, tmp) } else if o.useSSSE3 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } fftDIT2_ssse3(x, y, tmp) } else { // Reference version: @@ -488,11 +524,15 @@ func fftDIT28(x, y []byte, log_m ffe8, o *options) { } if o.useAVX2 { + done := (len(y) >> 6) << 6 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } fftDIT28_avx2(x, y, &multiply256LUT8[log_m]) if len(x)&63 == 0 { return } - done := (len(y) >> 6) << 6 y = y[done:] x = x[done:] } @@ -507,11 +547,15 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) { } if o.useAVX2 { + done := (len(y) >> 6) << 6 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } ifftDIT28_avx2(x, y, &multiply256LUT8[log_m]) if len(x)&63 == 0 { return } - done := (len(y) >> 6) << 6 y = y[done:] x = x[done:] } @@ -522,14 +566,22 @@ func ifftDIT28(x, y []byte, log_m ffe8, o *options) { func mulAdd8(x, y []byte, log_m ffe8, o *options) { if o.useAVX2 { t := &multiply256LUT8[log_m] - galMulAVX2Xor_64(t[:16], t[16:32], y, x) done := (len(y) >> 6) << 6 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } + galMulAVX2Xor_64(t[:16], t[16:32], y, x) y = y[done:] x = x[done:] } else if o.useSSSE3 { t := &multiply256LUT8[log_m] - galMulSSSE3Xor(t[:16], t[16:32], y, x) done := (len(y) >> 4) << 4 + if raceEnabled { + raceReadSlice(y[:done]) + raceWriteSlice(x[:done]) + } + galMulSSSE3Xor(t[:16], t[16:32], y, x) y = y[done:] x = x[done:] } @@ -543,9 +595,19 @@ func ifftDIT2(x, y []byte, log_m ffe, o *options) { } if o.useAVX2 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } + ifftDIT2_avx2(x, y, tmp) } else if o.useSSSE3 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } + ifftDIT2_ssse3(x, y, tmp) } else { // Reference version: @@ -560,9 +622,17 @@ func mulgf16(x, y []byte, log_m ffe, o *options) { } if o.useAVX2 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } mulgf16_avx2(x, y, tmp) } else if o.useSSSE3 { tmp := &multiply256LUT[log_m] + if raceEnabled { + raceReadSlice(y) + raceWriteSlice(x) + } mulgf16_ssse3(x, y, tmp) } else { refMul(x, y, log_m) @@ -572,14 +642,23 @@ func mulgf16(x, y []byte, log_m ffe, o *options) { func mulgf8(out, in []byte, log_m ffe8, o *options) { if o.useAVX2 { t := &multiply256LUT8[log_m] - galMulAVX2_64(t[:16], t[16:32], in, out) done := (len(in) >> 6) << 6 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + + galMulAVX2_64(t[:16], t[16:32], in, out) in = in[done:] out = out[done:] } else if o.useSSSE3 { t := &multiply256LUT8[log_m] - galMulSSSE3(t[:16], t[16:32], in, out) done := (len(in) >> 4) << 4 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulSSSE3(t[:16], t[16:32], in, out) in = in[done:] out = out[done:] } diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s index 3e97c7c1..18e08c31 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s +++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s @@ -1,6 +1,7 @@ //+build !noasm //+build !appengine //+build !gccgo +//+build !nopshufb // Copyright 2015, Klaus Post, see LICENSE for details. @@ -215,28 +216,6 @@ done_avx2: VZEROUPPER RET -// func sSE2XorSlice(in, out []byte) -TEXT ·sSE2XorSlice(SB), 7, $0 - MOVQ in+0(FP), SI // SI: &in - MOVQ in_len+8(FP), R9 // R9: len(in) - MOVQ out+24(FP), DX // DX: &out - SHRQ $4, R9 // len(in) / 16 - CMPQ R9, $0 - JEQ done_xor_sse2 - -loopback_xor_sse2: - MOVOU (SI), X0 // in[x] - MOVOU (DX), X1 // out[x] - PXOR X0, X1 - MOVOU X1, (DX) - ADDQ $16, SI // in+=16 - ADDQ $16, DX // out+=16 - SUBQ $1, R9 - JNZ loopback_xor_sse2 - -done_xor_sse2: - RET - // func galMulAVX2Xor_64(low, high, in, out []byte) TEXT ·galMulAVX2Xor_64(SB), 7, $0 MOVQ low+0(FP), SI // SI: &low @@ -329,66 +308,3 @@ loopback_avx2_64: done_avx2_64: VZEROUPPER RET - -// func sSE2XorSlice_64(in, out []byte) -TEXT ·sSE2XorSlice_64(SB), 7, $0 - MOVQ in+0(FP), SI // SI: &in - MOVQ in_len+8(FP), R9 // R9: len(in) - MOVQ out+24(FP), DX // DX: &out - SHRQ $6, R9 // len(in) / 64 - CMPQ R9, $0 - JEQ done_xor_sse2_64 - -loopback_xor_sse2_64: - MOVOU (SI), X0 // in[x] - MOVOU 16(SI), X2 // in[x] - MOVOU 32(SI), X4 // in[x] - MOVOU 48(SI), X6 // in[x] - MOVOU (DX), X1 // out[x] - MOVOU 16(DX), X3 // out[x] - MOVOU 32(DX), X5 // out[x] - MOVOU 48(DX), X7 // out[x] - PXOR X0, X1 - PXOR X2, X3 - PXOR X4, X5 - PXOR X6, X7 - MOVOU X1, (DX) - MOVOU X3, 16(DX) - MOVOU X5, 32(DX) - MOVOU X7, 48(DX) - ADDQ $64, SI // in+=64 - ADDQ $64, DX // out+=64 - SUBQ $1, R9 - JNZ loopback_xor_sse2_64 - -done_xor_sse2_64: - RET - -// func avx2XorSlice_64(in, out []byte) -TEXT ·avx2XorSlice_64(SB), 7, $0 - MOVQ in+0(FP), SI // SI: &in - MOVQ in_len+8(FP), R9 // R9: len(in) - MOVQ out+24(FP), DX // DX: &out - SHRQ $6, R9 // len(in) / 64 - CMPQ R9, $0 - JEQ done_xor_avx2_64 - -loopback_xor_avx2_64: - VMOVDQU (SI), Y0 - VMOVDQU 32(SI), Y2 - VMOVDQU (DX), Y1 - VMOVDQU 32(DX), Y3 - VPXOR Y0, Y1, Y1 - VPXOR Y2, Y3, Y3 - VMOVDQU Y1, (DX) - VMOVDQU Y3, 32(DX) - - ADDQ $64, SI // in+=64 - ADDQ $64, DX // out+=64 - SUBQ $1, R9 - JNZ loopback_xor_avx2_64 - VZEROUPPER - -done_xor_avx2_64: - - RET diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go index 9ab27941..d860525c 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go +++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go @@ -1,19 +1,31 @@ -//go:build !noasm && !appengine && !gccgo -// +build !noasm,!appengine,!gccgo +//go:build !noasm && !appengine && !gccgo && !nopshufb // Copyright 2015, Klaus Post, see LICENSE for details. // Copyright 2017, Minio, Inc. package reedsolomon +const pshufb = true + //go:noescape func galMulNEON(low, high, in, out []byte) //go:noescape func galMulXorNEON(low, high, in, out []byte) -//go:noescape -func galXorNEON(in, out []byte) +func getVectorLength() (vl, pl uint64) + +func init() { + if defaultOptions.useSVE { + if vl, _ := getVectorLength(); vl <= 256 { + // set vector length in bytes + defaultOptions.vectorLength = int(vl) >> 3 + } else { + // disable SVE for hardware implementatons over 256 bits (only know to be Fujitsu A64FX atm) + defaultOptions.useSVE = false + } + } +} func galMulSlice(c byte, in, out []byte, o *options) { if c == 1 { @@ -21,8 +33,12 @@ func galMulSlice(c byte, in, out []byte, o *options) { return } var done int - galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out) done = (len(in) >> 5) << 5 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } + galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out) remain := len(in) - done if remain > 0 { @@ -38,9 +54,12 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { sliceXor(in, out, o) return } - var done int + done := (len(in) >> 5) << 5 + if raceEnabled { + raceReadSlice(in[:done]) + raceWriteSlice(out[:done]) + } galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out) - done = (len(in) >> 5) << 5 remain := len(in) - done if remain > 0 { @@ -51,20 +70,6 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { } } -// simple slice xor -func sliceXor(in, out []byte, o *options) { - - galXorNEON(in, out) - done := (len(in) >> 5) << 5 - - remain := len(in) - done - if remain > 0 { - for i := done; i < len(in); i++ { - out[i] ^= in[i] - } - } -} - // 4-way butterfly func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) { ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o) @@ -90,7 +95,7 @@ func fftDIT2(x, y []byte, log_m ffe, o *options) { // Reference version: refMulAdd(x, y, log_m) // 64 byte aligned, always full. - galXorNEON(x, y) + xorSliceNEON(x, y) } // 2-way butterfly forward @@ -103,7 +108,7 @@ func fftDIT28(x, y []byte, log_m ffe8, o *options) { // 2-way butterfly func ifftDIT2(x, y []byte, log_m ffe, o *options) { // 64 byte aligned, always full. - galXorNEON(x, y) + xorSliceNEON(x, y) // Reference version: refMulAdd(x, y, log_m) } diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s index 3ae32372..ca3c9120 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s +++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s @@ -1,10 +1,13 @@ //+build !noasm //+build !appengine //+build !gccgo +//+build !nopshufb // Copyright 2015, Klaus Post, see LICENSE for details. // Copyright 2017, Minio, Inc. +#include "textflag.h" + #define LOAD(LO1, LO2, HI1, HI2) \ VLD1.P 32(R1), [LO1.B16, LO2.B16] \ \ @@ -100,28 +103,13 @@ loopXor: completeXor: RET -// func galXorNEON(in, out []byte) -TEXT ·galXorNEON(SB), 7, $0 - MOVD in_base+0(FP), R1 - MOVD in_len+8(FP), R2 // length of message - MOVD out_base+24(FP), R5 - SUBS $32, R2 - BMI completeXor - -loopXor: - // Main loop - VLD1.P 32(R1), [V0.B16, V1.B16] - VLD1 (R5), [V20.B16, V21.B16] - - VEOR V20.B16, V0.B16, V4.B16 - VEOR V21.B16, V1.B16, V5.B16 - - // Store result - VST1.P [V4.D2, V5.D2], 32(R5) - - SUBS $32, R2 - BPL loopXor - -completeXor: - RET - +TEXT ·getVectorLength(SB), NOSPLIT, $0 + WORD $0xd2800002 // mov x2, #0 + WORD $0x04225022 // addvl x2, x2, #1 + WORD $0xd37df042 // lsl x2, x2, #3 + WORD $0xd2800003 // mov x3, #0 + WORD $0x04635023 // addpl x3, x3, #1 + WORD $0xd37df063 // lsl x3, x3, #3 + MOVD R2, vl+0(FP) + MOVD R3, pl+8(FP) + RET diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go index 5f53c3b4..dac9b136 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go @@ -1,16 +1,19 @@ // Code generated by command: go run gen.go -out ../galois_gen_amd64.s -stubs ../galois_gen_amd64.go -pkg=reedsolomon. DO NOT EDIT. -//go:build !appengine && !noasm && !nogen && gc +//go:build !appengine && !noasm && !nogen && !nopshufb && gc package reedsolomon func _dummy_() -// mulAvxTwo_1x1 takes 1 inputs and produces 1 outputs. -// The output is initialized to 0. -// //go:noescape -func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func sSE2XorSlice(in []byte, out []byte) + +//go:noescape +func sSE2XorSlice_64(in []byte, out []byte) + +//go:noescape +func avx2XorSlice_64(in []byte, out []byte) // mulAvxTwo_1x1_64 takes 1 inputs and produces 1 outputs. // The output is initialized to 0. @@ -24,27 +27,27 @@ func mulAvxTwo_1x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_1x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x1 takes 1 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_1x1_64Xor takes 1 inputs and produces 1 outputs. // //go:noescape func mulGFNI_1x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_1x1Xor takes 1 inputs and produces 1 outputs. +// mulAvxGFNI_1x1Xor takes 1 inputs and produces 1 outputs. // //go:noescape -func mulAvxTwo_1x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_1x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_1x1_64Xor takes 1 inputs and produces 1 outputs. // //go:noescape func mulAvxTwo_1x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_1x2 takes 1 inputs and produces 2 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_1x2_64 takes 1 inputs and produces 2 outputs. // The output is initialized to 0. // @@ -57,27 +60,27 @@ func mulAvxTwo_1x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_1x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x2 takes 1 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_1x2_64Xor takes 1 inputs and produces 2 outputs. // //go:noescape func mulGFNI_1x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_1x2Xor takes 1 inputs and produces 2 outputs. +// mulAvxGFNI_1x2Xor takes 1 inputs and produces 2 outputs. // //go:noescape -func mulAvxTwo_1x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_1x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_1x2_64Xor takes 1 inputs and produces 2 outputs. // //go:noescape func mulAvxTwo_1x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_1x3 takes 1 inputs and produces 3 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_1x3_64 takes 1 inputs and produces 3 outputs. // The output is initialized to 0. // @@ -90,15 +93,21 @@ func mulAvxTwo_1x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_1x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x3 takes 1 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_1x3_64Xor takes 1 inputs and produces 3 outputs. // //go:noescape func mulGFNI_1x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_1x3Xor takes 1 inputs and produces 3 outputs. +// mulAvxGFNI_1x3Xor takes 1 inputs and produces 3 outputs. // //go:noescape -func mulAvxTwo_1x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_1x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_1x3_64Xor takes 1 inputs and produces 3 outputs. // @@ -117,11 +126,22 @@ func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_1x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x4 takes 1 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_1x4_64Xor takes 1 inputs and produces 4 outputs. // //go:noescape func mulGFNI_1x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x4Xor takes 1 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_1x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_1x4Xor takes 1 inputs and produces 4 outputs. // //go:noescape @@ -139,11 +159,22 @@ func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_1x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x5 takes 1 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_1x5_64Xor takes 1 inputs and produces 5 outputs. // //go:noescape func mulGFNI_1x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x5Xor takes 1 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_1x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_1x5Xor takes 1 inputs and produces 5 outputs. // //go:noescape @@ -161,11 +192,22 @@ func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_1x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x6 takes 1 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_1x6_64Xor takes 1 inputs and produces 6 outputs. // //go:noescape func mulGFNI_1x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x6Xor takes 1 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_1x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_1x6Xor takes 1 inputs and produces 6 outputs. // //go:noescape @@ -183,11 +225,22 @@ func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_1x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x7 takes 1 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_1x7_64Xor takes 1 inputs and produces 7 outputs. // //go:noescape func mulGFNI_1x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x7Xor takes 1 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_1x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_1x7Xor takes 1 inputs and produces 7 outputs. // //go:noescape @@ -205,11 +258,22 @@ func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_1x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x8 takes 1 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_1x8_64Xor takes 1 inputs and produces 8 outputs. // //go:noescape func mulGFNI_1x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x8Xor takes 1 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_1x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_1x8Xor takes 1 inputs and produces 8 outputs. // //go:noescape @@ -227,11 +291,22 @@ func mulAvxTwo_1x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_1x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x9 takes 1 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_1x9_64Xor takes 1 inputs and produces 9 outputs. // //go:noescape func mulGFNI_1x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x9Xor takes 1 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_1x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_1x9Xor takes 1 inputs and produces 9 outputs. // //go:noescape @@ -249,21 +324,26 @@ func mulAvxTwo_1x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_1x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_1x10 takes 1 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_1x10_64Xor takes 1 inputs and produces 10 outputs. // //go:noescape func mulGFNI_1x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_1x10Xor takes 1 inputs and produces 10 outputs. +// mulAvxGFNI_1x10Xor takes 1 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_1x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_1x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_2x1 takes 2 inputs and produces 1 outputs. -// The output is initialized to 0. +// mulAvxTwo_1x10Xor takes 1 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxTwo_1x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_2x1_64 takes 2 inputs and produces 1 outputs. // The output is initialized to 0. @@ -277,27 +357,27 @@ func mulAvxTwo_2x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_2x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x1 takes 2 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_2x1_64Xor takes 2 inputs and produces 1 outputs. // //go:noescape func mulGFNI_2x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_2x1Xor takes 2 inputs and produces 1 outputs. +// mulAvxGFNI_2x1Xor takes 2 inputs and produces 1 outputs. // //go:noescape -func mulAvxTwo_2x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_2x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_2x1_64Xor takes 2 inputs and produces 1 outputs. // //go:noescape func mulAvxTwo_2x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_2x2 takes 2 inputs and produces 2 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_2x2_64 takes 2 inputs and produces 2 outputs. // The output is initialized to 0. // @@ -310,27 +390,27 @@ func mulAvxTwo_2x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_2x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x2 takes 2 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_2x2_64Xor takes 2 inputs and produces 2 outputs. // //go:noescape func mulGFNI_2x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_2x2Xor takes 2 inputs and produces 2 outputs. +// mulAvxGFNI_2x2Xor takes 2 inputs and produces 2 outputs. // //go:noescape -func mulAvxTwo_2x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_2x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_2x2_64Xor takes 2 inputs and produces 2 outputs. // //go:noescape func mulAvxTwo_2x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_2x3 takes 2 inputs and produces 3 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_2x3_64 takes 2 inputs and produces 3 outputs. // The output is initialized to 0. // @@ -343,15 +423,21 @@ func mulAvxTwo_2x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_2x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x3 takes 2 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_2x3_64Xor takes 2 inputs and produces 3 outputs. // //go:noescape func mulGFNI_2x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_2x3Xor takes 2 inputs and produces 3 outputs. +// mulAvxGFNI_2x3Xor takes 2 inputs and produces 3 outputs. // //go:noescape -func mulAvxTwo_2x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_2x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_2x3_64Xor takes 2 inputs and produces 3 outputs. // @@ -370,11 +456,22 @@ func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_2x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x4 takes 2 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_2x4_64Xor takes 2 inputs and produces 4 outputs. // //go:noescape func mulGFNI_2x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x4Xor takes 2 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_2x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_2x4Xor takes 2 inputs and produces 4 outputs. // //go:noescape @@ -392,11 +489,22 @@ func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_2x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x5 takes 2 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_2x5_64Xor takes 2 inputs and produces 5 outputs. // //go:noescape func mulGFNI_2x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x5Xor takes 2 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_2x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_2x5Xor takes 2 inputs and produces 5 outputs. // //go:noescape @@ -414,11 +522,22 @@ func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_2x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x6 takes 2 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_2x6_64Xor takes 2 inputs and produces 6 outputs. // //go:noescape func mulGFNI_2x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x6Xor takes 2 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_2x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_2x6Xor takes 2 inputs and produces 6 outputs. // //go:noescape @@ -436,11 +555,22 @@ func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_2x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x7 takes 2 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_2x7_64Xor takes 2 inputs and produces 7 outputs. // //go:noescape func mulGFNI_2x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x7Xor takes 2 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_2x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_2x7Xor takes 2 inputs and produces 7 outputs. // //go:noescape @@ -458,11 +588,22 @@ func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_2x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x8 takes 2 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_2x8_64Xor takes 2 inputs and produces 8 outputs. // //go:noescape func mulGFNI_2x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x8Xor takes 2 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_2x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_2x8Xor takes 2 inputs and produces 8 outputs. // //go:noescape @@ -480,11 +621,22 @@ func mulAvxTwo_2x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_2x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x9 takes 2 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_2x9_64Xor takes 2 inputs and produces 9 outputs. // //go:noescape func mulGFNI_2x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x9Xor takes 2 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_2x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_2x9Xor takes 2 inputs and produces 9 outputs. // //go:noescape @@ -502,21 +654,26 @@ func mulAvxTwo_2x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_2x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_2x10 takes 2 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_2x10_64Xor takes 2 inputs and produces 10 outputs. // //go:noescape func mulGFNI_2x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_2x10Xor takes 2 inputs and produces 10 outputs. +// mulAvxGFNI_2x10Xor takes 2 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_2x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_2x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_3x1 takes 3 inputs and produces 1 outputs. -// The output is initialized to 0. +// mulAvxTwo_2x10Xor takes 2 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxTwo_2x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_3x1_64 takes 3 inputs and produces 1 outputs. // The output is initialized to 0. @@ -530,27 +687,27 @@ func mulAvxTwo_3x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_3x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x1 takes 3 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_3x1_64Xor takes 3 inputs and produces 1 outputs. // //go:noescape func mulGFNI_3x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_3x1Xor takes 3 inputs and produces 1 outputs. +// mulAvxGFNI_3x1Xor takes 3 inputs and produces 1 outputs. // //go:noescape -func mulAvxTwo_3x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_3x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_3x1_64Xor takes 3 inputs and produces 1 outputs. // //go:noescape func mulAvxTwo_3x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_3x2 takes 3 inputs and produces 2 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_3x2_64 takes 3 inputs and produces 2 outputs. // The output is initialized to 0. // @@ -563,27 +720,27 @@ func mulAvxTwo_3x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_3x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x2 takes 3 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_3x2_64Xor takes 3 inputs and produces 2 outputs. // //go:noescape func mulGFNI_3x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_3x2Xor takes 3 inputs and produces 2 outputs. +// mulAvxGFNI_3x2Xor takes 3 inputs and produces 2 outputs. // //go:noescape -func mulAvxTwo_3x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_3x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_3x2_64Xor takes 3 inputs and produces 2 outputs. // //go:noescape func mulAvxTwo_3x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_3x3 takes 3 inputs and produces 3 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_3x3_64 takes 3 inputs and produces 3 outputs. // The output is initialized to 0. // @@ -596,15 +753,21 @@ func mulAvxTwo_3x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_3x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x3 takes 3 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_3x3_64Xor takes 3 inputs and produces 3 outputs. // //go:noescape func mulGFNI_3x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_3x3Xor takes 3 inputs and produces 3 outputs. +// mulAvxGFNI_3x3Xor takes 3 inputs and produces 3 outputs. // //go:noescape -func mulAvxTwo_3x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_3x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_3x3_64Xor takes 3 inputs and produces 3 outputs. // @@ -623,11 +786,22 @@ func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_3x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x4 takes 3 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_3x4_64Xor takes 3 inputs and produces 4 outputs. // //go:noescape func mulGFNI_3x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x4Xor takes 3 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_3x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_3x4Xor takes 3 inputs and produces 4 outputs. // //go:noescape @@ -645,11 +819,22 @@ func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_3x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x5 takes 3 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_3x5_64Xor takes 3 inputs and produces 5 outputs. // //go:noescape func mulGFNI_3x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x5Xor takes 3 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_3x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_3x5Xor takes 3 inputs and produces 5 outputs. // //go:noescape @@ -667,11 +852,22 @@ func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_3x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x6 takes 3 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_3x6_64Xor takes 3 inputs and produces 6 outputs. // //go:noescape func mulGFNI_3x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x6Xor takes 3 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_3x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_3x6Xor takes 3 inputs and produces 6 outputs. // //go:noescape @@ -689,11 +885,22 @@ func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_3x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x7 takes 3 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_3x7_64Xor takes 3 inputs and produces 7 outputs. // //go:noescape func mulGFNI_3x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x7Xor takes 3 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_3x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_3x7Xor takes 3 inputs and produces 7 outputs. // //go:noescape @@ -711,11 +918,22 @@ func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_3x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x8 takes 3 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_3x8_64Xor takes 3 inputs and produces 8 outputs. // //go:noescape func mulGFNI_3x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x8Xor takes 3 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_3x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_3x8Xor takes 3 inputs and produces 8 outputs. // //go:noescape @@ -733,11 +951,22 @@ func mulAvxTwo_3x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_3x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x9 takes 3 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_3x9_64Xor takes 3 inputs and produces 9 outputs. // //go:noescape func mulGFNI_3x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x9Xor takes 3 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_3x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_3x9Xor takes 3 inputs and produces 9 outputs. // //go:noescape @@ -755,21 +984,26 @@ func mulAvxTwo_3x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_3x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_3x10 takes 3 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_3x10_64Xor takes 3 inputs and produces 10 outputs. // //go:noescape func mulGFNI_3x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_3x10Xor takes 3 inputs and produces 10 outputs. +// mulAvxGFNI_3x10Xor takes 3 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_3x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_3x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_4x1 takes 4 inputs and produces 1 outputs. -// The output is initialized to 0. +// mulAvxTwo_3x10Xor takes 3 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxTwo_3x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_4x1_64 takes 4 inputs and produces 1 outputs. // The output is initialized to 0. @@ -783,27 +1017,27 @@ func mulAvxTwo_4x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_4x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x1 takes 4 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_4x1_64Xor takes 4 inputs and produces 1 outputs. // //go:noescape func mulGFNI_4x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_4x1Xor takes 4 inputs and produces 1 outputs. +// mulAvxGFNI_4x1Xor takes 4 inputs and produces 1 outputs. // //go:noescape -func mulAvxTwo_4x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_4x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_4x1_64Xor takes 4 inputs and produces 1 outputs. // //go:noescape func mulAvxTwo_4x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_4x2 takes 4 inputs and produces 2 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_4x2_64 takes 4 inputs and produces 2 outputs. // The output is initialized to 0. // @@ -816,27 +1050,27 @@ func mulAvxTwo_4x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_4x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x2 takes 4 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_4x2_64Xor takes 4 inputs and produces 2 outputs. // //go:noescape func mulGFNI_4x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_4x2Xor takes 4 inputs and produces 2 outputs. +// mulAvxGFNI_4x2Xor takes 4 inputs and produces 2 outputs. // //go:noescape -func mulAvxTwo_4x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_4x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_4x2_64Xor takes 4 inputs and produces 2 outputs. // //go:noescape func mulAvxTwo_4x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_4x3 takes 4 inputs and produces 3 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_4x3_64 takes 4 inputs and produces 3 outputs. // The output is initialized to 0. // @@ -849,15 +1083,21 @@ func mulAvxTwo_4x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_4x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x3 takes 4 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_4x3_64Xor takes 4 inputs and produces 3 outputs. // //go:noescape func mulGFNI_4x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_4x3Xor takes 4 inputs and produces 3 outputs. +// mulAvxGFNI_4x3Xor takes 4 inputs and produces 3 outputs. // //go:noescape -func mulAvxTwo_4x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_4x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_4x3_64Xor takes 4 inputs and produces 3 outputs. // @@ -876,11 +1116,22 @@ func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_4x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x4 takes 4 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_4x4_64Xor takes 4 inputs and produces 4 outputs. // //go:noescape func mulGFNI_4x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x4Xor takes 4 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_4x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_4x4Xor takes 4 inputs and produces 4 outputs. // //go:noescape @@ -898,11 +1149,22 @@ func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_4x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x5 takes 4 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_4x5_64Xor takes 4 inputs and produces 5 outputs. // //go:noescape func mulGFNI_4x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x5Xor takes 4 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_4x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_4x5Xor takes 4 inputs and produces 5 outputs. // //go:noescape @@ -920,10 +1182,21 @@ func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_4x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulGFNI_4x6_64Xor takes 4 inputs and produces 6 outputs. +// mulAvxGFNI_4x6 takes 4 inputs and produces 6 outputs. +// The output is initialized to 0. // //go:noescape -func mulGFNI_4x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_4x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x6_64Xor takes 4 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_4x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x6Xor takes 4 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_4x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_4x6Xor takes 4 inputs and produces 6 outputs. // @@ -942,11 +1215,22 @@ func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_4x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x7 takes 4 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_4x7_64Xor takes 4 inputs and produces 7 outputs. // //go:noescape func mulGFNI_4x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x7Xor takes 4 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_4x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_4x7Xor takes 4 inputs and produces 7 outputs. // //go:noescape @@ -964,11 +1248,22 @@ func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_4x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x8 takes 4 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_4x8_64Xor takes 4 inputs and produces 8 outputs. // //go:noescape func mulGFNI_4x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x8Xor takes 4 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_4x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_4x8Xor takes 4 inputs and produces 8 outputs. // //go:noescape @@ -986,11 +1281,22 @@ func mulAvxTwo_4x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_4x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x9 takes 4 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_4x9_64Xor takes 4 inputs and produces 9 outputs. // //go:noescape func mulGFNI_4x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x9Xor takes 4 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_4x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_4x9Xor takes 4 inputs and produces 9 outputs. // //go:noescape @@ -1008,21 +1314,26 @@ func mulAvxTwo_4x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_4x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_4x10 takes 4 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_4x10_64Xor takes 4 inputs and produces 10 outputs. // //go:noescape func mulGFNI_4x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_4x10Xor takes 4 inputs and produces 10 outputs. +// mulAvxGFNI_4x10Xor takes 4 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_4x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_4x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_5x1 takes 5 inputs and produces 1 outputs. -// The output is initialized to 0. +// mulAvxTwo_4x10Xor takes 4 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxTwo_4x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_5x1_64 takes 5 inputs and produces 1 outputs. // The output is initialized to 0. @@ -1036,27 +1347,27 @@ func mulAvxTwo_5x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_5x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x1 takes 5 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_5x1_64Xor takes 5 inputs and produces 1 outputs. // //go:noescape func mulGFNI_5x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_5x1Xor takes 5 inputs and produces 1 outputs. +// mulAvxGFNI_5x1Xor takes 5 inputs and produces 1 outputs. // //go:noescape -func mulAvxTwo_5x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_5x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_5x1_64Xor takes 5 inputs and produces 1 outputs. // //go:noescape func mulAvxTwo_5x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_5x2 takes 5 inputs and produces 2 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_5x2_64 takes 5 inputs and produces 2 outputs. // The output is initialized to 0. // @@ -1069,27 +1380,27 @@ func mulAvxTwo_5x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_5x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x2 takes 5 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_5x2_64Xor takes 5 inputs and produces 2 outputs. // //go:noescape func mulGFNI_5x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_5x2Xor takes 5 inputs and produces 2 outputs. +// mulAvxGFNI_5x2Xor takes 5 inputs and produces 2 outputs. // //go:noescape -func mulAvxTwo_5x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_5x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_5x2_64Xor takes 5 inputs and produces 2 outputs. // //go:noescape func mulAvxTwo_5x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_5x3 takes 5 inputs and produces 3 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_5x3_64 takes 5 inputs and produces 3 outputs. // The output is initialized to 0. // @@ -1102,15 +1413,21 @@ func mulAvxTwo_5x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_5x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x3 takes 5 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_5x3_64Xor takes 5 inputs and produces 3 outputs. // //go:noescape func mulGFNI_5x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_5x3Xor takes 5 inputs and produces 3 outputs. +// mulAvxGFNI_5x3Xor takes 5 inputs and produces 3 outputs. // //go:noescape -func mulAvxTwo_5x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_5x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_5x3_64Xor takes 5 inputs and produces 3 outputs. // @@ -1129,11 +1446,22 @@ func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_5x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x4 takes 5 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_5x4_64Xor takes 5 inputs and produces 4 outputs. // //go:noescape func mulGFNI_5x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x4Xor takes 5 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_5x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_5x4Xor takes 5 inputs and produces 4 outputs. // //go:noescape @@ -1151,11 +1479,22 @@ func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_5x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x5 takes 5 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_5x5_64Xor takes 5 inputs and produces 5 outputs. // //go:noescape func mulGFNI_5x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x5Xor takes 5 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_5x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_5x5Xor takes 5 inputs and produces 5 outputs. // //go:noescape @@ -1173,11 +1512,22 @@ func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_5x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x6 takes 5 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_5x6_64Xor takes 5 inputs and produces 6 outputs. // //go:noescape func mulGFNI_5x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x6Xor takes 5 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_5x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_5x6Xor takes 5 inputs and produces 6 outputs. // //go:noescape @@ -1195,11 +1545,22 @@ func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_5x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x7 takes 5 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_5x7_64Xor takes 5 inputs and produces 7 outputs. // //go:noescape func mulGFNI_5x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x7Xor takes 5 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_5x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_5x7Xor takes 5 inputs and produces 7 outputs. // //go:noescape @@ -1217,11 +1578,22 @@ func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_5x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x8 takes 5 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_5x8_64Xor takes 5 inputs and produces 8 outputs. // //go:noescape func mulGFNI_5x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x8Xor takes 5 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_5x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_5x8Xor takes 5 inputs and produces 8 outputs. // //go:noescape @@ -1239,11 +1611,22 @@ func mulAvxTwo_5x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_5x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x9 takes 5 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_5x9_64Xor takes 5 inputs and produces 9 outputs. // //go:noescape func mulGFNI_5x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x9Xor takes 5 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_5x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_5x9Xor takes 5 inputs and produces 9 outputs. // //go:noescape @@ -1261,21 +1644,26 @@ func mulAvxTwo_5x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_5x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_5x10 takes 5 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_5x10_64Xor takes 5 inputs and produces 10 outputs. // //go:noescape func mulGFNI_5x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_5x10Xor takes 5 inputs and produces 10 outputs. +// mulAvxGFNI_5x10Xor takes 5 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_5x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_5x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_6x1 takes 6 inputs and produces 1 outputs. -// The output is initialized to 0. +// mulAvxTwo_5x10Xor takes 5 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxTwo_5x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_6x1_64 takes 6 inputs and produces 1 outputs. // The output is initialized to 0. @@ -1289,27 +1677,27 @@ func mulAvxTwo_6x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_6x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x1 takes 6 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_6x1_64Xor takes 6 inputs and produces 1 outputs. // //go:noescape func mulGFNI_6x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_6x1Xor takes 6 inputs and produces 1 outputs. +// mulAvxGFNI_6x1Xor takes 6 inputs and produces 1 outputs. // //go:noescape -func mulAvxTwo_6x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_6x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_6x1_64Xor takes 6 inputs and produces 1 outputs. // //go:noescape func mulAvxTwo_6x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_6x2 takes 6 inputs and produces 2 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_6x2_64 takes 6 inputs and produces 2 outputs. // The output is initialized to 0. // @@ -1322,27 +1710,27 @@ func mulAvxTwo_6x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_6x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x2 takes 6 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_6x2_64Xor takes 6 inputs and produces 2 outputs. // //go:noescape func mulGFNI_6x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_6x2Xor takes 6 inputs and produces 2 outputs. +// mulAvxGFNI_6x2Xor takes 6 inputs and produces 2 outputs. // //go:noescape -func mulAvxTwo_6x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_6x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_6x2_64Xor takes 6 inputs and produces 2 outputs. // //go:noescape func mulAvxTwo_6x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_6x3 takes 6 inputs and produces 3 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_6x3_64 takes 6 inputs and produces 3 outputs. // The output is initialized to 0. // @@ -1355,15 +1743,21 @@ func mulAvxTwo_6x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_6x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x3 takes 6 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_6x3_64Xor takes 6 inputs and produces 3 outputs. // //go:noescape func mulGFNI_6x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_6x3Xor takes 6 inputs and produces 3 outputs. +// mulAvxGFNI_6x3Xor takes 6 inputs and produces 3 outputs. // //go:noescape -func mulAvxTwo_6x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_6x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_6x3_64Xor takes 6 inputs and produces 3 outputs. // @@ -1382,11 +1776,22 @@ func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_6x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x4 takes 6 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_6x4_64Xor takes 6 inputs and produces 4 outputs. // //go:noescape func mulGFNI_6x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x4Xor takes 6 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_6x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_6x4Xor takes 6 inputs and produces 4 outputs. // //go:noescape @@ -1404,11 +1809,22 @@ func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_6x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x5 takes 6 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_6x5_64Xor takes 6 inputs and produces 5 outputs. // //go:noescape func mulGFNI_6x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x5Xor takes 6 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_6x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_6x5Xor takes 6 inputs and produces 5 outputs. // //go:noescape @@ -1426,11 +1842,22 @@ func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_6x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x6 takes 6 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_6x6_64Xor takes 6 inputs and produces 6 outputs. // //go:noescape func mulGFNI_6x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x6Xor takes 6 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_6x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_6x6Xor takes 6 inputs and produces 6 outputs. // //go:noescape @@ -1448,11 +1875,22 @@ func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_6x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x7 takes 6 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_6x7_64Xor takes 6 inputs and produces 7 outputs. // //go:noescape func mulGFNI_6x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x7Xor takes 6 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_6x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_6x7Xor takes 6 inputs and produces 7 outputs. // //go:noescape @@ -1470,11 +1908,22 @@ func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_6x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x8 takes 6 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_6x8_64Xor takes 6 inputs and produces 8 outputs. // //go:noescape func mulGFNI_6x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x8Xor takes 6 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_6x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_6x8Xor takes 6 inputs and produces 8 outputs. // //go:noescape @@ -1492,11 +1941,22 @@ func mulAvxTwo_6x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_6x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x9 takes 6 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_6x9_64Xor takes 6 inputs and produces 9 outputs. // //go:noescape func mulGFNI_6x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x9Xor takes 6 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_6x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_6x9Xor takes 6 inputs and produces 9 outputs. // //go:noescape @@ -1514,21 +1974,26 @@ func mulAvxTwo_6x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_6x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_6x10 takes 6 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_6x10_64Xor takes 6 inputs and produces 10 outputs. // //go:noescape func mulGFNI_6x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_6x10Xor takes 6 inputs and produces 10 outputs. +// mulAvxGFNI_6x10Xor takes 6 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_6x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_6x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_7x1 takes 7 inputs and produces 1 outputs. -// The output is initialized to 0. +// mulAvxTwo_6x10Xor takes 6 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxTwo_6x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_7x1_64 takes 7 inputs and produces 1 outputs. // The output is initialized to 0. @@ -1542,27 +2007,27 @@ func mulAvxTwo_7x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_7x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x1 takes 7 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_7x1_64Xor takes 7 inputs and produces 1 outputs. // //go:noescape func mulGFNI_7x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_7x1Xor takes 7 inputs and produces 1 outputs. +// mulAvxGFNI_7x1Xor takes 7 inputs and produces 1 outputs. // //go:noescape -func mulAvxTwo_7x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_7x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_7x1_64Xor takes 7 inputs and produces 1 outputs. // //go:noescape func mulAvxTwo_7x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_7x2 takes 7 inputs and produces 2 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_7x2_64 takes 7 inputs and produces 2 outputs. // The output is initialized to 0. // @@ -1575,27 +2040,27 @@ func mulAvxTwo_7x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_7x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x2 takes 7 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_7x2_64Xor takes 7 inputs and produces 2 outputs. // //go:noescape func mulGFNI_7x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_7x2Xor takes 7 inputs and produces 2 outputs. +// mulAvxGFNI_7x2Xor takes 7 inputs and produces 2 outputs. // //go:noescape -func mulAvxTwo_7x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_7x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_7x2_64Xor takes 7 inputs and produces 2 outputs. // //go:noescape func mulAvxTwo_7x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_7x3 takes 7 inputs and produces 3 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_7x3_64 takes 7 inputs and produces 3 outputs. // The output is initialized to 0. // @@ -1608,15 +2073,21 @@ func mulAvxTwo_7x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_7x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x3 takes 7 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_7x3_64Xor takes 7 inputs and produces 3 outputs. // //go:noescape func mulGFNI_7x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_7x3Xor takes 7 inputs and produces 3 outputs. +// mulAvxGFNI_7x3Xor takes 7 inputs and produces 3 outputs. // //go:noescape -func mulAvxTwo_7x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_7x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_7x3_64Xor takes 7 inputs and produces 3 outputs. // @@ -1635,11 +2106,22 @@ func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_7x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x4 takes 7 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_7x4_64Xor takes 7 inputs and produces 4 outputs. // //go:noescape func mulGFNI_7x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x4Xor takes 7 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_7x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_7x4Xor takes 7 inputs and produces 4 outputs. // //go:noescape @@ -1657,11 +2139,22 @@ func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_7x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x5 takes 7 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_7x5_64Xor takes 7 inputs and produces 5 outputs. // //go:noescape func mulGFNI_7x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x5Xor takes 7 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_7x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_7x5Xor takes 7 inputs and produces 5 outputs. // //go:noescape @@ -1679,11 +2172,22 @@ func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_7x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x6 takes 7 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_7x6_64Xor takes 7 inputs and produces 6 outputs. // //go:noescape func mulGFNI_7x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x6Xor takes 7 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_7x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_7x6Xor takes 7 inputs and produces 6 outputs. // //go:noescape @@ -1701,11 +2205,22 @@ func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_7x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x7 takes 7 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_7x7_64Xor takes 7 inputs and produces 7 outputs. // //go:noescape func mulGFNI_7x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x7Xor takes 7 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_7x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_7x7Xor takes 7 inputs and produces 7 outputs. // //go:noescape @@ -1723,11 +2238,22 @@ func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_7x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x8 takes 7 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_7x8_64Xor takes 7 inputs and produces 8 outputs. // //go:noescape func mulGFNI_7x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x8Xor takes 7 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_7x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_7x8Xor takes 7 inputs and produces 8 outputs. // //go:noescape @@ -1745,11 +2271,22 @@ func mulAvxTwo_7x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_7x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x9 takes 7 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_7x9_64Xor takes 7 inputs and produces 9 outputs. // //go:noescape func mulGFNI_7x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x9Xor takes 7 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_7x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_7x9Xor takes 7 inputs and produces 9 outputs. // //go:noescape @@ -1767,21 +2304,26 @@ func mulAvxTwo_7x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_7x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_7x10 takes 7 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_7x10_64Xor takes 7 inputs and produces 10 outputs. // //go:noescape func mulGFNI_7x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_7x10Xor takes 7 inputs and produces 10 outputs. +// mulAvxGFNI_7x10Xor takes 7 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_7x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_7x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_8x1 takes 8 inputs and produces 1 outputs. -// The output is initialized to 0. +// mulAvxTwo_7x10Xor takes 7 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxTwo_7x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_8x1_64 takes 8 inputs and produces 1 outputs. // The output is initialized to 0. @@ -1795,27 +2337,27 @@ func mulAvxTwo_8x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_8x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x1 takes 8 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_8x1_64Xor takes 8 inputs and produces 1 outputs. // //go:noescape func mulGFNI_8x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_8x1Xor takes 8 inputs and produces 1 outputs. +// mulAvxGFNI_8x1Xor takes 8 inputs and produces 1 outputs. // //go:noescape -func mulAvxTwo_8x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_8x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_8x1_64Xor takes 8 inputs and produces 1 outputs. // //go:noescape func mulAvxTwo_8x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_8x2 takes 8 inputs and produces 2 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_8x2_64 takes 8 inputs and produces 2 outputs. // The output is initialized to 0. // @@ -1828,27 +2370,27 @@ func mulAvxTwo_8x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_8x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x2 takes 8 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_8x2_64Xor takes 8 inputs and produces 2 outputs. // //go:noescape func mulGFNI_8x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_8x2Xor takes 8 inputs and produces 2 outputs. +// mulAvxGFNI_8x2Xor takes 8 inputs and produces 2 outputs. // //go:noescape -func mulAvxTwo_8x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_8x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_8x2_64Xor takes 8 inputs and produces 2 outputs. // //go:noescape func mulAvxTwo_8x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_8x3 takes 8 inputs and produces 3 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_8x3_64 takes 8 inputs and produces 3 outputs. // The output is initialized to 0. // @@ -1861,15 +2403,21 @@ func mulAvxTwo_8x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_8x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x3 takes 8 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_8x3_64Xor takes 8 inputs and produces 3 outputs. // //go:noescape func mulGFNI_8x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_8x3Xor takes 8 inputs and produces 3 outputs. +// mulAvxGFNI_8x3Xor takes 8 inputs and produces 3 outputs. // //go:noescape -func mulAvxTwo_8x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_8x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_8x3_64Xor takes 8 inputs and produces 3 outputs. // @@ -1888,11 +2436,22 @@ func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_8x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x4 takes 8 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_8x4_64Xor takes 8 inputs and produces 4 outputs. // //go:noescape func mulGFNI_8x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x4Xor takes 8 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_8x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_8x4Xor takes 8 inputs and produces 4 outputs. // //go:noescape @@ -1910,11 +2469,22 @@ func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_8x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x5 takes 8 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_8x5_64Xor takes 8 inputs and produces 5 outputs. // //go:noescape func mulGFNI_8x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x5Xor takes 8 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_8x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_8x5Xor takes 8 inputs and produces 5 outputs. // //go:noescape @@ -1932,11 +2502,22 @@ func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_8x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x6 takes 8 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_8x6_64Xor takes 8 inputs and produces 6 outputs. // //go:noescape func mulGFNI_8x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x6Xor takes 8 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_8x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_8x6Xor takes 8 inputs and produces 6 outputs. // //go:noescape @@ -1954,11 +2535,22 @@ func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_8x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x7 takes 8 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_8x7_64Xor takes 8 inputs and produces 7 outputs. // //go:noescape func mulGFNI_8x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x7Xor takes 8 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_8x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_8x7Xor takes 8 inputs and produces 7 outputs. // //go:noescape @@ -1976,11 +2568,22 @@ func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_8x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x8 takes 8 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_8x8_64Xor takes 8 inputs and produces 8 outputs. // //go:noescape func mulGFNI_8x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x8Xor takes 8 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_8x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_8x8Xor takes 8 inputs and produces 8 outputs. // //go:noescape @@ -1998,11 +2601,22 @@ func mulAvxTwo_8x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_8x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x9 takes 8 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_8x9_64Xor takes 8 inputs and produces 9 outputs. // //go:noescape func mulGFNI_8x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x9Xor takes 8 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_8x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_8x9Xor takes 8 inputs and produces 9 outputs. // //go:noescape @@ -2020,21 +2634,26 @@ func mulAvxTwo_8x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_8x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_8x10 takes 8 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_8x10_64Xor takes 8 inputs and produces 10 outputs. // //go:noescape func mulGFNI_8x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_8x10Xor takes 8 inputs and produces 10 outputs. +// mulAvxGFNI_8x10Xor takes 8 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_8x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_8x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_9x1 takes 9 inputs and produces 1 outputs. -// The output is initialized to 0. +// mulAvxTwo_8x10Xor takes 8 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxTwo_8x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_9x1_64 takes 9 inputs and produces 1 outputs. // The output is initialized to 0. @@ -2048,27 +2667,27 @@ func mulAvxTwo_9x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_9x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x1 takes 9 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_9x1_64Xor takes 9 inputs and produces 1 outputs. // //go:noescape func mulGFNI_9x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_9x1Xor takes 9 inputs and produces 1 outputs. +// mulAvxGFNI_9x1Xor takes 9 inputs and produces 1 outputs. // //go:noescape -func mulAvxTwo_9x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_9x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_9x1_64Xor takes 9 inputs and produces 1 outputs. // //go:noescape func mulAvxTwo_9x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_9x2 takes 9 inputs and produces 2 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_9x2_64 takes 9 inputs and produces 2 outputs. // The output is initialized to 0. // @@ -2081,27 +2700,27 @@ func mulAvxTwo_9x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_9x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x2 takes 9 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_9x2_64Xor takes 9 inputs and produces 2 outputs. // //go:noescape func mulGFNI_9x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_9x2Xor takes 9 inputs and produces 2 outputs. +// mulAvxGFNI_9x2Xor takes 9 inputs and produces 2 outputs. // //go:noescape -func mulAvxTwo_9x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_9x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_9x2_64Xor takes 9 inputs and produces 2 outputs. // //go:noescape func mulAvxTwo_9x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_9x3 takes 9 inputs and produces 3 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_9x3_64 takes 9 inputs and produces 3 outputs. // The output is initialized to 0. // @@ -2114,15 +2733,21 @@ func mulAvxTwo_9x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int //go:noescape func mulGFNI_9x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x3 takes 9 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_9x3_64Xor takes 9 inputs and produces 3 outputs. // //go:noescape func mulGFNI_9x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_9x3Xor takes 9 inputs and produces 3 outputs. +// mulAvxGFNI_9x3Xor takes 9 inputs and produces 3 outputs. // //go:noescape -func mulAvxTwo_9x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_9x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_9x3_64Xor takes 9 inputs and produces 3 outputs. // @@ -2141,11 +2766,22 @@ func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_9x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x4 takes 9 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_9x4_64Xor takes 9 inputs and produces 4 outputs. // //go:noescape func mulGFNI_9x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x4Xor takes 9 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_9x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_9x4Xor takes 9 inputs and produces 4 outputs. // //go:noescape @@ -2163,11 +2799,22 @@ func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_9x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x5 takes 9 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_9x5_64Xor takes 9 inputs and produces 5 outputs. // //go:noescape func mulGFNI_9x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x5Xor takes 9 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_9x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_9x5Xor takes 9 inputs and produces 5 outputs. // //go:noescape @@ -2185,11 +2832,22 @@ func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_9x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x6 takes 9 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_9x6_64Xor takes 9 inputs and produces 6 outputs. // //go:noescape func mulGFNI_9x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x6Xor takes 9 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_9x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_9x6Xor takes 9 inputs and produces 6 outputs. // //go:noescape @@ -2207,11 +2865,22 @@ func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_9x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x7 takes 9 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_9x7_64Xor takes 9 inputs and produces 7 outputs. // //go:noescape func mulGFNI_9x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x7Xor takes 9 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_9x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_9x7Xor takes 9 inputs and produces 7 outputs. // //go:noescape @@ -2229,11 +2898,22 @@ func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_9x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x8 takes 9 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_9x8_64Xor takes 9 inputs and produces 8 outputs. // //go:noescape func mulGFNI_9x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x8Xor takes 9 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_9x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_9x8Xor takes 9 inputs and produces 8 outputs. // //go:noescape @@ -2251,11 +2931,22 @@ func mulAvxTwo_9x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_9x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x9 takes 9 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_9x9_64Xor takes 9 inputs and produces 9 outputs. // //go:noescape func mulGFNI_9x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x9Xor takes 9 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_9x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_9x9Xor takes 9 inputs and produces 9 outputs. // //go:noescape @@ -2273,21 +2964,26 @@ func mulAvxTwo_9x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_9x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_9x10 takes 9 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_9x10_64Xor takes 9 inputs and produces 10 outputs. // //go:noescape func mulGFNI_9x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_9x10Xor takes 9 inputs and produces 10 outputs. +// mulAvxGFNI_9x10Xor takes 9 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_9x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_9x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_10x1 takes 10 inputs and produces 1 outputs. -// The output is initialized to 0. +// mulAvxTwo_9x10Xor takes 9 inputs and produces 10 outputs. // //go:noescape -func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxTwo_9x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_10x1_64 takes 10 inputs and produces 1 outputs. // The output is initialized to 0. @@ -2301,27 +2997,27 @@ func mulAvxTwo_10x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n in //go:noescape func mulGFNI_10x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x1 takes 10 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_10x1_64Xor takes 10 inputs and produces 1 outputs. // //go:noescape func mulGFNI_10x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_10x1Xor takes 10 inputs and produces 1 outputs. +// mulAvxGFNI_10x1Xor takes 10 inputs and produces 1 outputs. // //go:noescape -func mulAvxTwo_10x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_10x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_10x1_64Xor takes 10 inputs and produces 1 outputs. // //go:noescape func mulAvxTwo_10x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_10x2 takes 10 inputs and produces 2 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_10x2_64 takes 10 inputs and produces 2 outputs. // The output is initialized to 0. // @@ -2334,27 +3030,27 @@ func mulAvxTwo_10x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n in //go:noescape func mulGFNI_10x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x2 takes 10 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_10x2_64Xor takes 10 inputs and produces 2 outputs. // //go:noescape func mulGFNI_10x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_10x2Xor takes 10 inputs and produces 2 outputs. +// mulAvxGFNI_10x2Xor takes 10 inputs and produces 2 outputs. // //go:noescape -func mulAvxTwo_10x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_10x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_10x2_64Xor takes 10 inputs and produces 2 outputs. // //go:noescape func mulAvxTwo_10x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_10x3 takes 10 inputs and produces 3 outputs. -// The output is initialized to 0. -// -//go:noescape -func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) - // mulAvxTwo_10x3_64 takes 10 inputs and produces 3 outputs. // The output is initialized to 0. // @@ -2367,15 +3063,21 @@ func mulAvxTwo_10x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n in //go:noescape func mulGFNI_10x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x3 takes 10 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_10x3_64Xor takes 10 inputs and produces 3 outputs. // //go:noescape func mulGFNI_10x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) -// mulAvxTwo_10x3Xor takes 10 inputs and produces 3 outputs. +// mulAvxGFNI_10x3Xor takes 10 inputs and produces 3 outputs. // //go:noescape -func mulAvxTwo_10x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +func mulAvxGFNI_10x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // mulAvxTwo_10x3_64Xor takes 10 inputs and produces 3 outputs. // @@ -2394,11 +3096,22 @@ func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_10x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x4 takes 10 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_10x4_64Xor takes 10 inputs and produces 4 outputs. // //go:noescape func mulGFNI_10x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x4Xor takes 10 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_10x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_10x4Xor takes 10 inputs and produces 4 outputs. // //go:noescape @@ -2416,11 +3129,22 @@ func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_10x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x5 takes 10 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_10x5_64Xor takes 10 inputs and produces 5 outputs. // //go:noescape func mulGFNI_10x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x5Xor takes 10 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_10x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_10x5Xor takes 10 inputs and produces 5 outputs. // //go:noescape @@ -2438,11 +3162,22 @@ func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_10x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x6 takes 10 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_10x6_64Xor takes 10 inputs and produces 6 outputs. // //go:noescape func mulGFNI_10x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x6Xor takes 10 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_10x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_10x6Xor takes 10 inputs and produces 6 outputs. // //go:noescape @@ -2460,11 +3195,22 @@ func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_10x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x7 takes 10 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_10x7_64Xor takes 10 inputs and produces 7 outputs. // //go:noescape func mulGFNI_10x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x7Xor takes 10 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_10x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_10x7Xor takes 10 inputs and produces 7 outputs. // //go:noescape @@ -2482,11 +3228,22 @@ func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_10x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x8 takes 10 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_10x8_64Xor takes 10 inputs and produces 8 outputs. // //go:noescape func mulGFNI_10x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x8Xor takes 10 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_10x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_10x8Xor takes 10 inputs and produces 8 outputs. // //go:noescape @@ -2504,11 +3261,22 @@ func mulAvxTwo_10x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_10x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x9 takes 10 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_10x9_64Xor takes 10 inputs and produces 9 outputs. // //go:noescape func mulGFNI_10x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x9Xor takes 10 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_10x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_10x9Xor takes 10 inputs and produces 9 outputs. // //go:noescape @@ -2526,11 +3294,22 @@ func mulAvxTwo_10x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) //go:noescape func mulGFNI_10x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x10 takes 10 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulGFNI_10x10_64Xor takes 10 inputs and produces 10 outputs. // //go:noescape func mulGFNI_10x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// mulAvxGFNI_10x10Xor takes 10 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_10x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + // mulAvxTwo_10x10Xor takes 10 inputs and produces 10 outputs. // //go:noescape diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s index 3a2acace..8ff74bf4 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s @@ -1,6 +1,6 @@ // Code generated by command: go run gen.go -out ../galois_gen_amd64.s -stubs ../galois_gen_amd64.go -pkg=reedsolomon. DO NOT EDIT. -//go:build !appengine && !noasm && !nogen && gc +//go:build !appengine && !noasm && !nogen && !nopshufb && gc #include "textflag.h" @@ -18,55 +18,96 @@ TEXT ·_dummy_(SB), $0 #endif RET -// func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, SSE2 -TEXT ·mulAvxTwo_1x1(SB), NOSPLIT, $0-88 - // Loading all tables to registers - // Destination kept in GP registers - // Full registers estimated 6 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_1x1_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - MOVQ in_base+24(FP), CX - MOVQ (CX), CX - MOVQ out_base+48(FP), DX - MOVQ (DX), DX - MOVQ start+72(FP), BX +// sSE2XorSlice will XOR in with out and store in out. +// Processes 16 bytes/loop. - // Add start offset to output - ADDQ BX, DX +// func sSE2XorSlice(in []byte, out []byte) +// Requires: SSE2 +TEXT ·sSE2XorSlice(SB), $0-48 + MOVQ in_base+0(FP), AX + MOVQ out_base+24(FP), CX + MOVQ in_len+8(FP), DX + SHRQ $0x04, DX + JZ end - // Add start offset to input - ADDQ BX, CX - MOVQ $0x0000000f, BX - MOVQ BX, X3 - VPBROADCASTB X3, Y3 +loop: + MOVOU (AX), X0 + MOVOU (CX), X1 + PXOR X0, X1 + MOVOU X1, (CX) + ADDQ $0x10, AX + ADDQ $0x10, CX + DECQ DX + JNZ loop + +end: + RET + +// sSE2XorSlice_64 will XOR in with out and store in out. +// Processes 64 bytes/loop. + +// func sSE2XorSlice_64(in []byte, out []byte) +// Requires: SSE2 +TEXT ·sSE2XorSlice_64(SB), $0-48 + MOVQ in_base+0(FP), AX + MOVQ out_base+24(FP), CX + MOVQ in_len+8(FP), DX + SHRQ $0x06, DX + JZ end -mulAvxTwo_1x1_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (CX), Y2 - ADDQ $0x20, CX - VPSRLQ $0x04, Y2, Y4 - VPAND Y3, Y2, Y2 - VPAND Y3, Y4, Y4 - VPSHUFB Y2, Y0, Y2 - VPSHUFB Y4, Y1, Y4 - VPXOR Y2, Y4, Y2 +loop: + MOVOU (AX), X0 + MOVOU 16(AX), X2 + MOVOU 32(AX), X4 + MOVOU 48(AX), X6 + MOVOU (CX), X1 + MOVOU 16(CX), X3 + MOVOU 32(CX), X5 + MOVOU 48(CX), X7 + PXOR X0, X1 + PXOR X2, X3 + PXOR X4, X5 + PXOR X6, X7 + MOVOU X1, (CX) + MOVOU X3, 16(CX) + MOVOU X5, 32(CX) + MOVOU X7, 48(CX) + ADDQ $0x40, AX + ADDQ $0x40, CX + DECQ DX + JNZ loop + +end: + RET + +// avx2XorSlice_64 will XOR in with out and store in out. +// Processes 64 bytes/loop. + +// func avx2XorSlice_64(in []byte, out []byte) +// Requires: AVX, AVX2 +TEXT ·avx2XorSlice_64(SB), $0-48 + MOVQ in_base+0(FP), AX + MOVQ out_base+24(FP), CX + MOVQ in_len+8(FP), DX + SHRQ $0x06, DX + JZ end - // Store 1 outputs - VMOVDQU Y2, (DX) - ADDQ $0x20, DX +loop: + VMOVDQU (AX), Y0 + VMOVDQU 32(AX), Y2 + VMOVDQU (CX), Y1 + VMOVDQU 32(CX), Y3 + VPXOR Y0, Y1, Y1 + VPXOR Y2, Y3, Y3 + VMOVDQU Y1, (CX) + VMOVDQU Y3, 32(CX) + ADDQ $0x40, AX + ADDQ $0x40, CX + DECQ DX + JNZ loop - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_1x1_loop +end: VZEROUPPER - -mulAvxTwo_1x1_end: RET // func mulAvxTwo_1x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -85,7 +126,6 @@ TEXT ·mulAvxTwo_1x1_64(SB), $0-88 MOVQ in_base+24(FP), CX MOVQ (CX), CX MOVQ out_base+48(FP), DX - MOVQ out_base+48(FP), DX MOVQ (DX), DX MOVQ start+72(FP), BX @@ -172,6 +212,49 @@ mulGFNI_1x1_64_loop: mulGFNI_1x1_64_end: RET +// func mulAvxGFNI_1x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 4 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x1_end + VBROADCASTSD (CX), Y0 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + MOVQ start+72(FP), BX + + // Add start offset to output + ADDQ BX, DX + + // Add start offset to input + ADDQ BX, CX + +mulAvxGFNI_1x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (CX), Y1 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y1, Y1 + + // Store 1 outputs + VMOVDQU Y1, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x1_loop + VZEROUPPER + +mulAvxGFNI_1x1_end: + RET + // func mulGFNI_1x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_1x1_64Xor(SB), $0-88 @@ -219,56 +302,51 @@ mulGFNI_1x1_64Xor_loop: mulGFNI_1x1_64Xor_end: RET -// func mulAvxTwo_1x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_1x1Xor(SB), NOSPLIT, $0-88 +// func mulAvxGFNI_1x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x1Xor(SB), $0-88 // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 6 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_1x1Xor_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - MOVQ in_base+24(FP), CX - MOVQ (CX), CX - MOVQ out_base+48(FP), DX - MOVQ (DX), DX - MOVQ start+72(FP), BX + // Full registers estimated 4 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x1Xor_end + VBROADCASTSD (CX), Y0 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + MOVQ start+72(FP), BX // Add start offset to output ADDQ BX, DX // Add start offset to input - ADDQ BX, CX - MOVQ $0x0000000f, BX - MOVQ BX, X3 - VPBROADCASTB X3, Y3 + ADDQ BX, CX + +mulAvxGFNI_1x1Xor_loop: + // Load 1 outputs + VMOVDQU (DX), Y1 -mulAvxTwo_1x1Xor_loop: // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (CX), Y4 - ADDQ $0x20, CX - VPSRLQ $0x04, Y4, Y5 - VPAND Y3, Y4, Y4 - VPAND Y3, Y5, Y5 - VMOVDQU (DX), Y2 - VPSHUFB Y4, Y0, Y4 - VPSHUFB Y5, Y1, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (CX), Y2 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y2, Y2 + VXORPD Y1, Y2, Y1 // Store 1 outputs - VMOVDQU Y2, (DX) + VMOVDQU Y1, (DX) ADDQ $0x20, DX // Prepare for next loop DECQ AX - JNZ mulAvxTwo_1x1Xor_loop + JNZ mulAvxGFNI_1x1Xor_loop VZEROUPPER -mulAvxTwo_1x1Xor_end: +mulAvxGFNI_1x1Xor_end: RET // func mulAvxTwo_1x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -287,7 +365,6 @@ TEXT ·mulAvxTwo_1x1_64Xor(SB), $0-88 MOVQ in_base+24(FP), CX MOVQ (CX), CX MOVQ out_base+48(FP), DX - MOVQ out_base+48(FP), DX MOVQ (DX), DX MOVQ start+72(FP), BX @@ -335,66 +412,6 @@ mulAvxTwo_1x1_64Xor_loop: mulAvxTwo_1x1_64Xor_end: RET -// func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, SSE2 -TEXT ·mulAvxTwo_1x2(SB), NOSPLIT, $0-88 - // Loading all tables to registers - // Destination kept in GP registers - // Full registers estimated 11 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_1x2_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - MOVQ in_base+24(FP), CX - MOVQ (CX), CX - MOVQ out_base+48(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), DX - MOVQ start+72(FP), SI - - // Add start offset to output - ADDQ SI, BX - ADDQ SI, DX - - // Add start offset to input - ADDQ SI, CX - MOVQ $0x0000000f, SI - MOVQ SI, X6 - VPBROADCASTB X6, Y6 - -mulAvxTwo_1x2_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (CX), Y8 - ADDQ $0x20, CX - VPSRLQ $0x04, Y8, Y9 - VPAND Y6, Y8, Y8 - VPAND Y6, Y9, Y9 - VPSHUFB Y8, Y0, Y5 - VPSHUFB Y9, Y1, Y7 - VPXOR Y5, Y7, Y4 - VPSHUFB Y8, Y2, Y5 - VPSHUFB Y9, Y3, Y7 - VPXOR Y5, Y7, Y5 - - // Store 2 outputs - VMOVDQU Y4, (BX) - ADDQ $0x20, BX - VMOVDQU Y5, (DX) - ADDQ $0x20, DX - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_1x2_loop - VZEROUPPER - -mulAvxTwo_1x2_end: - RET - // func mulAvxTwo_1x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, SSE2 TEXT ·mulAvxTwo_1x2_64(SB), $0-88 @@ -409,7 +426,6 @@ TEXT ·mulAvxTwo_1x2_64(SB), $0-88 MOVQ in_base+24(FP), DX MOVQ (DX), DX MOVQ out_base+48(FP), BX - MOVQ out_base+48(FP), BX MOVQ (BX), SI MOVQ 24(BX), BX MOVQ start+72(FP), DI @@ -517,6 +533,55 @@ mulGFNI_1x2_64_loop: mulGFNI_1x2_64_end: RET +// func mulAvxGFNI_1x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ start+72(FP), SI + + // Add start offset to output + ADDQ SI, BX + ADDQ SI, DX + + // Add start offset to input + ADDQ SI, CX + +mulAvxGFNI_1x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (CX), Y3 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y3, Y2 + VGF2P8AFFINEQB $0x00, Y1, Y3, Y3 + + // Store 2 outputs + VMOVDQU Y2, (BX) + ADDQ $0x20, BX + VMOVDQU Y3, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x2_loop + VZEROUPPER + +mulAvxGFNI_1x2_end: + RET + // func mulGFNI_1x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_1x2_64Xor(SB), $0-88 @@ -572,66 +637,59 @@ mulGFNI_1x2_64Xor_loop: mulGFNI_1x2_64Xor_end: RET -// func mulAvxTwo_1x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_1x2Xor(SB), NOSPLIT, $0-88 +// func mulAvxGFNI_1x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x2Xor(SB), $0-88 // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 11 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_1x2Xor_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - MOVQ in_base+24(FP), CX - MOVQ (CX), CX - MOVQ out_base+48(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), DX - MOVQ start+72(FP), SI + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ start+72(FP), SI // Add start offset to output ADDQ SI, BX ADDQ SI, DX // Add start offset to input - ADDQ SI, CX - MOVQ $0x0000000f, SI - MOVQ SI, X6 - VPBROADCASTB X6, Y6 + ADDQ SI, CX + +mulAvxGFNI_1x2Xor_loop: + // Load 2 outputs + VMOVDQU (BX), Y2 + VMOVDQU (DX), Y3 -mulAvxTwo_1x2Xor_loop: // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (CX), Y9 - ADDQ $0x20, CX - VPSRLQ $0x04, Y9, Y10 - VPAND Y6, Y9, Y9 - VPAND Y6, Y10, Y10 - VMOVDQU (BX), Y4 - VPSHUFB Y9, Y0, Y7 - VPSHUFB Y10, Y1, Y8 - XOR3WAY( $0x00, Y7, Y8, Y4) - VMOVDQU (DX), Y5 - VPSHUFB Y9, Y2, Y7 - VPSHUFB Y10, Y3, Y8 - XOR3WAY( $0x00, Y7, Y8, Y5) + VMOVDQU (CX), Y4 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y4, Y5 + VXORPD Y2, Y5, Y2 + VGF2P8AFFINEQB $0x00, Y1, Y4, Y5 + VXORPD Y3, Y5, Y3 // Store 2 outputs - VMOVDQU Y4, (BX) + VMOVDQU Y2, (BX) ADDQ $0x20, BX - VMOVDQU Y5, (DX) + VMOVDQU Y3, (DX) ADDQ $0x20, DX // Prepare for next loop DECQ AX - JNZ mulAvxTwo_1x2Xor_loop + JNZ mulAvxGFNI_1x2Xor_loop VZEROUPPER -mulAvxTwo_1x2Xor_end: +mulAvxGFNI_1x2Xor_end: RET // func mulAvxTwo_1x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -648,7 +706,6 @@ TEXT ·mulAvxTwo_1x2_64Xor(SB), $0-88 MOVQ in_base+24(FP), DX MOVQ (DX), DX MOVQ out_base+48(FP), BX - MOVQ out_base+48(FP), BX MOVQ (BX), SI MOVQ 24(BX), BX MOVQ start+72(FP), DI @@ -713,75 +770,6 @@ mulAvxTwo_1x2_64Xor_loop: mulAvxTwo_1x2_64Xor_end: RET -// func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, SSE2 -TEXT ·mulAvxTwo_1x3(SB), NOSPLIT, $0-88 - // Loading all tables to registers - // Destination kept in GP registers - // Full registers estimated 14 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_1x3_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - MOVQ in_base+24(FP), CX - MOVQ (CX), CX - MOVQ out_base+48(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DX - MOVQ start+72(FP), DI - - // Add start offset to output - ADDQ DI, BX - ADDQ DI, SI - ADDQ DI, DX - - // Add start offset to input - ADDQ DI, CX - MOVQ $0x0000000f, DI - MOVQ DI, X9 - VPBROADCASTB X9, Y9 - -mulAvxTwo_1x3_loop: - // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (CX), Y11 - ADDQ $0x20, CX - VPSRLQ $0x04, Y11, Y12 - VPAND Y9, Y11, Y11 - VPAND Y9, Y12, Y12 - VPSHUFB Y11, Y0, Y8 - VPSHUFB Y12, Y1, Y10 - VPXOR Y8, Y10, Y6 - VPSHUFB Y11, Y2, Y8 - VPSHUFB Y12, Y3, Y10 - VPXOR Y8, Y10, Y7 - VPSHUFB Y11, Y4, Y8 - VPSHUFB Y12, Y5, Y10 - VPXOR Y8, Y10, Y8 - - // Store 3 outputs - VMOVDQU Y6, (BX) - ADDQ $0x20, BX - VMOVDQU Y7, (SI) - ADDQ $0x20, SI - VMOVDQU Y8, (DX) - ADDQ $0x20, DX - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_1x3_loop - VZEROUPPER - -mulAvxTwo_1x3_end: - RET - // func mulAvxTwo_1x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, SSE2 TEXT ·mulAvxTwo_1x3_64(SB), $0-88 @@ -796,7 +784,6 @@ TEXT ·mulAvxTwo_1x3_64(SB), $0-88 MOVQ in_base+24(FP), DX MOVQ (DX), DX MOVQ out_base+48(FP), BX - MOVQ out_base+48(FP), BX MOVQ (BX), SI MOVQ 24(BX), DI MOVQ 48(BX), BX @@ -923,6 +910,61 @@ mulGFNI_1x3_64_loop: mulGFNI_1x3_64_end: RET +// func mulAvxGFNI_1x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x3(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, BX + ADDQ DI, SI + ADDQ DI, DX + + // Add start offset to input + ADDQ DI, CX + +mulAvxGFNI_1x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (CX), Y5 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y5, Y3 + VGF2P8AFFINEQB $0x00, Y1, Y5, Y4 + VGF2P8AFFINEQB $0x00, Y2, Y5, Y5 + + // Store 3 outputs + VMOVDQU Y3, (BX) + ADDQ $0x20, BX + VMOVDQU Y4, (SI) + ADDQ $0x20, SI + VMOVDQU Y5, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x3_loop + VZEROUPPER + +mulAvxGFNI_1x3_end: + RET + // func mulGFNI_1x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_1x3_64Xor(SB), $0-88 @@ -986,30 +1028,28 @@ mulGFNI_1x3_64Xor_loop: mulGFNI_1x3_64Xor_end: RET -// func mulAvxTwo_1x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_1x3Xor(SB), NOSPLIT, $0-88 +// func mulAvxGFNI_1x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x3Xor(SB), $0-88 // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 14 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_1x3Xor_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - MOVQ in_base+24(FP), CX - MOVQ (CX), CX - MOVQ out_base+48(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DX - MOVQ start+72(FP), DI + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ start+72(FP), DI // Add start offset to output ADDQ DI, BX @@ -1017,45 +1057,38 @@ TEXT ·mulAvxTwo_1x3Xor(SB), NOSPLIT, $0-88 ADDQ DI, DX // Add start offset to input - ADDQ DI, CX - MOVQ $0x0000000f, DI - MOVQ DI, X9 - VPBROADCASTB X9, Y9 + ADDQ DI, CX + +mulAvxGFNI_1x3Xor_loop: + // Load 3 outputs + VMOVDQU (BX), Y3 + VMOVDQU (SI), Y4 + VMOVDQU (DX), Y5 -mulAvxTwo_1x3Xor_loop: // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (CX), Y12 - ADDQ $0x20, CX - VPSRLQ $0x04, Y12, Y13 - VPAND Y9, Y12, Y12 - VPAND Y9, Y13, Y13 - VMOVDQU (BX), Y6 - VPSHUFB Y12, Y0, Y10 - VPSHUFB Y13, Y1, Y11 - XOR3WAY( $0x00, Y10, Y11, Y6) - VMOVDQU (SI), Y7 - VPSHUFB Y12, Y2, Y10 - VPSHUFB Y13, Y3, Y11 - XOR3WAY( $0x00, Y10, Y11, Y7) - VMOVDQU (DX), Y8 - VPSHUFB Y12, Y4, Y10 - VPSHUFB Y13, Y5, Y11 - XOR3WAY( $0x00, Y10, Y11, Y8) + VMOVDQU (CX), Y6 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y6, Y7 + VXORPD Y3, Y7, Y3 + VGF2P8AFFINEQB $0x00, Y1, Y6, Y7 + VXORPD Y4, Y7, Y4 + VGF2P8AFFINEQB $0x00, Y2, Y6, Y7 + VXORPD Y5, Y7, Y5 // Store 3 outputs - VMOVDQU Y6, (BX) + VMOVDQU Y3, (BX) ADDQ $0x20, BX - VMOVDQU Y7, (SI) + VMOVDQU Y4, (SI) ADDQ $0x20, SI - VMOVDQU Y8, (DX) + VMOVDQU Y5, (DX) ADDQ $0x20, DX // Prepare for next loop DECQ AX - JNZ mulAvxTwo_1x3Xor_loop + JNZ mulAvxGFNI_1x3Xor_loop VZEROUPPER -mulAvxTwo_1x3Xor_end: +mulAvxGFNI_1x3Xor_end: RET // func mulAvxTwo_1x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -1072,7 +1105,6 @@ TEXT ·mulAvxTwo_1x3_64Xor(SB), $0-88 MOVQ in_base+24(FP), DX MOVQ (DX), DX MOVQ out_base+48(FP), BX - MOVQ out_base+48(FP), BX MOVQ (BX), SI MOVQ 24(BX), DI MOVQ 48(BX), BX @@ -1291,6 +1323,67 @@ mulGFNI_1x4_64_loop: mulGFNI_1x4_64_end: RET +// func mulAvxGFNI_1x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x4(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, DX + + // Add start offset to input + ADDQ R8, CX + +mulAvxGFNI_1x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (CX), Y7 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y7, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y7, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y7, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y7, Y7 + + // Store 4 outputs + VMOVDQU Y4, (BX) + ADDQ $0x20, BX + VMOVDQU Y5, (SI) + ADDQ $0x20, SI + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x4_loop + VZEROUPPER + +mulAvxGFNI_1x4_end: + RET + // func mulGFNI_1x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_1x4_64Xor(SB), $0-88 @@ -1362,6 +1455,77 @@ mulGFNI_1x4_64Xor_loop: mulGFNI_1x4_64Xor_end: RET +// func mulAvxGFNI_1x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x4Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, DX + + // Add start offset to input + ADDQ R8, CX + +mulAvxGFNI_1x4Xor_loop: + // Load 4 outputs + VMOVDQU (BX), Y4 + VMOVDQU (SI), Y5 + VMOVDQU (DI), Y6 + VMOVDQU (DX), Y7 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (CX), Y8 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y8, Y9 + VXORPD Y4, Y9, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y8, Y9 + VXORPD Y5, Y9, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y8, Y9 + VXORPD Y7, Y9, Y7 + + // Store 4 outputs + VMOVDQU Y4, (BX) + ADDQ $0x20, BX + VMOVDQU Y5, (SI) + ADDQ $0x20, SI + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x4Xor_loop + VZEROUPPER + +mulAvxGFNI_1x4Xor_end: + RET + // func mulAvxTwo_1x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_1x4Xor(SB), NOSPLIT, $0-88 @@ -1598,6 +1762,73 @@ mulGFNI_1x5_64_loop: mulGFNI_1x5_64_end: RET +// func mulAvxGFNI_1x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x5(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, DX + + // Add start offset to input + ADDQ R9, CX + +mulAvxGFNI_1x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (CX), Y9 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y9, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y9, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y9, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y9, Y9 + + // Store 5 outputs + VMOVDQU Y5, (BX) + ADDQ $0x20, BX + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x5_loop + VZEROUPPER + +mulAvxGFNI_1x5_end: + RET + // func mulGFNI_1x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_1x5_64Xor(SB), $0-88 @@ -1677,6 +1908,85 @@ mulGFNI_1x5_64Xor_loop: mulGFNI_1x5_64Xor_end: RET +// func mulAvxGFNI_1x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x5Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, DX + + // Add start offset to input + ADDQ R9, CX + +mulAvxGFNI_1x5Xor_loop: + // Load 5 outputs + VMOVDQU (BX), Y5 + VMOVDQU (SI), Y6 + VMOVDQU (DI), Y7 + VMOVDQU (R8), Y8 + VMOVDQU (DX), Y9 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (CX), Y10 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y10, Y11 + VXORPD Y5, Y11, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y10, Y11 + VXORPD Y6, Y11, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y10, Y11 + VXORPD Y7, Y11, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Store 5 outputs + VMOVDQU Y5, (BX) + ADDQ $0x20, BX + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x5Xor_loop + VZEROUPPER + +mulAvxGFNI_1x5Xor_end: + RET + // func mulAvxTwo_1x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_1x5Xor(SB), NOSPLIT, $0-88 @@ -1938,6 +2248,79 @@ mulGFNI_1x6_64_loop: mulGFNI_1x6_64_end: RET +// func mulAvxGFNI_1x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x6(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, DX + + // Add start offset to input + ADDQ R10, CX + +mulAvxGFNI_1x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (CX), Y11 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y11, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y11, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y11, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y11, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y11, Y11 + + // Store 6 outputs + VMOVDQU Y6, (BX) + ADDQ $0x20, BX + VMOVDQU Y7, (SI) + ADDQ $0x20, SI + VMOVDQU Y8, (DI) + ADDQ $0x20, DI + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x6_loop + VZEROUPPER + +mulAvxGFNI_1x6_end: + RET + // func mulGFNI_1x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_1x6_64Xor(SB), $0-88 @@ -2025,6 +2408,93 @@ mulGFNI_1x6_64Xor_loop: mulGFNI_1x6_64Xor_end: RET +// func mulAvxGFNI_1x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x6Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, DX + + // Add start offset to input + ADDQ R10, CX + +mulAvxGFNI_1x6Xor_loop: + // Load 6 outputs + VMOVDQU (BX), Y6 + VMOVDQU (SI), Y7 + VMOVDQU (DI), Y8 + VMOVDQU (R8), Y9 + VMOVDQU (R9), Y10 + VMOVDQU (DX), Y11 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y13 + VXORPD Y6, Y13, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y13 + VXORPD Y7, Y13, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y12, Y13 + VXORPD Y8, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 6 outputs + VMOVDQU Y6, (BX) + ADDQ $0x20, BX + VMOVDQU Y7, (SI) + ADDQ $0x20, SI + VMOVDQU Y8, (DI) + ADDQ $0x20, DI + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x6Xor_loop + VZEROUPPER + +mulAvxGFNI_1x6Xor_end: + RET + // func mulAvxTwo_1x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_1x6Xor(SB), NOSPLIT, $0-88 @@ -2311,6 +2781,85 @@ mulGFNI_1x7_64_loop: mulGFNI_1x7_64_end: RET +// func mulAvxGFNI_1x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x7(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DX + + // Add start offset to input + ADDQ R11, CX + +mulAvxGFNI_1x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (CX), Y13 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y13, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y13, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y13, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y13, Y13 + + // Store 7 outputs + VMOVDQU Y7, (BX) + ADDQ $0x20, BX + VMOVDQU Y8, (SI) + ADDQ $0x20, SI + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x7_loop + VZEROUPPER + +mulAvxGFNI_1x7_end: + RET + // func mulGFNI_1x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_1x7_64Xor(SB), $0-88 @@ -2406,6 +2955,101 @@ mulGFNI_1x7_64Xor_loop: mulGFNI_1x7_64Xor_end: RET +// func mulAvxGFNI_1x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x7Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DX + + // Add start offset to input + ADDQ R11, CX + +mulAvxGFNI_1x7Xor_loop: + // Load 7 outputs + VMOVDQU (BX), Y7 + VMOVDQU (SI), Y8 + VMOVDQU (DI), Y9 + VMOVDQU (R8), Y10 + VMOVDQU (R9), Y11 + VMOVDQU (R10), Y12 + VMOVDQU (DX), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (CX), Y14 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (BX) + ADDQ $0x20, BX + VMOVDQU Y8, (SI) + ADDQ $0x20, SI + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x7Xor_loop + VZEROUPPER + +mulAvxGFNI_1x7Xor_end: + RET + // func mulAvxTwo_1x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_1x7Xor(SB), NOSPLIT, $0-88 @@ -2717,6 +3361,91 @@ mulGFNI_1x8_64_loop: mulGFNI_1x8_64_end: RET +// func mulAvxGFNI_1x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x8(SB), $0-88 + // Loading 6 of 8 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), BX + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, BX + + // Add start offset to input + ADDQ R13, DX + +mulAvxGFNI_1x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (DX), Y13 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y13, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y13, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y13, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y13, Y12 + VBROADCASTSD 56(CX), Y14 + VGF2P8AFFINEQB $0x00, Y14, Y13, Y13 + + // Store 8 outputs + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x8_loop + VZEROUPPER + +mulAvxGFNI_1x8_end: + RET + // func mulGFNI_1x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_1x8_64Xor(SB), $0-88 @@ -2820,6 +3549,109 @@ mulGFNI_1x8_64Xor_loop: mulGFNI_1x8_64Xor_end: RET +// func mulAvxGFNI_1x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x8Xor(SB), $0-88 + // Loading 6 of 8 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), BX + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, BX + + // Add start offset to input + ADDQ R13, DX + +mulAvxGFNI_1x8Xor_loop: + // Load 8 outputs + VMOVDQU (SI), Y6 + VMOVDQU (DI), Y7 + VMOVDQU (R8), Y8 + VMOVDQU (R9), Y9 + VMOVDQU (R10), Y10 + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (BX), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x8Xor_loop + VZEROUPPER + +mulAvxGFNI_1x8Xor_end: + RET + // func mulAvxTwo_1x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_1x8Xor(SB), NOSPLIT, $0-88 @@ -3156,6 +3988,97 @@ mulGFNI_1x9_64_loop: mulGFNI_1x9_64_end: RET +// func mulAvxGFNI_1x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x9(SB), $0-88 + // Loading 5 of 9 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), BX + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, BX + + // Add start offset to input + ADDQ R14, DX + +mulAvxGFNI_1x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (DX), Y13 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y13, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y13, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y13, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y13, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y13, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y13, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y13, Y12 + VBROADCASTSD 64(CX), Y14 + VGF2P8AFFINEQB $0x00, Y14, Y13, Y13 + + // Store 9 outputs + VMOVDQU Y5, (SI) + ADDQ $0x20, SI + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x9_loop + VZEROUPPER + +mulAvxGFNI_1x9_end: + RET + // func mulGFNI_1x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_1x9_64Xor(SB), $0-88 @@ -3267,6 +4190,117 @@ mulGFNI_1x9_64Xor_loop: mulGFNI_1x9_64Xor_end: RET +// func mulAvxGFNI_1x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x9Xor(SB), $0-88 + // Loading 5 of 9 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), BX + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, BX + + // Add start offset to input + ADDQ R14, DX + +mulAvxGFNI_1x9Xor_loop: + // Load 9 outputs + VMOVDQU (SI), Y5 + VMOVDQU (DI), Y6 + VMOVDQU (R8), Y7 + VMOVDQU (R9), Y8 + VMOVDQU (R10), Y9 + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (BX), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (SI) + ADDQ $0x20, SI + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x9Xor_loop + VZEROUPPER + +mulAvxGFNI_1x9Xor_end: + RET + // func mulAvxTwo_1x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_1x9Xor(SB), NOSPLIT, $0-88 @@ -3628,6 +4662,103 @@ mulGFNI_1x10_64_loop: mulGFNI_1x10_64_end: RET +// func mulAvxGFNI_1x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x10(SB), $0-88 + // Loading 4 of 10 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), R14 + MOVQ 216(BX), BX + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, BX + + // Add start offset to input + ADDQ R15, DX + +mulAvxGFNI_1x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (DX), Y13 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y13, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y13, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y13, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y13, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y13, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y13, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y13, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y13, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y13, Y12 + VBROADCASTSD 72(CX), Y14 + VGF2P8AFFINEQB $0x00, Y14, Y13, Y13 + + // Store 10 outputs + VMOVDQU Y4, (SI) + ADDQ $0x20, SI + VMOVDQU Y5, (DI) + ADDQ $0x20, DI + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x10_loop + VZEROUPPER + +mulAvxGFNI_1x10_end: + RET + // func mulGFNI_1x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_1x10_64Xor(SB), $0-88 @@ -3747,6 +4878,125 @@ mulGFNI_1x10_64Xor_loop: mulGFNI_1x10_64Xor_end: RET +// func mulAvxGFNI_1x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x10Xor(SB), $0-88 + // Loading 4 of 10 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), R14 + MOVQ 216(BX), BX + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, BX + + // Add start offset to input + ADDQ R15, DX + +mulAvxGFNI_1x10Xor_loop: + // Load 10 outputs + VMOVDQU (SI), Y4 + VMOVDQU (DI), Y5 + VMOVDQU (R8), Y6 + VMOVDQU (R9), Y7 + VMOVDQU (R10), Y8 + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (BX), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + VMOVDQU Y4, (SI) + ADDQ $0x20, SI + VMOVDQU Y5, (DI) + ADDQ $0x20, DI + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x10Xor_loop + VZEROUPPER + +mulAvxGFNI_1x10Xor_end: + RET + // func mulAvxTwo_1x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_1x10Xor(SB), NOSPLIT, $0-88 @@ -3889,71 +5139,6 @@ mulAvxTwo_1x10Xor_loop: mulAvxTwo_1x10Xor_end: RET -// func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_2x1(SB), NOSPLIT, $0-88 - // Loading all tables to registers - // Destination kept in GP registers - // Full registers estimated 8 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_2x1_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), CX - MOVQ out_base+48(FP), BX - MOVQ (BX), BX - MOVQ start+72(FP), SI - - // Add start offset to output - ADDQ SI, BX - - // Add start offset to input - ADDQ SI, DX - ADDQ SI, CX - MOVQ $0x0000000f, SI - MOVQ SI, X5 - VPBROADCASTB X5, Y5 - -mulAvxTwo_2x1_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y5, Y6, Y6 - VPAND Y5, Y7, Y7 - VPSHUFB Y6, Y0, Y6 - VPSHUFB Y7, Y1, Y7 - VPXOR Y6, Y7, Y4 - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (CX), Y6 - ADDQ $0x20, CX - VPSRLQ $0x04, Y6, Y7 - VPAND Y5, Y6, Y6 - VPAND Y5, Y7, Y7 - VPSHUFB Y6, Y2, Y6 - VPSHUFB Y7, Y3, Y7 - XOR3WAY( $0x00, Y6, Y7, Y4) - - // Store 1 outputs - VMOVDQU Y4, (BX) - ADDQ $0x20, BX - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_2x1_loop - VZEROUPPER - -mulAvxTwo_2x1_end: - RET - // func mulAvxTwo_2x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_2x1_64(SB), $0-88 @@ -3973,7 +5158,6 @@ TEXT ·mulAvxTwo_2x1_64(SB), $0-88 MOVQ (CX), DX MOVQ 24(CX), CX MOVQ out_base+48(FP), BX - MOVQ out_base+48(FP), BX MOVQ (BX), BX MOVQ start+72(FP), SI @@ -4087,6 +5271,58 @@ mulGFNI_2x1_64_loop: mulGFNI_2x1_64_end: RET +// func mulAvxGFNI_2x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 5 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), BX + MOVQ start+72(FP), SI + + // Add start offset to output + ADDQ SI, BX + + // Add start offset to input + ADDQ SI, DX + ADDQ SI, CX + +mulAvxGFNI_2x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y3 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y3, Y2 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (CX), Y3 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y1, Y3, Y3 + VXORPD Y2, Y3, Y2 + + // Store 1 outputs + VMOVDQU Y2, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x1_loop + VZEROUPPER + +mulAvxGFNI_2x1_end: + RET + // func mulGFNI_2x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_2x1_64Xor(SB), $0-88 @@ -4143,70 +5379,60 @@ mulGFNI_2x1_64Xor_loop: mulGFNI_2x1_64Xor_end: RET -// func mulAvxTwo_2x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_2x1Xor(SB), NOSPLIT, $0-88 +// func mulAvxGFNI_2x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x1Xor(SB), $0-88 // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 8 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_2x1Xor_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), CX - MOVQ out_base+48(FP), BX - MOVQ (BX), BX - MOVQ start+72(FP), SI + // Full registers estimated 5 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), BX + MOVQ start+72(FP), SI // Add start offset to output ADDQ SI, BX // Add start offset to input - ADDQ SI, DX - ADDQ SI, CX - MOVQ $0x0000000f, SI - MOVQ SI, X5 - VPBROADCASTB X5, Y5 + ADDQ SI, DX + ADDQ SI, CX + +mulAvxGFNI_2x1Xor_loop: + // Load 1 outputs + VMOVDQU (BX), Y2 -mulAvxTwo_2x1Xor_loop: // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y5, Y6, Y6 - VPAND Y5, Y7, Y7 - VMOVDQU (BX), Y4 - VPSHUFB Y6, Y0, Y6 - VPSHUFB Y7, Y1, Y7 - XOR3WAY( $0x00, Y6, Y7, Y4) + VMOVDQU (DX), Y3 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y3, Y3 + VXORPD Y2, Y3, Y2 // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (CX), Y6 - ADDQ $0x20, CX - VPSRLQ $0x04, Y6, Y7 - VPAND Y5, Y6, Y6 - VPAND Y5, Y7, Y7 - VPSHUFB Y6, Y2, Y6 - VPSHUFB Y7, Y3, Y7 - XOR3WAY( $0x00, Y6, Y7, Y4) + VMOVDQU (CX), Y3 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y1, Y3, Y3 + VXORPD Y2, Y3, Y2 // Store 1 outputs - VMOVDQU Y4, (BX) + VMOVDQU Y2, (BX) ADDQ $0x20, BX // Prepare for next loop DECQ AX - JNZ mulAvxTwo_2x1Xor_loop + JNZ mulAvxGFNI_2x1Xor_loop VZEROUPPER -mulAvxTwo_2x1Xor_end: +mulAvxGFNI_2x1Xor_end: RET // func mulAvxTwo_2x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -4228,7 +5454,6 @@ TEXT ·mulAvxTwo_2x1_64Xor(SB), $0-88 MOVQ (CX), DX MOVQ 24(CX), CX MOVQ out_base+48(FP), BX - MOVQ out_base+48(FP), BX MOVQ (BX), BX MOVQ start+72(FP), SI @@ -4294,85 +5519,6 @@ mulAvxTwo_2x1_64Xor_loop: mulAvxTwo_2x1_64Xor_end: RET -// func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_2x2(SB), NOSPLIT, $0-88 - // Loading all tables to registers - // Destination kept in GP registers - // Full registers estimated 15 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_2x2_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VMOVDQU 192(CX), Y6 - VMOVDQU 224(CX), Y7 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), CX - MOVQ out_base+48(FP), BX - MOVQ (BX), SI - MOVQ 24(BX), BX - MOVQ start+72(FP), DI - - // Add start offset to output - ADDQ DI, SI - ADDQ DI, BX - - // Add start offset to input - ADDQ DI, DX - ADDQ DI, CX - MOVQ $0x0000000f, DI - MOVQ DI, X10 - VPBROADCASTB X10, Y10 - -mulAvxTwo_2x2_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (DX), Y13 - ADDQ $0x20, DX - VPSRLQ $0x04, Y13, Y14 - VPAND Y10, Y13, Y13 - VPAND Y10, Y14, Y14 - VPSHUFB Y13, Y0, Y11 - VPSHUFB Y14, Y1, Y12 - VPXOR Y11, Y12, Y8 - VPSHUFB Y13, Y2, Y11 - VPSHUFB Y14, Y3, Y12 - VPXOR Y11, Y12, Y9 - - // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (CX), Y13 - ADDQ $0x20, CX - VPSRLQ $0x04, Y13, Y14 - VPAND Y10, Y13, Y13 - VPAND Y10, Y14, Y14 - VPSHUFB Y13, Y4, Y11 - VPSHUFB Y14, Y5, Y12 - XOR3WAY( $0x00, Y11, Y12, Y8) - VPSHUFB Y13, Y6, Y11 - VPSHUFB Y14, Y7, Y12 - XOR3WAY( $0x00, Y11, Y12, Y9) - - // Store 2 outputs - VMOVDQU Y8, (SI) - ADDQ $0x20, SI - VMOVDQU Y9, (BX) - ADDQ $0x20, BX - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_2x2_loop - VZEROUPPER - -mulAvxTwo_2x2_end: - RET - // func mulAvxTwo_2x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_2x2_64(SB), $0-88 @@ -4388,7 +5534,6 @@ TEXT ·mulAvxTwo_2x2_64(SB), $0-88 MOVQ (DX), BX MOVQ 24(DX), DX MOVQ out_base+48(FP), SI - MOVQ out_base+48(FP), SI MOVQ (SI), DI MOVQ 24(SI), SI MOVQ start+72(FP), R8 @@ -4536,6 +5681,67 @@ mulGFNI_2x2_64_loop: mulGFNI_2x2_64_end: RET +// func mulAvxGFNI_2x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), BX + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, SI + ADDQ DI, BX + + // Add start offset to input + ADDQ DI, DX + ADDQ DI, CX + +mulAvxGFNI_2x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y6 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y6, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y6, Y5 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (CX), Y6 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y2, Y6, Y7 + VXORPD Y4, Y7, Y4 + VGF2P8AFFINEQB $0x00, Y3, Y6, Y7 + VXORPD Y5, Y7, Y5 + + // Store 2 outputs + VMOVDQU Y4, (SI) + ADDQ $0x20, SI + VMOVDQU Y5, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x2_loop + VZEROUPPER + +mulAvxGFNI_2x2_end: + RET + // func mulGFNI_2x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_2x2_64Xor(SB), $0-88 @@ -4603,85 +5809,71 @@ mulGFNI_2x2_64Xor_loop: mulGFNI_2x2_64Xor_end: RET -// func mulAvxTwo_2x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_2x2Xor(SB), NOSPLIT, $0-88 +// func mulAvxGFNI_2x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x2Xor(SB), $0-88 // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 15 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_2x2Xor_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VMOVDQU 192(CX), Y6 - VMOVDQU 224(CX), Y7 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), CX - MOVQ out_base+48(FP), BX - MOVQ (BX), SI - MOVQ 24(BX), BX - MOVQ start+72(FP), DI + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), BX + MOVQ start+72(FP), DI // Add start offset to output ADDQ DI, SI ADDQ DI, BX // Add start offset to input - ADDQ DI, DX - ADDQ DI, CX - MOVQ $0x0000000f, DI - MOVQ DI, X10 - VPBROADCASTB X10, Y10 + ADDQ DI, DX + ADDQ DI, CX + +mulAvxGFNI_2x2Xor_loop: + // Load 2 outputs + VMOVDQU (SI), Y4 + VMOVDQU (BX), Y5 -mulAvxTwo_2x2Xor_loop: // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (DX), Y13 - ADDQ $0x20, DX - VPSRLQ $0x04, Y13, Y14 - VPAND Y10, Y13, Y13 - VPAND Y10, Y14, Y14 - VMOVDQU (SI), Y8 - VPSHUFB Y13, Y0, Y11 - VPSHUFB Y14, Y1, Y12 - XOR3WAY( $0x00, Y11, Y12, Y8) - VMOVDQU (BX), Y9 - VPSHUFB Y13, Y2, Y11 - VPSHUFB Y14, Y3, Y12 - XOR3WAY( $0x00, Y11, Y12, Y9) + VMOVDQU (DX), Y6 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y6, Y7 + VXORPD Y4, Y7, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y6, Y7 + VXORPD Y5, Y7, Y5 // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (CX), Y13 - ADDQ $0x20, CX - VPSRLQ $0x04, Y13, Y14 - VPAND Y10, Y13, Y13 - VPAND Y10, Y14, Y14 - VPSHUFB Y13, Y4, Y11 - VPSHUFB Y14, Y5, Y12 - XOR3WAY( $0x00, Y11, Y12, Y8) - VPSHUFB Y13, Y6, Y11 - VPSHUFB Y14, Y7, Y12 - XOR3WAY( $0x00, Y11, Y12, Y9) + VMOVDQU (CX), Y6 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y2, Y6, Y7 + VXORPD Y4, Y7, Y4 + VGF2P8AFFINEQB $0x00, Y3, Y6, Y7 + VXORPD Y5, Y7, Y5 // Store 2 outputs - VMOVDQU Y8, (SI) + VMOVDQU Y4, (SI) ADDQ $0x20, SI - VMOVDQU Y9, (BX) + VMOVDQU Y5, (BX) ADDQ $0x20, BX // Prepare for next loop DECQ AX - JNZ mulAvxTwo_2x2Xor_loop + JNZ mulAvxGFNI_2x2Xor_loop VZEROUPPER -mulAvxTwo_2x2Xor_end: +mulAvxGFNI_2x2Xor_end: RET // func mulAvxTwo_2x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -4699,7 +5891,6 @@ TEXT ·mulAvxTwo_2x2_64Xor(SB), $0-88 MOVQ (DX), BX MOVQ 24(DX), DX MOVQ out_base+48(FP), SI - MOVQ out_base+48(FP), SI MOVQ (SI), DI MOVQ 24(SI), SI MOVQ start+72(FP), R8 @@ -4792,99 +5983,6 @@ mulAvxTwo_2x2_64Xor_loop: mulAvxTwo_2x2_64Xor_end: RET -// func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_2x3(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 20 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_2x3_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), DX - MOVQ out_base+48(FP), SI - MOVQ (SI), DI - MOVQ 24(SI), R8 - MOVQ 48(SI), SI - MOVQ start+72(FP), R9 - - // Add start offset to output - ADDQ R9, DI - ADDQ R9, R8 - ADDQ R9, SI - - // Add start offset to input - ADDQ R9, BX - ADDQ R9, DX - MOVQ $0x0000000f, R9 - MOVQ R9, X3 - VPBROADCASTB X3, Y3 - -mulAvxTwo_2x3_loop: - // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y0 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y1 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y2 - - // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Store 3 outputs - VMOVDQU Y0, (DI) - ADDQ $0x20, DI - VMOVDQU Y1, (R8) - ADDQ $0x20, R8 - VMOVDQU Y2, (SI) - ADDQ $0x20, SI - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_2x3_loop - VZEROUPPER - -mulAvxTwo_2x3_end: - RET - // func mulAvxTwo_2x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_2x3_64(SB), $0-88 @@ -4900,7 +5998,6 @@ TEXT ·mulAvxTwo_2x3_64(SB), $0-88 MOVQ (DX), BX MOVQ 24(DX), DX MOVQ out_base+48(FP), SI - MOVQ out_base+48(FP), SI MOVQ (SI), DI MOVQ 24(SI), R8 MOVQ 48(SI), SI @@ -5078,6 +6175,76 @@ mulGFNI_2x3_64_loop: mulGFNI_2x3_64_end: RET +// func mulAvxGFNI_2x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x3(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), BX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, BX + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, CX + +mulAvxGFNI_2x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DX), Y9 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y9, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y9, Y8 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (CX), Y9 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y3, Y9, Y10 + VXORPD Y6, Y10, Y6 + VGF2P8AFFINEQB $0x00, Y4, Y9, Y10 + VXORPD Y7, Y10, Y7 + VGF2P8AFFINEQB $0x00, Y5, Y9, Y10 + VXORPD Y8, Y10, Y8 + + // Store 3 outputs + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x3_loop + VZEROUPPER + +mulAvxGFNI_2x3_end: + RET + // func mulGFNI_2x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_2x3_64Xor(SB), $0-88 @@ -5156,100 +6323,82 @@ mulGFNI_2x3_64Xor_loop: mulGFNI_2x3_64Xor_end: RET -// func mulAvxTwo_2x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_2x3Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_2x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x3Xor(SB), $0-88 + // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 20 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_2x3Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), DX - MOVQ out_base+48(FP), SI - MOVQ (SI), DI - MOVQ 24(SI), R8 - MOVQ 48(SI), SI - MOVQ start+72(FP), R9 + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), BX + MOVQ start+72(FP), R8 // Add start offset to output - ADDQ R9, DI - ADDQ R9, R8 - ADDQ R9, SI + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, BX // Add start offset to input - ADDQ R9, BX - ADDQ R9, DX - MOVQ $0x0000000f, R9 - MOVQ R9, X3 - VPBROADCASTB X3, Y3 + ADDQ R8, DX + ADDQ R8, CX + +mulAvxGFNI_2x3Xor_loop: + // Load 3 outputs + VMOVDQU (SI), Y6 + VMOVDQU (DI), Y7 + VMOVDQU (BX), Y8 -mulAvxTwo_2x3Xor_loop: // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (DI), Y0 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU (R8), Y1 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU (SI), Y2 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DX), Y9 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y9, Y10 + VXORPD Y6, Y10, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y9, Y10 + VXORPD Y7, Y10, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y9, Y10 + VXORPD Y8, Y10, Y8 // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (CX), Y9 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y3, Y9, Y10 + VXORPD Y6, Y10, Y6 + VGF2P8AFFINEQB $0x00, Y4, Y9, Y10 + VXORPD Y7, Y10, Y7 + VGF2P8AFFINEQB $0x00, Y5, Y9, Y10 + VXORPD Y8, Y10, Y8 // Store 3 outputs - VMOVDQU Y0, (DI) - ADDQ $0x20, DI - VMOVDQU Y1, (R8) - ADDQ $0x20, R8 - VMOVDQU Y2, (SI) + VMOVDQU Y6, (SI) ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (BX) + ADDQ $0x20, BX // Prepare for next loop DECQ AX - JNZ mulAvxTwo_2x3Xor_loop + JNZ mulAvxGFNI_2x3Xor_loop VZEROUPPER -mulAvxTwo_2x3Xor_end: +mulAvxGFNI_2x3Xor_end: RET // func mulAvxTwo_2x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -5267,7 +6416,6 @@ TEXT ·mulAvxTwo_2x3_64Xor(SB), $0-88 MOVQ (DX), BX MOVQ 24(DX), DX MOVQ out_base+48(FP), SI - MOVQ out_base+48(FP), SI MOVQ (SI), DI MOVQ 24(SI), R8 MOVQ 48(SI), SI @@ -5569,6 +6717,85 @@ mulGFNI_2x4_64_loop: mulGFNI_2x4_64_end: RET +// func mulAvxGFNI_2x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x4(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), BX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, BX + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, CX + +mulAvxGFNI_2x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y12, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y12, Y11 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y8, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 4 outputs + VMOVDQU Y8, (SI) + ADDQ $0x20, SI + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x4_loop + VZEROUPPER + +mulAvxGFNI_2x4_end: + RET + // func mulGFNI_2x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_2x4_64Xor(SB), $0-88 @@ -5658,6 +6885,95 @@ mulGFNI_2x4_64Xor_loop: mulGFNI_2x4_64Xor_end: RET +// func mulAvxGFNI_2x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x4Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), BX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, BX + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, CX + +mulAvxGFNI_2x4Xor_loop: + // Load 4 outputs + VMOVDQU (SI), Y8 + VMOVDQU (DI), Y9 + VMOVDQU (R8), Y10 + VMOVDQU (BX), Y11 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y13 + VXORPD Y8, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y8, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 4 outputs + VMOVDQU Y8, (SI) + ADDQ $0x20, SI + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x4Xor_loop + VZEROUPPER + +mulAvxGFNI_2x4Xor_end: + RET + // func mulAvxTwo_2x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_2x4Xor(SB), NOSPLIT, $0-88 @@ -5978,6 +7294,94 @@ mulGFNI_2x5_64_loop: mulGFNI_2x5_64_end: RET +// func mulAvxGFNI_2x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x5(SB), $0-88 + // Loading 9 of 10 tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), SI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, SI + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, DX + +mulAvxGFNI_2x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x5_loop + VZEROUPPER + +mulAvxGFNI_2x5_end: + RET + // func mulGFNI_2x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_2x5_64Xor(SB), $0-88 @@ -6078,6 +7482,106 @@ mulGFNI_2x5_64Xor_loop: mulGFNI_2x5_64Xor_end: RET +// func mulAvxGFNI_2x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x5Xor(SB), $0-88 + // Loading 9 of 10 tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), SI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, SI + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, DX + +mulAvxGFNI_2x5Xor_loop: + // Load 5 outputs + VMOVDQU (DI), Y9 + VMOVDQU (R8), Y10 + VMOVDQU (R9), Y11 + VMOVDQU (R10), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x5Xor_loop + VZEROUPPER + +mulAvxGFNI_2x5Xor_end: + RET + // func mulAvxTwo_2x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_2x5Xor(SB), NOSPLIT, $0-88 @@ -6436,6 +7940,103 @@ mulGFNI_2x6_64_loop: mulGFNI_2x6_64_end: RET +// func mulAvxGFNI_2x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x6(SB), $0-88 + // Loading 8 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), SI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, SI + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, DX + +mulAvxGFNI_2x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (DI) + ADDQ $0x20, DI + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x6_loop + VZEROUPPER + +mulAvxGFNI_2x6_end: + RET + // func mulGFNI_2x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_2x6_64Xor(SB), $0-88 @@ -6547,6 +8148,117 @@ mulGFNI_2x6_64Xor_loop: mulGFNI_2x6_64Xor_end: RET +// func mulAvxGFNI_2x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x6Xor(SB), $0-88 + // Loading 8 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), SI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, SI + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, DX + +mulAvxGFNI_2x6Xor_loop: + // Load 6 outputs + VMOVDQU (DI), Y8 + VMOVDQU (R8), Y9 + VMOVDQU (R9), Y10 + VMOVDQU (R10), Y11 + VMOVDQU (R11), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (DI) + ADDQ $0x20, DI + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x6Xor_loop + VZEROUPPER + +mulAvxGFNI_2x6Xor_end: + RET + // func mulAvxTwo_2x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_2x6Xor(SB), NOSPLIT, $0-88 @@ -6943,6 +8655,112 @@ mulGFNI_2x7_64_loop: mulGFNI_2x7_64_end: RET +// func mulAvxGFNI_2x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x7(SB), $0-88 + // Loading 7 of 14 tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), SI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, SI + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, DX + +mulAvxGFNI_2x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x7_loop + VZEROUPPER + +mulAvxGFNI_2x7_end: + RET + // func mulGFNI_2x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_2x7_64Xor(SB), $0-88 @@ -7065,6 +8883,128 @@ mulGFNI_2x7_64Xor_loop: mulGFNI_2x7_64Xor_end: RET +// func mulAvxGFNI_2x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x7Xor(SB), $0-88 + // Loading 7 of 14 tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), SI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, SI + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, DX + +mulAvxGFNI_2x7Xor_loop: + // Load 7 outputs + VMOVDQU (DI), Y7 + VMOVDQU (R8), Y8 + VMOVDQU (R9), Y9 + VMOVDQU (R10), Y10 + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x7Xor_loop + VZEROUPPER + +mulAvxGFNI_2x7Xor_end: + RET + // func mulAvxTwo_2x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_2x7Xor(SB), NOSPLIT, $0-88 @@ -7499,6 +9439,121 @@ mulGFNI_2x8_64_loop: mulGFNI_2x8_64_end: RET +// func mulAvxGFNI_2x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x8(SB), $0-88 + // Loading 6 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), SI + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, SI + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, DX + +mulAvxGFNI_2x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x8_loop + VZEROUPPER + +mulAvxGFNI_2x8_end: + RET + // func mulGFNI_2x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_2x8_64Xor(SB), $0-88 @@ -7632,6 +9687,139 @@ mulGFNI_2x8_64Xor_loop: mulGFNI_2x8_64Xor_end: RET +// func mulAvxGFNI_2x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x8Xor(SB), $0-88 + // Loading 6 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), SI + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, SI + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, DX + +mulAvxGFNI_2x8Xor_loop: + // Load 8 outputs + VMOVDQU (DI), Y6 + VMOVDQU (R8), Y7 + VMOVDQU (R9), Y8 + VMOVDQU (R10), Y9 + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x8Xor_loop + VZEROUPPER + +mulAvxGFNI_2x8Xor_end: + RET + // func mulAvxTwo_2x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_2x8Xor(SB), NOSPLIT, $0-88 @@ -8104,6 +10292,130 @@ mulGFNI_2x9_64_loop: mulGFNI_2x9_64_end: RET +// func mulAvxGFNI_2x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x9(SB), $0-88 + // Loading 5 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), SI + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, SI + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, DX + +mulAvxGFNI_2x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (DI) + ADDQ $0x20, DI + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x9_loop + VZEROUPPER + +mulAvxGFNI_2x9_end: + RET + // func mulGFNI_2x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_2x9_64Xor(SB), $0-88 @@ -8248,6 +10560,150 @@ mulGFNI_2x9_64Xor_loop: mulGFNI_2x9_64Xor_end: RET +// func mulAvxGFNI_2x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x9Xor(SB), $0-88 + // Loading 5 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), SI + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, SI + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, DX + +mulAvxGFNI_2x9Xor_loop: + // Load 9 outputs + VMOVDQU (DI), Y5 + VMOVDQU (R8), Y6 + VMOVDQU (R9), Y7 + VMOVDQU (R10), Y8 + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (DI) + ADDQ $0x20, DI + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x9Xor_loop + VZEROUPPER + +mulAvxGFNI_2x9Xor_end: + RET + // func mulAvxTwo_2x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_2x9Xor(SB), NOSPLIT, $0-88 @@ -8758,6 +11214,139 @@ mulGFNI_2x10_64_loop: mulGFNI_2x10_64_end: RET +// func mulAvxGFNI_2x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x10(SB), $8-88 + // Loading 4 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), R15 + MOVQ 216(SI), SI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, SI + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, DX + +mulAvxGFNI_2x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x10_loop + VZEROUPPER + +mulAvxGFNI_2x10_end: + RET + // func mulGFNI_2x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_2x10_64Xor(SB), $0-88 @@ -8913,6 +11502,161 @@ mulGFNI_2x10_64Xor_loop: mulGFNI_2x10_64Xor_end: RET +// func mulAvxGFNI_2x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x10Xor(SB), $8-88 + // Loading 4 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), R15 + MOVQ 216(SI), SI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, SI + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, DX + +mulAvxGFNI_2x10Xor_loop: + // Load 10 outputs + VMOVDQU (DI), Y4 + VMOVDQU (R8), Y5 + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x10Xor_loop + VZEROUPPER + +mulAvxGFNI_2x10Xor_end: + RET + // func mulAvxTwo_2x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_2x10Xor(SB), NOSPLIT, $8-88 @@ -9114,85 +11858,6 @@ mulAvxTwo_2x10Xor_loop: mulAvxTwo_2x10Xor_end: RET -// func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_3x1(SB), NOSPLIT, $0-88 - // Loading all tables to registers - // Destination kept in GP registers - // Full registers estimated 10 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_3x1_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), BX - MOVQ 48(CX), CX - MOVQ out_base+48(FP), SI - MOVQ (SI), SI - MOVQ start+72(FP), DI - - // Add start offset to output - ADDQ DI, SI - - // Add start offset to input - ADDQ DI, DX - ADDQ DI, BX - ADDQ DI, CX - MOVQ $0x0000000f, DI - MOVQ DI, X7 - VPBROADCASTB X7, Y7 - -mulAvxTwo_3x1_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (DX), Y8 - ADDQ $0x20, DX - VPSRLQ $0x04, Y8, Y9 - VPAND Y7, Y8, Y8 - VPAND Y7, Y9, Y9 - VPSHUFB Y8, Y0, Y8 - VPSHUFB Y9, Y1, Y9 - VPXOR Y8, Y9, Y6 - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (BX), Y8 - ADDQ $0x20, BX - VPSRLQ $0x04, Y8, Y9 - VPAND Y7, Y8, Y8 - VPAND Y7, Y9, Y9 - VPSHUFB Y8, Y2, Y8 - VPSHUFB Y9, Y3, Y9 - XOR3WAY( $0x00, Y8, Y9, Y6) - - // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (CX), Y8 - ADDQ $0x20, CX - VPSRLQ $0x04, Y8, Y9 - VPAND Y7, Y8, Y8 - VPAND Y7, Y9, Y9 - VPSHUFB Y8, Y4, Y8 - VPSHUFB Y9, Y5, Y9 - XOR3WAY( $0x00, Y8, Y9, Y6) - - // Store 1 outputs - VMOVDQU Y6, (SI) - ADDQ $0x20, SI - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_3x1_loop - VZEROUPPER - -mulAvxTwo_3x1_end: - RET - // func mulAvxTwo_3x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_3x1_64(SB), $0-88 @@ -9209,7 +11874,6 @@ TEXT ·mulAvxTwo_3x1_64(SB), $0-88 MOVQ 24(DX), SI MOVQ 48(DX), DX MOVQ out_base+48(FP), DI - MOVQ out_base+48(FP), DI MOVQ (DI), DI MOVQ start+72(FP), R8 @@ -9356,6 +12020,67 @@ mulGFNI_3x1_64_loop: mulGFNI_3x1_64_end: RET +// func mulAvxGFNI_3x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), SI + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, SI + + // Add start offset to input + ADDQ DI, DX + ADDQ DI, BX + ADDQ DI, CX + +mulAvxGFNI_3x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y4 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y4, Y3 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y4 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y4, Y4 + VXORPD Y3, Y4, Y3 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (CX), Y4 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y2, Y4, Y4 + VXORPD Y3, Y4, Y3 + + // Store 1 outputs + VMOVDQU Y3, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x1_loop + VZEROUPPER + +mulAvxGFNI_3x1_end: + RET + // func mulGFNI_3x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_3x1_64Xor(SB), $0-88 @@ -9421,84 +12146,69 @@ mulGFNI_3x1_64Xor_loop: mulGFNI_3x1_64Xor_end: RET -// func mulAvxTwo_3x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_3x1Xor(SB), NOSPLIT, $0-88 +// func mulAvxGFNI_3x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x1Xor(SB), $0-88 // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 10 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_3x1Xor_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), BX - MOVQ 48(CX), CX - MOVQ out_base+48(FP), SI - MOVQ (SI), SI - MOVQ start+72(FP), DI + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), SI + MOVQ start+72(FP), DI // Add start offset to output ADDQ DI, SI // Add start offset to input - ADDQ DI, DX - ADDQ DI, BX - ADDQ DI, CX - MOVQ $0x0000000f, DI - MOVQ DI, X7 - VPBROADCASTB X7, Y7 + ADDQ DI, DX + ADDQ DI, BX + ADDQ DI, CX + +mulAvxGFNI_3x1Xor_loop: + // Load 1 outputs + VMOVDQU (SI), Y3 -mulAvxTwo_3x1Xor_loop: // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (DX), Y8 - ADDQ $0x20, DX - VPSRLQ $0x04, Y8, Y9 - VPAND Y7, Y8, Y8 - VPAND Y7, Y9, Y9 - VMOVDQU (SI), Y6 - VPSHUFB Y8, Y0, Y8 - VPSHUFB Y9, Y1, Y9 - XOR3WAY( $0x00, Y8, Y9, Y6) + VMOVDQU (DX), Y4 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y4, Y4 + VXORPD Y3, Y4, Y3 // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (BX), Y8 - ADDQ $0x20, BX - VPSRLQ $0x04, Y8, Y9 - VPAND Y7, Y8, Y8 - VPAND Y7, Y9, Y9 - VPSHUFB Y8, Y2, Y8 - VPSHUFB Y9, Y3, Y9 - XOR3WAY( $0x00, Y8, Y9, Y6) + VMOVDQU (BX), Y4 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y4, Y4 + VXORPD Y3, Y4, Y3 // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (CX), Y8 - ADDQ $0x20, CX - VPSRLQ $0x04, Y8, Y9 - VPAND Y7, Y8, Y8 - VPAND Y7, Y9, Y9 - VPSHUFB Y8, Y4, Y8 - VPSHUFB Y9, Y5, Y9 - XOR3WAY( $0x00, Y8, Y9, Y6) + VMOVDQU (CX), Y4 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y2, Y4, Y4 + VXORPD Y3, Y4, Y3 // Store 1 outputs - VMOVDQU Y6, (SI) + VMOVDQU Y3, (SI) ADDQ $0x20, SI // Prepare for next loop DECQ AX - JNZ mulAvxTwo_3x1Xor_loop + JNZ mulAvxGFNI_3x1Xor_loop VZEROUPPER -mulAvxTwo_3x1Xor_end: +mulAvxGFNI_3x1Xor_end: RET // func mulAvxTwo_3x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -9517,7 +12227,6 @@ TEXT ·mulAvxTwo_3x1_64Xor(SB), $0-88 MOVQ 24(DX), SI MOVQ 48(DX), DX MOVQ out_base+48(FP), DI - MOVQ out_base+48(FP), DI MOVQ (DI), DI MOVQ start+72(FP), R8 @@ -9607,104 +12316,6 @@ mulAvxTwo_3x1_64Xor_loop: mulAvxTwo_3x1_64Xor_end: RET -// func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_3x2(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 19 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_3x2_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DX - MOVQ out_base+48(FP), DI - MOVQ (DI), R8 - MOVQ 24(DI), DI - MOVQ start+72(FP), R9 - - // Add start offset to output - ADDQ R9, R8 - ADDQ R9, DI - - // Add start offset to input - ADDQ R9, BX - ADDQ R9, SI - ADDQ R9, DX - MOVQ $0x0000000f, R9 - MOVQ R9, X2 - VPBROADCASTB X2, Y2 - -mulAvxTwo_3x2_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y0 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y1 - - // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Store 2 outputs - VMOVDQU Y0, (R8) - ADDQ $0x20, R8 - VMOVDQU Y1, (DI) - ADDQ $0x20, DI - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_3x2_loop - VZEROUPPER - -mulAvxTwo_3x2_end: - RET - // func mulAvxTwo_3x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_3x2_64(SB), $0-88 @@ -9721,7 +12332,6 @@ TEXT ·mulAvxTwo_3x2_64(SB), $0-88 MOVQ 24(DX), SI MOVQ 48(DX), DX MOVQ out_base+48(FP), DI - MOVQ out_base+48(FP), DI MOVQ (DI), R8 MOVQ 24(DI), DI MOVQ start+72(FP), R9 @@ -9909,6 +12519,79 @@ mulGFNI_3x2_64_loop: mulGFNI_3x2_64_end: RET +// func mulAvxGFNI_3x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), SI + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, DI + ADDQ R8, SI + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, CX + +mulAvxGFNI_3x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y8 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y8, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y8, Y7 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y8 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y8, Y9 + VXORPD Y7, Y9, Y7 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (CX), Y8 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y5, Y8, Y9 + VXORPD Y7, Y9, Y7 + + // Store 2 outputs + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x2_loop + VZEROUPPER + +mulAvxGFNI_3x2_end: + RET + // func mulGFNI_3x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_3x2_64Xor(SB), $0-88 @@ -9988,104 +12671,83 @@ mulGFNI_3x2_64Xor_loop: mulGFNI_3x2_64Xor_end: RET -// func mulAvxTwo_3x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_3x2Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_3x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x2Xor(SB), $0-88 + // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 19 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_3x2Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DX - MOVQ out_base+48(FP), DI - MOVQ (DI), R8 - MOVQ 24(DI), DI - MOVQ start+72(FP), R9 + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), SI + MOVQ start+72(FP), R8 // Add start offset to output - ADDQ R9, R8 - ADDQ R9, DI + ADDQ R8, DI + ADDQ R8, SI // Add start offset to input - ADDQ R9, BX - ADDQ R9, SI - ADDQ R9, DX - MOVQ $0x0000000f, R9 - MOVQ R9, X2 - VPBROADCASTB X2, Y2 + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, CX + +mulAvxGFNI_3x2Xor_loop: + // Load 2 outputs + VMOVDQU (DI), Y6 + VMOVDQU (SI), Y7 -mulAvxTwo_3x2Xor_loop: // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (R8), Y0 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU (DI), Y1 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DX), Y8 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y8, Y9 + VXORPD Y7, Y9, Y7 // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (BX), Y8 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y8, Y9 + VXORPD Y7, Y9, Y7 // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (CX), Y8 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y5, Y8, Y9 + VXORPD Y7, Y9, Y7 // Store 2 outputs - VMOVDQU Y0, (R8) - ADDQ $0x20, R8 - VMOVDQU Y1, (DI) + VMOVDQU Y6, (DI) ADDQ $0x20, DI + VMOVDQU Y7, (SI) + ADDQ $0x20, SI // Prepare for next loop DECQ AX - JNZ mulAvxTwo_3x2Xor_loop + JNZ mulAvxGFNI_3x2Xor_loop VZEROUPPER -mulAvxTwo_3x2Xor_end: +mulAvxGFNI_3x2Xor_end: RET // func mulAvxTwo_3x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -10104,7 +12766,6 @@ TEXT ·mulAvxTwo_3x2_64Xor(SB), $0-88 MOVQ 24(DX), SI MOVQ 48(DX), DX MOVQ out_base+48(FP), DI - MOVQ out_base+48(FP), DI MOVQ (DI), R8 MOVQ 24(DI), DI MOVQ start+72(FP), R9 @@ -10225,123 +12886,6 @@ mulAvxTwo_3x2_64Xor_loop: mulAvxTwo_3x2_64Xor_end: RET -// func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_3x3(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 26 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_3x3_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DX - MOVQ out_base+48(FP), DI - MOVQ (DI), R8 - MOVQ 24(DI), R9 - MOVQ 48(DI), DI - MOVQ start+72(FP), R10 - - // Add start offset to output - ADDQ R10, R8 - ADDQ R10, R9 - ADDQ R10, DI - - // Add start offset to input - ADDQ R10, BX - ADDQ R10, SI - ADDQ R10, DX - MOVQ $0x0000000f, R10 - MOVQ R10, X3 - VPBROADCASTB X3, Y3 - -mulAvxTwo_3x3_loop: - // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y0 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y1 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y2 - - // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Store 3 outputs - VMOVDQU Y0, (R8) - ADDQ $0x20, R8 - VMOVDQU Y1, (R9) - ADDQ $0x20, R9 - VMOVDQU Y2, (DI) - ADDQ $0x20, DI - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_3x3_loop - VZEROUPPER - -mulAvxTwo_3x3_end: - RET - // func mulAvxTwo_3x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_3x3_64(SB), $0-88 @@ -10358,7 +12902,6 @@ TEXT ·mulAvxTwo_3x3_64(SB), $0-88 MOVQ 24(DX), SI MOVQ 48(DX), DX MOVQ out_base+48(FP), DI - MOVQ out_base+48(FP), DI MOVQ (DI), R8 MOVQ 24(DI), R9 MOVQ 48(DI), DI @@ -10587,6 +13130,91 @@ mulGFNI_3x3_64_loop: mulGFNI_3x3_64_end: RET +// func mulAvxGFNI_3x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x3(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), SI + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, SI + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, CX + +mulAvxGFNI_3x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y12, Y11 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (BX), Y12 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y8, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 3 outputs + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x3_loop + VZEROUPPER + +mulAvxGFNI_3x3_end: + RET + // func mulGFNI_3x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_3x3_64Xor(SB), $0-88 @@ -10680,124 +13308,97 @@ mulGFNI_3x3_64Xor_loop: mulGFNI_3x3_64Xor_end: RET -// func mulAvxTwo_3x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_3x3Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_3x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x3Xor(SB), $0-88 + // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 26 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_3x3Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DX - MOVQ out_base+48(FP), DI - MOVQ (DI), R8 - MOVQ 24(DI), R9 - MOVQ 48(DI), DI - MOVQ start+72(FP), R10 + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), SI + MOVQ start+72(FP), R9 // Add start offset to output - ADDQ R10, R8 - ADDQ R10, R9 - ADDQ R10, DI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, SI // Add start offset to input - ADDQ R10, BX - ADDQ R10, SI - ADDQ R10, DX - MOVQ $0x0000000f, R10 - MOVQ R10, X3 - VPBROADCASTB X3, Y3 + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, CX + +mulAvxGFNI_3x3Xor_loop: + // Load 3 outputs + VMOVDQU (DI), Y9 + VMOVDQU (R8), Y10 + VMOVDQU (SI), Y11 -mulAvxTwo_3x3Xor_loop: // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (R8), Y0 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU (R9), Y1 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU (DI), Y2 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y12, Y13 + VXORPD Y11, Y13, Y11 // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (BX), Y12 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y11, Y13, Y11 // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y8, Y12, Y13 + VXORPD Y11, Y13, Y11 // Store 3 outputs - VMOVDQU Y0, (R8) - ADDQ $0x20, R8 - VMOVDQU Y1, (R9) - ADDQ $0x20, R9 - VMOVDQU Y2, (DI) + VMOVDQU Y9, (DI) ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (SI) + ADDQ $0x20, SI // Prepare for next loop DECQ AX - JNZ mulAvxTwo_3x3Xor_loop + JNZ mulAvxGFNI_3x3Xor_loop VZEROUPPER -mulAvxTwo_3x3Xor_end: +mulAvxGFNI_3x3Xor_end: RET // func mulAvxTwo_3x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -10816,7 +13417,6 @@ TEXT ·mulAvxTwo_3x3_64Xor(SB), $0-88 MOVQ 24(DX), SI MOVQ 48(DX), DX MOVQ out_base+48(FP), DI - MOVQ out_base+48(FP), DI MOVQ (DI), R8 MOVQ 24(DI), R9 MOVQ 48(DI), DI @@ -11201,6 +13801,103 @@ mulGFNI_3x4_64_loop: mulGFNI_3x4_64_end: RET +// func mulAvxGFNI_3x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x4(SB), $0-88 + // Loading 10 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), DI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DI + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DX + +mulAvxGFNI_3x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x4_loop + VZEROUPPER + +mulAvxGFNI_3x4_end: + RET + // func mulGFNI_3x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_3x4_64Xor(SB), $0-88 @@ -11308,6 +14005,113 @@ mulGFNI_3x4_64Xor_loop: mulGFNI_3x4_64Xor_end: RET +// func mulAvxGFNI_3x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x4Xor(SB), $0-88 + // Loading 10 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), DI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DI + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DX + +mulAvxGFNI_3x4Xor_loop: + // Load 4 outputs + VMOVDQU (R8), Y10 + VMOVDQU (R9), Y11 + VMOVDQU (R10), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x4Xor_loop + VZEROUPPER + +mulAvxGFNI_3x4Xor_end: + RET + // func mulAvxTwo_3x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_3x4Xor(SB), NOSPLIT, $0-88 @@ -11712,6 +14516,115 @@ mulGFNI_3x5_64_loop: mulGFNI_3x5_64_end: RET +// func mulAvxGFNI_3x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x5(SB), $0-88 + // Loading 9 of 15 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), DI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, DI + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DX + +mulAvxGFNI_3x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x5_loop + VZEROUPPER + +mulAvxGFNI_3x5_end: + RET + // func mulGFNI_3x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_3x5_64Xor(SB), $0-88 @@ -11833,6 +14746,127 @@ mulGFNI_3x5_64Xor_loop: mulGFNI_3x5_64Xor_end: RET +// func mulAvxGFNI_3x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x5Xor(SB), $0-88 + // Loading 9 of 15 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), DI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, DI + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DX + +mulAvxGFNI_3x5Xor_loop: + // Load 5 outputs + VMOVDQU (R8), Y9 + VMOVDQU (R9), Y10 + VMOVDQU (R10), Y11 + VMOVDQU (R11), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x5Xor_loop + VZEROUPPER + +mulAvxGFNI_3x5Xor_end: + RET + // func mulAvxTwo_3x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_3x5Xor(SB), NOSPLIT, $0-88 @@ -12288,6 +15322,127 @@ mulGFNI_3x6_64_loop: mulGFNI_3x6_64_end: RET +// func mulAvxGFNI_3x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x6(SB), $0-88 + // Loading 8 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), DI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, DI + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DX + +mulAvxGFNI_3x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x6_loop + VZEROUPPER + +mulAvxGFNI_3x6_end: + RET + // func mulGFNI_3x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_3x6_64Xor(SB), $0-88 @@ -12423,6 +15578,141 @@ mulGFNI_3x6_64Xor_loop: mulGFNI_3x6_64Xor_end: RET +// func mulAvxGFNI_3x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x6Xor(SB), $0-88 + // Loading 8 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), DI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, DI + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DX + +mulAvxGFNI_3x6Xor_loop: + // Load 6 outputs + VMOVDQU (R8), Y8 + VMOVDQU (R9), Y9 + VMOVDQU (R10), Y10 + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x6Xor_loop + VZEROUPPER + +mulAvxGFNI_3x6Xor_end: + RET + // func mulAvxTwo_3x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_3x6Xor(SB), NOSPLIT, $0-88 @@ -12929,6 +16219,139 @@ mulGFNI_3x7_64_loop: mulGFNI_3x7_64_end: RET +// func mulAvxGFNI_3x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x7(SB), $0-88 + // Loading 7 of 21 tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), DI + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, DI + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DX + +mulAvxGFNI_3x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x7_loop + VZEROUPPER + +mulAvxGFNI_3x7_end: + RET + // func mulGFNI_3x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_3x7_64Xor(SB), $0-88 @@ -13078,6 +16501,155 @@ mulGFNI_3x7_64Xor_loop: mulGFNI_3x7_64Xor_end: RET +// func mulAvxGFNI_3x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x7Xor(SB), $0-88 + // Loading 7 of 21 tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), DI + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, DI + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DX + +mulAvxGFNI_3x7Xor_loop: + // Load 7 outputs + VMOVDQU (R8), Y7 + VMOVDQU (R9), Y8 + VMOVDQU (R10), Y9 + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x7Xor_loop + VZEROUPPER + +mulAvxGFNI_3x7Xor_end: + RET + // func mulAvxTwo_3x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_3x7Xor(SB), NOSPLIT, $0-88 @@ -13633,6 +17205,151 @@ mulGFNI_3x8_64_loop: mulGFNI_3x8_64_end: RET +// func mulAvxGFNI_3x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x8(SB), $0-88 + // Loading 6 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), DI + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, DI + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DX + +mulAvxGFNI_3x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x8_loop + VZEROUPPER + +mulAvxGFNI_3x8_end: + RET + // func mulGFNI_3x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_3x8_64Xor(SB), $0-88 @@ -13794,6 +17511,169 @@ mulGFNI_3x8_64Xor_loop: mulGFNI_3x8_64Xor_end: RET +// func mulAvxGFNI_3x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x8Xor(SB), $0-88 + // Loading 6 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), DI + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, DI + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DX + +mulAvxGFNI_3x8Xor_loop: + // Load 8 outputs + VMOVDQU (R8), Y6 + VMOVDQU (R9), Y7 + VMOVDQU (R10), Y8 + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x8Xor_loop + VZEROUPPER + +mulAvxGFNI_3x8Xor_end: + RET + // func mulAvxTwo_3x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_3x8Xor(SB), NOSPLIT, $0-88 @@ -14396,6 +18276,163 @@ mulGFNI_3x9_64_loop: mulGFNI_3x9_64_end: RET +// func mulAvxGFNI_3x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x9(SB), $8-88 + // Loading 5 of 27 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DX + +mulAvxGFNI_3x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x9_loop + VZEROUPPER + +mulAvxGFNI_3x9_end: + RET + // func mulGFNI_3x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_3x9_64Xor(SB), $8-88 @@ -14567,6 +18604,183 @@ mulGFNI_3x9_64Xor_loop: mulGFNI_3x9_64Xor_end: RET +// func mulAvxGFNI_3x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x9Xor(SB), $8-88 + // Loading 5 of 27 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DX + +mulAvxGFNI_3x9Xor_loop: + // Load 9 outputs + VMOVDQU (R8), Y5 + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x9Xor_loop + VZEROUPPER + +mulAvxGFNI_3x9Xor_end: + RET + // func mulAvxTwo_3x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_3x9Xor(SB), NOSPLIT, $8-88 @@ -15222,6 +19436,179 @@ mulGFNI_3x10_64_loop: mulGFNI_3x10_64_end: RET +// func mulAvxGFNI_3x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x10(SB), $8-88 + // Loading 4 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), AX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), R15 + MOVQ 216(SI), SI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, SI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_3x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_3x10_loop + VZEROUPPER + +mulAvxGFNI_3x10_end: + RET + // func mulGFNI_3x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_3x10_64Xor(SB), $8-88 @@ -15407,6 +19794,201 @@ mulGFNI_3x10_64Xor_loop: mulGFNI_3x10_64Xor_end: RET +// func mulAvxGFNI_3x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x10Xor(SB), $8-88 + // Loading 4 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), AX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), R15 + MOVQ 216(SI), SI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, SI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_3x10Xor_loop: + // Load 10 outputs + VMOVDQU (DI), Y4 + VMOVDQU (R8), Y5 + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_3x10Xor_loop + VZEROUPPER + +mulAvxGFNI_3x10Xor_end: + RET + // func mulAvxTwo_3x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_3x10Xor(SB), NOSPLIT, $8-88 @@ -15669,99 +20251,6 @@ mulAvxTwo_3x10Xor_loop: mulAvxTwo_3x10Xor_end: RET -// func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_4x1(SB), NOSPLIT, $0-88 - // Loading all tables to registers - // Destination kept in GP registers - // Full registers estimated 12 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_4x1_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VMOVDQU 192(CX), Y6 - VMOVDQU 224(CX), Y7 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), BX - MOVQ 48(CX), SI - MOVQ 72(CX), CX - MOVQ out_base+48(FP), DI - MOVQ (DI), DI - MOVQ start+72(FP), R8 - - // Add start offset to output - ADDQ R8, DI - - // Add start offset to input - ADDQ R8, DX - ADDQ R8, BX - ADDQ R8, SI - ADDQ R8, CX - MOVQ $0x0000000f, R8 - MOVQ R8, X9 - VPBROADCASTB X9, Y9 - -mulAvxTwo_4x1_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (DX), Y10 - ADDQ $0x20, DX - VPSRLQ $0x04, Y10, Y11 - VPAND Y9, Y10, Y10 - VPAND Y9, Y11, Y11 - VPSHUFB Y10, Y0, Y10 - VPSHUFB Y11, Y1, Y11 - VPXOR Y10, Y11, Y8 - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (BX), Y10 - ADDQ $0x20, BX - VPSRLQ $0x04, Y10, Y11 - VPAND Y9, Y10, Y10 - VPAND Y9, Y11, Y11 - VPSHUFB Y10, Y2, Y10 - VPSHUFB Y11, Y3, Y11 - XOR3WAY( $0x00, Y10, Y11, Y8) - - // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (SI), Y10 - ADDQ $0x20, SI - VPSRLQ $0x04, Y10, Y11 - VPAND Y9, Y10, Y10 - VPAND Y9, Y11, Y11 - VPSHUFB Y10, Y4, Y10 - VPSHUFB Y11, Y5, Y11 - XOR3WAY( $0x00, Y10, Y11, Y8) - - // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (CX), Y10 - ADDQ $0x20, CX - VPSRLQ $0x04, Y10, Y11 - VPAND Y9, Y10, Y10 - VPAND Y9, Y11, Y11 - VPSHUFB Y10, Y6, Y10 - VPSHUFB Y11, Y7, Y11 - XOR3WAY( $0x00, Y10, Y11, Y8) - - // Store 1 outputs - VMOVDQU Y8, (DI) - ADDQ $0x20, DI - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_4x1_loop - VZEROUPPER - -mulAvxTwo_4x1_end: - RET - // func mulAvxTwo_4x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_4x1_64(SB), $0-88 @@ -15779,7 +20268,6 @@ TEXT ·mulAvxTwo_4x1_64(SB), $0-88 MOVQ 48(DX), DI MOVQ 72(DX), DX MOVQ out_base+48(FP), R8 - MOVQ out_base+48(FP), R8 MOVQ (R8), R8 MOVQ start+72(FP), R9 @@ -15955,6 +20443,76 @@ mulGFNI_4x1_64_loop: mulGFNI_4x1_64_end: RET +// func mulAvxGFNI_4x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 7 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), DI + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, DI + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, CX + +mulAvxGFNI_4x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y5 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y5, Y4 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y5 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y5, Y5 + VXORPD Y4, Y5, Y4 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y5 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y5, Y5 + VXORPD Y4, Y5, Y4 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (CX), Y5 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y3, Y5, Y5 + VXORPD Y4, Y5, Y4 + + // Store 1 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x1_loop + VZEROUPPER + +mulAvxGFNI_4x1_end: + RET + // func mulGFNI_4x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_4x1_64Xor(SB), $0-88 @@ -16029,98 +20587,78 @@ mulGFNI_4x1_64Xor_loop: mulGFNI_4x1_64Xor_end: RET -// func mulAvxTwo_4x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_4x1Xor(SB), NOSPLIT, $0-88 +// func mulAvxGFNI_4x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x1Xor(SB), $0-88 // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 12 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_4x1Xor_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VMOVDQU 192(CX), Y6 - VMOVDQU 224(CX), Y7 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), BX - MOVQ 48(CX), SI - MOVQ 72(CX), CX - MOVQ out_base+48(FP), DI - MOVQ (DI), DI - MOVQ start+72(FP), R8 + // Full registers estimated 7 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), DI + MOVQ start+72(FP), R8 // Add start offset to output ADDQ R8, DI // Add start offset to input - ADDQ R8, DX - ADDQ R8, BX - ADDQ R8, SI - ADDQ R8, CX - MOVQ $0x0000000f, R8 - MOVQ R8, X9 - VPBROADCASTB X9, Y9 + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, CX + +mulAvxGFNI_4x1Xor_loop: + // Load 1 outputs + VMOVDQU (DI), Y4 -mulAvxTwo_4x1Xor_loop: // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (DX), Y10 - ADDQ $0x20, DX - VPSRLQ $0x04, Y10, Y11 - VPAND Y9, Y10, Y10 - VPAND Y9, Y11, Y11 - VMOVDQU (DI), Y8 - VPSHUFB Y10, Y0, Y10 - VPSHUFB Y11, Y1, Y11 - XOR3WAY( $0x00, Y10, Y11, Y8) + VMOVDQU (DX), Y5 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y5, Y5 + VXORPD Y4, Y5, Y4 // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (BX), Y10 - ADDQ $0x20, BX - VPSRLQ $0x04, Y10, Y11 - VPAND Y9, Y10, Y10 - VPAND Y9, Y11, Y11 - VPSHUFB Y10, Y2, Y10 - VPSHUFB Y11, Y3, Y11 - XOR3WAY( $0x00, Y10, Y11, Y8) + VMOVDQU (BX), Y5 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y5, Y5 + VXORPD Y4, Y5, Y4 // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (SI), Y10 - ADDQ $0x20, SI - VPSRLQ $0x04, Y10, Y11 - VPAND Y9, Y10, Y10 - VPAND Y9, Y11, Y11 - VPSHUFB Y10, Y4, Y10 - VPSHUFB Y11, Y5, Y11 - XOR3WAY( $0x00, Y10, Y11, Y8) + VMOVDQU (SI), Y5 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y5, Y5 + VXORPD Y4, Y5, Y4 // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (CX), Y10 - ADDQ $0x20, CX - VPSRLQ $0x04, Y10, Y11 - VPAND Y9, Y10, Y10 - VPAND Y9, Y11, Y11 - VPSHUFB Y10, Y6, Y10 - VPSHUFB Y11, Y7, Y11 - XOR3WAY( $0x00, Y10, Y11, Y8) + VMOVDQU (CX), Y5 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y3, Y5, Y5 + VXORPD Y4, Y5, Y4 // Store 1 outputs - VMOVDQU Y8, (DI) + VMOVDQU Y4, (DI) ADDQ $0x20, DI // Prepare for next loop DECQ AX - JNZ mulAvxTwo_4x1Xor_loop + JNZ mulAvxGFNI_4x1Xor_loop VZEROUPPER -mulAvxTwo_4x1Xor_end: +mulAvxGFNI_4x1Xor_end: RET // func mulAvxTwo_4x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -16140,7 +20678,6 @@ TEXT ·mulAvxTwo_4x1_64Xor(SB), $0-88 MOVQ 48(DX), DI MOVQ 72(DX), DX MOVQ out_base+48(FP), R8 - MOVQ out_base+48(FP), R8 MOVQ (R8), R8 MOVQ start+72(FP), R9 @@ -16250,123 +20787,6 @@ mulAvxTwo_4x1_64Xor_loop: mulAvxTwo_4x1_64Xor_end: RET -// func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_4x2(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 23 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_4x2_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), DX - MOVQ out_base+48(FP), R8 - MOVQ (R8), R9 - MOVQ 24(R8), R8 - MOVQ start+72(FP), R10 - - // Add start offset to output - ADDQ R10, R9 - ADDQ R10, R8 - - // Add start offset to input - ADDQ R10, BX - ADDQ R10, SI - ADDQ R10, DI - ADDQ R10, DX - MOVQ $0x0000000f, R10 - MOVQ R10, X2 - VPBROADCASTB X2, Y2 - -mulAvxTwo_4x2_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y0 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y1 - - // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Store 2 outputs - VMOVDQU Y0, (R9) - ADDQ $0x20, R9 - VMOVDQU Y1, (R8) - ADDQ $0x20, R8 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_4x2_loop - VZEROUPPER - -mulAvxTwo_4x2_end: - RET - // func mulAvxTwo_4x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_4x2_64(SB), $0-88 @@ -16384,7 +20804,6 @@ TEXT ·mulAvxTwo_4x2_64(SB), $0-88 MOVQ 48(DX), DI MOVQ 72(DX), DX MOVQ out_base+48(FP), R8 - MOVQ out_base+48(FP), R8 MOVQ (R8), R9 MOVQ 24(R8), R8 MOVQ start+72(FP), R10 @@ -16612,6 +21031,91 @@ mulGFNI_4x2_64_loop: mulGFNI_4x2_64_end: RET +// func mulAvxGFNI_4x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), DI + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, R8 + ADDQ R9, DI + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, CX + +mulAvxGFNI_4x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y10 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y10, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y10, Y9 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y10 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (SI), Y10 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y5, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (CX), Y10 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Store 2 outputs + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x2_loop + VZEROUPPER + +mulAvxGFNI_4x2_end: + RET + // func mulGFNI_4x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_4x2_64Xor(SB), $0-88 @@ -16703,123 +21207,95 @@ mulGFNI_4x2_64Xor_loop: mulGFNI_4x2_64Xor_end: RET -// func mulAvxTwo_4x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_4x2Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_4x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x2Xor(SB), $0-88 + // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 23 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_4x2Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), DX - MOVQ out_base+48(FP), R8 - MOVQ (R8), R9 - MOVQ 24(R8), R8 - MOVQ start+72(FP), R10 + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), DI + MOVQ start+72(FP), R9 // Add start offset to output - ADDQ R10, R9 - ADDQ R10, R8 + ADDQ R9, R8 + ADDQ R9, DI // Add start offset to input - ADDQ R10, BX - ADDQ R10, SI - ADDQ R10, DI - ADDQ R10, DX - MOVQ $0x0000000f, R10 - MOVQ R10, X2 - VPBROADCASTB X2, Y2 + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, CX + +mulAvxGFNI_4x2Xor_loop: + // Load 2 outputs + VMOVDQU (R8), Y8 + VMOVDQU (DI), Y9 -mulAvxTwo_4x2Xor_loop: // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (R9), Y0 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU (R8), Y1 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DX), Y10 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y10, Y11 + VXORPD Y9, Y11, Y9 // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (BX), Y10 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y10, Y11 + VXORPD Y9, Y11, Y9 // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (SI), Y10 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y5, Y10, Y11 + VXORPD Y9, Y11, Y9 // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (CX), Y10 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y10, Y11 + VXORPD Y9, Y11, Y9 // Store 2 outputs - VMOVDQU Y0, (R9) - ADDQ $0x20, R9 - VMOVDQU Y1, (R8) + VMOVDQU Y8, (R8) ADDQ $0x20, R8 + VMOVDQU Y9, (DI) + ADDQ $0x20, DI // Prepare for next loop DECQ AX - JNZ mulAvxTwo_4x2Xor_loop + JNZ mulAvxGFNI_4x2Xor_loop VZEROUPPER -mulAvxTwo_4x2Xor_end: +mulAvxGFNI_4x2Xor_end: RET // func mulAvxTwo_4x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -16839,7 +21315,6 @@ TEXT ·mulAvxTwo_4x2_64Xor(SB), $0-88 MOVQ 48(DX), DI MOVQ 72(DX), DX MOVQ out_base+48(FP), R8 - MOVQ out_base+48(FP), R8 MOVQ (R8), R9 MOVQ 24(R8), R8 MOVQ start+72(FP), R10 @@ -16988,147 +21463,6 @@ mulAvxTwo_4x2_64Xor_loop: mulAvxTwo_4x2_64Xor_end: RET -// func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_4x3(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 32 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_4x3_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), DX - MOVQ out_base+48(FP), R8 - MOVQ (R8), R9 - MOVQ 24(R8), R10 - MOVQ 48(R8), R8 - MOVQ start+72(FP), R11 - - // Add start offset to output - ADDQ R11, R9 - ADDQ R11, R10 - ADDQ R11, R8 - - // Add start offset to input - ADDQ R11, BX - ADDQ R11, SI - ADDQ R11, DI - ADDQ R11, DX - MOVQ $0x0000000f, R11 - MOVQ R11, X3 - VPBROADCASTB X3, Y3 - -mulAvxTwo_4x3_loop: - // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y0 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y1 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y2 - - // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Store 3 outputs - VMOVDQU Y0, (R9) - ADDQ $0x20, R9 - VMOVDQU Y1, (R10) - ADDQ $0x20, R10 - VMOVDQU Y2, (R8) - ADDQ $0x20, R8 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_4x3_loop - VZEROUPPER - -mulAvxTwo_4x3_end: - RET - // func mulAvxTwo_4x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_4x3_64(SB), $0-88 @@ -17146,7 +21480,6 @@ TEXT ·mulAvxTwo_4x3_64(SB), $0-88 MOVQ 48(DX), DI MOVQ 72(DX), DX MOVQ out_base+48(FP), R8 - MOVQ out_base+48(FP), R8 MOVQ (R8), R9 MOVQ 24(R8), R10 MOVQ 48(R8), R8 @@ -17426,6 +21759,106 @@ mulGFNI_4x3_64_loop: mulGFNI_4x3_64_end: RET +// func mulAvxGFNI_4x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x3(SB), $0-88 + // Loading 11 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R8 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, R8 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, DX + +mulAvxGFNI_4x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x3_loop + VZEROUPPER + +mulAvxGFNI_4x3_end: + RET + // func mulGFNI_4x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_4x3_64Xor(SB), $0-88 @@ -17534,27 +21967,39 @@ mulGFNI_4x3_64Xor_loop: mulGFNI_4x3_64Xor_end: RET -// func mulAvxTwo_4x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_4x3Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_4x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x3Xor(SB), $0-88 + // Loading 11 of 12 tables to registers // Destination kept in GP registers - // Full registers estimated 32 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_4x3Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), DX - MOVQ out_base+48(FP), R8 - MOVQ (R8), R9 - MOVQ 24(R8), R10 - MOVQ 48(R8), R8 - MOVQ start+72(FP), R11 + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R8 + MOVQ start+72(FP), R11 // Add start offset to output ADDQ R11, R9 @@ -17562,120 +22007,72 @@ TEXT ·mulAvxTwo_4x3Xor(SB), NOSPLIT, $0-88 ADDQ R11, R8 // Add start offset to input - ADDQ R11, BX - ADDQ R11, SI - ADDQ R11, DI - ADDQ R11, DX - MOVQ $0x0000000f, R11 - MOVQ R11, X3 - VPBROADCASTB X3, Y3 + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, DX + +mulAvxGFNI_4x3Xor_loop: + // Load 3 outputs + VMOVDQU (R9), Y11 + VMOVDQU (R10), Y12 + VMOVDQU (R8), Y13 -mulAvxTwo_4x3Xor_loop: // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (R9), Y0 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU (R10), Y1 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU (R8), Y2 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 3 outputs - VMOVDQU Y0, (R9) + VMOVDQU Y11, (R9) ADDQ $0x20, R9 - VMOVDQU Y1, (R10) + VMOVDQU Y12, (R10) ADDQ $0x20, R10 - VMOVDQU Y2, (R8) + VMOVDQU Y13, (R8) ADDQ $0x20, R8 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_4x3Xor_loop + JNZ mulAvxGFNI_4x3Xor_loop VZEROUPPER -mulAvxTwo_4x3Xor_end: +mulAvxGFNI_4x3Xor_end: RET // func mulAvxTwo_4x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -17695,7 +22092,6 @@ TEXT ·mulAvxTwo_4x3_64Xor(SB), $0-88 MOVQ 48(DX), DI MOVQ 72(DX), DX MOVQ out_base+48(FP), R8 - MOVQ out_base+48(FP), R8 MOVQ (R8), R9 MOVQ 24(R8), R10 MOVQ 48(R8), R8 @@ -18163,6 +22559,121 @@ mulGFNI_4x4_64_loop: mulGFNI_4x4_64_end: RET +// func mulAvxGFNI_4x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x4(SB), $0-88 + // Loading 10 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R8 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R8 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, DX + +mulAvxGFNI_4x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x4_loop + VZEROUPPER + +mulAvxGFNI_4x4_end: + RET + // func mulGFNI_4x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_4x4_64Xor(SB), $0-88 @@ -18288,6 +22799,131 @@ mulGFNI_4x4_64Xor_loop: mulGFNI_4x4_64Xor_end: RET +// func mulAvxGFNI_4x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x4Xor(SB), $0-88 + // Loading 10 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R8 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R8 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, DX + +mulAvxGFNI_4x4Xor_loop: + // Load 4 outputs + VMOVDQU (R9), Y10 + VMOVDQU (R10), Y11 + VMOVDQU (R11), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x4Xor_loop + VZEROUPPER + +mulAvxGFNI_4x4Xor_end: + RET + // func mulAvxTwo_4x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_4x4Xor(SB), NOSPLIT, $0-88 @@ -18776,6 +23412,136 @@ mulGFNI_4x5_64_loop: mulGFNI_4x5_64_end: RET +// func mulAvxGFNI_4x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x5(SB), $0-88 + // Loading 9 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 27 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R8 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R8 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, DX + +mulAvxGFNI_4x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x5_loop + VZEROUPPER + +mulAvxGFNI_4x5_end: + RET + // func mulGFNI_4x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_4x5_64Xor(SB), $0-88 @@ -18918,6 +23684,148 @@ mulGFNI_4x5_64Xor_loop: mulGFNI_4x5_64Xor_end: RET +// func mulAvxGFNI_4x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x5Xor(SB), $0-88 + // Loading 9 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 27 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R8 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R8 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, DX + +mulAvxGFNI_4x5Xor_loop: + // Load 5 outputs + VMOVDQU (R9), Y9 + VMOVDQU (R10), Y10 + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x5Xor_loop + VZEROUPPER + +mulAvxGFNI_4x5Xor_end: + RET + // func mulAvxTwo_4x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_4x5Xor(SB), NOSPLIT, $0-88 @@ -19470,6 +24378,151 @@ mulGFNI_4x6_64_loop: mulGFNI_4x6_64_end: RET +// func mulAvxGFNI_4x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x6(SB), $0-88 + // Loading 8 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R8 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R8 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, DX + +mulAvxGFNI_4x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x6_loop + VZEROUPPER + +mulAvxGFNI_4x6_end: + RET + // func mulGFNI_4x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_4x6_64Xor(SB), $0-88 @@ -19629,6 +24682,165 @@ mulGFNI_4x6_64Xor_loop: mulGFNI_4x6_64Xor_end: RET +// func mulAvxGFNI_4x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x6Xor(SB), $0-88 + // Loading 8 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R8 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R8 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, DX + +mulAvxGFNI_4x6Xor_loop: + // Load 6 outputs + VMOVDQU (R9), Y8 + VMOVDQU (R10), Y9 + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x6Xor_loop + VZEROUPPER + +mulAvxGFNI_4x6Xor_end: + RET + // func mulAvxTwo_4x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_4x6Xor(SB), NOSPLIT, $0-88 @@ -20240,6 +25452,166 @@ mulGFNI_4x7_64_loop: mulGFNI_4x7_64_end: RET +// func mulAvxGFNI_4x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x7(SB), $0-88 + // Loading 7 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R8 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R8 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, DX + +mulAvxGFNI_4x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x7_loop + VZEROUPPER + +mulAvxGFNI_4x7_end: + RET + // func mulGFNI_4x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_4x7_64Xor(SB), $0-88 @@ -20411,6 +25783,182 @@ mulGFNI_4x7_64Xor_loop: mulGFNI_4x7_64Xor_end: RET +// func mulAvxGFNI_4x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x7Xor(SB), $0-88 + // Loading 7 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R8 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R8 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, DX + +mulAvxGFNI_4x7Xor_loop: + // Load 7 outputs + VMOVDQU (R9), Y7 + VMOVDQU (R10), Y8 + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x7Xor_loop + VZEROUPPER + +mulAvxGFNI_4x7Xor_end: + RET + // func mulAvxTwo_4x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_4x7Xor(SB), NOSPLIT, $0-88 @@ -21081,6 +26629,181 @@ mulGFNI_4x8_64_loop: mulGFNI_4x8_64_end: RET +// func mulAvxGFNI_4x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x8(SB), $8-88 + // Loading 6 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, DX + +mulAvxGFNI_4x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x8_loop + VZEROUPPER + +mulAvxGFNI_4x8_end: + RET + // func mulGFNI_4x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_4x8_64Xor(SB), $8-88 @@ -21264,6 +26987,199 @@ mulGFNI_4x8_64Xor_loop: mulGFNI_4x8_64Xor_end: RET +// func mulAvxGFNI_4x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x8Xor(SB), $8-88 + // Loading 6 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, DX + +mulAvxGFNI_4x8Xor_loop: + // Load 8 outputs + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x8Xor_loop + VZEROUPPER + +mulAvxGFNI_4x8Xor_end: + RET + // func mulAvxTwo_4x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_4x8Xor(SB), NOSPLIT, $8-88 @@ -21999,6 +27915,200 @@ mulGFNI_4x9_64_loop: mulGFNI_4x9_64_end: RET +// func mulAvxGFNI_4x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x9(SB), $8-88 + // Loading 5 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), AX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_4x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_4x9_loop + VZEROUPPER + +mulAvxGFNI_4x9_end: + RET + // func mulGFNI_4x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_4x9_64Xor(SB), $8-88 @@ -22198,6 +28308,220 @@ mulGFNI_4x9_64Xor_loop: mulGFNI_4x9_64Xor_end: RET +// func mulAvxGFNI_4x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x9Xor(SB), $8-88 + // Loading 5 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), AX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_4x9Xor_loop: + // Load 9 outputs + VMOVDQU (R8), Y5 + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_4x9Xor_loop + VZEROUPPER + +mulAvxGFNI_4x9Xor_end: + RET + // func mulAvxTwo_4x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_4x9Xor(SB), NOSPLIT, $8-88 @@ -22946,6 +29270,190 @@ mulGFNI_4x10_64_loop: mulGFNI_4x10_64_end: RET +// func mulAvxGFNI_4x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x10(SB), $0-88 + // Loading 4 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ start+72(FP), R9 + + // Add start offset to input + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, DX + +mulAvxGFNI_4x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R8), R10 + VMOVDQU Y4, (R10)(R9*1) + MOVQ 24(R8), R10 + VMOVDQU Y5, (R10)(R9*1) + MOVQ 48(R8), R10 + VMOVDQU Y6, (R10)(R9*1) + MOVQ 72(R8), R10 + VMOVDQU Y7, (R10)(R9*1) + MOVQ 96(R8), R10 + VMOVDQU Y8, (R10)(R9*1) + MOVQ 120(R8), R10 + VMOVDQU Y9, (R10)(R9*1) + MOVQ 144(R8), R10 + VMOVDQU Y10, (R10)(R9*1) + MOVQ 168(R8), R10 + VMOVDQU Y11, (R10)(R9*1) + MOVQ 192(R8), R10 + VMOVDQU Y12, (R10)(R9*1) + MOVQ 216(R8), R10 + VMOVDQU Y13, (R10)(R9*1) + + // Prepare for next loop + ADDQ $0x20, R9 + DECQ AX + JNZ mulAvxGFNI_4x10_loop + VZEROUPPER + +mulAvxGFNI_4x10_end: + RET + // func mulGFNI_4x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_4x10_64Xor(SB), $0-88 @@ -23142,6 +29650,222 @@ mulGFNI_4x10_64Xor_loop: mulGFNI_4x10_64Xor_end: RET +// func mulAvxGFNI_4x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x10Xor(SB), $0-88 + // Loading 4 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ start+72(FP), R9 + + // Add start offset to input + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, DX + +mulAvxGFNI_4x10Xor_loop: + // Load 10 outputs + MOVQ (R8), R10 + VMOVDQU (R10)(R9*1), Y4 + MOVQ 24(R8), R10 + VMOVDQU (R10)(R9*1), Y5 + MOVQ 48(R8), R10 + VMOVDQU (R10)(R9*1), Y6 + MOVQ 72(R8), R10 + VMOVDQU (R10)(R9*1), Y7 + MOVQ 96(R8), R10 + VMOVDQU (R10)(R9*1), Y8 + MOVQ 120(R8), R10 + VMOVDQU (R10)(R9*1), Y9 + MOVQ 144(R8), R10 + VMOVDQU (R10)(R9*1), Y10 + MOVQ 168(R8), R10 + VMOVDQU (R10)(R9*1), Y11 + MOVQ 192(R8), R10 + VMOVDQU (R10)(R9*1), Y12 + MOVQ 216(R8), R10 + VMOVDQU (R10)(R9*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R8), R10 + VMOVDQU Y4, (R10)(R9*1) + MOVQ 24(R8), R10 + VMOVDQU Y5, (R10)(R9*1) + MOVQ 48(R8), R10 + VMOVDQU Y6, (R10)(R9*1) + MOVQ 72(R8), R10 + VMOVDQU Y7, (R10)(R9*1) + MOVQ 96(R8), R10 + VMOVDQU Y8, (R10)(R9*1) + MOVQ 120(R8), R10 + VMOVDQU Y9, (R10)(R9*1) + MOVQ 144(R8), R10 + VMOVDQU Y10, (R10)(R9*1) + MOVQ 168(R8), R10 + VMOVDQU Y11, (R10)(R9*1) + MOVQ 192(R8), R10 + VMOVDQU Y12, (R10)(R9*1) + MOVQ 216(R8), R10 + VMOVDQU Y13, (R10)(R9*1) + + // Prepare for next loop + ADDQ $0x20, R9 + DECQ AX + JNZ mulAvxGFNI_4x10Xor_loop + VZEROUPPER + +mulAvxGFNI_4x10Xor_end: + RET + // func mulAvxTwo_4x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_4x10Xor(SB), NOSPLIT, $0-88 @@ -23450,113 +30174,6 @@ mulAvxTwo_4x10Xor_loop: mulAvxTwo_4x10Xor_end: RET -// func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_5x1(SB), NOSPLIT, $0-88 - // Loading all tables to registers - // Destination kept in GP registers - // Full registers estimated 14 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_5x1_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VMOVDQU 192(CX), Y6 - VMOVDQU 224(CX), Y7 - VMOVDQU 256(CX), Y8 - VMOVDQU 288(CX), Y9 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), BX - MOVQ 48(CX), SI - MOVQ 72(CX), DI - MOVQ 96(CX), CX - MOVQ out_base+48(FP), R8 - MOVQ (R8), R8 - MOVQ start+72(FP), R9 - - // Add start offset to output - ADDQ R9, R8 - - // Add start offset to input - ADDQ R9, DX - ADDQ R9, BX - ADDQ R9, SI - ADDQ R9, DI - ADDQ R9, CX - MOVQ $0x0000000f, R9 - MOVQ R9, X11 - VPBROADCASTB X11, Y11 - -mulAvxTwo_5x1_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (DX), Y12 - ADDQ $0x20, DX - VPSRLQ $0x04, Y12, Y13 - VPAND Y11, Y12, Y12 - VPAND Y11, Y13, Y13 - VPSHUFB Y12, Y0, Y12 - VPSHUFB Y13, Y1, Y13 - VPXOR Y12, Y13, Y10 - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (BX), Y12 - ADDQ $0x20, BX - VPSRLQ $0x04, Y12, Y13 - VPAND Y11, Y12, Y12 - VPAND Y11, Y13, Y13 - VPSHUFB Y12, Y2, Y12 - VPSHUFB Y13, Y3, Y13 - XOR3WAY( $0x00, Y12, Y13, Y10) - - // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (SI), Y12 - ADDQ $0x20, SI - VPSRLQ $0x04, Y12, Y13 - VPAND Y11, Y12, Y12 - VPAND Y11, Y13, Y13 - VPSHUFB Y12, Y4, Y12 - VPSHUFB Y13, Y5, Y13 - XOR3WAY( $0x00, Y12, Y13, Y10) - - // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (DI), Y12 - ADDQ $0x20, DI - VPSRLQ $0x04, Y12, Y13 - VPAND Y11, Y12, Y12 - VPAND Y11, Y13, Y13 - VPSHUFB Y12, Y6, Y12 - VPSHUFB Y13, Y7, Y13 - XOR3WAY( $0x00, Y12, Y13, Y10) - - // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (CX), Y12 - ADDQ $0x20, CX - VPSRLQ $0x04, Y12, Y13 - VPAND Y11, Y12, Y12 - VPAND Y11, Y13, Y13 - VPSHUFB Y12, Y8, Y12 - VPSHUFB Y13, Y9, Y13 - XOR3WAY( $0x00, Y12, Y13, Y10) - - // Store 1 outputs - VMOVDQU Y10, (R8) - ADDQ $0x20, R8 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_5x1_loop - VZEROUPPER - -mulAvxTwo_5x1_end: - RET - // func mulAvxTwo_5x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_5x1_64(SB), $0-88 @@ -23575,7 +30192,6 @@ TEXT ·mulAvxTwo_5x1_64(SB), $0-88 MOVQ 72(DX), R8 MOVQ 96(DX), DX MOVQ out_base+48(FP), R9 - MOVQ out_base+48(FP), R9 MOVQ (R9), R9 MOVQ start+72(FP), R10 @@ -23780,6 +30396,85 @@ mulGFNI_5x1_64_loop: mulGFNI_5x1_64_end: RET +// func mulAvxGFNI_5x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R8 + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, R8 + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, CX + +mulAvxGFNI_5x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y6 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y6, Y5 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y6 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y6 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y6 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (CX), Y6 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Store 1 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x1_loop + VZEROUPPER + +mulAvxGFNI_5x1_end: + RET + // func mulGFNI_5x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_5x1_64Xor(SB), $0-88 @@ -23863,112 +30558,87 @@ mulGFNI_5x1_64Xor_loop: mulGFNI_5x1_64Xor_end: RET -// func mulAvxTwo_5x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_5x1Xor(SB), NOSPLIT, $0-88 +// func mulAvxGFNI_5x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x1Xor(SB), $0-88 // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 14 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_5x1Xor_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VMOVDQU 192(CX), Y6 - VMOVDQU 224(CX), Y7 - VMOVDQU 256(CX), Y8 - VMOVDQU 288(CX), Y9 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), BX - MOVQ 48(CX), SI - MOVQ 72(CX), DI - MOVQ 96(CX), CX - MOVQ out_base+48(FP), R8 - MOVQ (R8), R8 - MOVQ start+72(FP), R9 + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R8 + MOVQ start+72(FP), R9 // Add start offset to output ADDQ R9, R8 // Add start offset to input - ADDQ R9, DX - ADDQ R9, BX - ADDQ R9, SI - ADDQ R9, DI - ADDQ R9, CX - MOVQ $0x0000000f, R9 - MOVQ R9, X11 - VPBROADCASTB X11, Y11 + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, CX + +mulAvxGFNI_5x1Xor_loop: + // Load 1 outputs + VMOVDQU (R8), Y5 -mulAvxTwo_5x1Xor_loop: // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (DX), Y12 - ADDQ $0x20, DX - VPSRLQ $0x04, Y12, Y13 - VPAND Y11, Y12, Y12 - VPAND Y11, Y13, Y13 - VMOVDQU (R8), Y10 - VPSHUFB Y12, Y0, Y12 - VPSHUFB Y13, Y1, Y13 - XOR3WAY( $0x00, Y12, Y13, Y10) + VMOVDQU (DX), Y6 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y6, Y6 + VXORPD Y5, Y6, Y5 // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (BX), Y12 - ADDQ $0x20, BX - VPSRLQ $0x04, Y12, Y13 - VPAND Y11, Y12, Y12 - VPAND Y11, Y13, Y13 - VPSHUFB Y12, Y2, Y12 - VPSHUFB Y13, Y3, Y13 - XOR3WAY( $0x00, Y12, Y13, Y10) + VMOVDQU (BX), Y6 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y6, Y6 + VXORPD Y5, Y6, Y5 // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (SI), Y12 - ADDQ $0x20, SI - VPSRLQ $0x04, Y12, Y13 - VPAND Y11, Y12, Y12 - VPAND Y11, Y13, Y13 - VPSHUFB Y12, Y4, Y12 - VPSHUFB Y13, Y5, Y13 - XOR3WAY( $0x00, Y12, Y13, Y10) + VMOVDQU (SI), Y6 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y6, Y6 + VXORPD Y5, Y6, Y5 // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (DI), Y12 - ADDQ $0x20, DI - VPSRLQ $0x04, Y12, Y13 - VPAND Y11, Y12, Y12 - VPAND Y11, Y13, Y13 - VPSHUFB Y12, Y6, Y12 - VPSHUFB Y13, Y7, Y13 - XOR3WAY( $0x00, Y12, Y13, Y10) + VMOVDQU (DI), Y6 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y6, Y6 + VXORPD Y5, Y6, Y5 // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (CX), Y12 - ADDQ $0x20, CX - VPSRLQ $0x04, Y12, Y13 - VPAND Y11, Y12, Y12 - VPAND Y11, Y13, Y13 - VPSHUFB Y12, Y8, Y12 - VPSHUFB Y13, Y9, Y13 - XOR3WAY( $0x00, Y12, Y13, Y10) + VMOVDQU (CX), Y6 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y6, Y6 + VXORPD Y5, Y6, Y5 // Store 1 outputs - VMOVDQU Y10, (R8) + VMOVDQU Y5, (R8) ADDQ $0x20, R8 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_5x1Xor_loop + JNZ mulAvxGFNI_5x1Xor_loop VZEROUPPER -mulAvxTwo_5x1Xor_end: +mulAvxGFNI_5x1Xor_end: RET // func mulAvxTwo_5x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -23989,7 +30659,6 @@ TEXT ·mulAvxTwo_5x1_64Xor(SB), $0-88 MOVQ 72(DX), R8 MOVQ 96(DX), DX MOVQ out_base+48(FP), R9 - MOVQ out_base+48(FP), R9 MOVQ (R9), R9 MOVQ start+72(FP), R10 @@ -24119,142 +30788,6 @@ mulAvxTwo_5x1_64Xor_loop: mulAvxTwo_5x1_64Xor_end: RET -// func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_5x2(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 27 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_5x2_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), DX - MOVQ out_base+48(FP), R9 - MOVQ (R9), R10 - MOVQ 24(R9), R9 - MOVQ start+72(FP), R11 - - // Add start offset to output - ADDQ R11, R10 - ADDQ R11, R9 - - // Add start offset to input - ADDQ R11, BX - ADDQ R11, SI - ADDQ R11, DI - ADDQ R11, R8 - ADDQ R11, DX - MOVQ $0x0000000f, R11 - MOVQ R11, X2 - VPBROADCASTB X2, Y2 - -mulAvxTwo_5x2_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y0 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y1 - - // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Store 2 outputs - VMOVDQU Y0, (R10) - ADDQ $0x20, R10 - VMOVDQU Y1, (R9) - ADDQ $0x20, R9 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_5x2_loop - VZEROUPPER - -mulAvxTwo_5x2_end: - RET - // func mulAvxTwo_5x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_5x2_64(SB), $0-88 @@ -24273,7 +30806,6 @@ TEXT ·mulAvxTwo_5x2_64(SB), $0-88 MOVQ 72(DX), R8 MOVQ 96(DX), DX MOVQ out_base+48(FP), R9 - MOVQ out_base+48(FP), R9 MOVQ (R9), R10 MOVQ 24(R9), R9 MOVQ start+72(FP), R11 @@ -24541,6 +31073,103 @@ mulGFNI_5x2_64_loop: mulGFNI_5x2_64_end: RET +// func mulAvxGFNI_5x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R8 + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R9 + ADDQ R10, R8 + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, CX + +mulAvxGFNI_5x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y11 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y12 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (SI), Y12 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (DI), Y12 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y8, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 2 outputs + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x2_loop + VZEROUPPER + +mulAvxGFNI_5x2_end: + RET + // func mulGFNI_5x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_5x2_64Xor(SB), $0-88 @@ -24644,142 +31273,107 @@ mulGFNI_5x2_64Xor_loop: mulGFNI_5x2_64Xor_end: RET -// func mulAvxTwo_5x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_5x2Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_5x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x2Xor(SB), $0-88 + // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 27 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_5x2Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), DX - MOVQ out_base+48(FP), R9 - MOVQ (R9), R10 - MOVQ 24(R9), R9 - MOVQ start+72(FP), R11 + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R8 + MOVQ start+72(FP), R10 // Add start offset to output - ADDQ R11, R10 - ADDQ R11, R9 + ADDQ R10, R9 + ADDQ R10, R8 // Add start offset to input - ADDQ R11, BX - ADDQ R11, SI - ADDQ R11, DI - ADDQ R11, R8 - ADDQ R11, DX - MOVQ $0x0000000f, R11 - MOVQ R11, X2 - VPBROADCASTB X2, Y2 + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, CX + +mulAvxGFNI_5x2Xor_loop: + // Load 2 outputs + VMOVDQU (R9), Y10 + VMOVDQU (R8), Y11 -mulAvxTwo_5x2Xor_loop: // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (R10), Y0 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU (R9), Y1 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y13 + VXORPD Y11, Y13, Y11 // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (BX), Y12 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y11, Y13, Y11 // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (SI), Y12 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y11, Y13, Y11 // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DI), Y12 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y11, Y13, Y11 // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y8, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y12, Y13 + VXORPD Y11, Y13, Y11 // Store 2 outputs - VMOVDQU Y0, (R10) - ADDQ $0x20, R10 - VMOVDQU Y1, (R9) + VMOVDQU Y10, (R9) ADDQ $0x20, R9 + VMOVDQU Y11, (R8) + ADDQ $0x20, R8 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_5x2Xor_loop + JNZ mulAvxGFNI_5x2Xor_loop VZEROUPPER -mulAvxTwo_5x2Xor_end: +mulAvxGFNI_5x2Xor_end: RET // func mulAvxTwo_5x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -24800,7 +31394,6 @@ TEXT ·mulAvxTwo_5x2_64Xor(SB), $0-88 MOVQ 72(DX), R8 MOVQ 96(DX), DX MOVQ out_base+48(FP), R9 - MOVQ out_base+48(FP), R9 MOVQ (R9), R10 MOVQ 24(R9), R9 MOVQ start+72(FP), R11 @@ -24977,171 +31570,6 @@ mulAvxTwo_5x2_64Xor_loop: mulAvxTwo_5x2_64Xor_end: RET -// func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_5x3(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 38 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_5x3_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), DX - MOVQ out_base+48(FP), R9 - MOVQ (R9), R10 - MOVQ 24(R9), R11 - MOVQ 48(R9), R9 - MOVQ start+72(FP), R12 - - // Add start offset to output - ADDQ R12, R10 - ADDQ R12, R11 - ADDQ R12, R9 - - // Add start offset to input - ADDQ R12, BX - ADDQ R12, SI - ADDQ R12, DI - ADDQ R12, R8 - ADDQ R12, DX - MOVQ $0x0000000f, R12 - MOVQ R12, X3 - VPBROADCASTB X3, Y3 - -mulAvxTwo_5x3_loop: - // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y0 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y1 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y2 - - // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Store 3 outputs - VMOVDQU Y0, (R10) - ADDQ $0x20, R10 - VMOVDQU Y1, (R11) - ADDQ $0x20, R11 - VMOVDQU Y2, (R9) - ADDQ $0x20, R9 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_5x3_loop - VZEROUPPER - -mulAvxTwo_5x3_end: - RET - // func mulAvxTwo_5x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_5x3_64(SB), $0-88 @@ -25160,7 +31588,6 @@ TEXT ·mulAvxTwo_5x3_64(SB), $0-88 MOVQ 72(DX), R8 MOVQ 96(DX), DX MOVQ out_base+48(FP), R9 - MOVQ out_base+48(FP), R9 MOVQ (R9), R10 MOVQ 24(R9), R11 MOVQ 48(R9), R9 @@ -25491,6 +31918,121 @@ mulGFNI_5x3_64_loop: mulGFNI_5x3_64_end: RET +// func mulAvxGFNI_5x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x3(SB), $0-88 + // Loading 11 of 15 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R9 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R9 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, DX + +mulAvxGFNI_5x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x3_loop + VZEROUPPER + +mulAvxGFNI_5x3_end: + RET + // func mulGFNI_5x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_5x3_64Xor(SB), $0-88 @@ -25614,28 +32156,40 @@ mulGFNI_5x3_64Xor_loop: mulGFNI_5x3_64Xor_end: RET -// func mulAvxTwo_5x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_5x3Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_5x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x3Xor(SB), $0-88 + // Loading 11 of 15 tables to registers // Destination kept in GP registers - // Full registers estimated 38 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_5x3Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), DX - MOVQ out_base+48(FP), R9 - MOVQ (R9), R10 - MOVQ 24(R9), R11 - MOVQ 48(R9), R9 - MOVQ start+72(FP), R12 + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R9 + MOVQ start+72(FP), R12 // Add start offset to output ADDQ R12, R10 @@ -25643,143 +32197,86 @@ TEXT ·mulAvxTwo_5x3Xor(SB), NOSPLIT, $0-88 ADDQ R12, R9 // Add start offset to input - ADDQ R12, BX - ADDQ R12, SI - ADDQ R12, DI - ADDQ R12, R8 - ADDQ R12, DX - MOVQ $0x0000000f, R12 - MOVQ R12, X3 - VPBROADCASTB X3, Y3 + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, DX + +mulAvxGFNI_5x3Xor_loop: + // Load 3 outputs + VMOVDQU (R10), Y11 + VMOVDQU (R11), Y12 + VMOVDQU (R9), Y13 -mulAvxTwo_5x3Xor_loop: // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (R10), Y0 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU (R11), Y1 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU (R9), Y2 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 3 outputs - VMOVDQU Y0, (R10) + VMOVDQU Y11, (R10) ADDQ $0x20, R10 - VMOVDQU Y1, (R11) + VMOVDQU Y12, (R11) ADDQ $0x20, R11 - VMOVDQU Y2, (R9) + VMOVDQU Y13, (R9) ADDQ $0x20, R9 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_5x3Xor_loop + JNZ mulAvxGFNI_5x3Xor_loop VZEROUPPER -mulAvxTwo_5x3Xor_end: +mulAvxGFNI_5x3Xor_end: RET // func mulAvxTwo_5x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -25800,7 +32297,6 @@ TEXT ·mulAvxTwo_5x3_64Xor(SB), $0-88 MOVQ 72(DX), R8 MOVQ 96(DX), DX MOVQ out_base+48(FP), R9 - MOVQ out_base+48(FP), R9 MOVQ (R9), R10 MOVQ 24(R9), R11 MOVQ 48(R9), R9 @@ -26351,6 +32847,139 @@ mulGFNI_5x4_64_loop: mulGFNI_5x4_64_end: RET +// func mulAvxGFNI_5x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x4(SB), $0-88 + // Loading 10 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R9 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R9 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, DX + +mulAvxGFNI_5x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x4_loop + VZEROUPPER + +mulAvxGFNI_5x4_end: + RET + // func mulGFNI_5x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_5x4_64Xor(SB), $0-88 @@ -26494,6 +33123,149 @@ mulGFNI_5x4_64Xor_loop: mulGFNI_5x4_64Xor_end: RET +// func mulAvxGFNI_5x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x4Xor(SB), $0-88 + // Loading 10 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R9 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R9 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, DX + +mulAvxGFNI_5x4Xor_loop: + // Load 4 outputs + VMOVDQU (R10), Y10 + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x4Xor_loop + VZEROUPPER + +mulAvxGFNI_5x4Xor_end: + RET + // func mulAvxTwo_5x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_5x4Xor(SB), NOSPLIT, $0-88 @@ -27066,6 +33838,157 @@ mulGFNI_5x5_64_loop: mulGFNI_5x5_64_end: RET +// func mulAvxGFNI_5x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x5(SB), $0-88 + // Loading 9 of 25 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R9 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R9 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, DX + +mulAvxGFNI_5x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x5_loop + VZEROUPPER + +mulAvxGFNI_5x5_end: + RET + // func mulGFNI_5x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_5x5_64Xor(SB), $0-88 @@ -27229,6 +34152,169 @@ mulGFNI_5x5_64Xor_loop: mulGFNI_5x5_64Xor_end: RET +// func mulAvxGFNI_5x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x5Xor(SB), $0-88 + // Loading 9 of 25 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R9 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R9 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, DX + +mulAvxGFNI_5x5Xor_loop: + // Load 5 outputs + VMOVDQU (R10), Y9 + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x5Xor_loop + VZEROUPPER + +mulAvxGFNI_5x5Xor_end: + RET + // func mulAvxTwo_5x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_5x5Xor(SB), NOSPLIT, $0-88 @@ -27872,6 +34958,175 @@ mulGFNI_5x6_64_loop: mulGFNI_5x6_64_end: RET +// func mulAvxGFNI_5x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x6(SB), $0-88 + // Loading 8 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R9 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R9 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, DX + +mulAvxGFNI_5x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x6_loop + VZEROUPPER + +mulAvxGFNI_5x6_end: + RET + // func mulGFNI_5x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_5x6_64Xor(SB), $0-88 @@ -28049,6 +35304,189 @@ mulGFNI_5x6_64Xor_loop: mulGFNI_5x6_64Xor_end: RET +// func mulAvxGFNI_5x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x6Xor(SB), $0-88 + // Loading 8 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R9 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R9 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, DX + +mulAvxGFNI_5x6Xor_loop: + // Load 6 outputs + VMOVDQU (R10), Y8 + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x6Xor_loop + VZEROUPPER + +mulAvxGFNI_5x6Xor_end: + RET + // func mulAvxTwo_5x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_5x6Xor(SB), NOSPLIT, $0-88 @@ -28763,6 +36201,193 @@ mulGFNI_5x7_64_loop: mulGFNI_5x7_64_end: RET +// func mulAvxGFNI_5x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x7(SB), $8-88 + // Loading 7 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, DX + +mulAvxGFNI_5x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x7_loop + VZEROUPPER + +mulAvxGFNI_5x7_end: + RET + // func mulGFNI_5x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_5x7_64Xor(SB), $8-88 @@ -28954,6 +36579,209 @@ mulGFNI_5x7_64Xor_loop: mulGFNI_5x7_64Xor_end: RET +// func mulAvxGFNI_5x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x7Xor(SB), $8-88 + // Loading 7 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, DX + +mulAvxGFNI_5x7Xor_loop: + // Load 7 outputs + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x7Xor_loop + VZEROUPPER + +mulAvxGFNI_5x7Xor_end: + RET + // func mulAvxTwo_5x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_5x7Xor(SB), NOSPLIT, $8-88 @@ -29745,6 +37573,215 @@ mulGFNI_5x8_64_loop: mulGFNI_5x8_64_end: RET +// func mulAvxGFNI_5x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x8(SB), $8-88 + // Loading 6 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), AX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_5x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_5x8_loop + VZEROUPPER + +mulAvxGFNI_5x8_end: + RET + // func mulGFNI_5x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_5x8_64Xor(SB), $8-88 @@ -29954,6 +37991,233 @@ mulGFNI_5x8_64Xor_loop: mulGFNI_5x8_64Xor_end: RET +// func mulAvxGFNI_5x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x8Xor(SB), $8-88 + // Loading 6 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), AX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_5x8Xor_loop: + // Load 8 outputs + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_5x8Xor_loop + VZEROUPPER + +mulAvxGFNI_5x8Xor_end: + RET + // func mulAvxTwo_5x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_5x8Xor(SB), NOSPLIT, $8-88 @@ -30774,6 +39038,210 @@ mulGFNI_5x9_64_loop: mulGFNI_5x9_64_end: RET +// func mulAvxGFNI_5x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x9(SB), $0-88 + // Loading 5 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulAvxGFNI_5x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R9), R11 + VMOVDQU Y5, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU Y6, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU Y7, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU Y8, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU Y9, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU Y10, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU Y11, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU Y12, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU Y13, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxGFNI_5x9_loop + VZEROUPPER + +mulAvxGFNI_5x9_end: + RET + // func mulGFNI_5x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_5x9_64Xor(SB), $0-88 @@ -30983,6 +39451,239 @@ mulGFNI_5x9_64Xor_loop: mulGFNI_5x9_64Xor_end: RET +// func mulAvxGFNI_5x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x9Xor(SB), $0-88 + // Loading 5 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulAvxGFNI_5x9Xor_loop: + // Load 9 outputs + MOVQ (R9), R11 + VMOVDQU (R11)(R10*1), Y5 + MOVQ 24(R9), R11 + VMOVDQU (R11)(R10*1), Y6 + MOVQ 48(R9), R11 + VMOVDQU (R11)(R10*1), Y7 + MOVQ 72(R9), R11 + VMOVDQU (R11)(R10*1), Y8 + MOVQ 96(R9), R11 + VMOVDQU (R11)(R10*1), Y9 + MOVQ 120(R9), R11 + VMOVDQU (R11)(R10*1), Y10 + MOVQ 144(R9), R11 + VMOVDQU (R11)(R10*1), Y11 + MOVQ 168(R9), R11 + VMOVDQU (R11)(R10*1), Y12 + MOVQ 192(R9), R11 + VMOVDQU (R11)(R10*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R9), R11 + VMOVDQU Y5, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU Y6, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU Y7, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU Y8, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU Y9, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU Y10, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU Y11, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU Y12, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU Y13, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxGFNI_5x9Xor_loop + VZEROUPPER + +mulAvxGFNI_5x9Xor_end: + RET + // func mulAvxTwo_5x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_5x9Xor(SB), NOSPLIT, $0-88 @@ -31858,6 +40559,226 @@ mulGFNI_5x10_64_loop: mulGFNI_5x10_64_end: RET +// func mulAvxGFNI_5x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x10(SB), $0-88 + // Loading 4 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulAvxGFNI_5x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R9), R11 + VMOVDQU Y4, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU Y5, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU Y6, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU Y7, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU Y8, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU Y9, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU Y10, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU Y11, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU Y12, (R11)(R10*1) + MOVQ 216(R9), R11 + VMOVDQU Y13, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxGFNI_5x10_loop + VZEROUPPER + +mulAvxGFNI_5x10_end: + RET + // func mulGFNI_5x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_5x10_64Xor(SB), $0-88 @@ -32080,6 +41001,258 @@ mulGFNI_5x10_64Xor_loop: mulGFNI_5x10_64Xor_end: RET +// func mulAvxGFNI_5x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x10Xor(SB), $0-88 + // Loading 4 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulAvxGFNI_5x10Xor_loop: + // Load 10 outputs + MOVQ (R9), R11 + VMOVDQU (R11)(R10*1), Y4 + MOVQ 24(R9), R11 + VMOVDQU (R11)(R10*1), Y5 + MOVQ 48(R9), R11 + VMOVDQU (R11)(R10*1), Y6 + MOVQ 72(R9), R11 + VMOVDQU (R11)(R10*1), Y7 + MOVQ 96(R9), R11 + VMOVDQU (R11)(R10*1), Y8 + MOVQ 120(R9), R11 + VMOVDQU (R11)(R10*1), Y9 + MOVQ 144(R9), R11 + VMOVDQU (R11)(R10*1), Y10 + MOVQ 168(R9), R11 + VMOVDQU (R11)(R10*1), Y11 + MOVQ 192(R9), R11 + VMOVDQU (R11)(R10*1), Y12 + MOVQ 216(R9), R11 + VMOVDQU (R11)(R10*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R9), R11 + VMOVDQU Y4, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU Y5, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU Y6, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU Y7, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU Y8, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU Y9, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU Y10, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU Y11, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU Y12, (R11)(R10*1) + MOVQ 216(R9), R11 + VMOVDQU Y13, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxGFNI_5x10Xor_loop + VZEROUPPER + +mulAvxGFNI_5x10Xor_end: + RET + // func mulAvxTwo_5x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_5x10Xor(SB), NOSPLIT, $0-88 @@ -32447,127 +41620,6 @@ mulAvxTwo_5x10Xor_loop: mulAvxTwo_5x10Xor_end: RET -// func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_6x1(SB), NOSPLIT, $0-88 - // Loading all tables to registers - // Destination kept in GP registers - // Full registers estimated 16 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_6x1_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VMOVDQU 192(CX), Y6 - VMOVDQU 224(CX), Y7 - VMOVDQU 256(CX), Y8 - VMOVDQU 288(CX), Y9 - VMOVDQU 320(CX), Y10 - VMOVDQU 352(CX), Y11 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), BX - MOVQ 48(CX), SI - MOVQ 72(CX), DI - MOVQ 96(CX), R8 - MOVQ 120(CX), CX - MOVQ out_base+48(FP), R9 - MOVQ (R9), R9 - MOVQ start+72(FP), R10 - - // Add start offset to output - ADDQ R10, R9 - - // Add start offset to input - ADDQ R10, DX - ADDQ R10, BX - ADDQ R10, SI - ADDQ R10, DI - ADDQ R10, R8 - ADDQ R10, CX - MOVQ $0x0000000f, R10 - MOVQ R10, X13 - VPBROADCASTB X13, Y13 - -mulAvxTwo_6x1_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (DX), Y14 - ADDQ $0x20, DX - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y0, Y14 - VPSHUFB Y15, Y1, Y15 - VPXOR Y14, Y15, Y12 - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (BX), Y14 - ADDQ $0x20, BX - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y2, Y14 - VPSHUFB Y15, Y3, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) - - // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (SI), Y14 - ADDQ $0x20, SI - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y4, Y14 - VPSHUFB Y15, Y5, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) - - // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (DI), Y14 - ADDQ $0x20, DI - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y6, Y14 - VPSHUFB Y15, Y7, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) - - // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (R8), Y14 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y8, Y14 - VPSHUFB Y15, Y9, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) - - // Load and process 32 bytes from input 5 to 1 outputs - VMOVDQU (CX), Y14 - ADDQ $0x20, CX - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y10, Y14 - VPSHUFB Y15, Y11, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) - - // Store 1 outputs - VMOVDQU Y12, (R9) - ADDQ $0x20, R9 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_6x1_loop - VZEROUPPER - -mulAvxTwo_6x1_end: - RET - // func mulAvxTwo_6x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_6x1_64(SB), $0-88 @@ -32587,7 +41639,6 @@ TEXT ·mulAvxTwo_6x1_64(SB), $0-88 MOVQ 96(DX), R9 MOVQ 120(DX), DX MOVQ out_base+48(FP), R10 - MOVQ out_base+48(FP), R10 MOVQ (R10), R10 MOVQ start+72(FP), R11 @@ -32821,6 +41872,94 @@ mulGFNI_6x1_64_loop: mulGFNI_6x1_64_end: RET +// func mulAvxGFNI_6x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 9 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R9 + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R9 + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, CX + +mulAvxGFNI_6x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y7 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y7, Y6 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y7 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y7 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y7 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y7 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (CX), Y7 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y5, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Store 1 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x1_loop + VZEROUPPER + +mulAvxGFNI_6x1_end: + RET + // func mulGFNI_6x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_6x1_64Xor(SB), $0-88 @@ -32913,126 +42052,96 @@ mulGFNI_6x1_64Xor_loop: mulGFNI_6x1_64Xor_end: RET -// func mulAvxTwo_6x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_6x1Xor(SB), NOSPLIT, $0-88 +// func mulAvxGFNI_6x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x1Xor(SB), $0-88 // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 16 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_6x1Xor_end - VMOVDQU (CX), Y0 - VMOVDQU 32(CX), Y1 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VMOVDQU 192(CX), Y6 - VMOVDQU 224(CX), Y7 - VMOVDQU 256(CX), Y8 - VMOVDQU 288(CX), Y9 - VMOVDQU 320(CX), Y10 - VMOVDQU 352(CX), Y11 - MOVQ in_base+24(FP), CX - MOVQ (CX), DX - MOVQ 24(CX), BX - MOVQ 48(CX), SI - MOVQ 72(CX), DI - MOVQ 96(CX), R8 - MOVQ 120(CX), CX - MOVQ out_base+48(FP), R9 - MOVQ (R9), R9 - MOVQ start+72(FP), R10 + // Full registers estimated 9 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R9 + MOVQ start+72(FP), R10 // Add start offset to output ADDQ R10, R9 // Add start offset to input - ADDQ R10, DX - ADDQ R10, BX - ADDQ R10, SI - ADDQ R10, DI - ADDQ R10, R8 - ADDQ R10, CX - MOVQ $0x0000000f, R10 - MOVQ R10, X13 - VPBROADCASTB X13, Y13 + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, CX + +mulAvxGFNI_6x1Xor_loop: + // Load 1 outputs + VMOVDQU (R9), Y6 -mulAvxTwo_6x1Xor_loop: // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (DX), Y14 - ADDQ $0x20, DX - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VMOVDQU (R9), Y12 - VPSHUFB Y14, Y0, Y14 - VPSHUFB Y15, Y1, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) + VMOVDQU (DX), Y7 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y7, Y7 + VXORPD Y6, Y7, Y6 // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (BX), Y14 - ADDQ $0x20, BX - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y2, Y14 - VPSHUFB Y15, Y3, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) + VMOVDQU (BX), Y7 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y7, Y7 + VXORPD Y6, Y7, Y6 // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (SI), Y14 - ADDQ $0x20, SI - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y4, Y14 - VPSHUFB Y15, Y5, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) + VMOVDQU (SI), Y7 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y7, Y7 + VXORPD Y6, Y7, Y6 // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (DI), Y14 - ADDQ $0x20, DI - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y6, Y14 - VPSHUFB Y15, Y7, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) + VMOVDQU (DI), Y7 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y7, Y7 + VXORPD Y6, Y7, Y6 // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (R8), Y14 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y8, Y14 - VPSHUFB Y15, Y9, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) + VMOVDQU (R8), Y7 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y7, Y7 + VXORPD Y6, Y7, Y6 // Load and process 32 bytes from input 5 to 1 outputs - VMOVDQU (CX), Y14 - ADDQ $0x20, CX - VPSRLQ $0x04, Y14, Y15 - VPAND Y13, Y14, Y14 - VPAND Y13, Y15, Y15 - VPSHUFB Y14, Y10, Y14 - VPSHUFB Y15, Y11, Y15 - XOR3WAY( $0x00, Y14, Y15, Y12) + VMOVDQU (CX), Y7 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y5, Y7, Y7 + VXORPD Y6, Y7, Y6 // Store 1 outputs - VMOVDQU Y12, (R9) + VMOVDQU Y6, (R9) ADDQ $0x20, R9 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_6x1Xor_loop + JNZ mulAvxGFNI_6x1Xor_loop VZEROUPPER -mulAvxTwo_6x1Xor_end: +mulAvxGFNI_6x1Xor_end: RET // func mulAvxTwo_6x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -33054,7 +42163,6 @@ TEXT ·mulAvxTwo_6x1_64Xor(SB), $0-88 MOVQ 96(DX), R9 MOVQ 120(DX), DX MOVQ out_base+48(FP), R10 - MOVQ out_base+48(FP), R10 MOVQ (R10), R10 MOVQ start+72(FP), R11 @@ -33204,161 +42312,6 @@ mulAvxTwo_6x1_64Xor_loop: mulAvxTwo_6x1_64Xor_end: RET -// func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_6x2(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 31 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_6x2_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), DX - MOVQ out_base+48(FP), R10 - MOVQ (R10), R11 - MOVQ 24(R10), R10 - MOVQ start+72(FP), R12 - - // Add start offset to output - ADDQ R12, R11 - ADDQ R12, R10 - - // Add start offset to input - ADDQ R12, BX - ADDQ R12, SI - ADDQ R12, DI - ADDQ R12, R8 - ADDQ R12, R9 - ADDQ R12, DX - MOVQ $0x0000000f, R12 - MOVQ R12, X2 - VPBROADCASTB X2, Y2 - -mulAvxTwo_6x2_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y0 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y1 - - // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (R9), Y5 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 5 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Store 2 outputs - VMOVDQU Y0, (R11) - ADDQ $0x20, R11 - VMOVDQU Y1, (R10) - ADDQ $0x20, R10 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_6x2_loop - VZEROUPPER - -mulAvxTwo_6x2_end: - RET - // func mulAvxTwo_6x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_6x2_64(SB), $0-88 @@ -33378,7 +42331,6 @@ TEXT ·mulAvxTwo_6x2_64(SB), $0-88 MOVQ 96(DX), R9 MOVQ 120(DX), DX MOVQ out_base+48(FP), R10 - MOVQ out_base+48(FP), R10 MOVQ (R10), R11 MOVQ 24(R10), R10 MOVQ start+72(FP), R12 @@ -33686,6 +42638,115 @@ mulGFNI_6x2_64_loop: mulGFNI_6x2_64_end: RET +// func mulAvxGFNI_6x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R9 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + ADDQ R11, R9 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, CX + +mulAvxGFNI_6x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (CX), Y14 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x2_loop + VZEROUPPER + +mulAvxGFNI_6x2_end: + RET + // func mulGFNI_6x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_6x2_64Xor(SB), $0-88 @@ -33801,161 +42862,119 @@ mulGFNI_6x2_64Xor_loop: mulGFNI_6x2_64Xor_end: RET -// func mulAvxTwo_6x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_6x2Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_6x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x2Xor(SB), $0-88 + // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 31 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_6x2Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), DX - MOVQ out_base+48(FP), R10 - MOVQ (R10), R11 - MOVQ 24(R10), R10 - MOVQ start+72(FP), R12 + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R9 + MOVQ start+72(FP), R11 // Add start offset to output - ADDQ R12, R11 - ADDQ R12, R10 + ADDQ R11, R10 + ADDQ R11, R9 // Add start offset to input - ADDQ R12, BX - ADDQ R12, SI - ADDQ R12, DI - ADDQ R12, R8 - ADDQ R12, R9 - ADDQ R12, DX - MOVQ $0x0000000f, R12 - MOVQ R12, X2 - VPBROADCASTB X2, Y2 + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, CX + +mulAvxGFNI_6x2Xor_loop: + // Load 2 outputs + VMOVDQU (R10), Y12 + VMOVDQU (R9), Y13 -mulAvxTwo_6x2Xor_loop: // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (R11), Y0 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU (R10), Y1 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (R9), Y5 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 5 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (CX), Y14 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 2 outputs - VMOVDQU Y0, (R11) - ADDQ $0x20, R11 - VMOVDQU Y1, (R10) + VMOVDQU Y12, (R10) ADDQ $0x20, R10 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_6x2Xor_loop + JNZ mulAvxGFNI_6x2Xor_loop VZEROUPPER -mulAvxTwo_6x2Xor_end: +mulAvxGFNI_6x2Xor_end: RET // func mulAvxTwo_6x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -33977,7 +42996,6 @@ TEXT ·mulAvxTwo_6x2_64Xor(SB), $0-88 MOVQ 96(DX), R9 MOVQ 120(DX), DX MOVQ out_base+48(FP), R10 - MOVQ out_base+48(FP), R10 MOVQ (R10), R11 MOVQ 24(R10), R10 MOVQ start+72(FP), R12 @@ -34182,195 +43200,6 @@ mulAvxTwo_6x2_64Xor_loop: mulAvxTwo_6x2_64Xor_end: RET -// func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_6x3(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 44 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_6x3_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), DX - MOVQ out_base+48(FP), R10 - MOVQ (R10), R11 - MOVQ 24(R10), R12 - MOVQ 48(R10), R10 - MOVQ start+72(FP), R13 - - // Add start offset to output - ADDQ R13, R11 - ADDQ R13, R12 - ADDQ R13, R10 - - // Add start offset to input - ADDQ R13, BX - ADDQ R13, SI - ADDQ R13, DI - ADDQ R13, R8 - ADDQ R13, R9 - ADDQ R13, DX - MOVQ $0x0000000f, R13 - MOVQ R13, X3 - VPBROADCASTB X3, Y3 - -mulAvxTwo_6x3_loop: - // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y0 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y1 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y2 - - // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (R9), Y6 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 5 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 960(CX), Y4 - VMOVDQU 992(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1024(CX), Y4 - VMOVDQU 1056(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1088(CX), Y4 - VMOVDQU 1120(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Store 3 outputs - VMOVDQU Y0, (R11) - ADDQ $0x20, R11 - VMOVDQU Y1, (R12) - ADDQ $0x20, R12 - VMOVDQU Y2, (R10) - ADDQ $0x20, R10 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_6x3_loop - VZEROUPPER - -mulAvxTwo_6x3_end: - RET - // func mulAvxTwo_6x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_6x3_64(SB), $0-88 @@ -34390,7 +43219,6 @@ TEXT ·mulAvxTwo_6x3_64(SB), $0-88 MOVQ 96(DX), R9 MOVQ 120(DX), DX MOVQ out_base+48(FP), R10 - MOVQ out_base+48(FP), R10 MOVQ (R10), R11 MOVQ 24(R10), R12 MOVQ 48(R10), R10 @@ -34772,6 +43600,136 @@ mulGFNI_6x3_64_loop: mulGFNI_6x3_64_end: RET +// func mulAvxGFNI_6x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x3(SB), $0-88 + // Loading 11 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R10 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R10 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, DX + +mulAvxGFNI_6x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x3_loop + VZEROUPPER + +mulAvxGFNI_6x3_end: + RET + // func mulGFNI_6x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_6x3_64Xor(SB), $0-88 @@ -34910,29 +43868,41 @@ mulGFNI_6x3_64Xor_loop: mulGFNI_6x3_64Xor_end: RET -// func mulAvxTwo_6x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_6x3Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_6x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x3Xor(SB), $0-88 + // Loading 11 of 18 tables to registers // Destination kept in GP registers - // Full registers estimated 44 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_6x3Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), DX - MOVQ out_base+48(FP), R10 - MOVQ (R10), R11 - MOVQ 24(R10), R12 - MOVQ 48(R10), R10 - MOVQ start+72(FP), R13 + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R10 + MOVQ start+72(FP), R13 // Add start offset to output ADDQ R13, R11 @@ -34940,166 +43910,100 @@ TEXT ·mulAvxTwo_6x3Xor(SB), NOSPLIT, $0-88 ADDQ R13, R10 // Add start offset to input - ADDQ R13, BX - ADDQ R13, SI - ADDQ R13, DI - ADDQ R13, R8 - ADDQ R13, R9 - ADDQ R13, DX - MOVQ $0x0000000f, R13 - MOVQ R13, X3 - VPBROADCASTB X3, Y3 + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, DX + +mulAvxGFNI_6x3Xor_loop: + // Load 3 outputs + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (R10), Y13 -mulAvxTwo_6x3Xor_loop: // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (R11), Y0 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU (R12), Y1 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU (R10), Y2 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (R9), Y6 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 5 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 960(CX), Y4 - VMOVDQU 992(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1024(CX), Y4 - VMOVDQU 1056(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1088(CX), Y4 - VMOVDQU 1120(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 3 outputs - VMOVDQU Y0, (R11) + VMOVDQU Y11, (R11) ADDQ $0x20, R11 - VMOVDQU Y1, (R12) + VMOVDQU Y12, (R12) ADDQ $0x20, R12 - VMOVDQU Y2, (R10) + VMOVDQU Y13, (R10) ADDQ $0x20, R10 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_6x3Xor_loop + JNZ mulAvxGFNI_6x3Xor_loop VZEROUPPER -mulAvxTwo_6x3Xor_end: +mulAvxGFNI_6x3Xor_end: RET // func mulAvxTwo_6x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -35121,7 +44025,6 @@ TEXT ·mulAvxTwo_6x3_64Xor(SB), $0-88 MOVQ 96(DX), R9 MOVQ 120(DX), DX MOVQ out_base+48(FP), R10 - MOVQ out_base+48(FP), R10 MOVQ (R10), R11 MOVQ 24(R10), R12 MOVQ 48(R10), R10 @@ -35755,6 +44658,157 @@ mulGFNI_6x4_64_loop: mulGFNI_6x4_64_end: RET +// func mulAvxGFNI_6x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x4(SB), $0-88 + // Loading 10 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R10 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R10 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, DX + +mulAvxGFNI_6x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x4_loop + VZEROUPPER + +mulAvxGFNI_6x4_end: + RET + // func mulGFNI_6x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_6x4_64Xor(SB), $0-88 @@ -35916,6 +44970,167 @@ mulGFNI_6x4_64Xor_loop: mulGFNI_6x4_64Xor_end: RET +// func mulAvxGFNI_6x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x4Xor(SB), $0-88 + // Loading 10 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R10 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R10 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, DX + +mulAvxGFNI_6x4Xor_loop: + // Load 4 outputs + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (R10), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x4Xor_loop + VZEROUPPER + +mulAvxGFNI_6x4Xor_end: + RET + // func mulAvxTwo_6x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_6x4Xor(SB), NOSPLIT, $0-88 @@ -36567,6 +45782,178 @@ mulGFNI_6x5_64_loop: mulGFNI_6x5_64_end: RET +// func mulAvxGFNI_6x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x5(SB), $0-88 + // Loading 9 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R10 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R10 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, DX + +mulAvxGFNI_6x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x5_loop + VZEROUPPER + +mulAvxGFNI_6x5_end: + RET + // func mulGFNI_6x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_6x5_64Xor(SB), $0-88 @@ -36746,6 +46133,190 @@ mulGFNI_6x5_64Xor_loop: mulGFNI_6x5_64Xor_end: RET +// func mulAvxGFNI_6x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x5Xor(SB), $0-88 + // Loading 9 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R10 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R10 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, DX + +mulAvxGFNI_6x5Xor_loop: + // Load 5 outputs + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (R10), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x5Xor_loop + VZEROUPPER + +mulAvxGFNI_6x5Xor_end: + RET + // func mulAvxTwo_6x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_6x5Xor(SB), NOSPLIT, $0-88 @@ -37480,6 +47051,199 @@ mulGFNI_6x6_64_loop: mulGFNI_6x6_64_end: RET +// func mulAvxGFNI_6x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x6(SB), $8-88 + // Loading 8 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, DX + +mulAvxGFNI_6x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x6_loop + VZEROUPPER + +mulAvxGFNI_6x6_end: + RET + // func mulGFNI_6x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_6x6_64Xor(SB), $8-88 @@ -37675,6 +47439,213 @@ mulGFNI_6x6_64Xor_loop: mulGFNI_6x6_64Xor_end: RET +// func mulAvxGFNI_6x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x6Xor(SB), $8-88 + // Loading 8 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, DX + +mulAvxGFNI_6x6Xor_loop: + // Load 6 outputs + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R10), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x6Xor_loop + VZEROUPPER + +mulAvxGFNI_6x6Xor_end: + RET + // func mulAvxTwo_6x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_6x6Xor(SB), NOSPLIT, $8-88 @@ -38498,6 +48469,224 @@ mulGFNI_6x7_64_loop: mulGFNI_6x7_64_end: RET +// func mulAvxGFNI_6x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x7(SB), $8-88 + // Loading 7 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 51 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), AX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_6x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_6x7_loop + VZEROUPPER + +mulAvxGFNI_6x7_end: + RET + // func mulGFNI_6x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_6x7_64Xor(SB), $8-88 @@ -38713,6 +48902,240 @@ mulGFNI_6x7_64Xor_loop: mulGFNI_6x7_64Xor_end: RET +// func mulAvxGFNI_6x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x7Xor(SB), $8-88 + // Loading 7 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 51 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), AX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_6x7Xor_loop: + // Load 7 outputs + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_6x7Xor_loop + VZEROUPPER + +mulAvxGFNI_6x7Xor_end: + RET + // func mulAvxTwo_6x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_6x7Xor(SB), NOSPLIT, $8-88 @@ -39581,6 +50004,224 @@ mulGFNI_6x8_64_loop: mulGFNI_6x8_64_end: RET +// func mulAvxGFNI_6x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x8(SB), $0-88 + // Loading 6 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x8_loop + VZEROUPPER + +mulAvxGFNI_6x8_end: + RET + // func mulGFNI_6x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_6x8_64Xor(SB), $0-88 @@ -39799,6 +50440,250 @@ mulGFNI_6x8_64Xor_loop: mulGFNI_6x8_64Xor_end: RET +// func mulAvxGFNI_6x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x8Xor(SB), $0-88 + // Loading 6 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x8Xor_loop: + // Load 8 outputs + MOVQ (R10), R12 + VMOVDQU (R12)(R11*1), Y6 + MOVQ 24(R10), R12 + VMOVDQU (R12)(R11*1), Y7 + MOVQ 48(R10), R12 + VMOVDQU (R12)(R11*1), Y8 + MOVQ 72(R10), R12 + VMOVDQU (R12)(R11*1), Y9 + MOVQ 96(R10), R12 + VMOVDQU (R12)(R11*1), Y10 + MOVQ 120(R10), R12 + VMOVDQU (R12)(R11*1), Y11 + MOVQ 144(R10), R12 + VMOVDQU (R12)(R11*1), Y12 + MOVQ 168(R10), R12 + VMOVDQU (R12)(R11*1), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x8Xor_loop + VZEROUPPER + +mulAvxGFNI_6x8Xor_end: + RET + // func mulAvxTwo_6x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_6x8Xor(SB), NOSPLIT, $0-88 @@ -40735,6 +51620,243 @@ mulGFNI_6x9_64_loop: mulGFNI_6x9_64_end: RET +// func mulAvxGFNI_6x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x9(SB), $0-88 + // Loading 5 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R10), R12 + VMOVDQU Y5, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x9_loop + VZEROUPPER + +mulAvxGFNI_6x9_end: + RET + // func mulGFNI_6x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_6x9_64Xor(SB), $0-88 @@ -40968,6 +52090,272 @@ mulGFNI_6x9_64Xor_loop: mulGFNI_6x9_64Xor_end: RET +// func mulAvxGFNI_6x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x9Xor(SB), $0-88 + // Loading 5 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x9Xor_loop: + // Load 9 outputs + MOVQ (R10), R12 + VMOVDQU (R12)(R11*1), Y5 + MOVQ 24(R10), R12 + VMOVDQU (R12)(R11*1), Y6 + MOVQ 48(R10), R12 + VMOVDQU (R12)(R11*1), Y7 + MOVQ 72(R10), R12 + VMOVDQU (R12)(R11*1), Y8 + MOVQ 96(R10), R12 + VMOVDQU (R12)(R11*1), Y9 + MOVQ 120(R10), R12 + VMOVDQU (R12)(R11*1), Y10 + MOVQ 144(R10), R12 + VMOVDQU (R12)(R11*1), Y11 + MOVQ 168(R10), R12 + VMOVDQU (R12)(R11*1), Y12 + MOVQ 192(R10), R12 + VMOVDQU (R12)(R11*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R10), R12 + VMOVDQU Y5, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x9Xor_loop + VZEROUPPER + +mulAvxGFNI_6x9Xor_end: + RET + // func mulAvxTwo_6x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_6x9Xor(SB), NOSPLIT, $0-88 @@ -41982,6 +53370,262 @@ mulGFNI_6x10_64_loop: mulGFNI_6x10_64_end: RET +// func mulAvxGFNI_6x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x10(SB), $0-88 + // Loading 4 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R10), R12 + VMOVDQU Y4, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y5, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 216(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x10_loop + VZEROUPPER + +mulAvxGFNI_6x10_end: + RET + // func mulGFNI_6x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_6x10_64Xor(SB), $0-88 @@ -42230,6 +53874,294 @@ mulGFNI_6x10_64Xor_loop: mulGFNI_6x10_64Xor_end: RET +// func mulAvxGFNI_6x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x10Xor(SB), $0-88 + // Loading 4 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x10Xor_loop: + // Load 10 outputs + MOVQ (R10), R12 + VMOVDQU (R12)(R11*1), Y4 + MOVQ 24(R10), R12 + VMOVDQU (R12)(R11*1), Y5 + MOVQ 48(R10), R12 + VMOVDQU (R12)(R11*1), Y6 + MOVQ 72(R10), R12 + VMOVDQU (R12)(R11*1), Y7 + MOVQ 96(R10), R12 + VMOVDQU (R12)(R11*1), Y8 + MOVQ 120(R10), R12 + VMOVDQU (R12)(R11*1), Y9 + MOVQ 144(R10), R12 + VMOVDQU (R12)(R11*1), Y10 + MOVQ 168(R10), R12 + VMOVDQU (R12)(R11*1), Y11 + MOVQ 192(R10), R12 + VMOVDQU (R12)(R11*1), Y12 + MOVQ 216(R10), R12 + VMOVDQU (R12)(R11*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R10), R12 + VMOVDQU Y4, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y5, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 216(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x10Xor_loop + VZEROUPPER + +mulAvxGFNI_6x10Xor_end: + RET + // func mulAvxTwo_6x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_6x10Xor(SB), NOSPLIT, $0-88 @@ -42656,141 +54588,6 @@ mulAvxTwo_6x10Xor_loop: mulAvxTwo_6x10Xor_end: RET -// func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_7x1(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 18 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_7x1_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), DX - MOVQ out_base+48(FP), R11 - MOVQ (R11), R11 - MOVQ start+72(FP), R12 - - // Add start offset to output - ADDQ R12, R11 - - // Add start offset to input - ADDQ R12, BX - ADDQ R12, SI - ADDQ R12, DI - ADDQ R12, R8 - ADDQ R12, R9 - ADDQ R12, R10 - ADDQ R12, DX - MOVQ $0x0000000f, R12 - MOVQ R12, X1 - VPBROADCASTB X1, Y1 - -mulAvxTwo_7x1_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (BX), Y4 - ADDQ $0x20, BX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU (CX), Y2 - VMOVDQU 32(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - VPXOR Y2, Y3, Y0 - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (SI), Y4 - ADDQ $0x20, SI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (DI), Y4 - ADDQ $0x20, DI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 128(CX), Y2 - VMOVDQU 160(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (R8), Y4 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 192(CX), Y2 - VMOVDQU 224(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (R9), Y4 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 256(CX), Y2 - VMOVDQU 288(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 5 to 1 outputs - VMOVDQU (R10), Y4 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 320(CX), Y2 - VMOVDQU 352(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 6 to 1 outputs - VMOVDQU (DX), Y4 - ADDQ $0x20, DX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 384(CX), Y2 - VMOVDQU 416(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Store 1 outputs - VMOVDQU Y0, (R11) - ADDQ $0x20, R11 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_7x1_loop - VZEROUPPER - -mulAvxTwo_7x1_end: - RET - // func mulAvxTwo_7x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_7x1_64(SB), $0-88 @@ -42811,7 +54608,6 @@ TEXT ·mulAvxTwo_7x1_64(SB), $0-88 MOVQ 120(DX), R10 MOVQ 144(DX), DX MOVQ out_base+48(FP), R11 - MOVQ out_base+48(FP), R11 MOVQ (R11), R11 MOVQ start+72(FP), R12 @@ -43074,6 +54870,103 @@ mulGFNI_7x1_64_loop: mulGFNI_7x1_64_end: RET +// func mulAvxGFNI_7x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), CX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R10 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, CX + +mulAvxGFNI_7x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y8 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y8, Y7 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y8 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y8 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y8 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y8 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y8 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (CX), Y8 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Store 1 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x1_loop + VZEROUPPER + +mulAvxGFNI_7x1_end: + RET + // func mulGFNI_7x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_7x1_64Xor(SB), $0-88 @@ -43175,17 +55068,118 @@ mulGFNI_7x1_64Xor_loop: mulGFNI_7x1_64Xor_end: RET -// func mulAvxTwo_7x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// func mulAvxGFNI_7x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), CX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R10 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, CX + +mulAvxGFNI_7x1Xor_loop: + // Load 1 outputs + VMOVDQU (R10), Y7 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y8 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y8 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y8 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y8 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y8 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y8 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (CX), Y8 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Store 1 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x1Xor_loop + VZEROUPPER + +mulAvxGFNI_7x1Xor_end: + RET + +// func mulAvxTwo_7x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_7x1Xor(SB), NOSPLIT, $0-88 +TEXT ·mulAvxTwo_7x1_64Xor(SB), $0-88 // Loading no tables to registers // Destination kept in GP registers - // Full registers estimated 18 YMM used + // Full registers estimated 34 YMM used MOVQ n+80(FP), AX MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX + SHRQ $0x06, AX TESTQ AX, AX - JZ mulAvxTwo_7x1Xor_end + JZ mulAvxTwo_7x1_64Xor_end MOVQ in_base+24(FP), DX MOVQ (DX), BX MOVQ 24(DX), SI @@ -43201,143 +55195,6 @@ TEXT ·mulAvxTwo_7x1Xor(SB), NOSPLIT, $0-88 // Add start offset to output ADDQ R12, R11 - // Add start offset to input - ADDQ R12, BX - ADDQ R12, SI - ADDQ R12, DI - ADDQ R12, R8 - ADDQ R12, R9 - ADDQ R12, R10 - ADDQ R12, DX - MOVQ $0x0000000f, R12 - MOVQ R12, X1 - VPBROADCASTB X1, Y1 - -mulAvxTwo_7x1Xor_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (BX), Y4 - ADDQ $0x20, BX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU (R11), Y0 - VMOVDQU (CX), Y2 - VMOVDQU 32(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (SI), Y4 - ADDQ $0x20, SI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (DI), Y4 - ADDQ $0x20, DI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 128(CX), Y2 - VMOVDQU 160(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (R8), Y4 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 192(CX), Y2 - VMOVDQU 224(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (R9), Y4 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 256(CX), Y2 - VMOVDQU 288(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 5 to 1 outputs - VMOVDQU (R10), Y4 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 320(CX), Y2 - VMOVDQU 352(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 6 to 1 outputs - VMOVDQU (DX), Y4 - ADDQ $0x20, DX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 384(CX), Y2 - VMOVDQU 416(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Store 1 outputs - VMOVDQU Y0, (R11) - ADDQ $0x20, R11 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_7x1Xor_loop - VZEROUPPER - -mulAvxTwo_7x1Xor_end: - RET - -// func mulAvxTwo_7x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_7x1_64Xor(SB), $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 34 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x06, AX - TESTQ AX, AX - JZ mulAvxTwo_7x1_64Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), DX - MOVQ out_base+48(FP), R11 - MOVQ out_base+48(FP), R11 - MOVQ (R11), R11 - MOVQ start+72(FP), R12 - - // Add start offset to output - ADDQ R12, R11 - // Add start offset to input ADDQ R12, BX ADDQ R12, SI @@ -43501,180 +55358,6 @@ mulAvxTwo_7x1_64Xor_loop: mulAvxTwo_7x1_64Xor_end: RET -// func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_7x2(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 35 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_7x2_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), DX - MOVQ out_base+48(FP), R11 - MOVQ (R11), R12 - MOVQ 24(R11), R11 - MOVQ start+72(FP), R13 - - // Add start offset to output - ADDQ R13, R12 - ADDQ R13, R11 - - // Add start offset to input - ADDQ R13, BX - ADDQ R13, SI - ADDQ R13, DI - ADDQ R13, R8 - ADDQ R13, R9 - ADDQ R13, R10 - ADDQ R13, DX - MOVQ $0x0000000f, R13 - MOVQ R13, X2 - VPBROADCASTB X2, Y2 - -mulAvxTwo_7x2_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y0 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y1 - - // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (R9), Y5 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 5 to 2 outputs - VMOVDQU (R10), Y5 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 6 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Store 2 outputs - VMOVDQU Y0, (R12) - ADDQ $0x20, R12 - VMOVDQU Y1, (R11) - ADDQ $0x20, R11 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_7x2_loop - VZEROUPPER - -mulAvxTwo_7x2_end: - RET - // func mulAvxTwo_7x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_7x2_64(SB), $0-88 @@ -43695,7 +55378,6 @@ TEXT ·mulAvxTwo_7x2_64(SB), $0-88 MOVQ 120(DX), R10 MOVQ 144(DX), DX MOVQ out_base+48(FP), R11 - MOVQ out_base+48(FP), R11 MOVQ (R11), R12 MOVQ 24(R11), R11 MOVQ start+72(FP), R13 @@ -44043,6 +55725,127 @@ mulGFNI_7x2_64_loop: mulGFNI_7x2_64_end: RET +// func mulAvxGFNI_7x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x2(SB), $0-88 + // Loading 12 of 14 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R11 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + ADDQ R13, R11 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, DX + +mulAvxGFNI_7x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x2_loop + VZEROUPPER + +mulAvxGFNI_7x2_end: + RET + // func mulGFNI_7x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_7x2_64Xor(SB), $0-88 @@ -44170,17 +55973,144 @@ mulGFNI_7x2_64Xor_loop: mulGFNI_7x2_64Xor_end: RET -// func mulAvxTwo_7x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// func mulAvxGFNI_7x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x2Xor(SB), $0-88 + // Loading 12 of 14 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R11 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + ADDQ R13, R11 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, DX + +mulAvxGFNI_7x2Xor_loop: + // Load 2 outputs + VMOVDQU (R12), Y12 + VMOVDQU (R11), Y13 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x2Xor_loop + VZEROUPPER + +mulAvxGFNI_7x2Xor_end: + RET + +// func mulAvxTwo_7x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_7x2Xor(SB), NOSPLIT, $0-88 +TEXT ·mulAvxTwo_7x2_64Xor(SB), $0-88 // Loading no tables to registers // Destination kept in GP registers - // Full registers estimated 35 YMM used + // Full registers estimated 65 YMM used MOVQ n+80(FP), AX MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX + SHRQ $0x06, AX TESTQ AX, AX - JZ mulAvxTwo_7x2Xor_end + JZ mulAvxTwo_7x2_64Xor_end MOVQ in_base+24(FP), DX MOVQ (DX), BX MOVQ 24(DX), SI @@ -44198,183 +56128,6 @@ TEXT ·mulAvxTwo_7x2Xor(SB), NOSPLIT, $0-88 ADDQ R13, R12 ADDQ R13, R11 - // Add start offset to input - ADDQ R13, BX - ADDQ R13, SI - ADDQ R13, DI - ADDQ R13, R8 - ADDQ R13, R9 - ADDQ R13, R10 - ADDQ R13, DX - MOVQ $0x0000000f, R13 - MOVQ R13, X2 - VPBROADCASTB X2, Y2 - -mulAvxTwo_7x2Xor_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (R12), Y0 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU (R11), Y1 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (R9), Y5 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 5 to 2 outputs - VMOVDQU (R10), Y5 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 6 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Store 2 outputs - VMOVDQU Y0, (R12) - ADDQ $0x20, R12 - VMOVDQU Y1, (R11) - ADDQ $0x20, R11 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_7x2Xor_loop - VZEROUPPER - -mulAvxTwo_7x2Xor_end: - RET - -// func mulAvxTwo_7x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_7x2_64Xor(SB), $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 65 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x06, AX - TESTQ AX, AX - JZ mulAvxTwo_7x2_64Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), DX - MOVQ out_base+48(FP), R11 - MOVQ out_base+48(FP), R11 - MOVQ (R11), R12 - MOVQ 24(R11), R11 - MOVQ start+72(FP), R13 - - // Add start offset to output - ADDQ R13, R12 - ADDQ R13, R11 - // Add start offset to input ADDQ R13, BX ADDQ R13, SI @@ -44599,219 +56352,6 @@ mulAvxTwo_7x2_64Xor_loop: mulAvxTwo_7x2_64Xor_end: RET -// func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_7x3(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 50 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_7x3_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), DX - MOVQ out_base+48(FP), R11 - MOVQ (R11), R12 - MOVQ 24(R11), R13 - MOVQ 48(R11), R11 - MOVQ start+72(FP), R14 - - // Add start offset to output - ADDQ R14, R12 - ADDQ R14, R13 - ADDQ R14, R11 - - // Add start offset to input - ADDQ R14, BX - ADDQ R14, SI - ADDQ R14, DI - ADDQ R14, R8 - ADDQ R14, R9 - ADDQ R14, R10 - ADDQ R14, DX - MOVQ $0x0000000f, R14 - MOVQ R14, X3 - VPBROADCASTB X3, Y3 - -mulAvxTwo_7x3_loop: - // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y0 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y1 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y2 - - // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (R9), Y6 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 5 to 3 outputs - VMOVDQU (R10), Y6 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 960(CX), Y4 - VMOVDQU 992(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1024(CX), Y4 - VMOVDQU 1056(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1088(CX), Y4 - VMOVDQU 1120(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 6 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1152(CX), Y4 - VMOVDQU 1184(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1216(CX), Y4 - VMOVDQU 1248(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1280(CX), Y4 - VMOVDQU 1312(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Store 3 outputs - VMOVDQU Y0, (R12) - ADDQ $0x20, R12 - VMOVDQU Y1, (R13) - ADDQ $0x20, R13 - VMOVDQU Y2, (R11) - ADDQ $0x20, R11 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_7x3_loop - VZEROUPPER - -mulAvxTwo_7x3_end: - RET - // func mulAvxTwo_7x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_7x3_64(SB), $0-88 @@ -44832,7 +56372,6 @@ TEXT ·mulAvxTwo_7x3_64(SB), $0-88 MOVQ 120(DX), R10 MOVQ 144(DX), DX MOVQ out_base+48(FP), R11 - MOVQ out_base+48(FP), R11 MOVQ (R11), R12 MOVQ 24(R11), R13 MOVQ 48(R11), R11 @@ -45265,6 +56804,151 @@ mulGFNI_7x3_64_loop: mulGFNI_7x3_64_end: RET +// func mulAvxGFNI_7x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x3(SB), $0-88 + // Loading 11 of 21 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R11 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R11 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, DX + +mulAvxGFNI_7x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x3_loop + VZEROUPPER + +mulAvxGFNI_7x3_end: + RET + // func mulGFNI_7x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_7x3_64Xor(SB), $0-88 @@ -45418,30 +57102,42 @@ mulGFNI_7x3_64Xor_loop: mulGFNI_7x3_64Xor_end: RET -// func mulAvxTwo_7x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_7x3Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_7x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x3Xor(SB), $0-88 + // Loading 11 of 21 tables to registers // Destination kept in GP registers - // Full registers estimated 50 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_7x3Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), DX - MOVQ out_base+48(FP), R11 - MOVQ (R11), R12 - MOVQ 24(R11), R13 - MOVQ 48(R11), R11 - MOVQ start+72(FP), R14 + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R11 + MOVQ start+72(FP), R14 // Add start offset to output ADDQ R14, R12 @@ -45449,189 +57145,114 @@ TEXT ·mulAvxTwo_7x3Xor(SB), NOSPLIT, $0-88 ADDQ R14, R11 // Add start offset to input - ADDQ R14, BX - ADDQ R14, SI - ADDQ R14, DI - ADDQ R14, R8 - ADDQ R14, R9 - ADDQ R14, R10 - ADDQ R14, DX - MOVQ $0x0000000f, R14 - MOVQ R14, X3 - VPBROADCASTB X3, Y3 + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, DX + +mulAvxGFNI_7x3Xor_loop: + // Load 3 outputs + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (R11), Y13 -mulAvxTwo_7x3Xor_loop: // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (R12), Y0 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU (R13), Y1 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU (R11), Y2 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (R9), Y6 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 5 to 3 outputs - VMOVDQU (R10), Y6 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 960(CX), Y4 - VMOVDQU 992(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1024(CX), Y4 - VMOVDQU 1056(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1088(CX), Y4 - VMOVDQU 1120(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 6 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1152(CX), Y4 - VMOVDQU 1184(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1216(CX), Y4 - VMOVDQU 1248(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1280(CX), Y4 - VMOVDQU 1312(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 3 outputs - VMOVDQU Y0, (R12) + VMOVDQU Y11, (R12) ADDQ $0x20, R12 - VMOVDQU Y1, (R13) + VMOVDQU Y12, (R13) ADDQ $0x20, R13 - VMOVDQU Y2, (R11) + VMOVDQU Y13, (R11) ADDQ $0x20, R11 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_7x3Xor_loop + JNZ mulAvxGFNI_7x3Xor_loop VZEROUPPER -mulAvxTwo_7x3Xor_end: +mulAvxGFNI_7x3Xor_end: RET // func mulAvxTwo_7x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -45654,7 +57275,6 @@ TEXT ·mulAvxTwo_7x3_64Xor(SB), $0-88 MOVQ 120(DX), R10 MOVQ 144(DX), DX MOVQ out_base+48(FP), R11 - MOVQ out_base+48(FP), R11 MOVQ (R11), R12 MOVQ 24(R11), R13 MOVQ 48(R11), R11 @@ -46369,6 +57989,175 @@ mulGFNI_7x4_64_loop: mulGFNI_7x4_64_end: RET +// func mulAvxGFNI_7x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x4(SB), $0-88 + // Loading 10 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R11 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R11 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, DX + +mulAvxGFNI_7x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x4_loop + VZEROUPPER + +mulAvxGFNI_7x4_end: + RET + // func mulGFNI_7x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_7x4_64Xor(SB), $0-88 @@ -46546,6 +58335,185 @@ mulGFNI_7x4_64Xor_loop: mulGFNI_7x4_64Xor_end: RET +// func mulAvxGFNI_7x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x4Xor(SB), $0-88 + // Loading 10 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R11 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R11 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, DX + +mulAvxGFNI_7x4Xor_loop: + // Load 4 outputs + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (R11), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x4Xor_loop + VZEROUPPER + +mulAvxGFNI_7x4Xor_end: + RET + // func mulAvxTwo_7x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_7x4Xor(SB), NOSPLIT, $0-88 @@ -47276,6 +59244,199 @@ mulGFNI_7x5_64_loop: mulGFNI_7x5_64_end: RET +// func mulAvxGFNI_7x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x5(SB), $8-88 + // Loading 9 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, DX + +mulAvxGFNI_7x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x5_loop + VZEROUPPER + +mulAvxGFNI_7x5_end: + RET + // func mulGFNI_7x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_7x5_64Xor(SB), $8-88 @@ -47471,6 +59632,211 @@ mulGFNI_7x5_64Xor_loop: mulGFNI_7x5_64Xor_end: RET +// func mulAvxGFNI_7x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x5Xor(SB), $8-88 + // Loading 9 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, DX + +mulAvxGFNI_7x5Xor_loop: + // Load 5 outputs + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R11), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x5Xor_loop + VZEROUPPER + +mulAvxGFNI_7x5Xor_end: + RET + // func mulAvxTwo_7x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_7x5Xor(SB), NOSPLIT, $8-88 @@ -48302,6 +60668,227 @@ mulGFNI_7x6_64_loop: mulGFNI_7x6_64_end: RET +// func mulAvxGFNI_7x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x6(SB), $8-88 + // Loading 8 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), AX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_7x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_7x6_loop + VZEROUPPER + +mulAvxGFNI_7x6_end: + RET + // func mulGFNI_7x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_7x6_64Xor(SB), $8-88 @@ -48519,6 +61106,241 @@ mulGFNI_7x6_64Xor_loop: mulGFNI_7x6_64Xor_end: RET +// func mulAvxGFNI_7x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x6Xor(SB), $8-88 + // Loading 8 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), AX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_7x6Xor_loop: + // Load 6 outputs + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R10), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_7x6Xor_loop + VZEROUPPER + +mulAvxGFNI_7x6Xor_end: + RET + // func mulAvxTwo_7x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_7x6Xor(SB), NOSPLIT, $8-88 @@ -49411,6 +62233,232 @@ mulGFNI_7x7_64_loop: mulGFNI_7x7_64_end: RET +// func mulAvxGFNI_7x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x7(SB), $0-88 + // Loading 7 of 49 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x7_loop + VZEROUPPER + +mulAvxGFNI_7x7_end: + RET + // func mulGFNI_7x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_7x7_64Xor(SB), $0-88 @@ -49634,6 +62682,255 @@ mulGFNI_7x7_64Xor_loop: mulGFNI_7x7_64Xor_end: RET +// func mulAvxGFNI_7x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x7Xor(SB), $0-88 + // Loading 7 of 49 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x7Xor_loop: + // Load 7 outputs + MOVQ (R11), R13 + VMOVDQU (R13)(R12*1), Y7 + MOVQ 24(R11), R13 + VMOVDQU (R13)(R12*1), Y8 + MOVQ 48(R11), R13 + VMOVDQU (R13)(R12*1), Y9 + MOVQ 72(R11), R13 + VMOVDQU (R13)(R12*1), Y10 + MOVQ 96(R11), R13 + VMOVDQU (R13)(R12*1), Y11 + MOVQ 120(R11), R13 + VMOVDQU (R13)(R12*1), Y12 + MOVQ 144(R11), R13 + VMOVDQU (R13)(R12*1), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x7Xor_loop + VZEROUPPER + +mulAvxGFNI_7x7Xor_end: + RET + // func mulAvxTwo_7x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_7x7Xor(SB), NOSPLIT, $0-88 @@ -50607,6 +63904,254 @@ mulGFNI_7x8_64_loop: mulGFNI_7x8_64_end: RET +// func mulAvxGFNI_7x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x8(SB), $0-88 + // Loading 6 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 66 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x8_loop + VZEROUPPER + +mulAvxGFNI_7x8_end: + RET + // func mulGFNI_7x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_7x8_64Xor(SB), $0-88 @@ -50847,6 +64392,280 @@ mulGFNI_7x8_64Xor_loop: mulGFNI_7x8_64Xor_end: RET +// func mulAvxGFNI_7x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x8Xor(SB), $0-88 + // Loading 6 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 66 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x8Xor_loop: + // Load 8 outputs + MOVQ (R11), R13 + VMOVDQU (R13)(R12*1), Y6 + MOVQ 24(R11), R13 + VMOVDQU (R13)(R12*1), Y7 + MOVQ 48(R11), R13 + VMOVDQU (R13)(R12*1), Y8 + MOVQ 72(R11), R13 + VMOVDQU (R13)(R12*1), Y9 + MOVQ 96(R11), R13 + VMOVDQU (R13)(R12*1), Y10 + MOVQ 120(R11), R13 + VMOVDQU (R13)(R12*1), Y11 + MOVQ 144(R11), R13 + VMOVDQU (R13)(R12*1), Y12 + MOVQ 168(R11), R13 + VMOVDQU (R13)(R12*1), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x8Xor_loop + VZEROUPPER + +mulAvxGFNI_7x8Xor_end: + RET + // func mulAvxTwo_7x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_7x8Xor(SB), NOSPLIT, $0-88 @@ -51910,6 +65729,276 @@ mulGFNI_7x9_64_loop: mulGFNI_7x9_64_end: RET +// func mulAvxGFNI_7x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x9(SB), $0-88 + // Loading 5 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R11), R13 + VMOVDQU Y5, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x9_loop + VZEROUPPER + +mulAvxGFNI_7x9_end: + RET + // func mulGFNI_7x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_7x9_64Xor(SB), $0-88 @@ -52167,6 +66256,305 @@ mulGFNI_7x9_64Xor_loop: mulGFNI_7x9_64Xor_end: RET +// func mulAvxGFNI_7x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x9Xor(SB), $0-88 + // Loading 5 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x9Xor_loop: + // Load 9 outputs + MOVQ (R11), R13 + VMOVDQU (R13)(R12*1), Y5 + MOVQ 24(R11), R13 + VMOVDQU (R13)(R12*1), Y6 + MOVQ 48(R11), R13 + VMOVDQU (R13)(R12*1), Y7 + MOVQ 72(R11), R13 + VMOVDQU (R13)(R12*1), Y8 + MOVQ 96(R11), R13 + VMOVDQU (R13)(R12*1), Y9 + MOVQ 120(R11), R13 + VMOVDQU (R13)(R12*1), Y10 + MOVQ 144(R11), R13 + VMOVDQU (R13)(R12*1), Y11 + MOVQ 168(R11), R13 + VMOVDQU (R13)(R12*1), Y12 + MOVQ 192(R11), R13 + VMOVDQU (R13)(R12*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R11), R13 + VMOVDQU Y5, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x9Xor_loop + VZEROUPPER + +mulAvxGFNI_7x9Xor_end: + RET + // func mulAvxTwo_7x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_7x9Xor(SB), NOSPLIT, $0-88 @@ -53320,6 +67708,298 @@ mulGFNI_7x10_64_loop: mulGFNI_7x10_64_end: RET +// func mulAvxGFNI_7x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x10(SB), $0-88 + // Loading 4 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R11), R13 + VMOVDQU Y4, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y5, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 216(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x10_loop + VZEROUPPER + +mulAvxGFNI_7x10_end: + RET + // func mulGFNI_7x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_7x10_64Xor(SB), $0-88 @@ -53594,6 +68274,330 @@ mulGFNI_7x10_64Xor_loop: mulGFNI_7x10_64Xor_end: RET +// func mulAvxGFNI_7x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x10Xor(SB), $0-88 + // Loading 4 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x10Xor_loop: + // Load 10 outputs + MOVQ (R11), R13 + VMOVDQU (R13)(R12*1), Y4 + MOVQ 24(R11), R13 + VMOVDQU (R13)(R12*1), Y5 + MOVQ 48(R11), R13 + VMOVDQU (R13)(R12*1), Y6 + MOVQ 72(R11), R13 + VMOVDQU (R13)(R12*1), Y7 + MOVQ 96(R11), R13 + VMOVDQU (R13)(R12*1), Y8 + MOVQ 120(R11), R13 + VMOVDQU (R13)(R12*1), Y9 + MOVQ 144(R11), R13 + VMOVDQU (R13)(R12*1), Y10 + MOVQ 168(R11), R13 + VMOVDQU (R13)(R12*1), Y11 + MOVQ 192(R11), R13 + VMOVDQU (R13)(R12*1), Y12 + MOVQ 216(R11), R13 + VMOVDQU (R13)(R12*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R11), R13 + VMOVDQU Y4, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y5, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 216(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x10Xor_loop + VZEROUPPER + +mulAvxGFNI_7x10Xor_end: + RET + // func mulAvxTwo_7x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_7x10Xor(SB), NOSPLIT, $0-88 @@ -54079,155 +69083,6 @@ mulAvxTwo_7x10Xor_loop: mulAvxTwo_7x10Xor_end: RET -// func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_8x1(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 20 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_8x1_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), DX - MOVQ out_base+48(FP), R12 - MOVQ (R12), R12 - MOVQ start+72(FP), R13 - - // Add start offset to output - ADDQ R13, R12 - - // Add start offset to input - ADDQ R13, BX - ADDQ R13, SI - ADDQ R13, DI - ADDQ R13, R8 - ADDQ R13, R9 - ADDQ R13, R10 - ADDQ R13, R11 - ADDQ R13, DX - MOVQ $0x0000000f, R13 - MOVQ R13, X1 - VPBROADCASTB X1, Y1 - -mulAvxTwo_8x1_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (BX), Y4 - ADDQ $0x20, BX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU (CX), Y2 - VMOVDQU 32(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - VPXOR Y2, Y3, Y0 - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (SI), Y4 - ADDQ $0x20, SI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (DI), Y4 - ADDQ $0x20, DI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 128(CX), Y2 - VMOVDQU 160(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (R8), Y4 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 192(CX), Y2 - VMOVDQU 224(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (R9), Y4 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 256(CX), Y2 - VMOVDQU 288(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 5 to 1 outputs - VMOVDQU (R10), Y4 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 320(CX), Y2 - VMOVDQU 352(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 6 to 1 outputs - VMOVDQU (R11), Y4 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 384(CX), Y2 - VMOVDQU 416(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 7 to 1 outputs - VMOVDQU (DX), Y4 - ADDQ $0x20, DX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 448(CX), Y2 - VMOVDQU 480(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Store 1 outputs - VMOVDQU Y0, (R12) - ADDQ $0x20, R12 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_8x1_loop - VZEROUPPER - -mulAvxTwo_8x1_end: - RET - // func mulAvxTwo_8x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_8x1_64(SB), $0-88 @@ -54249,7 +69104,6 @@ TEXT ·mulAvxTwo_8x1_64(SB), $0-88 MOVQ 144(DX), R11 MOVQ 168(DX), DX MOVQ out_base+48(FP), R12 - MOVQ out_base+48(FP), R12 MOVQ (R12), R12 MOVQ start+72(FP), R13 @@ -54541,6 +69395,112 @@ mulGFNI_8x1_64_loop: mulGFNI_8x1_64_end: RET +// func mulAvxGFNI_8x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), CX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R11 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R11 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, CX + +mulAvxGFNI_8x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y9 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y9, Y8 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y9 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y9 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y9 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y9 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y9 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R10), Y9 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (CX), Y9 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y7, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Store 1 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x1_loop + VZEROUPPER + +mulAvxGFNI_8x1_end: + RET + // func mulGFNI_8x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_8x1_64Xor(SB), $0-88 @@ -54651,154 +69611,114 @@ mulGFNI_8x1_64Xor_loop: mulGFNI_8x1_64Xor_end: RET -// func mulAvxTwo_8x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_8x1Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_8x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x1Xor(SB), $0-88 + // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 20 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_8x1Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), DX - MOVQ out_base+48(FP), R12 - MOVQ (R12), R12 - MOVQ start+72(FP), R13 + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), CX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R11 + MOVQ start+72(FP), R12 // Add start offset to output - ADDQ R13, R12 + ADDQ R12, R11 // Add start offset to input - ADDQ R13, BX - ADDQ R13, SI - ADDQ R13, DI - ADDQ R13, R8 - ADDQ R13, R9 - ADDQ R13, R10 - ADDQ R13, R11 - ADDQ R13, DX - MOVQ $0x0000000f, R13 - MOVQ R13, X1 - VPBROADCASTB X1, Y1 + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, CX + +mulAvxGFNI_8x1Xor_loop: + // Load 1 outputs + VMOVDQU (R11), Y8 -mulAvxTwo_8x1Xor_loop: // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (BX), Y4 - ADDQ $0x20, BX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU (R12), Y0 - VMOVDQU (CX), Y2 - VMOVDQU 32(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (DX), Y9 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y9, Y9 + VXORPD Y8, Y9, Y8 // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (SI), Y4 - ADDQ $0x20, SI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (BX), Y9 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y9, Y9 + VXORPD Y8, Y9, Y8 // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (DI), Y4 - ADDQ $0x20, DI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 128(CX), Y2 - VMOVDQU 160(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (SI), Y9 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y9, Y9 + VXORPD Y8, Y9, Y8 // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (R8), Y4 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 192(CX), Y2 - VMOVDQU 224(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (DI), Y9 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y9, Y9 + VXORPD Y8, Y9, Y8 // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (R9), Y4 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 256(CX), Y2 - VMOVDQU 288(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (R8), Y9 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y9, Y9 + VXORPD Y8, Y9, Y8 // Load and process 32 bytes from input 5 to 1 outputs - VMOVDQU (R10), Y4 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 320(CX), Y2 - VMOVDQU 352(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (R9), Y9 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y9, Y9 + VXORPD Y8, Y9, Y8 // Load and process 32 bytes from input 6 to 1 outputs - VMOVDQU (R11), Y4 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 384(CX), Y2 - VMOVDQU 416(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (R10), Y9 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y9, Y9 + VXORPD Y8, Y9, Y8 // Load and process 32 bytes from input 7 to 1 outputs - VMOVDQU (DX), Y4 - ADDQ $0x20, DX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 448(CX), Y2 - VMOVDQU 480(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (CX), Y9 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y7, Y9, Y9 + VXORPD Y8, Y9, Y8 // Store 1 outputs - VMOVDQU Y0, (R12) - ADDQ $0x20, R12 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_8x1Xor_loop + JNZ mulAvxGFNI_8x1Xor_loop VZEROUPPER -mulAvxTwo_8x1Xor_end: +mulAvxGFNI_8x1Xor_end: RET // func mulAvxTwo_8x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -54822,7 +69742,6 @@ TEXT ·mulAvxTwo_8x1_64Xor(SB), $0-88 MOVQ 144(DX), R11 MOVQ 168(DX), DX MOVQ out_base+48(FP), R12 - MOVQ out_base+48(FP), R12 MOVQ (R12), R12 MOVQ start+72(FP), R13 @@ -55012,199 +69931,6 @@ mulAvxTwo_8x1_64Xor_loop: mulAvxTwo_8x1_64Xor_end: RET -// func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_8x2(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 39 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_8x2_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), DX - MOVQ out_base+48(FP), R12 - MOVQ (R12), R13 - MOVQ 24(R12), R12 - MOVQ start+72(FP), R14 - - // Add start offset to output - ADDQ R14, R13 - ADDQ R14, R12 - - // Add start offset to input - ADDQ R14, BX - ADDQ R14, SI - ADDQ R14, DI - ADDQ R14, R8 - ADDQ R14, R9 - ADDQ R14, R10 - ADDQ R14, R11 - ADDQ R14, DX - MOVQ $0x0000000f, R14 - MOVQ R14, X2 - VPBROADCASTB X2, Y2 - -mulAvxTwo_8x2_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y0 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y1 - - // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (R9), Y5 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 5 to 2 outputs - VMOVDQU (R10), Y5 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 6 to 2 outputs - VMOVDQU (R11), Y5 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 7 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 896(CX), Y3 - VMOVDQU 928(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 960(CX), Y3 - VMOVDQU 992(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Store 2 outputs - VMOVDQU Y0, (R13) - ADDQ $0x20, R13 - VMOVDQU Y1, (R12) - ADDQ $0x20, R12 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_8x2_loop - VZEROUPPER - -mulAvxTwo_8x2_end: - RET - // func mulAvxTwo_8x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_8x2_64(SB), $0-88 @@ -55226,7 +69952,6 @@ TEXT ·mulAvxTwo_8x2_64(SB), $0-88 MOVQ 144(DX), R11 MOVQ 168(DX), DX MOVQ out_base+48(FP), R12 - MOVQ out_base+48(FP), R12 MOVQ (R12), R13 MOVQ 24(R12), R12 MOVQ start+72(FP), R14 @@ -55614,6 +70339,139 @@ mulGFNI_8x2_64_loop: mulGFNI_8x2_64_end: RET +// func mulAvxGFNI_8x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x2(SB), $0-88 + // Loading 12 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R12 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R13 + ADDQ R14, R12 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, DX + +mulAvxGFNI_8x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x2_loop + VZEROUPPER + +mulAvxGFNI_8x2_end: + RET + // func mulGFNI_8x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_8x2_64Xor(SB), $0-88 @@ -55753,199 +70611,143 @@ mulGFNI_8x2_64Xor_loop: mulGFNI_8x2_64Xor_end: RET -// func mulAvxTwo_8x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_8x2Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_8x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x2Xor(SB), $0-88 + // Loading 12 of 16 tables to registers // Destination kept in GP registers - // Full registers estimated 39 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_8x2Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), DX - MOVQ out_base+48(FP), R12 - MOVQ (R12), R13 - MOVQ 24(R12), R12 - MOVQ start+72(FP), R14 + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R12 + MOVQ start+72(FP), R14 // Add start offset to output ADDQ R14, R13 ADDQ R14, R12 // Add start offset to input - ADDQ R14, BX - ADDQ R14, SI - ADDQ R14, DI - ADDQ R14, R8 - ADDQ R14, R9 - ADDQ R14, R10 - ADDQ R14, R11 - ADDQ R14, DX - MOVQ $0x0000000f, R14 - MOVQ R14, X2 - VPBROADCASTB X2, Y2 + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, DX + +mulAvxGFNI_8x2Xor_loop: + // Load 2 outputs + VMOVDQU (R13), Y12 + VMOVDQU (R12), Y13 -mulAvxTwo_8x2Xor_loop: // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (R13), Y0 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU (R12), Y1 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (R9), Y5 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 5 to 2 outputs - VMOVDQU (R10), Y5 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 6 to 2 outputs - VMOVDQU (R11), Y5 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 7 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 896(CX), Y3 - VMOVDQU 928(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 960(CX), Y3 - VMOVDQU 992(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 2 outputs - VMOVDQU Y0, (R13) + VMOVDQU Y12, (R13) ADDQ $0x20, R13 - VMOVDQU Y1, (R12) + VMOVDQU Y13, (R12) ADDQ $0x20, R12 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_8x2Xor_loop + JNZ mulAvxGFNI_8x2Xor_loop VZEROUPPER -mulAvxTwo_8x2Xor_end: +mulAvxGFNI_8x2Xor_end: RET // func mulAvxTwo_8x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -55969,7 +70771,6 @@ TEXT ·mulAvxTwo_8x2_64Xor(SB), $0-88 MOVQ 144(DX), R11 MOVQ 168(DX), DX MOVQ out_base+48(FP), R12 - MOVQ out_base+48(FP), R12 MOVQ (R12), R13 MOVQ 24(R12), R12 MOVQ start+72(FP), R14 @@ -56230,243 +71031,6 @@ mulAvxTwo_8x2_64Xor_loop: mulAvxTwo_8x2_64Xor_end: RET -// func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_8x3(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 56 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_8x3_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), DX - MOVQ out_base+48(FP), R12 - MOVQ (R12), R13 - MOVQ 24(R12), R14 - MOVQ 48(R12), R12 - MOVQ start+72(FP), R15 - - // Add start offset to output - ADDQ R15, R13 - ADDQ R15, R14 - ADDQ R15, R12 - - // Add start offset to input - ADDQ R15, BX - ADDQ R15, SI - ADDQ R15, DI - ADDQ R15, R8 - ADDQ R15, R9 - ADDQ R15, R10 - ADDQ R15, R11 - ADDQ R15, DX - MOVQ $0x0000000f, R15 - MOVQ R15, X3 - VPBROADCASTB X3, Y3 - -mulAvxTwo_8x3_loop: - // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y0 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y1 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y2 - - // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (R9), Y6 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 5 to 3 outputs - VMOVDQU (R10), Y6 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 960(CX), Y4 - VMOVDQU 992(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1024(CX), Y4 - VMOVDQU 1056(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1088(CX), Y4 - VMOVDQU 1120(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 6 to 3 outputs - VMOVDQU (R11), Y6 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1152(CX), Y4 - VMOVDQU 1184(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1216(CX), Y4 - VMOVDQU 1248(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1280(CX), Y4 - VMOVDQU 1312(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 7 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1344(CX), Y4 - VMOVDQU 1376(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1408(CX), Y4 - VMOVDQU 1440(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1472(CX), Y4 - VMOVDQU 1504(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Store 3 outputs - VMOVDQU Y0, (R13) - ADDQ $0x20, R13 - VMOVDQU Y1, (R14) - ADDQ $0x20, R14 - VMOVDQU Y2, (R12) - ADDQ $0x20, R12 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_8x3_loop - VZEROUPPER - -mulAvxTwo_8x3_end: - RET - // func mulAvxTwo_8x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_8x3_64(SB), $0-88 @@ -56488,7 +71052,6 @@ TEXT ·mulAvxTwo_8x3_64(SB), $0-88 MOVQ 144(DX), R11 MOVQ 168(DX), DX MOVQ out_base+48(FP), R12 - MOVQ out_base+48(FP), R12 MOVQ (R12), R13 MOVQ 24(R12), R14 MOVQ 48(R12), R12 @@ -56972,6 +71535,166 @@ mulGFNI_8x3_64_loop: mulGFNI_8x3_64_end: RET +// func mulAvxGFNI_8x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x3(SB), $0-88 + // Loading 11 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R12 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R12 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, DX + +mulAvxGFNI_8x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x3_loop + VZEROUPPER + +mulAvxGFNI_8x3_end: + RET + // func mulGFNI_8x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_8x3_64Xor(SB), $0-88 @@ -57140,31 +71863,43 @@ mulGFNI_8x3_64Xor_loop: mulGFNI_8x3_64Xor_end: RET -// func mulAvxTwo_8x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_8x3Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_8x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x3Xor(SB), $0-88 + // Loading 11 of 24 tables to registers // Destination kept in GP registers - // Full registers estimated 56 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_8x3Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), DX - MOVQ out_base+48(FP), R12 - MOVQ (R12), R13 - MOVQ 24(R12), R14 - MOVQ 48(R12), R12 - MOVQ start+72(FP), R15 + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R12 + MOVQ start+72(FP), R15 // Add start offset to output ADDQ R15, R13 @@ -57172,212 +71907,128 @@ TEXT ·mulAvxTwo_8x3Xor(SB), NOSPLIT, $0-88 ADDQ R15, R12 // Add start offset to input - ADDQ R15, BX - ADDQ R15, SI - ADDQ R15, DI - ADDQ R15, R8 - ADDQ R15, R9 - ADDQ R15, R10 - ADDQ R15, R11 - ADDQ R15, DX - MOVQ $0x0000000f, R15 - MOVQ R15, X3 - VPBROADCASTB X3, Y3 + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, DX + +mulAvxGFNI_8x3Xor_loop: + // Load 3 outputs + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (R12), Y13 -mulAvxTwo_8x3Xor_loop: // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (R13), Y0 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU (R14), Y1 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU (R12), Y2 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (R9), Y6 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 5 to 3 outputs - VMOVDQU (R10), Y6 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 960(CX), Y4 - VMOVDQU 992(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1024(CX), Y4 - VMOVDQU 1056(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1088(CX), Y4 - VMOVDQU 1120(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 6 to 3 outputs - VMOVDQU (R11), Y6 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1152(CX), Y4 - VMOVDQU 1184(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1216(CX), Y4 - VMOVDQU 1248(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1280(CX), Y4 - VMOVDQU 1312(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 7 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1344(CX), Y4 - VMOVDQU 1376(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1408(CX), Y4 - VMOVDQU 1440(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1472(CX), Y4 - VMOVDQU 1504(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 3 outputs - VMOVDQU Y0, (R13) + VMOVDQU Y11, (R13) ADDQ $0x20, R13 - VMOVDQU Y1, (R14) + VMOVDQU Y12, (R14) ADDQ $0x20, R14 - VMOVDQU Y2, (R12) + VMOVDQU Y13, (R12) ADDQ $0x20, R12 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_8x3Xor_loop + JNZ mulAvxGFNI_8x3Xor_loop VZEROUPPER -mulAvxTwo_8x3Xor_end: +mulAvxGFNI_8x3Xor_end: RET // func mulAvxTwo_8x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -57401,7 +72052,6 @@ TEXT ·mulAvxTwo_8x3_64Xor(SB), $0-88 MOVQ 144(DX), R11 MOVQ 168(DX), DX MOVQ out_base+48(FP), R12 - MOVQ out_base+48(FP), R12 MOVQ (R12), R13 MOVQ 24(R12), R14 MOVQ 48(R12), R12 @@ -58195,6 +72845,193 @@ mulGFNI_8x4_64_loop: mulGFNI_8x4_64_end: RET +// func mulAvxGFNI_8x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x4(SB), $8-88 + // Loading 10 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, DX + +mulAvxGFNI_8x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x4_loop + VZEROUPPER + +mulAvxGFNI_8x4_end: + RET + // func mulGFNI_8x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_8x4_64Xor(SB), $8-88 @@ -58386,6 +73223,203 @@ mulGFNI_8x4_64Xor_loop: mulGFNI_8x4_64Xor_end: RET +// func mulAvxGFNI_8x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x4Xor(SB), $8-88 + // Loading 10 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, DX + +mulAvxGFNI_8x4Xor_loop: + // Load 4 outputs + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R12), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x4Xor_loop + VZEROUPPER + +mulAvxGFNI_8x4Xor_end: + RET + // func mulAvxTwo_8x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_8x4Xor(SB), NOSPLIT, $8-88 @@ -59201,6 +74235,224 @@ mulGFNI_8x5_64_loop: mulGFNI_8x5_64_end: RET +// func mulAvxGFNI_8x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x5(SB), $8-88 + // Loading 9 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), AX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_8x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_8x5_loop + VZEROUPPER + +mulAvxGFNI_8x5_end: + RET + // func mulGFNI_8x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_8x5_64Xor(SB), $8-88 @@ -59416,6 +74668,236 @@ mulGFNI_8x5_64Xor_loop: mulGFNI_8x5_64Xor_end: RET +// func mulAvxGFNI_8x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x5Xor(SB), $8-88 + // Loading 9 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), AX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_8x5Xor_loop: + // Load 5 outputs + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R11), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_8x5Xor_loop + VZEROUPPER + +mulAvxGFNI_8x5Xor_end: + RET + // func mulAvxTwo_8x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_8x5Xor(SB), NOSPLIT, $8-88 @@ -60308,6 +75790,234 @@ mulGFNI_8x6_64_loop: mulGFNI_8x6_64_end: RET +// func mulAvxGFNI_8x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x6(SB), $0-88 + // Loading 8 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x6_loop + VZEROUPPER + +mulAvxGFNI_8x6_end: + RET + // func mulGFNI_8x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_8x6_64Xor(SB), $0-88 @@ -60532,6 +76242,254 @@ mulGFNI_8x6_64Xor_loop: mulGFNI_8x6_64Xor_end: RET +// func mulAvxGFNI_8x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x6Xor(SB), $0-88 + // Loading 8 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x6Xor_loop: + // Load 6 outputs + MOVQ (R12), R14 + VMOVDQU (R14)(R13*1), Y8 + MOVQ 24(R12), R14 + VMOVDQU (R14)(R13*1), Y9 + MOVQ 48(R12), R14 + VMOVDQU (R14)(R13*1), Y10 + MOVQ 72(R12), R14 + VMOVDQU (R14)(R13*1), Y11 + MOVQ 96(R12), R14 + VMOVDQU (R14)(R13*1), Y12 + MOVQ 120(R12), R14 + VMOVDQU (R14)(R13*1), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x6Xor_loop + VZEROUPPER + +mulAvxGFNI_8x6Xor_end: + RET + // func mulAvxTwo_8x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_8x6Xor(SB), NOSPLIT, $0-88 @@ -61518,6 +77476,259 @@ mulGFNI_8x7_64_loop: mulGFNI_8x7_64_end: RET +// func mulAvxGFNI_8x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x7(SB), $0-88 + // Loading 7 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x7_loop + VZEROUPPER + +mulAvxGFNI_8x7_end: + RET + // func mulGFNI_8x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_8x7_64Xor(SB), $0-88 @@ -61761,6 +77972,282 @@ mulGFNI_8x7_64Xor_loop: mulGFNI_8x7_64Xor_end: RET +// func mulAvxGFNI_8x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x7Xor(SB), $0-88 + // Loading 7 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x7Xor_loop: + // Load 7 outputs + MOVQ (R12), R14 + VMOVDQU (R14)(R13*1), Y7 + MOVQ 24(R12), R14 + VMOVDQU (R14)(R13*1), Y8 + MOVQ 48(R12), R14 + VMOVDQU (R14)(R13*1), Y9 + MOVQ 72(R12), R14 + VMOVDQU (R14)(R13*1), Y10 + MOVQ 96(R12), R14 + VMOVDQU (R14)(R13*1), Y11 + MOVQ 120(R12), R14 + VMOVDQU (R14)(R13*1), Y12 + MOVQ 144(R12), R14 + VMOVDQU (R14)(R13*1), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x7Xor_loop + VZEROUPPER + +mulAvxGFNI_8x7Xor_end: + RET + // func mulAvxTwo_8x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_8x7Xor(SB), NOSPLIT, $0-88 @@ -62849,6 +79336,284 @@ mulGFNI_8x8_64_loop: mulGFNI_8x8_64_end: RET +// func mulAvxGFNI_8x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x8(SB), $0-88 + // Loading 6 of 64 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x8_loop + VZEROUPPER + +mulAvxGFNI_8x8_end: + RET + // func mulGFNI_8x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_8x8_64Xor(SB), $0-88 @@ -63111,6 +79876,310 @@ mulGFNI_8x8_64Xor_loop: mulGFNI_8x8_64Xor_end: RET +// func mulAvxGFNI_8x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x8Xor(SB), $0-88 + // Loading 6 of 64 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x8Xor_loop: + // Load 8 outputs + MOVQ (R12), R14 + VMOVDQU (R14)(R13*1), Y6 + MOVQ 24(R12), R14 + VMOVDQU (R14)(R13*1), Y7 + MOVQ 48(R12), R14 + VMOVDQU (R14)(R13*1), Y8 + MOVQ 72(R12), R14 + VMOVDQU (R14)(R13*1), Y9 + MOVQ 96(R12), R14 + VMOVDQU (R14)(R13*1), Y10 + MOVQ 120(R12), R14 + VMOVDQU (R14)(R13*1), Y11 + MOVQ 144(R12), R14 + VMOVDQU (R14)(R13*1), Y12 + MOVQ 168(R12), R14 + VMOVDQU (R14)(R13*1), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x8Xor_loop + VZEROUPPER + +mulAvxGFNI_8x8Xor_end: + RET + // func mulAvxTwo_8x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_8x8Xor(SB), NOSPLIT, $0-88 @@ -64301,6 +81370,309 @@ mulGFNI_8x9_64_loop: mulGFNI_8x9_64_end: RET +// func mulAvxGFNI_8x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x9(SB), $0-88 + // Loading 5 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 83 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R12), R14 + VMOVDQU Y5, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x9_loop + VZEROUPPER + +mulAvxGFNI_8x9_end: + RET + // func mulGFNI_8x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_8x9_64Xor(SB), $0-88 @@ -64582,6 +81954,338 @@ mulGFNI_8x9_64Xor_loop: mulGFNI_8x9_64Xor_end: RET +// func mulAvxGFNI_8x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x9Xor(SB), $0-88 + // Loading 5 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 83 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x9Xor_loop: + // Load 9 outputs + MOVQ (R12), R14 + VMOVDQU (R14)(R13*1), Y5 + MOVQ 24(R12), R14 + VMOVDQU (R14)(R13*1), Y6 + MOVQ 48(R12), R14 + VMOVDQU (R14)(R13*1), Y7 + MOVQ 72(R12), R14 + VMOVDQU (R14)(R13*1), Y8 + MOVQ 96(R12), R14 + VMOVDQU (R14)(R13*1), Y9 + MOVQ 120(R12), R14 + VMOVDQU (R14)(R13*1), Y10 + MOVQ 144(R12), R14 + VMOVDQU (R14)(R13*1), Y11 + MOVQ 168(R12), R14 + VMOVDQU (R14)(R13*1), Y12 + MOVQ 192(R12), R14 + VMOVDQU (R14)(R13*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R12), R14 + VMOVDQU Y5, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x9Xor_loop + VZEROUPPER + +mulAvxGFNI_8x9Xor_end: + RET + // func mulAvxTwo_8x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_8x9Xor(SB), NOSPLIT, $0-88 @@ -65874,6 +83578,334 @@ mulGFNI_8x10_64_loop: mulGFNI_8x10_64_end: RET +// func mulAvxGFNI_8x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x10(SB), $0-88 + // Loading 4 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R12), R14 + VMOVDQU Y4, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y5, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 216(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x10_loop + VZEROUPPER + +mulAvxGFNI_8x10_end: + RET + // func mulGFNI_8x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_8x10_64Xor(SB), $0-88 @@ -66174,6 +84206,366 @@ mulGFNI_8x10_64Xor_loop: mulGFNI_8x10_64Xor_end: RET +// func mulAvxGFNI_8x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x10Xor(SB), $0-88 + // Loading 4 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x10Xor_loop: + // Load 10 outputs + MOVQ (R12), R14 + VMOVDQU (R14)(R13*1), Y4 + MOVQ 24(R12), R14 + VMOVDQU (R14)(R13*1), Y5 + MOVQ 48(R12), R14 + VMOVDQU (R14)(R13*1), Y6 + MOVQ 72(R12), R14 + VMOVDQU (R14)(R13*1), Y7 + MOVQ 96(R12), R14 + VMOVDQU (R14)(R13*1), Y8 + MOVQ 120(R12), R14 + VMOVDQU (R14)(R13*1), Y9 + MOVQ 144(R12), R14 + VMOVDQU (R14)(R13*1), Y10 + MOVQ 168(R12), R14 + VMOVDQU (R14)(R13*1), Y11 + MOVQ 192(R12), R14 + VMOVDQU (R14)(R13*1), Y12 + MOVQ 216(R12), R14 + VMOVDQU (R14)(R13*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R12), R14 + VMOVDQU Y4, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y5, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 216(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x10Xor_loop + VZEROUPPER + +mulAvxGFNI_8x10Xor_end: + RET + // func mulAvxTwo_8x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_8x10Xor(SB), NOSPLIT, $0-88 @@ -66718,169 +85110,6 @@ mulAvxTwo_8x10Xor_loop: mulAvxTwo_8x10Xor_end: RET -// func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_9x1(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 22 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_9x1_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), R12 - MOVQ 192(DX), DX - MOVQ out_base+48(FP), R13 - MOVQ (R13), R13 - MOVQ start+72(FP), R14 - - // Add start offset to output - ADDQ R14, R13 - - // Add start offset to input - ADDQ R14, BX - ADDQ R14, SI - ADDQ R14, DI - ADDQ R14, R8 - ADDQ R14, R9 - ADDQ R14, R10 - ADDQ R14, R11 - ADDQ R14, R12 - ADDQ R14, DX - MOVQ $0x0000000f, R14 - MOVQ R14, X1 - VPBROADCASTB X1, Y1 - -mulAvxTwo_9x1_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (BX), Y4 - ADDQ $0x20, BX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU (CX), Y2 - VMOVDQU 32(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - VPXOR Y2, Y3, Y0 - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (SI), Y4 - ADDQ $0x20, SI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (DI), Y4 - ADDQ $0x20, DI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 128(CX), Y2 - VMOVDQU 160(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (R8), Y4 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 192(CX), Y2 - VMOVDQU 224(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (R9), Y4 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 256(CX), Y2 - VMOVDQU 288(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 5 to 1 outputs - VMOVDQU (R10), Y4 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 320(CX), Y2 - VMOVDQU 352(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 6 to 1 outputs - VMOVDQU (R11), Y4 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 384(CX), Y2 - VMOVDQU 416(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 7 to 1 outputs - VMOVDQU (R12), Y4 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 448(CX), Y2 - VMOVDQU 480(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 8 to 1 outputs - VMOVDQU (DX), Y4 - ADDQ $0x20, DX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 512(CX), Y2 - VMOVDQU 544(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Store 1 outputs - VMOVDQU Y0, (R13) - ADDQ $0x20, R13 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_9x1_loop - VZEROUPPER - -mulAvxTwo_9x1_end: - RET - // func mulAvxTwo_9x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_9x1_64(SB), $0-88 @@ -66903,7 +85132,6 @@ TEXT ·mulAvxTwo_9x1_64(SB), $0-88 MOVQ 168(DX), R12 MOVQ 192(DX), DX MOVQ out_base+48(FP), R13 - MOVQ out_base+48(FP), R13 MOVQ (R13), R13 MOVQ start+72(FP), R14 @@ -67224,6 +85452,121 @@ mulGFNI_9x1_64_loop: mulGFNI_9x1_64_end: RET +// func mulAvxGFNI_9x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), CX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R12 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, CX + +mulAvxGFNI_9x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y10 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y10, Y9 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y10 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y10 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y10 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y10 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y10 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R10), Y10 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (R11), Y10 + ADDQ $0x20, R11 + VGF2P8AFFINEQB $0x00, Y7, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 8 to 1 outputs + VMOVDQU (CX), Y10 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y8, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Store 1 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_9x1_loop + VZEROUPPER + +mulAvxGFNI_9x1_end: + RET + // func mulGFNI_9x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_9x1_64Xor(SB), $0-88 @@ -67343,17 +85686,136 @@ mulGFNI_9x1_64Xor_loop: mulGFNI_9x1_64Xor_end: RET -// func mulAvxTwo_9x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// func mulAvxGFNI_9x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), CX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R12 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, CX + +mulAvxGFNI_9x1Xor_loop: + // Load 1 outputs + VMOVDQU (R12), Y9 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y10 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y10 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y10 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y10 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y10 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y10 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R10), Y10 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (R11), Y10 + ADDQ $0x20, R11 + VGF2P8AFFINEQB $0x00, Y7, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 8 to 1 outputs + VMOVDQU (CX), Y10 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y8, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Store 1 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_9x1Xor_loop + VZEROUPPER + +mulAvxGFNI_9x1Xor_end: + RET + +// func mulAvxTwo_9x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_9x1Xor(SB), NOSPLIT, $0-88 +TEXT ·mulAvxTwo_9x1_64Xor(SB), $0-88 // Loading no tables to registers // Destination kept in GP registers - // Full registers estimated 22 YMM used + // Full registers estimated 42 YMM used MOVQ n+80(FP), AX MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX + SHRQ $0x06, AX TESTQ AX, AX - JZ mulAvxTwo_9x1Xor_end + JZ mulAvxTwo_9x1_64Xor_end MOVQ in_base+24(FP), DX MOVQ (DX), BX MOVQ 24(DX), SI @@ -67371,171 +85833,6 @@ TEXT ·mulAvxTwo_9x1Xor(SB), NOSPLIT, $0-88 // Add start offset to output ADDQ R14, R13 - // Add start offset to input - ADDQ R14, BX - ADDQ R14, SI - ADDQ R14, DI - ADDQ R14, R8 - ADDQ R14, R9 - ADDQ R14, R10 - ADDQ R14, R11 - ADDQ R14, R12 - ADDQ R14, DX - MOVQ $0x0000000f, R14 - MOVQ R14, X1 - VPBROADCASTB X1, Y1 - -mulAvxTwo_9x1Xor_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (BX), Y4 - ADDQ $0x20, BX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU (R13), Y0 - VMOVDQU (CX), Y2 - VMOVDQU 32(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (SI), Y4 - ADDQ $0x20, SI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (DI), Y4 - ADDQ $0x20, DI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 128(CX), Y2 - VMOVDQU 160(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (R8), Y4 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 192(CX), Y2 - VMOVDQU 224(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (R9), Y4 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 256(CX), Y2 - VMOVDQU 288(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 5 to 1 outputs - VMOVDQU (R10), Y4 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 320(CX), Y2 - VMOVDQU 352(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 6 to 1 outputs - VMOVDQU (R11), Y4 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 384(CX), Y2 - VMOVDQU 416(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 7 to 1 outputs - VMOVDQU (R12), Y4 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 448(CX), Y2 - VMOVDQU 480(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 8 to 1 outputs - VMOVDQU (DX), Y4 - ADDQ $0x20, DX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 512(CX), Y2 - VMOVDQU 544(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Store 1 outputs - VMOVDQU Y0, (R13) - ADDQ $0x20, R13 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_9x1Xor_loop - VZEROUPPER - -mulAvxTwo_9x1Xor_end: - RET - -// func mulAvxTwo_9x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_9x1_64Xor(SB), $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 42 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x06, AX - TESTQ AX, AX - JZ mulAvxTwo_9x1_64Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), R12 - MOVQ 192(DX), DX - MOVQ out_base+48(FP), R13 - MOVQ out_base+48(FP), R13 - MOVQ (R13), R13 - MOVQ start+72(FP), R14 - - // Add start offset to output - ADDQ R14, R13 - // Add start offset to input ADDQ R14, BX ADDQ R14, SI @@ -67739,218 +86036,6 @@ mulAvxTwo_9x1_64Xor_loop: mulAvxTwo_9x1_64Xor_end: RET -// func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_9x2(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 43 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_9x2_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), R12 - MOVQ 192(DX), DX - MOVQ out_base+48(FP), R13 - MOVQ (R13), R14 - MOVQ 24(R13), R13 - MOVQ start+72(FP), R15 - - // Add start offset to output - ADDQ R15, R14 - ADDQ R15, R13 - - // Add start offset to input - ADDQ R15, BX - ADDQ R15, SI - ADDQ R15, DI - ADDQ R15, R8 - ADDQ R15, R9 - ADDQ R15, R10 - ADDQ R15, R11 - ADDQ R15, R12 - ADDQ R15, DX - MOVQ $0x0000000f, R15 - MOVQ R15, X2 - VPBROADCASTB X2, Y2 - -mulAvxTwo_9x2_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y0 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y1 - - // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (R9), Y5 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 5 to 2 outputs - VMOVDQU (R10), Y5 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 6 to 2 outputs - VMOVDQU (R11), Y5 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 7 to 2 outputs - VMOVDQU (R12), Y5 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 896(CX), Y3 - VMOVDQU 928(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 960(CX), Y3 - VMOVDQU 992(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 8 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 1024(CX), Y3 - VMOVDQU 1056(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 1088(CX), Y3 - VMOVDQU 1120(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Store 2 outputs - VMOVDQU Y0, (R14) - ADDQ $0x20, R14 - VMOVDQU Y1, (R13) - ADDQ $0x20, R13 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_9x2_loop - VZEROUPPER - -mulAvxTwo_9x2_end: - RET - // func mulAvxTwo_9x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_9x2_64(SB), $0-88 @@ -67973,7 +86058,6 @@ TEXT ·mulAvxTwo_9x2_64(SB), $0-88 MOVQ 168(DX), R12 MOVQ 192(DX), DX MOVQ out_base+48(FP), R13 - MOVQ out_base+48(FP), R13 MOVQ (R13), R14 MOVQ 24(R13), R13 MOVQ start+72(FP), R15 @@ -68401,6 +86485,151 @@ mulGFNI_9x2_64_loop: mulGFNI_9x2_64_end: RET +// func mulAvxGFNI_9x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x2(SB), $0-88 + // Loading 12 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R13 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R14 + ADDQ R15, R13 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, DX + +mulAvxGFNI_9x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_9x2_loop + VZEROUPPER + +mulAvxGFNI_9x2_end: + RET + // func mulGFNI_9x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_9x2_64Xor(SB), $0-88 @@ -68552,218 +86781,155 @@ mulGFNI_9x2_64Xor_loop: mulGFNI_9x2_64Xor_end: RET -// func mulAvxTwo_9x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_9x2Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_9x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x2Xor(SB), $0-88 + // Loading 12 of 18 tables to registers // Destination kept in GP registers - // Full registers estimated 43 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_9x2Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), R12 - MOVQ 192(DX), DX - MOVQ out_base+48(FP), R13 - MOVQ (R13), R14 - MOVQ 24(R13), R13 - MOVQ start+72(FP), R15 + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R13 + MOVQ start+72(FP), R15 // Add start offset to output ADDQ R15, R14 ADDQ R15, R13 // Add start offset to input - ADDQ R15, BX - ADDQ R15, SI - ADDQ R15, DI - ADDQ R15, R8 - ADDQ R15, R9 - ADDQ R15, R10 - ADDQ R15, R11 - ADDQ R15, R12 - ADDQ R15, DX - MOVQ $0x0000000f, R15 - MOVQ R15, X2 - VPBROADCASTB X2, Y2 + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, DX + +mulAvxGFNI_9x2Xor_loop: + // Load 2 outputs + VMOVDQU (R14), Y12 + VMOVDQU (R13), Y13 -mulAvxTwo_9x2Xor_loop: // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (R14), Y0 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU (R13), Y1 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (R9), Y5 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 5 to 2 outputs - VMOVDQU (R10), Y5 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 6 to 2 outputs - VMOVDQU (R11), Y5 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 7 to 2 outputs - VMOVDQU (R12), Y5 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 896(CX), Y3 - VMOVDQU 928(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 960(CX), Y3 - VMOVDQU 992(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 8 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 1024(CX), Y3 - VMOVDQU 1056(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 1088(CX), Y3 - VMOVDQU 1120(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 2 outputs - VMOVDQU Y0, (R14) + VMOVDQU Y12, (R14) ADDQ $0x20, R14 - VMOVDQU Y1, (R13) + VMOVDQU Y13, (R13) ADDQ $0x20, R13 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_9x2Xor_loop + JNZ mulAvxGFNI_9x2Xor_loop VZEROUPPER -mulAvxTwo_9x2Xor_end: +mulAvxGFNI_9x2Xor_end: RET // func mulAvxTwo_9x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -68788,7 +86954,6 @@ TEXT ·mulAvxTwo_9x2_64Xor(SB), $0-88 MOVQ 168(DX), R12 MOVQ 192(DX), DX MOVQ out_base+48(FP), R13 - MOVQ out_base+48(FP), R13 MOVQ (R13), R14 MOVQ 24(R13), R13 MOVQ start+72(FP), R15 @@ -69077,267 +87242,6 @@ mulAvxTwo_9x2_64Xor_loop: mulAvxTwo_9x2_64Xor_end: RET -// func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_9x3(SB), NOSPLIT, $8-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 62 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_9x3_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), R12 - MOVQ 192(DX), DX - MOVQ out_base+48(FP), R13 - MOVQ (R13), R14 - MOVQ 24(R13), R15 - MOVQ 48(R13), R13 - MOVQ start+72(FP), BP - - // Add start offset to output - ADDQ BP, R14 - ADDQ BP, R15 - ADDQ BP, R13 - - // Add start offset to input - ADDQ BP, BX - ADDQ BP, SI - ADDQ BP, DI - ADDQ BP, R8 - ADDQ BP, R9 - ADDQ BP, R10 - ADDQ BP, R11 - ADDQ BP, R12 - ADDQ BP, DX - MOVQ $0x0000000f, BP - MOVQ BP, X3 - VPBROADCASTB X3, Y3 - -mulAvxTwo_9x3_loop: - // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y0 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y1 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y2 - - // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (R9), Y6 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 5 to 3 outputs - VMOVDQU (R10), Y6 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 960(CX), Y4 - VMOVDQU 992(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1024(CX), Y4 - VMOVDQU 1056(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1088(CX), Y4 - VMOVDQU 1120(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 6 to 3 outputs - VMOVDQU (R11), Y6 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1152(CX), Y4 - VMOVDQU 1184(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1216(CX), Y4 - VMOVDQU 1248(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1280(CX), Y4 - VMOVDQU 1312(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 7 to 3 outputs - VMOVDQU (R12), Y6 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1344(CX), Y4 - VMOVDQU 1376(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1408(CX), Y4 - VMOVDQU 1440(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1472(CX), Y4 - VMOVDQU 1504(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 8 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1536(CX), Y4 - VMOVDQU 1568(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1600(CX), Y4 - VMOVDQU 1632(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1664(CX), Y4 - VMOVDQU 1696(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Store 3 outputs - VMOVDQU Y0, (R14) - ADDQ $0x20, R14 - VMOVDQU Y1, (R15) - ADDQ $0x20, R15 - VMOVDQU Y2, (R13) - ADDQ $0x20, R13 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_9x3_loop - VZEROUPPER - -mulAvxTwo_9x3_end: - RET - // func mulAvxTwo_9x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_9x3_64(SB), $8-88 @@ -69360,7 +87264,6 @@ TEXT ·mulAvxTwo_9x3_64(SB), $8-88 MOVQ 168(DX), R12 MOVQ 192(DX), DX MOVQ out_base+48(FP), R13 - MOVQ out_base+48(FP), R13 MOVQ (R13), R14 MOVQ 24(R13), R15 MOVQ 48(R13), R13 @@ -69895,6 +87798,181 @@ mulGFNI_9x3_64_loop: mulGFNI_9x3_64_end: RET +// func mulAvxGFNI_9x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x3(SB), $8-88 + // Loading 11 of 27 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R15 + MOVQ 48(R13), R13 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R13 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, DX + +mulAvxGFNI_9x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_9x3_loop + VZEROUPPER + +mulAvxGFNI_9x3_end: + RET + // func mulGFNI_9x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_9x3_64Xor(SB), $0-88 @@ -70078,32 +88156,44 @@ mulGFNI_9x3_64Xor_loop: mulGFNI_9x3_64Xor_end: RET -// func mulAvxTwo_9x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_9x3Xor(SB), NOSPLIT, $8-88 - // Loading no tables to registers +// func mulAvxGFNI_9x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x3Xor(SB), $8-88 + // Loading 11 of 27 tables to registers // Destination kept in GP registers - // Full registers estimated 62 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_9x3Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), R12 - MOVQ 192(DX), DX - MOVQ out_base+48(FP), R13 - MOVQ (R13), R14 - MOVQ 24(R13), R15 - MOVQ 48(R13), R13 - MOVQ start+72(FP), BP + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R15 + MOVQ 48(R13), R13 + MOVQ start+72(FP), BP // Add start offset to output ADDQ BP, R14 @@ -70111,235 +88201,142 @@ TEXT ·mulAvxTwo_9x3Xor(SB), NOSPLIT, $8-88 ADDQ BP, R13 // Add start offset to input - ADDQ BP, BX - ADDQ BP, SI - ADDQ BP, DI - ADDQ BP, R8 - ADDQ BP, R9 - ADDQ BP, R10 - ADDQ BP, R11 - ADDQ BP, R12 - ADDQ BP, DX - MOVQ $0x0000000f, BP - MOVQ BP, X3 - VPBROADCASTB X3, Y3 + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, DX + +mulAvxGFNI_9x3Xor_loop: + // Load 3 outputs + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R13), Y13 -mulAvxTwo_9x3Xor_loop: // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (R14), Y0 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU (R15), Y1 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU (R13), Y2 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (R9), Y6 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 5 to 3 outputs - VMOVDQU (R10), Y6 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 960(CX), Y4 - VMOVDQU 992(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1024(CX), Y4 - VMOVDQU 1056(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1088(CX), Y4 - VMOVDQU 1120(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 6 to 3 outputs - VMOVDQU (R11), Y6 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1152(CX), Y4 - VMOVDQU 1184(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1216(CX), Y4 - VMOVDQU 1248(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1280(CX), Y4 - VMOVDQU 1312(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 7 to 3 outputs - VMOVDQU (R12), Y6 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1344(CX), Y4 - VMOVDQU 1376(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1408(CX), Y4 - VMOVDQU 1440(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1472(CX), Y4 - VMOVDQU 1504(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 8 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1536(CX), Y4 - VMOVDQU 1568(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1600(CX), Y4 - VMOVDQU 1632(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1664(CX), Y4 - VMOVDQU 1696(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 3 outputs - VMOVDQU Y0, (R14) + VMOVDQU Y11, (R14) ADDQ $0x20, R14 - VMOVDQU Y1, (R15) + VMOVDQU Y12, (R15) ADDQ $0x20, R15 - VMOVDQU Y2, (R13) + VMOVDQU Y13, (R13) ADDQ $0x20, R13 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_9x3Xor_loop + JNZ mulAvxGFNI_9x3Xor_loop VZEROUPPER -mulAvxTwo_9x3Xor_end: +mulAvxGFNI_9x3Xor_end: RET // func mulAvxTwo_9x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -70364,7 +88361,6 @@ TEXT ·mulAvxTwo_9x3_64Xor(SB), $8-88 MOVQ 168(DX), R12 MOVQ 192(DX), DX MOVQ out_base+48(FP), R13 - MOVQ out_base+48(FP), R13 MOVQ (R13), R14 MOVQ 24(R13), R15 MOVQ 48(R13), R13 @@ -71243,6 +89239,215 @@ mulGFNI_9x4_64_loop: mulGFNI_9x4_64_end: RET +// func mulAvxGFNI_9x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x4(SB), $8-88 + // Loading 10 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), AX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_9x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 4 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_9x4_loop + VZEROUPPER + +mulAvxGFNI_9x4_end: + RET + // func mulGFNI_9x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_9x4_64Xor(SB), $8-88 @@ -71452,6 +89657,225 @@ mulGFNI_9x4_64Xor_loop: mulGFNI_9x4_64Xor_end: RET +// func mulAvxGFNI_9x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x4Xor(SB), $8-88 + // Loading 10 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), AX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_9x4Xor_loop: + // Load 4 outputs + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R12), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 4 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_9x4Xor_loop + VZEROUPPER + +mulAvxGFNI_9x4Xor_end: + RET + // func mulAvxTwo_9x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_9x4Xor(SB), NOSPLIT, $8-88 @@ -72320,6 +90744,230 @@ mulGFNI_9x5_64_loop: mulGFNI_9x5_64_end: RET +// func mulAvxGFNI_9x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x5(SB), $0-88 + // Loading 9 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + MOVQ (R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x5_loop + VZEROUPPER + +mulAvxGFNI_9x5_end: + RET + // func mulGFNI_9x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_9x5_64Xor(SB), $0-88 @@ -72541,6 +91189,247 @@ mulGFNI_9x5_64Xor_loop: mulGFNI_9x5_64Xor_end: RET +// func mulAvxGFNI_9x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x5Xor(SB), $0-88 + // Loading 9 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x5Xor_loop: + // Load 5 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + MOVQ (R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x5Xor_loop + VZEROUPPER + +mulAvxGFNI_9x5Xor_end: + RET + // func mulAvxTwo_9x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_9x5Xor(SB), NOSPLIT, $0-88 @@ -73516,6 +92405,258 @@ mulGFNI_9x6_64_loop: mulGFNI_9x6_64_end: RET +// func mulAvxGFNI_9x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x6(SB), $0-88 + // Loading 8 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x6_loop + VZEROUPPER + +mulAvxGFNI_9x6_end: + RET + // func mulGFNI_9x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_9x6_64Xor(SB), $0-88 @@ -73758,6 +92899,278 @@ mulGFNI_9x6_64Xor_loop: mulGFNI_9x6_64Xor_end: RET +// func mulAvxGFNI_9x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x6Xor(SB), $0-88 + // Loading 8 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x6Xor_loop: + // Load 6 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y8 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 120(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x6Xor_loop + VZEROUPPER + +mulAvxGFNI_9x6Xor_end: + RET + // func mulAvxTwo_9x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_9x6Xor(SB), NOSPLIT, $0-88 @@ -74847,6 +94260,286 @@ mulGFNI_9x7_64_loop: mulGFNI_9x7_64_end: RET +// func mulAvxGFNI_9x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x7(SB), $0-88 + // Loading 7 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x7_loop + VZEROUPPER + +mulAvxGFNI_9x7_end: + RET + // func mulGFNI_9x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_9x7_64Xor(SB), $0-88 @@ -75110,6 +94803,309 @@ mulGFNI_9x7_64Xor_loop: mulGFNI_9x7_64Xor_end: RET +// func mulAvxGFNI_9x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x7Xor(SB), $0-88 + // Loading 7 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x7Xor_loop: + // Load 7 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y7 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y8 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 120(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 144(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x7Xor_loop + VZEROUPPER + +mulAvxGFNI_9x7Xor_end: + RET + // func mulAvxTwo_9x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_9x7Xor(SB), NOSPLIT, $0-88 @@ -76313,6 +96309,314 @@ mulGFNI_9x8_64_loop: mulGFNI_9x8_64_end: RET +// func mulAvxGFNI_9x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x8(SB), $0-88 + // Loading 6 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x8_loop + VZEROUPPER + +mulAvxGFNI_9x8_end: + RET + // func mulGFNI_9x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_9x8_64Xor(SB), $0-88 @@ -76597,6 +96901,340 @@ mulGFNI_9x8_64Xor_loop: mulGFNI_9x8_64Xor_end: RET +// func mulAvxGFNI_9x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x8Xor(SB), $0-88 + // Loading 6 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x8Xor_loop: + // Load 8 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y6 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y7 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y8 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 120(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 144(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 168(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x8Xor_loop + VZEROUPPER + +mulAvxGFNI_9x8Xor_end: + RET + // func mulAvxTwo_9x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_9x8Xor(SB), NOSPLIT, $0-88 @@ -77914,6 +98552,342 @@ mulGFNI_9x9_64_loop: mulGFNI_9x9_64_end: RET +// func mulAvxGFNI_9x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x9(SB), $0-88 + // Loading 5 of 81 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R13), R15 + VMOVDQU Y5, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x9_loop + VZEROUPPER + +mulAvxGFNI_9x9_end: + RET + // func mulGFNI_9x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_9x9_64Xor(SB), $0-88 @@ -78219,6 +99193,371 @@ mulGFNI_9x9_64Xor_loop: mulGFNI_9x9_64Xor_end: RET +// func mulAvxGFNI_9x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x9Xor(SB), $0-88 + // Loading 5 of 81 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x9Xor_loop: + // Load 9 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y5 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y6 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y7 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y8 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 120(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 144(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 168(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 192(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R13), R15 + VMOVDQU Y5, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x9Xor_loop + VZEROUPPER + +mulAvxGFNI_9x9Xor_end: + RET + // func mulAvxTwo_9x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_9x9Xor(SB), NOSPLIT, $0-88 @@ -79650,6 +100989,370 @@ mulGFNI_9x10_64_loop: mulGFNI_9x10_64_end: RET +// func mulAvxGFNI_9x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x10(SB), $0-88 + // Loading 4 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 102 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R13), R15 + VMOVDQU Y4, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y5, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 216(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x10_loop + VZEROUPPER + +mulAvxGFNI_9x10_end: + RET + // func mulGFNI_9x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_9x10_64Xor(SB), $0-88 @@ -79976,6 +101679,402 @@ mulGFNI_9x10_64Xor_loop: mulGFNI_9x10_64Xor_end: RET +// func mulAvxGFNI_9x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x10Xor(SB), $0-88 + // Loading 4 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 102 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x10Xor_loop: + // Load 10 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y4 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y5 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y6 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y7 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y8 + MOVQ 120(R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 144(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 168(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 192(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 216(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R13), R15 + VMOVDQU Y4, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y5, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 216(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x10Xor_loop + VZEROUPPER + +mulAvxGFNI_9x10Xor_end: + RET + // func mulAvxTwo_9x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_9x10Xor(SB), NOSPLIT, $0-88 @@ -80579,183 +102678,6 @@ mulAvxTwo_9x10Xor_loop: mulAvxTwo_9x10Xor_end: RET -// func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_10x1(SB), NOSPLIT, $0-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 24 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_10x1_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), R12 - MOVQ 192(DX), R13 - MOVQ 216(DX), DX - MOVQ out_base+48(FP), R14 - MOVQ (R14), R14 - MOVQ start+72(FP), R15 - - // Add start offset to output - ADDQ R15, R14 - - // Add start offset to input - ADDQ R15, BX - ADDQ R15, SI - ADDQ R15, DI - ADDQ R15, R8 - ADDQ R15, R9 - ADDQ R15, R10 - ADDQ R15, R11 - ADDQ R15, R12 - ADDQ R15, R13 - ADDQ R15, DX - MOVQ $0x0000000f, R15 - MOVQ R15, X1 - VPBROADCASTB X1, Y1 - -mulAvxTwo_10x1_loop: - // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (BX), Y4 - ADDQ $0x20, BX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU (CX), Y2 - VMOVDQU 32(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - VPXOR Y2, Y3, Y0 - - // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (SI), Y4 - ADDQ $0x20, SI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (DI), Y4 - ADDQ $0x20, DI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 128(CX), Y2 - VMOVDQU 160(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (R8), Y4 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 192(CX), Y2 - VMOVDQU 224(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (R9), Y4 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 256(CX), Y2 - VMOVDQU 288(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 5 to 1 outputs - VMOVDQU (R10), Y4 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 320(CX), Y2 - VMOVDQU 352(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 6 to 1 outputs - VMOVDQU (R11), Y4 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 384(CX), Y2 - VMOVDQU 416(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 7 to 1 outputs - VMOVDQU (R12), Y4 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 448(CX), Y2 - VMOVDQU 480(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 8 to 1 outputs - VMOVDQU (R13), Y4 - ADDQ $0x20, R13 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 512(CX), Y2 - VMOVDQU 544(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Load and process 32 bytes from input 9 to 1 outputs - VMOVDQU (DX), Y4 - ADDQ $0x20, DX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 576(CX), Y2 - VMOVDQU 608(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) - - // Store 1 outputs - VMOVDQU Y0, (R14) - ADDQ $0x20, R14 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_10x1_loop - VZEROUPPER - -mulAvxTwo_10x1_end: - RET - // func mulAvxTwo_10x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_10x1_64(SB), $0-88 @@ -80779,7 +102701,6 @@ TEXT ·mulAvxTwo_10x1_64(SB), $0-88 MOVQ 192(DX), R13 MOVQ 216(DX), DX MOVQ out_base+48(FP), R14 - MOVQ out_base+48(FP), R14 MOVQ (R14), R14 MOVQ start+72(FP), R15 @@ -81129,6 +103050,130 @@ mulGFNI_10x1_64_loop: mulGFNI_10x1_64_end: RET +// func mulAvxGFNI_10x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 13 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), R12 + MOVQ 216(CX), CX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R13 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R13 + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, CX + +mulAvxGFNI_10x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y11 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y11, Y10 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y11 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y11 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y11 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y11 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y11 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R10), Y11 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (R11), Y11 + ADDQ $0x20, R11 + VGF2P8AFFINEQB $0x00, Y7, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 8 to 1 outputs + VMOVDQU (R12), Y11 + ADDQ $0x20, R12 + VGF2P8AFFINEQB $0x00, Y8, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 9 to 1 outputs + VMOVDQU (CX), Y11 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y9, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Store 1 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_10x1_loop + VZEROUPPER + +mulAvxGFNI_10x1_end: + RET + // func mulGFNI_10x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_10x1_64Xor(SB), $0-88 @@ -81257,182 +103302,132 @@ mulGFNI_10x1_64Xor_loop: mulGFNI_10x1_64Xor_end: RET -// func mulAvxTwo_10x1Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_10x1Xor(SB), NOSPLIT, $0-88 - // Loading no tables to registers +// func mulAvxGFNI_10x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x1Xor(SB), $0-88 + // Loading all tables to registers // Destination kept in GP registers - // Full registers estimated 24 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_10x1Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), R12 - MOVQ 192(DX), R13 - MOVQ 216(DX), DX - MOVQ out_base+48(FP), R14 - MOVQ (R14), R14 - MOVQ start+72(FP), R15 + // Full registers estimated 13 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), R12 + MOVQ 216(CX), CX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R13 + MOVQ start+72(FP), R14 // Add start offset to output - ADDQ R15, R14 + ADDQ R14, R13 // Add start offset to input - ADDQ R15, BX - ADDQ R15, SI - ADDQ R15, DI - ADDQ R15, R8 - ADDQ R15, R9 - ADDQ R15, R10 - ADDQ R15, R11 - ADDQ R15, R12 - ADDQ R15, R13 - ADDQ R15, DX - MOVQ $0x0000000f, R15 - MOVQ R15, X1 - VPBROADCASTB X1, Y1 + ADDQ R14, DX + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, CX + +mulAvxGFNI_10x1Xor_loop: + // Load 1 outputs + VMOVDQU (R13), Y10 -mulAvxTwo_10x1Xor_loop: // Load and process 32 bytes from input 0 to 1 outputs - VMOVDQU (BX), Y4 - ADDQ $0x20, BX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU (R14), Y0 - VMOVDQU (CX), Y2 - VMOVDQU 32(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (DX), Y11 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y11, Y11 + VXORPD Y10, Y11, Y10 // Load and process 32 bytes from input 1 to 1 outputs - VMOVDQU (SI), Y4 - ADDQ $0x20, SI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 64(CX), Y2 - VMOVDQU 96(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (BX), Y11 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y11, Y11 + VXORPD Y10, Y11, Y10 // Load and process 32 bytes from input 2 to 1 outputs - VMOVDQU (DI), Y4 - ADDQ $0x20, DI - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 128(CX), Y2 - VMOVDQU 160(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (SI), Y11 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y11, Y11 + VXORPD Y10, Y11, Y10 // Load and process 32 bytes from input 3 to 1 outputs - VMOVDQU (R8), Y4 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 192(CX), Y2 - VMOVDQU 224(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (DI), Y11 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y11, Y11 + VXORPD Y10, Y11, Y10 // Load and process 32 bytes from input 4 to 1 outputs - VMOVDQU (R9), Y4 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 256(CX), Y2 - VMOVDQU 288(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (R8), Y11 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y11, Y11 + VXORPD Y10, Y11, Y10 // Load and process 32 bytes from input 5 to 1 outputs - VMOVDQU (R10), Y4 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 320(CX), Y2 - VMOVDQU 352(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (R9), Y11 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y11, Y11 + VXORPD Y10, Y11, Y10 // Load and process 32 bytes from input 6 to 1 outputs - VMOVDQU (R11), Y4 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 384(CX), Y2 - VMOVDQU 416(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (R10), Y11 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y11, Y11 + VXORPD Y10, Y11, Y10 // Load and process 32 bytes from input 7 to 1 outputs - VMOVDQU (R12), Y4 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 448(CX), Y2 - VMOVDQU 480(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (R11), Y11 + ADDQ $0x20, R11 + VGF2P8AFFINEQB $0x00, Y7, Y11, Y11 + VXORPD Y10, Y11, Y10 // Load and process 32 bytes from input 8 to 1 outputs - VMOVDQU (R13), Y4 - ADDQ $0x20, R13 - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 512(CX), Y2 - VMOVDQU 544(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (R12), Y11 + ADDQ $0x20, R12 + VGF2P8AFFINEQB $0x00, Y8, Y11, Y11 + VXORPD Y10, Y11, Y10 // Load and process 32 bytes from input 9 to 1 outputs - VMOVDQU (DX), Y4 - ADDQ $0x20, DX - VPSRLQ $0x04, Y4, Y5 - VPAND Y1, Y4, Y4 - VPAND Y1, Y5, Y5 - VMOVDQU 576(CX), Y2 - VMOVDQU 608(CX), Y3 - VPSHUFB Y4, Y2, Y2 - VPSHUFB Y5, Y3, Y3 - XOR3WAY( $0x00, Y2, Y3, Y0) + VMOVDQU (CX), Y11 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y9, Y11, Y11 + VXORPD Y10, Y11, Y10 // Store 1 outputs - VMOVDQU Y0, (R14) - ADDQ $0x20, R14 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_10x1Xor_loop + JNZ mulAvxGFNI_10x1Xor_loop VZEROUPPER -mulAvxTwo_10x1Xor_end: +mulAvxGFNI_10x1Xor_end: RET // func mulAvxTwo_10x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -81458,7 +103453,6 @@ TEXT ·mulAvxTwo_10x1_64Xor(SB), $0-88 MOVQ 192(DX), R13 MOVQ 216(DX), DX MOVQ out_base+48(FP), R14 - MOVQ out_base+48(FP), R14 MOVQ (R14), R14 MOVQ start+72(FP), R15 @@ -81688,237 +103682,6 @@ mulAvxTwo_10x1_64Xor_loop: mulAvxTwo_10x1_64Xor_end: RET -// func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_10x2(SB), NOSPLIT, $8-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 47 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_10x2_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), R12 - MOVQ 192(DX), R13 - MOVQ 216(DX), DX - MOVQ out_base+48(FP), R14 - MOVQ (R14), R15 - MOVQ 24(R14), R14 - MOVQ start+72(FP), BP - - // Add start offset to output - ADDQ BP, R15 - ADDQ BP, R14 - - // Add start offset to input - ADDQ BP, BX - ADDQ BP, SI - ADDQ BP, DI - ADDQ BP, R8 - ADDQ BP, R9 - ADDQ BP, R10 - ADDQ BP, R11 - ADDQ BP, R12 - ADDQ BP, R13 - ADDQ BP, DX - MOVQ $0x0000000f, BP - MOVQ BP, X2 - VPBROADCASTB X2, Y2 - -mulAvxTwo_10x2_loop: - // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y0 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - VPXOR Y3, Y4, Y1 - - // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (R9), Y5 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 5 to 2 outputs - VMOVDQU (R10), Y5 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 6 to 2 outputs - VMOVDQU (R11), Y5 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 7 to 2 outputs - VMOVDQU (R12), Y5 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 896(CX), Y3 - VMOVDQU 928(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 960(CX), Y3 - VMOVDQU 992(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 8 to 2 outputs - VMOVDQU (R13), Y5 - ADDQ $0x20, R13 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 1024(CX), Y3 - VMOVDQU 1056(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 1088(CX), Y3 - VMOVDQU 1120(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Load and process 32 bytes from input 9 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 1152(CX), Y3 - VMOVDQU 1184(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 1216(CX), Y3 - VMOVDQU 1248(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) - - // Store 2 outputs - VMOVDQU Y0, (R15) - ADDQ $0x20, R15 - VMOVDQU Y1, (R14) - ADDQ $0x20, R14 - - // Prepare for next loop - DECQ AX - JNZ mulAvxTwo_10x2_loop - VZEROUPPER - -mulAvxTwo_10x2_end: - RET - // func mulAvxTwo_10x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_10x2_64(SB), $8-88 @@ -81942,7 +103705,6 @@ TEXT ·mulAvxTwo_10x2_64(SB), $8-88 MOVQ 192(DX), R13 MOVQ 216(DX), DX MOVQ out_base+48(FP), R14 - MOVQ out_base+48(FP), R14 MOVQ (R14), R15 MOVQ 24(R14), R14 MOVQ start+72(FP), BP @@ -82410,6 +104172,163 @@ mulGFNI_10x2_64_loop: mulGFNI_10x2_64_end: RET +// func mulAvxGFNI_10x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x2(SB), $8-88 + // Loading 12 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 24 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ (R14), R15 + MOVQ 24(R14), R14 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R15 + ADDQ BP, R14 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, DX + +mulAvxGFNI_10x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 2 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R14) + ADDQ $0x20, R14 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_10x2_loop + VZEROUPPER + +mulAvxGFNI_10x2_end: + RET + // func mulGFNI_10x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_10x2_64Xor(SB), $0-88 @@ -82573,237 +104492,167 @@ mulGFNI_10x2_64Xor_loop: mulGFNI_10x2_64Xor_end: RET -// func mulAvxTwo_10x2Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_10x2Xor(SB), NOSPLIT, $8-88 - // Loading no tables to registers +// func mulAvxGFNI_10x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x2Xor(SB), $8-88 + // Loading 12 of 20 tables to registers // Destination kept in GP registers - // Full registers estimated 47 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_10x2Xor_end - MOVQ in_base+24(FP), DX - MOVQ (DX), BX - MOVQ 24(DX), SI - MOVQ 48(DX), DI - MOVQ 72(DX), R8 - MOVQ 96(DX), R9 - MOVQ 120(DX), R10 - MOVQ 144(DX), R11 - MOVQ 168(DX), R12 - MOVQ 192(DX), R13 - MOVQ 216(DX), DX - MOVQ out_base+48(FP), R14 - MOVQ (R14), R15 - MOVQ 24(R14), R14 - MOVQ start+72(FP), BP + // Full registers estimated 24 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ (R14), R15 + MOVQ 24(R14), R14 + MOVQ start+72(FP), BP // Add start offset to output ADDQ BP, R15 ADDQ BP, R14 // Add start offset to input - ADDQ BP, BX - ADDQ BP, SI - ADDQ BP, DI - ADDQ BP, R8 - ADDQ BP, R9 - ADDQ BP, R10 - ADDQ BP, R11 - ADDQ BP, R12 - ADDQ BP, R13 - ADDQ BP, DX - MOVQ $0x0000000f, BP - MOVQ BP, X2 - VPBROADCASTB X2, Y2 + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, DX + +mulAvxGFNI_10x2Xor_loop: + // Load 2 outputs + VMOVDQU (R15), Y12 + VMOVDQU (R14), Y13 -mulAvxTwo_10x2Xor_loop: // Load and process 32 bytes from input 0 to 2 outputs - VMOVDQU (BX), Y5 - ADDQ $0x20, BX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU (R15), Y0 - VMOVDQU (CX), Y3 - VMOVDQU 32(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU (R14), Y1 - VMOVDQU 64(CX), Y3 - VMOVDQU 96(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 2 outputs - VMOVDQU (SI), Y5 - ADDQ $0x20, SI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 128(CX), Y3 - VMOVDQU 160(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 192(CX), Y3 - VMOVDQU 224(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 2 outputs - VMOVDQU (DI), Y5 - ADDQ $0x20, DI - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 256(CX), Y3 - VMOVDQU 288(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 320(CX), Y3 - VMOVDQU 352(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 2 outputs - VMOVDQU (R8), Y5 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 384(CX), Y3 - VMOVDQU 416(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 448(CX), Y3 - VMOVDQU 480(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 4 to 2 outputs - VMOVDQU (R9), Y5 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 512(CX), Y3 - VMOVDQU 544(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 576(CX), Y3 - VMOVDQU 608(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 5 to 2 outputs - VMOVDQU (R10), Y5 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 640(CX), Y3 - VMOVDQU 672(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 704(CX), Y3 - VMOVDQU 736(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 6 to 2 outputs - VMOVDQU (R11), Y5 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 768(CX), Y3 - VMOVDQU 800(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 832(CX), Y3 - VMOVDQU 864(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 7 to 2 outputs - VMOVDQU (R12), Y5 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 896(CX), Y3 - VMOVDQU 928(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 960(CX), Y3 - VMOVDQU 992(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 8 to 2 outputs - VMOVDQU (R13), Y5 - ADDQ $0x20, R13 - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 1024(CX), Y3 - VMOVDQU 1056(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 1088(CX), Y3 - VMOVDQU 1120(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 9 to 2 outputs - VMOVDQU (DX), Y5 - ADDQ $0x20, DX - VPSRLQ $0x04, Y5, Y6 - VPAND Y2, Y5, Y5 - VPAND Y2, Y6, Y6 - VMOVDQU 1152(CX), Y3 - VMOVDQU 1184(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y0) - VMOVDQU 1216(CX), Y3 - VMOVDQU 1248(CX), Y4 - VPSHUFB Y5, Y3, Y3 - VPSHUFB Y6, Y4, Y4 - XOR3WAY( $0x00, Y3, Y4, Y1) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 2 outputs - VMOVDQU Y0, (R15) + VMOVDQU Y12, (R15) ADDQ $0x20, R15 - VMOVDQU Y1, (R14) + VMOVDQU Y13, (R14) ADDQ $0x20, R14 // Prepare for next loop DECQ AX - JNZ mulAvxTwo_10x2Xor_loop + JNZ mulAvxGFNI_10x2Xor_loop VZEROUPPER -mulAvxTwo_10x2Xor_end: +mulAvxGFNI_10x2Xor_end: RET // func mulAvxTwo_10x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -82829,7 +104678,6 @@ TEXT ·mulAvxTwo_10x2_64Xor(SB), $8-88 MOVQ 192(DX), R13 MOVQ 216(DX), DX MOVQ out_base+48(FP), R14 - MOVQ out_base+48(FP), R14 MOVQ (R14), R15 MOVQ 24(R14), R14 MOVQ start+72(FP), BP @@ -83146,293 +104994,6 @@ mulAvxTwo_10x2_64Xor_loop: mulAvxTwo_10x2_64Xor_end: RET -// func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_10x3(SB), NOSPLIT, $8-88 - // Loading no tables to registers - // Destination kept in GP registers - // Full registers estimated 68 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_10x3_end - MOVQ in_base+24(FP), AX - MOVQ (AX), DX - MOVQ 24(AX), BX - MOVQ 48(AX), SI - MOVQ 72(AX), DI - MOVQ 96(AX), R8 - MOVQ 120(AX), R9 - MOVQ 144(AX), R10 - MOVQ 168(AX), R11 - MOVQ 192(AX), R12 - MOVQ 216(AX), AX - MOVQ out_base+48(FP), R13 - MOVQ (R13), R14 - MOVQ 24(R13), R15 - MOVQ 48(R13), R13 - MOVQ start+72(FP), BP - - // Add start offset to output - ADDQ BP, R14 - ADDQ BP, R15 - ADDQ BP, R13 - - // Add start offset to input - ADDQ BP, DX - ADDQ BP, BX - ADDQ BP, SI - ADDQ BP, DI - ADDQ BP, R8 - ADDQ BP, R9 - ADDQ BP, R10 - ADDQ BP, R11 - ADDQ BP, R12 - ADDQ BP, AX - MOVQ $0x0000000f, BP - MOVQ BP, X3 - VPBROADCASTB X3, Y3 - MOVQ n+80(FP), BP - SHRQ $0x05, BP - -mulAvxTwo_10x3_loop: - // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y0 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y1 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - VPXOR Y4, Y5, Y2 - - // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 5 to 3 outputs - VMOVDQU (R9), Y6 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 960(CX), Y4 - VMOVDQU 992(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1024(CX), Y4 - VMOVDQU 1056(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1088(CX), Y4 - VMOVDQU 1120(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 6 to 3 outputs - VMOVDQU (R10), Y6 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1152(CX), Y4 - VMOVDQU 1184(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1216(CX), Y4 - VMOVDQU 1248(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1280(CX), Y4 - VMOVDQU 1312(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 7 to 3 outputs - VMOVDQU (R11), Y6 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1344(CX), Y4 - VMOVDQU 1376(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1408(CX), Y4 - VMOVDQU 1440(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1472(CX), Y4 - VMOVDQU 1504(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 8 to 3 outputs - VMOVDQU (R12), Y6 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1536(CX), Y4 - VMOVDQU 1568(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1600(CX), Y4 - VMOVDQU 1632(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1664(CX), Y4 - VMOVDQU 1696(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Load and process 32 bytes from input 9 to 3 outputs - VMOVDQU (AX), Y6 - ADDQ $0x20, AX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1728(CX), Y4 - VMOVDQU 1760(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1792(CX), Y4 - VMOVDQU 1824(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1856(CX), Y4 - VMOVDQU 1888(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) - - // Store 3 outputs - VMOVDQU Y0, (R14) - ADDQ $0x20, R14 - VMOVDQU Y1, (R15) - ADDQ $0x20, R15 - VMOVDQU Y2, (R13) - ADDQ $0x20, R13 - - // Prepare for next loop - DECQ BP - JNZ mulAvxTwo_10x3_loop - VZEROUPPER - -mulAvxTwo_10x3_end: - RET - // func mulAvxTwo_10x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_10x3_64(SB), $8-88 @@ -83456,7 +105017,6 @@ TEXT ·mulAvxTwo_10x3_64(SB), $8-88 MOVQ 192(AX), R12 MOVQ 216(AX), AX MOVQ out_base+48(FP), R13 - MOVQ out_base+48(FP), R13 MOVQ (R13), R14 MOVQ 24(R13), R15 MOVQ 48(R13), R13 @@ -84047,6 +105607,200 @@ mulGFNI_10x3_64_loop: mulGFNI_10x3_64_end: RET +// func mulAvxGFNI_10x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x3(SB), $8-88 + // Loading 11 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 35 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), R12 + MOVQ 216(AX), AX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R15 + MOVQ 48(R13), R13 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R13 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_10x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 3 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 3 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_10x3_loop + VZEROUPPER + +mulAvxGFNI_10x3_end: + RET + // func mulGFNI_10x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_10x3_64Xor(SB), $8-88 @@ -84246,33 +106000,45 @@ mulGFNI_10x3_64Xor_loop: mulGFNI_10x3_64Xor_end: RET -// func mulAvxTwo_10x3Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) -// Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 -TEXT ·mulAvxTwo_10x3Xor(SB), NOSPLIT, $8-88 - // Loading no tables to registers +// func mulAvxGFNI_10x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x3Xor(SB), $8-88 + // Loading 11 of 30 tables to registers // Destination kept in GP registers - // Full registers estimated 68 YMM used - MOVQ n+80(FP), AX - MOVQ matrix_base+0(FP), CX - SHRQ $0x05, AX - TESTQ AX, AX - JZ mulAvxTwo_10x3Xor_end - MOVQ in_base+24(FP), AX - MOVQ (AX), DX - MOVQ 24(AX), BX - MOVQ 48(AX), SI - MOVQ 72(AX), DI - MOVQ 96(AX), R8 - MOVQ 120(AX), R9 - MOVQ 144(AX), R10 - MOVQ 168(AX), R11 - MOVQ 192(AX), R12 - MOVQ 216(AX), AX - MOVQ out_base+48(FP), R13 - MOVQ (R13), R14 - MOVQ 24(R13), R15 - MOVQ 48(R13), R13 - MOVQ start+72(FP), BP + // Full registers estimated 35 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), R12 + MOVQ 216(AX), AX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R15 + MOVQ 48(R13), R13 + MOVQ start+72(FP), BP // Add start offset to output ADDQ BP, R14 @@ -84280,260 +106046,160 @@ TEXT ·mulAvxTwo_10x3Xor(SB), NOSPLIT, $8-88 ADDQ BP, R13 // Add start offset to input - ADDQ BP, DX - ADDQ BP, BX - ADDQ BP, SI - ADDQ BP, DI - ADDQ BP, R8 - ADDQ BP, R9 - ADDQ BP, R10 - ADDQ BP, R11 - ADDQ BP, R12 - ADDQ BP, AX - MOVQ $0x0000000f, BP - MOVQ BP, X3 - VPBROADCASTB X3, Y3 - MOVQ n+80(FP), BP - SHRQ $0x05, BP + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_10x3Xor_loop: + // Load 3 outputs + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R13), Y13 -mulAvxTwo_10x3Xor_loop: // Load and process 32 bytes from input 0 to 3 outputs - VMOVDQU (DX), Y6 - ADDQ $0x20, DX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU (R14), Y0 - VMOVDQU (CX), Y4 - VMOVDQU 32(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU (R15), Y1 - VMOVDQU 64(CX), Y4 - VMOVDQU 96(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU (R13), Y2 - VMOVDQU 128(CX), Y4 - VMOVDQU 160(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 1 to 3 outputs - VMOVDQU (BX), Y6 - ADDQ $0x20, BX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 192(CX), Y4 - VMOVDQU 224(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 256(CX), Y4 - VMOVDQU 288(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 320(CX), Y4 - VMOVDQU 352(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 2 to 3 outputs - VMOVDQU (SI), Y6 - ADDQ $0x20, SI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 384(CX), Y4 - VMOVDQU 416(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 448(CX), Y4 - VMOVDQU 480(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 512(CX), Y4 - VMOVDQU 544(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 3 to 3 outputs - VMOVDQU (DI), Y6 - ADDQ $0x20, DI - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 576(CX), Y4 - VMOVDQU 608(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 640(CX), Y4 - VMOVDQU 672(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 704(CX), Y4 - VMOVDQU 736(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 4 to 3 outputs - VMOVDQU (R8), Y6 - ADDQ $0x20, R8 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 768(CX), Y4 - VMOVDQU 800(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 832(CX), Y4 - VMOVDQU 864(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 896(CX), Y4 - VMOVDQU 928(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 5 to 3 outputs - VMOVDQU (R9), Y6 - ADDQ $0x20, R9 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 960(CX), Y4 - VMOVDQU 992(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1024(CX), Y4 - VMOVDQU 1056(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1088(CX), Y4 - VMOVDQU 1120(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 6 to 3 outputs - VMOVDQU (R10), Y6 - ADDQ $0x20, R10 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1152(CX), Y4 - VMOVDQU 1184(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1216(CX), Y4 - VMOVDQU 1248(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1280(CX), Y4 - VMOVDQU 1312(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 7 to 3 outputs - VMOVDQU (R11), Y6 - ADDQ $0x20, R11 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1344(CX), Y4 - VMOVDQU 1376(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1408(CX), Y4 - VMOVDQU 1440(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1472(CX), Y4 - VMOVDQU 1504(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 8 to 3 outputs - VMOVDQU (R12), Y6 - ADDQ $0x20, R12 - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1536(CX), Y4 - VMOVDQU 1568(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1600(CX), Y4 - VMOVDQU 1632(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1664(CX), Y4 - VMOVDQU 1696(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Load and process 32 bytes from input 9 to 3 outputs - VMOVDQU (AX), Y6 - ADDQ $0x20, AX - VPSRLQ $0x04, Y6, Y7 - VPAND Y3, Y6, Y6 - VPAND Y3, Y7, Y7 - VMOVDQU 1728(CX), Y4 - VMOVDQU 1760(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y0) - VMOVDQU 1792(CX), Y4 - VMOVDQU 1824(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y1) - VMOVDQU 1856(CX), Y4 - VMOVDQU 1888(CX), Y5 - VPSHUFB Y6, Y4, Y4 - VPSHUFB Y7, Y5, Y5 - XOR3WAY( $0x00, Y4, Y5, Y2) + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 // Store 3 outputs - VMOVDQU Y0, (R14) + VMOVDQU Y11, (R14) ADDQ $0x20, R14 - VMOVDQU Y1, (R15) + VMOVDQU Y12, (R15) ADDQ $0x20, R15 - VMOVDQU Y2, (R13) + VMOVDQU Y13, (R13) ADDQ $0x20, R13 // Prepare for next loop DECQ BP - JNZ mulAvxTwo_10x3Xor_loop + JNZ mulAvxGFNI_10x3Xor_loop VZEROUPPER -mulAvxTwo_10x3Xor_end: +mulAvxGFNI_10x3Xor_end: RET // func mulAvxTwo_10x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) @@ -84559,7 +106225,6 @@ TEXT ·mulAvxTwo_10x3_64Xor(SB), $8-88 MOVQ 192(AX), R12 MOVQ 216(AX), AX MOVQ out_base+48(FP), R13 - MOVQ out_base+48(FP), R13 MOVQ (R13), R14 MOVQ 24(R13), R15 MOVQ 48(R13), R13 @@ -85497,6 +107162,220 @@ mulGFNI_10x4_64_loop: mulGFNI_10x4_64_end: RET +// func mulAvxGFNI_10x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x4(SB), $8-88 + // Loading 10 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 46 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 4 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + MOVQ (R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x4_loop + VZEROUPPER + +mulAvxGFNI_10x4_end: + RET + // func mulGFNI_10x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_10x4_64Xor(SB), $8-88 @@ -85711,6 +107590,234 @@ mulGFNI_10x4_64Xor_loop: mulGFNI_10x4_64Xor_end: RET +// func mulAvxGFNI_10x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x4Xor(SB), $8-88 + // Loading 10 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 46 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x4Xor_loop: + // Load 4 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 4 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + MOVQ (R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x4Xor_loop + VZEROUPPER + +mulAvxGFNI_10x4Xor_end: + RET + // func mulAvxTwo_10x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_10x4Xor(SB), NOSPLIT, $8-88 @@ -86651,6 +108758,251 @@ mulGFNI_10x5_64_loop: mulGFNI_10x5_64_end: RET +// func mulAvxGFNI_10x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x5(SB), $8-88 + // Loading 9 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 57 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 5 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + MOVQ (R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x5_loop + VZEROUPPER + +mulAvxGFNI_10x5_end: + RET + // func mulGFNI_10x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_10x5_64Xor(SB), $8-88 @@ -86888,6 +109240,268 @@ mulGFNI_10x5_64Xor_loop: mulGFNI_10x5_64Xor_end: RET +// func mulAvxGFNI_10x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x5Xor(SB), $8-88 + // Loading 9 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 57 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x5Xor_loop: + // Load 5 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 5 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + MOVQ (R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x5Xor_loop + VZEROUPPER + +mulAvxGFNI_10x5Xor_end: + RET + // func mulAvxTwo_10x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_10x5Xor(SB), NOSPLIT, $8-88 @@ -87954,6 +110568,282 @@ mulGFNI_10x6_64_loop: mulGFNI_10x6_64_end: RET +// func mulAvxGFNI_10x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x6(SB), $8-88 + // Loading 8 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 68 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 6 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x6_loop + VZEROUPPER + +mulAvxGFNI_10x6_end: + RET + // func mulGFNI_10x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_10x6_64Xor(SB), $8-88 @@ -88214,6 +111104,302 @@ mulGFNI_10x6_64Xor_loop: mulGFNI_10x6_64Xor_end: RET +// func mulAvxGFNI_10x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x6Xor(SB), $8-88 + // Loading 8 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 68 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x6Xor_loop: + // Load 6 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y8 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 120(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 6 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x6Xor_loop + VZEROUPPER + +mulAvxGFNI_10x6Xor_end: + RET + // func mulAvxTwo_10x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_10x6Xor(SB), NOSPLIT, $8-88 @@ -89406,6 +112592,313 @@ mulGFNI_10x7_64_loop: mulGFNI_10x7_64_end: RET +// func mulAvxGFNI_10x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x7(SB), $8-88 + // Loading 7 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 79 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 7 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x7_loop + VZEROUPPER + +mulAvxGFNI_10x7_end: + RET + // func mulGFNI_10x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_10x7_64Xor(SB), $8-88 @@ -89689,6 +113182,336 @@ mulGFNI_10x7_64Xor_loop: mulGFNI_10x7_64Xor_end: RET +// func mulAvxGFNI_10x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x7Xor(SB), $8-88 + // Loading 7 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 79 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x7Xor_loop: + // Load 7 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y7 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y8 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 120(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 144(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 7 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x7Xor_loop + VZEROUPPER + +mulAvxGFNI_10x7Xor_end: + RET + // func mulAvxTwo_10x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_10x7Xor(SB), NOSPLIT, $8-88 @@ -91007,6 +114830,344 @@ mulGFNI_10x8_64_loop: mulGFNI_10x8_64_end: RET +// func mulAvxGFNI_10x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x8(SB), $8-88 + // Loading 6 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 90 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 8 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x8_loop + VZEROUPPER + +mulAvxGFNI_10x8_end: + RET + // func mulGFNI_10x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_10x8_64Xor(SB), $8-88 @@ -91313,6 +115474,370 @@ mulGFNI_10x8_64Xor_loop: mulGFNI_10x8_64Xor_end: RET +// func mulAvxGFNI_10x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x8Xor(SB), $8-88 + // Loading 6 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 90 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x8Xor_loop: + // Load 8 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y6 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y7 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y8 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 120(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 144(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 168(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 8 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x8Xor_loop + VZEROUPPER + +mulAvxGFNI_10x8Xor_end: + RET + // func mulAvxTwo_10x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_10x8Xor(SB), NOSPLIT, $8-88 @@ -92757,6 +117282,375 @@ mulGFNI_10x9_64_loop: mulGFNI_10x9_64_end: RET +// func mulAvxGFNI_10x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x9(SB), $8-88 + // Loading 5 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 101 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 9 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R14), BP + VMOVDQU Y5, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x9_loop + VZEROUPPER + +mulAvxGFNI_10x9_end: + RET + // func mulGFNI_10x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_10x9_64Xor(SB), $8-88 @@ -93086,6 +117980,404 @@ mulGFNI_10x9_64Xor_loop: mulGFNI_10x9_64Xor_end: RET +// func mulAvxGFNI_10x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x9Xor(SB), $8-88 + // Loading 5 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 101 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x9Xor_loop: + // Load 9 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y5 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y6 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y7 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y8 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 120(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 144(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 168(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 192(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 9 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R14), BP + VMOVDQU Y5, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x9Xor_loop + VZEROUPPER + +mulAvxGFNI_10x9Xor_end: + RET + // func mulAvxTwo_10x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_10x9Xor(SB), NOSPLIT, $8-88 @@ -94656,6 +119948,406 @@ mulGFNI_10x10_64_loop: mulGFNI_10x10_64_end: RET +// func mulAvxGFNI_10x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x10(SB), $8-88 + // Loading 4 of 100 tables to registers + // Destination kept on stack + // Full registers estimated 112 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 10 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 720(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 728(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 736(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 744(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 752(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 760(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 768(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 776(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 784(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 792(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R14), BP + VMOVDQU Y4, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y5, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 216(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x10_loop + VZEROUPPER + +mulAvxGFNI_10x10_end: + RET + // func mulGFNI_10x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX512DQ, AVX512F, GFNI TEXT ·mulGFNI_10x10_64Xor(SB), $8-88 @@ -95008,6 +120700,438 @@ mulGFNI_10x10_64Xor_loop: mulGFNI_10x10_64Xor_end: RET +// func mulAvxGFNI_10x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x10Xor(SB), $8-88 + // Loading 4 of 100 tables to registers + // Destination kept on stack + // Full registers estimated 112 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x10Xor_loop: + // Load 10 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y4 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y5 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y6 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y7 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y8 + MOVQ 120(R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 144(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 168(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 192(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 216(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 10 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 720(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 728(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 736(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 744(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 752(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 760(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 768(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 776(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 784(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 792(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R14), BP + VMOVDQU Y4, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y5, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 216(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x10Xor_loop + VZEROUPPER + +mulAvxGFNI_10x10Xor_end: + RET + // func mulAvxTwo_10x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) // Requires: AVX, AVX2, AVX512F, AVX512VL, SSE2 TEXT ·mulAvxTwo_10x10Xor(SB), NOSPLIT, $8-88 diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_arm64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_arm64.go new file mode 100644 index 00000000..2f871903 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_arm64.go @@ -0,0 +1,125 @@ +// Code generated by command: go generate gen.go. DO NOT EDIT. + +//go:build !noasm && !appengine && !gccgo && !nopshufb + +package reedsolomon + +//go:noescape +func mulSve_10x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulSve_10x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func mulNeon_10x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_arm64.s b/vendor/github.com/klauspost/reedsolomon/galois_gen_arm64.s new file mode 100644 index 00000000..dd974c11 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_arm64.s @@ -0,0 +1,27052 @@ +// Code generated by command: go generate gen.go. DO NOT EDIT. + +//go:build !appengine && !noasm && !nogen && !nopshufb && gc + +#include "textflag.h" + +// func mulSve_10x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x1_64(SB), $0-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 46 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd346fc00 // lsr x0, x0, #6 + WORD $0xd37ae400 // lsl x0, x0, #6 + WORD $0x04bf5050 // rdvl x16, #2 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x1_64_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD (R14), R14 + MOVD start+72(FP), R15 + + // Add start offset to output + WORD $0x8b0f01ce // add x14, x14, x15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd28001ef // mov x15, #15 + WORD $0x05e039e2 // mov z2.d, x15 + WORD $0x05212042 // dup z2.b, z2.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulSve_10x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + WORD $0x85804026 // ldr z6, [x1] + WORD $0x85804425 // ldr z5, [x1, #1, MUL VL] + WORD $0x04215041 // addvl x1, x1, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85804043 // ldr z3, [x2] + WORD $0x85804444 // ldr z4, [x2, #1, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33080 // eor z0.d, z4.d, z3.d + WORD $0x04a530c1 // eor z1.d, z6.d, z5.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x1_64_store + + // Load and process 64 bytes from input 1 to 1 outputs + WORD $0x85804086 // ldr z6, [x4] + WORD $0x85804485 // ldr z5, [x4, #1, MUL VL] + WORD $0x04245044 // addvl x4, x4, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85804843 // ldr z3, [x2, #2, MUL VL] + WORD $0x85804c44 // ldr z4, [x2, #3, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x1_64_store + + // Load and process 64 bytes from input 2 to 1 outputs + WORD $0x858040a6 // ldr z6, [x5] + WORD $0x858044a5 // ldr z5, [x5, #1, MUL VL] + WORD $0x04255045 // addvl x5, x5, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85805043 // ldr z3, [x2, #4, MUL VL] + WORD $0x85805444 // ldr z4, [x2, #5, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x1_64_store + + // Load and process 64 bytes from input 3 to 1 outputs + WORD $0x85804106 // ldr z6, [x8] + WORD $0x85804505 // ldr z5, [x8, #1, MUL VL] + WORD $0x04285048 // addvl x8, x8, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85805843 // ldr z3, [x2, #6, MUL VL] + WORD $0x85805c44 // ldr z4, [x2, #7, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x1_64_store + + // Load and process 64 bytes from input 4 to 1 outputs + WORD $0x85804126 // ldr z6, [x9] + WORD $0x85804525 // ldr z5, [x9, #1, MUL VL] + WORD $0x04295049 // addvl x9, x9, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85814043 // ldr z3, [x2, #8, MUL VL] + WORD $0x85814444 // ldr z4, [x2, #9, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x1_64_store + + // Load and process 64 bytes from input 5 to 1 outputs + WORD $0x85804146 // ldr z6, [x10] + WORD $0x85804545 // ldr z5, [x10, #1, MUL VL] + WORD $0x042a504a // addvl x10, x10, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85814843 // ldr z3, [x2, #10, MUL VL] + WORD $0x85814c44 // ldr z4, [x2, #11, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x1_64_store + + // Load and process 64 bytes from input 6 to 1 outputs + WORD $0x85804166 // ldr z6, [x11] + WORD $0x85804565 // ldr z5, [x11, #1, MUL VL] + WORD $0x042b504b // addvl x11, x11, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85815043 // ldr z3, [x2, #12, MUL VL] + WORD $0x85815444 // ldr z4, [x2, #13, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x1_64_store + + // Load and process 64 bytes from input 7 to 1 outputs + WORD $0x85804186 // ldr z6, [x12] + WORD $0x85804585 // ldr z5, [x12, #1, MUL VL] + WORD $0x042c504c // addvl x12, x12, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85815843 // ldr z3, [x2, #14, MUL VL] + WORD $0x85815c44 // ldr z4, [x2, #15, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x1_64_store + + // Load and process 64 bytes from input 8 to 1 outputs + WORD $0x858041a6 // ldr z6, [x13] + WORD $0x858045a5 // ldr z5, [x13, #1, MUL VL] + WORD $0x042d504d // addvl x13, x13, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85824043 // ldr z3, [x2, #16, MUL VL] + WORD $0x85824444 // ldr z4, [x2, #17, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x1_64_store + + // Load and process 64 bytes from input 9 to 1 outputs + WORD $0x85804066 // ldr z6, [x3] + WORD $0x85804465 // ldr z5, [x3, #1, MUL VL] + WORD $0x04235043 // addvl x3, x3, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85824843 // ldr z3, [x2, #18, MUL VL] + WORD $0x85824c44 // ldr z4, [x2, #19, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + +mulSve_10x1_64_store: + // Store 1 outputs + WORD $0xe58041c0 // str z0, [x14] + WORD $0xe58045c1 // str z1, [x14, #1, MUL VL] + WORD $0x042e504e // addvl x14, x14, #2 + + // Prepare for next loop + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x1_64_loop + +mulSve_10x1_64_end: + RET + +// func mulSve_10x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x1_64Xor(SB), $0-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 46 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd346fc00 // lsr x0, x0, #6 + WORD $0xd37ae400 // lsl x0, x0, #6 + WORD $0x04bf5050 // rdvl x16, #2 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x1_64Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD (R14), R14 + MOVD start+72(FP), R15 + + // Add start offset to output + WORD $0x8b0f01ce // add x14, x14, x15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd28001ef // mov x15, #15 + WORD $0x05e039e2 // mov z2.d, x15 + WORD $0x05212042 // dup z2.b, z2.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulSve_10x1_64Xor_loop: + // Load 1 outputs + WORD $0x858041c0 // ldr z0, [x14] + WORD $0x858045c1 // ldr z1, [x14, #1, MUL VL] + + // Load and process 64 bytes from input 0 to 1 outputs + WORD $0x85804026 // ldr z6, [x1] + WORD $0x85804425 // ldr z5, [x1, #1, MUL VL] + WORD $0x04215041 // addvl x1, x1, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85804043 // ldr z3, [x2] + WORD $0x85804444 // ldr z4, [x2, #1, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x1_64Xor_store + + // Load and process 64 bytes from input 1 to 1 outputs + WORD $0x85804086 // ldr z6, [x4] + WORD $0x85804485 // ldr z5, [x4, #1, MUL VL] + WORD $0x04245044 // addvl x4, x4, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85804843 // ldr z3, [x2, #2, MUL VL] + WORD $0x85804c44 // ldr z4, [x2, #3, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x1_64Xor_store + + // Load and process 64 bytes from input 2 to 1 outputs + WORD $0x858040a6 // ldr z6, [x5] + WORD $0x858044a5 // ldr z5, [x5, #1, MUL VL] + WORD $0x04255045 // addvl x5, x5, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85805043 // ldr z3, [x2, #4, MUL VL] + WORD $0x85805444 // ldr z4, [x2, #5, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x1_64Xor_store + + // Load and process 64 bytes from input 3 to 1 outputs + WORD $0x85804106 // ldr z6, [x8] + WORD $0x85804505 // ldr z5, [x8, #1, MUL VL] + WORD $0x04285048 // addvl x8, x8, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85805843 // ldr z3, [x2, #6, MUL VL] + WORD $0x85805c44 // ldr z4, [x2, #7, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x1_64Xor_store + + // Load and process 64 bytes from input 4 to 1 outputs + WORD $0x85804126 // ldr z6, [x9] + WORD $0x85804525 // ldr z5, [x9, #1, MUL VL] + WORD $0x04295049 // addvl x9, x9, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85814043 // ldr z3, [x2, #8, MUL VL] + WORD $0x85814444 // ldr z4, [x2, #9, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x1_64Xor_store + + // Load and process 64 bytes from input 5 to 1 outputs + WORD $0x85804146 // ldr z6, [x10] + WORD $0x85804545 // ldr z5, [x10, #1, MUL VL] + WORD $0x042a504a // addvl x10, x10, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85814843 // ldr z3, [x2, #10, MUL VL] + WORD $0x85814c44 // ldr z4, [x2, #11, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x1_64Xor_store + + // Load and process 64 bytes from input 6 to 1 outputs + WORD $0x85804166 // ldr z6, [x11] + WORD $0x85804565 // ldr z5, [x11, #1, MUL VL] + WORD $0x042b504b // addvl x11, x11, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85815043 // ldr z3, [x2, #12, MUL VL] + WORD $0x85815444 // ldr z4, [x2, #13, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x1_64Xor_store + + // Load and process 64 bytes from input 7 to 1 outputs + WORD $0x85804186 // ldr z6, [x12] + WORD $0x85804585 // ldr z5, [x12, #1, MUL VL] + WORD $0x042c504c // addvl x12, x12, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85815843 // ldr z3, [x2, #14, MUL VL] + WORD $0x85815c44 // ldr z4, [x2, #15, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x1_64Xor_store + + // Load and process 64 bytes from input 8 to 1 outputs + WORD $0x858041a6 // ldr z6, [x13] + WORD $0x858045a5 // ldr z5, [x13, #1, MUL VL] + WORD $0x042d504d // addvl x13, x13, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85824043 // ldr z3, [x2, #16, MUL VL] + WORD $0x85824444 // ldr z4, [x2, #17, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x1_64Xor_store + + // Load and process 64 bytes from input 9 to 1 outputs + WORD $0x85804066 // ldr z6, [x3] + WORD $0x85804465 // ldr z5, [x3, #1, MUL VL] + WORD $0x04235043 // addvl x3, x3, #2 + WORD $0x04fc94c7 // lsr z7.d, z6.d, #4 + WORD $0x04fc94a8 // lsr z8.d, z5.d, #4 + WORD $0x042230c6 // and z6.d, z6.d, z2.d + WORD $0x042230a5 // and z5.d, z5.d, z2.d + WORD $0x042230e7 // and z7.d, z7.d, z2.d + WORD $0x04223108 // and z8.d, z8.d, z2.d + WORD $0x85824843 // ldr z3, [x2, #18, MUL VL] + WORD $0x85824c44 // ldr z4, [x2, #19, MUL VL] + WORD $0x05253065 // tbl z5.b, z3.b, z5.b + WORD $0x05263063 // tbl z3.b, z3.b, z6.b + WORD $0x05283086 // tbl z6.b, z4.b, z8.b + WORD $0x05273084 // tbl z4.b, z4.b, z7.b + WORD $0x04a33000 // eor z0.d, z0.d, z3.d + WORD $0x04a43000 // eor z0.d, z0.d, z4.d + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + +mulSve_10x1_64Xor_store: + // Store 1 outputs + WORD $0xe58041c0 // str z0, [x14] + WORD $0xe58045c1 // str z1, [x14, #1, MUL VL] + WORD $0x042e504e // addvl x14, x14, #2 + + // Prepare for next loop + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x1_64Xor_loop + +mulSve_10x1_64Xor_end: + RET + +// func mulSve_10x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x2_64(SB), $8-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 89 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd346fc00 // lsr x0, x0, #6 + WORD $0xd37ae400 // lsl x0, x0, #6 + WORD $0x04bf5050 // rdvl x16, #2 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x2_64_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD (R14), R15 + MOVD 24(R14), R14 + MOVD start+72(FP), R6 + + // Add start offset to output + WORD $0x8b0601ef // add x15, x15, x6 + WORD $0x8b0601ce // add x14, x14, x6 + + // Add start offset to input + WORD $0x8b060021 // add x1, x1, x6 + WORD $0x8b060084 // add x4, x4, x6 + WORD $0x8b0600a5 // add x5, x5, x6 + WORD $0x8b060108 // add x8, x8, x6 + WORD $0x8b060129 // add x9, x9, x6 + WORD $0x8b06014a // add x10, x10, x6 + WORD $0x8b06016b // add x11, x11, x6 + WORD $0x8b06018c // add x12, x12, x6 + WORD $0x8b0601ad // add x13, x13, x6 + WORD $0x8b060063 // add x3, x3, x6 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c4 // mov z4.d, x6 + WORD $0x05212084 // dup z4.b, z4.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulSve_10x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + WORD $0x85804029 // ldr z9, [x1] + WORD $0x8580442b // ldr z11, [x1, #1, MUL VL] + WORD $0x04215041 // addvl x1, x1, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85804045 // ldr z5, [x2] + WORD $0x85804446 // ldr z6, [x2, #1, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a530c0 // eor z0.d, z6.d, z5.d + WORD $0x04a73101 // eor z1.d, z8.d, z7.d + WORD $0x85804845 // ldr z5, [x2, #2, MUL VL] + WORD $0x85804c46 // ldr z6, [x2, #3, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a530c2 // eor z2.d, z6.d, z5.d + WORD $0x04a73103 // eor z3.d, z8.d, z7.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x2_64_store + + // Load and process 64 bytes from input 1 to 2 outputs + WORD $0x85804089 // ldr z9, [x4] + WORD $0x8580448b // ldr z11, [x4, #1, MUL VL] + WORD $0x04245044 // addvl x4, x4, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85805045 // ldr z5, [x2, #4, MUL VL] + WORD $0x85805446 // ldr z6, [x2, #5, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85805845 // ldr z5, [x2, #6, MUL VL] + WORD $0x85805c46 // ldr z6, [x2, #7, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x2_64_store + + // Load and process 64 bytes from input 2 to 2 outputs + WORD $0x858040a9 // ldr z9, [x5] + WORD $0x858044ab // ldr z11, [x5, #1, MUL VL] + WORD $0x04255045 // addvl x5, x5, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85814045 // ldr z5, [x2, #8, MUL VL] + WORD $0x85814446 // ldr z6, [x2, #9, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85814845 // ldr z5, [x2, #10, MUL VL] + WORD $0x85814c46 // ldr z6, [x2, #11, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x2_64_store + + // Load and process 64 bytes from input 3 to 2 outputs + WORD $0x85804109 // ldr z9, [x8] + WORD $0x8580450b // ldr z11, [x8, #1, MUL VL] + WORD $0x04285048 // addvl x8, x8, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85815045 // ldr z5, [x2, #12, MUL VL] + WORD $0x85815446 // ldr z6, [x2, #13, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85815845 // ldr z5, [x2, #14, MUL VL] + WORD $0x85815c46 // ldr z6, [x2, #15, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x2_64_store + + // Load and process 64 bytes from input 4 to 2 outputs + WORD $0x85804129 // ldr z9, [x9] + WORD $0x8580452b // ldr z11, [x9, #1, MUL VL] + WORD $0x04295049 // addvl x9, x9, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85824045 // ldr z5, [x2, #16, MUL VL] + WORD $0x85824446 // ldr z6, [x2, #17, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85824845 // ldr z5, [x2, #18, MUL VL] + WORD $0x85824c46 // ldr z6, [x2, #19, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x2_64_store + + // Load and process 64 bytes from input 5 to 2 outputs + WORD $0x85804149 // ldr z9, [x10] + WORD $0x8580454b // ldr z11, [x10, #1, MUL VL] + WORD $0x042a504a // addvl x10, x10, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85825045 // ldr z5, [x2, #20, MUL VL] + WORD $0x85825446 // ldr z6, [x2, #21, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85825845 // ldr z5, [x2, #22, MUL VL] + WORD $0x85825c46 // ldr z6, [x2, #23, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x2_64_store + + // Load and process 64 bytes from input 6 to 2 outputs + WORD $0x85804169 // ldr z9, [x11] + WORD $0x8580456b // ldr z11, [x11, #1, MUL VL] + WORD $0x042b504b // addvl x11, x11, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85834045 // ldr z5, [x2, #24, MUL VL] + WORD $0x85834446 // ldr z6, [x2, #25, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85834845 // ldr z5, [x2, #26, MUL VL] + WORD $0x85834c46 // ldr z6, [x2, #27, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x2_64_store + + // Load and process 64 bytes from input 7 to 2 outputs + WORD $0x85804189 // ldr z9, [x12] + WORD $0x8580458b // ldr z11, [x12, #1, MUL VL] + WORD $0x042c504c // addvl x12, x12, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85835045 // ldr z5, [x2, #28, MUL VL] + WORD $0x85835446 // ldr z6, [x2, #29, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85835845 // ldr z5, [x2, #30, MUL VL] + WORD $0x85835c46 // ldr z6, [x2, #31, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x2_64_store + + // Load and process 64 bytes from input 8 to 2 outputs + WORD $0x858041a9 // ldr z9, [x13] + WORD $0x858045ab // ldr z11, [x13, #1, MUL VL] + WORD $0x042d504d // addvl x13, x13, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85844045 // ldr z5, [x2, #32, MUL VL] + WORD $0x85844446 // ldr z6, [x2, #33, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85844845 // ldr z5, [x2, #34, MUL VL] + WORD $0x85844c46 // ldr z6, [x2, #35, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x2_64_store + + // Load and process 64 bytes from input 9 to 2 outputs + WORD $0x85804069 // ldr z9, [x3] + WORD $0x8580446b // ldr z11, [x3, #1, MUL VL] + WORD $0x04235043 // addvl x3, x3, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85845045 // ldr z5, [x2, #36, MUL VL] + WORD $0x85845446 // ldr z6, [x2, #37, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85845845 // ldr z5, [x2, #38, MUL VL] + WORD $0x85845c46 // ldr z6, [x2, #39, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + +mulSve_10x2_64_store: + // Store 2 outputs + WORD $0xe58041e0 // str z0, [x15] + WORD $0xe58045e1 // str z1, [x15, #1, MUL VL] + WORD $0x042f504f // addvl x15, x15, #2 + WORD $0xe58041c2 // str z2, [x14] + WORD $0xe58045c3 // str z3, [x14, #1, MUL VL] + WORD $0x042e504e // addvl x14, x14, #2 + + // Prepare for next loop + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x2_64_loop + +mulSve_10x2_64_end: + RET + +// func mulSve_10x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x2_64Xor(SB), $8-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 89 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd346fc00 // lsr x0, x0, #6 + WORD $0xd37ae400 // lsl x0, x0, #6 + WORD $0x04bf5050 // rdvl x16, #2 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x2_64Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD (R14), R15 + MOVD 24(R14), R14 + MOVD start+72(FP), R6 + + // Add start offset to output + WORD $0x8b0601ef // add x15, x15, x6 + WORD $0x8b0601ce // add x14, x14, x6 + + // Add start offset to input + WORD $0x8b060021 // add x1, x1, x6 + WORD $0x8b060084 // add x4, x4, x6 + WORD $0x8b0600a5 // add x5, x5, x6 + WORD $0x8b060108 // add x8, x8, x6 + WORD $0x8b060129 // add x9, x9, x6 + WORD $0x8b06014a // add x10, x10, x6 + WORD $0x8b06016b // add x11, x11, x6 + WORD $0x8b06018c // add x12, x12, x6 + WORD $0x8b0601ad // add x13, x13, x6 + WORD $0x8b060063 // add x3, x3, x6 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c4 // mov z4.d, x6 + WORD $0x05212084 // dup z4.b, z4.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulSve_10x2_64Xor_loop: + // Load 2 outputs + WORD $0x858041e0 // ldr z0, [x15] + WORD $0x858045e1 // ldr z1, [x15, #1, MUL VL] + WORD $0x858041c2 // ldr z2, [x14] + WORD $0x858045c3 // ldr z3, [x14, #1, MUL VL] + + // Load and process 64 bytes from input 0 to 2 outputs + WORD $0x85804029 // ldr z9, [x1] + WORD $0x8580442b // ldr z11, [x1, #1, MUL VL] + WORD $0x04215041 // addvl x1, x1, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85804045 // ldr z5, [x2] + WORD $0x85804446 // ldr z6, [x2, #1, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85804845 // ldr z5, [x2, #2, MUL VL] + WORD $0x85804c46 // ldr z6, [x2, #3, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x2_64Xor_store + + // Load and process 64 bytes from input 1 to 2 outputs + WORD $0x85804089 // ldr z9, [x4] + WORD $0x8580448b // ldr z11, [x4, #1, MUL VL] + WORD $0x04245044 // addvl x4, x4, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85805045 // ldr z5, [x2, #4, MUL VL] + WORD $0x85805446 // ldr z6, [x2, #5, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85805845 // ldr z5, [x2, #6, MUL VL] + WORD $0x85805c46 // ldr z6, [x2, #7, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x2_64Xor_store + + // Load and process 64 bytes from input 2 to 2 outputs + WORD $0x858040a9 // ldr z9, [x5] + WORD $0x858044ab // ldr z11, [x5, #1, MUL VL] + WORD $0x04255045 // addvl x5, x5, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85814045 // ldr z5, [x2, #8, MUL VL] + WORD $0x85814446 // ldr z6, [x2, #9, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85814845 // ldr z5, [x2, #10, MUL VL] + WORD $0x85814c46 // ldr z6, [x2, #11, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x2_64Xor_store + + // Load and process 64 bytes from input 3 to 2 outputs + WORD $0x85804109 // ldr z9, [x8] + WORD $0x8580450b // ldr z11, [x8, #1, MUL VL] + WORD $0x04285048 // addvl x8, x8, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85815045 // ldr z5, [x2, #12, MUL VL] + WORD $0x85815446 // ldr z6, [x2, #13, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85815845 // ldr z5, [x2, #14, MUL VL] + WORD $0x85815c46 // ldr z6, [x2, #15, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x2_64Xor_store + + // Load and process 64 bytes from input 4 to 2 outputs + WORD $0x85804129 // ldr z9, [x9] + WORD $0x8580452b // ldr z11, [x9, #1, MUL VL] + WORD $0x04295049 // addvl x9, x9, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85824045 // ldr z5, [x2, #16, MUL VL] + WORD $0x85824446 // ldr z6, [x2, #17, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85824845 // ldr z5, [x2, #18, MUL VL] + WORD $0x85824c46 // ldr z6, [x2, #19, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x2_64Xor_store + + // Load and process 64 bytes from input 5 to 2 outputs + WORD $0x85804149 // ldr z9, [x10] + WORD $0x8580454b // ldr z11, [x10, #1, MUL VL] + WORD $0x042a504a // addvl x10, x10, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85825045 // ldr z5, [x2, #20, MUL VL] + WORD $0x85825446 // ldr z6, [x2, #21, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85825845 // ldr z5, [x2, #22, MUL VL] + WORD $0x85825c46 // ldr z6, [x2, #23, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x2_64Xor_store + + // Load and process 64 bytes from input 6 to 2 outputs + WORD $0x85804169 // ldr z9, [x11] + WORD $0x8580456b // ldr z11, [x11, #1, MUL VL] + WORD $0x042b504b // addvl x11, x11, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85834045 // ldr z5, [x2, #24, MUL VL] + WORD $0x85834446 // ldr z6, [x2, #25, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85834845 // ldr z5, [x2, #26, MUL VL] + WORD $0x85834c46 // ldr z6, [x2, #27, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x2_64Xor_store + + // Load and process 64 bytes from input 7 to 2 outputs + WORD $0x85804189 // ldr z9, [x12] + WORD $0x8580458b // ldr z11, [x12, #1, MUL VL] + WORD $0x042c504c // addvl x12, x12, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85835045 // ldr z5, [x2, #28, MUL VL] + WORD $0x85835446 // ldr z6, [x2, #29, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85835845 // ldr z5, [x2, #30, MUL VL] + WORD $0x85835c46 // ldr z6, [x2, #31, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x2_64Xor_store + + // Load and process 64 bytes from input 8 to 2 outputs + WORD $0x858041a9 // ldr z9, [x13] + WORD $0x858045ab // ldr z11, [x13, #1, MUL VL] + WORD $0x042d504d // addvl x13, x13, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85844045 // ldr z5, [x2, #32, MUL VL] + WORD $0x85844446 // ldr z6, [x2, #33, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85844845 // ldr z5, [x2, #34, MUL VL] + WORD $0x85844c46 // ldr z6, [x2, #35, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x2_64Xor_store + + // Load and process 64 bytes from input 9 to 2 outputs + WORD $0x85804069 // ldr z9, [x3] + WORD $0x8580446b // ldr z11, [x3, #1, MUL VL] + WORD $0x04235043 // addvl x3, x3, #2 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04243129 // and z9.d, z9.d, z4.d + WORD $0x0424316b // and z11.d, z11.d, z4.d + WORD $0x0424314a // and z10.d, z10.d, z4.d + WORD $0x0424318c // and z12.d, z12.d, z4.d + WORD $0x85845045 // ldr z5, [x2, #36, MUL VL] + WORD $0x85845446 // ldr z6, [x2, #37, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85845845 // ldr z5, [x2, #38, MUL VL] + WORD $0x85845c46 // ldr z6, [x2, #39, MUL VL] + WORD $0x052b30a7 // tbl z7.b, z5.b, z11.b + WORD $0x052930a5 // tbl z5.b, z5.b, z9.b + WORD $0x052c30c8 // tbl z8.b, z6.b, z12.b + WORD $0x052a30c6 // tbl z6.b, z6.b, z10.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + +mulSve_10x2_64Xor_store: + // Store 2 outputs + WORD $0xe58041e0 // str z0, [x15] + WORD $0xe58045e1 // str z1, [x15, #1, MUL VL] + WORD $0x042f504f // addvl x15, x15, #2 + WORD $0xe58041c2 // str z2, [x14] + WORD $0xe58045c3 // str z3, [x14, #1, MUL VL] + WORD $0x042e504e // addvl x14, x14, #2 + + // Prepare for next loop + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x2_64Xor_loop + +mulSve_10x2_64Xor_end: + RET + +// func mulSve_10x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x3_64(SB), $8-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 130 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd346fc00 // lsr x0, x0, #6 + WORD $0xd37ae400 // lsl x0, x0, #6 + WORD $0x04bf5050 // rdvl x16, #2 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x3_64_end + MOVD in_base+24(FP), R0 + MOVD (R0), R3 + MOVD 24(R0), R1 + MOVD 48(R0), R4 + MOVD 72(R0), R5 + MOVD 96(R0), R8 + MOVD 120(R0), R9 + MOVD 144(R0), R10 + MOVD 168(R0), R11 + MOVD 192(R0), R12 + MOVD 216(R0), R0 + MOVD out_base+48(FP), R13 + MOVD (R13), R14 + MOVD 24(R13), R15 + MOVD 48(R13), R13 + MOVD start+72(FP), R6 + + // Add start offset to output + WORD $0x8b0601ce // add x14, x14, x6 + WORD $0x8b0601ef // add x15, x15, x6 + WORD $0x8b0601ad // add x13, x13, x6 + + // Add start offset to input + WORD $0x8b060063 // add x3, x3, x6 + WORD $0x8b060021 // add x1, x1, x6 + WORD $0x8b060084 // add x4, x4, x6 + WORD $0x8b0600a5 // add x5, x5, x6 + WORD $0x8b060108 // add x8, x8, x6 + WORD $0x8b060129 // add x9, x9, x6 + WORD $0x8b06014a // add x10, x10, x6 + WORD $0x8b06016b // add x11, x11, x6 + WORD $0x8b06018c // add x12, x12, x6 + WORD $0x8b060000 // add x0, x0, x6 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c6 // mov z6.d, x6 + WORD $0x052120c6 // dup z6.b, z6.b[0] + + // Reload length to save a register + MOVD n+80(FP), R6 + WORD $0xd346fcc6 // lsr x6, x6, #6 + WORD $0xd37ae4c6 // lsl x6, x6, #6 + WORD $0x04bf5050 // rdvl x16, #2 + WORD $0x9ad008c6 // udiv x6, x6, x16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulSve_10x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + WORD $0x8580406b // ldr z11, [x3] + WORD $0x8580446d // ldr z13, [x3, #1, MUL VL] + WORD $0x04235043 // addvl x3, x3, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85804047 // ldr z7, [x2] + WORD $0x85804448 // ldr z8, [x2, #1, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73100 // eor z0.d, z8.d, z7.d + WORD $0x04a93141 // eor z1.d, z10.d, z9.d + WORD $0x85804847 // ldr z7, [x2, #2, MUL VL] + WORD $0x85804c48 // ldr z8, [x2, #3, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73102 // eor z2.d, z8.d, z7.d + WORD $0x04a93143 // eor z3.d, z10.d, z9.d + WORD $0x85805047 // ldr z7, [x2, #4, MUL VL] + WORD $0x85805448 // ldr z8, [x2, #5, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73104 // eor z4.d, z8.d, z7.d + WORD $0x04a93145 // eor z5.d, z10.d, z9.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x3_64_store + + // Load and process 64 bytes from input 1 to 3 outputs + WORD $0x8580402b // ldr z11, [x1] + WORD $0x8580442d // ldr z13, [x1, #1, MUL VL] + WORD $0x04215041 // addvl x1, x1, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85805847 // ldr z7, [x2, #6, MUL VL] + WORD $0x85805c48 // ldr z8, [x2, #7, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85814047 // ldr z7, [x2, #8, MUL VL] + WORD $0x85814448 // ldr z8, [x2, #9, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85814847 // ldr z7, [x2, #10, MUL VL] + WORD $0x85814c48 // ldr z8, [x2, #11, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x3_64_store + + // Load and process 64 bytes from input 2 to 3 outputs + WORD $0x8580408b // ldr z11, [x4] + WORD $0x8580448d // ldr z13, [x4, #1, MUL VL] + WORD $0x04245044 // addvl x4, x4, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85815047 // ldr z7, [x2, #12, MUL VL] + WORD $0x85815448 // ldr z8, [x2, #13, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85815847 // ldr z7, [x2, #14, MUL VL] + WORD $0x85815c48 // ldr z8, [x2, #15, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85824047 // ldr z7, [x2, #16, MUL VL] + WORD $0x85824448 // ldr z8, [x2, #17, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x3_64_store + + // Load and process 64 bytes from input 3 to 3 outputs + WORD $0x858040ab // ldr z11, [x5] + WORD $0x858044ad // ldr z13, [x5, #1, MUL VL] + WORD $0x04255045 // addvl x5, x5, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85824847 // ldr z7, [x2, #18, MUL VL] + WORD $0x85824c48 // ldr z8, [x2, #19, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85825047 // ldr z7, [x2, #20, MUL VL] + WORD $0x85825448 // ldr z8, [x2, #21, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85825847 // ldr z7, [x2, #22, MUL VL] + WORD $0x85825c48 // ldr z8, [x2, #23, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x3_64_store + + // Load and process 64 bytes from input 4 to 3 outputs + WORD $0x8580410b // ldr z11, [x8] + WORD $0x8580450d // ldr z13, [x8, #1, MUL VL] + WORD $0x04285048 // addvl x8, x8, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85834047 // ldr z7, [x2, #24, MUL VL] + WORD $0x85834448 // ldr z8, [x2, #25, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85834847 // ldr z7, [x2, #26, MUL VL] + WORD $0x85834c48 // ldr z8, [x2, #27, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85835047 // ldr z7, [x2, #28, MUL VL] + WORD $0x85835448 // ldr z8, [x2, #29, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x3_64_store + + // Load and process 64 bytes from input 5 to 3 outputs + WORD $0x8580412b // ldr z11, [x9] + WORD $0x8580452d // ldr z13, [x9, #1, MUL VL] + WORD $0x04295049 // addvl x9, x9, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85835847 // ldr z7, [x2, #30, MUL VL] + WORD $0x85835c48 // ldr z8, [x2, #31, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85844047 // ldr z7, [x2, #32, MUL VL] + WORD $0x85844448 // ldr z8, [x2, #33, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85844847 // ldr z7, [x2, #34, MUL VL] + WORD $0x85844c48 // ldr z8, [x2, #35, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x3_64_store + + // Load and process 64 bytes from input 6 to 3 outputs + WORD $0x8580414b // ldr z11, [x10] + WORD $0x8580454d // ldr z13, [x10, #1, MUL VL] + WORD $0x042a504a // addvl x10, x10, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85845047 // ldr z7, [x2, #36, MUL VL] + WORD $0x85845448 // ldr z8, [x2, #37, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85845847 // ldr z7, [x2, #38, MUL VL] + WORD $0x85845c48 // ldr z8, [x2, #39, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85854047 // ldr z7, [x2, #40, MUL VL] + WORD $0x85854448 // ldr z8, [x2, #41, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x3_64_store + + // Load and process 64 bytes from input 7 to 3 outputs + WORD $0x8580416b // ldr z11, [x11] + WORD $0x8580456d // ldr z13, [x11, #1, MUL VL] + WORD $0x042b504b // addvl x11, x11, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85854847 // ldr z7, [x2, #42, MUL VL] + WORD $0x85854c48 // ldr z8, [x2, #43, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85855047 // ldr z7, [x2, #44, MUL VL] + WORD $0x85855448 // ldr z8, [x2, #45, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85855847 // ldr z7, [x2, #46, MUL VL] + WORD $0x85855c48 // ldr z8, [x2, #47, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x3_64_store + + // Load and process 64 bytes from input 8 to 3 outputs + WORD $0x8580418b // ldr z11, [x12] + WORD $0x8580458d // ldr z13, [x12, #1, MUL VL] + WORD $0x042c504c // addvl x12, x12, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85864047 // ldr z7, [x2, #48, MUL VL] + WORD $0x85864448 // ldr z8, [x2, #49, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85864847 // ldr z7, [x2, #50, MUL VL] + WORD $0x85864c48 // ldr z8, [x2, #51, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85865047 // ldr z7, [x2, #52, MUL VL] + WORD $0x85865448 // ldr z8, [x2, #53, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x3_64_store + + // Load and process 64 bytes from input 9 to 3 outputs + WORD $0x8580400b // ldr z11, [x0] + WORD $0x8580440d // ldr z13, [x0, #1, MUL VL] + WORD $0x04205040 // addvl x0, x0, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85865847 // ldr z7, [x2, #54, MUL VL] + WORD $0x85865c48 // ldr z8, [x2, #55, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85874047 // ldr z7, [x2, #56, MUL VL] + WORD $0x85874448 // ldr z8, [x2, #57, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85874847 // ldr z7, [x2, #58, MUL VL] + WORD $0x85874c48 // ldr z8, [x2, #59, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + +mulSve_10x3_64_store: + // Store 3 outputs + WORD $0xe58041c0 // str z0, [x14] + WORD $0xe58045c1 // str z1, [x14, #1, MUL VL] + WORD $0x042e504e // addvl x14, x14, #2 + WORD $0xe58041e2 // str z2, [x15] + WORD $0xe58045e3 // str z3, [x15, #1, MUL VL] + WORD $0x042f504f // addvl x15, x15, #2 + WORD $0xe58041a4 // str z4, [x13] + WORD $0xe58045a5 // str z5, [x13, #1, MUL VL] + WORD $0x042d504d // addvl x13, x13, #2 + + // Prepare for next loop + WORD $0xf10004c6 // subs x6, x6, #1 + BNE mulSve_10x3_64_loop + +mulSve_10x3_64_end: + RET + +// func mulSve_10x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x3_64Xor(SB), $8-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 130 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd346fc00 // lsr x0, x0, #6 + WORD $0xd37ae400 // lsl x0, x0, #6 + WORD $0x04bf5050 // rdvl x16, #2 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x3_64Xor_end + MOVD in_base+24(FP), R0 + MOVD (R0), R3 + MOVD 24(R0), R1 + MOVD 48(R0), R4 + MOVD 72(R0), R5 + MOVD 96(R0), R8 + MOVD 120(R0), R9 + MOVD 144(R0), R10 + MOVD 168(R0), R11 + MOVD 192(R0), R12 + MOVD 216(R0), R0 + MOVD out_base+48(FP), R13 + MOVD (R13), R14 + MOVD 24(R13), R15 + MOVD 48(R13), R13 + MOVD start+72(FP), R6 + + // Add start offset to output + WORD $0x8b0601ce // add x14, x14, x6 + WORD $0x8b0601ef // add x15, x15, x6 + WORD $0x8b0601ad // add x13, x13, x6 + + // Add start offset to input + WORD $0x8b060063 // add x3, x3, x6 + WORD $0x8b060021 // add x1, x1, x6 + WORD $0x8b060084 // add x4, x4, x6 + WORD $0x8b0600a5 // add x5, x5, x6 + WORD $0x8b060108 // add x8, x8, x6 + WORD $0x8b060129 // add x9, x9, x6 + WORD $0x8b06014a // add x10, x10, x6 + WORD $0x8b06016b // add x11, x11, x6 + WORD $0x8b06018c // add x12, x12, x6 + WORD $0x8b060000 // add x0, x0, x6 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c6 // mov z6.d, x6 + WORD $0x052120c6 // dup z6.b, z6.b[0] + + // Reload length to save a register + MOVD n+80(FP), R6 + WORD $0xd346fcc6 // lsr x6, x6, #6 + WORD $0xd37ae4c6 // lsl x6, x6, #6 + WORD $0x04bf5050 // rdvl x16, #2 + WORD $0x9ad008c6 // udiv x6, x6, x16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulSve_10x3_64Xor_loop: + // Load 3 outputs + WORD $0x858041c0 // ldr z0, [x14] + WORD $0x858045c1 // ldr z1, [x14, #1, MUL VL] + WORD $0x858041e2 // ldr z2, [x15] + WORD $0x858045e3 // ldr z3, [x15, #1, MUL VL] + WORD $0x858041a4 // ldr z4, [x13] + WORD $0x858045a5 // ldr z5, [x13, #1, MUL VL] + + // Load and process 64 bytes from input 0 to 3 outputs + WORD $0x8580406b // ldr z11, [x3] + WORD $0x8580446d // ldr z13, [x3, #1, MUL VL] + WORD $0x04235043 // addvl x3, x3, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85804047 // ldr z7, [x2] + WORD $0x85804448 // ldr z8, [x2, #1, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85804847 // ldr z7, [x2, #2, MUL VL] + WORD $0x85804c48 // ldr z8, [x2, #3, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85805047 // ldr z7, [x2, #4, MUL VL] + WORD $0x85805448 // ldr z8, [x2, #5, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x3_64Xor_store + + // Load and process 64 bytes from input 1 to 3 outputs + WORD $0x8580402b // ldr z11, [x1] + WORD $0x8580442d // ldr z13, [x1, #1, MUL VL] + WORD $0x04215041 // addvl x1, x1, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85805847 // ldr z7, [x2, #6, MUL VL] + WORD $0x85805c48 // ldr z8, [x2, #7, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85814047 // ldr z7, [x2, #8, MUL VL] + WORD $0x85814448 // ldr z8, [x2, #9, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85814847 // ldr z7, [x2, #10, MUL VL] + WORD $0x85814c48 // ldr z8, [x2, #11, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x3_64Xor_store + + // Load and process 64 bytes from input 2 to 3 outputs + WORD $0x8580408b // ldr z11, [x4] + WORD $0x8580448d // ldr z13, [x4, #1, MUL VL] + WORD $0x04245044 // addvl x4, x4, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85815047 // ldr z7, [x2, #12, MUL VL] + WORD $0x85815448 // ldr z8, [x2, #13, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85815847 // ldr z7, [x2, #14, MUL VL] + WORD $0x85815c48 // ldr z8, [x2, #15, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85824047 // ldr z7, [x2, #16, MUL VL] + WORD $0x85824448 // ldr z8, [x2, #17, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x3_64Xor_store + + // Load and process 64 bytes from input 3 to 3 outputs + WORD $0x858040ab // ldr z11, [x5] + WORD $0x858044ad // ldr z13, [x5, #1, MUL VL] + WORD $0x04255045 // addvl x5, x5, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85824847 // ldr z7, [x2, #18, MUL VL] + WORD $0x85824c48 // ldr z8, [x2, #19, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85825047 // ldr z7, [x2, #20, MUL VL] + WORD $0x85825448 // ldr z8, [x2, #21, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85825847 // ldr z7, [x2, #22, MUL VL] + WORD $0x85825c48 // ldr z8, [x2, #23, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x3_64Xor_store + + // Load and process 64 bytes from input 4 to 3 outputs + WORD $0x8580410b // ldr z11, [x8] + WORD $0x8580450d // ldr z13, [x8, #1, MUL VL] + WORD $0x04285048 // addvl x8, x8, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85834047 // ldr z7, [x2, #24, MUL VL] + WORD $0x85834448 // ldr z8, [x2, #25, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85834847 // ldr z7, [x2, #26, MUL VL] + WORD $0x85834c48 // ldr z8, [x2, #27, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85835047 // ldr z7, [x2, #28, MUL VL] + WORD $0x85835448 // ldr z8, [x2, #29, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x3_64Xor_store + + // Load and process 64 bytes from input 5 to 3 outputs + WORD $0x8580412b // ldr z11, [x9] + WORD $0x8580452d // ldr z13, [x9, #1, MUL VL] + WORD $0x04295049 // addvl x9, x9, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85835847 // ldr z7, [x2, #30, MUL VL] + WORD $0x85835c48 // ldr z8, [x2, #31, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85844047 // ldr z7, [x2, #32, MUL VL] + WORD $0x85844448 // ldr z8, [x2, #33, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85844847 // ldr z7, [x2, #34, MUL VL] + WORD $0x85844c48 // ldr z8, [x2, #35, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x3_64Xor_store + + // Load and process 64 bytes from input 6 to 3 outputs + WORD $0x8580414b // ldr z11, [x10] + WORD $0x8580454d // ldr z13, [x10, #1, MUL VL] + WORD $0x042a504a // addvl x10, x10, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85845047 // ldr z7, [x2, #36, MUL VL] + WORD $0x85845448 // ldr z8, [x2, #37, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85845847 // ldr z7, [x2, #38, MUL VL] + WORD $0x85845c48 // ldr z8, [x2, #39, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85854047 // ldr z7, [x2, #40, MUL VL] + WORD $0x85854448 // ldr z8, [x2, #41, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x3_64Xor_store + + // Load and process 64 bytes from input 7 to 3 outputs + WORD $0x8580416b // ldr z11, [x11] + WORD $0x8580456d // ldr z13, [x11, #1, MUL VL] + WORD $0x042b504b // addvl x11, x11, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85854847 // ldr z7, [x2, #42, MUL VL] + WORD $0x85854c48 // ldr z8, [x2, #43, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85855047 // ldr z7, [x2, #44, MUL VL] + WORD $0x85855448 // ldr z8, [x2, #45, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85855847 // ldr z7, [x2, #46, MUL VL] + WORD $0x85855c48 // ldr z8, [x2, #47, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x3_64Xor_store + + // Load and process 64 bytes from input 8 to 3 outputs + WORD $0x8580418b // ldr z11, [x12] + WORD $0x8580458d // ldr z13, [x12, #1, MUL VL] + WORD $0x042c504c // addvl x12, x12, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85864047 // ldr z7, [x2, #48, MUL VL] + WORD $0x85864448 // ldr z8, [x2, #49, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85864847 // ldr z7, [x2, #50, MUL VL] + WORD $0x85864c48 // ldr z8, [x2, #51, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85865047 // ldr z7, [x2, #52, MUL VL] + WORD $0x85865448 // ldr z8, [x2, #53, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x3_64Xor_store + + // Load and process 64 bytes from input 9 to 3 outputs + WORD $0x8580400b // ldr z11, [x0] + WORD $0x8580440d // ldr z13, [x0, #1, MUL VL] + WORD $0x04205040 // addvl x0, x0, #2 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x0426316b // and z11.d, z11.d, z6.d + WORD $0x042631ad // and z13.d, z13.d, z6.d + WORD $0x0426318c // and z12.d, z12.d, z6.d + WORD $0x042631ce // and z14.d, z14.d, z6.d + WORD $0x85865847 // ldr z7, [x2, #54, MUL VL] + WORD $0x85865c48 // ldr z8, [x2, #55, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85874047 // ldr z7, [x2, #56, MUL VL] + WORD $0x85874448 // ldr z8, [x2, #57, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85874847 // ldr z7, [x2, #58, MUL VL] + WORD $0x85874c48 // ldr z8, [x2, #59, MUL VL] + WORD $0x052d30e9 // tbl z9.b, z7.b, z13.b + WORD $0x052b30e7 // tbl z7.b, z7.b, z11.b + WORD $0x052e310a // tbl z10.b, z8.b, z14.b + WORD $0x052c3108 // tbl z8.b, z8.b, z12.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + +mulSve_10x3_64Xor_store: + // Store 3 outputs + WORD $0xe58041c0 // str z0, [x14] + WORD $0xe58045c1 // str z1, [x14, #1, MUL VL] + WORD $0x042e504e // addvl x14, x14, #2 + WORD $0xe58041e2 // str z2, [x15] + WORD $0xe58045e3 // str z3, [x15, #1, MUL VL] + WORD $0x042f504f // addvl x15, x15, #2 + WORD $0xe58041a4 // str z4, [x13] + WORD $0xe58045a5 // str z5, [x13, #1, MUL VL] + WORD $0x042d504d // addvl x13, x13, #2 + + // Prepare for next loop + WORD $0xf10004c6 // subs x6, x6, #1 + BNE mulSve_10x3_64Xor_loop + +mulSve_10x3_64Xor_end: + RET + +// func mulSve_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x4(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 89 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x4_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c4 // mov z4.d, x6 + WORD $0x05212084 // dup z4.b, z4.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + WORD $0x85804027 // ldr z7, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85804045 // ldr z5, [x2] + WORD $0x85804446 // ldr z6, [x2, #1, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a530c0 // eor z0.d, z6.d, z5.d + WORD $0x85804845 // ldr z5, [x2, #2, MUL VL] + WORD $0x85804c46 // ldr z6, [x2, #3, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a530c1 // eor z1.d, z6.d, z5.d + WORD $0x85805045 // ldr z5, [x2, #4, MUL VL] + WORD $0x85805446 // ldr z6, [x2, #5, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a530c2 // eor z2.d, z6.d, z5.d + WORD $0x85805845 // ldr z5, [x2, #6, MUL VL] + WORD $0x85805c46 // ldr z6, [x2, #7, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a530c3 // eor z3.d, z6.d, z5.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x4_store + + // Load and process 32 bytes from input 1 to 4 outputs + WORD $0x85804087 // ldr z7, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85814045 // ldr z5, [x2, #8, MUL VL] + WORD $0x85814446 // ldr z6, [x2, #9, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85814845 // ldr z5, [x2, #10, MUL VL] + WORD $0x85814c46 // ldr z6, [x2, #11, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85815045 // ldr z5, [x2, #12, MUL VL] + WORD $0x85815446 // ldr z6, [x2, #13, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85815845 // ldr z5, [x2, #14, MUL VL] + WORD $0x85815c46 // ldr z6, [x2, #15, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x4_store + + // Load and process 32 bytes from input 2 to 4 outputs + WORD $0x858040a7 // ldr z7, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85824045 // ldr z5, [x2, #16, MUL VL] + WORD $0x85824446 // ldr z6, [x2, #17, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85824845 // ldr z5, [x2, #18, MUL VL] + WORD $0x85824c46 // ldr z6, [x2, #19, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85825045 // ldr z5, [x2, #20, MUL VL] + WORD $0x85825446 // ldr z6, [x2, #21, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85825845 // ldr z5, [x2, #22, MUL VL] + WORD $0x85825c46 // ldr z6, [x2, #23, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x4_store + + // Load and process 32 bytes from input 3 to 4 outputs + WORD $0x85804107 // ldr z7, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85834045 // ldr z5, [x2, #24, MUL VL] + WORD $0x85834446 // ldr z6, [x2, #25, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85834845 // ldr z5, [x2, #26, MUL VL] + WORD $0x85834c46 // ldr z6, [x2, #27, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85835045 // ldr z5, [x2, #28, MUL VL] + WORD $0x85835446 // ldr z6, [x2, #29, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85835845 // ldr z5, [x2, #30, MUL VL] + WORD $0x85835c46 // ldr z6, [x2, #31, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x4_store + + // Load and process 32 bytes from input 4 to 4 outputs + WORD $0x85804127 // ldr z7, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85844045 // ldr z5, [x2, #32, MUL VL] + WORD $0x85844446 // ldr z6, [x2, #33, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85844845 // ldr z5, [x2, #34, MUL VL] + WORD $0x85844c46 // ldr z6, [x2, #35, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85845045 // ldr z5, [x2, #36, MUL VL] + WORD $0x85845446 // ldr z6, [x2, #37, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85845845 // ldr z5, [x2, #38, MUL VL] + WORD $0x85845c46 // ldr z6, [x2, #39, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x4_store + + // Load and process 32 bytes from input 5 to 4 outputs + WORD $0x85804147 // ldr z7, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85854045 // ldr z5, [x2, #40, MUL VL] + WORD $0x85854446 // ldr z6, [x2, #41, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85854845 // ldr z5, [x2, #42, MUL VL] + WORD $0x85854c46 // ldr z6, [x2, #43, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85855045 // ldr z5, [x2, #44, MUL VL] + WORD $0x85855446 // ldr z6, [x2, #45, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85855845 // ldr z5, [x2, #46, MUL VL] + WORD $0x85855c46 // ldr z6, [x2, #47, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x4_store + + // Load and process 32 bytes from input 6 to 4 outputs + WORD $0x85804167 // ldr z7, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85864045 // ldr z5, [x2, #48, MUL VL] + WORD $0x85864446 // ldr z6, [x2, #49, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85864845 // ldr z5, [x2, #50, MUL VL] + WORD $0x85864c46 // ldr z6, [x2, #51, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85865045 // ldr z5, [x2, #52, MUL VL] + WORD $0x85865446 // ldr z6, [x2, #53, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85865845 // ldr z5, [x2, #54, MUL VL] + WORD $0x85865c46 // ldr z6, [x2, #55, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x4_store + + // Load and process 32 bytes from input 7 to 4 outputs + WORD $0x85804187 // ldr z7, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85874045 // ldr z5, [x2, #56, MUL VL] + WORD $0x85874446 // ldr z6, [x2, #57, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85874845 // ldr z5, [x2, #58, MUL VL] + WORD $0x85874c46 // ldr z6, [x2, #59, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85875045 // ldr z5, [x2, #60, MUL VL] + WORD $0x85875446 // ldr z6, [x2, #61, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85875845 // ldr z5, [x2, #62, MUL VL] + WORD $0x85875c46 // ldr z6, [x2, #63, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x4_store + + // Load and process 32 bytes from input 8 to 4 outputs + WORD $0x858041a7 // ldr z7, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85884045 // ldr z5, [x2, #64, MUL VL] + WORD $0x85884446 // ldr z6, [x2, #65, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85884845 // ldr z5, [x2, #66, MUL VL] + WORD $0x85884c46 // ldr z6, [x2, #67, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85885045 // ldr z5, [x2, #68, MUL VL] + WORD $0x85885446 // ldr z6, [x2, #69, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85885845 // ldr z5, [x2, #70, MUL VL] + WORD $0x85885c46 // ldr z6, [x2, #71, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x4_store + + // Load and process 32 bytes from input 9 to 4 outputs + WORD $0x85804067 // ldr z7, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85894045 // ldr z5, [x2, #72, MUL VL] + WORD $0x85894446 // ldr z6, [x2, #73, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85894845 // ldr z5, [x2, #74, MUL VL] + WORD $0x85894c46 // ldr z6, [x2, #75, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85895045 // ldr z5, [x2, #76, MUL VL] + WORD $0x85895446 // ldr z6, [x2, #77, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85895845 // ldr z5, [x2, #78, MUL VL] + WORD $0x85895c46 // ldr z6, [x2, #79, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + +mulSve_10x4_store: + // Store 4 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x4_loop + +mulSve_10x4_end: + RET + +// func mulSve_10x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x4Xor(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 89 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x4Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c4 // mov z4.d, x6 + WORD $0x05212084 // dup z4.b, z4.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x4Xor_loop: + // Load and process 32 bytes from input 0 to 4 outputs + WORD $0x85804027 // ldr z7, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + MOVD (R14), R6 + WORD $0xa5ef40c0 // ld1d { z0.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85804045 // ldr z5, [x2] + WORD $0x85804446 // ldr z6, [x2, #1, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + MOVD 24(R14), R6 + WORD $0xa5ef40c1 // ld1d { z1.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85804845 // ldr z5, [x2, #2, MUL VL] + WORD $0x85804c46 // ldr z6, [x2, #3, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + MOVD 48(R14), R6 + WORD $0xa5ef40c2 // ld1d { z2.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85805045 // ldr z5, [x2, #4, MUL VL] + WORD $0x85805446 // ldr z6, [x2, #5, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + MOVD 72(R14), R6 + WORD $0xa5ef40c3 // ld1d { z3.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85805845 // ldr z5, [x2, #6, MUL VL] + WORD $0x85805c46 // ldr z6, [x2, #7, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x4Xor_store + + // Load and process 32 bytes from input 1 to 4 outputs + WORD $0x85804087 // ldr z7, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85814045 // ldr z5, [x2, #8, MUL VL] + WORD $0x85814446 // ldr z6, [x2, #9, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85814845 // ldr z5, [x2, #10, MUL VL] + WORD $0x85814c46 // ldr z6, [x2, #11, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85815045 // ldr z5, [x2, #12, MUL VL] + WORD $0x85815446 // ldr z6, [x2, #13, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85815845 // ldr z5, [x2, #14, MUL VL] + WORD $0x85815c46 // ldr z6, [x2, #15, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x4Xor_store + + // Load and process 32 bytes from input 2 to 4 outputs + WORD $0x858040a7 // ldr z7, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85824045 // ldr z5, [x2, #16, MUL VL] + WORD $0x85824446 // ldr z6, [x2, #17, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85824845 // ldr z5, [x2, #18, MUL VL] + WORD $0x85824c46 // ldr z6, [x2, #19, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85825045 // ldr z5, [x2, #20, MUL VL] + WORD $0x85825446 // ldr z6, [x2, #21, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85825845 // ldr z5, [x2, #22, MUL VL] + WORD $0x85825c46 // ldr z6, [x2, #23, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x4Xor_store + + // Load and process 32 bytes from input 3 to 4 outputs + WORD $0x85804107 // ldr z7, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85834045 // ldr z5, [x2, #24, MUL VL] + WORD $0x85834446 // ldr z6, [x2, #25, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85834845 // ldr z5, [x2, #26, MUL VL] + WORD $0x85834c46 // ldr z6, [x2, #27, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85835045 // ldr z5, [x2, #28, MUL VL] + WORD $0x85835446 // ldr z6, [x2, #29, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85835845 // ldr z5, [x2, #30, MUL VL] + WORD $0x85835c46 // ldr z6, [x2, #31, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x4Xor_store + + // Load and process 32 bytes from input 4 to 4 outputs + WORD $0x85804127 // ldr z7, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85844045 // ldr z5, [x2, #32, MUL VL] + WORD $0x85844446 // ldr z6, [x2, #33, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85844845 // ldr z5, [x2, #34, MUL VL] + WORD $0x85844c46 // ldr z6, [x2, #35, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85845045 // ldr z5, [x2, #36, MUL VL] + WORD $0x85845446 // ldr z6, [x2, #37, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85845845 // ldr z5, [x2, #38, MUL VL] + WORD $0x85845c46 // ldr z6, [x2, #39, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x4Xor_store + + // Load and process 32 bytes from input 5 to 4 outputs + WORD $0x85804147 // ldr z7, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85854045 // ldr z5, [x2, #40, MUL VL] + WORD $0x85854446 // ldr z6, [x2, #41, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85854845 // ldr z5, [x2, #42, MUL VL] + WORD $0x85854c46 // ldr z6, [x2, #43, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85855045 // ldr z5, [x2, #44, MUL VL] + WORD $0x85855446 // ldr z6, [x2, #45, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85855845 // ldr z5, [x2, #46, MUL VL] + WORD $0x85855c46 // ldr z6, [x2, #47, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x4Xor_store + + // Load and process 32 bytes from input 6 to 4 outputs + WORD $0x85804167 // ldr z7, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85864045 // ldr z5, [x2, #48, MUL VL] + WORD $0x85864446 // ldr z6, [x2, #49, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85864845 // ldr z5, [x2, #50, MUL VL] + WORD $0x85864c46 // ldr z6, [x2, #51, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85865045 // ldr z5, [x2, #52, MUL VL] + WORD $0x85865446 // ldr z6, [x2, #53, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85865845 // ldr z5, [x2, #54, MUL VL] + WORD $0x85865c46 // ldr z6, [x2, #55, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x4Xor_store + + // Load and process 32 bytes from input 7 to 4 outputs + WORD $0x85804187 // ldr z7, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85874045 // ldr z5, [x2, #56, MUL VL] + WORD $0x85874446 // ldr z6, [x2, #57, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85874845 // ldr z5, [x2, #58, MUL VL] + WORD $0x85874c46 // ldr z6, [x2, #59, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85875045 // ldr z5, [x2, #60, MUL VL] + WORD $0x85875446 // ldr z6, [x2, #61, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85875845 // ldr z5, [x2, #62, MUL VL] + WORD $0x85875c46 // ldr z6, [x2, #63, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x4Xor_store + + // Load and process 32 bytes from input 8 to 4 outputs + WORD $0x858041a7 // ldr z7, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85884045 // ldr z5, [x2, #64, MUL VL] + WORD $0x85884446 // ldr z6, [x2, #65, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85884845 // ldr z5, [x2, #66, MUL VL] + WORD $0x85884c46 // ldr z6, [x2, #67, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85885045 // ldr z5, [x2, #68, MUL VL] + WORD $0x85885446 // ldr z6, [x2, #69, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85885845 // ldr z5, [x2, #70, MUL VL] + WORD $0x85885c46 // ldr z6, [x2, #71, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x4Xor_store + + // Load and process 32 bytes from input 9 to 4 outputs + WORD $0x85804067 // ldr z7, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc94e8 // lsr z8.d, z7.d, #4 + WORD $0x042430e7 // and z7.d, z7.d, z4.d + WORD $0x04243108 // and z8.d, z8.d, z4.d + WORD $0x85894045 // ldr z5, [x2, #72, MUL VL] + WORD $0x85894446 // ldr z6, [x2, #73, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53000 // eor z0.d, z0.d, z5.d + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x85894845 // ldr z5, [x2, #74, MUL VL] + WORD $0x85894c46 // ldr z6, [x2, #75, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53021 // eor z1.d, z1.d, z5.d + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x85895045 // ldr z5, [x2, #76, MUL VL] + WORD $0x85895446 // ldr z6, [x2, #77, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53042 // eor z2.d, z2.d, z5.d + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x85895845 // ldr z5, [x2, #78, MUL VL] + WORD $0x85895c46 // ldr z6, [x2, #79, MUL VL] + WORD $0x052730a5 // tbl z5.b, z5.b, z7.b + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x04a53063 // eor z3.d, z3.d, z5.d + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + +mulSve_10x4Xor_store: + // Store 4 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x4Xor_loop + +mulSve_10x4Xor_end: + RET + +// func mulSve_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x5(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 110 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x5_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c5 // mov z5.d, x6 + WORD $0x052120a5 // dup z5.b, z5.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + WORD $0x85804028 // ldr z8, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85804046 // ldr z6, [x2] + WORD $0x85804447 // ldr z7, [x2, #1, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a630e0 // eor z0.d, z7.d, z6.d + WORD $0x85804846 // ldr z6, [x2, #2, MUL VL] + WORD $0x85804c47 // ldr z7, [x2, #3, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a630e1 // eor z1.d, z7.d, z6.d + WORD $0x85805046 // ldr z6, [x2, #4, MUL VL] + WORD $0x85805447 // ldr z7, [x2, #5, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a630e2 // eor z2.d, z7.d, z6.d + WORD $0x85805846 // ldr z6, [x2, #6, MUL VL] + WORD $0x85805c47 // ldr z7, [x2, #7, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a630e3 // eor z3.d, z7.d, z6.d + WORD $0x85814046 // ldr z6, [x2, #8, MUL VL] + WORD $0x85814447 // ldr z7, [x2, #9, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a630e4 // eor z4.d, z7.d, z6.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x5_store + + // Load and process 32 bytes from input 1 to 5 outputs + WORD $0x85804088 // ldr z8, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85814846 // ldr z6, [x2, #10, MUL VL] + WORD $0x85814c47 // ldr z7, [x2, #11, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85815046 // ldr z6, [x2, #12, MUL VL] + WORD $0x85815447 // ldr z7, [x2, #13, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85815846 // ldr z6, [x2, #14, MUL VL] + WORD $0x85815c47 // ldr z7, [x2, #15, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85824046 // ldr z6, [x2, #16, MUL VL] + WORD $0x85824447 // ldr z7, [x2, #17, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85824846 // ldr z6, [x2, #18, MUL VL] + WORD $0x85824c47 // ldr z7, [x2, #19, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x5_store + + // Load and process 32 bytes from input 2 to 5 outputs + WORD $0x858040a8 // ldr z8, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85825046 // ldr z6, [x2, #20, MUL VL] + WORD $0x85825447 // ldr z7, [x2, #21, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85825846 // ldr z6, [x2, #22, MUL VL] + WORD $0x85825c47 // ldr z7, [x2, #23, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85834046 // ldr z6, [x2, #24, MUL VL] + WORD $0x85834447 // ldr z7, [x2, #25, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85834846 // ldr z6, [x2, #26, MUL VL] + WORD $0x85834c47 // ldr z7, [x2, #27, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85835046 // ldr z6, [x2, #28, MUL VL] + WORD $0x85835447 // ldr z7, [x2, #29, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x5_store + + // Load and process 32 bytes from input 3 to 5 outputs + WORD $0x85804108 // ldr z8, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85835846 // ldr z6, [x2, #30, MUL VL] + WORD $0x85835c47 // ldr z7, [x2, #31, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85844046 // ldr z6, [x2, #32, MUL VL] + WORD $0x85844447 // ldr z7, [x2, #33, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85844846 // ldr z6, [x2, #34, MUL VL] + WORD $0x85844c47 // ldr z7, [x2, #35, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85845046 // ldr z6, [x2, #36, MUL VL] + WORD $0x85845447 // ldr z7, [x2, #37, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85845846 // ldr z6, [x2, #38, MUL VL] + WORD $0x85845c47 // ldr z7, [x2, #39, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x5_store + + // Load and process 32 bytes from input 4 to 5 outputs + WORD $0x85804128 // ldr z8, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85854046 // ldr z6, [x2, #40, MUL VL] + WORD $0x85854447 // ldr z7, [x2, #41, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85854846 // ldr z6, [x2, #42, MUL VL] + WORD $0x85854c47 // ldr z7, [x2, #43, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85855046 // ldr z6, [x2, #44, MUL VL] + WORD $0x85855447 // ldr z7, [x2, #45, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85855846 // ldr z6, [x2, #46, MUL VL] + WORD $0x85855c47 // ldr z7, [x2, #47, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85864046 // ldr z6, [x2, #48, MUL VL] + WORD $0x85864447 // ldr z7, [x2, #49, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x5_store + + // Load and process 32 bytes from input 5 to 5 outputs + WORD $0x85804148 // ldr z8, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85864846 // ldr z6, [x2, #50, MUL VL] + WORD $0x85864c47 // ldr z7, [x2, #51, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85865046 // ldr z6, [x2, #52, MUL VL] + WORD $0x85865447 // ldr z7, [x2, #53, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85865846 // ldr z6, [x2, #54, MUL VL] + WORD $0x85865c47 // ldr z7, [x2, #55, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85874046 // ldr z6, [x2, #56, MUL VL] + WORD $0x85874447 // ldr z7, [x2, #57, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85874846 // ldr z6, [x2, #58, MUL VL] + WORD $0x85874c47 // ldr z7, [x2, #59, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x5_store + + // Load and process 32 bytes from input 6 to 5 outputs + WORD $0x85804168 // ldr z8, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85875046 // ldr z6, [x2, #60, MUL VL] + WORD $0x85875447 // ldr z7, [x2, #61, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85875846 // ldr z6, [x2, #62, MUL VL] + WORD $0x85875c47 // ldr z7, [x2, #63, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85884046 // ldr z6, [x2, #64, MUL VL] + WORD $0x85884447 // ldr z7, [x2, #65, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85884846 // ldr z6, [x2, #66, MUL VL] + WORD $0x85884c47 // ldr z7, [x2, #67, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85885046 // ldr z6, [x2, #68, MUL VL] + WORD $0x85885447 // ldr z7, [x2, #69, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x5_store + + // Load and process 32 bytes from input 7 to 5 outputs + WORD $0x85804188 // ldr z8, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85885846 // ldr z6, [x2, #70, MUL VL] + WORD $0x85885c47 // ldr z7, [x2, #71, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85894046 // ldr z6, [x2, #72, MUL VL] + WORD $0x85894447 // ldr z7, [x2, #73, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85894846 // ldr z6, [x2, #74, MUL VL] + WORD $0x85894c47 // ldr z7, [x2, #75, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85895046 // ldr z6, [x2, #76, MUL VL] + WORD $0x85895447 // ldr z7, [x2, #77, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85895846 // ldr z6, [x2, #78, MUL VL] + WORD $0x85895c47 // ldr z7, [x2, #79, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x5_store + + // Load and process 32 bytes from input 8 to 5 outputs + WORD $0x858041a8 // ldr z8, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x858a4046 // ldr z6, [x2, #80, MUL VL] + WORD $0x858a4447 // ldr z7, [x2, #81, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x858a4846 // ldr z6, [x2, #82, MUL VL] + WORD $0x858a4c47 // ldr z7, [x2, #83, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x858a5046 // ldr z6, [x2, #84, MUL VL] + WORD $0x858a5447 // ldr z7, [x2, #85, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x858a5846 // ldr z6, [x2, #86, MUL VL] + WORD $0x858a5c47 // ldr z7, [x2, #87, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x858b4046 // ldr z6, [x2, #88, MUL VL] + WORD $0x858b4447 // ldr z7, [x2, #89, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x5_store + + // Load and process 32 bytes from input 9 to 5 outputs + WORD $0x85804068 // ldr z8, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x858b4846 // ldr z6, [x2, #90, MUL VL] + WORD $0x858b4c47 // ldr z7, [x2, #91, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x858b5046 // ldr z6, [x2, #92, MUL VL] + WORD $0x858b5447 // ldr z7, [x2, #93, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x858b5846 // ldr z6, [x2, #94, MUL VL] + WORD $0x858b5c47 // ldr z7, [x2, #95, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x858c4046 // ldr z6, [x2, #96, MUL VL] + WORD $0x858c4447 // ldr z7, [x2, #97, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x858c4846 // ldr z6, [x2, #98, MUL VL] + WORD $0x858c4c47 // ldr z7, [x2, #99, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + +mulSve_10x5_store: + // Store 5 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x5_loop + +mulSve_10x5_end: + RET + +// func mulSve_10x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x5Xor(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 110 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x5Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c5 // mov z5.d, x6 + WORD $0x052120a5 // dup z5.b, z5.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x5Xor_loop: + // Load and process 32 bytes from input 0 to 5 outputs + WORD $0x85804028 // ldr z8, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + MOVD (R14), R6 + WORD $0xa5ef40c0 // ld1d { z0.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85804046 // ldr z6, [x2] + WORD $0x85804447 // ldr z7, [x2, #1, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + MOVD 24(R14), R6 + WORD $0xa5ef40c1 // ld1d { z1.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85804846 // ldr z6, [x2, #2, MUL VL] + WORD $0x85804c47 // ldr z7, [x2, #3, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + MOVD 48(R14), R6 + WORD $0xa5ef40c2 // ld1d { z2.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85805046 // ldr z6, [x2, #4, MUL VL] + WORD $0x85805447 // ldr z7, [x2, #5, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + MOVD 72(R14), R6 + WORD $0xa5ef40c3 // ld1d { z3.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85805846 // ldr z6, [x2, #6, MUL VL] + WORD $0x85805c47 // ldr z7, [x2, #7, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + MOVD 96(R14), R6 + WORD $0xa5ef40c4 // ld1d { z4.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85814046 // ldr z6, [x2, #8, MUL VL] + WORD $0x85814447 // ldr z7, [x2, #9, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x5Xor_store + + // Load and process 32 bytes from input 1 to 5 outputs + WORD $0x85804088 // ldr z8, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85814846 // ldr z6, [x2, #10, MUL VL] + WORD $0x85814c47 // ldr z7, [x2, #11, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85815046 // ldr z6, [x2, #12, MUL VL] + WORD $0x85815447 // ldr z7, [x2, #13, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85815846 // ldr z6, [x2, #14, MUL VL] + WORD $0x85815c47 // ldr z7, [x2, #15, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85824046 // ldr z6, [x2, #16, MUL VL] + WORD $0x85824447 // ldr z7, [x2, #17, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85824846 // ldr z6, [x2, #18, MUL VL] + WORD $0x85824c47 // ldr z7, [x2, #19, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x5Xor_store + + // Load and process 32 bytes from input 2 to 5 outputs + WORD $0x858040a8 // ldr z8, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85825046 // ldr z6, [x2, #20, MUL VL] + WORD $0x85825447 // ldr z7, [x2, #21, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85825846 // ldr z6, [x2, #22, MUL VL] + WORD $0x85825c47 // ldr z7, [x2, #23, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85834046 // ldr z6, [x2, #24, MUL VL] + WORD $0x85834447 // ldr z7, [x2, #25, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85834846 // ldr z6, [x2, #26, MUL VL] + WORD $0x85834c47 // ldr z7, [x2, #27, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85835046 // ldr z6, [x2, #28, MUL VL] + WORD $0x85835447 // ldr z7, [x2, #29, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x5Xor_store + + // Load and process 32 bytes from input 3 to 5 outputs + WORD $0x85804108 // ldr z8, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85835846 // ldr z6, [x2, #30, MUL VL] + WORD $0x85835c47 // ldr z7, [x2, #31, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85844046 // ldr z6, [x2, #32, MUL VL] + WORD $0x85844447 // ldr z7, [x2, #33, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85844846 // ldr z6, [x2, #34, MUL VL] + WORD $0x85844c47 // ldr z7, [x2, #35, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85845046 // ldr z6, [x2, #36, MUL VL] + WORD $0x85845447 // ldr z7, [x2, #37, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85845846 // ldr z6, [x2, #38, MUL VL] + WORD $0x85845c47 // ldr z7, [x2, #39, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x5Xor_store + + // Load and process 32 bytes from input 4 to 5 outputs + WORD $0x85804128 // ldr z8, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85854046 // ldr z6, [x2, #40, MUL VL] + WORD $0x85854447 // ldr z7, [x2, #41, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85854846 // ldr z6, [x2, #42, MUL VL] + WORD $0x85854c47 // ldr z7, [x2, #43, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85855046 // ldr z6, [x2, #44, MUL VL] + WORD $0x85855447 // ldr z7, [x2, #45, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85855846 // ldr z6, [x2, #46, MUL VL] + WORD $0x85855c47 // ldr z7, [x2, #47, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85864046 // ldr z6, [x2, #48, MUL VL] + WORD $0x85864447 // ldr z7, [x2, #49, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x5Xor_store + + // Load and process 32 bytes from input 5 to 5 outputs + WORD $0x85804148 // ldr z8, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85864846 // ldr z6, [x2, #50, MUL VL] + WORD $0x85864c47 // ldr z7, [x2, #51, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85865046 // ldr z6, [x2, #52, MUL VL] + WORD $0x85865447 // ldr z7, [x2, #53, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85865846 // ldr z6, [x2, #54, MUL VL] + WORD $0x85865c47 // ldr z7, [x2, #55, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85874046 // ldr z6, [x2, #56, MUL VL] + WORD $0x85874447 // ldr z7, [x2, #57, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85874846 // ldr z6, [x2, #58, MUL VL] + WORD $0x85874c47 // ldr z7, [x2, #59, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x5Xor_store + + // Load and process 32 bytes from input 6 to 5 outputs + WORD $0x85804168 // ldr z8, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85875046 // ldr z6, [x2, #60, MUL VL] + WORD $0x85875447 // ldr z7, [x2, #61, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85875846 // ldr z6, [x2, #62, MUL VL] + WORD $0x85875c47 // ldr z7, [x2, #63, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85884046 // ldr z6, [x2, #64, MUL VL] + WORD $0x85884447 // ldr z7, [x2, #65, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85884846 // ldr z6, [x2, #66, MUL VL] + WORD $0x85884c47 // ldr z7, [x2, #67, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85885046 // ldr z6, [x2, #68, MUL VL] + WORD $0x85885447 // ldr z7, [x2, #69, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x5Xor_store + + // Load and process 32 bytes from input 7 to 5 outputs + WORD $0x85804188 // ldr z8, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x85885846 // ldr z6, [x2, #70, MUL VL] + WORD $0x85885c47 // ldr z7, [x2, #71, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x85894046 // ldr z6, [x2, #72, MUL VL] + WORD $0x85894447 // ldr z7, [x2, #73, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x85894846 // ldr z6, [x2, #74, MUL VL] + WORD $0x85894c47 // ldr z7, [x2, #75, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x85895046 // ldr z6, [x2, #76, MUL VL] + WORD $0x85895447 // ldr z7, [x2, #77, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x85895846 // ldr z6, [x2, #78, MUL VL] + WORD $0x85895c47 // ldr z7, [x2, #79, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x5Xor_store + + // Load and process 32 bytes from input 8 to 5 outputs + WORD $0x858041a8 // ldr z8, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x858a4046 // ldr z6, [x2, #80, MUL VL] + WORD $0x858a4447 // ldr z7, [x2, #81, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x858a4846 // ldr z6, [x2, #82, MUL VL] + WORD $0x858a4c47 // ldr z7, [x2, #83, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x858a5046 // ldr z6, [x2, #84, MUL VL] + WORD $0x858a5447 // ldr z7, [x2, #85, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x858a5846 // ldr z6, [x2, #86, MUL VL] + WORD $0x858a5c47 // ldr z7, [x2, #87, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x858b4046 // ldr z6, [x2, #88, MUL VL] + WORD $0x858b4447 // ldr z7, [x2, #89, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x5Xor_store + + // Load and process 32 bytes from input 9 to 5 outputs + WORD $0x85804068 // ldr z8, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc9509 // lsr z9.d, z8.d, #4 + WORD $0x04253108 // and z8.d, z8.d, z5.d + WORD $0x04253129 // and z9.d, z9.d, z5.d + WORD $0x858b4846 // ldr z6, [x2, #90, MUL VL] + WORD $0x858b4c47 // ldr z7, [x2, #91, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63000 // eor z0.d, z0.d, z6.d + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x858b5046 // ldr z6, [x2, #92, MUL VL] + WORD $0x858b5447 // ldr z7, [x2, #93, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63021 // eor z1.d, z1.d, z6.d + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x858b5846 // ldr z6, [x2, #94, MUL VL] + WORD $0x858b5c47 // ldr z7, [x2, #95, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63042 // eor z2.d, z2.d, z6.d + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x858c4046 // ldr z6, [x2, #96, MUL VL] + WORD $0x858c4447 // ldr z7, [x2, #97, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63063 // eor z3.d, z3.d, z6.d + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x858c4846 // ldr z6, [x2, #98, MUL VL] + WORD $0x858c4c47 // ldr z7, [x2, #99, MUL VL] + WORD $0x052830c6 // tbl z6.b, z6.b, z8.b + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x04a63084 // eor z4.d, z4.d, z6.d + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + +mulSve_10x5Xor_store: + // Store 5 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x5Xor_loop + +mulSve_10x5Xor_end: + RET + +// func mulSve_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x6(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 131 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x6_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c6 // mov z6.d, x6 + WORD $0x052120c6 // dup z6.b, z6.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + WORD $0x85804029 // ldr z9, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85804047 // ldr z7, [x2] + WORD $0x85804448 // ldr z8, [x2, #1, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73100 // eor z0.d, z8.d, z7.d + WORD $0x85804847 // ldr z7, [x2, #2, MUL VL] + WORD $0x85804c48 // ldr z8, [x2, #3, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73101 // eor z1.d, z8.d, z7.d + WORD $0x85805047 // ldr z7, [x2, #4, MUL VL] + WORD $0x85805448 // ldr z8, [x2, #5, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73102 // eor z2.d, z8.d, z7.d + WORD $0x85805847 // ldr z7, [x2, #6, MUL VL] + WORD $0x85805c48 // ldr z8, [x2, #7, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73103 // eor z3.d, z8.d, z7.d + WORD $0x85814047 // ldr z7, [x2, #8, MUL VL] + WORD $0x85814448 // ldr z8, [x2, #9, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73104 // eor z4.d, z8.d, z7.d + WORD $0x85814847 // ldr z7, [x2, #10, MUL VL] + WORD $0x85814c48 // ldr z8, [x2, #11, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73105 // eor z5.d, z8.d, z7.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x6_store + + // Load and process 32 bytes from input 1 to 6 outputs + WORD $0x85804089 // ldr z9, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85815047 // ldr z7, [x2, #12, MUL VL] + WORD $0x85815448 // ldr z8, [x2, #13, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85815847 // ldr z7, [x2, #14, MUL VL] + WORD $0x85815c48 // ldr z8, [x2, #15, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85824047 // ldr z7, [x2, #16, MUL VL] + WORD $0x85824448 // ldr z8, [x2, #17, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85824847 // ldr z7, [x2, #18, MUL VL] + WORD $0x85824c48 // ldr z8, [x2, #19, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x85825047 // ldr z7, [x2, #20, MUL VL] + WORD $0x85825448 // ldr z8, [x2, #21, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x85825847 // ldr z7, [x2, #22, MUL VL] + WORD $0x85825c48 // ldr z8, [x2, #23, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x6_store + + // Load and process 32 bytes from input 2 to 6 outputs + WORD $0x858040a9 // ldr z9, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85834047 // ldr z7, [x2, #24, MUL VL] + WORD $0x85834448 // ldr z8, [x2, #25, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85834847 // ldr z7, [x2, #26, MUL VL] + WORD $0x85834c48 // ldr z8, [x2, #27, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85835047 // ldr z7, [x2, #28, MUL VL] + WORD $0x85835448 // ldr z8, [x2, #29, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85835847 // ldr z7, [x2, #30, MUL VL] + WORD $0x85835c48 // ldr z8, [x2, #31, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x85844047 // ldr z7, [x2, #32, MUL VL] + WORD $0x85844448 // ldr z8, [x2, #33, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x85844847 // ldr z7, [x2, #34, MUL VL] + WORD $0x85844c48 // ldr z8, [x2, #35, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x6_store + + // Load and process 32 bytes from input 3 to 6 outputs + WORD $0x85804109 // ldr z9, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85845047 // ldr z7, [x2, #36, MUL VL] + WORD $0x85845448 // ldr z8, [x2, #37, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85845847 // ldr z7, [x2, #38, MUL VL] + WORD $0x85845c48 // ldr z8, [x2, #39, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85854047 // ldr z7, [x2, #40, MUL VL] + WORD $0x85854448 // ldr z8, [x2, #41, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85854847 // ldr z7, [x2, #42, MUL VL] + WORD $0x85854c48 // ldr z8, [x2, #43, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x85855047 // ldr z7, [x2, #44, MUL VL] + WORD $0x85855448 // ldr z8, [x2, #45, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x85855847 // ldr z7, [x2, #46, MUL VL] + WORD $0x85855c48 // ldr z8, [x2, #47, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x6_store + + // Load and process 32 bytes from input 4 to 6 outputs + WORD $0x85804129 // ldr z9, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85864047 // ldr z7, [x2, #48, MUL VL] + WORD $0x85864448 // ldr z8, [x2, #49, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85864847 // ldr z7, [x2, #50, MUL VL] + WORD $0x85864c48 // ldr z8, [x2, #51, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85865047 // ldr z7, [x2, #52, MUL VL] + WORD $0x85865448 // ldr z8, [x2, #53, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85865847 // ldr z7, [x2, #54, MUL VL] + WORD $0x85865c48 // ldr z8, [x2, #55, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x85874047 // ldr z7, [x2, #56, MUL VL] + WORD $0x85874448 // ldr z8, [x2, #57, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x85874847 // ldr z7, [x2, #58, MUL VL] + WORD $0x85874c48 // ldr z8, [x2, #59, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x6_store + + // Load and process 32 bytes from input 5 to 6 outputs + WORD $0x85804149 // ldr z9, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85875047 // ldr z7, [x2, #60, MUL VL] + WORD $0x85875448 // ldr z8, [x2, #61, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85875847 // ldr z7, [x2, #62, MUL VL] + WORD $0x85875c48 // ldr z8, [x2, #63, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85884047 // ldr z7, [x2, #64, MUL VL] + WORD $0x85884448 // ldr z8, [x2, #65, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85884847 // ldr z7, [x2, #66, MUL VL] + WORD $0x85884c48 // ldr z8, [x2, #67, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x85885047 // ldr z7, [x2, #68, MUL VL] + WORD $0x85885448 // ldr z8, [x2, #69, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x85885847 // ldr z7, [x2, #70, MUL VL] + WORD $0x85885c48 // ldr z8, [x2, #71, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x6_store + + // Load and process 32 bytes from input 6 to 6 outputs + WORD $0x85804169 // ldr z9, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85894047 // ldr z7, [x2, #72, MUL VL] + WORD $0x85894448 // ldr z8, [x2, #73, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85894847 // ldr z7, [x2, #74, MUL VL] + WORD $0x85894c48 // ldr z8, [x2, #75, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85895047 // ldr z7, [x2, #76, MUL VL] + WORD $0x85895448 // ldr z8, [x2, #77, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85895847 // ldr z7, [x2, #78, MUL VL] + WORD $0x85895c48 // ldr z8, [x2, #79, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x858a4047 // ldr z7, [x2, #80, MUL VL] + WORD $0x858a4448 // ldr z8, [x2, #81, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x858a4847 // ldr z7, [x2, #82, MUL VL] + WORD $0x858a4c48 // ldr z8, [x2, #83, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x6_store + + // Load and process 32 bytes from input 7 to 6 outputs + WORD $0x85804189 // ldr z9, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x858a5047 // ldr z7, [x2, #84, MUL VL] + WORD $0x858a5448 // ldr z8, [x2, #85, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x858a5847 // ldr z7, [x2, #86, MUL VL] + WORD $0x858a5c48 // ldr z8, [x2, #87, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x858b4047 // ldr z7, [x2, #88, MUL VL] + WORD $0x858b4448 // ldr z8, [x2, #89, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x858b4847 // ldr z7, [x2, #90, MUL VL] + WORD $0x858b4c48 // ldr z8, [x2, #91, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x858b5047 // ldr z7, [x2, #92, MUL VL] + WORD $0x858b5448 // ldr z8, [x2, #93, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x858b5847 // ldr z7, [x2, #94, MUL VL] + WORD $0x858b5c48 // ldr z8, [x2, #95, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x6_store + + // Load and process 32 bytes from input 8 to 6 outputs + WORD $0x858041a9 // ldr z9, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x858c4047 // ldr z7, [x2, #96, MUL VL] + WORD $0x858c4448 // ldr z8, [x2, #97, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x858c4847 // ldr z7, [x2, #98, MUL VL] + WORD $0x858c4c48 // ldr z8, [x2, #99, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x858c5047 // ldr z7, [x2, #100, MUL VL] + WORD $0x858c5448 // ldr z8, [x2, #101, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x858c5847 // ldr z7, [x2, #102, MUL VL] + WORD $0x858c5c48 // ldr z8, [x2, #103, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x858d4047 // ldr z7, [x2, #104, MUL VL] + WORD $0x858d4448 // ldr z8, [x2, #105, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x858d4847 // ldr z7, [x2, #106, MUL VL] + WORD $0x858d4c48 // ldr z8, [x2, #107, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x6_store + + // Load and process 32 bytes from input 9 to 6 outputs + WORD $0x85804069 // ldr z9, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x858d5047 // ldr z7, [x2, #108, MUL VL] + WORD $0x858d5448 // ldr z8, [x2, #109, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x858d5847 // ldr z7, [x2, #110, MUL VL] + WORD $0x858d5c48 // ldr z8, [x2, #111, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x858e4047 // ldr z7, [x2, #112, MUL VL] + WORD $0x858e4448 // ldr z8, [x2, #113, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x858e4847 // ldr z7, [x2, #114, MUL VL] + WORD $0x858e4c48 // ldr z8, [x2, #115, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x858e5047 // ldr z7, [x2, #116, MUL VL] + WORD $0x858e5448 // ldr z8, [x2, #117, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x858e5847 // ldr z7, [x2, #118, MUL VL] + WORD $0x858e5c48 // ldr z8, [x2, #119, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + +mulSve_10x6_store: + // Store 6 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + MOVD 120(R14), R6 + WORD $0xe5ef40c5 // st1d { z5.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x6_loop + +mulSve_10x6_end: + RET + +// func mulSve_10x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x6Xor(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 131 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x6Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c6 // mov z6.d, x6 + WORD $0x052120c6 // dup z6.b, z6.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x6Xor_loop: + // Load and process 32 bytes from input 0 to 6 outputs + WORD $0x85804029 // ldr z9, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + MOVD (R14), R6 + WORD $0xa5ef40c0 // ld1d { z0.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85804047 // ldr z7, [x2] + WORD $0x85804448 // ldr z8, [x2, #1, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + MOVD 24(R14), R6 + WORD $0xa5ef40c1 // ld1d { z1.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85804847 // ldr z7, [x2, #2, MUL VL] + WORD $0x85804c48 // ldr z8, [x2, #3, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + MOVD 48(R14), R6 + WORD $0xa5ef40c2 // ld1d { z2.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85805047 // ldr z7, [x2, #4, MUL VL] + WORD $0x85805448 // ldr z8, [x2, #5, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + MOVD 72(R14), R6 + WORD $0xa5ef40c3 // ld1d { z3.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85805847 // ldr z7, [x2, #6, MUL VL] + WORD $0x85805c48 // ldr z8, [x2, #7, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + MOVD 96(R14), R6 + WORD $0xa5ef40c4 // ld1d { z4.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85814047 // ldr z7, [x2, #8, MUL VL] + WORD $0x85814448 // ldr z8, [x2, #9, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + MOVD 120(R14), R6 + WORD $0xa5ef40c5 // ld1d { z5.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85814847 // ldr z7, [x2, #10, MUL VL] + WORD $0x85814c48 // ldr z8, [x2, #11, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x6Xor_store + + // Load and process 32 bytes from input 1 to 6 outputs + WORD $0x85804089 // ldr z9, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85815047 // ldr z7, [x2, #12, MUL VL] + WORD $0x85815448 // ldr z8, [x2, #13, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85815847 // ldr z7, [x2, #14, MUL VL] + WORD $0x85815c48 // ldr z8, [x2, #15, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85824047 // ldr z7, [x2, #16, MUL VL] + WORD $0x85824448 // ldr z8, [x2, #17, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85824847 // ldr z7, [x2, #18, MUL VL] + WORD $0x85824c48 // ldr z8, [x2, #19, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x85825047 // ldr z7, [x2, #20, MUL VL] + WORD $0x85825448 // ldr z8, [x2, #21, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x85825847 // ldr z7, [x2, #22, MUL VL] + WORD $0x85825c48 // ldr z8, [x2, #23, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x6Xor_store + + // Load and process 32 bytes from input 2 to 6 outputs + WORD $0x858040a9 // ldr z9, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85834047 // ldr z7, [x2, #24, MUL VL] + WORD $0x85834448 // ldr z8, [x2, #25, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85834847 // ldr z7, [x2, #26, MUL VL] + WORD $0x85834c48 // ldr z8, [x2, #27, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85835047 // ldr z7, [x2, #28, MUL VL] + WORD $0x85835448 // ldr z8, [x2, #29, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85835847 // ldr z7, [x2, #30, MUL VL] + WORD $0x85835c48 // ldr z8, [x2, #31, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x85844047 // ldr z7, [x2, #32, MUL VL] + WORD $0x85844448 // ldr z8, [x2, #33, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x85844847 // ldr z7, [x2, #34, MUL VL] + WORD $0x85844c48 // ldr z8, [x2, #35, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x6Xor_store + + // Load and process 32 bytes from input 3 to 6 outputs + WORD $0x85804109 // ldr z9, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85845047 // ldr z7, [x2, #36, MUL VL] + WORD $0x85845448 // ldr z8, [x2, #37, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85845847 // ldr z7, [x2, #38, MUL VL] + WORD $0x85845c48 // ldr z8, [x2, #39, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85854047 // ldr z7, [x2, #40, MUL VL] + WORD $0x85854448 // ldr z8, [x2, #41, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85854847 // ldr z7, [x2, #42, MUL VL] + WORD $0x85854c48 // ldr z8, [x2, #43, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x85855047 // ldr z7, [x2, #44, MUL VL] + WORD $0x85855448 // ldr z8, [x2, #45, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x85855847 // ldr z7, [x2, #46, MUL VL] + WORD $0x85855c48 // ldr z8, [x2, #47, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x6Xor_store + + // Load and process 32 bytes from input 4 to 6 outputs + WORD $0x85804129 // ldr z9, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85864047 // ldr z7, [x2, #48, MUL VL] + WORD $0x85864448 // ldr z8, [x2, #49, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85864847 // ldr z7, [x2, #50, MUL VL] + WORD $0x85864c48 // ldr z8, [x2, #51, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85865047 // ldr z7, [x2, #52, MUL VL] + WORD $0x85865448 // ldr z8, [x2, #53, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85865847 // ldr z7, [x2, #54, MUL VL] + WORD $0x85865c48 // ldr z8, [x2, #55, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x85874047 // ldr z7, [x2, #56, MUL VL] + WORD $0x85874448 // ldr z8, [x2, #57, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x85874847 // ldr z7, [x2, #58, MUL VL] + WORD $0x85874c48 // ldr z8, [x2, #59, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x6Xor_store + + // Load and process 32 bytes from input 5 to 6 outputs + WORD $0x85804149 // ldr z9, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85875047 // ldr z7, [x2, #60, MUL VL] + WORD $0x85875448 // ldr z8, [x2, #61, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85875847 // ldr z7, [x2, #62, MUL VL] + WORD $0x85875c48 // ldr z8, [x2, #63, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85884047 // ldr z7, [x2, #64, MUL VL] + WORD $0x85884448 // ldr z8, [x2, #65, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85884847 // ldr z7, [x2, #66, MUL VL] + WORD $0x85884c48 // ldr z8, [x2, #67, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x85885047 // ldr z7, [x2, #68, MUL VL] + WORD $0x85885448 // ldr z8, [x2, #69, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x85885847 // ldr z7, [x2, #70, MUL VL] + WORD $0x85885c48 // ldr z8, [x2, #71, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x6Xor_store + + // Load and process 32 bytes from input 6 to 6 outputs + WORD $0x85804169 // ldr z9, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x85894047 // ldr z7, [x2, #72, MUL VL] + WORD $0x85894448 // ldr z8, [x2, #73, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x85894847 // ldr z7, [x2, #74, MUL VL] + WORD $0x85894c48 // ldr z8, [x2, #75, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x85895047 // ldr z7, [x2, #76, MUL VL] + WORD $0x85895448 // ldr z8, [x2, #77, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x85895847 // ldr z7, [x2, #78, MUL VL] + WORD $0x85895c48 // ldr z8, [x2, #79, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x858a4047 // ldr z7, [x2, #80, MUL VL] + WORD $0x858a4448 // ldr z8, [x2, #81, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x858a4847 // ldr z7, [x2, #82, MUL VL] + WORD $0x858a4c48 // ldr z8, [x2, #83, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x6Xor_store + + // Load and process 32 bytes from input 7 to 6 outputs + WORD $0x85804189 // ldr z9, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x858a5047 // ldr z7, [x2, #84, MUL VL] + WORD $0x858a5448 // ldr z8, [x2, #85, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x858a5847 // ldr z7, [x2, #86, MUL VL] + WORD $0x858a5c48 // ldr z8, [x2, #87, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x858b4047 // ldr z7, [x2, #88, MUL VL] + WORD $0x858b4448 // ldr z8, [x2, #89, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x858b4847 // ldr z7, [x2, #90, MUL VL] + WORD $0x858b4c48 // ldr z8, [x2, #91, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x858b5047 // ldr z7, [x2, #92, MUL VL] + WORD $0x858b5448 // ldr z8, [x2, #93, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x858b5847 // ldr z7, [x2, #94, MUL VL] + WORD $0x858b5c48 // ldr z8, [x2, #95, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x6Xor_store + + // Load and process 32 bytes from input 8 to 6 outputs + WORD $0x858041a9 // ldr z9, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x858c4047 // ldr z7, [x2, #96, MUL VL] + WORD $0x858c4448 // ldr z8, [x2, #97, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x858c4847 // ldr z7, [x2, #98, MUL VL] + WORD $0x858c4c48 // ldr z8, [x2, #99, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x858c5047 // ldr z7, [x2, #100, MUL VL] + WORD $0x858c5448 // ldr z8, [x2, #101, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x858c5847 // ldr z7, [x2, #102, MUL VL] + WORD $0x858c5c48 // ldr z8, [x2, #103, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x858d4047 // ldr z7, [x2, #104, MUL VL] + WORD $0x858d4448 // ldr z8, [x2, #105, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x858d4847 // ldr z7, [x2, #106, MUL VL] + WORD $0x858d4c48 // ldr z8, [x2, #107, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x6Xor_store + + // Load and process 32 bytes from input 9 to 6 outputs + WORD $0x85804069 // ldr z9, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc952a // lsr z10.d, z9.d, #4 + WORD $0x04263129 // and z9.d, z9.d, z6.d + WORD $0x0426314a // and z10.d, z10.d, z6.d + WORD $0x858d5047 // ldr z7, [x2, #108, MUL VL] + WORD $0x858d5448 // ldr z8, [x2, #109, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73000 // eor z0.d, z0.d, z7.d + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x858d5847 // ldr z7, [x2, #110, MUL VL] + WORD $0x858d5c48 // ldr z8, [x2, #111, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73021 // eor z1.d, z1.d, z7.d + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x858e4047 // ldr z7, [x2, #112, MUL VL] + WORD $0x858e4448 // ldr z8, [x2, #113, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73042 // eor z2.d, z2.d, z7.d + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x858e4847 // ldr z7, [x2, #114, MUL VL] + WORD $0x858e4c48 // ldr z8, [x2, #115, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73063 // eor z3.d, z3.d, z7.d + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x858e5047 // ldr z7, [x2, #116, MUL VL] + WORD $0x858e5448 // ldr z8, [x2, #117, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a73084 // eor z4.d, z4.d, z7.d + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x858e5847 // ldr z7, [x2, #118, MUL VL] + WORD $0x858e5c48 // ldr z8, [x2, #119, MUL VL] + WORD $0x052930e7 // tbl z7.b, z7.b, z9.b + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x04a730a5 // eor z5.d, z5.d, z7.d + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + +mulSve_10x6Xor_store: + // Store 6 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + MOVD 120(R14), R6 + WORD $0xe5ef40c5 // st1d { z5.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x6Xor_loop + +mulSve_10x6Xor_end: + RET + +// func mulSve_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x7(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 152 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x7_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c7 // mov z7.d, x6 + WORD $0x052120e7 // dup z7.b, z7.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + WORD $0x8580402a // ldr z10, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85804048 // ldr z8, [x2] + WORD $0x85804449 // ldr z9, [x2, #1, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83120 // eor z0.d, z9.d, z8.d + WORD $0x85804848 // ldr z8, [x2, #2, MUL VL] + WORD $0x85804c49 // ldr z9, [x2, #3, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83121 // eor z1.d, z9.d, z8.d + WORD $0x85805048 // ldr z8, [x2, #4, MUL VL] + WORD $0x85805449 // ldr z9, [x2, #5, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83122 // eor z2.d, z9.d, z8.d + WORD $0x85805848 // ldr z8, [x2, #6, MUL VL] + WORD $0x85805c49 // ldr z9, [x2, #7, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83123 // eor z3.d, z9.d, z8.d + WORD $0x85814048 // ldr z8, [x2, #8, MUL VL] + WORD $0x85814449 // ldr z9, [x2, #9, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83124 // eor z4.d, z9.d, z8.d + WORD $0x85814848 // ldr z8, [x2, #10, MUL VL] + WORD $0x85814c49 // ldr z9, [x2, #11, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83125 // eor z5.d, z9.d, z8.d + WORD $0x85815048 // ldr z8, [x2, #12, MUL VL] + WORD $0x85815449 // ldr z9, [x2, #13, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83126 // eor z6.d, z9.d, z8.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x7_store + + // Load and process 32 bytes from input 1 to 7 outputs + WORD $0x8580408a // ldr z10, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85815848 // ldr z8, [x2, #14, MUL VL] + WORD $0x85815c49 // ldr z9, [x2, #15, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85824048 // ldr z8, [x2, #16, MUL VL] + WORD $0x85824449 // ldr z9, [x2, #17, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85824848 // ldr z8, [x2, #18, MUL VL] + WORD $0x85824c49 // ldr z9, [x2, #19, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85825048 // ldr z8, [x2, #20, MUL VL] + WORD $0x85825449 // ldr z9, [x2, #21, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85825848 // ldr z8, [x2, #22, MUL VL] + WORD $0x85825c49 // ldr z9, [x2, #23, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x85834048 // ldr z8, [x2, #24, MUL VL] + WORD $0x85834449 // ldr z9, [x2, #25, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x85834848 // ldr z8, [x2, #26, MUL VL] + WORD $0x85834c49 // ldr z9, [x2, #27, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x7_store + + // Load and process 32 bytes from input 2 to 7 outputs + WORD $0x858040aa // ldr z10, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85835048 // ldr z8, [x2, #28, MUL VL] + WORD $0x85835449 // ldr z9, [x2, #29, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85835848 // ldr z8, [x2, #30, MUL VL] + WORD $0x85835c49 // ldr z9, [x2, #31, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85844048 // ldr z8, [x2, #32, MUL VL] + WORD $0x85844449 // ldr z9, [x2, #33, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85844848 // ldr z8, [x2, #34, MUL VL] + WORD $0x85844c49 // ldr z9, [x2, #35, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85845048 // ldr z8, [x2, #36, MUL VL] + WORD $0x85845449 // ldr z9, [x2, #37, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x85845848 // ldr z8, [x2, #38, MUL VL] + WORD $0x85845c49 // ldr z9, [x2, #39, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x85854048 // ldr z8, [x2, #40, MUL VL] + WORD $0x85854449 // ldr z9, [x2, #41, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x7_store + + // Load and process 32 bytes from input 3 to 7 outputs + WORD $0x8580410a // ldr z10, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85854848 // ldr z8, [x2, #42, MUL VL] + WORD $0x85854c49 // ldr z9, [x2, #43, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85855048 // ldr z8, [x2, #44, MUL VL] + WORD $0x85855449 // ldr z9, [x2, #45, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85855848 // ldr z8, [x2, #46, MUL VL] + WORD $0x85855c49 // ldr z9, [x2, #47, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85864048 // ldr z8, [x2, #48, MUL VL] + WORD $0x85864449 // ldr z9, [x2, #49, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85864848 // ldr z8, [x2, #50, MUL VL] + WORD $0x85864c49 // ldr z9, [x2, #51, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x85865048 // ldr z8, [x2, #52, MUL VL] + WORD $0x85865449 // ldr z9, [x2, #53, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x85865848 // ldr z8, [x2, #54, MUL VL] + WORD $0x85865c49 // ldr z9, [x2, #55, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x7_store + + // Load and process 32 bytes from input 4 to 7 outputs + WORD $0x8580412a // ldr z10, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85874048 // ldr z8, [x2, #56, MUL VL] + WORD $0x85874449 // ldr z9, [x2, #57, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85874848 // ldr z8, [x2, #58, MUL VL] + WORD $0x85874c49 // ldr z9, [x2, #59, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85875048 // ldr z8, [x2, #60, MUL VL] + WORD $0x85875449 // ldr z9, [x2, #61, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85875848 // ldr z8, [x2, #62, MUL VL] + WORD $0x85875c49 // ldr z9, [x2, #63, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85884048 // ldr z8, [x2, #64, MUL VL] + WORD $0x85884449 // ldr z9, [x2, #65, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x85884848 // ldr z8, [x2, #66, MUL VL] + WORD $0x85884c49 // ldr z9, [x2, #67, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x85885048 // ldr z8, [x2, #68, MUL VL] + WORD $0x85885449 // ldr z9, [x2, #69, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x7_store + + // Load and process 32 bytes from input 5 to 7 outputs + WORD $0x8580414a // ldr z10, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85885848 // ldr z8, [x2, #70, MUL VL] + WORD $0x85885c49 // ldr z9, [x2, #71, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85894048 // ldr z8, [x2, #72, MUL VL] + WORD $0x85894449 // ldr z9, [x2, #73, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85894848 // ldr z8, [x2, #74, MUL VL] + WORD $0x85894c49 // ldr z9, [x2, #75, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85895048 // ldr z8, [x2, #76, MUL VL] + WORD $0x85895449 // ldr z9, [x2, #77, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85895848 // ldr z8, [x2, #78, MUL VL] + WORD $0x85895c49 // ldr z9, [x2, #79, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x858a4048 // ldr z8, [x2, #80, MUL VL] + WORD $0x858a4449 // ldr z9, [x2, #81, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x858a4848 // ldr z8, [x2, #82, MUL VL] + WORD $0x858a4c49 // ldr z9, [x2, #83, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x7_store + + // Load and process 32 bytes from input 6 to 7 outputs + WORD $0x8580416a // ldr z10, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x858a5048 // ldr z8, [x2, #84, MUL VL] + WORD $0x858a5449 // ldr z9, [x2, #85, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x858a5848 // ldr z8, [x2, #86, MUL VL] + WORD $0x858a5c49 // ldr z9, [x2, #87, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x858b4048 // ldr z8, [x2, #88, MUL VL] + WORD $0x858b4449 // ldr z9, [x2, #89, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x858b4848 // ldr z8, [x2, #90, MUL VL] + WORD $0x858b4c49 // ldr z9, [x2, #91, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x858b5048 // ldr z8, [x2, #92, MUL VL] + WORD $0x858b5449 // ldr z9, [x2, #93, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x858b5848 // ldr z8, [x2, #94, MUL VL] + WORD $0x858b5c49 // ldr z9, [x2, #95, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x858c4048 // ldr z8, [x2, #96, MUL VL] + WORD $0x858c4449 // ldr z9, [x2, #97, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x7_store + + // Load and process 32 bytes from input 7 to 7 outputs + WORD $0x8580418a // ldr z10, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x858c4848 // ldr z8, [x2, #98, MUL VL] + WORD $0x858c4c49 // ldr z9, [x2, #99, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x858c5048 // ldr z8, [x2, #100, MUL VL] + WORD $0x858c5449 // ldr z9, [x2, #101, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x858c5848 // ldr z8, [x2, #102, MUL VL] + WORD $0x858c5c49 // ldr z9, [x2, #103, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x858d4048 // ldr z8, [x2, #104, MUL VL] + WORD $0x858d4449 // ldr z9, [x2, #105, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x858d4848 // ldr z8, [x2, #106, MUL VL] + WORD $0x858d4c49 // ldr z9, [x2, #107, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x858d5048 // ldr z8, [x2, #108, MUL VL] + WORD $0x858d5449 // ldr z9, [x2, #109, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x858d5848 // ldr z8, [x2, #110, MUL VL] + WORD $0x858d5c49 // ldr z9, [x2, #111, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x7_store + + // Load and process 32 bytes from input 8 to 7 outputs + WORD $0x858041aa // ldr z10, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x858e4048 // ldr z8, [x2, #112, MUL VL] + WORD $0x858e4449 // ldr z9, [x2, #113, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x858e4848 // ldr z8, [x2, #114, MUL VL] + WORD $0x858e4c49 // ldr z9, [x2, #115, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x858e5048 // ldr z8, [x2, #116, MUL VL] + WORD $0x858e5449 // ldr z9, [x2, #117, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x858e5848 // ldr z8, [x2, #118, MUL VL] + WORD $0x858e5c49 // ldr z9, [x2, #119, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x858f4048 // ldr z8, [x2, #120, MUL VL] + WORD $0x858f4449 // ldr z9, [x2, #121, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x858f4848 // ldr z8, [x2, #122, MUL VL] + WORD $0x858f4c49 // ldr z9, [x2, #123, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x858f5048 // ldr z8, [x2, #124, MUL VL] + WORD $0x858f5449 // ldr z9, [x2, #125, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x7_store + + // Load and process 32 bytes from input 9 to 7 outputs + WORD $0x8580406a // ldr z10, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x858f5848 // ldr z8, [x2, #126, MUL VL] + WORD $0x858f5c49 // ldr z9, [x2, #127, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85904048 // ldr z8, [x2, #128, MUL VL] + WORD $0x85904449 // ldr z9, [x2, #129, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85904848 // ldr z8, [x2, #130, MUL VL] + WORD $0x85904c49 // ldr z9, [x2, #131, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85905048 // ldr z8, [x2, #132, MUL VL] + WORD $0x85905449 // ldr z9, [x2, #133, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85905848 // ldr z8, [x2, #134, MUL VL] + WORD $0x85905c49 // ldr z9, [x2, #135, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x85914048 // ldr z8, [x2, #136, MUL VL] + WORD $0x85914449 // ldr z9, [x2, #137, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x85914848 // ldr z8, [x2, #138, MUL VL] + WORD $0x85914c49 // ldr z9, [x2, #139, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + +mulSve_10x7_store: + // Store 7 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + MOVD 120(R14), R6 + WORD $0xe5ef40c5 // st1d { z5.d }, p0, [x6, x15, lsl #3] + MOVD 144(R14), R6 + WORD $0xe5ef40c6 // st1d { z6.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x7_loop + +mulSve_10x7_end: + RET + +// func mulSve_10x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x7Xor(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 152 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x7Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c7 // mov z7.d, x6 + WORD $0x052120e7 // dup z7.b, z7.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x7Xor_loop: + // Load and process 32 bytes from input 0 to 7 outputs + WORD $0x8580402a // ldr z10, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + MOVD (R14), R6 + WORD $0xa5ef40c0 // ld1d { z0.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85804048 // ldr z8, [x2] + WORD $0x85804449 // ldr z9, [x2, #1, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + MOVD 24(R14), R6 + WORD $0xa5ef40c1 // ld1d { z1.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85804848 // ldr z8, [x2, #2, MUL VL] + WORD $0x85804c49 // ldr z9, [x2, #3, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + MOVD 48(R14), R6 + WORD $0xa5ef40c2 // ld1d { z2.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85805048 // ldr z8, [x2, #4, MUL VL] + WORD $0x85805449 // ldr z9, [x2, #5, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + MOVD 72(R14), R6 + WORD $0xa5ef40c3 // ld1d { z3.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85805848 // ldr z8, [x2, #6, MUL VL] + WORD $0x85805c49 // ldr z9, [x2, #7, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + MOVD 96(R14), R6 + WORD $0xa5ef40c4 // ld1d { z4.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85814048 // ldr z8, [x2, #8, MUL VL] + WORD $0x85814449 // ldr z9, [x2, #9, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + MOVD 120(R14), R6 + WORD $0xa5ef40c5 // ld1d { z5.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85814848 // ldr z8, [x2, #10, MUL VL] + WORD $0x85814c49 // ldr z9, [x2, #11, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + MOVD 144(R14), R6 + WORD $0xa5ef40c6 // ld1d { z6.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85815048 // ldr z8, [x2, #12, MUL VL] + WORD $0x85815449 // ldr z9, [x2, #13, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x7Xor_store + + // Load and process 32 bytes from input 1 to 7 outputs + WORD $0x8580408a // ldr z10, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85815848 // ldr z8, [x2, #14, MUL VL] + WORD $0x85815c49 // ldr z9, [x2, #15, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85824048 // ldr z8, [x2, #16, MUL VL] + WORD $0x85824449 // ldr z9, [x2, #17, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85824848 // ldr z8, [x2, #18, MUL VL] + WORD $0x85824c49 // ldr z9, [x2, #19, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85825048 // ldr z8, [x2, #20, MUL VL] + WORD $0x85825449 // ldr z9, [x2, #21, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85825848 // ldr z8, [x2, #22, MUL VL] + WORD $0x85825c49 // ldr z9, [x2, #23, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x85834048 // ldr z8, [x2, #24, MUL VL] + WORD $0x85834449 // ldr z9, [x2, #25, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x85834848 // ldr z8, [x2, #26, MUL VL] + WORD $0x85834c49 // ldr z9, [x2, #27, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x7Xor_store + + // Load and process 32 bytes from input 2 to 7 outputs + WORD $0x858040aa // ldr z10, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85835048 // ldr z8, [x2, #28, MUL VL] + WORD $0x85835449 // ldr z9, [x2, #29, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85835848 // ldr z8, [x2, #30, MUL VL] + WORD $0x85835c49 // ldr z9, [x2, #31, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85844048 // ldr z8, [x2, #32, MUL VL] + WORD $0x85844449 // ldr z9, [x2, #33, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85844848 // ldr z8, [x2, #34, MUL VL] + WORD $0x85844c49 // ldr z9, [x2, #35, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85845048 // ldr z8, [x2, #36, MUL VL] + WORD $0x85845449 // ldr z9, [x2, #37, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x85845848 // ldr z8, [x2, #38, MUL VL] + WORD $0x85845c49 // ldr z9, [x2, #39, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x85854048 // ldr z8, [x2, #40, MUL VL] + WORD $0x85854449 // ldr z9, [x2, #41, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x7Xor_store + + // Load and process 32 bytes from input 3 to 7 outputs + WORD $0x8580410a // ldr z10, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85854848 // ldr z8, [x2, #42, MUL VL] + WORD $0x85854c49 // ldr z9, [x2, #43, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85855048 // ldr z8, [x2, #44, MUL VL] + WORD $0x85855449 // ldr z9, [x2, #45, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85855848 // ldr z8, [x2, #46, MUL VL] + WORD $0x85855c49 // ldr z9, [x2, #47, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85864048 // ldr z8, [x2, #48, MUL VL] + WORD $0x85864449 // ldr z9, [x2, #49, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85864848 // ldr z8, [x2, #50, MUL VL] + WORD $0x85864c49 // ldr z9, [x2, #51, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x85865048 // ldr z8, [x2, #52, MUL VL] + WORD $0x85865449 // ldr z9, [x2, #53, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x85865848 // ldr z8, [x2, #54, MUL VL] + WORD $0x85865c49 // ldr z9, [x2, #55, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x7Xor_store + + // Load and process 32 bytes from input 4 to 7 outputs + WORD $0x8580412a // ldr z10, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85874048 // ldr z8, [x2, #56, MUL VL] + WORD $0x85874449 // ldr z9, [x2, #57, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85874848 // ldr z8, [x2, #58, MUL VL] + WORD $0x85874c49 // ldr z9, [x2, #59, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85875048 // ldr z8, [x2, #60, MUL VL] + WORD $0x85875449 // ldr z9, [x2, #61, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85875848 // ldr z8, [x2, #62, MUL VL] + WORD $0x85875c49 // ldr z9, [x2, #63, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85884048 // ldr z8, [x2, #64, MUL VL] + WORD $0x85884449 // ldr z9, [x2, #65, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x85884848 // ldr z8, [x2, #66, MUL VL] + WORD $0x85884c49 // ldr z9, [x2, #67, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x85885048 // ldr z8, [x2, #68, MUL VL] + WORD $0x85885449 // ldr z9, [x2, #69, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x7Xor_store + + // Load and process 32 bytes from input 5 to 7 outputs + WORD $0x8580414a // ldr z10, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x85885848 // ldr z8, [x2, #70, MUL VL] + WORD $0x85885c49 // ldr z9, [x2, #71, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85894048 // ldr z8, [x2, #72, MUL VL] + WORD $0x85894449 // ldr z9, [x2, #73, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85894848 // ldr z8, [x2, #74, MUL VL] + WORD $0x85894c49 // ldr z9, [x2, #75, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85895048 // ldr z8, [x2, #76, MUL VL] + WORD $0x85895449 // ldr z9, [x2, #77, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85895848 // ldr z8, [x2, #78, MUL VL] + WORD $0x85895c49 // ldr z9, [x2, #79, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x858a4048 // ldr z8, [x2, #80, MUL VL] + WORD $0x858a4449 // ldr z9, [x2, #81, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x858a4848 // ldr z8, [x2, #82, MUL VL] + WORD $0x858a4c49 // ldr z9, [x2, #83, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x7Xor_store + + // Load and process 32 bytes from input 6 to 7 outputs + WORD $0x8580416a // ldr z10, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x858a5048 // ldr z8, [x2, #84, MUL VL] + WORD $0x858a5449 // ldr z9, [x2, #85, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x858a5848 // ldr z8, [x2, #86, MUL VL] + WORD $0x858a5c49 // ldr z9, [x2, #87, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x858b4048 // ldr z8, [x2, #88, MUL VL] + WORD $0x858b4449 // ldr z9, [x2, #89, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x858b4848 // ldr z8, [x2, #90, MUL VL] + WORD $0x858b4c49 // ldr z9, [x2, #91, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x858b5048 // ldr z8, [x2, #92, MUL VL] + WORD $0x858b5449 // ldr z9, [x2, #93, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x858b5848 // ldr z8, [x2, #94, MUL VL] + WORD $0x858b5c49 // ldr z9, [x2, #95, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x858c4048 // ldr z8, [x2, #96, MUL VL] + WORD $0x858c4449 // ldr z9, [x2, #97, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x7Xor_store + + // Load and process 32 bytes from input 7 to 7 outputs + WORD $0x8580418a // ldr z10, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x858c4848 // ldr z8, [x2, #98, MUL VL] + WORD $0x858c4c49 // ldr z9, [x2, #99, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x858c5048 // ldr z8, [x2, #100, MUL VL] + WORD $0x858c5449 // ldr z9, [x2, #101, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x858c5848 // ldr z8, [x2, #102, MUL VL] + WORD $0x858c5c49 // ldr z9, [x2, #103, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x858d4048 // ldr z8, [x2, #104, MUL VL] + WORD $0x858d4449 // ldr z9, [x2, #105, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x858d4848 // ldr z8, [x2, #106, MUL VL] + WORD $0x858d4c49 // ldr z9, [x2, #107, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x858d5048 // ldr z8, [x2, #108, MUL VL] + WORD $0x858d5449 // ldr z9, [x2, #109, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x858d5848 // ldr z8, [x2, #110, MUL VL] + WORD $0x858d5c49 // ldr z9, [x2, #111, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x7Xor_store + + // Load and process 32 bytes from input 8 to 7 outputs + WORD $0x858041aa // ldr z10, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x858e4048 // ldr z8, [x2, #112, MUL VL] + WORD $0x858e4449 // ldr z9, [x2, #113, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x858e4848 // ldr z8, [x2, #114, MUL VL] + WORD $0x858e4c49 // ldr z9, [x2, #115, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x858e5048 // ldr z8, [x2, #116, MUL VL] + WORD $0x858e5449 // ldr z9, [x2, #117, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x858e5848 // ldr z8, [x2, #118, MUL VL] + WORD $0x858e5c49 // ldr z9, [x2, #119, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x858f4048 // ldr z8, [x2, #120, MUL VL] + WORD $0x858f4449 // ldr z9, [x2, #121, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x858f4848 // ldr z8, [x2, #122, MUL VL] + WORD $0x858f4c49 // ldr z9, [x2, #123, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x858f5048 // ldr z8, [x2, #124, MUL VL] + WORD $0x858f5449 // ldr z9, [x2, #125, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x7Xor_store + + // Load and process 32 bytes from input 9 to 7 outputs + WORD $0x8580406a // ldr z10, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc954b // lsr z11.d, z10.d, #4 + WORD $0x0427314a // and z10.d, z10.d, z7.d + WORD $0x0427316b // and z11.d, z11.d, z7.d + WORD $0x858f5848 // ldr z8, [x2, #126, MUL VL] + WORD $0x858f5c49 // ldr z9, [x2, #127, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83000 // eor z0.d, z0.d, z8.d + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x85904048 // ldr z8, [x2, #128, MUL VL] + WORD $0x85904449 // ldr z9, [x2, #129, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83021 // eor z1.d, z1.d, z8.d + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x85904848 // ldr z8, [x2, #130, MUL VL] + WORD $0x85904c49 // ldr z9, [x2, #131, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83042 // eor z2.d, z2.d, z8.d + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x85905048 // ldr z8, [x2, #132, MUL VL] + WORD $0x85905449 // ldr z9, [x2, #133, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83063 // eor z3.d, z3.d, z8.d + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x85905848 // ldr z8, [x2, #134, MUL VL] + WORD $0x85905c49 // ldr z9, [x2, #135, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a83084 // eor z4.d, z4.d, z8.d + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x85914048 // ldr z8, [x2, #136, MUL VL] + WORD $0x85914449 // ldr z9, [x2, #137, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830a5 // eor z5.d, z5.d, z8.d + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x85914848 // ldr z8, [x2, #138, MUL VL] + WORD $0x85914c49 // ldr z9, [x2, #139, MUL VL] + WORD $0x052a3108 // tbl z8.b, z8.b, z10.b + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x04a830c6 // eor z6.d, z6.d, z8.d + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + +mulSve_10x7Xor_store: + // Store 7 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + MOVD 120(R14), R6 + WORD $0xe5ef40c5 // st1d { z5.d }, p0, [x6, x15, lsl #3] + MOVD 144(R14), R6 + WORD $0xe5ef40c6 // st1d { z6.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x7Xor_loop + +mulSve_10x7Xor_end: + RET + +// func mulSve_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x8(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 173 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x8_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c8 // mov z8.d, x6 + WORD $0x05212108 // dup z8.b, z8.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + WORD $0x8580402b // ldr z11, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85804049 // ldr z9, [x2] + WORD $0x8580444a // ldr z10, [x2, #1, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93140 // eor z0.d, z10.d, z9.d + WORD $0x85804849 // ldr z9, [x2, #2, MUL VL] + WORD $0x85804c4a // ldr z10, [x2, #3, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93141 // eor z1.d, z10.d, z9.d + WORD $0x85805049 // ldr z9, [x2, #4, MUL VL] + WORD $0x8580544a // ldr z10, [x2, #5, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93142 // eor z2.d, z10.d, z9.d + WORD $0x85805849 // ldr z9, [x2, #6, MUL VL] + WORD $0x85805c4a // ldr z10, [x2, #7, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93143 // eor z3.d, z10.d, z9.d + WORD $0x85814049 // ldr z9, [x2, #8, MUL VL] + WORD $0x8581444a // ldr z10, [x2, #9, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93144 // eor z4.d, z10.d, z9.d + WORD $0x85814849 // ldr z9, [x2, #10, MUL VL] + WORD $0x85814c4a // ldr z10, [x2, #11, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93145 // eor z5.d, z10.d, z9.d + WORD $0x85815049 // ldr z9, [x2, #12, MUL VL] + WORD $0x8581544a // ldr z10, [x2, #13, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93146 // eor z6.d, z10.d, z9.d + WORD $0x85815849 // ldr z9, [x2, #14, MUL VL] + WORD $0x85815c4a // ldr z10, [x2, #15, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93147 // eor z7.d, z10.d, z9.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x8_store + + // Load and process 32 bytes from input 1 to 8 outputs + WORD $0x8580408b // ldr z11, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85824049 // ldr z9, [x2, #16, MUL VL] + WORD $0x8582444a // ldr z10, [x2, #17, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85824849 // ldr z9, [x2, #18, MUL VL] + WORD $0x85824c4a // ldr z10, [x2, #19, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85825049 // ldr z9, [x2, #20, MUL VL] + WORD $0x8582544a // ldr z10, [x2, #21, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85825849 // ldr z9, [x2, #22, MUL VL] + WORD $0x85825c4a // ldr z10, [x2, #23, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85834049 // ldr z9, [x2, #24, MUL VL] + WORD $0x8583444a // ldr z10, [x2, #25, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85834849 // ldr z9, [x2, #26, MUL VL] + WORD $0x85834c4a // ldr z10, [x2, #27, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85835049 // ldr z9, [x2, #28, MUL VL] + WORD $0x8583544a // ldr z10, [x2, #29, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85835849 // ldr z9, [x2, #30, MUL VL] + WORD $0x85835c4a // ldr z10, [x2, #31, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x8_store + + // Load and process 32 bytes from input 2 to 8 outputs + WORD $0x858040ab // ldr z11, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85844049 // ldr z9, [x2, #32, MUL VL] + WORD $0x8584444a // ldr z10, [x2, #33, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85844849 // ldr z9, [x2, #34, MUL VL] + WORD $0x85844c4a // ldr z10, [x2, #35, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85845049 // ldr z9, [x2, #36, MUL VL] + WORD $0x8584544a // ldr z10, [x2, #37, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85845849 // ldr z9, [x2, #38, MUL VL] + WORD $0x85845c4a // ldr z10, [x2, #39, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85854049 // ldr z9, [x2, #40, MUL VL] + WORD $0x8585444a // ldr z10, [x2, #41, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85854849 // ldr z9, [x2, #42, MUL VL] + WORD $0x85854c4a // ldr z10, [x2, #43, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85855049 // ldr z9, [x2, #44, MUL VL] + WORD $0x8585544a // ldr z10, [x2, #45, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85855849 // ldr z9, [x2, #46, MUL VL] + WORD $0x85855c4a // ldr z10, [x2, #47, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x8_store + + // Load and process 32 bytes from input 3 to 8 outputs + WORD $0x8580410b // ldr z11, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85864049 // ldr z9, [x2, #48, MUL VL] + WORD $0x8586444a // ldr z10, [x2, #49, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85864849 // ldr z9, [x2, #50, MUL VL] + WORD $0x85864c4a // ldr z10, [x2, #51, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85865049 // ldr z9, [x2, #52, MUL VL] + WORD $0x8586544a // ldr z10, [x2, #53, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85865849 // ldr z9, [x2, #54, MUL VL] + WORD $0x85865c4a // ldr z10, [x2, #55, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85874049 // ldr z9, [x2, #56, MUL VL] + WORD $0x8587444a // ldr z10, [x2, #57, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85874849 // ldr z9, [x2, #58, MUL VL] + WORD $0x85874c4a // ldr z10, [x2, #59, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85875049 // ldr z9, [x2, #60, MUL VL] + WORD $0x8587544a // ldr z10, [x2, #61, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85875849 // ldr z9, [x2, #62, MUL VL] + WORD $0x85875c4a // ldr z10, [x2, #63, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x8_store + + // Load and process 32 bytes from input 4 to 8 outputs + WORD $0x8580412b // ldr z11, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85884049 // ldr z9, [x2, #64, MUL VL] + WORD $0x8588444a // ldr z10, [x2, #65, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85884849 // ldr z9, [x2, #66, MUL VL] + WORD $0x85884c4a // ldr z10, [x2, #67, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85885049 // ldr z9, [x2, #68, MUL VL] + WORD $0x8588544a // ldr z10, [x2, #69, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85885849 // ldr z9, [x2, #70, MUL VL] + WORD $0x85885c4a // ldr z10, [x2, #71, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85894049 // ldr z9, [x2, #72, MUL VL] + WORD $0x8589444a // ldr z10, [x2, #73, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85894849 // ldr z9, [x2, #74, MUL VL] + WORD $0x85894c4a // ldr z10, [x2, #75, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85895049 // ldr z9, [x2, #76, MUL VL] + WORD $0x8589544a // ldr z10, [x2, #77, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85895849 // ldr z9, [x2, #78, MUL VL] + WORD $0x85895c4a // ldr z10, [x2, #79, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x8_store + + // Load and process 32 bytes from input 5 to 8 outputs + WORD $0x8580414b // ldr z11, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x858a4049 // ldr z9, [x2, #80, MUL VL] + WORD $0x858a444a // ldr z10, [x2, #81, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x858a4849 // ldr z9, [x2, #82, MUL VL] + WORD $0x858a4c4a // ldr z10, [x2, #83, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x858a5049 // ldr z9, [x2, #84, MUL VL] + WORD $0x858a544a // ldr z10, [x2, #85, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x858a5849 // ldr z9, [x2, #86, MUL VL] + WORD $0x858a5c4a // ldr z10, [x2, #87, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x858b4049 // ldr z9, [x2, #88, MUL VL] + WORD $0x858b444a // ldr z10, [x2, #89, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x858b4849 // ldr z9, [x2, #90, MUL VL] + WORD $0x858b4c4a // ldr z10, [x2, #91, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x858b5049 // ldr z9, [x2, #92, MUL VL] + WORD $0x858b544a // ldr z10, [x2, #93, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x858b5849 // ldr z9, [x2, #94, MUL VL] + WORD $0x858b5c4a // ldr z10, [x2, #95, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x8_store + + // Load and process 32 bytes from input 6 to 8 outputs + WORD $0x8580416b // ldr z11, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x858c4049 // ldr z9, [x2, #96, MUL VL] + WORD $0x858c444a // ldr z10, [x2, #97, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x858c4849 // ldr z9, [x2, #98, MUL VL] + WORD $0x858c4c4a // ldr z10, [x2, #99, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x858c5049 // ldr z9, [x2, #100, MUL VL] + WORD $0x858c544a // ldr z10, [x2, #101, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x858c5849 // ldr z9, [x2, #102, MUL VL] + WORD $0x858c5c4a // ldr z10, [x2, #103, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x858d4049 // ldr z9, [x2, #104, MUL VL] + WORD $0x858d444a // ldr z10, [x2, #105, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x858d4849 // ldr z9, [x2, #106, MUL VL] + WORD $0x858d4c4a // ldr z10, [x2, #107, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x858d5049 // ldr z9, [x2, #108, MUL VL] + WORD $0x858d544a // ldr z10, [x2, #109, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x858d5849 // ldr z9, [x2, #110, MUL VL] + WORD $0x858d5c4a // ldr z10, [x2, #111, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x8_store + + // Load and process 32 bytes from input 7 to 8 outputs + WORD $0x8580418b // ldr z11, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x858e4049 // ldr z9, [x2, #112, MUL VL] + WORD $0x858e444a // ldr z10, [x2, #113, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x858e4849 // ldr z9, [x2, #114, MUL VL] + WORD $0x858e4c4a // ldr z10, [x2, #115, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x858e5049 // ldr z9, [x2, #116, MUL VL] + WORD $0x858e544a // ldr z10, [x2, #117, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x858e5849 // ldr z9, [x2, #118, MUL VL] + WORD $0x858e5c4a // ldr z10, [x2, #119, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x858f4049 // ldr z9, [x2, #120, MUL VL] + WORD $0x858f444a // ldr z10, [x2, #121, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x858f4849 // ldr z9, [x2, #122, MUL VL] + WORD $0x858f4c4a // ldr z10, [x2, #123, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x858f5049 // ldr z9, [x2, #124, MUL VL] + WORD $0x858f544a // ldr z10, [x2, #125, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x858f5849 // ldr z9, [x2, #126, MUL VL] + WORD $0x858f5c4a // ldr z10, [x2, #127, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x8_store + + // Load and process 32 bytes from input 8 to 8 outputs + WORD $0x858041ab // ldr z11, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85904049 // ldr z9, [x2, #128, MUL VL] + WORD $0x8590444a // ldr z10, [x2, #129, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85904849 // ldr z9, [x2, #130, MUL VL] + WORD $0x85904c4a // ldr z10, [x2, #131, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85905049 // ldr z9, [x2, #132, MUL VL] + WORD $0x8590544a // ldr z10, [x2, #133, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85905849 // ldr z9, [x2, #134, MUL VL] + WORD $0x85905c4a // ldr z10, [x2, #135, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85914049 // ldr z9, [x2, #136, MUL VL] + WORD $0x8591444a // ldr z10, [x2, #137, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85914849 // ldr z9, [x2, #138, MUL VL] + WORD $0x85914c4a // ldr z10, [x2, #139, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85915049 // ldr z9, [x2, #140, MUL VL] + WORD $0x8591544a // ldr z10, [x2, #141, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85915849 // ldr z9, [x2, #142, MUL VL] + WORD $0x85915c4a // ldr z10, [x2, #143, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x8_store + + // Load and process 32 bytes from input 9 to 8 outputs + WORD $0x8580406b // ldr z11, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85924049 // ldr z9, [x2, #144, MUL VL] + WORD $0x8592444a // ldr z10, [x2, #145, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85924849 // ldr z9, [x2, #146, MUL VL] + WORD $0x85924c4a // ldr z10, [x2, #147, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85925049 // ldr z9, [x2, #148, MUL VL] + WORD $0x8592544a // ldr z10, [x2, #149, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85925849 // ldr z9, [x2, #150, MUL VL] + WORD $0x85925c4a // ldr z10, [x2, #151, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85934049 // ldr z9, [x2, #152, MUL VL] + WORD $0x8593444a // ldr z10, [x2, #153, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85934849 // ldr z9, [x2, #154, MUL VL] + WORD $0x85934c4a // ldr z10, [x2, #155, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85935049 // ldr z9, [x2, #156, MUL VL] + WORD $0x8593544a // ldr z10, [x2, #157, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85935849 // ldr z9, [x2, #158, MUL VL] + WORD $0x85935c4a // ldr z10, [x2, #159, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + +mulSve_10x8_store: + // Store 8 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + MOVD 120(R14), R6 + WORD $0xe5ef40c5 // st1d { z5.d }, p0, [x6, x15, lsl #3] + MOVD 144(R14), R6 + WORD $0xe5ef40c6 // st1d { z6.d }, p0, [x6, x15, lsl #3] + MOVD 168(R14), R6 + WORD $0xe5ef40c7 // st1d { z7.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x8_loop + +mulSve_10x8_end: + RET + +// func mulSve_10x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x8Xor(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 173 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x8Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c8 // mov z8.d, x6 + WORD $0x05212108 // dup z8.b, z8.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x8Xor_loop: + // Load and process 32 bytes from input 0 to 8 outputs + WORD $0x8580402b // ldr z11, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + MOVD (R14), R6 + WORD $0xa5ef40c0 // ld1d { z0.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85804049 // ldr z9, [x2] + WORD $0x8580444a // ldr z10, [x2, #1, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + MOVD 24(R14), R6 + WORD $0xa5ef40c1 // ld1d { z1.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85804849 // ldr z9, [x2, #2, MUL VL] + WORD $0x85804c4a // ldr z10, [x2, #3, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + MOVD 48(R14), R6 + WORD $0xa5ef40c2 // ld1d { z2.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85805049 // ldr z9, [x2, #4, MUL VL] + WORD $0x8580544a // ldr z10, [x2, #5, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + MOVD 72(R14), R6 + WORD $0xa5ef40c3 // ld1d { z3.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85805849 // ldr z9, [x2, #6, MUL VL] + WORD $0x85805c4a // ldr z10, [x2, #7, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + MOVD 96(R14), R6 + WORD $0xa5ef40c4 // ld1d { z4.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85814049 // ldr z9, [x2, #8, MUL VL] + WORD $0x8581444a // ldr z10, [x2, #9, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + MOVD 120(R14), R6 + WORD $0xa5ef40c5 // ld1d { z5.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85814849 // ldr z9, [x2, #10, MUL VL] + WORD $0x85814c4a // ldr z10, [x2, #11, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + MOVD 144(R14), R6 + WORD $0xa5ef40c6 // ld1d { z6.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85815049 // ldr z9, [x2, #12, MUL VL] + WORD $0x8581544a // ldr z10, [x2, #13, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + MOVD 168(R14), R6 + WORD $0xa5ef40c7 // ld1d { z7.d }, p0/z, [x6, x15, lsl #3] + WORD $0x85815849 // ldr z9, [x2, #14, MUL VL] + WORD $0x85815c4a // ldr z10, [x2, #15, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x8Xor_store + + // Load and process 32 bytes from input 1 to 8 outputs + WORD $0x8580408b // ldr z11, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85824049 // ldr z9, [x2, #16, MUL VL] + WORD $0x8582444a // ldr z10, [x2, #17, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85824849 // ldr z9, [x2, #18, MUL VL] + WORD $0x85824c4a // ldr z10, [x2, #19, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85825049 // ldr z9, [x2, #20, MUL VL] + WORD $0x8582544a // ldr z10, [x2, #21, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85825849 // ldr z9, [x2, #22, MUL VL] + WORD $0x85825c4a // ldr z10, [x2, #23, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85834049 // ldr z9, [x2, #24, MUL VL] + WORD $0x8583444a // ldr z10, [x2, #25, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85834849 // ldr z9, [x2, #26, MUL VL] + WORD $0x85834c4a // ldr z10, [x2, #27, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85835049 // ldr z9, [x2, #28, MUL VL] + WORD $0x8583544a // ldr z10, [x2, #29, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85835849 // ldr z9, [x2, #30, MUL VL] + WORD $0x85835c4a // ldr z10, [x2, #31, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x8Xor_store + + // Load and process 32 bytes from input 2 to 8 outputs + WORD $0x858040ab // ldr z11, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85844049 // ldr z9, [x2, #32, MUL VL] + WORD $0x8584444a // ldr z10, [x2, #33, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85844849 // ldr z9, [x2, #34, MUL VL] + WORD $0x85844c4a // ldr z10, [x2, #35, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85845049 // ldr z9, [x2, #36, MUL VL] + WORD $0x8584544a // ldr z10, [x2, #37, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85845849 // ldr z9, [x2, #38, MUL VL] + WORD $0x85845c4a // ldr z10, [x2, #39, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85854049 // ldr z9, [x2, #40, MUL VL] + WORD $0x8585444a // ldr z10, [x2, #41, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85854849 // ldr z9, [x2, #42, MUL VL] + WORD $0x85854c4a // ldr z10, [x2, #43, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85855049 // ldr z9, [x2, #44, MUL VL] + WORD $0x8585544a // ldr z10, [x2, #45, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85855849 // ldr z9, [x2, #46, MUL VL] + WORD $0x85855c4a // ldr z10, [x2, #47, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x8Xor_store + + // Load and process 32 bytes from input 3 to 8 outputs + WORD $0x8580410b // ldr z11, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85864049 // ldr z9, [x2, #48, MUL VL] + WORD $0x8586444a // ldr z10, [x2, #49, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85864849 // ldr z9, [x2, #50, MUL VL] + WORD $0x85864c4a // ldr z10, [x2, #51, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85865049 // ldr z9, [x2, #52, MUL VL] + WORD $0x8586544a // ldr z10, [x2, #53, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85865849 // ldr z9, [x2, #54, MUL VL] + WORD $0x85865c4a // ldr z10, [x2, #55, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85874049 // ldr z9, [x2, #56, MUL VL] + WORD $0x8587444a // ldr z10, [x2, #57, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85874849 // ldr z9, [x2, #58, MUL VL] + WORD $0x85874c4a // ldr z10, [x2, #59, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85875049 // ldr z9, [x2, #60, MUL VL] + WORD $0x8587544a // ldr z10, [x2, #61, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85875849 // ldr z9, [x2, #62, MUL VL] + WORD $0x85875c4a // ldr z10, [x2, #63, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x8Xor_store + + // Load and process 32 bytes from input 4 to 8 outputs + WORD $0x8580412b // ldr z11, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85884049 // ldr z9, [x2, #64, MUL VL] + WORD $0x8588444a // ldr z10, [x2, #65, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85884849 // ldr z9, [x2, #66, MUL VL] + WORD $0x85884c4a // ldr z10, [x2, #67, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85885049 // ldr z9, [x2, #68, MUL VL] + WORD $0x8588544a // ldr z10, [x2, #69, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85885849 // ldr z9, [x2, #70, MUL VL] + WORD $0x85885c4a // ldr z10, [x2, #71, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85894049 // ldr z9, [x2, #72, MUL VL] + WORD $0x8589444a // ldr z10, [x2, #73, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85894849 // ldr z9, [x2, #74, MUL VL] + WORD $0x85894c4a // ldr z10, [x2, #75, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85895049 // ldr z9, [x2, #76, MUL VL] + WORD $0x8589544a // ldr z10, [x2, #77, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85895849 // ldr z9, [x2, #78, MUL VL] + WORD $0x85895c4a // ldr z10, [x2, #79, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x8Xor_store + + // Load and process 32 bytes from input 5 to 8 outputs + WORD $0x8580414b // ldr z11, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x858a4049 // ldr z9, [x2, #80, MUL VL] + WORD $0x858a444a // ldr z10, [x2, #81, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x858a4849 // ldr z9, [x2, #82, MUL VL] + WORD $0x858a4c4a // ldr z10, [x2, #83, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x858a5049 // ldr z9, [x2, #84, MUL VL] + WORD $0x858a544a // ldr z10, [x2, #85, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x858a5849 // ldr z9, [x2, #86, MUL VL] + WORD $0x858a5c4a // ldr z10, [x2, #87, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x858b4049 // ldr z9, [x2, #88, MUL VL] + WORD $0x858b444a // ldr z10, [x2, #89, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x858b4849 // ldr z9, [x2, #90, MUL VL] + WORD $0x858b4c4a // ldr z10, [x2, #91, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x858b5049 // ldr z9, [x2, #92, MUL VL] + WORD $0x858b544a // ldr z10, [x2, #93, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x858b5849 // ldr z9, [x2, #94, MUL VL] + WORD $0x858b5c4a // ldr z10, [x2, #95, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x8Xor_store + + // Load and process 32 bytes from input 6 to 8 outputs + WORD $0x8580416b // ldr z11, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x858c4049 // ldr z9, [x2, #96, MUL VL] + WORD $0x858c444a // ldr z10, [x2, #97, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x858c4849 // ldr z9, [x2, #98, MUL VL] + WORD $0x858c4c4a // ldr z10, [x2, #99, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x858c5049 // ldr z9, [x2, #100, MUL VL] + WORD $0x858c544a // ldr z10, [x2, #101, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x858c5849 // ldr z9, [x2, #102, MUL VL] + WORD $0x858c5c4a // ldr z10, [x2, #103, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x858d4049 // ldr z9, [x2, #104, MUL VL] + WORD $0x858d444a // ldr z10, [x2, #105, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x858d4849 // ldr z9, [x2, #106, MUL VL] + WORD $0x858d4c4a // ldr z10, [x2, #107, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x858d5049 // ldr z9, [x2, #108, MUL VL] + WORD $0x858d544a // ldr z10, [x2, #109, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x858d5849 // ldr z9, [x2, #110, MUL VL] + WORD $0x858d5c4a // ldr z10, [x2, #111, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x8Xor_store + + // Load and process 32 bytes from input 7 to 8 outputs + WORD $0x8580418b // ldr z11, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x858e4049 // ldr z9, [x2, #112, MUL VL] + WORD $0x858e444a // ldr z10, [x2, #113, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x858e4849 // ldr z9, [x2, #114, MUL VL] + WORD $0x858e4c4a // ldr z10, [x2, #115, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x858e5049 // ldr z9, [x2, #116, MUL VL] + WORD $0x858e544a // ldr z10, [x2, #117, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x858e5849 // ldr z9, [x2, #118, MUL VL] + WORD $0x858e5c4a // ldr z10, [x2, #119, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x858f4049 // ldr z9, [x2, #120, MUL VL] + WORD $0x858f444a // ldr z10, [x2, #121, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x858f4849 // ldr z9, [x2, #122, MUL VL] + WORD $0x858f4c4a // ldr z10, [x2, #123, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x858f5049 // ldr z9, [x2, #124, MUL VL] + WORD $0x858f544a // ldr z10, [x2, #125, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x858f5849 // ldr z9, [x2, #126, MUL VL] + WORD $0x858f5c4a // ldr z10, [x2, #127, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x8Xor_store + + // Load and process 32 bytes from input 8 to 8 outputs + WORD $0x858041ab // ldr z11, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85904049 // ldr z9, [x2, #128, MUL VL] + WORD $0x8590444a // ldr z10, [x2, #129, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85904849 // ldr z9, [x2, #130, MUL VL] + WORD $0x85904c4a // ldr z10, [x2, #131, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85905049 // ldr z9, [x2, #132, MUL VL] + WORD $0x8590544a // ldr z10, [x2, #133, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85905849 // ldr z9, [x2, #134, MUL VL] + WORD $0x85905c4a // ldr z10, [x2, #135, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85914049 // ldr z9, [x2, #136, MUL VL] + WORD $0x8591444a // ldr z10, [x2, #137, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85914849 // ldr z9, [x2, #138, MUL VL] + WORD $0x85914c4a // ldr z10, [x2, #139, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85915049 // ldr z9, [x2, #140, MUL VL] + WORD $0x8591544a // ldr z10, [x2, #141, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85915849 // ldr z9, [x2, #142, MUL VL] + WORD $0x85915c4a // ldr z10, [x2, #143, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x8Xor_store + + // Load and process 32 bytes from input 9 to 8 outputs + WORD $0x8580406b // ldr z11, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc956c // lsr z12.d, z11.d, #4 + WORD $0x0428316b // and z11.d, z11.d, z8.d + WORD $0x0428318c // and z12.d, z12.d, z8.d + WORD $0x85924049 // ldr z9, [x2, #144, MUL VL] + WORD $0x8592444a // ldr z10, [x2, #145, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93000 // eor z0.d, z0.d, z9.d + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x85924849 // ldr z9, [x2, #146, MUL VL] + WORD $0x85924c4a // ldr z10, [x2, #147, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93021 // eor z1.d, z1.d, z9.d + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x85925049 // ldr z9, [x2, #148, MUL VL] + WORD $0x8592544a // ldr z10, [x2, #149, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93042 // eor z2.d, z2.d, z9.d + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x85925849 // ldr z9, [x2, #150, MUL VL] + WORD $0x85925c4a // ldr z10, [x2, #151, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93063 // eor z3.d, z3.d, z9.d + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x85934049 // ldr z9, [x2, #152, MUL VL] + WORD $0x8593444a // ldr z10, [x2, #153, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a93084 // eor z4.d, z4.d, z9.d + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x85934849 // ldr z9, [x2, #154, MUL VL] + WORD $0x85934c4a // ldr z10, [x2, #155, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930a5 // eor z5.d, z5.d, z9.d + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x85935049 // ldr z9, [x2, #156, MUL VL] + WORD $0x8593544a // ldr z10, [x2, #157, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930c6 // eor z6.d, z6.d, z9.d + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x85935849 // ldr z9, [x2, #158, MUL VL] + WORD $0x85935c4a // ldr z10, [x2, #159, MUL VL] + WORD $0x052b3129 // tbl z9.b, z9.b, z11.b + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x04a930e7 // eor z7.d, z7.d, z9.d + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + +mulSve_10x8Xor_store: + // Store 8 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + MOVD 120(R14), R6 + WORD $0xe5ef40c5 // st1d { z5.d }, p0, [x6, x15, lsl #3] + MOVD 144(R14), R6 + WORD $0xe5ef40c6 // st1d { z6.d }, p0, [x6, x15, lsl #3] + MOVD 168(R14), R6 + WORD $0xe5ef40c7 // st1d { z7.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x8Xor_loop + +mulSve_10x8Xor_end: + RET + +// func mulSve_10x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x9(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 194 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x9_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c9 // mov z9.d, x6 + WORD $0x05212129 // dup z9.b, z9.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + WORD $0x8580402c // ldr z12, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8580404a // ldr z10, [x2] + WORD $0x8580444b // ldr z11, [x2, #1, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3160 // eor z0.d, z11.d, z10.d + WORD $0x8580484a // ldr z10, [x2, #2, MUL VL] + WORD $0x85804c4b // ldr z11, [x2, #3, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3161 // eor z1.d, z11.d, z10.d + WORD $0x8580504a // ldr z10, [x2, #4, MUL VL] + WORD $0x8580544b // ldr z11, [x2, #5, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3162 // eor z2.d, z11.d, z10.d + WORD $0x8580584a // ldr z10, [x2, #6, MUL VL] + WORD $0x85805c4b // ldr z11, [x2, #7, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3163 // eor z3.d, z11.d, z10.d + WORD $0x8581404a // ldr z10, [x2, #8, MUL VL] + WORD $0x8581444b // ldr z11, [x2, #9, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3164 // eor z4.d, z11.d, z10.d + WORD $0x8581484a // ldr z10, [x2, #10, MUL VL] + WORD $0x85814c4b // ldr z11, [x2, #11, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3165 // eor z5.d, z11.d, z10.d + WORD $0x8581504a // ldr z10, [x2, #12, MUL VL] + WORD $0x8581544b // ldr z11, [x2, #13, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3166 // eor z6.d, z11.d, z10.d + WORD $0x8581584a // ldr z10, [x2, #14, MUL VL] + WORD $0x85815c4b // ldr z11, [x2, #15, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3167 // eor z7.d, z11.d, z10.d + WORD $0x8582404a // ldr z10, [x2, #16, MUL VL] + WORD $0x8582444b // ldr z11, [x2, #17, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3168 // eor z8.d, z11.d, z10.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x9_store + + // Load and process 32 bytes from input 1 to 9 outputs + WORD $0x8580408c // ldr z12, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8582484a // ldr z10, [x2, #18, MUL VL] + WORD $0x85824c4b // ldr z11, [x2, #19, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8582504a // ldr z10, [x2, #20, MUL VL] + WORD $0x8582544b // ldr z11, [x2, #21, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8582584a // ldr z10, [x2, #22, MUL VL] + WORD $0x85825c4b // ldr z11, [x2, #23, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8583404a // ldr z10, [x2, #24, MUL VL] + WORD $0x8583444b // ldr z11, [x2, #25, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8583484a // ldr z10, [x2, #26, MUL VL] + WORD $0x85834c4b // ldr z11, [x2, #27, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8583504a // ldr z10, [x2, #28, MUL VL] + WORD $0x8583544b // ldr z11, [x2, #29, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8583584a // ldr z10, [x2, #30, MUL VL] + WORD $0x85835c4b // ldr z11, [x2, #31, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8584404a // ldr z10, [x2, #32, MUL VL] + WORD $0x8584444b // ldr z11, [x2, #33, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8584484a // ldr z10, [x2, #34, MUL VL] + WORD $0x85844c4b // ldr z11, [x2, #35, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x9_store + + // Load and process 32 bytes from input 2 to 9 outputs + WORD $0x858040ac // ldr z12, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8584504a // ldr z10, [x2, #36, MUL VL] + WORD $0x8584544b // ldr z11, [x2, #37, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8584584a // ldr z10, [x2, #38, MUL VL] + WORD $0x85845c4b // ldr z11, [x2, #39, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8585404a // ldr z10, [x2, #40, MUL VL] + WORD $0x8585444b // ldr z11, [x2, #41, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8585484a // ldr z10, [x2, #42, MUL VL] + WORD $0x85854c4b // ldr z11, [x2, #43, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8585504a // ldr z10, [x2, #44, MUL VL] + WORD $0x8585544b // ldr z11, [x2, #45, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8585584a // ldr z10, [x2, #46, MUL VL] + WORD $0x85855c4b // ldr z11, [x2, #47, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8586404a // ldr z10, [x2, #48, MUL VL] + WORD $0x8586444b // ldr z11, [x2, #49, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8586484a // ldr z10, [x2, #50, MUL VL] + WORD $0x85864c4b // ldr z11, [x2, #51, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8586504a // ldr z10, [x2, #52, MUL VL] + WORD $0x8586544b // ldr z11, [x2, #53, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x9_store + + // Load and process 32 bytes from input 3 to 9 outputs + WORD $0x8580410c // ldr z12, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8586584a // ldr z10, [x2, #54, MUL VL] + WORD $0x85865c4b // ldr z11, [x2, #55, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8587404a // ldr z10, [x2, #56, MUL VL] + WORD $0x8587444b // ldr z11, [x2, #57, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8587484a // ldr z10, [x2, #58, MUL VL] + WORD $0x85874c4b // ldr z11, [x2, #59, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8587504a // ldr z10, [x2, #60, MUL VL] + WORD $0x8587544b // ldr z11, [x2, #61, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8587584a // ldr z10, [x2, #62, MUL VL] + WORD $0x85875c4b // ldr z11, [x2, #63, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8588404a // ldr z10, [x2, #64, MUL VL] + WORD $0x8588444b // ldr z11, [x2, #65, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8588484a // ldr z10, [x2, #66, MUL VL] + WORD $0x85884c4b // ldr z11, [x2, #67, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8588504a // ldr z10, [x2, #68, MUL VL] + WORD $0x8588544b // ldr z11, [x2, #69, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8588584a // ldr z10, [x2, #70, MUL VL] + WORD $0x85885c4b // ldr z11, [x2, #71, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x9_store + + // Load and process 32 bytes from input 4 to 9 outputs + WORD $0x8580412c // ldr z12, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8589404a // ldr z10, [x2, #72, MUL VL] + WORD $0x8589444b // ldr z11, [x2, #73, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8589484a // ldr z10, [x2, #74, MUL VL] + WORD $0x85894c4b // ldr z11, [x2, #75, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8589504a // ldr z10, [x2, #76, MUL VL] + WORD $0x8589544b // ldr z11, [x2, #77, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8589584a // ldr z10, [x2, #78, MUL VL] + WORD $0x85895c4b // ldr z11, [x2, #79, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x858a404a // ldr z10, [x2, #80, MUL VL] + WORD $0x858a444b // ldr z11, [x2, #81, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x858a484a // ldr z10, [x2, #82, MUL VL] + WORD $0x858a4c4b // ldr z11, [x2, #83, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x858a504a // ldr z10, [x2, #84, MUL VL] + WORD $0x858a544b // ldr z11, [x2, #85, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x858a584a // ldr z10, [x2, #86, MUL VL] + WORD $0x858a5c4b // ldr z11, [x2, #87, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x858b404a // ldr z10, [x2, #88, MUL VL] + WORD $0x858b444b // ldr z11, [x2, #89, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x9_store + + // Load and process 32 bytes from input 5 to 9 outputs + WORD $0x8580414c // ldr z12, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x858b484a // ldr z10, [x2, #90, MUL VL] + WORD $0x858b4c4b // ldr z11, [x2, #91, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x858b504a // ldr z10, [x2, #92, MUL VL] + WORD $0x858b544b // ldr z11, [x2, #93, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x858b584a // ldr z10, [x2, #94, MUL VL] + WORD $0x858b5c4b // ldr z11, [x2, #95, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x858c404a // ldr z10, [x2, #96, MUL VL] + WORD $0x858c444b // ldr z11, [x2, #97, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x858c484a // ldr z10, [x2, #98, MUL VL] + WORD $0x858c4c4b // ldr z11, [x2, #99, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x858c504a // ldr z10, [x2, #100, MUL VL] + WORD $0x858c544b // ldr z11, [x2, #101, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x858c584a // ldr z10, [x2, #102, MUL VL] + WORD $0x858c5c4b // ldr z11, [x2, #103, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x858d404a // ldr z10, [x2, #104, MUL VL] + WORD $0x858d444b // ldr z11, [x2, #105, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x858d484a // ldr z10, [x2, #106, MUL VL] + WORD $0x858d4c4b // ldr z11, [x2, #107, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x9_store + + // Load and process 32 bytes from input 6 to 9 outputs + WORD $0x8580416c // ldr z12, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x858d504a // ldr z10, [x2, #108, MUL VL] + WORD $0x858d544b // ldr z11, [x2, #109, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x858d584a // ldr z10, [x2, #110, MUL VL] + WORD $0x858d5c4b // ldr z11, [x2, #111, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x858e404a // ldr z10, [x2, #112, MUL VL] + WORD $0x858e444b // ldr z11, [x2, #113, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x858e484a // ldr z10, [x2, #114, MUL VL] + WORD $0x858e4c4b // ldr z11, [x2, #115, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x858e504a // ldr z10, [x2, #116, MUL VL] + WORD $0x858e544b // ldr z11, [x2, #117, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x858e584a // ldr z10, [x2, #118, MUL VL] + WORD $0x858e5c4b // ldr z11, [x2, #119, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x858f404a // ldr z10, [x2, #120, MUL VL] + WORD $0x858f444b // ldr z11, [x2, #121, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x858f484a // ldr z10, [x2, #122, MUL VL] + WORD $0x858f4c4b // ldr z11, [x2, #123, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x858f504a // ldr z10, [x2, #124, MUL VL] + WORD $0x858f544b // ldr z11, [x2, #125, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x9_store + + // Load and process 32 bytes from input 7 to 9 outputs + WORD $0x8580418c // ldr z12, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x858f584a // ldr z10, [x2, #126, MUL VL] + WORD $0x858f5c4b // ldr z11, [x2, #127, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8590404a // ldr z10, [x2, #128, MUL VL] + WORD $0x8590444b // ldr z11, [x2, #129, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8590484a // ldr z10, [x2, #130, MUL VL] + WORD $0x85904c4b // ldr z11, [x2, #131, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8590504a // ldr z10, [x2, #132, MUL VL] + WORD $0x8590544b // ldr z11, [x2, #133, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8590584a // ldr z10, [x2, #134, MUL VL] + WORD $0x85905c4b // ldr z11, [x2, #135, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8591404a // ldr z10, [x2, #136, MUL VL] + WORD $0x8591444b // ldr z11, [x2, #137, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8591484a // ldr z10, [x2, #138, MUL VL] + WORD $0x85914c4b // ldr z11, [x2, #139, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8591504a // ldr z10, [x2, #140, MUL VL] + WORD $0x8591544b // ldr z11, [x2, #141, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8591584a // ldr z10, [x2, #142, MUL VL] + WORD $0x85915c4b // ldr z11, [x2, #143, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x9_store + + // Load and process 32 bytes from input 8 to 9 outputs + WORD $0x858041ac // ldr z12, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8592404a // ldr z10, [x2, #144, MUL VL] + WORD $0x8592444b // ldr z11, [x2, #145, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8592484a // ldr z10, [x2, #146, MUL VL] + WORD $0x85924c4b // ldr z11, [x2, #147, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8592504a // ldr z10, [x2, #148, MUL VL] + WORD $0x8592544b // ldr z11, [x2, #149, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8592584a // ldr z10, [x2, #150, MUL VL] + WORD $0x85925c4b // ldr z11, [x2, #151, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8593404a // ldr z10, [x2, #152, MUL VL] + WORD $0x8593444b // ldr z11, [x2, #153, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8593484a // ldr z10, [x2, #154, MUL VL] + WORD $0x85934c4b // ldr z11, [x2, #155, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8593504a // ldr z10, [x2, #156, MUL VL] + WORD $0x8593544b // ldr z11, [x2, #157, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8593584a // ldr z10, [x2, #158, MUL VL] + WORD $0x85935c4b // ldr z11, [x2, #159, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8594404a // ldr z10, [x2, #160, MUL VL] + WORD $0x8594444b // ldr z11, [x2, #161, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x9_store + + // Load and process 32 bytes from input 9 to 9 outputs + WORD $0x8580406c // ldr z12, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8594484a // ldr z10, [x2, #162, MUL VL] + WORD $0x85944c4b // ldr z11, [x2, #163, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8594504a // ldr z10, [x2, #164, MUL VL] + WORD $0x8594544b // ldr z11, [x2, #165, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8594584a // ldr z10, [x2, #166, MUL VL] + WORD $0x85945c4b // ldr z11, [x2, #167, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8595404a // ldr z10, [x2, #168, MUL VL] + WORD $0x8595444b // ldr z11, [x2, #169, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8595484a // ldr z10, [x2, #170, MUL VL] + WORD $0x85954c4b // ldr z11, [x2, #171, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8595504a // ldr z10, [x2, #172, MUL VL] + WORD $0x8595544b // ldr z11, [x2, #173, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8595584a // ldr z10, [x2, #174, MUL VL] + WORD $0x85955c4b // ldr z11, [x2, #175, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8596404a // ldr z10, [x2, #176, MUL VL] + WORD $0x8596444b // ldr z11, [x2, #177, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8596484a // ldr z10, [x2, #178, MUL VL] + WORD $0x85964c4b // ldr z11, [x2, #179, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + +mulSve_10x9_store: + // Store 9 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + MOVD 120(R14), R6 + WORD $0xe5ef40c5 // st1d { z5.d }, p0, [x6, x15, lsl #3] + MOVD 144(R14), R6 + WORD $0xe5ef40c6 // st1d { z6.d }, p0, [x6, x15, lsl #3] + MOVD 168(R14), R6 + WORD $0xe5ef40c7 // st1d { z7.d }, p0, [x6, x15, lsl #3] + MOVD 192(R14), R6 + WORD $0xe5ef40c8 // st1d { z8.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x9_loop + +mulSve_10x9_end: + RET + +// func mulSve_10x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x9Xor(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 194 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x9Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038c9 // mov z9.d, x6 + WORD $0x05212129 // dup z9.b, z9.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x9Xor_loop: + // Load and process 32 bytes from input 0 to 9 outputs + WORD $0x8580402c // ldr z12, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + MOVD (R14), R6 + WORD $0xa5ef40c0 // ld1d { z0.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8580404a // ldr z10, [x2] + WORD $0x8580444b // ldr z11, [x2, #1, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + MOVD 24(R14), R6 + WORD $0xa5ef40c1 // ld1d { z1.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8580484a // ldr z10, [x2, #2, MUL VL] + WORD $0x85804c4b // ldr z11, [x2, #3, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + MOVD 48(R14), R6 + WORD $0xa5ef40c2 // ld1d { z2.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8580504a // ldr z10, [x2, #4, MUL VL] + WORD $0x8580544b // ldr z11, [x2, #5, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + MOVD 72(R14), R6 + WORD $0xa5ef40c3 // ld1d { z3.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8580584a // ldr z10, [x2, #6, MUL VL] + WORD $0x85805c4b // ldr z11, [x2, #7, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + MOVD 96(R14), R6 + WORD $0xa5ef40c4 // ld1d { z4.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8581404a // ldr z10, [x2, #8, MUL VL] + WORD $0x8581444b // ldr z11, [x2, #9, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + MOVD 120(R14), R6 + WORD $0xa5ef40c5 // ld1d { z5.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8581484a // ldr z10, [x2, #10, MUL VL] + WORD $0x85814c4b // ldr z11, [x2, #11, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + MOVD 144(R14), R6 + WORD $0xa5ef40c6 // ld1d { z6.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8581504a // ldr z10, [x2, #12, MUL VL] + WORD $0x8581544b // ldr z11, [x2, #13, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + MOVD 168(R14), R6 + WORD $0xa5ef40c7 // ld1d { z7.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8581584a // ldr z10, [x2, #14, MUL VL] + WORD $0x85815c4b // ldr z11, [x2, #15, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + MOVD 192(R14), R6 + WORD $0xa5ef40c8 // ld1d { z8.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8582404a // ldr z10, [x2, #16, MUL VL] + WORD $0x8582444b // ldr z11, [x2, #17, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x9Xor_store + + // Load and process 32 bytes from input 1 to 9 outputs + WORD $0x8580408c // ldr z12, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8582484a // ldr z10, [x2, #18, MUL VL] + WORD $0x85824c4b // ldr z11, [x2, #19, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8582504a // ldr z10, [x2, #20, MUL VL] + WORD $0x8582544b // ldr z11, [x2, #21, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8582584a // ldr z10, [x2, #22, MUL VL] + WORD $0x85825c4b // ldr z11, [x2, #23, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8583404a // ldr z10, [x2, #24, MUL VL] + WORD $0x8583444b // ldr z11, [x2, #25, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8583484a // ldr z10, [x2, #26, MUL VL] + WORD $0x85834c4b // ldr z11, [x2, #27, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8583504a // ldr z10, [x2, #28, MUL VL] + WORD $0x8583544b // ldr z11, [x2, #29, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8583584a // ldr z10, [x2, #30, MUL VL] + WORD $0x85835c4b // ldr z11, [x2, #31, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8584404a // ldr z10, [x2, #32, MUL VL] + WORD $0x8584444b // ldr z11, [x2, #33, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8584484a // ldr z10, [x2, #34, MUL VL] + WORD $0x85844c4b // ldr z11, [x2, #35, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x9Xor_store + + // Load and process 32 bytes from input 2 to 9 outputs + WORD $0x858040ac // ldr z12, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8584504a // ldr z10, [x2, #36, MUL VL] + WORD $0x8584544b // ldr z11, [x2, #37, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8584584a // ldr z10, [x2, #38, MUL VL] + WORD $0x85845c4b // ldr z11, [x2, #39, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8585404a // ldr z10, [x2, #40, MUL VL] + WORD $0x8585444b // ldr z11, [x2, #41, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8585484a // ldr z10, [x2, #42, MUL VL] + WORD $0x85854c4b // ldr z11, [x2, #43, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8585504a // ldr z10, [x2, #44, MUL VL] + WORD $0x8585544b // ldr z11, [x2, #45, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8585584a // ldr z10, [x2, #46, MUL VL] + WORD $0x85855c4b // ldr z11, [x2, #47, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8586404a // ldr z10, [x2, #48, MUL VL] + WORD $0x8586444b // ldr z11, [x2, #49, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8586484a // ldr z10, [x2, #50, MUL VL] + WORD $0x85864c4b // ldr z11, [x2, #51, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8586504a // ldr z10, [x2, #52, MUL VL] + WORD $0x8586544b // ldr z11, [x2, #53, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x9Xor_store + + // Load and process 32 bytes from input 3 to 9 outputs + WORD $0x8580410c // ldr z12, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8586584a // ldr z10, [x2, #54, MUL VL] + WORD $0x85865c4b // ldr z11, [x2, #55, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8587404a // ldr z10, [x2, #56, MUL VL] + WORD $0x8587444b // ldr z11, [x2, #57, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8587484a // ldr z10, [x2, #58, MUL VL] + WORD $0x85874c4b // ldr z11, [x2, #59, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8587504a // ldr z10, [x2, #60, MUL VL] + WORD $0x8587544b // ldr z11, [x2, #61, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8587584a // ldr z10, [x2, #62, MUL VL] + WORD $0x85875c4b // ldr z11, [x2, #63, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8588404a // ldr z10, [x2, #64, MUL VL] + WORD $0x8588444b // ldr z11, [x2, #65, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8588484a // ldr z10, [x2, #66, MUL VL] + WORD $0x85884c4b // ldr z11, [x2, #67, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8588504a // ldr z10, [x2, #68, MUL VL] + WORD $0x8588544b // ldr z11, [x2, #69, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8588584a // ldr z10, [x2, #70, MUL VL] + WORD $0x85885c4b // ldr z11, [x2, #71, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x9Xor_store + + // Load and process 32 bytes from input 4 to 9 outputs + WORD $0x8580412c // ldr z12, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8589404a // ldr z10, [x2, #72, MUL VL] + WORD $0x8589444b // ldr z11, [x2, #73, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8589484a // ldr z10, [x2, #74, MUL VL] + WORD $0x85894c4b // ldr z11, [x2, #75, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8589504a // ldr z10, [x2, #76, MUL VL] + WORD $0x8589544b // ldr z11, [x2, #77, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8589584a // ldr z10, [x2, #78, MUL VL] + WORD $0x85895c4b // ldr z11, [x2, #79, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x858a404a // ldr z10, [x2, #80, MUL VL] + WORD $0x858a444b // ldr z11, [x2, #81, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x858a484a // ldr z10, [x2, #82, MUL VL] + WORD $0x858a4c4b // ldr z11, [x2, #83, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x858a504a // ldr z10, [x2, #84, MUL VL] + WORD $0x858a544b // ldr z11, [x2, #85, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x858a584a // ldr z10, [x2, #86, MUL VL] + WORD $0x858a5c4b // ldr z11, [x2, #87, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x858b404a // ldr z10, [x2, #88, MUL VL] + WORD $0x858b444b // ldr z11, [x2, #89, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x9Xor_store + + // Load and process 32 bytes from input 5 to 9 outputs + WORD $0x8580414c // ldr z12, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x858b484a // ldr z10, [x2, #90, MUL VL] + WORD $0x858b4c4b // ldr z11, [x2, #91, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x858b504a // ldr z10, [x2, #92, MUL VL] + WORD $0x858b544b // ldr z11, [x2, #93, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x858b584a // ldr z10, [x2, #94, MUL VL] + WORD $0x858b5c4b // ldr z11, [x2, #95, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x858c404a // ldr z10, [x2, #96, MUL VL] + WORD $0x858c444b // ldr z11, [x2, #97, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x858c484a // ldr z10, [x2, #98, MUL VL] + WORD $0x858c4c4b // ldr z11, [x2, #99, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x858c504a // ldr z10, [x2, #100, MUL VL] + WORD $0x858c544b // ldr z11, [x2, #101, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x858c584a // ldr z10, [x2, #102, MUL VL] + WORD $0x858c5c4b // ldr z11, [x2, #103, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x858d404a // ldr z10, [x2, #104, MUL VL] + WORD $0x858d444b // ldr z11, [x2, #105, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x858d484a // ldr z10, [x2, #106, MUL VL] + WORD $0x858d4c4b // ldr z11, [x2, #107, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x9Xor_store + + // Load and process 32 bytes from input 6 to 9 outputs + WORD $0x8580416c // ldr z12, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x858d504a // ldr z10, [x2, #108, MUL VL] + WORD $0x858d544b // ldr z11, [x2, #109, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x858d584a // ldr z10, [x2, #110, MUL VL] + WORD $0x858d5c4b // ldr z11, [x2, #111, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x858e404a // ldr z10, [x2, #112, MUL VL] + WORD $0x858e444b // ldr z11, [x2, #113, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x858e484a // ldr z10, [x2, #114, MUL VL] + WORD $0x858e4c4b // ldr z11, [x2, #115, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x858e504a // ldr z10, [x2, #116, MUL VL] + WORD $0x858e544b // ldr z11, [x2, #117, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x858e584a // ldr z10, [x2, #118, MUL VL] + WORD $0x858e5c4b // ldr z11, [x2, #119, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x858f404a // ldr z10, [x2, #120, MUL VL] + WORD $0x858f444b // ldr z11, [x2, #121, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x858f484a // ldr z10, [x2, #122, MUL VL] + WORD $0x858f4c4b // ldr z11, [x2, #123, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x858f504a // ldr z10, [x2, #124, MUL VL] + WORD $0x858f544b // ldr z11, [x2, #125, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x9Xor_store + + // Load and process 32 bytes from input 7 to 9 outputs + WORD $0x8580418c // ldr z12, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x858f584a // ldr z10, [x2, #126, MUL VL] + WORD $0x858f5c4b // ldr z11, [x2, #127, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8590404a // ldr z10, [x2, #128, MUL VL] + WORD $0x8590444b // ldr z11, [x2, #129, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8590484a // ldr z10, [x2, #130, MUL VL] + WORD $0x85904c4b // ldr z11, [x2, #131, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8590504a // ldr z10, [x2, #132, MUL VL] + WORD $0x8590544b // ldr z11, [x2, #133, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8590584a // ldr z10, [x2, #134, MUL VL] + WORD $0x85905c4b // ldr z11, [x2, #135, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8591404a // ldr z10, [x2, #136, MUL VL] + WORD $0x8591444b // ldr z11, [x2, #137, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8591484a // ldr z10, [x2, #138, MUL VL] + WORD $0x85914c4b // ldr z11, [x2, #139, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8591504a // ldr z10, [x2, #140, MUL VL] + WORD $0x8591544b // ldr z11, [x2, #141, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8591584a // ldr z10, [x2, #142, MUL VL] + WORD $0x85915c4b // ldr z11, [x2, #143, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x9Xor_store + + // Load and process 32 bytes from input 8 to 9 outputs + WORD $0x858041ac // ldr z12, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8592404a // ldr z10, [x2, #144, MUL VL] + WORD $0x8592444b // ldr z11, [x2, #145, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8592484a // ldr z10, [x2, #146, MUL VL] + WORD $0x85924c4b // ldr z11, [x2, #147, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8592504a // ldr z10, [x2, #148, MUL VL] + WORD $0x8592544b // ldr z11, [x2, #149, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8592584a // ldr z10, [x2, #150, MUL VL] + WORD $0x85925c4b // ldr z11, [x2, #151, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8593404a // ldr z10, [x2, #152, MUL VL] + WORD $0x8593444b // ldr z11, [x2, #153, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8593484a // ldr z10, [x2, #154, MUL VL] + WORD $0x85934c4b // ldr z11, [x2, #155, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8593504a // ldr z10, [x2, #156, MUL VL] + WORD $0x8593544b // ldr z11, [x2, #157, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8593584a // ldr z10, [x2, #158, MUL VL] + WORD $0x85935c4b // ldr z11, [x2, #159, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8594404a // ldr z10, [x2, #160, MUL VL] + WORD $0x8594444b // ldr z11, [x2, #161, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x9Xor_store + + // Load and process 32 bytes from input 9 to 9 outputs + WORD $0x8580406c // ldr z12, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc958d // lsr z13.d, z12.d, #4 + WORD $0x0429318c // and z12.d, z12.d, z9.d + WORD $0x042931ad // and z13.d, z13.d, z9.d + WORD $0x8594484a // ldr z10, [x2, #162, MUL VL] + WORD $0x85944c4b // ldr z11, [x2, #163, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3000 // eor z0.d, z0.d, z10.d + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x8594504a // ldr z10, [x2, #164, MUL VL] + WORD $0x8594544b // ldr z11, [x2, #165, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3021 // eor z1.d, z1.d, z10.d + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x8594584a // ldr z10, [x2, #166, MUL VL] + WORD $0x85945c4b // ldr z11, [x2, #167, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3042 // eor z2.d, z2.d, z10.d + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x8595404a // ldr z10, [x2, #168, MUL VL] + WORD $0x8595444b // ldr z11, [x2, #169, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3063 // eor z3.d, z3.d, z10.d + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x8595484a // ldr z10, [x2, #170, MUL VL] + WORD $0x85954c4b // ldr z11, [x2, #171, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3084 // eor z4.d, z4.d, z10.d + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x8595504a // ldr z10, [x2, #172, MUL VL] + WORD $0x8595544b // ldr z11, [x2, #173, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30a5 // eor z5.d, z5.d, z10.d + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x8595584a // ldr z10, [x2, #174, MUL VL] + WORD $0x85955c4b // ldr z11, [x2, #175, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30c6 // eor z6.d, z6.d, z10.d + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x8596404a // ldr z10, [x2, #176, MUL VL] + WORD $0x8596444b // ldr z11, [x2, #177, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa30e7 // eor z7.d, z7.d, z10.d + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x8596484a // ldr z10, [x2, #178, MUL VL] + WORD $0x85964c4b // ldr z11, [x2, #179, MUL VL] + WORD $0x052c314a // tbl z10.b, z10.b, z12.b + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x04aa3108 // eor z8.d, z8.d, z10.d + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + +mulSve_10x9Xor_store: + // Store 9 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + MOVD 120(R14), R6 + WORD $0xe5ef40c5 // st1d { z5.d }, p0, [x6, x15, lsl #3] + MOVD 144(R14), R6 + WORD $0xe5ef40c6 // st1d { z6.d }, p0, [x6, x15, lsl #3] + MOVD 168(R14), R6 + WORD $0xe5ef40c7 // st1d { z7.d }, p0, [x6, x15, lsl #3] + MOVD 192(R14), R6 + WORD $0xe5ef40c8 // st1d { z8.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x9Xor_loop + +mulSve_10x9Xor_end: + RET + +// func mulSve_10x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x10(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 215 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x10_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038ca // mov z10.d, x6 + WORD $0x0521214a // dup z10.b, z10.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + WORD $0x8580402d // ldr z13, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8580404b // ldr z11, [x2] + WORD $0x8580444c // ldr z12, [x2, #1, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3180 // eor z0.d, z12.d, z11.d + WORD $0x8580484b // ldr z11, [x2, #2, MUL VL] + WORD $0x85804c4c // ldr z12, [x2, #3, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3181 // eor z1.d, z12.d, z11.d + WORD $0x8580504b // ldr z11, [x2, #4, MUL VL] + WORD $0x8580544c // ldr z12, [x2, #5, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3182 // eor z2.d, z12.d, z11.d + WORD $0x8580584b // ldr z11, [x2, #6, MUL VL] + WORD $0x85805c4c // ldr z12, [x2, #7, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3183 // eor z3.d, z12.d, z11.d + WORD $0x8581404b // ldr z11, [x2, #8, MUL VL] + WORD $0x8581444c // ldr z12, [x2, #9, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3184 // eor z4.d, z12.d, z11.d + WORD $0x8581484b // ldr z11, [x2, #10, MUL VL] + WORD $0x85814c4c // ldr z12, [x2, #11, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3185 // eor z5.d, z12.d, z11.d + WORD $0x8581504b // ldr z11, [x2, #12, MUL VL] + WORD $0x8581544c // ldr z12, [x2, #13, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3186 // eor z6.d, z12.d, z11.d + WORD $0x8581584b // ldr z11, [x2, #14, MUL VL] + WORD $0x85815c4c // ldr z12, [x2, #15, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3187 // eor z7.d, z12.d, z11.d + WORD $0x8582404b // ldr z11, [x2, #16, MUL VL] + WORD $0x8582444c // ldr z12, [x2, #17, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3188 // eor z8.d, z12.d, z11.d + WORD $0x8582484b // ldr z11, [x2, #18, MUL VL] + WORD $0x85824c4c // ldr z12, [x2, #19, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3189 // eor z9.d, z12.d, z11.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x10_store + + // Load and process 32 bytes from input 1 to 10 outputs + WORD $0x8580408d // ldr z13, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8582504b // ldr z11, [x2, #20, MUL VL] + WORD $0x8582544c // ldr z12, [x2, #21, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8582584b // ldr z11, [x2, #22, MUL VL] + WORD $0x85825c4c // ldr z12, [x2, #23, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8583404b // ldr z11, [x2, #24, MUL VL] + WORD $0x8583444c // ldr z12, [x2, #25, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8583484b // ldr z11, [x2, #26, MUL VL] + WORD $0x85834c4c // ldr z12, [x2, #27, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8583504b // ldr z11, [x2, #28, MUL VL] + WORD $0x8583544c // ldr z12, [x2, #29, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8583584b // ldr z11, [x2, #30, MUL VL] + WORD $0x85835c4c // ldr z12, [x2, #31, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8584404b // ldr z11, [x2, #32, MUL VL] + WORD $0x8584444c // ldr z12, [x2, #33, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8584484b // ldr z11, [x2, #34, MUL VL] + WORD $0x85844c4c // ldr z12, [x2, #35, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8584504b // ldr z11, [x2, #36, MUL VL] + WORD $0x8584544c // ldr z12, [x2, #37, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8584584b // ldr z11, [x2, #38, MUL VL] + WORD $0x85845c4c // ldr z12, [x2, #39, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x10_store + + // Load and process 32 bytes from input 2 to 10 outputs + WORD $0x858040ad // ldr z13, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8585404b // ldr z11, [x2, #40, MUL VL] + WORD $0x8585444c // ldr z12, [x2, #41, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8585484b // ldr z11, [x2, #42, MUL VL] + WORD $0x85854c4c // ldr z12, [x2, #43, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8585504b // ldr z11, [x2, #44, MUL VL] + WORD $0x8585544c // ldr z12, [x2, #45, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8585584b // ldr z11, [x2, #46, MUL VL] + WORD $0x85855c4c // ldr z12, [x2, #47, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8586404b // ldr z11, [x2, #48, MUL VL] + WORD $0x8586444c // ldr z12, [x2, #49, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8586484b // ldr z11, [x2, #50, MUL VL] + WORD $0x85864c4c // ldr z12, [x2, #51, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8586504b // ldr z11, [x2, #52, MUL VL] + WORD $0x8586544c // ldr z12, [x2, #53, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8586584b // ldr z11, [x2, #54, MUL VL] + WORD $0x85865c4c // ldr z12, [x2, #55, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8587404b // ldr z11, [x2, #56, MUL VL] + WORD $0x8587444c // ldr z12, [x2, #57, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8587484b // ldr z11, [x2, #58, MUL VL] + WORD $0x85874c4c // ldr z12, [x2, #59, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x10_store + + // Load and process 32 bytes from input 3 to 10 outputs + WORD $0x8580410d // ldr z13, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8587504b // ldr z11, [x2, #60, MUL VL] + WORD $0x8587544c // ldr z12, [x2, #61, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8587584b // ldr z11, [x2, #62, MUL VL] + WORD $0x85875c4c // ldr z12, [x2, #63, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8588404b // ldr z11, [x2, #64, MUL VL] + WORD $0x8588444c // ldr z12, [x2, #65, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8588484b // ldr z11, [x2, #66, MUL VL] + WORD $0x85884c4c // ldr z12, [x2, #67, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8588504b // ldr z11, [x2, #68, MUL VL] + WORD $0x8588544c // ldr z12, [x2, #69, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8588584b // ldr z11, [x2, #70, MUL VL] + WORD $0x85885c4c // ldr z12, [x2, #71, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8589404b // ldr z11, [x2, #72, MUL VL] + WORD $0x8589444c // ldr z12, [x2, #73, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8589484b // ldr z11, [x2, #74, MUL VL] + WORD $0x85894c4c // ldr z12, [x2, #75, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8589504b // ldr z11, [x2, #76, MUL VL] + WORD $0x8589544c // ldr z12, [x2, #77, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8589584b // ldr z11, [x2, #78, MUL VL] + WORD $0x85895c4c // ldr z12, [x2, #79, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x10_store + + // Load and process 32 bytes from input 4 to 10 outputs + WORD $0x8580412d // ldr z13, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x858a404b // ldr z11, [x2, #80, MUL VL] + WORD $0x858a444c // ldr z12, [x2, #81, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x858a484b // ldr z11, [x2, #82, MUL VL] + WORD $0x858a4c4c // ldr z12, [x2, #83, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x858a504b // ldr z11, [x2, #84, MUL VL] + WORD $0x858a544c // ldr z12, [x2, #85, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x858a584b // ldr z11, [x2, #86, MUL VL] + WORD $0x858a5c4c // ldr z12, [x2, #87, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x858b404b // ldr z11, [x2, #88, MUL VL] + WORD $0x858b444c // ldr z12, [x2, #89, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x858b484b // ldr z11, [x2, #90, MUL VL] + WORD $0x858b4c4c // ldr z12, [x2, #91, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x858b504b // ldr z11, [x2, #92, MUL VL] + WORD $0x858b544c // ldr z12, [x2, #93, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x858b584b // ldr z11, [x2, #94, MUL VL] + WORD $0x858b5c4c // ldr z12, [x2, #95, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x858c404b // ldr z11, [x2, #96, MUL VL] + WORD $0x858c444c // ldr z12, [x2, #97, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x858c484b // ldr z11, [x2, #98, MUL VL] + WORD $0x858c4c4c // ldr z12, [x2, #99, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x10_store + + // Load and process 32 bytes from input 5 to 10 outputs + WORD $0x8580414d // ldr z13, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x858c504b // ldr z11, [x2, #100, MUL VL] + WORD $0x858c544c // ldr z12, [x2, #101, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x858c584b // ldr z11, [x2, #102, MUL VL] + WORD $0x858c5c4c // ldr z12, [x2, #103, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x858d404b // ldr z11, [x2, #104, MUL VL] + WORD $0x858d444c // ldr z12, [x2, #105, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x858d484b // ldr z11, [x2, #106, MUL VL] + WORD $0x858d4c4c // ldr z12, [x2, #107, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x858d504b // ldr z11, [x2, #108, MUL VL] + WORD $0x858d544c // ldr z12, [x2, #109, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x858d584b // ldr z11, [x2, #110, MUL VL] + WORD $0x858d5c4c // ldr z12, [x2, #111, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x858e404b // ldr z11, [x2, #112, MUL VL] + WORD $0x858e444c // ldr z12, [x2, #113, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x858e484b // ldr z11, [x2, #114, MUL VL] + WORD $0x858e4c4c // ldr z12, [x2, #115, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x858e504b // ldr z11, [x2, #116, MUL VL] + WORD $0x858e544c // ldr z12, [x2, #117, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x858e584b // ldr z11, [x2, #118, MUL VL] + WORD $0x858e5c4c // ldr z12, [x2, #119, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x10_store + + // Load and process 32 bytes from input 6 to 10 outputs + WORD $0x8580416d // ldr z13, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x858f404b // ldr z11, [x2, #120, MUL VL] + WORD $0x858f444c // ldr z12, [x2, #121, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x858f484b // ldr z11, [x2, #122, MUL VL] + WORD $0x858f4c4c // ldr z12, [x2, #123, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x858f504b // ldr z11, [x2, #124, MUL VL] + WORD $0x858f544c // ldr z12, [x2, #125, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x858f584b // ldr z11, [x2, #126, MUL VL] + WORD $0x858f5c4c // ldr z12, [x2, #127, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8590404b // ldr z11, [x2, #128, MUL VL] + WORD $0x8590444c // ldr z12, [x2, #129, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8590484b // ldr z11, [x2, #130, MUL VL] + WORD $0x85904c4c // ldr z12, [x2, #131, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8590504b // ldr z11, [x2, #132, MUL VL] + WORD $0x8590544c // ldr z12, [x2, #133, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8590584b // ldr z11, [x2, #134, MUL VL] + WORD $0x85905c4c // ldr z12, [x2, #135, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8591404b // ldr z11, [x2, #136, MUL VL] + WORD $0x8591444c // ldr z12, [x2, #137, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8591484b // ldr z11, [x2, #138, MUL VL] + WORD $0x85914c4c // ldr z12, [x2, #139, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x10_store + + // Load and process 32 bytes from input 7 to 10 outputs + WORD $0x8580418d // ldr z13, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8591504b // ldr z11, [x2, #140, MUL VL] + WORD $0x8591544c // ldr z12, [x2, #141, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8591584b // ldr z11, [x2, #142, MUL VL] + WORD $0x85915c4c // ldr z12, [x2, #143, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8592404b // ldr z11, [x2, #144, MUL VL] + WORD $0x8592444c // ldr z12, [x2, #145, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8592484b // ldr z11, [x2, #146, MUL VL] + WORD $0x85924c4c // ldr z12, [x2, #147, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8592504b // ldr z11, [x2, #148, MUL VL] + WORD $0x8592544c // ldr z12, [x2, #149, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8592584b // ldr z11, [x2, #150, MUL VL] + WORD $0x85925c4c // ldr z12, [x2, #151, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8593404b // ldr z11, [x2, #152, MUL VL] + WORD $0x8593444c // ldr z12, [x2, #153, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8593484b // ldr z11, [x2, #154, MUL VL] + WORD $0x85934c4c // ldr z12, [x2, #155, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8593504b // ldr z11, [x2, #156, MUL VL] + WORD $0x8593544c // ldr z12, [x2, #157, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8593584b // ldr z11, [x2, #158, MUL VL] + WORD $0x85935c4c // ldr z12, [x2, #159, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x10_store + + // Load and process 32 bytes from input 8 to 10 outputs + WORD $0x858041ad // ldr z13, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8594404b // ldr z11, [x2, #160, MUL VL] + WORD $0x8594444c // ldr z12, [x2, #161, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8594484b // ldr z11, [x2, #162, MUL VL] + WORD $0x85944c4c // ldr z12, [x2, #163, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8594504b // ldr z11, [x2, #164, MUL VL] + WORD $0x8594544c // ldr z12, [x2, #165, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8594584b // ldr z11, [x2, #166, MUL VL] + WORD $0x85945c4c // ldr z12, [x2, #167, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8595404b // ldr z11, [x2, #168, MUL VL] + WORD $0x8595444c // ldr z12, [x2, #169, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8595484b // ldr z11, [x2, #170, MUL VL] + WORD $0x85954c4c // ldr z12, [x2, #171, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8595504b // ldr z11, [x2, #172, MUL VL] + WORD $0x8595544c // ldr z12, [x2, #173, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8595584b // ldr z11, [x2, #174, MUL VL] + WORD $0x85955c4c // ldr z12, [x2, #175, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8596404b // ldr z11, [x2, #176, MUL VL] + WORD $0x8596444c // ldr z12, [x2, #177, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8596484b // ldr z11, [x2, #178, MUL VL] + WORD $0x85964c4c // ldr z12, [x2, #179, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x10_store + + // Load and process 32 bytes from input 9 to 10 outputs + WORD $0x8580406d // ldr z13, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8596504b // ldr z11, [x2, #180, MUL VL] + WORD $0x8596544c // ldr z12, [x2, #181, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8596584b // ldr z11, [x2, #182, MUL VL] + WORD $0x85965c4c // ldr z12, [x2, #183, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8597404b // ldr z11, [x2, #184, MUL VL] + WORD $0x8597444c // ldr z12, [x2, #185, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8597484b // ldr z11, [x2, #186, MUL VL] + WORD $0x85974c4c // ldr z12, [x2, #187, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8597504b // ldr z11, [x2, #188, MUL VL] + WORD $0x8597544c // ldr z12, [x2, #189, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8597584b // ldr z11, [x2, #190, MUL VL] + WORD $0x85975c4c // ldr z12, [x2, #191, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8598404b // ldr z11, [x2, #192, MUL VL] + WORD $0x8598444c // ldr z12, [x2, #193, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8598484b // ldr z11, [x2, #194, MUL VL] + WORD $0x85984c4c // ldr z12, [x2, #195, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8598504b // ldr z11, [x2, #196, MUL VL] + WORD $0x8598544c // ldr z12, [x2, #197, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8598584b // ldr z11, [x2, #198, MUL VL] + WORD $0x85985c4c // ldr z12, [x2, #199, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + +mulSve_10x10_store: + // Store 10 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + MOVD 120(R14), R6 + WORD $0xe5ef40c5 // st1d { z5.d }, p0, [x6, x15, lsl #3] + MOVD 144(R14), R6 + WORD $0xe5ef40c6 // st1d { z6.d }, p0, [x6, x15, lsl #3] + MOVD 168(R14), R6 + WORD $0xe5ef40c7 // st1d { z7.d }, p0, [x6, x15, lsl #3] + MOVD 192(R14), R6 + WORD $0xe5ef40c8 // st1d { z8.d }, p0, [x6, x15, lsl #3] + MOVD 216(R14), R6 + WORD $0xe5ef40c9 // st1d { z9.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x10_loop + +mulSve_10x10_end: + RET + +// func mulSve_10x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: SVE +TEXT ·mulSve_10x10Xor(SB), NOSPLIT, $8-88 + WORD $0x25d8e3e0 // ptrue p0.d + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 215 YMM used + MOVD n+80(FP), R0 + MOVD matrix_base+0(FP), R2 + WORD $0xd345fc00 // lsr x0, x0, #5 + WORD $0xd37be800 // lsl x0, x0, #5 + WORD $0x04bf5030 // rdvl x16, #1 + WORD $0x9ad00800 // udiv x0, x0, x16 + WORD $0xea00001f // tst x0, x0 + BEQ mulSve_10x10Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + WORD $0x8b0f0021 // add x1, x1, x15 + WORD $0x8b0f0084 // add x4, x4, x15 + WORD $0x8b0f00a5 // add x5, x5, x15 + WORD $0x8b0f0108 // add x8, x8, x15 + WORD $0x8b0f0129 // add x9, x9, x15 + WORD $0x8b0f014a // add x10, x10, x15 + WORD $0x8b0f016b // add x11, x11, x15 + WORD $0x8b0f018c // add x12, x12, x15 + WORD $0x8b0f01ad // add x13, x13, x15 + WORD $0x8b0f0063 // add x3, x3, x15 + WORD $0xd343fdef // lsr x15, x15, #3 + WORD $0xd28001e6 // mov x6, #15 + WORD $0x05e038ca // mov z10.d, x6 + WORD $0x0521214a // dup z10.b, z10.b[0] + + // Load number of input shards + MOVD in_len+32(FP), R16 + WORD $0x04bf5031 // rdvl x17, #1 + WORD $0xd343fe31 // lsr x17, x17, #3 + +mulSve_10x10Xor_loop: + // Load and process 32 bytes from input 0 to 10 outputs + WORD $0x8580402d // ldr z13, [x1] + WORD $0x04215021 // addvl x1, x1, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + MOVD (R14), R6 + WORD $0xa5ef40c0 // ld1d { z0.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8580404b // ldr z11, [x2] + WORD $0x8580444c // ldr z12, [x2, #1, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + MOVD 24(R14), R6 + WORD $0xa5ef40c1 // ld1d { z1.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8580484b // ldr z11, [x2, #2, MUL VL] + WORD $0x85804c4c // ldr z12, [x2, #3, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + MOVD 48(R14), R6 + WORD $0xa5ef40c2 // ld1d { z2.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8580504b // ldr z11, [x2, #4, MUL VL] + WORD $0x8580544c // ldr z12, [x2, #5, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + MOVD 72(R14), R6 + WORD $0xa5ef40c3 // ld1d { z3.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8580584b // ldr z11, [x2, #6, MUL VL] + WORD $0x85805c4c // ldr z12, [x2, #7, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + MOVD 96(R14), R6 + WORD $0xa5ef40c4 // ld1d { z4.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8581404b // ldr z11, [x2, #8, MUL VL] + WORD $0x8581444c // ldr z12, [x2, #9, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + MOVD 120(R14), R6 + WORD $0xa5ef40c5 // ld1d { z5.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8581484b // ldr z11, [x2, #10, MUL VL] + WORD $0x85814c4c // ldr z12, [x2, #11, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + MOVD 144(R14), R6 + WORD $0xa5ef40c6 // ld1d { z6.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8581504b // ldr z11, [x2, #12, MUL VL] + WORD $0x8581544c // ldr z12, [x2, #13, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + MOVD 168(R14), R6 + WORD $0xa5ef40c7 // ld1d { z7.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8581584b // ldr z11, [x2, #14, MUL VL] + WORD $0x85815c4c // ldr z12, [x2, #15, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + MOVD 192(R14), R6 + WORD $0xa5ef40c8 // ld1d { z8.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8582404b // ldr z11, [x2, #16, MUL VL] + WORD $0x8582444c // ldr z12, [x2, #17, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + MOVD 216(R14), R6 + WORD $0xa5ef40c9 // ld1d { z9.d }, p0/z, [x6, x15, lsl #3] + WORD $0x8582484b // ldr z11, [x2, #18, MUL VL] + WORD $0x85824c4c // ldr z12, [x2, #19, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $1, R16 + BEQ mulSve_10x10Xor_store + + // Load and process 32 bytes from input 1 to 10 outputs + WORD $0x8580408d // ldr z13, [x4] + WORD $0x04245024 // addvl x4, x4, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8582504b // ldr z11, [x2, #20, MUL VL] + WORD $0x8582544c // ldr z12, [x2, #21, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8582584b // ldr z11, [x2, #22, MUL VL] + WORD $0x85825c4c // ldr z12, [x2, #23, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8583404b // ldr z11, [x2, #24, MUL VL] + WORD $0x8583444c // ldr z12, [x2, #25, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8583484b // ldr z11, [x2, #26, MUL VL] + WORD $0x85834c4c // ldr z12, [x2, #27, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8583504b // ldr z11, [x2, #28, MUL VL] + WORD $0x8583544c // ldr z12, [x2, #29, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8583584b // ldr z11, [x2, #30, MUL VL] + WORD $0x85835c4c // ldr z12, [x2, #31, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8584404b // ldr z11, [x2, #32, MUL VL] + WORD $0x8584444c // ldr z12, [x2, #33, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8584484b // ldr z11, [x2, #34, MUL VL] + WORD $0x85844c4c // ldr z12, [x2, #35, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8584504b // ldr z11, [x2, #36, MUL VL] + WORD $0x8584544c // ldr z12, [x2, #37, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8584584b // ldr z11, [x2, #38, MUL VL] + WORD $0x85845c4c // ldr z12, [x2, #39, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $2, R16 + BEQ mulSve_10x10Xor_store + + // Load and process 32 bytes from input 2 to 10 outputs + WORD $0x858040ad // ldr z13, [x5] + WORD $0x04255025 // addvl x5, x5, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8585404b // ldr z11, [x2, #40, MUL VL] + WORD $0x8585444c // ldr z12, [x2, #41, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8585484b // ldr z11, [x2, #42, MUL VL] + WORD $0x85854c4c // ldr z12, [x2, #43, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8585504b // ldr z11, [x2, #44, MUL VL] + WORD $0x8585544c // ldr z12, [x2, #45, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8585584b // ldr z11, [x2, #46, MUL VL] + WORD $0x85855c4c // ldr z12, [x2, #47, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8586404b // ldr z11, [x2, #48, MUL VL] + WORD $0x8586444c // ldr z12, [x2, #49, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8586484b // ldr z11, [x2, #50, MUL VL] + WORD $0x85864c4c // ldr z12, [x2, #51, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8586504b // ldr z11, [x2, #52, MUL VL] + WORD $0x8586544c // ldr z12, [x2, #53, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8586584b // ldr z11, [x2, #54, MUL VL] + WORD $0x85865c4c // ldr z12, [x2, #55, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8587404b // ldr z11, [x2, #56, MUL VL] + WORD $0x8587444c // ldr z12, [x2, #57, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8587484b // ldr z11, [x2, #58, MUL VL] + WORD $0x85874c4c // ldr z12, [x2, #59, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $3, R16 + BEQ mulSve_10x10Xor_store + + // Load and process 32 bytes from input 3 to 10 outputs + WORD $0x8580410d // ldr z13, [x8] + WORD $0x04285028 // addvl x8, x8, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8587504b // ldr z11, [x2, #60, MUL VL] + WORD $0x8587544c // ldr z12, [x2, #61, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8587584b // ldr z11, [x2, #62, MUL VL] + WORD $0x85875c4c // ldr z12, [x2, #63, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8588404b // ldr z11, [x2, #64, MUL VL] + WORD $0x8588444c // ldr z12, [x2, #65, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8588484b // ldr z11, [x2, #66, MUL VL] + WORD $0x85884c4c // ldr z12, [x2, #67, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8588504b // ldr z11, [x2, #68, MUL VL] + WORD $0x8588544c // ldr z12, [x2, #69, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8588584b // ldr z11, [x2, #70, MUL VL] + WORD $0x85885c4c // ldr z12, [x2, #71, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8589404b // ldr z11, [x2, #72, MUL VL] + WORD $0x8589444c // ldr z12, [x2, #73, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8589484b // ldr z11, [x2, #74, MUL VL] + WORD $0x85894c4c // ldr z12, [x2, #75, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8589504b // ldr z11, [x2, #76, MUL VL] + WORD $0x8589544c // ldr z12, [x2, #77, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8589584b // ldr z11, [x2, #78, MUL VL] + WORD $0x85895c4c // ldr z12, [x2, #79, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $4, R16 + BEQ mulSve_10x10Xor_store + + // Load and process 32 bytes from input 4 to 10 outputs + WORD $0x8580412d // ldr z13, [x9] + WORD $0x04295029 // addvl x9, x9, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x858a404b // ldr z11, [x2, #80, MUL VL] + WORD $0x858a444c // ldr z12, [x2, #81, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x858a484b // ldr z11, [x2, #82, MUL VL] + WORD $0x858a4c4c // ldr z12, [x2, #83, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x858a504b // ldr z11, [x2, #84, MUL VL] + WORD $0x858a544c // ldr z12, [x2, #85, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x858a584b // ldr z11, [x2, #86, MUL VL] + WORD $0x858a5c4c // ldr z12, [x2, #87, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x858b404b // ldr z11, [x2, #88, MUL VL] + WORD $0x858b444c // ldr z12, [x2, #89, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x858b484b // ldr z11, [x2, #90, MUL VL] + WORD $0x858b4c4c // ldr z12, [x2, #91, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x858b504b // ldr z11, [x2, #92, MUL VL] + WORD $0x858b544c // ldr z12, [x2, #93, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x858b584b // ldr z11, [x2, #94, MUL VL] + WORD $0x858b5c4c // ldr z12, [x2, #95, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x858c404b // ldr z11, [x2, #96, MUL VL] + WORD $0x858c444c // ldr z12, [x2, #97, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x858c484b // ldr z11, [x2, #98, MUL VL] + WORD $0x858c4c4c // ldr z12, [x2, #99, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $5, R16 + BEQ mulSve_10x10Xor_store + + // Load and process 32 bytes from input 5 to 10 outputs + WORD $0x8580414d // ldr z13, [x10] + WORD $0x042a502a // addvl x10, x10, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x858c504b // ldr z11, [x2, #100, MUL VL] + WORD $0x858c544c // ldr z12, [x2, #101, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x858c584b // ldr z11, [x2, #102, MUL VL] + WORD $0x858c5c4c // ldr z12, [x2, #103, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x858d404b // ldr z11, [x2, #104, MUL VL] + WORD $0x858d444c // ldr z12, [x2, #105, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x858d484b // ldr z11, [x2, #106, MUL VL] + WORD $0x858d4c4c // ldr z12, [x2, #107, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x858d504b // ldr z11, [x2, #108, MUL VL] + WORD $0x858d544c // ldr z12, [x2, #109, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x858d584b // ldr z11, [x2, #110, MUL VL] + WORD $0x858d5c4c // ldr z12, [x2, #111, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x858e404b // ldr z11, [x2, #112, MUL VL] + WORD $0x858e444c // ldr z12, [x2, #113, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x858e484b // ldr z11, [x2, #114, MUL VL] + WORD $0x858e4c4c // ldr z12, [x2, #115, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x858e504b // ldr z11, [x2, #116, MUL VL] + WORD $0x858e544c // ldr z12, [x2, #117, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x858e584b // ldr z11, [x2, #118, MUL VL] + WORD $0x858e5c4c // ldr z12, [x2, #119, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $6, R16 + BEQ mulSve_10x10Xor_store + + // Load and process 32 bytes from input 6 to 10 outputs + WORD $0x8580416d // ldr z13, [x11] + WORD $0x042b502b // addvl x11, x11, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x858f404b // ldr z11, [x2, #120, MUL VL] + WORD $0x858f444c // ldr z12, [x2, #121, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x858f484b // ldr z11, [x2, #122, MUL VL] + WORD $0x858f4c4c // ldr z12, [x2, #123, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x858f504b // ldr z11, [x2, #124, MUL VL] + WORD $0x858f544c // ldr z12, [x2, #125, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x858f584b // ldr z11, [x2, #126, MUL VL] + WORD $0x858f5c4c // ldr z12, [x2, #127, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8590404b // ldr z11, [x2, #128, MUL VL] + WORD $0x8590444c // ldr z12, [x2, #129, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8590484b // ldr z11, [x2, #130, MUL VL] + WORD $0x85904c4c // ldr z12, [x2, #131, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8590504b // ldr z11, [x2, #132, MUL VL] + WORD $0x8590544c // ldr z12, [x2, #133, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8590584b // ldr z11, [x2, #134, MUL VL] + WORD $0x85905c4c // ldr z12, [x2, #135, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8591404b // ldr z11, [x2, #136, MUL VL] + WORD $0x8591444c // ldr z12, [x2, #137, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8591484b // ldr z11, [x2, #138, MUL VL] + WORD $0x85914c4c // ldr z12, [x2, #139, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $7, R16 + BEQ mulSve_10x10Xor_store + + // Load and process 32 bytes from input 7 to 10 outputs + WORD $0x8580418d // ldr z13, [x12] + WORD $0x042c502c // addvl x12, x12, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8591504b // ldr z11, [x2, #140, MUL VL] + WORD $0x8591544c // ldr z12, [x2, #141, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8591584b // ldr z11, [x2, #142, MUL VL] + WORD $0x85915c4c // ldr z12, [x2, #143, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8592404b // ldr z11, [x2, #144, MUL VL] + WORD $0x8592444c // ldr z12, [x2, #145, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8592484b // ldr z11, [x2, #146, MUL VL] + WORD $0x85924c4c // ldr z12, [x2, #147, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8592504b // ldr z11, [x2, #148, MUL VL] + WORD $0x8592544c // ldr z12, [x2, #149, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8592584b // ldr z11, [x2, #150, MUL VL] + WORD $0x85925c4c // ldr z12, [x2, #151, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8593404b // ldr z11, [x2, #152, MUL VL] + WORD $0x8593444c // ldr z12, [x2, #153, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8593484b // ldr z11, [x2, #154, MUL VL] + WORD $0x85934c4c // ldr z12, [x2, #155, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8593504b // ldr z11, [x2, #156, MUL VL] + WORD $0x8593544c // ldr z12, [x2, #157, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8593584b // ldr z11, [x2, #158, MUL VL] + WORD $0x85935c4c // ldr z12, [x2, #159, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $8, R16 + BEQ mulSve_10x10Xor_store + + // Load and process 32 bytes from input 8 to 10 outputs + WORD $0x858041ad // ldr z13, [x13] + WORD $0x042d502d // addvl x13, x13, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8594404b // ldr z11, [x2, #160, MUL VL] + WORD $0x8594444c // ldr z12, [x2, #161, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8594484b // ldr z11, [x2, #162, MUL VL] + WORD $0x85944c4c // ldr z12, [x2, #163, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8594504b // ldr z11, [x2, #164, MUL VL] + WORD $0x8594544c // ldr z12, [x2, #165, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8594584b // ldr z11, [x2, #166, MUL VL] + WORD $0x85945c4c // ldr z12, [x2, #167, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8595404b // ldr z11, [x2, #168, MUL VL] + WORD $0x8595444c // ldr z12, [x2, #169, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8595484b // ldr z11, [x2, #170, MUL VL] + WORD $0x85954c4c // ldr z12, [x2, #171, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8595504b // ldr z11, [x2, #172, MUL VL] + WORD $0x8595544c // ldr z12, [x2, #173, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8595584b // ldr z11, [x2, #174, MUL VL] + WORD $0x85955c4c // ldr z12, [x2, #175, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8596404b // ldr z11, [x2, #176, MUL VL] + WORD $0x8596444c // ldr z12, [x2, #177, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8596484b // ldr z11, [x2, #178, MUL VL] + WORD $0x85964c4c // ldr z12, [x2, #179, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + // Check for early termination + CMP $9, R16 + BEQ mulSve_10x10Xor_store + + // Load and process 32 bytes from input 9 to 10 outputs + WORD $0x8580406d // ldr z13, [x3] + WORD $0x04235023 // addvl x3, x3, #1 + WORD $0x04fc95ae // lsr z14.d, z13.d, #4 + WORD $0x042a31ad // and z13.d, z13.d, z10.d + WORD $0x042a31ce // and z14.d, z14.d, z10.d + WORD $0x8596504b // ldr z11, [x2, #180, MUL VL] + WORD $0x8596544c // ldr z12, [x2, #181, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3000 // eor z0.d, z0.d, z11.d + WORD $0x04ac3000 // eor z0.d, z0.d, z12.d + WORD $0x8596584b // ldr z11, [x2, #182, MUL VL] + WORD $0x85965c4c // ldr z12, [x2, #183, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3021 // eor z1.d, z1.d, z11.d + WORD $0x04ac3021 // eor z1.d, z1.d, z12.d + WORD $0x8597404b // ldr z11, [x2, #184, MUL VL] + WORD $0x8597444c // ldr z12, [x2, #185, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3042 // eor z2.d, z2.d, z11.d + WORD $0x04ac3042 // eor z2.d, z2.d, z12.d + WORD $0x8597484b // ldr z11, [x2, #186, MUL VL] + WORD $0x85974c4c // ldr z12, [x2, #187, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3063 // eor z3.d, z3.d, z11.d + WORD $0x04ac3063 // eor z3.d, z3.d, z12.d + WORD $0x8597504b // ldr z11, [x2, #188, MUL VL] + WORD $0x8597544c // ldr z12, [x2, #189, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3084 // eor z4.d, z4.d, z11.d + WORD $0x04ac3084 // eor z4.d, z4.d, z12.d + WORD $0x8597584b // ldr z11, [x2, #190, MUL VL] + WORD $0x85975c4c // ldr z12, [x2, #191, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30a5 // eor z5.d, z5.d, z11.d + WORD $0x04ac30a5 // eor z5.d, z5.d, z12.d + WORD $0x8598404b // ldr z11, [x2, #192, MUL VL] + WORD $0x8598444c // ldr z12, [x2, #193, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30c6 // eor z6.d, z6.d, z11.d + WORD $0x04ac30c6 // eor z6.d, z6.d, z12.d + WORD $0x8598484b // ldr z11, [x2, #194, MUL VL] + WORD $0x85984c4c // ldr z12, [x2, #195, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab30e7 // eor z7.d, z7.d, z11.d + WORD $0x04ac30e7 // eor z7.d, z7.d, z12.d + WORD $0x8598504b // ldr z11, [x2, #196, MUL VL] + WORD $0x8598544c // ldr z12, [x2, #197, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3108 // eor z8.d, z8.d, z11.d + WORD $0x04ac3108 // eor z8.d, z8.d, z12.d + WORD $0x8598584b // ldr z11, [x2, #198, MUL VL] + WORD $0x85985c4c // ldr z12, [x2, #199, MUL VL] + WORD $0x052d316b // tbl z11.b, z11.b, z13.b + WORD $0x052e318c // tbl z12.b, z12.b, z14.b + WORD $0x04ab3129 // eor z9.d, z9.d, z11.d + WORD $0x04ac3129 // eor z9.d, z9.d, z12.d + +mulSve_10x10Xor_store: + // Store 10 outputs + MOVD (R14), R6 + WORD $0xe5ef40c0 // st1d { z0.d }, p0, [x6, x15, lsl #3] + MOVD 24(R14), R6 + WORD $0xe5ef40c1 // st1d { z1.d }, p0, [x6, x15, lsl #3] + MOVD 48(R14), R6 + WORD $0xe5ef40c2 // st1d { z2.d }, p0, [x6, x15, lsl #3] + MOVD 72(R14), R6 + WORD $0xe5ef40c3 // st1d { z3.d }, p0, [x6, x15, lsl #3] + MOVD 96(R14), R6 + WORD $0xe5ef40c4 // st1d { z4.d }, p0, [x6, x15, lsl #3] + MOVD 120(R14), R6 + WORD $0xe5ef40c5 // st1d { z5.d }, p0, [x6, x15, lsl #3] + MOVD 144(R14), R6 + WORD $0xe5ef40c6 // st1d { z6.d }, p0, [x6, x15, lsl #3] + MOVD 168(R14), R6 + WORD $0xe5ef40c7 // st1d { z7.d }, p0, [x6, x15, lsl #3] + MOVD 192(R14), R6 + WORD $0xe5ef40c8 // st1d { z8.d }, p0, [x6, x15, lsl #3] + MOVD 216(R14), R6 + WORD $0xe5ef40c9 // st1d { z9.d }, p0, [x6, x15, lsl #3] + + // Prepare for next loop + WORD $0x8b1101ef // add x15, x15, x17 + WORD $0xf1000400 // subs x0, x0, #1 + BNE mulSve_10x10Xor_loop + +mulSve_10x10Xor_end: + RET + +// func mulNeon_10x1_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x1_64(SB), $0-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 46 YMM used + MOVD n+80(FP), R0 + LSR $6, R0 + TST R0, R0 + BEQ mulNeon_10x1_64_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD (R14), R14 + MOVD start+72(FP), R15 + + // Add start offset to output + ADD R15, R14 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + MOVD $15, R15 + VMOV R15, V4.B[0] + VDUP V4.B[0], V4.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x1_64_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 64 bytes from input 0 to 1 outputs + VLD1.P 32(R1), [V12.B16, V13.B16] + VLD1.P 32(R1), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V8.B16, V0.B16 + VEOR V7.B16, V9.B16, V1.B16 + VEOR V10.B16, V12.B16, V2.B16 + VEOR V11.B16, V13.B16, V3.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x1_64_store + + // Load and process 64 bytes from input 1 to 1 outputs + VLD1.P 32(R4), [V12.B16, V13.B16] + VLD1.P 32(R4), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x1_64_store + + // Load and process 64 bytes from input 2 to 1 outputs + VLD1.P 32(R5), [V12.B16, V13.B16] + VLD1.P 32(R5), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x1_64_store + + // Load and process 64 bytes from input 3 to 1 outputs + VLD1.P 32(R8), [V12.B16, V13.B16] + VLD1.P 32(R8), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x1_64_store + + // Load and process 64 bytes from input 4 to 1 outputs + VLD1.P 32(R9), [V12.B16, V13.B16] + VLD1.P 32(R9), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x1_64_store + + // Load and process 64 bytes from input 5 to 1 outputs + VLD1.P 32(R10), [V12.B16, V13.B16] + VLD1.P 32(R10), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x1_64_store + + // Load and process 64 bytes from input 6 to 1 outputs + VLD1.P 32(R11), [V12.B16, V13.B16] + VLD1.P 32(R11), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x1_64_store + + // Load and process 64 bytes from input 7 to 1 outputs + VLD1.P 32(R12), [V12.B16, V13.B16] + VLD1.P 32(R12), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x1_64_store + + // Load and process 64 bytes from input 8 to 1 outputs + VLD1.P 32(R13), [V12.B16, V13.B16] + VLD1.P 32(R13), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x1_64_store + + // Load and process 64 bytes from input 9 to 1 outputs + VLD1.P 32(R3), [V12.B16, V13.B16] + VLD1.P 32(R3), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + +mulNeon_10x1_64_store: + // Store 1 outputs + VST1.P [V0.D2, V1.D2], 32(R14) + VST1.P [V2.D2, V3.D2], 32(R14) + + // Prepare for next loop + SUBS $1, R0 + BNE mulNeon_10x1_64_loop + +mulNeon_10x1_64_end: + RET + +// func mulNeon_10x1_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x1_64Xor(SB), $0-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 46 YMM used + MOVD n+80(FP), R0 + LSR $6, R0 + TST R0, R0 + BEQ mulNeon_10x1_64Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD (R14), R14 + MOVD start+72(FP), R15 + + // Add start offset to output + ADD R15, R14 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + MOVD $15, R15 + VMOV R15, V4.B[0] + VDUP V4.B[0], V4.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x1_64Xor_loop: + MOVD matrix_base+0(FP), R2 + // Load 1 outputs + VLD1.P 32(R14), [V0.B16, V1.B16] + VLD1.P 32(R14), [V2.B16, V3.B16] + + // Load and process 64 bytes from input 0 to 1 outputs + VLD1.P 32(R1), [V12.B16, V13.B16] + VLD1.P 32(R1), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x1_64Xor_store + + // Load and process 64 bytes from input 1 to 1 outputs + VLD1.P 32(R4), [V12.B16, V13.B16] + VLD1.P 32(R4), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x1_64Xor_store + + // Load and process 64 bytes from input 2 to 1 outputs + VLD1.P 32(R5), [V12.B16, V13.B16] + VLD1.P 32(R5), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x1_64Xor_store + + // Load and process 64 bytes from input 3 to 1 outputs + VLD1.P 32(R8), [V12.B16, V13.B16] + VLD1.P 32(R8), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x1_64Xor_store + + // Load and process 64 bytes from input 4 to 1 outputs + VLD1.P 32(R9), [V12.B16, V13.B16] + VLD1.P 32(R9), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x1_64Xor_store + + // Load and process 64 bytes from input 5 to 1 outputs + VLD1.P 32(R10), [V12.B16, V13.B16] + VLD1.P 32(R10), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x1_64Xor_store + + // Load and process 64 bytes from input 6 to 1 outputs + VLD1.P 32(R11), [V12.B16, V13.B16] + VLD1.P 32(R11), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x1_64Xor_store + + // Load and process 64 bytes from input 7 to 1 outputs + VLD1.P 32(R12), [V12.B16, V13.B16] + VLD1.P 32(R12), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x1_64Xor_store + + // Load and process 64 bytes from input 8 to 1 outputs + VLD1.P 32(R13), [V12.B16, V13.B16] + VLD1.P 32(R13), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x1_64Xor_store + + // Load and process 64 bytes from input 9 to 1 outputs + VLD1.P 32(R3), [V12.B16, V13.B16] + VLD1.P 32(R3), [V10.B16, V11.B16] + VUSHR $4, V12.B16, V14.B16 + VUSHR $4, V13.B16, V15.B16 + VUSHR $4, V10.B16, V16.B16 + VUSHR $4, V11.B16, V17.B16 + VAND V4.B16, V12.B16, V12.B16 + VAND V4.B16, V13.B16, V13.B16 + VAND V4.B16, V10.B16, V10.B16 + VAND V4.B16, V11.B16, V11.B16 + VAND V4.B16, V14.B16, V14.B16 + VAND V4.B16, V15.B16, V15.B16 + VAND V4.B16, V16.B16, V16.B16 + VAND V4.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V6.B16, V7.B16] + VLD1.P 32(R2), [V8.B16, V9.B16] + VTBL V10.B16, [V6.B16], V10.B16 + VTBL V11.B16, [V7.B16], V11.B16 + VTBL V12.B16, [V6.B16], V6.B16 + VTBL V13.B16, [V7.B16], V7.B16 + VTBL V16.B16, [V8.B16], V12.B16 + VTBL V17.B16, [V9.B16], V13.B16 + VTBL V14.B16, [V8.B16], V8.B16 + VTBL V15.B16, [V9.B16], V9.B16 + VEOR V6.B16, V0.B16, V0.B16 + VEOR V7.B16, V1.B16, V1.B16 + VEOR V8.B16, V0.B16, V0.B16 + VEOR V9.B16, V1.B16, V1.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + +mulNeon_10x1_64Xor_store: + // Store 1 outputs + SUB $64, R14 + VST1.P [V0.D2, V1.D2], 32(R14) + VST1.P [V2.D2, V3.D2], 32(R14) + + // Prepare for next loop + SUBS $1, R0 + BNE mulNeon_10x1_64Xor_loop + +mulNeon_10x1_64Xor_end: + RET + +// func mulNeon_10x2_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x2_64(SB), $8-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 89 YMM used + MOVD n+80(FP), R0 + LSR $6, R0 + TST R0, R0 + BEQ mulNeon_10x2_64_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD (R14), R15 + MOVD 24(R14), R14 + MOVD start+72(FP), R6 + + // Add start offset to output + ADD R6, R15 + ADD R6, R14 + + // Add start offset to input + ADD R6, R1 + ADD R6, R4 + ADD R6, R5 + ADD R6, R8 + ADD R6, R9 + ADD R6, R10 + ADD R6, R11 + ADD R6, R12 + ADD R6, R13 + ADD R6, R3 + MOVD $15, R6 + VMOV R6, V8.B[0] + VDUP V8.B[0], V8.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x2_64_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 64 bytes from input 0 to 2 outputs + VLD1.P 32(R1), [V18.B16, V19.B16] + VLD1.P 32(R1), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V12.B16, V0.B16 + VEOR V11.B16, V13.B16, V1.B16 + VEOR V14.B16, V16.B16, V2.B16 + VEOR V15.B16, V17.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V12.B16, V4.B16 + VEOR V11.B16, V13.B16, V5.B16 + VEOR V14.B16, V16.B16, V6.B16 + VEOR V15.B16, V17.B16, V7.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x2_64_store + + // Load and process 64 bytes from input 1 to 2 outputs + VLD1.P 32(R4), [V18.B16, V19.B16] + VLD1.P 32(R4), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x2_64_store + + // Load and process 64 bytes from input 2 to 2 outputs + VLD1.P 32(R5), [V18.B16, V19.B16] + VLD1.P 32(R5), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x2_64_store + + // Load and process 64 bytes from input 3 to 2 outputs + VLD1.P 32(R8), [V18.B16, V19.B16] + VLD1.P 32(R8), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x2_64_store + + // Load and process 64 bytes from input 4 to 2 outputs + VLD1.P 32(R9), [V18.B16, V19.B16] + VLD1.P 32(R9), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x2_64_store + + // Load and process 64 bytes from input 5 to 2 outputs + VLD1.P 32(R10), [V18.B16, V19.B16] + VLD1.P 32(R10), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x2_64_store + + // Load and process 64 bytes from input 6 to 2 outputs + VLD1.P 32(R11), [V18.B16, V19.B16] + VLD1.P 32(R11), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x2_64_store + + // Load and process 64 bytes from input 7 to 2 outputs + VLD1.P 32(R12), [V18.B16, V19.B16] + VLD1.P 32(R12), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x2_64_store + + // Load and process 64 bytes from input 8 to 2 outputs + VLD1.P 32(R13), [V18.B16, V19.B16] + VLD1.P 32(R13), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x2_64_store + + // Load and process 64 bytes from input 9 to 2 outputs + VLD1.P 32(R3), [V18.B16, V19.B16] + VLD1.P 32(R3), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + +mulNeon_10x2_64_store: + // Store 2 outputs + VST1.P [V0.D2, V1.D2], 32(R15) + VST1.P [V2.D2, V3.D2], 32(R15) + VST1.P [V4.D2, V5.D2], 32(R14) + VST1.P [V6.D2, V7.D2], 32(R14) + + // Prepare for next loop + SUBS $1, R0 + BNE mulNeon_10x2_64_loop + +mulNeon_10x2_64_end: + RET + +// func mulNeon_10x2_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x2_64Xor(SB), $8-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 89 YMM used + MOVD n+80(FP), R0 + LSR $6, R0 + TST R0, R0 + BEQ mulNeon_10x2_64Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD (R14), R15 + MOVD 24(R14), R14 + MOVD start+72(FP), R6 + + // Add start offset to output + ADD R6, R15 + ADD R6, R14 + + // Add start offset to input + ADD R6, R1 + ADD R6, R4 + ADD R6, R5 + ADD R6, R8 + ADD R6, R9 + ADD R6, R10 + ADD R6, R11 + ADD R6, R12 + ADD R6, R13 + ADD R6, R3 + MOVD $15, R6 + VMOV R6, V8.B[0] + VDUP V8.B[0], V8.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x2_64Xor_loop: + MOVD matrix_base+0(FP), R2 + // Load 2 outputs + VLD1.P 32(R15), [V0.B16, V1.B16] + VLD1.P 32(R15), [V2.B16, V3.B16] + VLD1.P 32(R14), [V4.B16, V5.B16] + VLD1.P 32(R14), [V6.B16, V7.B16] + + // Load and process 64 bytes from input 0 to 2 outputs + VLD1.P 32(R1), [V18.B16, V19.B16] + VLD1.P 32(R1), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x2_64Xor_store + + // Load and process 64 bytes from input 1 to 2 outputs + VLD1.P 32(R4), [V18.B16, V19.B16] + VLD1.P 32(R4), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x2_64Xor_store + + // Load and process 64 bytes from input 2 to 2 outputs + VLD1.P 32(R5), [V18.B16, V19.B16] + VLD1.P 32(R5), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x2_64Xor_store + + // Load and process 64 bytes from input 3 to 2 outputs + VLD1.P 32(R8), [V18.B16, V19.B16] + VLD1.P 32(R8), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x2_64Xor_store + + // Load and process 64 bytes from input 4 to 2 outputs + VLD1.P 32(R9), [V18.B16, V19.B16] + VLD1.P 32(R9), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x2_64Xor_store + + // Load and process 64 bytes from input 5 to 2 outputs + VLD1.P 32(R10), [V18.B16, V19.B16] + VLD1.P 32(R10), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x2_64Xor_store + + // Load and process 64 bytes from input 6 to 2 outputs + VLD1.P 32(R11), [V18.B16, V19.B16] + VLD1.P 32(R11), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x2_64Xor_store + + // Load and process 64 bytes from input 7 to 2 outputs + VLD1.P 32(R12), [V18.B16, V19.B16] + VLD1.P 32(R12), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x2_64Xor_store + + // Load and process 64 bytes from input 8 to 2 outputs + VLD1.P 32(R13), [V18.B16, V19.B16] + VLD1.P 32(R13), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x2_64Xor_store + + // Load and process 64 bytes from input 9 to 2 outputs + VLD1.P 32(R3), [V18.B16, V19.B16] + VLD1.P 32(R3), [V22.B16, V23.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V8.B16, V18.B16, V18.B16 + VAND V8.B16, V19.B16, V19.B16 + VAND V8.B16, V22.B16, V22.B16 + VAND V8.B16, V23.B16, V23.B16 + VAND V8.B16, V20.B16, V20.B16 + VAND V8.B16, V21.B16, V21.B16 + VAND V8.B16, V24.B16, V24.B16 + VAND V8.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V22.B16, [V10.B16], V14.B16 + VTBL V23.B16, [V11.B16], V15.B16 + VTBL V18.B16, [V10.B16], V10.B16 + VTBL V19.B16, [V11.B16], V11.B16 + VTBL V24.B16, [V12.B16], V16.B16 + VTBL V25.B16, [V13.B16], V17.B16 + VTBL V20.B16, [V12.B16], V12.B16 + VTBL V21.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + +mulNeon_10x2_64Xor_store: + // Store 2 outputs + SUB $64, R15 + VST1.P [V0.D2, V1.D2], 32(R15) + VST1.P [V2.D2, V3.D2], 32(R15) + SUB $64, R14 + VST1.P [V4.D2, V5.D2], 32(R14) + VST1.P [V6.D2, V7.D2], 32(R14) + + // Prepare for next loop + SUBS $1, R0 + BNE mulNeon_10x2_64Xor_loop + +mulNeon_10x2_64Xor_end: + RET + +// func mulNeon_10x3_64(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x3_64(SB), $8-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 130 YMM used + MOVD n+80(FP), R0 + LSR $6, R0 + TST R0, R0 + BEQ mulNeon_10x3_64_end + MOVD in_base+24(FP), R0 + MOVD (R0), R3 + MOVD 24(R0), R1 + MOVD 48(R0), R4 + MOVD 72(R0), R5 + MOVD 96(R0), R8 + MOVD 120(R0), R9 + MOVD 144(R0), R10 + MOVD 168(R0), R11 + MOVD 192(R0), R12 + MOVD 216(R0), R0 + MOVD out_base+48(FP), R13 + MOVD (R13), R14 + MOVD 24(R13), R15 + MOVD 48(R13), R13 + MOVD start+72(FP), R6 + + // Add start offset to output + ADD R6, R14 + ADD R6, R15 + ADD R6, R13 + + // Add start offset to input + ADD R6, R3 + ADD R6, R1 + ADD R6, R4 + ADD R6, R5 + ADD R6, R8 + ADD R6, R9 + ADD R6, R10 + ADD R6, R11 + ADD R6, R12 + ADD R6, R0 + MOVD $15, R6 + VMOV R6, V12.B[0] + VDUP V12.B[0], V12.B16 + + // Reload length to save a register + MOVD n+80(FP), R6 + LSR $6, R6 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x3_64_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 64 bytes from input 0 to 3 outputs + VLD1.P 32(R3), [V22.B16, V23.B16] + VLD1.P 32(R3), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V16.B16, V0.B16 + VEOR V15.B16, V17.B16, V1.B16 + VEOR V18.B16, V20.B16, V2.B16 + VEOR V19.B16, V21.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V16.B16, V4.B16 + VEOR V15.B16, V17.B16, V5.B16 + VEOR V18.B16, V20.B16, V6.B16 + VEOR V19.B16, V21.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V16.B16, V8.B16 + VEOR V15.B16, V17.B16, V9.B16 + VEOR V18.B16, V20.B16, V10.B16 + VEOR V19.B16, V21.B16, V11.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x3_64_store + + // Load and process 64 bytes from input 1 to 3 outputs + VLD1.P 32(R1), [V22.B16, V23.B16] + VLD1.P 32(R1), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x3_64_store + + // Load and process 64 bytes from input 2 to 3 outputs + VLD1.P 32(R4), [V22.B16, V23.B16] + VLD1.P 32(R4), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x3_64_store + + // Load and process 64 bytes from input 3 to 3 outputs + VLD1.P 32(R5), [V22.B16, V23.B16] + VLD1.P 32(R5), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x3_64_store + + // Load and process 64 bytes from input 4 to 3 outputs + VLD1.P 32(R8), [V22.B16, V23.B16] + VLD1.P 32(R8), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x3_64_store + + // Load and process 64 bytes from input 5 to 3 outputs + VLD1.P 32(R9), [V22.B16, V23.B16] + VLD1.P 32(R9), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x3_64_store + + // Load and process 64 bytes from input 6 to 3 outputs + VLD1.P 32(R10), [V22.B16, V23.B16] + VLD1.P 32(R10), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x3_64_store + + // Load and process 64 bytes from input 7 to 3 outputs + VLD1.P 32(R11), [V22.B16, V23.B16] + VLD1.P 32(R11), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x3_64_store + + // Load and process 64 bytes from input 8 to 3 outputs + VLD1.P 32(R12), [V22.B16, V23.B16] + VLD1.P 32(R12), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x3_64_store + + // Load and process 64 bytes from input 9 to 3 outputs + VLD1.P 32(R0), [V22.B16, V23.B16] + VLD1.P 32(R0), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + +mulNeon_10x3_64_store: + // Store 3 outputs + VST1.P [V0.D2, V1.D2], 32(R14) + VST1.P [V2.D2, V3.D2], 32(R14) + VST1.P [V4.D2, V5.D2], 32(R15) + VST1.P [V6.D2, V7.D2], 32(R15) + VST1.P [V8.D2, V9.D2], 32(R13) + VST1.P [V10.D2, V11.D2], 32(R13) + + // Prepare for next loop + SUBS $1, R6 + BNE mulNeon_10x3_64_loop + +mulNeon_10x3_64_end: + RET + +// func mulNeon_10x3_64Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x3_64Xor(SB), $8-88 + // Loading no tables to registers + // Destination kept in GP registers + // Full registers estimated 130 YMM used + MOVD n+80(FP), R0 + LSR $6, R0 + TST R0, R0 + BEQ mulNeon_10x3_64Xor_end + MOVD in_base+24(FP), R0 + MOVD (R0), R3 + MOVD 24(R0), R1 + MOVD 48(R0), R4 + MOVD 72(R0), R5 + MOVD 96(R0), R8 + MOVD 120(R0), R9 + MOVD 144(R0), R10 + MOVD 168(R0), R11 + MOVD 192(R0), R12 + MOVD 216(R0), R0 + MOVD out_base+48(FP), R13 + MOVD (R13), R14 + MOVD 24(R13), R15 + MOVD 48(R13), R13 + MOVD start+72(FP), R6 + + // Add start offset to output + ADD R6, R14 + ADD R6, R15 + ADD R6, R13 + + // Add start offset to input + ADD R6, R3 + ADD R6, R1 + ADD R6, R4 + ADD R6, R5 + ADD R6, R8 + ADD R6, R9 + ADD R6, R10 + ADD R6, R11 + ADD R6, R12 + ADD R6, R0 + MOVD $15, R6 + VMOV R6, V12.B[0] + VDUP V12.B[0], V12.B16 + + // Reload length to save a register + MOVD n+80(FP), R6 + LSR $6, R6 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x3_64Xor_loop: + MOVD matrix_base+0(FP), R2 + // Load 3 outputs + VLD1.P 32(R14), [V0.B16, V1.B16] + VLD1.P 32(R14), [V2.B16, V3.B16] + VLD1.P 32(R15), [V4.B16, V5.B16] + VLD1.P 32(R15), [V6.B16, V7.B16] + VLD1.P 32(R13), [V8.B16, V9.B16] + VLD1.P 32(R13), [V10.B16, V11.B16] + + // Load and process 64 bytes from input 0 to 3 outputs + VLD1.P 32(R3), [V22.B16, V23.B16] + VLD1.P 32(R3), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x3_64Xor_store + + // Load and process 64 bytes from input 1 to 3 outputs + VLD1.P 32(R1), [V22.B16, V23.B16] + VLD1.P 32(R1), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x3_64Xor_store + + // Load and process 64 bytes from input 2 to 3 outputs + VLD1.P 32(R4), [V22.B16, V23.B16] + VLD1.P 32(R4), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x3_64Xor_store + + // Load and process 64 bytes from input 3 to 3 outputs + VLD1.P 32(R5), [V22.B16, V23.B16] + VLD1.P 32(R5), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x3_64Xor_store + + // Load and process 64 bytes from input 4 to 3 outputs + VLD1.P 32(R8), [V22.B16, V23.B16] + VLD1.P 32(R8), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x3_64Xor_store + + // Load and process 64 bytes from input 5 to 3 outputs + VLD1.P 32(R9), [V22.B16, V23.B16] + VLD1.P 32(R9), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x3_64Xor_store + + // Load and process 64 bytes from input 6 to 3 outputs + VLD1.P 32(R10), [V22.B16, V23.B16] + VLD1.P 32(R10), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x3_64Xor_store + + // Load and process 64 bytes from input 7 to 3 outputs + VLD1.P 32(R11), [V22.B16, V23.B16] + VLD1.P 32(R11), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x3_64Xor_store + + // Load and process 64 bytes from input 8 to 3 outputs + VLD1.P 32(R12), [V22.B16, V23.B16] + VLD1.P 32(R12), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x3_64Xor_store + + // Load and process 64 bytes from input 9 to 3 outputs + VLD1.P 32(R0), [V22.B16, V23.B16] + VLD1.P 32(R0), [V26.B16, V27.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V12.B16, V22.B16, V22.B16 + VAND V12.B16, V23.B16, V23.B16 + VAND V12.B16, V26.B16, V26.B16 + VAND V12.B16, V27.B16, V27.B16 + VAND V12.B16, V24.B16, V24.B16 + VAND V12.B16, V25.B16, V25.B16 + VAND V12.B16, V28.B16, V28.B16 + VAND V12.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V26.B16, [V14.B16], V18.B16 + VTBL V27.B16, [V15.B16], V19.B16 + VTBL V22.B16, [V14.B16], V14.B16 + VTBL V23.B16, [V15.B16], V15.B16 + VTBL V28.B16, [V16.B16], V20.B16 + VTBL V29.B16, [V17.B16], V21.B16 + VTBL V24.B16, [V16.B16], V16.B16 + VTBL V25.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + +mulNeon_10x3_64Xor_store: + // Store 3 outputs + SUB $64, R14 + VST1.P [V0.D2, V1.D2], 32(R14) + VST1.P [V2.D2, V3.D2], 32(R14) + SUB $64, R15 + VST1.P [V4.D2, V5.D2], 32(R15) + VST1.P [V6.D2, V7.D2], 32(R15) + SUB $64, R13 + VST1.P [V8.D2, V9.D2], 32(R13) + VST1.P [V10.D2, V11.D2], 32(R13) + + // Prepare for next loop + SUBS $1, R6 + BNE mulNeon_10x3_64Xor_loop + +mulNeon_10x3_64Xor_end: + RET + +// func mulNeon_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x4(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 89 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x4_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V8.B[0] + VDUP V8.B[0], V8.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x4_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 4 outputs + VLD1.P 32(R1), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V12.B16, V0.B16 + VEOR V11.B16, V13.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V12.B16, V2.B16 + VEOR V11.B16, V13.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V12.B16, V4.B16 + VEOR V11.B16, V13.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V12.B16, V6.B16 + VEOR V11.B16, V13.B16, V7.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x4_store + + // Load and process 32 bytes from input 1 to 4 outputs + VLD1.P 32(R4), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x4_store + + // Load and process 32 bytes from input 2 to 4 outputs + VLD1.P 32(R5), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x4_store + + // Load and process 32 bytes from input 3 to 4 outputs + VLD1.P 32(R8), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x4_store + + // Load and process 32 bytes from input 4 to 4 outputs + VLD1.P 32(R9), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x4_store + + // Load and process 32 bytes from input 5 to 4 outputs + VLD1.P 32(R10), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x4_store + + // Load and process 32 bytes from input 6 to 4 outputs + VLD1.P 32(R11), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x4_store + + // Load and process 32 bytes from input 7 to 4 outputs + VLD1.P 32(R12), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x4_store + + // Load and process 32 bytes from input 8 to 4 outputs + VLD1.P 32(R13), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x4_store + + // Load and process 32 bytes from input 9 to 4 outputs + VLD1.P 32(R3), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + +mulNeon_10x4_store: + // Store 4 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x4_loop + +mulNeon_10x4_end: + RET + +// func mulNeon_10x4Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x4Xor(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 89 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x4Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V8.B[0] + VDUP V8.B[0], V8.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x4Xor_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 4 outputs + VLD1.P 32(R1), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + MOVD (R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V0.B16, V1.B16] + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + MOVD 24(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V2.B16, V3.B16] + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + MOVD 48(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V4.B16, V5.B16] + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + MOVD 72(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V6.B16, V7.B16] + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x4Xor_store + + // Load and process 32 bytes from input 1 to 4 outputs + VLD1.P 32(R4), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x4Xor_store + + // Load and process 32 bytes from input 2 to 4 outputs + VLD1.P 32(R5), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x4Xor_store + + // Load and process 32 bytes from input 3 to 4 outputs + VLD1.P 32(R8), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x4Xor_store + + // Load and process 32 bytes from input 4 to 4 outputs + VLD1.P 32(R9), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x4Xor_store + + // Load and process 32 bytes from input 5 to 4 outputs + VLD1.P 32(R10), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x4Xor_store + + // Load and process 32 bytes from input 6 to 4 outputs + VLD1.P 32(R11), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x4Xor_store + + // Load and process 32 bytes from input 7 to 4 outputs + VLD1.P 32(R12), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x4Xor_store + + // Load and process 32 bytes from input 8 to 4 outputs + VLD1.P 32(R13), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x4Xor_store + + // Load and process 32 bytes from input 9 to 4 outputs + VLD1.P 32(R3), [V14.B16, V15.B16] + VUSHR $4, V14.B16, V16.B16 + VUSHR $4, V15.B16, V17.B16 + VAND V8.B16, V14.B16, V14.B16 + VAND V8.B16, V15.B16, V15.B16 + VAND V8.B16, V16.B16, V16.B16 + VAND V8.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V0.B16, V0.B16 + VEOR V11.B16, V1.B16, V1.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V2.B16, V2.B16 + VEOR V11.B16, V3.B16, V3.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V4.B16, V4.B16 + VEOR V11.B16, V5.B16, V5.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V10.B16, V11.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VTBL V14.B16, [V10.B16], V10.B16 + VTBL V15.B16, [V11.B16], V11.B16 + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VEOR V10.B16, V6.B16, V6.B16 + VEOR V11.B16, V7.B16, V7.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + +mulNeon_10x4Xor_store: + // Store 4 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x4Xor_loop + +mulNeon_10x4Xor_end: + RET + +// func mulNeon_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x5(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 110 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x5_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V10.B[0] + VDUP V10.B[0], V10.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x5_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 5 outputs + VLD1.P 32(R1), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V14.B16, V0.B16 + VEOR V13.B16, V15.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V14.B16, V2.B16 + VEOR V13.B16, V15.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V14.B16, V4.B16 + VEOR V13.B16, V15.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V14.B16, V6.B16 + VEOR V13.B16, V15.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V14.B16, V8.B16 + VEOR V13.B16, V15.B16, V9.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x5_store + + // Load and process 32 bytes from input 1 to 5 outputs + VLD1.P 32(R4), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x5_store + + // Load and process 32 bytes from input 2 to 5 outputs + VLD1.P 32(R5), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x5_store + + // Load and process 32 bytes from input 3 to 5 outputs + VLD1.P 32(R8), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x5_store + + // Load and process 32 bytes from input 4 to 5 outputs + VLD1.P 32(R9), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x5_store + + // Load and process 32 bytes from input 5 to 5 outputs + VLD1.P 32(R10), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x5_store + + // Load and process 32 bytes from input 6 to 5 outputs + VLD1.P 32(R11), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x5_store + + // Load and process 32 bytes from input 7 to 5 outputs + VLD1.P 32(R12), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x5_store + + // Load and process 32 bytes from input 8 to 5 outputs + VLD1.P 32(R13), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x5_store + + // Load and process 32 bytes from input 9 to 5 outputs + VLD1.P 32(R3), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + +mulNeon_10x5_store: + // Store 5 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x5_loop + +mulNeon_10x5_end: + RET + +// func mulNeon_10x5Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x5Xor(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 110 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x5Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V10.B[0] + VDUP V10.B[0], V10.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x5Xor_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 5 outputs + VLD1.P 32(R1), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + MOVD (R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V0.B16, V1.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + MOVD 24(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V2.B16, V3.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + MOVD 48(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V4.B16, V5.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + MOVD 72(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V6.B16, V7.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + MOVD 96(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V8.B16, V9.B16] + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x5Xor_store + + // Load and process 32 bytes from input 1 to 5 outputs + VLD1.P 32(R4), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x5Xor_store + + // Load and process 32 bytes from input 2 to 5 outputs + VLD1.P 32(R5), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x5Xor_store + + // Load and process 32 bytes from input 3 to 5 outputs + VLD1.P 32(R8), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x5Xor_store + + // Load and process 32 bytes from input 4 to 5 outputs + VLD1.P 32(R9), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x5Xor_store + + // Load and process 32 bytes from input 5 to 5 outputs + VLD1.P 32(R10), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x5Xor_store + + // Load and process 32 bytes from input 6 to 5 outputs + VLD1.P 32(R11), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x5Xor_store + + // Load and process 32 bytes from input 7 to 5 outputs + VLD1.P 32(R12), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x5Xor_store + + // Load and process 32 bytes from input 8 to 5 outputs + VLD1.P 32(R13), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x5Xor_store + + // Load and process 32 bytes from input 9 to 5 outputs + VLD1.P 32(R3), [V16.B16, V17.B16] + VUSHR $4, V16.B16, V18.B16 + VUSHR $4, V17.B16, V19.B16 + VAND V10.B16, V16.B16, V16.B16 + VAND V10.B16, V17.B16, V17.B16 + VAND V10.B16, V18.B16, V18.B16 + VAND V10.B16, V19.B16, V19.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V0.B16, V0.B16 + VEOR V13.B16, V1.B16, V1.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V2.B16, V2.B16 + VEOR V13.B16, V3.B16, V3.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V4.B16, V4.B16 + VEOR V13.B16, V5.B16, V5.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V6.B16, V6.B16 + VEOR V13.B16, V7.B16, V7.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V12.B16, V13.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VTBL V16.B16, [V12.B16], V12.B16 + VTBL V17.B16, [V13.B16], V13.B16 + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VEOR V12.B16, V8.B16, V8.B16 + VEOR V13.B16, V9.B16, V9.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + +mulNeon_10x5Xor_store: + // Store 5 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x5Xor_loop + +mulNeon_10x5Xor_end: + RET + +// func mulNeon_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x6(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 131 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x6_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V12.B[0] + VDUP V12.B[0], V12.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x6_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 6 outputs + VLD1.P 32(R1), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V16.B16, V0.B16 + VEOR V15.B16, V17.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V16.B16, V2.B16 + VEOR V15.B16, V17.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V16.B16, V4.B16 + VEOR V15.B16, V17.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V16.B16, V6.B16 + VEOR V15.B16, V17.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V16.B16, V8.B16 + VEOR V15.B16, V17.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V16.B16, V10.B16 + VEOR V15.B16, V17.B16, V11.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x6_store + + // Load and process 32 bytes from input 1 to 6 outputs + VLD1.P 32(R4), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x6_store + + // Load and process 32 bytes from input 2 to 6 outputs + VLD1.P 32(R5), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x6_store + + // Load and process 32 bytes from input 3 to 6 outputs + VLD1.P 32(R8), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x6_store + + // Load and process 32 bytes from input 4 to 6 outputs + VLD1.P 32(R9), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x6_store + + // Load and process 32 bytes from input 5 to 6 outputs + VLD1.P 32(R10), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x6_store + + // Load and process 32 bytes from input 6 to 6 outputs + VLD1.P 32(R11), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x6_store + + // Load and process 32 bytes from input 7 to 6 outputs + VLD1.P 32(R12), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x6_store + + // Load and process 32 bytes from input 8 to 6 outputs + VLD1.P 32(R13), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x6_store + + // Load and process 32 bytes from input 9 to 6 outputs + VLD1.P 32(R3), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + +mulNeon_10x6_store: + // Store 6 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + MOVD 120(R14), R6 + ADD R15<<3, R6 + VST1 [V10.D2, V11.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x6_loop + +mulNeon_10x6_end: + RET + +// func mulNeon_10x6Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x6Xor(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 131 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x6Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V12.B[0] + VDUP V12.B[0], V12.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x6Xor_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 6 outputs + VLD1.P 32(R1), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + MOVD (R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V0.B16, V1.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + MOVD 24(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V2.B16, V3.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + MOVD 48(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V4.B16, V5.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + MOVD 72(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V6.B16, V7.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + MOVD 96(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V8.B16, V9.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + MOVD 120(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V10.B16, V11.B16] + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x6Xor_store + + // Load and process 32 bytes from input 1 to 6 outputs + VLD1.P 32(R4), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x6Xor_store + + // Load and process 32 bytes from input 2 to 6 outputs + VLD1.P 32(R5), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x6Xor_store + + // Load and process 32 bytes from input 3 to 6 outputs + VLD1.P 32(R8), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x6Xor_store + + // Load and process 32 bytes from input 4 to 6 outputs + VLD1.P 32(R9), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x6Xor_store + + // Load and process 32 bytes from input 5 to 6 outputs + VLD1.P 32(R10), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x6Xor_store + + // Load and process 32 bytes from input 6 to 6 outputs + VLD1.P 32(R11), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x6Xor_store + + // Load and process 32 bytes from input 7 to 6 outputs + VLD1.P 32(R12), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x6Xor_store + + // Load and process 32 bytes from input 8 to 6 outputs + VLD1.P 32(R13), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x6Xor_store + + // Load and process 32 bytes from input 9 to 6 outputs + VLD1.P 32(R3), [V18.B16, V19.B16] + VUSHR $4, V18.B16, V20.B16 + VUSHR $4, V19.B16, V21.B16 + VAND V12.B16, V18.B16, V18.B16 + VAND V12.B16, V19.B16, V19.B16 + VAND V12.B16, V20.B16, V20.B16 + VAND V12.B16, V21.B16, V21.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V0.B16, V0.B16 + VEOR V15.B16, V1.B16, V1.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V2.B16, V2.B16 + VEOR V15.B16, V3.B16, V3.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V4.B16, V4.B16 + VEOR V15.B16, V5.B16, V5.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V6.B16, V6.B16 + VEOR V15.B16, V7.B16, V7.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V8.B16, V8.B16 + VEOR V15.B16, V9.B16, V9.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V14.B16, V15.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VTBL V18.B16, [V14.B16], V14.B16 + VTBL V19.B16, [V15.B16], V15.B16 + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VEOR V14.B16, V10.B16, V10.B16 + VEOR V15.B16, V11.B16, V11.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + +mulNeon_10x6Xor_store: + // Store 6 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + MOVD 120(R14), R6 + ADD R15<<3, R6 + VST1 [V10.D2, V11.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x6Xor_loop + +mulNeon_10x6Xor_end: + RET + +// func mulNeon_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x7(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 152 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x7_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V14.B[0] + VDUP V14.B[0], V14.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x7_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 7 outputs + VLD1.P 32(R1), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V18.B16, V0.B16 + VEOR V17.B16, V19.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V18.B16, V2.B16 + VEOR V17.B16, V19.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V18.B16, V4.B16 + VEOR V17.B16, V19.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V18.B16, V6.B16 + VEOR V17.B16, V19.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V18.B16, V8.B16 + VEOR V17.B16, V19.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V18.B16, V10.B16 + VEOR V17.B16, V19.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V18.B16, V12.B16 + VEOR V17.B16, V19.B16, V13.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x7_store + + // Load and process 32 bytes from input 1 to 7 outputs + VLD1.P 32(R4), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x7_store + + // Load and process 32 bytes from input 2 to 7 outputs + VLD1.P 32(R5), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x7_store + + // Load and process 32 bytes from input 3 to 7 outputs + VLD1.P 32(R8), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x7_store + + // Load and process 32 bytes from input 4 to 7 outputs + VLD1.P 32(R9), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x7_store + + // Load and process 32 bytes from input 5 to 7 outputs + VLD1.P 32(R10), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x7_store + + // Load and process 32 bytes from input 6 to 7 outputs + VLD1.P 32(R11), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x7_store + + // Load and process 32 bytes from input 7 to 7 outputs + VLD1.P 32(R12), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x7_store + + // Load and process 32 bytes from input 8 to 7 outputs + VLD1.P 32(R13), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x7_store + + // Load and process 32 bytes from input 9 to 7 outputs + VLD1.P 32(R3), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + +mulNeon_10x7_store: + // Store 7 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + MOVD 120(R14), R6 + ADD R15<<3, R6 + VST1 [V10.D2, V11.D2], (R6) + MOVD 144(R14), R6 + ADD R15<<3, R6 + VST1 [V12.D2, V13.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x7_loop + +mulNeon_10x7_end: + RET + +// func mulNeon_10x7Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x7Xor(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 152 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x7Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V14.B[0] + VDUP V14.B[0], V14.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x7Xor_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 7 outputs + VLD1.P 32(R1), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + MOVD (R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V0.B16, V1.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + MOVD 24(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V2.B16, V3.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + MOVD 48(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V4.B16, V5.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + MOVD 72(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V6.B16, V7.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + MOVD 96(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V8.B16, V9.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + MOVD 120(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V10.B16, V11.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + MOVD 144(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V12.B16, V13.B16] + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x7Xor_store + + // Load and process 32 bytes from input 1 to 7 outputs + VLD1.P 32(R4), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x7Xor_store + + // Load and process 32 bytes from input 2 to 7 outputs + VLD1.P 32(R5), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x7Xor_store + + // Load and process 32 bytes from input 3 to 7 outputs + VLD1.P 32(R8), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x7Xor_store + + // Load and process 32 bytes from input 4 to 7 outputs + VLD1.P 32(R9), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x7Xor_store + + // Load and process 32 bytes from input 5 to 7 outputs + VLD1.P 32(R10), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x7Xor_store + + // Load and process 32 bytes from input 6 to 7 outputs + VLD1.P 32(R11), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x7Xor_store + + // Load and process 32 bytes from input 7 to 7 outputs + VLD1.P 32(R12), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x7Xor_store + + // Load and process 32 bytes from input 8 to 7 outputs + VLD1.P 32(R13), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x7Xor_store + + // Load and process 32 bytes from input 9 to 7 outputs + VLD1.P 32(R3), [V20.B16, V21.B16] + VUSHR $4, V20.B16, V22.B16 + VUSHR $4, V21.B16, V23.B16 + VAND V14.B16, V20.B16, V20.B16 + VAND V14.B16, V21.B16, V21.B16 + VAND V14.B16, V22.B16, V22.B16 + VAND V14.B16, V23.B16, V23.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V0.B16, V0.B16 + VEOR V17.B16, V1.B16, V1.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V2.B16, V2.B16 + VEOR V17.B16, V3.B16, V3.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V4.B16, V4.B16 + VEOR V17.B16, V5.B16, V5.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V6.B16, V6.B16 + VEOR V17.B16, V7.B16, V7.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V8.B16, V8.B16 + VEOR V17.B16, V9.B16, V9.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V10.B16, V10.B16 + VEOR V17.B16, V11.B16, V11.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V16.B16, V17.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VTBL V20.B16, [V16.B16], V16.B16 + VTBL V21.B16, [V17.B16], V17.B16 + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VEOR V16.B16, V12.B16, V12.B16 + VEOR V17.B16, V13.B16, V13.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + +mulNeon_10x7Xor_store: + // Store 7 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + MOVD 120(R14), R6 + ADD R15<<3, R6 + VST1 [V10.D2, V11.D2], (R6) + MOVD 144(R14), R6 + ADD R15<<3, R6 + VST1 [V12.D2, V13.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x7Xor_loop + +mulNeon_10x7Xor_end: + RET + +// func mulNeon_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x8(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 173 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x8_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V16.B[0] + VDUP V16.B[0], V16.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x8_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 8 outputs + VLD1.P 32(R1), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V20.B16, V0.B16 + VEOR V19.B16, V21.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V20.B16, V2.B16 + VEOR V19.B16, V21.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V20.B16, V4.B16 + VEOR V19.B16, V21.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V20.B16, V6.B16 + VEOR V19.B16, V21.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V20.B16, V8.B16 + VEOR V19.B16, V21.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V20.B16, V10.B16 + VEOR V19.B16, V21.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V20.B16, V12.B16 + VEOR V19.B16, V21.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V20.B16, V14.B16 + VEOR V19.B16, V21.B16, V15.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x8_store + + // Load and process 32 bytes from input 1 to 8 outputs + VLD1.P 32(R4), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x8_store + + // Load and process 32 bytes from input 2 to 8 outputs + VLD1.P 32(R5), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x8_store + + // Load and process 32 bytes from input 3 to 8 outputs + VLD1.P 32(R8), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x8_store + + // Load and process 32 bytes from input 4 to 8 outputs + VLD1.P 32(R9), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x8_store + + // Load and process 32 bytes from input 5 to 8 outputs + VLD1.P 32(R10), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x8_store + + // Load and process 32 bytes from input 6 to 8 outputs + VLD1.P 32(R11), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x8_store + + // Load and process 32 bytes from input 7 to 8 outputs + VLD1.P 32(R12), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x8_store + + // Load and process 32 bytes from input 8 to 8 outputs + VLD1.P 32(R13), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x8_store + + // Load and process 32 bytes from input 9 to 8 outputs + VLD1.P 32(R3), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + +mulNeon_10x8_store: + // Store 8 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + MOVD 120(R14), R6 + ADD R15<<3, R6 + VST1 [V10.D2, V11.D2], (R6) + MOVD 144(R14), R6 + ADD R15<<3, R6 + VST1 [V12.D2, V13.D2], (R6) + MOVD 168(R14), R6 + ADD R15<<3, R6 + VST1 [V14.D2, V15.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x8_loop + +mulNeon_10x8_end: + RET + +// func mulNeon_10x8Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x8Xor(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 173 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x8Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V16.B[0] + VDUP V16.B[0], V16.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x8Xor_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 8 outputs + VLD1.P 32(R1), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + MOVD (R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V0.B16, V1.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + MOVD 24(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V2.B16, V3.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + MOVD 48(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V4.B16, V5.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + MOVD 72(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V6.B16, V7.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + MOVD 96(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V8.B16, V9.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + MOVD 120(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V10.B16, V11.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + MOVD 144(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V12.B16, V13.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + MOVD 168(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V14.B16, V15.B16] + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x8Xor_store + + // Load and process 32 bytes from input 1 to 8 outputs + VLD1.P 32(R4), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x8Xor_store + + // Load and process 32 bytes from input 2 to 8 outputs + VLD1.P 32(R5), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x8Xor_store + + // Load and process 32 bytes from input 3 to 8 outputs + VLD1.P 32(R8), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x8Xor_store + + // Load and process 32 bytes from input 4 to 8 outputs + VLD1.P 32(R9), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x8Xor_store + + // Load and process 32 bytes from input 5 to 8 outputs + VLD1.P 32(R10), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x8Xor_store + + // Load and process 32 bytes from input 6 to 8 outputs + VLD1.P 32(R11), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x8Xor_store + + // Load and process 32 bytes from input 7 to 8 outputs + VLD1.P 32(R12), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x8Xor_store + + // Load and process 32 bytes from input 8 to 8 outputs + VLD1.P 32(R13), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x8Xor_store + + // Load and process 32 bytes from input 9 to 8 outputs + VLD1.P 32(R3), [V22.B16, V23.B16] + VUSHR $4, V22.B16, V24.B16 + VUSHR $4, V23.B16, V25.B16 + VAND V16.B16, V22.B16, V22.B16 + VAND V16.B16, V23.B16, V23.B16 + VAND V16.B16, V24.B16, V24.B16 + VAND V16.B16, V25.B16, V25.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V0.B16, V0.B16 + VEOR V19.B16, V1.B16, V1.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V2.B16, V2.B16 + VEOR V19.B16, V3.B16, V3.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V4.B16, V4.B16 + VEOR V19.B16, V5.B16, V5.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V6.B16, V6.B16 + VEOR V19.B16, V7.B16, V7.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V8.B16, V8.B16 + VEOR V19.B16, V9.B16, V9.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V10.B16, V10.B16 + VEOR V19.B16, V11.B16, V11.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V12.B16, V12.B16 + VEOR V19.B16, V13.B16, V13.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V18.B16, V19.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VTBL V22.B16, [V18.B16], V18.B16 + VTBL V23.B16, [V19.B16], V19.B16 + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VEOR V18.B16, V14.B16, V14.B16 + VEOR V19.B16, V15.B16, V15.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + +mulNeon_10x8Xor_store: + // Store 8 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + MOVD 120(R14), R6 + ADD R15<<3, R6 + VST1 [V10.D2, V11.D2], (R6) + MOVD 144(R14), R6 + ADD R15<<3, R6 + VST1 [V12.D2, V13.D2], (R6) + MOVD 168(R14), R6 + ADD R15<<3, R6 + VST1 [V14.D2, V15.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x8Xor_loop + +mulNeon_10x8Xor_end: + RET + +// func mulNeon_10x9(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x9(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 194 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x9_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V18.B[0] + VDUP V18.B[0], V18.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x9_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 9 outputs + VLD1.P 32(R1), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V22.B16, V0.B16 + VEOR V21.B16, V23.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V22.B16, V2.B16 + VEOR V21.B16, V23.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V22.B16, V4.B16 + VEOR V21.B16, V23.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V22.B16, V6.B16 + VEOR V21.B16, V23.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V22.B16, V8.B16 + VEOR V21.B16, V23.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V22.B16, V10.B16 + VEOR V21.B16, V23.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V22.B16, V12.B16 + VEOR V21.B16, V23.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V22.B16, V14.B16 + VEOR V21.B16, V23.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V22.B16, V16.B16 + VEOR V21.B16, V23.B16, V17.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x9_store + + // Load and process 32 bytes from input 1 to 9 outputs + VLD1.P 32(R4), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x9_store + + // Load and process 32 bytes from input 2 to 9 outputs + VLD1.P 32(R5), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x9_store + + // Load and process 32 bytes from input 3 to 9 outputs + VLD1.P 32(R8), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x9_store + + // Load and process 32 bytes from input 4 to 9 outputs + VLD1.P 32(R9), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x9_store + + // Load and process 32 bytes from input 5 to 9 outputs + VLD1.P 32(R10), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x9_store + + // Load and process 32 bytes from input 6 to 9 outputs + VLD1.P 32(R11), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x9_store + + // Load and process 32 bytes from input 7 to 9 outputs + VLD1.P 32(R12), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x9_store + + // Load and process 32 bytes from input 8 to 9 outputs + VLD1.P 32(R13), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x9_store + + // Load and process 32 bytes from input 9 to 9 outputs + VLD1.P 32(R3), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + +mulNeon_10x9_store: + // Store 9 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + MOVD 120(R14), R6 + ADD R15<<3, R6 + VST1 [V10.D2, V11.D2], (R6) + MOVD 144(R14), R6 + ADD R15<<3, R6 + VST1 [V12.D2, V13.D2], (R6) + MOVD 168(R14), R6 + ADD R15<<3, R6 + VST1 [V14.D2, V15.D2], (R6) + MOVD 192(R14), R6 + ADD R15<<3, R6 + VST1 [V16.D2, V17.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x9_loop + +mulNeon_10x9_end: + RET + +// func mulNeon_10x9Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x9Xor(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 194 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x9Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V18.B[0] + VDUP V18.B[0], V18.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x9Xor_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 9 outputs + VLD1.P 32(R1), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + MOVD (R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V0.B16, V1.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + MOVD 24(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V2.B16, V3.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + MOVD 48(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V4.B16, V5.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + MOVD 72(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V6.B16, V7.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + MOVD 96(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V8.B16, V9.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + MOVD 120(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V10.B16, V11.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + MOVD 144(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V12.B16, V13.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + MOVD 168(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V14.B16, V15.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + MOVD 192(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V16.B16, V17.B16] + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x9Xor_store + + // Load and process 32 bytes from input 1 to 9 outputs + VLD1.P 32(R4), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x9Xor_store + + // Load and process 32 bytes from input 2 to 9 outputs + VLD1.P 32(R5), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x9Xor_store + + // Load and process 32 bytes from input 3 to 9 outputs + VLD1.P 32(R8), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x9Xor_store + + // Load and process 32 bytes from input 4 to 9 outputs + VLD1.P 32(R9), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x9Xor_store + + // Load and process 32 bytes from input 5 to 9 outputs + VLD1.P 32(R10), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x9Xor_store + + // Load and process 32 bytes from input 6 to 9 outputs + VLD1.P 32(R11), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x9Xor_store + + // Load and process 32 bytes from input 7 to 9 outputs + VLD1.P 32(R12), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x9Xor_store + + // Load and process 32 bytes from input 8 to 9 outputs + VLD1.P 32(R13), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x9Xor_store + + // Load and process 32 bytes from input 9 to 9 outputs + VLD1.P 32(R3), [V24.B16, V25.B16] + VUSHR $4, V24.B16, V26.B16 + VUSHR $4, V25.B16, V27.B16 + VAND V18.B16, V24.B16, V24.B16 + VAND V18.B16, V25.B16, V25.B16 + VAND V18.B16, V26.B16, V26.B16 + VAND V18.B16, V27.B16, V27.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V0.B16, V0.B16 + VEOR V21.B16, V1.B16, V1.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V2.B16, V2.B16 + VEOR V21.B16, V3.B16, V3.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V6.B16, V6.B16 + VEOR V21.B16, V7.B16, V7.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V8.B16, V8.B16 + VEOR V21.B16, V9.B16, V9.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V10.B16, V10.B16 + VEOR V21.B16, V11.B16, V11.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V12.B16, V12.B16 + VEOR V21.B16, V13.B16, V13.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V14.B16, V14.B16 + VEOR V21.B16, V15.B16, V15.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V20.B16, V21.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VTBL V24.B16, [V20.B16], V20.B16 + VTBL V25.B16, [V21.B16], V21.B16 + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VEOR V20.B16, V16.B16, V16.B16 + VEOR V21.B16, V17.B16, V17.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + +mulNeon_10x9Xor_store: + // Store 9 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + MOVD 120(R14), R6 + ADD R15<<3, R6 + VST1 [V10.D2, V11.D2], (R6) + MOVD 144(R14), R6 + ADD R15<<3, R6 + VST1 [V12.D2, V13.D2], (R6) + MOVD 168(R14), R6 + ADD R15<<3, R6 + VST1 [V14.D2, V15.D2], (R6) + MOVD 192(R14), R6 + ADD R15<<3, R6 + VST1 [V16.D2, V17.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x9Xor_loop + +mulNeon_10x9Xor_end: + RET + +// func mulNeon_10x10(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x10(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 215 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x10_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V20.B[0] + VDUP V20.B[0], V20.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x10_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 10 outputs + VLD1.P 32(R1), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V24.B16, V0.B16 + VEOR V23.B16, V25.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V24.B16, V2.B16 + VEOR V23.B16, V25.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V24.B16, V4.B16 + VEOR V23.B16, V25.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V24.B16, V6.B16 + VEOR V23.B16, V25.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V24.B16, V8.B16 + VEOR V23.B16, V25.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V24.B16, V10.B16 + VEOR V23.B16, V25.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V24.B16, V12.B16 + VEOR V23.B16, V25.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V24.B16, V14.B16 + VEOR V23.B16, V25.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V24.B16, V16.B16 + VEOR V23.B16, V25.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V24.B16, V18.B16 + VEOR V23.B16, V25.B16, V19.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x10_store + + // Load and process 32 bytes from input 1 to 10 outputs + VLD1.P 32(R4), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x10_store + + // Load and process 32 bytes from input 2 to 10 outputs + VLD1.P 32(R5), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x10_store + + // Load and process 32 bytes from input 3 to 10 outputs + VLD1.P 32(R8), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x10_store + + // Load and process 32 bytes from input 4 to 10 outputs + VLD1.P 32(R9), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x10_store + + // Load and process 32 bytes from input 5 to 10 outputs + VLD1.P 32(R10), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x10_store + + // Load and process 32 bytes from input 6 to 10 outputs + VLD1.P 32(R11), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x10_store + + // Load and process 32 bytes from input 7 to 10 outputs + VLD1.P 32(R12), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x10_store + + // Load and process 32 bytes from input 8 to 10 outputs + VLD1.P 32(R13), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x10_store + + // Load and process 32 bytes from input 9 to 10 outputs + VLD1.P 32(R3), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + +mulNeon_10x10_store: + // Store 10 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + MOVD 120(R14), R6 + ADD R15<<3, R6 + VST1 [V10.D2, V11.D2], (R6) + MOVD 144(R14), R6 + ADD R15<<3, R6 + VST1 [V12.D2, V13.D2], (R6) + MOVD 168(R14), R6 + ADD R15<<3, R6 + VST1 [V14.D2, V15.D2], (R6) + MOVD 192(R14), R6 + ADD R15<<3, R6 + VST1 [V16.D2, V17.D2], (R6) + MOVD 216(R14), R6 + ADD R15<<3, R6 + VST1 [V18.D2, V19.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x10_loop + +mulNeon_10x10_end: + RET + +// func mulNeon_10x10Xor(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: NEON +TEXT ·mulNeon_10x10Xor(SB), NOSPLIT, $8-88 + // Loading no tables to registers + // Destination kept on stack + // Full registers estimated 215 YMM used + MOVD n+80(FP), R0 + LSR $5, R0 + TST R0, R0 + BEQ mulNeon_10x10Xor_end + MOVD in_base+24(FP), R3 + MOVD (R3), R1 + MOVD 24(R3), R4 + MOVD 48(R3), R5 + MOVD 72(R3), R8 + MOVD 96(R3), R9 + MOVD 120(R3), R10 + MOVD 144(R3), R11 + MOVD 168(R3), R12 + MOVD 192(R3), R13 + MOVD 216(R3), R3 + MOVD out_base+48(FP), R14 + MOVD start+72(FP), R15 + + // Add start offset to input + ADD R15, R1 + ADD R15, R4 + ADD R15, R5 + ADD R15, R8 + ADD R15, R9 + ADD R15, R10 + ADD R15, R11 + ADD R15, R12 + ADD R15, R13 + ADD R15, R3 + LSR $3, R15 + MOVD $15, R6 + VMOV R6, V20.B[0] + VDUP V20.B[0], V20.B16 + + // Load number of input shards + MOVD in_len+32(FP), R16 + +mulNeon_10x10Xor_loop: + MOVD matrix_base+0(FP), R2 + // Load and process 32 bytes from input 0 to 10 outputs + VLD1.P 32(R1), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + MOVD (R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V0.B16, V1.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + MOVD 24(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V2.B16, V3.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + MOVD 48(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V4.B16, V5.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + MOVD 72(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V6.B16, V7.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + MOVD 96(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V8.B16, V9.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + MOVD 120(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V10.B16, V11.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + MOVD 144(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V12.B16, V13.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + MOVD 168(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V14.B16, V15.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + MOVD 192(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V16.B16, V17.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + MOVD 216(R14), R6 + ADD R15<<3, R6 + VLD1 (R6), [V18.B16, V19.B16] + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $1, R16 + BEQ mulNeon_10x10Xor_store + + // Load and process 32 bytes from input 1 to 10 outputs + VLD1.P 32(R4), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $2, R16 + BEQ mulNeon_10x10Xor_store + + // Load and process 32 bytes from input 2 to 10 outputs + VLD1.P 32(R5), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $3, R16 + BEQ mulNeon_10x10Xor_store + + // Load and process 32 bytes from input 3 to 10 outputs + VLD1.P 32(R8), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $4, R16 + BEQ mulNeon_10x10Xor_store + + // Load and process 32 bytes from input 4 to 10 outputs + VLD1.P 32(R9), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $5, R16 + BEQ mulNeon_10x10Xor_store + + // Load and process 32 bytes from input 5 to 10 outputs + VLD1.P 32(R10), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $6, R16 + BEQ mulNeon_10x10Xor_store + + // Load and process 32 bytes from input 6 to 10 outputs + VLD1.P 32(R11), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $7, R16 + BEQ mulNeon_10x10Xor_store + + // Load and process 32 bytes from input 7 to 10 outputs + VLD1.P 32(R12), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $8, R16 + BEQ mulNeon_10x10Xor_store + + // Load and process 32 bytes from input 8 to 10 outputs + VLD1.P 32(R13), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + // Check for early termination + CMP $9, R16 + BEQ mulNeon_10x10Xor_store + + // Load and process 32 bytes from input 9 to 10 outputs + VLD1.P 32(R3), [V26.B16, V27.B16] + VUSHR $4, V26.B16, V28.B16 + VUSHR $4, V27.B16, V29.B16 + VAND V20.B16, V26.B16, V26.B16 + VAND V20.B16, V27.B16, V27.B16 + VAND V20.B16, V28.B16, V28.B16 + VAND V20.B16, V29.B16, V29.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V0.B16, V0.B16 + VEOR V23.B16, V1.B16, V1.B16 + VEOR V24.B16, V0.B16, V0.B16 + VEOR V25.B16, V1.B16, V1.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V2.B16, V2.B16 + VEOR V23.B16, V3.B16, V3.B16 + VEOR V24.B16, V2.B16, V2.B16 + VEOR V25.B16, V3.B16, V3.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V4.B16, V4.B16 + VEOR V23.B16, V5.B16, V5.B16 + VEOR V24.B16, V4.B16, V4.B16 + VEOR V25.B16, V5.B16, V5.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V6.B16, V6.B16 + VEOR V23.B16, V7.B16, V7.B16 + VEOR V24.B16, V6.B16, V6.B16 + VEOR V25.B16, V7.B16, V7.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V8.B16, V8.B16 + VEOR V23.B16, V9.B16, V9.B16 + VEOR V24.B16, V8.B16, V8.B16 + VEOR V25.B16, V9.B16, V9.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V10.B16, V10.B16 + VEOR V23.B16, V11.B16, V11.B16 + VEOR V24.B16, V10.B16, V10.B16 + VEOR V25.B16, V11.B16, V11.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V12.B16, V12.B16 + VEOR V23.B16, V13.B16, V13.B16 + VEOR V24.B16, V12.B16, V12.B16 + VEOR V25.B16, V13.B16, V13.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V14.B16, V14.B16 + VEOR V23.B16, V15.B16, V15.B16 + VEOR V24.B16, V14.B16, V14.B16 + VEOR V25.B16, V15.B16, V15.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V16.B16, V16.B16 + VEOR V23.B16, V17.B16, V17.B16 + VEOR V24.B16, V16.B16, V16.B16 + VEOR V25.B16, V17.B16, V17.B16 + VLD1.P 32(R2), [V22.B16, V23.B16] + VLD1.P 32(R2), [V24.B16, V25.B16] + VTBL V26.B16, [V22.B16], V22.B16 + VTBL V27.B16, [V23.B16], V23.B16 + VTBL V28.B16, [V24.B16], V24.B16 + VTBL V29.B16, [V25.B16], V25.B16 + VEOR V22.B16, V18.B16, V18.B16 + VEOR V23.B16, V19.B16, V19.B16 + VEOR V24.B16, V18.B16, V18.B16 + VEOR V25.B16, V19.B16, V19.B16 + +mulNeon_10x10Xor_store: + // Store 10 outputs + MOVD (R14), R6 + ADD R15<<3, R6 + VST1 [V0.D2, V1.D2], (R6) + MOVD 24(R14), R6 + ADD R15<<3, R6 + VST1 [V2.D2, V3.D2], (R6) + MOVD 48(R14), R6 + ADD R15<<3, R6 + VST1 [V4.D2, V5.D2], (R6) + MOVD 72(R14), R6 + ADD R15<<3, R6 + VST1 [V6.D2, V7.D2], (R6) + MOVD 96(R14), R6 + ADD R15<<3, R6 + VST1 [V8.D2, V9.D2], (R6) + MOVD 120(R14), R6 + ADD R15<<3, R6 + VST1 [V10.D2, V11.D2], (R6) + MOVD 144(R14), R6 + ADD R15<<3, R6 + VST1 [V12.D2, V13.D2], (R6) + MOVD 168(R14), R6 + ADD R15<<3, R6 + VST1 [V14.D2, V15.D2], (R6) + MOVD 192(R14), R6 + ADD R15<<3, R6 + VST1 [V16.D2, V17.D2], (R6) + MOVD 216(R14), R6 + ADD R15<<3, R6 + VST1 [V18.D2, V19.D2], (R6) + + // Prepare for next loop + ADD $4, R15 + SUBS $1, R0 + BNE mulNeon_10x10Xor_loop + +mulNeon_10x10Xor_end: + RET + diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go index 11929e68..3e258986 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go @@ -1,26 +1,19 @@ -//go:build !amd64 || noasm || appengine || gccgo || nogen -// +build !amd64 noasm appengine gccgo nogen +//go:build !(amd64 || arm64) || noasm || appengine || gccgo || nogen package reedsolomon -const maxAvx2Inputs = 1 -const maxAvx2Outputs = 1 -const minAvx2Size = 1 -const avxSizeMask = 0 -const avx2CodeGen = false +const ( + codeGen = false + codeGenMaxGoroutines = 8 + codeGenMaxInputs = 1 + codeGenMaxOutputs = 1 + minCodeGenSize = 1 +) -func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { - panic("codegen not available") +func (r *reedSolomon) hasCodeGen(int, int, int) (_, _ *func(matrix []byte, in, out [][]byte, start, stop int) int, ok bool) { + return nil, nil, false } -func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { - panic("codegen not available") -} - -func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { - panic("codegen not available") -} - -func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { - panic("codegen not available") +func (r *reedSolomon) canGFNI(int, int, int) (_, _ *func(matrix []uint64, in, out [][]byte, start, stop int) int, ok bool) { + return nil, nil, false } diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.go new file mode 100644 index 00000000..298bf504 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.go @@ -0,0 +1,2264 @@ +// Code generated by command: go run gen.go -out ../galois_gen_nopshufb_amd64.s -stubs ../galois_gen_nopshufb_amd64.go -pkg=reedsolomon. DO NOT EDIT. + +//go:build !appengine && !noasm && !nogen && nopshufb && gc + +package reedsolomon + +func _dummy_() + +//go:noescape +func sSE2XorSlice(in []byte, out []byte) + +//go:noescape +func sSE2XorSlice_64(in []byte, out []byte) + +//go:noescape +func avx2XorSlice_64(in []byte, out []byte) + +// mulGFNI_1x1_64 takes 1 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_1x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x1 takes 1 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x1_64Xor takes 1 inputs and produces 1 outputs. +// +//go:noescape +func mulGFNI_1x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x1Xor takes 1 inputs and produces 1 outputs. +// +//go:noescape +func mulAvxGFNI_1x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x2_64 takes 1 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_1x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x2 takes 1 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x2_64Xor takes 1 inputs and produces 2 outputs. +// +//go:noescape +func mulGFNI_1x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x2Xor takes 1 inputs and produces 2 outputs. +// +//go:noescape +func mulAvxGFNI_1x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x3_64 takes 1 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_1x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x3 takes 1 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x3_64Xor takes 1 inputs and produces 3 outputs. +// +//go:noescape +func mulGFNI_1x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x3Xor takes 1 inputs and produces 3 outputs. +// +//go:noescape +func mulAvxGFNI_1x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x4_64 takes 1 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_1x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x4 takes 1 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x4_64Xor takes 1 inputs and produces 4 outputs. +// +//go:noescape +func mulGFNI_1x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x4Xor takes 1 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_1x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x5_64 takes 1 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_1x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x5 takes 1 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x5_64Xor takes 1 inputs and produces 5 outputs. +// +//go:noescape +func mulGFNI_1x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x5Xor takes 1 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_1x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x6_64 takes 1 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_1x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x6 takes 1 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x6_64Xor takes 1 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_1x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x6Xor takes 1 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_1x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x7_64 takes 1 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_1x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x7 takes 1 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x7_64Xor takes 1 inputs and produces 7 outputs. +// +//go:noescape +func mulGFNI_1x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x7Xor takes 1 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_1x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x8_64 takes 1 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_1x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x8 takes 1 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x8_64Xor takes 1 inputs and produces 8 outputs. +// +//go:noescape +func mulGFNI_1x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x8Xor takes 1 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_1x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x9_64 takes 1 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_1x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x9 takes 1 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x9_64Xor takes 1 inputs and produces 9 outputs. +// +//go:noescape +func mulGFNI_1x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x9Xor takes 1 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_1x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x10_64 takes 1 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_1x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x10 takes 1 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_1x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_1x10_64Xor takes 1 inputs and produces 10 outputs. +// +//go:noescape +func mulGFNI_1x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_1x10Xor takes 1 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_1x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x1_64 takes 2 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_2x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x1 takes 2 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x1_64Xor takes 2 inputs and produces 1 outputs. +// +//go:noescape +func mulGFNI_2x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x1Xor takes 2 inputs and produces 1 outputs. +// +//go:noescape +func mulAvxGFNI_2x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x2_64 takes 2 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_2x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x2 takes 2 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x2_64Xor takes 2 inputs and produces 2 outputs. +// +//go:noescape +func mulGFNI_2x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x2Xor takes 2 inputs and produces 2 outputs. +// +//go:noescape +func mulAvxGFNI_2x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x3_64 takes 2 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_2x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x3 takes 2 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x3_64Xor takes 2 inputs and produces 3 outputs. +// +//go:noescape +func mulGFNI_2x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x3Xor takes 2 inputs and produces 3 outputs. +// +//go:noescape +func mulAvxGFNI_2x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x4_64 takes 2 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_2x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x4 takes 2 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x4_64Xor takes 2 inputs and produces 4 outputs. +// +//go:noescape +func mulGFNI_2x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x4Xor takes 2 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_2x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x5_64 takes 2 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_2x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x5 takes 2 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x5_64Xor takes 2 inputs and produces 5 outputs. +// +//go:noescape +func mulGFNI_2x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x5Xor takes 2 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_2x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x6_64 takes 2 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_2x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x6 takes 2 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x6_64Xor takes 2 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_2x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x6Xor takes 2 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_2x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x7_64 takes 2 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_2x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x7 takes 2 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x7_64Xor takes 2 inputs and produces 7 outputs. +// +//go:noescape +func mulGFNI_2x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x7Xor takes 2 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_2x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x8_64 takes 2 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_2x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x8 takes 2 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x8_64Xor takes 2 inputs and produces 8 outputs. +// +//go:noescape +func mulGFNI_2x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x8Xor takes 2 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_2x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x9_64 takes 2 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_2x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x9 takes 2 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x9_64Xor takes 2 inputs and produces 9 outputs. +// +//go:noescape +func mulGFNI_2x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x9Xor takes 2 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_2x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x10_64 takes 2 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_2x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x10 takes 2 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_2x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_2x10_64Xor takes 2 inputs and produces 10 outputs. +// +//go:noescape +func mulGFNI_2x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_2x10Xor takes 2 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_2x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x1_64 takes 3 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_3x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x1 takes 3 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x1_64Xor takes 3 inputs and produces 1 outputs. +// +//go:noescape +func mulGFNI_3x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x1Xor takes 3 inputs and produces 1 outputs. +// +//go:noescape +func mulAvxGFNI_3x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x2_64 takes 3 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_3x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x2 takes 3 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x2_64Xor takes 3 inputs and produces 2 outputs. +// +//go:noescape +func mulGFNI_3x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x2Xor takes 3 inputs and produces 2 outputs. +// +//go:noescape +func mulAvxGFNI_3x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x3_64 takes 3 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_3x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x3 takes 3 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x3_64Xor takes 3 inputs and produces 3 outputs. +// +//go:noescape +func mulGFNI_3x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x3Xor takes 3 inputs and produces 3 outputs. +// +//go:noescape +func mulAvxGFNI_3x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x4_64 takes 3 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_3x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x4 takes 3 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x4_64Xor takes 3 inputs and produces 4 outputs. +// +//go:noescape +func mulGFNI_3x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x4Xor takes 3 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_3x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x5_64 takes 3 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_3x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x5 takes 3 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x5_64Xor takes 3 inputs and produces 5 outputs. +// +//go:noescape +func mulGFNI_3x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x5Xor takes 3 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_3x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x6_64 takes 3 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_3x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x6 takes 3 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x6_64Xor takes 3 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_3x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x6Xor takes 3 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_3x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x7_64 takes 3 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_3x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x7 takes 3 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x7_64Xor takes 3 inputs and produces 7 outputs. +// +//go:noescape +func mulGFNI_3x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x7Xor takes 3 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_3x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x8_64 takes 3 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_3x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x8 takes 3 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x8_64Xor takes 3 inputs and produces 8 outputs. +// +//go:noescape +func mulGFNI_3x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x8Xor takes 3 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_3x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x9_64 takes 3 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_3x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x9 takes 3 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x9_64Xor takes 3 inputs and produces 9 outputs. +// +//go:noescape +func mulGFNI_3x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x9Xor takes 3 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_3x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x10_64 takes 3 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_3x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x10 takes 3 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_3x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_3x10_64Xor takes 3 inputs and produces 10 outputs. +// +//go:noescape +func mulGFNI_3x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_3x10Xor takes 3 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_3x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x1_64 takes 4 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_4x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x1 takes 4 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x1_64Xor takes 4 inputs and produces 1 outputs. +// +//go:noescape +func mulGFNI_4x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x1Xor takes 4 inputs and produces 1 outputs. +// +//go:noescape +func mulAvxGFNI_4x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x2_64 takes 4 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_4x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x2 takes 4 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x2_64Xor takes 4 inputs and produces 2 outputs. +// +//go:noescape +func mulGFNI_4x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x2Xor takes 4 inputs and produces 2 outputs. +// +//go:noescape +func mulAvxGFNI_4x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x3_64 takes 4 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_4x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x3 takes 4 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x3_64Xor takes 4 inputs and produces 3 outputs. +// +//go:noescape +func mulGFNI_4x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x3Xor takes 4 inputs and produces 3 outputs. +// +//go:noescape +func mulAvxGFNI_4x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x4_64 takes 4 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_4x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x4 takes 4 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x4_64Xor takes 4 inputs and produces 4 outputs. +// +//go:noescape +func mulGFNI_4x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x4Xor takes 4 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_4x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x5_64 takes 4 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_4x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x5 takes 4 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x5_64Xor takes 4 inputs and produces 5 outputs. +// +//go:noescape +func mulGFNI_4x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x5Xor takes 4 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_4x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x6_64 takes 4 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_4x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x6 takes 4 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x6_64Xor takes 4 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_4x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x6Xor takes 4 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_4x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x7_64 takes 4 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_4x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x7 takes 4 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x7_64Xor takes 4 inputs and produces 7 outputs. +// +//go:noescape +func mulGFNI_4x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x7Xor takes 4 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_4x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x8_64 takes 4 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_4x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x8 takes 4 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x8_64Xor takes 4 inputs and produces 8 outputs. +// +//go:noescape +func mulGFNI_4x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x8Xor takes 4 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_4x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x9_64 takes 4 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_4x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x9 takes 4 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x9_64Xor takes 4 inputs and produces 9 outputs. +// +//go:noescape +func mulGFNI_4x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x9Xor takes 4 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_4x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x10_64 takes 4 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_4x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x10 takes 4 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_4x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_4x10_64Xor takes 4 inputs and produces 10 outputs. +// +//go:noescape +func mulGFNI_4x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_4x10Xor takes 4 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_4x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x1_64 takes 5 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_5x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x1 takes 5 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x1_64Xor takes 5 inputs and produces 1 outputs. +// +//go:noescape +func mulGFNI_5x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x1Xor takes 5 inputs and produces 1 outputs. +// +//go:noescape +func mulAvxGFNI_5x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x2_64 takes 5 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_5x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x2 takes 5 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x2_64Xor takes 5 inputs and produces 2 outputs. +// +//go:noescape +func mulGFNI_5x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x2Xor takes 5 inputs and produces 2 outputs. +// +//go:noescape +func mulAvxGFNI_5x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x3_64 takes 5 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_5x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x3 takes 5 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x3_64Xor takes 5 inputs and produces 3 outputs. +// +//go:noescape +func mulGFNI_5x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x3Xor takes 5 inputs and produces 3 outputs. +// +//go:noescape +func mulAvxGFNI_5x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x4_64 takes 5 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_5x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x4 takes 5 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x4_64Xor takes 5 inputs and produces 4 outputs. +// +//go:noescape +func mulGFNI_5x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x4Xor takes 5 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_5x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x5_64 takes 5 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_5x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x5 takes 5 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x5_64Xor takes 5 inputs and produces 5 outputs. +// +//go:noescape +func mulGFNI_5x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x5Xor takes 5 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_5x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x6_64 takes 5 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_5x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x6 takes 5 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x6_64Xor takes 5 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_5x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x6Xor takes 5 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_5x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x7_64 takes 5 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_5x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x7 takes 5 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x7_64Xor takes 5 inputs and produces 7 outputs. +// +//go:noescape +func mulGFNI_5x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x7Xor takes 5 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_5x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x8_64 takes 5 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_5x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x8 takes 5 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x8_64Xor takes 5 inputs and produces 8 outputs. +// +//go:noescape +func mulGFNI_5x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x8Xor takes 5 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_5x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x9_64 takes 5 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_5x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x9 takes 5 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x9_64Xor takes 5 inputs and produces 9 outputs. +// +//go:noescape +func mulGFNI_5x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x9Xor takes 5 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_5x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x10_64 takes 5 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_5x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x10 takes 5 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_5x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_5x10_64Xor takes 5 inputs and produces 10 outputs. +// +//go:noescape +func mulGFNI_5x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_5x10Xor takes 5 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_5x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x1_64 takes 6 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_6x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x1 takes 6 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x1_64Xor takes 6 inputs and produces 1 outputs. +// +//go:noescape +func mulGFNI_6x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x1Xor takes 6 inputs and produces 1 outputs. +// +//go:noescape +func mulAvxGFNI_6x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x2_64 takes 6 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_6x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x2 takes 6 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x2_64Xor takes 6 inputs and produces 2 outputs. +// +//go:noescape +func mulGFNI_6x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x2Xor takes 6 inputs and produces 2 outputs. +// +//go:noescape +func mulAvxGFNI_6x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x3_64 takes 6 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_6x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x3 takes 6 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x3_64Xor takes 6 inputs and produces 3 outputs. +// +//go:noescape +func mulGFNI_6x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x3Xor takes 6 inputs and produces 3 outputs. +// +//go:noescape +func mulAvxGFNI_6x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x4_64 takes 6 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_6x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x4 takes 6 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x4_64Xor takes 6 inputs and produces 4 outputs. +// +//go:noescape +func mulGFNI_6x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x4Xor takes 6 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_6x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x5_64 takes 6 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_6x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x5 takes 6 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x5_64Xor takes 6 inputs and produces 5 outputs. +// +//go:noescape +func mulGFNI_6x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x5Xor takes 6 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_6x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x6_64 takes 6 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_6x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x6 takes 6 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x6_64Xor takes 6 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_6x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x6Xor takes 6 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_6x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x7_64 takes 6 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_6x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x7 takes 6 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x7_64Xor takes 6 inputs and produces 7 outputs. +// +//go:noescape +func mulGFNI_6x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x7Xor takes 6 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_6x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x8_64 takes 6 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_6x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x8 takes 6 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x8_64Xor takes 6 inputs and produces 8 outputs. +// +//go:noescape +func mulGFNI_6x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x8Xor takes 6 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_6x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x9_64 takes 6 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_6x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x9 takes 6 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x9_64Xor takes 6 inputs and produces 9 outputs. +// +//go:noescape +func mulGFNI_6x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x9Xor takes 6 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_6x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x10_64 takes 6 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_6x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x10 takes 6 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_6x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_6x10_64Xor takes 6 inputs and produces 10 outputs. +// +//go:noescape +func mulGFNI_6x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_6x10Xor takes 6 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_6x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x1_64 takes 7 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_7x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x1 takes 7 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x1_64Xor takes 7 inputs and produces 1 outputs. +// +//go:noescape +func mulGFNI_7x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x1Xor takes 7 inputs and produces 1 outputs. +// +//go:noescape +func mulAvxGFNI_7x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x2_64 takes 7 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_7x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x2 takes 7 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x2_64Xor takes 7 inputs and produces 2 outputs. +// +//go:noescape +func mulGFNI_7x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x2Xor takes 7 inputs and produces 2 outputs. +// +//go:noescape +func mulAvxGFNI_7x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x3_64 takes 7 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_7x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x3 takes 7 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x3_64Xor takes 7 inputs and produces 3 outputs. +// +//go:noescape +func mulGFNI_7x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x3Xor takes 7 inputs and produces 3 outputs. +// +//go:noescape +func mulAvxGFNI_7x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x4_64 takes 7 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_7x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x4 takes 7 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x4_64Xor takes 7 inputs and produces 4 outputs. +// +//go:noescape +func mulGFNI_7x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x4Xor takes 7 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_7x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x5_64 takes 7 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_7x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x5 takes 7 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x5_64Xor takes 7 inputs and produces 5 outputs. +// +//go:noescape +func mulGFNI_7x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x5Xor takes 7 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_7x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x6_64 takes 7 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_7x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x6 takes 7 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x6_64Xor takes 7 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_7x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x6Xor takes 7 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_7x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x7_64 takes 7 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_7x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x7 takes 7 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x7_64Xor takes 7 inputs and produces 7 outputs. +// +//go:noescape +func mulGFNI_7x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x7Xor takes 7 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_7x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x8_64 takes 7 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_7x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x8 takes 7 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x8_64Xor takes 7 inputs and produces 8 outputs. +// +//go:noescape +func mulGFNI_7x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x8Xor takes 7 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_7x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x9_64 takes 7 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_7x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x9 takes 7 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x9_64Xor takes 7 inputs and produces 9 outputs. +// +//go:noescape +func mulGFNI_7x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x9Xor takes 7 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_7x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x10_64 takes 7 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_7x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x10 takes 7 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_7x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_7x10_64Xor takes 7 inputs and produces 10 outputs. +// +//go:noescape +func mulGFNI_7x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_7x10Xor takes 7 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_7x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x1_64 takes 8 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_8x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x1 takes 8 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x1_64Xor takes 8 inputs and produces 1 outputs. +// +//go:noescape +func mulGFNI_8x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x1Xor takes 8 inputs and produces 1 outputs. +// +//go:noescape +func mulAvxGFNI_8x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x2_64 takes 8 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_8x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x2 takes 8 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x2_64Xor takes 8 inputs and produces 2 outputs. +// +//go:noescape +func mulGFNI_8x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x2Xor takes 8 inputs and produces 2 outputs. +// +//go:noescape +func mulAvxGFNI_8x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x3_64 takes 8 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_8x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x3 takes 8 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x3_64Xor takes 8 inputs and produces 3 outputs. +// +//go:noescape +func mulGFNI_8x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x3Xor takes 8 inputs and produces 3 outputs. +// +//go:noescape +func mulAvxGFNI_8x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x4_64 takes 8 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_8x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x4 takes 8 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x4_64Xor takes 8 inputs and produces 4 outputs. +// +//go:noescape +func mulGFNI_8x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x4Xor takes 8 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_8x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x5_64 takes 8 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_8x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x5 takes 8 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x5_64Xor takes 8 inputs and produces 5 outputs. +// +//go:noescape +func mulGFNI_8x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x5Xor takes 8 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_8x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x6_64 takes 8 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_8x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x6 takes 8 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x6_64Xor takes 8 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_8x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x6Xor takes 8 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_8x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x7_64 takes 8 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_8x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x7 takes 8 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x7_64Xor takes 8 inputs and produces 7 outputs. +// +//go:noescape +func mulGFNI_8x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x7Xor takes 8 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_8x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x8_64 takes 8 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_8x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x8 takes 8 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x8_64Xor takes 8 inputs and produces 8 outputs. +// +//go:noescape +func mulGFNI_8x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x8Xor takes 8 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_8x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x9_64 takes 8 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_8x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x9 takes 8 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x9_64Xor takes 8 inputs and produces 9 outputs. +// +//go:noescape +func mulGFNI_8x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x9Xor takes 8 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_8x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x10_64 takes 8 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_8x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x10 takes 8 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_8x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_8x10_64Xor takes 8 inputs and produces 10 outputs. +// +//go:noescape +func mulGFNI_8x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_8x10Xor takes 8 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_8x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x1_64 takes 9 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_9x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x1 takes 9 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x1_64Xor takes 9 inputs and produces 1 outputs. +// +//go:noescape +func mulGFNI_9x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x1Xor takes 9 inputs and produces 1 outputs. +// +//go:noescape +func mulAvxGFNI_9x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x2_64 takes 9 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_9x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x2 takes 9 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x2_64Xor takes 9 inputs and produces 2 outputs. +// +//go:noescape +func mulGFNI_9x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x2Xor takes 9 inputs and produces 2 outputs. +// +//go:noescape +func mulAvxGFNI_9x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x3_64 takes 9 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_9x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x3 takes 9 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x3_64Xor takes 9 inputs and produces 3 outputs. +// +//go:noescape +func mulGFNI_9x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x3Xor takes 9 inputs and produces 3 outputs. +// +//go:noescape +func mulAvxGFNI_9x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x4_64 takes 9 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_9x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x4 takes 9 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x4_64Xor takes 9 inputs and produces 4 outputs. +// +//go:noescape +func mulGFNI_9x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x4Xor takes 9 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_9x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x5_64 takes 9 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_9x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x5 takes 9 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x5_64Xor takes 9 inputs and produces 5 outputs. +// +//go:noescape +func mulGFNI_9x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x5Xor takes 9 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_9x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x6_64 takes 9 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_9x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x6 takes 9 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x6_64Xor takes 9 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_9x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x6Xor takes 9 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_9x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x7_64 takes 9 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_9x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x7 takes 9 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x7_64Xor takes 9 inputs and produces 7 outputs. +// +//go:noescape +func mulGFNI_9x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x7Xor takes 9 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_9x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x8_64 takes 9 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_9x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x8 takes 9 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x8_64Xor takes 9 inputs and produces 8 outputs. +// +//go:noescape +func mulGFNI_9x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x8Xor takes 9 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_9x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x9_64 takes 9 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_9x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x9 takes 9 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x9_64Xor takes 9 inputs and produces 9 outputs. +// +//go:noescape +func mulGFNI_9x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x9Xor takes 9 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_9x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x10_64 takes 9 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_9x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x10 takes 9 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_9x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_9x10_64Xor takes 9 inputs and produces 10 outputs. +// +//go:noescape +func mulGFNI_9x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_9x10Xor takes 9 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_9x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x1_64 takes 10 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_10x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x1 takes 10 inputs and produces 1 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x1_64Xor takes 10 inputs and produces 1 outputs. +// +//go:noescape +func mulGFNI_10x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x1Xor takes 10 inputs and produces 1 outputs. +// +//go:noescape +func mulAvxGFNI_10x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x2_64 takes 10 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_10x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x2 takes 10 inputs and produces 2 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x2_64Xor takes 10 inputs and produces 2 outputs. +// +//go:noescape +func mulGFNI_10x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x2Xor takes 10 inputs and produces 2 outputs. +// +//go:noescape +func mulAvxGFNI_10x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x3_64 takes 10 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_10x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x3 takes 10 inputs and produces 3 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x3_64Xor takes 10 inputs and produces 3 outputs. +// +//go:noescape +func mulGFNI_10x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x3Xor takes 10 inputs and produces 3 outputs. +// +//go:noescape +func mulAvxGFNI_10x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x4_64 takes 10 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_10x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x4 takes 10 inputs and produces 4 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x4_64Xor takes 10 inputs and produces 4 outputs. +// +//go:noescape +func mulGFNI_10x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x4Xor takes 10 inputs and produces 4 outputs. +// +//go:noescape +func mulAvxGFNI_10x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x5_64 takes 10 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_10x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x5 takes 10 inputs and produces 5 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x5_64Xor takes 10 inputs and produces 5 outputs. +// +//go:noescape +func mulGFNI_10x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x5Xor takes 10 inputs and produces 5 outputs. +// +//go:noescape +func mulAvxGFNI_10x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x6_64 takes 10 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_10x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x6 takes 10 inputs and produces 6 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x6_64Xor takes 10 inputs and produces 6 outputs. +// +//go:noescape +func mulGFNI_10x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x6Xor takes 10 inputs and produces 6 outputs. +// +//go:noescape +func mulAvxGFNI_10x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x7_64 takes 10 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_10x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x7 takes 10 inputs and produces 7 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x7_64Xor takes 10 inputs and produces 7 outputs. +// +//go:noescape +func mulGFNI_10x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x7Xor takes 10 inputs and produces 7 outputs. +// +//go:noescape +func mulAvxGFNI_10x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x8_64 takes 10 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_10x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x8 takes 10 inputs and produces 8 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x8_64Xor takes 10 inputs and produces 8 outputs. +// +//go:noescape +func mulGFNI_10x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x8Xor takes 10 inputs and produces 8 outputs. +// +//go:noescape +func mulAvxGFNI_10x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x9_64 takes 10 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_10x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x9 takes 10 inputs and produces 9 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x9_64Xor takes 10 inputs and produces 9 outputs. +// +//go:noescape +func mulGFNI_10x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x9Xor takes 10 inputs and produces 9 outputs. +// +//go:noescape +func mulAvxGFNI_10x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x10_64 takes 10 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulGFNI_10x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x10 takes 10 inputs and produces 10 outputs. +// The output is initialized to 0. +// +//go:noescape +func mulAvxGFNI_10x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulGFNI_10x10_64Xor takes 10 inputs and produces 10 outputs. +// +//go:noescape +func mulGFNI_10x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +// mulAvxGFNI_10x10Xor takes 10 inputs and produces 10 outputs. +// +//go:noescape +func mulAvxGFNI_10x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) + +//go:noescape +func ifftDIT48_gfni_0(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func fftDIT48_gfni_0(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func ifftDIT48_gfni_1(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func fftDIT48_gfni_1(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func ifftDIT48_gfni_2(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func fftDIT48_gfni_2(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func ifftDIT48_gfni_3(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func fftDIT48_gfni_3(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func ifftDIT48_gfni_4(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func fftDIT48_gfni_4(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func ifftDIT48_gfni_5(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func fftDIT48_gfni_5(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func ifftDIT48_gfni_6(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func fftDIT48_gfni_6(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func ifftDIT48_gfni_7(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) + +//go:noescape +func fftDIT48_gfni_7(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.s new file mode 100644 index 00000000..5782759c --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.s @@ -0,0 +1,67987 @@ +// Code generated by command: go run gen.go -out ../galois_gen_nopshufb_amd64.s -stubs ../galois_gen_nopshufb_amd64.go -pkg=reedsolomon. DO NOT EDIT. + +//go:build !appengine && !noasm && !nogen && nopshufb && gc + +#include "textflag.h" + +// func _dummy_() +TEXT ·_dummy_(SB), $0 +#ifdef GOAMD64_v4 +#define XOR3WAY(ignore, a, b, dst) \ + VPTERNLOGD $0x96, a, b, dst + +#else +#define XOR3WAY(ignore, a, b, dst) \ + VPXOR a, dst, dst \ + VPXOR b, dst, dst + +#endif + RET + +// sSE2XorSlice will XOR in with out and store in out. +// Processes 16 bytes/loop. + +// func sSE2XorSlice(in []byte, out []byte) +// Requires: SSE2 +TEXT ·sSE2XorSlice(SB), $0-48 + MOVQ in_base+0(FP), AX + MOVQ out_base+24(FP), CX + MOVQ in_len+8(FP), DX + SHRQ $0x04, DX + JZ end + +loop: + MOVOU (AX), X0 + MOVOU (CX), X1 + PXOR X0, X1 + MOVOU X1, (CX) + ADDQ $0x10, AX + ADDQ $0x10, CX + DECQ DX + JNZ loop + +end: + RET + +// sSE2XorSlice_64 will XOR in with out and store in out. +// Processes 64 bytes/loop. + +// func sSE2XorSlice_64(in []byte, out []byte) +// Requires: SSE2 +TEXT ·sSE2XorSlice_64(SB), $0-48 + MOVQ in_base+0(FP), AX + MOVQ out_base+24(FP), CX + MOVQ in_len+8(FP), DX + SHRQ $0x06, DX + JZ end + +loop: + MOVOU (AX), X0 + MOVOU 16(AX), X2 + MOVOU 32(AX), X4 + MOVOU 48(AX), X6 + MOVOU (CX), X1 + MOVOU 16(CX), X3 + MOVOU 32(CX), X5 + MOVOU 48(CX), X7 + PXOR X0, X1 + PXOR X2, X3 + PXOR X4, X5 + PXOR X6, X7 + MOVOU X1, (CX) + MOVOU X3, 16(CX) + MOVOU X5, 32(CX) + MOVOU X7, 48(CX) + ADDQ $0x40, AX + ADDQ $0x40, CX + DECQ DX + JNZ loop + +end: + RET + +// avx2XorSlice_64 will XOR in with out and store in out. +// Processes 64 bytes/loop. + +// func avx2XorSlice_64(in []byte, out []byte) +// Requires: AVX, AVX2 +TEXT ·avx2XorSlice_64(SB), $0-48 + MOVQ in_base+0(FP), AX + MOVQ out_base+24(FP), CX + MOVQ in_len+8(FP), DX + SHRQ $0x06, DX + JZ end + +loop: + VMOVDQU (AX), Y0 + VMOVDQU 32(AX), Y2 + VMOVDQU (CX), Y1 + VMOVDQU 32(CX), Y3 + VPXOR Y0, Y1, Y1 + VPXOR Y2, Y3, Y3 + VMOVDQU Y1, (CX) + VMOVDQU Y3, 32(CX) + ADDQ $0x40, AX + ADDQ $0x40, CX + DECQ DX + JNZ loop + +end: + VZEROUPPER + RET + +// func mulGFNI_1x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x1_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 4 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x1_64_end + VBROADCASTF32X2 (CX), Z0 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + MOVQ start+72(FP), BX + + // Add start offset to output + ADDQ BX, DX + + // Add start offset to input + ADDQ BX, CX + +mulGFNI_1x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (CX), Z1 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z1, Z1 + + // Store 1 outputs + VMOVDQU64 Z1, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x1_64_loop + VZEROUPPER + +mulGFNI_1x1_64_end: + RET + +// func mulAvxGFNI_1x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 4 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x1_end + VBROADCASTSD (CX), Y0 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + MOVQ start+72(FP), BX + + // Add start offset to output + ADDQ BX, DX + + // Add start offset to input + ADDQ BX, CX + +mulAvxGFNI_1x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (CX), Y1 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y1, Y1 + + // Store 1 outputs + VMOVDQU Y1, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x1_loop + VZEROUPPER + +mulAvxGFNI_1x1_end: + RET + +// func mulGFNI_1x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x1_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 4 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x1_64Xor_end + VBROADCASTF32X2 (CX), Z0 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + MOVQ start+72(FP), BX + + // Add start offset to output + ADDQ BX, DX + + // Add start offset to input + ADDQ BX, CX + +mulGFNI_1x1_64Xor_loop: + // Load 1 outputs + VMOVDQU64 (DX), Z1 + + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (CX), Z2 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z2, Z2 + VXORPD Z1, Z2, Z1 + + // Store 1 outputs + VMOVDQU64 Z1, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x1_64Xor_loop + VZEROUPPER + +mulGFNI_1x1_64Xor_end: + RET + +// func mulAvxGFNI_1x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 4 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x1Xor_end + VBROADCASTSD (CX), Y0 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + MOVQ start+72(FP), BX + + // Add start offset to output + ADDQ BX, DX + + // Add start offset to input + ADDQ BX, CX + +mulAvxGFNI_1x1Xor_loop: + // Load 1 outputs + VMOVDQU (DX), Y1 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (CX), Y2 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y2, Y2 + VXORPD Y1, Y2, Y1 + + // Store 1 outputs + VMOVDQU Y1, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x1Xor_loop + VZEROUPPER + +mulAvxGFNI_1x1Xor_end: + RET + +// func mulGFNI_1x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x2_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x2_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ start+72(FP), SI + + // Add start offset to output + ADDQ SI, BX + ADDQ SI, DX + + // Add start offset to input + ADDQ SI, CX + +mulGFNI_1x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (CX), Z3 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z3, Z2 + VGF2P8AFFINEQB $0x00, Z1, Z3, Z3 + + // Store 2 outputs + VMOVDQU64 Z2, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z3, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x2_64_loop + VZEROUPPER + +mulGFNI_1x2_64_end: + RET + +// func mulAvxGFNI_1x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ start+72(FP), SI + + // Add start offset to output + ADDQ SI, BX + ADDQ SI, DX + + // Add start offset to input + ADDQ SI, CX + +mulAvxGFNI_1x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (CX), Y3 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y3, Y2 + VGF2P8AFFINEQB $0x00, Y1, Y3, Y3 + + // Store 2 outputs + VMOVDQU Y2, (BX) + ADDQ $0x20, BX + VMOVDQU Y3, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x2_loop + VZEROUPPER + +mulAvxGFNI_1x2_end: + RET + +// func mulGFNI_1x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x2_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x2_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ start+72(FP), SI + + // Add start offset to output + ADDQ SI, BX + ADDQ SI, DX + + // Add start offset to input + ADDQ SI, CX + +mulGFNI_1x2_64Xor_loop: + // Load 2 outputs + VMOVDQU64 (BX), Z2 + VMOVDQU64 (DX), Z3 + + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (CX), Z4 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z4, Z5 + VXORPD Z2, Z5, Z2 + VGF2P8AFFINEQB $0x00, Z1, Z4, Z5 + VXORPD Z3, Z5, Z3 + + // Store 2 outputs + VMOVDQU64 Z2, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z3, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x2_64Xor_loop + VZEROUPPER + +mulGFNI_1x2_64Xor_end: + RET + +// func mulAvxGFNI_1x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x2Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ start+72(FP), SI + + // Add start offset to output + ADDQ SI, BX + ADDQ SI, DX + + // Add start offset to input + ADDQ SI, CX + +mulAvxGFNI_1x2Xor_loop: + // Load 2 outputs + VMOVDQU (BX), Y2 + VMOVDQU (DX), Y3 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (CX), Y4 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y4, Y5 + VXORPD Y2, Y5, Y2 + VGF2P8AFFINEQB $0x00, Y1, Y4, Y5 + VXORPD Y3, Y5, Y3 + + // Store 2 outputs + VMOVDQU Y2, (BX) + ADDQ $0x20, BX + VMOVDQU Y3, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x2Xor_loop + VZEROUPPER + +mulAvxGFNI_1x2Xor_end: + RET + +// func mulGFNI_1x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x3_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x3_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, BX + ADDQ DI, SI + ADDQ DI, DX + + // Add start offset to input + ADDQ DI, CX + +mulGFNI_1x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (CX), Z5 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z5, Z3 + VGF2P8AFFINEQB $0x00, Z1, Z5, Z4 + VGF2P8AFFINEQB $0x00, Z2, Z5, Z5 + + // Store 3 outputs + VMOVDQU64 Z3, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z4, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z5, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x3_64_loop + VZEROUPPER + +mulGFNI_1x3_64_end: + RET + +// func mulAvxGFNI_1x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x3(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, BX + ADDQ DI, SI + ADDQ DI, DX + + // Add start offset to input + ADDQ DI, CX + +mulAvxGFNI_1x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (CX), Y5 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y5, Y3 + VGF2P8AFFINEQB $0x00, Y1, Y5, Y4 + VGF2P8AFFINEQB $0x00, Y2, Y5, Y5 + + // Store 3 outputs + VMOVDQU Y3, (BX) + ADDQ $0x20, BX + VMOVDQU Y4, (SI) + ADDQ $0x20, SI + VMOVDQU Y5, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x3_loop + VZEROUPPER + +mulAvxGFNI_1x3_end: + RET + +// func mulGFNI_1x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x3_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x3_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, BX + ADDQ DI, SI + ADDQ DI, DX + + // Add start offset to input + ADDQ DI, CX + +mulGFNI_1x3_64Xor_loop: + // Load 3 outputs + VMOVDQU64 (BX), Z3 + VMOVDQU64 (SI), Z4 + VMOVDQU64 (DX), Z5 + + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (CX), Z6 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z6, Z7 + VXORPD Z3, Z7, Z3 + VGF2P8AFFINEQB $0x00, Z1, Z6, Z7 + VXORPD Z4, Z7, Z4 + VGF2P8AFFINEQB $0x00, Z2, Z6, Z7 + VXORPD Z5, Z7, Z5 + + // Store 3 outputs + VMOVDQU64 Z3, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z4, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z5, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x3_64Xor_loop + VZEROUPPER + +mulGFNI_1x3_64Xor_end: + RET + +// func mulAvxGFNI_1x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x3Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, BX + ADDQ DI, SI + ADDQ DI, DX + + // Add start offset to input + ADDQ DI, CX + +mulAvxGFNI_1x3Xor_loop: + // Load 3 outputs + VMOVDQU (BX), Y3 + VMOVDQU (SI), Y4 + VMOVDQU (DX), Y5 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (CX), Y6 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y6, Y7 + VXORPD Y3, Y7, Y3 + VGF2P8AFFINEQB $0x00, Y1, Y6, Y7 + VXORPD Y4, Y7, Y4 + VGF2P8AFFINEQB $0x00, Y2, Y6, Y7 + VXORPD Y5, Y7, Y5 + + // Store 3 outputs + VMOVDQU Y3, (BX) + ADDQ $0x20, BX + VMOVDQU Y4, (SI) + ADDQ $0x20, SI + VMOVDQU Y5, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x3Xor_loop + VZEROUPPER + +mulAvxGFNI_1x3Xor_end: + RET + +// func mulGFNI_1x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x4_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x4_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, DX + + // Add start offset to input + ADDQ R8, CX + +mulGFNI_1x4_64_loop: + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (CX), Z7 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z7, Z4 + VGF2P8AFFINEQB $0x00, Z1, Z7, Z5 + VGF2P8AFFINEQB $0x00, Z2, Z7, Z6 + VGF2P8AFFINEQB $0x00, Z3, Z7, Z7 + + // Store 4 outputs + VMOVDQU64 Z4, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z5, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z6, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z7, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x4_64_loop + VZEROUPPER + +mulGFNI_1x4_64_end: + RET + +// func mulAvxGFNI_1x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x4(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, DX + + // Add start offset to input + ADDQ R8, CX + +mulAvxGFNI_1x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (CX), Y7 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y7, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y7, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y7, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y7, Y7 + + // Store 4 outputs + VMOVDQU Y4, (BX) + ADDQ $0x20, BX + VMOVDQU Y5, (SI) + ADDQ $0x20, SI + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x4_loop + VZEROUPPER + +mulAvxGFNI_1x4_end: + RET + +// func mulGFNI_1x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x4_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x4_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, DX + + // Add start offset to input + ADDQ R8, CX + +mulGFNI_1x4_64Xor_loop: + // Load 4 outputs + VMOVDQU64 (BX), Z4 + VMOVDQU64 (SI), Z5 + VMOVDQU64 (DI), Z6 + VMOVDQU64 (DX), Z7 + + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (CX), Z8 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z8, Z9 + VXORPD Z4, Z9, Z4 + VGF2P8AFFINEQB $0x00, Z1, Z8, Z9 + VXORPD Z5, Z9, Z5 + VGF2P8AFFINEQB $0x00, Z2, Z8, Z9 + VXORPD Z6, Z9, Z6 + VGF2P8AFFINEQB $0x00, Z3, Z8, Z9 + VXORPD Z7, Z9, Z7 + + // Store 4 outputs + VMOVDQU64 Z4, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z5, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z6, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z7, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x4_64Xor_loop + VZEROUPPER + +mulGFNI_1x4_64Xor_end: + RET + +// func mulAvxGFNI_1x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x4Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, DX + + // Add start offset to input + ADDQ R8, CX + +mulAvxGFNI_1x4Xor_loop: + // Load 4 outputs + VMOVDQU (BX), Y4 + VMOVDQU (SI), Y5 + VMOVDQU (DI), Y6 + VMOVDQU (DX), Y7 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (CX), Y8 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y8, Y9 + VXORPD Y4, Y9, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y8, Y9 + VXORPD Y5, Y9, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y8, Y9 + VXORPD Y7, Y9, Y7 + + // Store 4 outputs + VMOVDQU Y4, (BX) + ADDQ $0x20, BX + VMOVDQU Y5, (SI) + ADDQ $0x20, SI + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x4Xor_loop + VZEROUPPER + +mulAvxGFNI_1x4Xor_end: + RET + +// func mulGFNI_1x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x5_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x5_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, DX + + // Add start offset to input + ADDQ R9, CX + +mulGFNI_1x5_64_loop: + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (CX), Z9 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z9, Z5 + VGF2P8AFFINEQB $0x00, Z1, Z9, Z6 + VGF2P8AFFINEQB $0x00, Z2, Z9, Z7 + VGF2P8AFFINEQB $0x00, Z3, Z9, Z8 + VGF2P8AFFINEQB $0x00, Z4, Z9, Z9 + + // Store 5 outputs + VMOVDQU64 Z5, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z6, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z7, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z8, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z9, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x5_64_loop + VZEROUPPER + +mulGFNI_1x5_64_end: + RET + +// func mulAvxGFNI_1x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x5(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, DX + + // Add start offset to input + ADDQ R9, CX + +mulAvxGFNI_1x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (CX), Y9 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y9, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y9, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y9, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y9, Y9 + + // Store 5 outputs + VMOVDQU Y5, (BX) + ADDQ $0x20, BX + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x5_loop + VZEROUPPER + +mulAvxGFNI_1x5_end: + RET + +// func mulGFNI_1x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x5_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x5_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, DX + + // Add start offset to input + ADDQ R9, CX + +mulGFNI_1x5_64Xor_loop: + // Load 5 outputs + VMOVDQU64 (BX), Z5 + VMOVDQU64 (SI), Z6 + VMOVDQU64 (DI), Z7 + VMOVDQU64 (R8), Z8 + VMOVDQU64 (DX), Z9 + + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (CX), Z10 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z10, Z11 + VXORPD Z5, Z11, Z5 + VGF2P8AFFINEQB $0x00, Z1, Z10, Z11 + VXORPD Z6, Z11, Z6 + VGF2P8AFFINEQB $0x00, Z2, Z10, Z11 + VXORPD Z7, Z11, Z7 + VGF2P8AFFINEQB $0x00, Z3, Z10, Z11 + VXORPD Z8, Z11, Z8 + VGF2P8AFFINEQB $0x00, Z4, Z10, Z11 + VXORPD Z9, Z11, Z9 + + // Store 5 outputs + VMOVDQU64 Z5, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z6, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z7, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z8, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z9, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x5_64Xor_loop + VZEROUPPER + +mulGFNI_1x5_64Xor_end: + RET + +// func mulAvxGFNI_1x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x5Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, DX + + // Add start offset to input + ADDQ R9, CX + +mulAvxGFNI_1x5Xor_loop: + // Load 5 outputs + VMOVDQU (BX), Y5 + VMOVDQU (SI), Y6 + VMOVDQU (DI), Y7 + VMOVDQU (R8), Y8 + VMOVDQU (DX), Y9 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (CX), Y10 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y10, Y11 + VXORPD Y5, Y11, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y10, Y11 + VXORPD Y6, Y11, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y10, Y11 + VXORPD Y7, Y11, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Store 5 outputs + VMOVDQU Y5, (BX) + ADDQ $0x20, BX + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x5Xor_loop + VZEROUPPER + +mulAvxGFNI_1x5Xor_end: + RET + +// func mulGFNI_1x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x6_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x6_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, DX + + // Add start offset to input + ADDQ R10, CX + +mulGFNI_1x6_64_loop: + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (CX), Z11 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z11, Z6 + VGF2P8AFFINEQB $0x00, Z1, Z11, Z7 + VGF2P8AFFINEQB $0x00, Z2, Z11, Z8 + VGF2P8AFFINEQB $0x00, Z3, Z11, Z9 + VGF2P8AFFINEQB $0x00, Z4, Z11, Z10 + VGF2P8AFFINEQB $0x00, Z5, Z11, Z11 + + // Store 6 outputs + VMOVDQU64 Z6, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z7, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z8, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z9, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z10, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z11, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x6_64_loop + VZEROUPPER + +mulGFNI_1x6_64_end: + RET + +// func mulAvxGFNI_1x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x6(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, DX + + // Add start offset to input + ADDQ R10, CX + +mulAvxGFNI_1x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (CX), Y11 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y11, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y11, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y11, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y11, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y11, Y11 + + // Store 6 outputs + VMOVDQU Y6, (BX) + ADDQ $0x20, BX + VMOVDQU Y7, (SI) + ADDQ $0x20, SI + VMOVDQU Y8, (DI) + ADDQ $0x20, DI + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x6_loop + VZEROUPPER + +mulAvxGFNI_1x6_end: + RET + +// func mulGFNI_1x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x6_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x6_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, DX + + // Add start offset to input + ADDQ R10, CX + +mulGFNI_1x6_64Xor_loop: + // Load 6 outputs + VMOVDQU64 (BX), Z6 + VMOVDQU64 (SI), Z7 + VMOVDQU64 (DI), Z8 + VMOVDQU64 (R8), Z9 + VMOVDQU64 (R9), Z10 + VMOVDQU64 (DX), Z11 + + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (CX), Z12 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z12, Z13 + VXORPD Z6, Z13, Z6 + VGF2P8AFFINEQB $0x00, Z1, Z12, Z13 + VXORPD Z7, Z13, Z7 + VGF2P8AFFINEQB $0x00, Z2, Z12, Z13 + VXORPD Z8, Z13, Z8 + VGF2P8AFFINEQB $0x00, Z3, Z12, Z13 + VXORPD Z9, Z13, Z9 + VGF2P8AFFINEQB $0x00, Z4, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z5, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Store 6 outputs + VMOVDQU64 Z6, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z7, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z8, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z9, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z10, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z11, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x6_64Xor_loop + VZEROUPPER + +mulGFNI_1x6_64Xor_end: + RET + +// func mulAvxGFNI_1x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x6Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, DX + + // Add start offset to input + ADDQ R10, CX + +mulAvxGFNI_1x6Xor_loop: + // Load 6 outputs + VMOVDQU (BX), Y6 + VMOVDQU (SI), Y7 + VMOVDQU (DI), Y8 + VMOVDQU (R8), Y9 + VMOVDQU (R9), Y10 + VMOVDQU (DX), Y11 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y13 + VXORPD Y6, Y13, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y13 + VXORPD Y7, Y13, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y12, Y13 + VXORPD Y8, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 6 outputs + VMOVDQU Y6, (BX) + ADDQ $0x20, BX + VMOVDQU Y7, (SI) + ADDQ $0x20, SI + VMOVDQU Y8, (DI) + ADDQ $0x20, DI + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x6Xor_loop + VZEROUPPER + +mulAvxGFNI_1x6Xor_end: + RET + +// func mulGFNI_1x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x7_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x7_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DX + + // Add start offset to input + ADDQ R11, CX + +mulGFNI_1x7_64_loop: + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (CX), Z13 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z13, Z7 + VGF2P8AFFINEQB $0x00, Z1, Z13, Z8 + VGF2P8AFFINEQB $0x00, Z2, Z13, Z9 + VGF2P8AFFINEQB $0x00, Z3, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z4, Z13, Z11 + VGF2P8AFFINEQB $0x00, Z5, Z13, Z12 + VGF2P8AFFINEQB $0x00, Z6, Z13, Z13 + + // Store 7 outputs + VMOVDQU64 Z7, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z8, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z9, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z10, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z11, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z12, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z13, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x7_64_loop + VZEROUPPER + +mulGFNI_1x7_64_end: + RET + +// func mulAvxGFNI_1x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x7(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DX + + // Add start offset to input + ADDQ R11, CX + +mulAvxGFNI_1x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (CX), Y13 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y13, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y13, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y13, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y13, Y13 + + // Store 7 outputs + VMOVDQU Y7, (BX) + ADDQ $0x20, BX + VMOVDQU Y8, (SI) + ADDQ $0x20, SI + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x7_loop + VZEROUPPER + +mulAvxGFNI_1x7_end: + RET + +// func mulGFNI_1x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x7_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x7_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DX + + // Add start offset to input + ADDQ R11, CX + +mulGFNI_1x7_64Xor_loop: + // Load 7 outputs + VMOVDQU64 (BX), Z7 + VMOVDQU64 (SI), Z8 + VMOVDQU64 (DI), Z9 + VMOVDQU64 (R8), Z10 + VMOVDQU64 (R9), Z11 + VMOVDQU64 (R10), Z12 + VMOVDQU64 (DX), Z13 + + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (CX), Z14 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z14, Z15 + VXORPD Z7, Z15, Z7 + VGF2P8AFFINEQB $0x00, Z1, Z14, Z15 + VXORPD Z8, Z15, Z8 + VGF2P8AFFINEQB $0x00, Z2, Z14, Z15 + VXORPD Z9, Z15, Z9 + VGF2P8AFFINEQB $0x00, Z3, Z14, Z15 + VXORPD Z10, Z15, Z10 + VGF2P8AFFINEQB $0x00, Z4, Z14, Z15 + VXORPD Z11, Z15, Z11 + VGF2P8AFFINEQB $0x00, Z5, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z6, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Store 7 outputs + VMOVDQU64 Z7, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z8, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z9, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z10, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z11, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z12, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z13, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x7_64Xor_loop + VZEROUPPER + +mulGFNI_1x7_64Xor_end: + RET + +// func mulAvxGFNI_1x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x7Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DX + + // Add start offset to input + ADDQ R11, CX + +mulAvxGFNI_1x7Xor_loop: + // Load 7 outputs + VMOVDQU (BX), Y7 + VMOVDQU (SI), Y8 + VMOVDQU (DI), Y9 + VMOVDQU (R8), Y10 + VMOVDQU (R9), Y11 + VMOVDQU (R10), Y12 + VMOVDQU (DX), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (CX), Y14 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (BX) + ADDQ $0x20, BX + VMOVDQU Y8, (SI) + ADDQ $0x20, SI + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (DX) + ADDQ $0x20, DX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x7Xor_loop + VZEROUPPER + +mulAvxGFNI_1x7Xor_end: + RET + +// func mulGFNI_1x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x8_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x8_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, DX + + // Add start offset to input + ADDQ R12, CX + +mulGFNI_1x8_64_loop: + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (CX), Z15 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z15, Z8 + VGF2P8AFFINEQB $0x00, Z1, Z15, Z9 + VGF2P8AFFINEQB $0x00, Z2, Z15, Z10 + VGF2P8AFFINEQB $0x00, Z3, Z15, Z11 + VGF2P8AFFINEQB $0x00, Z4, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z5, Z15, Z13 + VGF2P8AFFINEQB $0x00, Z6, Z15, Z14 + VGF2P8AFFINEQB $0x00, Z7, Z15, Z15 + + // Store 8 outputs + VMOVDQU64 Z8, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z9, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z10, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z11, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z12, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z13, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z14, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z15, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x8_64_loop + VZEROUPPER + +mulGFNI_1x8_64_end: + RET + +// func mulAvxGFNI_1x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x8(SB), $0-88 + // Loading 6 of 8 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), BX + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, BX + + // Add start offset to input + ADDQ R13, DX + +mulAvxGFNI_1x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (DX), Y13 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y13, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y13, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y13, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y13, Y12 + VBROADCASTSD 56(CX), Y14 + VGF2P8AFFINEQB $0x00, Y14, Y13, Y13 + + // Store 8 outputs + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x8_loop + VZEROUPPER + +mulAvxGFNI_1x8_end: + RET + +// func mulGFNI_1x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x8_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x8_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, DX + + // Add start offset to input + ADDQ R12, CX + +mulGFNI_1x8_64Xor_loop: + // Load 8 outputs + VMOVDQU64 (BX), Z8 + VMOVDQU64 (SI), Z9 + VMOVDQU64 (DI), Z10 + VMOVDQU64 (R8), Z11 + VMOVDQU64 (R9), Z12 + VMOVDQU64 (R10), Z13 + VMOVDQU64 (R11), Z14 + VMOVDQU64 (DX), Z15 + + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (CX), Z16 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z16, Z17 + VXORPD Z8, Z17, Z8 + VGF2P8AFFINEQB $0x00, Z1, Z16, Z17 + VXORPD Z9, Z17, Z9 + VGF2P8AFFINEQB $0x00, Z2, Z16, Z17 + VXORPD Z10, Z17, Z10 + VGF2P8AFFINEQB $0x00, Z3, Z16, Z17 + VXORPD Z11, Z17, Z11 + VGF2P8AFFINEQB $0x00, Z4, Z16, Z17 + VXORPD Z12, Z17, Z12 + VGF2P8AFFINEQB $0x00, Z5, Z16, Z17 + VXORPD Z13, Z17, Z13 + VGF2P8AFFINEQB $0x00, Z6, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z7, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Store 8 outputs + VMOVDQU64 Z8, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z9, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z10, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z11, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z12, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z13, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z14, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z15, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x8_64Xor_loop + VZEROUPPER + +mulGFNI_1x8_64Xor_end: + RET + +// func mulAvxGFNI_1x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x8Xor(SB), $0-88 + // Loading 6 of 8 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), BX + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, BX + + // Add start offset to input + ADDQ R13, DX + +mulAvxGFNI_1x8Xor_loop: + // Load 8 outputs + VMOVDQU (SI), Y6 + VMOVDQU (DI), Y7 + VMOVDQU (R8), Y8 + VMOVDQU (R9), Y9 + VMOVDQU (R10), Y10 + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (BX), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x8Xor_loop + VZEROUPPER + +mulAvxGFNI_1x8Xor_end: + RET + +// func mulGFNI_1x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x9_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x9_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, DX + + // Add start offset to input + ADDQ R13, CX + +mulGFNI_1x9_64_loop: + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (CX), Z17 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z17, Z9 + VGF2P8AFFINEQB $0x00, Z1, Z17, Z10 + VGF2P8AFFINEQB $0x00, Z2, Z17, Z11 + VGF2P8AFFINEQB $0x00, Z3, Z17, Z12 + VGF2P8AFFINEQB $0x00, Z4, Z17, Z13 + VGF2P8AFFINEQB $0x00, Z5, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z6, Z17, Z15 + VGF2P8AFFINEQB $0x00, Z7, Z17, Z16 + VGF2P8AFFINEQB $0x00, Z8, Z17, Z17 + + // Store 9 outputs + VMOVDQU64 Z9, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z10, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z11, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z12, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z13, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z14, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z15, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z16, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z17, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x9_64_loop + VZEROUPPER + +mulGFNI_1x9_64_end: + RET + +// func mulAvxGFNI_1x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x9(SB), $0-88 + // Loading 5 of 9 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), BX + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, BX + + // Add start offset to input + ADDQ R14, DX + +mulAvxGFNI_1x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (DX), Y13 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y13, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y13, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y13, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y13, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y13, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y13, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y13, Y12 + VBROADCASTSD 64(CX), Y14 + VGF2P8AFFINEQB $0x00, Y14, Y13, Y13 + + // Store 9 outputs + VMOVDQU Y5, (SI) + ADDQ $0x20, SI + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x9_loop + VZEROUPPER + +mulAvxGFNI_1x9_end: + RET + +// func mulGFNI_1x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x9_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x9_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, DX + + // Add start offset to input + ADDQ R13, CX + +mulGFNI_1x9_64Xor_loop: + // Load 9 outputs + VMOVDQU64 (BX), Z9 + VMOVDQU64 (SI), Z10 + VMOVDQU64 (DI), Z11 + VMOVDQU64 (R8), Z12 + VMOVDQU64 (R9), Z13 + VMOVDQU64 (R10), Z14 + VMOVDQU64 (R11), Z15 + VMOVDQU64 (R12), Z16 + VMOVDQU64 (DX), Z17 + + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (CX), Z18 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z18, Z19 + VXORPD Z9, Z19, Z9 + VGF2P8AFFINEQB $0x00, Z1, Z18, Z19 + VXORPD Z10, Z19, Z10 + VGF2P8AFFINEQB $0x00, Z2, Z18, Z19 + VXORPD Z11, Z19, Z11 + VGF2P8AFFINEQB $0x00, Z3, Z18, Z19 + VXORPD Z12, Z19, Z12 + VGF2P8AFFINEQB $0x00, Z4, Z18, Z19 + VXORPD Z13, Z19, Z13 + VGF2P8AFFINEQB $0x00, Z5, Z18, Z19 + VXORPD Z14, Z19, Z14 + VGF2P8AFFINEQB $0x00, Z6, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z7, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z8, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Store 9 outputs + VMOVDQU64 Z9, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z10, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z11, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z12, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z13, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z14, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z15, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z16, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z17, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x9_64Xor_loop + VZEROUPPER + +mulGFNI_1x9_64Xor_end: + RET + +// func mulAvxGFNI_1x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x9Xor(SB), $0-88 + // Loading 5 of 9 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), BX + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, BX + + // Add start offset to input + ADDQ R14, DX + +mulAvxGFNI_1x9Xor_loop: + // Load 9 outputs + VMOVDQU (SI), Y5 + VMOVDQU (DI), Y6 + VMOVDQU (R8), Y7 + VMOVDQU (R9), Y8 + VMOVDQU (R10), Y9 + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (BX), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (SI) + ADDQ $0x20, SI + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x9Xor_loop + VZEROUPPER + +mulAvxGFNI_1x9Xor_end: + RET + +// func mulGFNI_1x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x10_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x10_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, DX + + // Add start offset to input + ADDQ R14, CX + +mulGFNI_1x10_64_loop: + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (CX), Z19 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z19, Z10 + VGF2P8AFFINEQB $0x00, Z1, Z19, Z11 + VGF2P8AFFINEQB $0x00, Z2, Z19, Z12 + VGF2P8AFFINEQB $0x00, Z3, Z19, Z13 + VGF2P8AFFINEQB $0x00, Z4, Z19, Z14 + VGF2P8AFFINEQB $0x00, Z5, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z6, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z7, Z19, Z17 + VGF2P8AFFINEQB $0x00, Z8, Z19, Z18 + VGF2P8AFFINEQB $0x00, Z9, Z19, Z19 + + // Store 10 outputs + VMOVDQU64 Z10, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z11, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z12, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z13, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z14, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z15, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z16, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z17, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z18, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z19, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x10_64_loop + VZEROUPPER + +mulGFNI_1x10_64_end: + RET + +// func mulAvxGFNI_1x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x10(SB), $0-88 + // Loading 4 of 10 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), R14 + MOVQ 216(BX), BX + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, BX + + // Add start offset to input + ADDQ R15, DX + +mulAvxGFNI_1x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (DX), Y13 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y13, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y13, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y13, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y13, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y13, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y13, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y13, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y13, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y13, Y12 + VBROADCASTSD 72(CX), Y14 + VGF2P8AFFINEQB $0x00, Y14, Y13, Y13 + + // Store 10 outputs + VMOVDQU Y4, (SI) + ADDQ $0x20, SI + VMOVDQU Y5, (DI) + ADDQ $0x20, DI + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x10_loop + VZEROUPPER + +mulAvxGFNI_1x10_end: + RET + +// func mulGFNI_1x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_1x10_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_1x10_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ out_base+48(FP), DX + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, DX + + // Add start offset to input + ADDQ R14, CX + +mulGFNI_1x10_64Xor_loop: + // Load 10 outputs + VMOVDQU64 (BX), Z10 + VMOVDQU64 (SI), Z11 + VMOVDQU64 (DI), Z12 + VMOVDQU64 (R8), Z13 + VMOVDQU64 (R9), Z14 + VMOVDQU64 (R10), Z15 + VMOVDQU64 (R11), Z16 + VMOVDQU64 (R12), Z17 + VMOVDQU64 (R13), Z18 + VMOVDQU64 (DX), Z19 + + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (CX), Z20 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z0, Z20, Z21 + VXORPD Z10, Z21, Z10 + VGF2P8AFFINEQB $0x00, Z1, Z20, Z21 + VXORPD Z11, Z21, Z11 + VGF2P8AFFINEQB $0x00, Z2, Z20, Z21 + VXORPD Z12, Z21, Z12 + VGF2P8AFFINEQB $0x00, Z3, Z20, Z21 + VXORPD Z13, Z21, Z13 + VGF2P8AFFINEQB $0x00, Z4, Z20, Z21 + VXORPD Z14, Z21, Z14 + VGF2P8AFFINEQB $0x00, Z5, Z20, Z21 + VXORPD Z15, Z21, Z15 + VGF2P8AFFINEQB $0x00, Z6, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z7, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z8, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z9, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Store 10 outputs + VMOVDQU64 Z10, (BX) + ADDQ $0x40, BX + VMOVDQU64 Z11, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z12, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z13, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z14, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z15, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z16, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z17, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z18, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z19, (DX) + ADDQ $0x40, DX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_1x10_64Xor_loop + VZEROUPPER + +mulGFNI_1x10_64Xor_end: + RET + +// func mulAvxGFNI_1x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_1x10Xor(SB), $0-88 + // Loading 4 of 10 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_1x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), DX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), R14 + MOVQ 216(BX), BX + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, BX + + // Add start offset to input + ADDQ R15, DX + +mulAvxGFNI_1x10Xor_loop: + // Load 10 outputs + VMOVDQU (SI), Y4 + VMOVDQU (DI), Y5 + VMOVDQU (R8), Y6 + VMOVDQU (R9), Y7 + VMOVDQU (R10), Y8 + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (BX), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + VMOVDQU Y4, (SI) + ADDQ $0x20, SI + VMOVDQU Y5, (DI) + ADDQ $0x20, DI + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_1x10Xor_loop + VZEROUPPER + +mulAvxGFNI_1x10Xor_end: + RET + +// func mulGFNI_2x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x1_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 5 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x1_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), BX + MOVQ start+72(FP), SI + + // Add start offset to output + ADDQ SI, BX + + // Add start offset to input + ADDQ SI, DX + ADDQ SI, CX + +mulGFNI_2x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z3 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z3, Z2 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (CX), Z3 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z1, Z3, Z3 + VXORPD Z2, Z3, Z2 + + // Store 1 outputs + VMOVDQU64 Z2, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x1_64_loop + VZEROUPPER + +mulGFNI_2x1_64_end: + RET + +// func mulAvxGFNI_2x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 5 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), BX + MOVQ start+72(FP), SI + + // Add start offset to output + ADDQ SI, BX + + // Add start offset to input + ADDQ SI, DX + ADDQ SI, CX + +mulAvxGFNI_2x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y3 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y3, Y2 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (CX), Y3 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y1, Y3, Y3 + VXORPD Y2, Y3, Y2 + + // Store 1 outputs + VMOVDQU Y2, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x1_loop + VZEROUPPER + +mulAvxGFNI_2x1_end: + RET + +// func mulGFNI_2x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x1_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 5 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x1_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), BX + MOVQ start+72(FP), SI + + // Add start offset to output + ADDQ SI, BX + + // Add start offset to input + ADDQ SI, DX + ADDQ SI, CX + +mulGFNI_2x1_64Xor_loop: + // Load 1 outputs + VMOVDQU64 (BX), Z2 + + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z3 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z3, Z3 + VXORPD Z2, Z3, Z2 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (CX), Z3 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z1, Z3, Z3 + VXORPD Z2, Z3, Z2 + + // Store 1 outputs + VMOVDQU64 Z2, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x1_64Xor_loop + VZEROUPPER + +mulGFNI_2x1_64Xor_end: + RET + +// func mulAvxGFNI_2x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 5 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), BX + MOVQ start+72(FP), SI + + // Add start offset to output + ADDQ SI, BX + + // Add start offset to input + ADDQ SI, DX + ADDQ SI, CX + +mulAvxGFNI_2x1Xor_loop: + // Load 1 outputs + VMOVDQU (BX), Y2 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y3 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y3, Y3 + VXORPD Y2, Y3, Y2 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (CX), Y3 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y1, Y3, Y3 + VXORPD Y2, Y3, Y2 + + // Store 1 outputs + VMOVDQU Y2, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x1Xor_loop + VZEROUPPER + +mulAvxGFNI_2x1Xor_end: + RET + +// func mulGFNI_2x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x2_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x2_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), BX + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, SI + ADDQ DI, BX + + // Add start offset to input + ADDQ DI, DX + ADDQ DI, CX + +mulGFNI_2x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z6 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z6, Z4 + VGF2P8AFFINEQB $0x00, Z1, Z6, Z5 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (CX), Z6 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z2, Z6, Z7 + VXORPD Z4, Z7, Z4 + VGF2P8AFFINEQB $0x00, Z3, Z6, Z7 + VXORPD Z5, Z7, Z5 + + // Store 2 outputs + VMOVDQU64 Z4, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z5, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x2_64_loop + VZEROUPPER + +mulGFNI_2x2_64_end: + RET + +// func mulAvxGFNI_2x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), BX + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, SI + ADDQ DI, BX + + // Add start offset to input + ADDQ DI, DX + ADDQ DI, CX + +mulAvxGFNI_2x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y6 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y6, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y6, Y5 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (CX), Y6 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y2, Y6, Y7 + VXORPD Y4, Y7, Y4 + VGF2P8AFFINEQB $0x00, Y3, Y6, Y7 + VXORPD Y5, Y7, Y5 + + // Store 2 outputs + VMOVDQU Y4, (SI) + ADDQ $0x20, SI + VMOVDQU Y5, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x2_loop + VZEROUPPER + +mulAvxGFNI_2x2_end: + RET + +// func mulGFNI_2x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x2_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x2_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), BX + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, SI + ADDQ DI, BX + + // Add start offset to input + ADDQ DI, DX + ADDQ DI, CX + +mulGFNI_2x2_64Xor_loop: + // Load 2 outputs + VMOVDQU64 (SI), Z4 + VMOVDQU64 (BX), Z5 + + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z6 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z6, Z7 + VXORPD Z4, Z7, Z4 + VGF2P8AFFINEQB $0x00, Z1, Z6, Z7 + VXORPD Z5, Z7, Z5 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (CX), Z6 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z2, Z6, Z7 + VXORPD Z4, Z7, Z4 + VGF2P8AFFINEQB $0x00, Z3, Z6, Z7 + VXORPD Z5, Z7, Z5 + + // Store 2 outputs + VMOVDQU64 Z4, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z5, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x2_64Xor_loop + VZEROUPPER + +mulGFNI_2x2_64Xor_end: + RET + +// func mulAvxGFNI_2x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x2Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), BX + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, SI + ADDQ DI, BX + + // Add start offset to input + ADDQ DI, DX + ADDQ DI, CX + +mulAvxGFNI_2x2Xor_loop: + // Load 2 outputs + VMOVDQU (SI), Y4 + VMOVDQU (BX), Y5 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y6 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y6, Y7 + VXORPD Y4, Y7, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y6, Y7 + VXORPD Y5, Y7, Y5 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (CX), Y6 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y2, Y6, Y7 + VXORPD Y4, Y7, Y4 + VGF2P8AFFINEQB $0x00, Y3, Y6, Y7 + VXORPD Y5, Y7, Y5 + + // Store 2 outputs + VMOVDQU Y4, (SI) + ADDQ $0x20, SI + VMOVDQU Y5, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x2Xor_loop + VZEROUPPER + +mulAvxGFNI_2x2Xor_end: + RET + +// func mulGFNI_2x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x3_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x3_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), BX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, BX + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, CX + +mulGFNI_2x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z9 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z9, Z6 + VGF2P8AFFINEQB $0x00, Z1, Z9, Z7 + VGF2P8AFFINEQB $0x00, Z2, Z9, Z8 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (CX), Z9 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z3, Z9, Z10 + VXORPD Z6, Z10, Z6 + VGF2P8AFFINEQB $0x00, Z4, Z9, Z10 + VXORPD Z7, Z10, Z7 + VGF2P8AFFINEQB $0x00, Z5, Z9, Z10 + VXORPD Z8, Z10, Z8 + + // Store 3 outputs + VMOVDQU64 Z6, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z7, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z8, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x3_64_loop + VZEROUPPER + +mulGFNI_2x3_64_end: + RET + +// func mulAvxGFNI_2x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x3(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), BX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, BX + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, CX + +mulAvxGFNI_2x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DX), Y9 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y9, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y9, Y8 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (CX), Y9 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y3, Y9, Y10 + VXORPD Y6, Y10, Y6 + VGF2P8AFFINEQB $0x00, Y4, Y9, Y10 + VXORPD Y7, Y10, Y7 + VGF2P8AFFINEQB $0x00, Y5, Y9, Y10 + VXORPD Y8, Y10, Y8 + + // Store 3 outputs + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x3_loop + VZEROUPPER + +mulAvxGFNI_2x3_end: + RET + +// func mulGFNI_2x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x3_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x3_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), BX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, BX + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, CX + +mulGFNI_2x3_64Xor_loop: + // Load 3 outputs + VMOVDQU64 (SI), Z6 + VMOVDQU64 (DI), Z7 + VMOVDQU64 (BX), Z8 + + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z9 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z9, Z10 + VXORPD Z6, Z10, Z6 + VGF2P8AFFINEQB $0x00, Z1, Z9, Z10 + VXORPD Z7, Z10, Z7 + VGF2P8AFFINEQB $0x00, Z2, Z9, Z10 + VXORPD Z8, Z10, Z8 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (CX), Z9 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z3, Z9, Z10 + VXORPD Z6, Z10, Z6 + VGF2P8AFFINEQB $0x00, Z4, Z9, Z10 + VXORPD Z7, Z10, Z7 + VGF2P8AFFINEQB $0x00, Z5, Z9, Z10 + VXORPD Z8, Z10, Z8 + + // Store 3 outputs + VMOVDQU64 Z6, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z7, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z8, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x3_64Xor_loop + VZEROUPPER + +mulGFNI_2x3_64Xor_end: + RET + +// func mulAvxGFNI_2x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x3Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), BX + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, SI + ADDQ R8, DI + ADDQ R8, BX + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, CX + +mulAvxGFNI_2x3Xor_loop: + // Load 3 outputs + VMOVDQU (SI), Y6 + VMOVDQU (DI), Y7 + VMOVDQU (BX), Y8 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DX), Y9 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y9, Y10 + VXORPD Y6, Y10, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y9, Y10 + VXORPD Y7, Y10, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y9, Y10 + VXORPD Y8, Y10, Y8 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (CX), Y9 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y3, Y9, Y10 + VXORPD Y6, Y10, Y6 + VGF2P8AFFINEQB $0x00, Y4, Y9, Y10 + VXORPD Y7, Y10, Y7 + VGF2P8AFFINEQB $0x00, Y5, Y9, Y10 + VXORPD Y8, Y10, Y8 + + // Store 3 outputs + VMOVDQU Y6, (SI) + ADDQ $0x20, SI + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x3Xor_loop + VZEROUPPER + +mulAvxGFNI_2x3Xor_end: + RET + +// func mulGFNI_2x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x4_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x4_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), BX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, BX + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, CX + +mulGFNI_2x4_64_loop: + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z12 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z12, Z8 + VGF2P8AFFINEQB $0x00, Z1, Z12, Z9 + VGF2P8AFFINEQB $0x00, Z2, Z12, Z10 + VGF2P8AFFINEQB $0x00, Z3, Z12, Z11 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (CX), Z12 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z4, Z12, Z13 + VXORPD Z8, Z13, Z8 + VGF2P8AFFINEQB $0x00, Z5, Z12, Z13 + VXORPD Z9, Z13, Z9 + VGF2P8AFFINEQB $0x00, Z6, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z7, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Store 4 outputs + VMOVDQU64 Z8, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z9, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z10, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z11, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x4_64_loop + VZEROUPPER + +mulGFNI_2x4_64_end: + RET + +// func mulAvxGFNI_2x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x4(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), BX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, BX + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, CX + +mulAvxGFNI_2x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y12, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y12, Y11 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y8, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 4 outputs + VMOVDQU Y8, (SI) + ADDQ $0x20, SI + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x4_loop + VZEROUPPER + +mulAvxGFNI_2x4_end: + RET + +// func mulGFNI_2x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x4_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x4_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), BX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, BX + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, CX + +mulGFNI_2x4_64Xor_loop: + // Load 4 outputs + VMOVDQU64 (SI), Z8 + VMOVDQU64 (DI), Z9 + VMOVDQU64 (R8), Z10 + VMOVDQU64 (BX), Z11 + + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z12 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z12, Z13 + VXORPD Z8, Z13, Z8 + VGF2P8AFFINEQB $0x00, Z1, Z12, Z13 + VXORPD Z9, Z13, Z9 + VGF2P8AFFINEQB $0x00, Z2, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z3, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (CX), Z12 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z4, Z12, Z13 + VXORPD Z8, Z13, Z8 + VGF2P8AFFINEQB $0x00, Z5, Z12, Z13 + VXORPD Z9, Z13, Z9 + VGF2P8AFFINEQB $0x00, Z6, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z7, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Store 4 outputs + VMOVDQU64 Z8, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z9, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z10, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z11, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x4_64Xor_loop + VZEROUPPER + +mulGFNI_2x4_64Xor_end: + RET + +// func mulAvxGFNI_2x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x4Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), BX + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, BX + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, CX + +mulAvxGFNI_2x4Xor_loop: + // Load 4 outputs + VMOVDQU (SI), Y8 + VMOVDQU (DI), Y9 + VMOVDQU (R8), Y10 + VMOVDQU (BX), Y11 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y13 + VXORPD Y8, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y8, Y13, Y8 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 4 outputs + VMOVDQU Y8, (SI) + ADDQ $0x20, SI + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (BX) + ADDQ $0x20, BX + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x4Xor_loop + VZEROUPPER + +mulAvxGFNI_2x4Xor_end: + RET + +// func mulGFNI_2x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x5_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x5_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), BX + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, BX + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, CX + +mulGFNI_2x5_64_loop: + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (DX), Z15 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z15, Z10 + VGF2P8AFFINEQB $0x00, Z1, Z15, Z11 + VGF2P8AFFINEQB $0x00, Z2, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z3, Z15, Z13 + VGF2P8AFFINEQB $0x00, Z4, Z15, Z14 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (CX), Z15 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z5, Z15, Z16 + VXORPD Z10, Z16, Z10 + VGF2P8AFFINEQB $0x00, Z6, Z15, Z16 + VXORPD Z11, Z16, Z11 + VGF2P8AFFINEQB $0x00, Z7, Z15, Z16 + VXORPD Z12, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z8, Z15, Z16 + VXORPD Z13, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z9, Z15, Z16 + VXORPD Z14, Z16, Z14 + + // Store 5 outputs + VMOVDQU64 Z10, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z11, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z12, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z13, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z14, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x5_64_loop + VZEROUPPER + +mulGFNI_2x5_64_end: + RET + +// func mulAvxGFNI_2x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x5(SB), $0-88 + // Loading 9 of 10 tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), SI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, SI + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, DX + +mulAvxGFNI_2x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x5_loop + VZEROUPPER + +mulAvxGFNI_2x5_end: + RET + +// func mulGFNI_2x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x5_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x5_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), BX + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, BX + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, CX + +mulGFNI_2x5_64Xor_loop: + // Load 5 outputs + VMOVDQU64 (SI), Z10 + VMOVDQU64 (DI), Z11 + VMOVDQU64 (R8), Z12 + VMOVDQU64 (R9), Z13 + VMOVDQU64 (BX), Z14 + + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (DX), Z15 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z15, Z16 + VXORPD Z10, Z16, Z10 + VGF2P8AFFINEQB $0x00, Z1, Z15, Z16 + VXORPD Z11, Z16, Z11 + VGF2P8AFFINEQB $0x00, Z2, Z15, Z16 + VXORPD Z12, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z3, Z15, Z16 + VXORPD Z13, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z4, Z15, Z16 + VXORPD Z14, Z16, Z14 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (CX), Z15 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z5, Z15, Z16 + VXORPD Z10, Z16, Z10 + VGF2P8AFFINEQB $0x00, Z6, Z15, Z16 + VXORPD Z11, Z16, Z11 + VGF2P8AFFINEQB $0x00, Z7, Z15, Z16 + VXORPD Z12, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z8, Z15, Z16 + VXORPD Z13, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z9, Z15, Z16 + VXORPD Z14, Z16, Z14 + + // Store 5 outputs + VMOVDQU64 Z10, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z11, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z12, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z13, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z14, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x5_64Xor_loop + VZEROUPPER + +mulGFNI_2x5_64Xor_end: + RET + +// func mulAvxGFNI_2x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x5Xor(SB), $0-88 + // Loading 9 of 10 tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), SI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, SI + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, DX + +mulAvxGFNI_2x5Xor_loop: + // Load 5 outputs + VMOVDQU (DI), Y9 + VMOVDQU (R8), Y10 + VMOVDQU (R9), Y11 + VMOVDQU (R10), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x5Xor_loop + VZEROUPPER + +mulAvxGFNI_2x5Xor_end: + RET + +// func mulGFNI_2x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x6_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x6_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), BX + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, BX + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, CX + +mulGFNI_2x6_64_loop: + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (DX), Z18 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z18, Z12 + VGF2P8AFFINEQB $0x00, Z1, Z18, Z13 + VGF2P8AFFINEQB $0x00, Z2, Z18, Z14 + VGF2P8AFFINEQB $0x00, Z3, Z18, Z15 + VGF2P8AFFINEQB $0x00, Z4, Z18, Z16 + VGF2P8AFFINEQB $0x00, Z5, Z18, Z17 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (CX), Z18 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z6, Z18, Z19 + VXORPD Z12, Z19, Z12 + VGF2P8AFFINEQB $0x00, Z7, Z18, Z19 + VXORPD Z13, Z19, Z13 + VGF2P8AFFINEQB $0x00, Z8, Z18, Z19 + VXORPD Z14, Z19, Z14 + VGF2P8AFFINEQB $0x00, Z9, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z10, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z11, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Store 6 outputs + VMOVDQU64 Z12, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z13, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z14, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z15, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z16, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z17, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x6_64_loop + VZEROUPPER + +mulGFNI_2x6_64_end: + RET + +// func mulAvxGFNI_2x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x6(SB), $0-88 + // Loading 8 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), SI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, SI + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, DX + +mulAvxGFNI_2x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (DI) + ADDQ $0x20, DI + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x6_loop + VZEROUPPER + +mulAvxGFNI_2x6_end: + RET + +// func mulGFNI_2x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x6_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x6_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), BX + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, BX + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, CX + +mulGFNI_2x6_64Xor_loop: + // Load 6 outputs + VMOVDQU64 (SI), Z12 + VMOVDQU64 (DI), Z13 + VMOVDQU64 (R8), Z14 + VMOVDQU64 (R9), Z15 + VMOVDQU64 (R10), Z16 + VMOVDQU64 (BX), Z17 + + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (DX), Z18 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z18, Z19 + VXORPD Z12, Z19, Z12 + VGF2P8AFFINEQB $0x00, Z1, Z18, Z19 + VXORPD Z13, Z19, Z13 + VGF2P8AFFINEQB $0x00, Z2, Z18, Z19 + VXORPD Z14, Z19, Z14 + VGF2P8AFFINEQB $0x00, Z3, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z4, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z5, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (CX), Z18 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z6, Z18, Z19 + VXORPD Z12, Z19, Z12 + VGF2P8AFFINEQB $0x00, Z7, Z18, Z19 + VXORPD Z13, Z19, Z13 + VGF2P8AFFINEQB $0x00, Z8, Z18, Z19 + VXORPD Z14, Z19, Z14 + VGF2P8AFFINEQB $0x00, Z9, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z10, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z11, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Store 6 outputs + VMOVDQU64 Z12, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z13, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z14, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z15, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z16, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z17, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x6_64Xor_loop + VZEROUPPER + +mulGFNI_2x6_64Xor_end: + RET + +// func mulAvxGFNI_2x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x6Xor(SB), $0-88 + // Loading 8 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), SI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, SI + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, DX + +mulAvxGFNI_2x6Xor_loop: + // Load 6 outputs + VMOVDQU (DI), Y8 + VMOVDQU (R8), Y9 + VMOVDQU (R9), Y10 + VMOVDQU (R10), Y11 + VMOVDQU (R11), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (DI) + ADDQ $0x20, DI + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x6Xor_loop + VZEROUPPER + +mulAvxGFNI_2x6Xor_end: + RET + +// func mulGFNI_2x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x7_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x7_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), BX + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, BX + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, CX + +mulGFNI_2x7_64_loop: + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (DX), Z21 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z21, Z14 + VGF2P8AFFINEQB $0x00, Z1, Z21, Z15 + VGF2P8AFFINEQB $0x00, Z2, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z3, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z4, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z5, Z21, Z19 + VGF2P8AFFINEQB $0x00, Z6, Z21, Z20 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (CX), Z21 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z7, Z21, Z22 + VXORPD Z14, Z22, Z14 + VGF2P8AFFINEQB $0x00, Z8, Z21, Z22 + VXORPD Z15, Z22, Z15 + VGF2P8AFFINEQB $0x00, Z9, Z21, Z22 + VXORPD Z16, Z22, Z16 + VGF2P8AFFINEQB $0x00, Z10, Z21, Z22 + VXORPD Z17, Z22, Z17 + VGF2P8AFFINEQB $0x00, Z11, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z12, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z13, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Store 7 outputs + VMOVDQU64 Z14, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z15, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z16, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z17, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z18, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z19, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z20, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x7_64_loop + VZEROUPPER + +mulGFNI_2x7_64_end: + RET + +// func mulAvxGFNI_2x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x7(SB), $0-88 + // Loading 7 of 14 tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), SI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, SI + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, DX + +mulAvxGFNI_2x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x7_loop + VZEROUPPER + +mulAvxGFNI_2x7_end: + RET + +// func mulGFNI_2x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x7_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x7_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), BX + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, BX + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, CX + +mulGFNI_2x7_64Xor_loop: + // Load 7 outputs + VMOVDQU64 (SI), Z14 + VMOVDQU64 (DI), Z15 + VMOVDQU64 (R8), Z16 + VMOVDQU64 (R9), Z17 + VMOVDQU64 (R10), Z18 + VMOVDQU64 (R11), Z19 + VMOVDQU64 (BX), Z20 + + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (DX), Z21 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z21, Z22 + VXORPD Z14, Z22, Z14 + VGF2P8AFFINEQB $0x00, Z1, Z21, Z22 + VXORPD Z15, Z22, Z15 + VGF2P8AFFINEQB $0x00, Z2, Z21, Z22 + VXORPD Z16, Z22, Z16 + VGF2P8AFFINEQB $0x00, Z3, Z21, Z22 + VXORPD Z17, Z22, Z17 + VGF2P8AFFINEQB $0x00, Z4, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z5, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z6, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (CX), Z21 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z7, Z21, Z22 + VXORPD Z14, Z22, Z14 + VGF2P8AFFINEQB $0x00, Z8, Z21, Z22 + VXORPD Z15, Z22, Z15 + VGF2P8AFFINEQB $0x00, Z9, Z21, Z22 + VXORPD Z16, Z22, Z16 + VGF2P8AFFINEQB $0x00, Z10, Z21, Z22 + VXORPD Z17, Z22, Z17 + VGF2P8AFFINEQB $0x00, Z11, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z12, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z13, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Store 7 outputs + VMOVDQU64 Z14, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z15, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z16, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z17, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z18, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z19, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z20, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x7_64Xor_loop + VZEROUPPER + +mulGFNI_2x7_64Xor_end: + RET + +// func mulAvxGFNI_2x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x7Xor(SB), $0-88 + // Loading 7 of 14 tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), SI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, SI + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, DX + +mulAvxGFNI_2x7Xor_loop: + // Load 7 outputs + VMOVDQU (DI), Y7 + VMOVDQU (R8), Y8 + VMOVDQU (R9), Y9 + VMOVDQU (R10), Y10 + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (DI) + ADDQ $0x20, DI + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x7Xor_loop + VZEROUPPER + +mulAvxGFNI_2x7Xor_end: + RET + +// func mulGFNI_2x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x8_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x8_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), BX + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, BX + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, CX + +mulGFNI_2x8_64_loop: + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (DX), Z24 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z24, Z16 + VGF2P8AFFINEQB $0x00, Z1, Z24, Z17 + VGF2P8AFFINEQB $0x00, Z2, Z24, Z18 + VGF2P8AFFINEQB $0x00, Z3, Z24, Z19 + VGF2P8AFFINEQB $0x00, Z4, Z24, Z20 + VGF2P8AFFINEQB $0x00, Z5, Z24, Z21 + VGF2P8AFFINEQB $0x00, Z6, Z24, Z22 + VGF2P8AFFINEQB $0x00, Z7, Z24, Z23 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (CX), Z24 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z8, Z24, Z25 + VXORPD Z16, Z25, Z16 + VGF2P8AFFINEQB $0x00, Z9, Z24, Z25 + VXORPD Z17, Z25, Z17 + VGF2P8AFFINEQB $0x00, Z10, Z24, Z25 + VXORPD Z18, Z25, Z18 + VGF2P8AFFINEQB $0x00, Z11, Z24, Z25 + VXORPD Z19, Z25, Z19 + VGF2P8AFFINEQB $0x00, Z12, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z13, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z14, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z15, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Store 8 outputs + VMOVDQU64 Z16, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z17, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z18, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z19, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z20, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z21, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z22, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z23, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x8_64_loop + VZEROUPPER + +mulGFNI_2x8_64_end: + RET + +// func mulAvxGFNI_2x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x8(SB), $0-88 + // Loading 6 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), SI + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, SI + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, DX + +mulAvxGFNI_2x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x8_loop + VZEROUPPER + +mulAvxGFNI_2x8_end: + RET + +// func mulGFNI_2x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x8_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x8_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), BX + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, BX + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, CX + +mulGFNI_2x8_64Xor_loop: + // Load 8 outputs + VMOVDQU64 (SI), Z16 + VMOVDQU64 (DI), Z17 + VMOVDQU64 (R8), Z18 + VMOVDQU64 (R9), Z19 + VMOVDQU64 (R10), Z20 + VMOVDQU64 (R11), Z21 + VMOVDQU64 (R12), Z22 + VMOVDQU64 (BX), Z23 + + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (DX), Z24 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z24, Z25 + VXORPD Z16, Z25, Z16 + VGF2P8AFFINEQB $0x00, Z1, Z24, Z25 + VXORPD Z17, Z25, Z17 + VGF2P8AFFINEQB $0x00, Z2, Z24, Z25 + VXORPD Z18, Z25, Z18 + VGF2P8AFFINEQB $0x00, Z3, Z24, Z25 + VXORPD Z19, Z25, Z19 + VGF2P8AFFINEQB $0x00, Z4, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z5, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z6, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z7, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (CX), Z24 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z8, Z24, Z25 + VXORPD Z16, Z25, Z16 + VGF2P8AFFINEQB $0x00, Z9, Z24, Z25 + VXORPD Z17, Z25, Z17 + VGF2P8AFFINEQB $0x00, Z10, Z24, Z25 + VXORPD Z18, Z25, Z18 + VGF2P8AFFINEQB $0x00, Z11, Z24, Z25 + VXORPD Z19, Z25, Z19 + VGF2P8AFFINEQB $0x00, Z12, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z13, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z14, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z15, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Store 8 outputs + VMOVDQU64 Z16, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z17, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z18, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z19, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z20, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z21, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z22, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z23, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x8_64Xor_loop + VZEROUPPER + +mulGFNI_2x8_64Xor_end: + RET + +// func mulAvxGFNI_2x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x8Xor(SB), $0-88 + // Loading 6 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), SI + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, SI + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, DX + +mulAvxGFNI_2x8Xor_loop: + // Load 8 outputs + VMOVDQU (DI), Y6 + VMOVDQU (R8), Y7 + VMOVDQU (R9), Y8 + VMOVDQU (R10), Y9 + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x8Xor_loop + VZEROUPPER + +mulAvxGFNI_2x8Xor_end: + RET + +// func mulGFNI_2x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x9_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x9_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), BX + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, BX + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, CX + +mulGFNI_2x9_64_loop: + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (DX), Z27 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z27, Z18 + VGF2P8AFFINEQB $0x00, Z1, Z27, Z19 + VGF2P8AFFINEQB $0x00, Z2, Z27, Z20 + VGF2P8AFFINEQB $0x00, Z3, Z27, Z21 + VGF2P8AFFINEQB $0x00, Z4, Z27, Z22 + VGF2P8AFFINEQB $0x00, Z5, Z27, Z23 + VGF2P8AFFINEQB $0x00, Z6, Z27, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z27, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z27, Z26 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (CX), Z27 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z9, Z27, Z28 + VXORPD Z18, Z28, Z18 + VGF2P8AFFINEQB $0x00, Z10, Z27, Z28 + VXORPD Z19, Z28, Z19 + VGF2P8AFFINEQB $0x00, Z11, Z27, Z28 + VXORPD Z20, Z28, Z20 + VGF2P8AFFINEQB $0x00, Z12, Z27, Z28 + VXORPD Z21, Z28, Z21 + VGF2P8AFFINEQB $0x00, Z13, Z27, Z28 + VXORPD Z22, Z28, Z22 + VGF2P8AFFINEQB $0x00, Z14, Z27, Z28 + VXORPD Z23, Z28, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Store 9 outputs + VMOVDQU64 Z18, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z19, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z20, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z21, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z22, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z23, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z24, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z25, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z26, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x9_64_loop + VZEROUPPER + +mulGFNI_2x9_64_end: + RET + +// func mulAvxGFNI_2x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x9(SB), $0-88 + // Loading 5 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), SI + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, SI + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, DX + +mulAvxGFNI_2x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (DI) + ADDQ $0x20, DI + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x9_loop + VZEROUPPER + +mulAvxGFNI_2x9_end: + RET + +// func mulGFNI_2x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x9_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x9_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), BX + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, BX + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, CX + +mulGFNI_2x9_64Xor_loop: + // Load 9 outputs + VMOVDQU64 (SI), Z18 + VMOVDQU64 (DI), Z19 + VMOVDQU64 (R8), Z20 + VMOVDQU64 (R9), Z21 + VMOVDQU64 (R10), Z22 + VMOVDQU64 (R11), Z23 + VMOVDQU64 (R12), Z24 + VMOVDQU64 (R13), Z25 + VMOVDQU64 (BX), Z26 + + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (DX), Z27 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z27, Z28 + VXORPD Z18, Z28, Z18 + VGF2P8AFFINEQB $0x00, Z1, Z27, Z28 + VXORPD Z19, Z28, Z19 + VGF2P8AFFINEQB $0x00, Z2, Z27, Z28 + VXORPD Z20, Z28, Z20 + VGF2P8AFFINEQB $0x00, Z3, Z27, Z28 + VXORPD Z21, Z28, Z21 + VGF2P8AFFINEQB $0x00, Z4, Z27, Z28 + VXORPD Z22, Z28, Z22 + VGF2P8AFFINEQB $0x00, Z5, Z27, Z28 + VXORPD Z23, Z28, Z23 + VGF2P8AFFINEQB $0x00, Z6, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (CX), Z27 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z9, Z27, Z28 + VXORPD Z18, Z28, Z18 + VGF2P8AFFINEQB $0x00, Z10, Z27, Z28 + VXORPD Z19, Z28, Z19 + VGF2P8AFFINEQB $0x00, Z11, Z27, Z28 + VXORPD Z20, Z28, Z20 + VGF2P8AFFINEQB $0x00, Z12, Z27, Z28 + VXORPD Z21, Z28, Z21 + VGF2P8AFFINEQB $0x00, Z13, Z27, Z28 + VXORPD Z22, Z28, Z22 + VGF2P8AFFINEQB $0x00, Z14, Z27, Z28 + VXORPD Z23, Z28, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Store 9 outputs + VMOVDQU64 Z18, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z19, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z20, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z21, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z22, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z23, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z24, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z25, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z26, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x9_64Xor_loop + VZEROUPPER + +mulGFNI_2x9_64Xor_end: + RET + +// func mulAvxGFNI_2x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x9Xor(SB), $0-88 + // Loading 5 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), SI + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, SI + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, DX + +mulAvxGFNI_2x9Xor_loop: + // Load 9 outputs + VMOVDQU (DI), Y5 + VMOVDQU (R8), Y6 + VMOVDQU (R9), Y7 + VMOVDQU (R10), Y8 + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (DI) + ADDQ $0x20, DI + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x9Xor_loop + VZEROUPPER + +mulAvxGFNI_2x9Xor_end: + RET + +// func mulGFNI_2x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x10_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x10_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), R14 + MOVQ 216(BX), BX + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, BX + + // Add start offset to input + ADDQ R15, DX + ADDQ R15, CX + +mulGFNI_2x10_64_loop: + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (CX), Z30 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + VMOVDQU64 Z20, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z21, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z22, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z23, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x10_64_loop + VZEROUPPER + +mulGFNI_2x10_64_end: + RET + +// func mulAvxGFNI_2x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x10(SB), $8-88 + // Loading 4 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), R15 + MOVQ 216(SI), SI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, SI + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, DX + +mulAvxGFNI_2x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x10_loop + VZEROUPPER + +mulAvxGFNI_2x10_end: + RET + +// func mulGFNI_2x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_2x10_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_2x10_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), CX + MOVQ out_base+48(FP), BX + MOVQ out_base+48(FP), BX + MOVQ (BX), SI + MOVQ 24(BX), DI + MOVQ 48(BX), R8 + MOVQ 72(BX), R9 + MOVQ 96(BX), R10 + MOVQ 120(BX), R11 + MOVQ 144(BX), R12 + MOVQ 168(BX), R13 + MOVQ 192(BX), R14 + MOVQ 216(BX), BX + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, BX + + // Add start offset to input + ADDQ R15, DX + ADDQ R15, CX + +mulGFNI_2x10_64Xor_loop: + // Load 10 outputs + VMOVDQU64 (SI), Z20 + VMOVDQU64 (DI), Z21 + VMOVDQU64 (R8), Z22 + VMOVDQU64 (R9), Z23 + VMOVDQU64 (R10), Z24 + VMOVDQU64 (R11), Z25 + VMOVDQU64 (R12), Z26 + VMOVDQU64 (R13), Z27 + VMOVDQU64 (R14), Z28 + VMOVDQU64 (BX), Z29 + + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (CX), Z30 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + VMOVDQU64 Z20, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z21, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z22, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z23, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (BX) + ADDQ $0x40, BX + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_2x10_64Xor_loop + VZEROUPPER + +mulGFNI_2x10_64Xor_end: + RET + +// func mulAvxGFNI_2x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_2x10Xor(SB), $8-88 + // Loading 4 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_2x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), R15 + MOVQ 216(SI), SI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, SI + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, DX + +mulAvxGFNI_2x10Xor_loop: + // Load 10 outputs + VMOVDQU (DI), Y4 + VMOVDQU (R8), Y5 + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_2x10Xor_loop + VZEROUPPER + +mulAvxGFNI_2x10Xor_end: + RET + +// func mulGFNI_3x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x1_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x1_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), SI + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, SI + + // Add start offset to input + ADDQ DI, DX + ADDQ DI, BX + ADDQ DI, CX + +mulGFNI_3x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z4 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z4, Z3 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z4 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z4, Z4 + VXORPD Z3, Z4, Z3 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (CX), Z4 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z2, Z4, Z4 + VXORPD Z3, Z4, Z3 + + // Store 1 outputs + VMOVDQU64 Z3, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x1_64_loop + VZEROUPPER + +mulGFNI_3x1_64_end: + RET + +// func mulAvxGFNI_3x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), SI + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, SI + + // Add start offset to input + ADDQ DI, DX + ADDQ DI, BX + ADDQ DI, CX + +mulAvxGFNI_3x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y4 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y4, Y3 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y4 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y4, Y4 + VXORPD Y3, Y4, Y3 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (CX), Y4 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y2, Y4, Y4 + VXORPD Y3, Y4, Y3 + + // Store 1 outputs + VMOVDQU Y3, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x1_loop + VZEROUPPER + +mulAvxGFNI_3x1_end: + RET + +// func mulGFNI_3x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x1_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x1_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), SI + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, SI + + // Add start offset to input + ADDQ DI, DX + ADDQ DI, BX + ADDQ DI, CX + +mulGFNI_3x1_64Xor_loop: + // Load 1 outputs + VMOVDQU64 (SI), Z3 + + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z4 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z4, Z4 + VXORPD Z3, Z4, Z3 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z4 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z4, Z4 + VXORPD Z3, Z4, Z3 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (CX), Z4 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z2, Z4, Z4 + VXORPD Z3, Z4, Z3 + + // Store 1 outputs + VMOVDQU64 Z3, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x1_64Xor_loop + VZEROUPPER + +mulGFNI_3x1_64Xor_end: + RET + +// func mulAvxGFNI_3x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), SI + MOVQ start+72(FP), DI + + // Add start offset to output + ADDQ DI, SI + + // Add start offset to input + ADDQ DI, DX + ADDQ DI, BX + ADDQ DI, CX + +mulAvxGFNI_3x1Xor_loop: + // Load 1 outputs + VMOVDQU (SI), Y3 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y4 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y4, Y4 + VXORPD Y3, Y4, Y3 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y4 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y4, Y4 + VXORPD Y3, Y4, Y3 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (CX), Y4 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y2, Y4, Y4 + VXORPD Y3, Y4, Y3 + + // Store 1 outputs + VMOVDQU Y3, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x1Xor_loop + VZEROUPPER + +mulAvxGFNI_3x1Xor_end: + RET + +// func mulGFNI_3x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x2_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x2_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), SI + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, DI + ADDQ R8, SI + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, CX + +mulGFNI_3x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z8 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z8, Z6 + VGF2P8AFFINEQB $0x00, Z1, Z8, Z7 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z8 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z8, Z9 + VXORPD Z6, Z9, Z6 + VGF2P8AFFINEQB $0x00, Z3, Z8, Z9 + VXORPD Z7, Z9, Z7 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (CX), Z8 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z4, Z8, Z9 + VXORPD Z6, Z9, Z6 + VGF2P8AFFINEQB $0x00, Z5, Z8, Z9 + VXORPD Z7, Z9, Z7 + + // Store 2 outputs + VMOVDQU64 Z6, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z7, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x2_64_loop + VZEROUPPER + +mulGFNI_3x2_64_end: + RET + +// func mulAvxGFNI_3x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), SI + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, DI + ADDQ R8, SI + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, CX + +mulAvxGFNI_3x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y8 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y8, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y8, Y7 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y8 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y8, Y9 + VXORPD Y7, Y9, Y7 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (CX), Y8 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y5, Y8, Y9 + VXORPD Y7, Y9, Y7 + + // Store 2 outputs + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x2_loop + VZEROUPPER + +mulAvxGFNI_3x2_end: + RET + +// func mulGFNI_3x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x2_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x2_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), SI + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, DI + ADDQ R8, SI + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, CX + +mulGFNI_3x2_64Xor_loop: + // Load 2 outputs + VMOVDQU64 (DI), Z6 + VMOVDQU64 (SI), Z7 + + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z8 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z8, Z9 + VXORPD Z6, Z9, Z6 + VGF2P8AFFINEQB $0x00, Z1, Z8, Z9 + VXORPD Z7, Z9, Z7 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z8 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z8, Z9 + VXORPD Z6, Z9, Z6 + VGF2P8AFFINEQB $0x00, Z3, Z8, Z9 + VXORPD Z7, Z9, Z7 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (CX), Z8 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z4, Z8, Z9 + VXORPD Z6, Z9, Z6 + VGF2P8AFFINEQB $0x00, Z5, Z8, Z9 + VXORPD Z7, Z9, Z7 + + // Store 2 outputs + VMOVDQU64 Z6, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z7, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x2_64Xor_loop + VZEROUPPER + +mulGFNI_3x2_64Xor_end: + RET + +// func mulAvxGFNI_3x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x2Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), SI + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, DI + ADDQ R8, SI + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, CX + +mulAvxGFNI_3x2Xor_loop: + // Load 2 outputs + VMOVDQU (DI), Y6 + VMOVDQU (SI), Y7 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y8 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y8, Y9 + VXORPD Y7, Y9, Y7 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y8 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y8, Y9 + VXORPD Y7, Y9, Y7 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (CX), Y8 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y8, Y9 + VXORPD Y6, Y9, Y6 + VGF2P8AFFINEQB $0x00, Y5, Y8, Y9 + VXORPD Y7, Y9, Y7 + + // Store 2 outputs + VMOVDQU Y6, (DI) + ADDQ $0x20, DI + VMOVDQU Y7, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x2Xor_loop + VZEROUPPER + +mulAvxGFNI_3x2Xor_end: + RET + +// func mulGFNI_3x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x3_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x3_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), SI + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, SI + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, CX + +mulGFNI_3x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z12 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z12, Z9 + VGF2P8AFFINEQB $0x00, Z1, Z12, Z10 + VGF2P8AFFINEQB $0x00, Z2, Z12, Z11 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z12 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z12, Z13 + VXORPD Z9, Z13, Z9 + VGF2P8AFFINEQB $0x00, Z4, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z5, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (CX), Z12 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z6, Z12, Z13 + VXORPD Z9, Z13, Z9 + VGF2P8AFFINEQB $0x00, Z7, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z8, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Store 3 outputs + VMOVDQU64 Z9, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z10, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z11, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x3_64_loop + VZEROUPPER + +mulGFNI_3x3_64_end: + RET + +// func mulAvxGFNI_3x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x3(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), SI + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, SI + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, CX + +mulAvxGFNI_3x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y12, Y11 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (BX), Y12 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y8, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 3 outputs + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x3_loop + VZEROUPPER + +mulAvxGFNI_3x3_end: + RET + +// func mulGFNI_3x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x3_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x3_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), SI + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, SI + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, CX + +mulGFNI_3x3_64Xor_loop: + // Load 3 outputs + VMOVDQU64 (DI), Z9 + VMOVDQU64 (R8), Z10 + VMOVDQU64 (SI), Z11 + + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z12 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z12, Z13 + VXORPD Z9, Z13, Z9 + VGF2P8AFFINEQB $0x00, Z1, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z2, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z12 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z12, Z13 + VXORPD Z9, Z13, Z9 + VGF2P8AFFINEQB $0x00, Z4, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z5, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (CX), Z12 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z6, Z12, Z13 + VXORPD Z9, Z13, Z9 + VGF2P8AFFINEQB $0x00, Z7, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z8, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Store 3 outputs + VMOVDQU64 Z9, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z10, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z11, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x3_64Xor_loop + VZEROUPPER + +mulGFNI_3x3_64Xor_end: + RET + +// func mulAvxGFNI_3x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x3Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), SI + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, DI + ADDQ R9, R8 + ADDQ R9, SI + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, CX + +mulAvxGFNI_3x3Xor_loop: + // Load 3 outputs + VMOVDQU (DI), Y9 + VMOVDQU (R8), Y10 + VMOVDQU (SI), Y11 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (BX), Y12 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y9, Y13, Y9 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y8, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 3 outputs + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x3Xor_loop + VZEROUPPER + +mulAvxGFNI_3x3Xor_end: + RET + +// func mulGFNI_3x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x4_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x4_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), SI + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, SI + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, CX + +mulGFNI_3x4_64_loop: + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z16 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z1, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z2, Z16, Z14 + VGF2P8AFFINEQB $0x00, Z3, Z16, Z15 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (BX), Z16 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z4, Z16, Z17 + VXORPD Z12, Z17, Z12 + VGF2P8AFFINEQB $0x00, Z5, Z16, Z17 + VXORPD Z13, Z17, Z13 + VGF2P8AFFINEQB $0x00, Z6, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z7, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (CX), Z16 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z8, Z16, Z17 + VXORPD Z12, Z17, Z12 + VGF2P8AFFINEQB $0x00, Z9, Z16, Z17 + VXORPD Z13, Z17, Z13 + VGF2P8AFFINEQB $0x00, Z10, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z11, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Store 4 outputs + VMOVDQU64 Z12, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z13, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z14, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z15, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x4_64_loop + VZEROUPPER + +mulGFNI_3x4_64_end: + RET + +// func mulAvxGFNI_3x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x4(SB), $0-88 + // Loading 10 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), DI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DI + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DX + +mulAvxGFNI_3x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x4_loop + VZEROUPPER + +mulAvxGFNI_3x4_end: + RET + +// func mulGFNI_3x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x4_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x4_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), SI + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, SI + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, CX + +mulGFNI_3x4_64Xor_loop: + // Load 4 outputs + VMOVDQU64 (DI), Z12 + VMOVDQU64 (R8), Z13 + VMOVDQU64 (R9), Z14 + VMOVDQU64 (SI), Z15 + + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z16 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z16, Z17 + VXORPD Z12, Z17, Z12 + VGF2P8AFFINEQB $0x00, Z1, Z16, Z17 + VXORPD Z13, Z17, Z13 + VGF2P8AFFINEQB $0x00, Z2, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z3, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (BX), Z16 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z4, Z16, Z17 + VXORPD Z12, Z17, Z12 + VGF2P8AFFINEQB $0x00, Z5, Z16, Z17 + VXORPD Z13, Z17, Z13 + VGF2P8AFFINEQB $0x00, Z6, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z7, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (CX), Z16 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z8, Z16, Z17 + VXORPD Z12, Z17, Z12 + VGF2P8AFFINEQB $0x00, Z9, Z16, Z17 + VXORPD Z13, Z17, Z13 + VGF2P8AFFINEQB $0x00, Z10, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z11, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Store 4 outputs + VMOVDQU64 Z12, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z13, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z14, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z15, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x4_64Xor_loop + VZEROUPPER + +mulGFNI_3x4_64Xor_end: + RET + +// func mulAvxGFNI_3x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x4Xor(SB), $0-88 + // Loading 10 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), DI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DI + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DX + +mulAvxGFNI_3x4Xor_loop: + // Load 4 outputs + VMOVDQU (R8), Y10 + VMOVDQU (R9), Y11 + VMOVDQU (R10), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R8) + ADDQ $0x20, R8 + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x4Xor_loop + VZEROUPPER + +mulAvxGFNI_3x4Xor_end: + RET + +// func mulGFNI_3x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x5_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x5_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), SI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, SI + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, CX + +mulGFNI_3x5_64_loop: + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (DX), Z20 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z20, Z15 + VGF2P8AFFINEQB $0x00, Z1, Z20, Z16 + VGF2P8AFFINEQB $0x00, Z2, Z20, Z17 + VGF2P8AFFINEQB $0x00, Z3, Z20, Z18 + VGF2P8AFFINEQB $0x00, Z4, Z20, Z19 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (BX), Z20 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z5, Z20, Z21 + VXORPD Z15, Z21, Z15 + VGF2P8AFFINEQB $0x00, Z6, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z7, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z8, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z9, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (CX), Z20 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z10, Z20, Z21 + VXORPD Z15, Z21, Z15 + VGF2P8AFFINEQB $0x00, Z11, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z12, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z13, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z14, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Store 5 outputs + VMOVDQU64 Z15, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z16, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z17, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z18, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z19, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x5_64_loop + VZEROUPPER + +mulGFNI_3x5_64_end: + RET + +// func mulAvxGFNI_3x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x5(SB), $0-88 + // Loading 9 of 15 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), DI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, DI + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DX + +mulAvxGFNI_3x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x5_loop + VZEROUPPER + +mulAvxGFNI_3x5_end: + RET + +// func mulGFNI_3x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x5_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x5_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), SI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, SI + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, CX + +mulGFNI_3x5_64Xor_loop: + // Load 5 outputs + VMOVDQU64 (DI), Z15 + VMOVDQU64 (R8), Z16 + VMOVDQU64 (R9), Z17 + VMOVDQU64 (R10), Z18 + VMOVDQU64 (SI), Z19 + + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (DX), Z20 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z20, Z21 + VXORPD Z15, Z21, Z15 + VGF2P8AFFINEQB $0x00, Z1, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z2, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z3, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z4, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (BX), Z20 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z5, Z20, Z21 + VXORPD Z15, Z21, Z15 + VGF2P8AFFINEQB $0x00, Z6, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z7, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z8, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z9, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (CX), Z20 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z10, Z20, Z21 + VXORPD Z15, Z21, Z15 + VGF2P8AFFINEQB $0x00, Z11, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z12, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z13, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z14, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Store 5 outputs + VMOVDQU64 Z15, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z16, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z17, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z18, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z19, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x5_64Xor_loop + VZEROUPPER + +mulGFNI_3x5_64Xor_end: + RET + +// func mulAvxGFNI_3x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x5Xor(SB), $0-88 + // Loading 9 of 15 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), DI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, DI + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DX + +mulAvxGFNI_3x5Xor_loop: + // Load 5 outputs + VMOVDQU (R8), Y9 + VMOVDQU (R9), Y10 + VMOVDQU (R10), Y11 + VMOVDQU (R11), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R8) + ADDQ $0x20, R8 + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x5Xor_loop + VZEROUPPER + +mulAvxGFNI_3x5Xor_end: + RET + +// func mulGFNI_3x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x6_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x6_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), SI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, SI + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, CX + +mulGFNI_3x6_64_loop: + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (DX), Z24 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z24, Z18 + VGF2P8AFFINEQB $0x00, Z1, Z24, Z19 + VGF2P8AFFINEQB $0x00, Z2, Z24, Z20 + VGF2P8AFFINEQB $0x00, Z3, Z24, Z21 + VGF2P8AFFINEQB $0x00, Z4, Z24, Z22 + VGF2P8AFFINEQB $0x00, Z5, Z24, Z23 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (BX), Z24 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z6, Z24, Z25 + VXORPD Z18, Z25, Z18 + VGF2P8AFFINEQB $0x00, Z7, Z24, Z25 + VXORPD Z19, Z25, Z19 + VGF2P8AFFINEQB $0x00, Z8, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z9, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (CX), Z24 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z12, Z24, Z25 + VXORPD Z18, Z25, Z18 + VGF2P8AFFINEQB $0x00, Z13, Z24, Z25 + VXORPD Z19, Z25, Z19 + VGF2P8AFFINEQB $0x00, Z14, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z15, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z16, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Store 6 outputs + VMOVDQU64 Z18, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z19, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z20, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z21, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z22, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z23, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x6_64_loop + VZEROUPPER + +mulGFNI_3x6_64_end: + RET + +// func mulAvxGFNI_3x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x6(SB), $0-88 + // Loading 8 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), DI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, DI + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DX + +mulAvxGFNI_3x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x6_loop + VZEROUPPER + +mulAvxGFNI_3x6_end: + RET + +// func mulGFNI_3x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x6_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x6_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), SI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, SI + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, CX + +mulGFNI_3x6_64Xor_loop: + // Load 6 outputs + VMOVDQU64 (DI), Z18 + VMOVDQU64 (R8), Z19 + VMOVDQU64 (R9), Z20 + VMOVDQU64 (R10), Z21 + VMOVDQU64 (R11), Z22 + VMOVDQU64 (SI), Z23 + + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (DX), Z24 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z24, Z25 + VXORPD Z18, Z25, Z18 + VGF2P8AFFINEQB $0x00, Z1, Z24, Z25 + VXORPD Z19, Z25, Z19 + VGF2P8AFFINEQB $0x00, Z2, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z3, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z4, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z5, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (BX), Z24 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z6, Z24, Z25 + VXORPD Z18, Z25, Z18 + VGF2P8AFFINEQB $0x00, Z7, Z24, Z25 + VXORPD Z19, Z25, Z19 + VGF2P8AFFINEQB $0x00, Z8, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z9, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (CX), Z24 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z12, Z24, Z25 + VXORPD Z18, Z25, Z18 + VGF2P8AFFINEQB $0x00, Z13, Z24, Z25 + VXORPD Z19, Z25, Z19 + VGF2P8AFFINEQB $0x00, Z14, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z15, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z16, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Store 6 outputs + VMOVDQU64 Z18, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z19, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z20, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z21, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z22, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z23, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x6_64Xor_loop + VZEROUPPER + +mulGFNI_3x6_64Xor_end: + RET + +// func mulAvxGFNI_3x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x6Xor(SB), $0-88 + // Loading 8 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), DI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, DI + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DX + +mulAvxGFNI_3x6Xor_loop: + // Load 6 outputs + VMOVDQU (R8), Y8 + VMOVDQU (R9), Y9 + VMOVDQU (R10), Y10 + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x6Xor_loop + VZEROUPPER + +mulAvxGFNI_3x6Xor_end: + RET + +// func mulGFNI_3x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x7_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x7_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), SI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, SI + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, CX + +mulGFNI_3x7_64_loop: + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (DX), Z28 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z28, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z28, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z28, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z28, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z28, Z27 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (BX), Z28 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z7, Z28, Z29 + VXORPD Z21, Z29, Z21 + VGF2P8AFFINEQB $0x00, Z8, Z28, Z29 + VXORPD Z22, Z29, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z28, Z29 + VXORPD Z23, Z29, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (CX), Z28 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z14, Z28, Z29 + VXORPD Z21, Z29, Z21 + VGF2P8AFFINEQB $0x00, Z15, Z28, Z29 + VXORPD Z22, Z29, Z22 + VGF2P8AFFINEQB $0x00, Z16, Z28, Z29 + VXORPD Z23, Z29, Z23 + VGF2P8AFFINEQB $0x00, Z17, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z18, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z19, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z20, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Store 7 outputs + VMOVDQU64 Z21, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z22, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z23, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x7_64_loop + VZEROUPPER + +mulGFNI_3x7_64_end: + RET + +// func mulAvxGFNI_3x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x7(SB), $0-88 + // Loading 7 of 21 tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), DI + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, DI + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DX + +mulAvxGFNI_3x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x7_loop + VZEROUPPER + +mulAvxGFNI_3x7_end: + RET + +// func mulGFNI_3x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x7_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x7_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), CX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), SI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, SI + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, CX + +mulGFNI_3x7_64Xor_loop: + // Load 7 outputs + VMOVDQU64 (DI), Z21 + VMOVDQU64 (R8), Z22 + VMOVDQU64 (R9), Z23 + VMOVDQU64 (R10), Z24 + VMOVDQU64 (R11), Z25 + VMOVDQU64 (R12), Z26 + VMOVDQU64 (SI), Z27 + + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (DX), Z28 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z28, Z29 + VXORPD Z21, Z29, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z28, Z29 + VXORPD Z22, Z29, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z28, Z29 + VXORPD Z23, Z29, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (BX), Z28 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z7, Z28, Z29 + VXORPD Z21, Z29, Z21 + VGF2P8AFFINEQB $0x00, Z8, Z28, Z29 + VXORPD Z22, Z29, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z28, Z29 + VXORPD Z23, Z29, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (CX), Z28 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z14, Z28, Z29 + VXORPD Z21, Z29, Z21 + VGF2P8AFFINEQB $0x00, Z15, Z28, Z29 + VXORPD Z22, Z29, Z22 + VGF2P8AFFINEQB $0x00, Z16, Z28, Z29 + VXORPD Z23, Z29, Z23 + VGF2P8AFFINEQB $0x00, Z17, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z18, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z19, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z20, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Store 7 outputs + VMOVDQU64 Z21, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z22, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z23, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x7_64Xor_loop + VZEROUPPER + +mulGFNI_3x7_64Xor_end: + RET + +// func mulAvxGFNI_3x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x7Xor(SB), $0-88 + // Loading 7 of 21 tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), DI + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, DI + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DX + +mulAvxGFNI_3x7Xor_loop: + // Load 7 outputs + VMOVDQU (R8), Y7 + VMOVDQU (R9), Y8 + VMOVDQU (R10), Y9 + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R8) + ADDQ $0x20, R8 + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x7Xor_loop + VZEROUPPER + +mulAvxGFNI_3x7Xor_end: + RET + +// func mulGFNI_3x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x8_64(SB), $0-88 + // Loading 22 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x8_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), DI + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, DI + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DX + +mulGFNI_3x8_64_loop: + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + VMOVDQU64 Z22, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z23, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x8_64_loop + VZEROUPPER + +mulGFNI_3x8_64_end: + RET + +// func mulAvxGFNI_3x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x8(SB), $0-88 + // Loading 6 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), DI + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, DI + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DX + +mulAvxGFNI_3x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x8_loop + VZEROUPPER + +mulAvxGFNI_3x8_end: + RET + +// func mulGFNI_3x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x8_64Xor(SB), $0-88 + // Loading 22 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x8_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), DI + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, DI + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DX + +mulGFNI_3x8_64Xor_loop: + // Load 8 outputs + VMOVDQU64 (R8), Z22 + VMOVDQU64 (R9), Z23 + VMOVDQU64 (R10), Z24 + VMOVDQU64 (R11), Z25 + VMOVDQU64 (R12), Z26 + VMOVDQU64 (R13), Z27 + VMOVDQU64 (R14), Z28 + VMOVDQU64 (DI), Z29 + + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + VMOVDQU64 Z22, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z23, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x8_64Xor_loop + VZEROUPPER + +mulGFNI_3x8_64Xor_end: + RET + +// func mulAvxGFNI_3x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x8Xor(SB), $0-88 + // Loading 6 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), DI + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, DI + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DX + +mulAvxGFNI_3x8Xor_loop: + // Load 8 outputs + VMOVDQU (R8), Y6 + VMOVDQU (R9), Y7 + VMOVDQU (R10), Y8 + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R8) + ADDQ $0x20, R8 + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x8Xor_loop + VZEROUPPER + +mulAvxGFNI_3x8Xor_end: + RET + +// func mulGFNI_3x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x9_64(SB), $8-88 + // Loading 21 of 27 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x9_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DX + +mulGFNI_3x9_64_loop: + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + VMOVDQU64 Z21, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z22, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x9_64_loop + VZEROUPPER + +mulGFNI_3x9_64_end: + RET + +// func mulAvxGFNI_3x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x9(SB), $8-88 + // Loading 5 of 27 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DX + +mulAvxGFNI_3x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x9_loop + VZEROUPPER + +mulAvxGFNI_3x9_end: + RET + +// func mulGFNI_3x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x9_64Xor(SB), $8-88 + // Loading 21 of 27 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x9_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DX + +mulGFNI_3x9_64Xor_loop: + // Load 9 outputs + VMOVDQU64 (R8), Z21 + VMOVDQU64 (R9), Z22 + VMOVDQU64 (R10), Z23 + VMOVDQU64 (R11), Z24 + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (DI), Z29 + + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + VMOVDQU64 Z21, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z22, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_3x9_64Xor_loop + VZEROUPPER + +mulGFNI_3x9_64Xor_end: + RET + +// func mulAvxGFNI_3x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x9Xor(SB), $8-88 + // Loading 5 of 27 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DX + +mulAvxGFNI_3x9Xor_loop: + // Load 9 outputs + VMOVDQU (R8), Y5 + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_3x9Xor_loop + VZEROUPPER + +mulAvxGFNI_3x9Xor_end: + RET + +// func mulGFNI_3x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x10_64(SB), $8-88 + // Loading 20 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x10_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), AX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), R15 + MOVQ 216(SI), SI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, SI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_3x10_64_loop: + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + VMOVDQU64 Z20, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z21, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z22, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_3x10_64_loop + VZEROUPPER + +mulGFNI_3x10_64_end: + RET + +// func mulAvxGFNI_3x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x10(SB), $8-88 + // Loading 4 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), AX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), R15 + MOVQ 216(SI), SI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, SI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_3x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_3x10_loop + VZEROUPPER + +mulAvxGFNI_3x10_end: + RET + +// func mulGFNI_3x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_3x10_64Xor(SB), $8-88 + // Loading 20 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_3x10_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), AX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), R15 + MOVQ 216(SI), SI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, SI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_3x10_64Xor_loop: + // Load 10 outputs + VMOVDQU64 (DI), Z20 + VMOVDQU64 (R8), Z21 + VMOVDQU64 (R9), Z22 + VMOVDQU64 (R10), Z23 + VMOVDQU64 (R11), Z24 + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (SI), Z29 + + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + VMOVDQU64 Z20, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z21, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z22, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (SI) + ADDQ $0x40, SI + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_3x10_64Xor_loop + VZEROUPPER + +mulGFNI_3x10_64Xor_end: + RET + +// func mulAvxGFNI_3x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_3x10Xor(SB), $8-88 + // Loading 4 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_3x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), AX + MOVQ out_base+48(FP), SI + MOVQ out_base+48(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), R15 + MOVQ 216(SI), SI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, SI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_3x10Xor_loop: + // Load 10 outputs + VMOVDQU (DI), Y4 + VMOVDQU (R8), Y5 + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (SI), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (SI) + ADDQ $0x20, SI + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_3x10Xor_loop + VZEROUPPER + +mulAvxGFNI_3x10Xor_end: + RET + +// func mulGFNI_4x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x1_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 7 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x1_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), DI + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, DI + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, CX + +mulGFNI_4x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z5 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z5, Z4 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z5 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z5, Z5 + VXORPD Z4, Z5, Z4 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z5 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z5, Z5 + VXORPD Z4, Z5, Z4 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (CX), Z5 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z3, Z5, Z5 + VXORPD Z4, Z5, Z4 + + // Store 1 outputs + VMOVDQU64 Z4, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x1_64_loop + VZEROUPPER + +mulGFNI_4x1_64_end: + RET + +// func mulAvxGFNI_4x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 7 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), DI + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, DI + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, CX + +mulAvxGFNI_4x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y5 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y5, Y4 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y5 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y5, Y5 + VXORPD Y4, Y5, Y4 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y5 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y5, Y5 + VXORPD Y4, Y5, Y4 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (CX), Y5 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y3, Y5, Y5 + VXORPD Y4, Y5, Y4 + + // Store 1 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x1_loop + VZEROUPPER + +mulAvxGFNI_4x1_end: + RET + +// func mulGFNI_4x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x1_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 7 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x1_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), DI + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, DI + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, CX + +mulGFNI_4x1_64Xor_loop: + // Load 1 outputs + VMOVDQU64 (DI), Z4 + + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z5 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z5, Z5 + VXORPD Z4, Z5, Z4 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z5 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z5, Z5 + VXORPD Z4, Z5, Z4 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z5 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z5, Z5 + VXORPD Z4, Z5, Z4 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (CX), Z5 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z3, Z5, Z5 + VXORPD Z4, Z5, Z4 + + // Store 1 outputs + VMOVDQU64 Z4, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x1_64Xor_loop + VZEROUPPER + +mulGFNI_4x1_64Xor_end: + RET + +// func mulAvxGFNI_4x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 7 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), DI + MOVQ start+72(FP), R8 + + // Add start offset to output + ADDQ R8, DI + + // Add start offset to input + ADDQ R8, DX + ADDQ R8, BX + ADDQ R8, SI + ADDQ R8, CX + +mulAvxGFNI_4x1Xor_loop: + // Load 1 outputs + VMOVDQU (DI), Y4 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y5 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y5, Y5 + VXORPD Y4, Y5, Y4 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y5 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y5, Y5 + VXORPD Y4, Y5, Y4 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y5 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y5, Y5 + VXORPD Y4, Y5, Y4 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (CX), Y5 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y3, Y5, Y5 + VXORPD Y4, Y5, Y4 + + // Store 1 outputs + VMOVDQU Y4, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x1Xor_loop + VZEROUPPER + +mulAvxGFNI_4x1Xor_end: + RET + +// func mulGFNI_4x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x2_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x2_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), DI + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, R8 + ADDQ R9, DI + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, CX + +mulGFNI_4x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z10 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z10, Z8 + VGF2P8AFFINEQB $0x00, Z1, Z10, Z9 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z10 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z10, Z11 + VXORPD Z8, Z11, Z8 + VGF2P8AFFINEQB $0x00, Z3, Z10, Z11 + VXORPD Z9, Z11, Z9 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z10 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z10, Z11 + VXORPD Z8, Z11, Z8 + VGF2P8AFFINEQB $0x00, Z5, Z10, Z11 + VXORPD Z9, Z11, Z9 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (CX), Z10 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z6, Z10, Z11 + VXORPD Z8, Z11, Z8 + VGF2P8AFFINEQB $0x00, Z7, Z10, Z11 + VXORPD Z9, Z11, Z9 + + // Store 2 outputs + VMOVDQU64 Z8, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z9, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x2_64_loop + VZEROUPPER + +mulGFNI_4x2_64_end: + RET + +// func mulAvxGFNI_4x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), DI + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, R8 + ADDQ R9, DI + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, CX + +mulAvxGFNI_4x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y10 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y10, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y10, Y9 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y10 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (SI), Y10 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y5, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (CX), Y10 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Store 2 outputs + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x2_loop + VZEROUPPER + +mulAvxGFNI_4x2_end: + RET + +// func mulGFNI_4x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x2_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x2_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), DI + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, R8 + ADDQ R9, DI + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, CX + +mulGFNI_4x2_64Xor_loop: + // Load 2 outputs + VMOVDQU64 (R8), Z8 + VMOVDQU64 (DI), Z9 + + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z10 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z10, Z11 + VXORPD Z8, Z11, Z8 + VGF2P8AFFINEQB $0x00, Z1, Z10, Z11 + VXORPD Z9, Z11, Z9 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z10 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z10, Z11 + VXORPD Z8, Z11, Z8 + VGF2P8AFFINEQB $0x00, Z3, Z10, Z11 + VXORPD Z9, Z11, Z9 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z10 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z10, Z11 + VXORPD Z8, Z11, Z8 + VGF2P8AFFINEQB $0x00, Z5, Z10, Z11 + VXORPD Z9, Z11, Z9 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (CX), Z10 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z6, Z10, Z11 + VXORPD Z8, Z11, Z8 + VGF2P8AFFINEQB $0x00, Z7, Z10, Z11 + VXORPD Z9, Z11, Z9 + + // Store 2 outputs + VMOVDQU64 Z8, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z9, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x2_64Xor_loop + VZEROUPPER + +mulGFNI_4x2_64Xor_end: + RET + +// func mulAvxGFNI_4x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x2Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), DI + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, R8 + ADDQ R9, DI + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, CX + +mulAvxGFNI_4x2Xor_loop: + // Load 2 outputs + VMOVDQU (R8), Y8 + VMOVDQU (DI), Y9 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y10 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y10 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (SI), Y10 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y5, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (CX), Y10 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y10, Y11 + VXORPD Y8, Y11, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y10, Y11 + VXORPD Y9, Y11, Y9 + + // Store 2 outputs + VMOVDQU Y8, (R8) + ADDQ $0x20, R8 + VMOVDQU Y9, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x2Xor_loop + VZEROUPPER + +mulAvxGFNI_4x2Xor_end: + RET + +// func mulGFNI_4x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x3_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x3_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), DI + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, DI + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, CX + +mulGFNI_4x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z15 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z1, Z15, Z13 + VGF2P8AFFINEQB $0x00, Z2, Z15, Z14 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z15 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z15, Z16 + VXORPD Z12, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z4, Z15, Z16 + VXORPD Z13, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z5, Z15, Z16 + VXORPD Z14, Z16, Z14 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z15 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z15, Z16 + VXORPD Z12, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z7, Z15, Z16 + VXORPD Z13, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z8, Z15, Z16 + VXORPD Z14, Z16, Z14 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (CX), Z15 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z9, Z15, Z16 + VXORPD Z12, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z10, Z15, Z16 + VXORPD Z13, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z11, Z15, Z16 + VXORPD Z14, Z16, Z14 + + // Store 3 outputs + VMOVDQU64 Z12, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z13, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z14, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x3_64_loop + VZEROUPPER + +mulGFNI_4x3_64_end: + RET + +// func mulAvxGFNI_4x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x3(SB), $0-88 + // Loading 11 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R8 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, R8 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, DX + +mulAvxGFNI_4x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x3_loop + VZEROUPPER + +mulAvxGFNI_4x3_end: + RET + +// func mulGFNI_4x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x3_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x3_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), DI + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R8 + ADDQ R10, R9 + ADDQ R10, DI + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, CX + +mulGFNI_4x3_64Xor_loop: + // Load 3 outputs + VMOVDQU64 (R8), Z12 + VMOVDQU64 (R9), Z13 + VMOVDQU64 (DI), Z14 + + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z15 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z15, Z16 + VXORPD Z12, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z1, Z15, Z16 + VXORPD Z13, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z2, Z15, Z16 + VXORPD Z14, Z16, Z14 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z15 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z15, Z16 + VXORPD Z12, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z4, Z15, Z16 + VXORPD Z13, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z5, Z15, Z16 + VXORPD Z14, Z16, Z14 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z15 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z15, Z16 + VXORPD Z12, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z7, Z15, Z16 + VXORPD Z13, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z8, Z15, Z16 + VXORPD Z14, Z16, Z14 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (CX), Z15 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z9, Z15, Z16 + VXORPD Z12, Z16, Z12 + VGF2P8AFFINEQB $0x00, Z10, Z15, Z16 + VXORPD Z13, Z16, Z13 + VGF2P8AFFINEQB $0x00, Z11, Z15, Z16 + VXORPD Z14, Z16, Z14 + + // Store 3 outputs + VMOVDQU64 Z12, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z13, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z14, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x3_64Xor_loop + VZEROUPPER + +mulGFNI_4x3_64Xor_end: + RET + +// func mulAvxGFNI_4x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x3Xor(SB), $0-88 + // Loading 11 of 12 tables to registers + // Destination kept in GP registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R8 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, R8 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, DX + +mulAvxGFNI_4x3Xor_loop: + // Load 3 outputs + VMOVDQU (R9), Y11 + VMOVDQU (R10), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R9) + ADDQ $0x20, R9 + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x3Xor_loop + VZEROUPPER + +mulAvxGFNI_4x3Xor_end: + RET + +// func mulGFNI_4x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x4_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x4_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), DI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DI + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, CX + +mulGFNI_4x4_64_loop: + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z20 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z20, Z16 + VGF2P8AFFINEQB $0x00, Z1, Z20, Z17 + VGF2P8AFFINEQB $0x00, Z2, Z20, Z18 + VGF2P8AFFINEQB $0x00, Z3, Z20, Z19 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (BX), Z20 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z4, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z5, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z6, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z7, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (SI), Z20 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z9, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z10, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z11, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (CX), Z20 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z12, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z13, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z14, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z15, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Store 4 outputs + VMOVDQU64 Z16, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z17, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z18, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z19, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x4_64_loop + VZEROUPPER + +mulGFNI_4x4_64_end: + RET + +// func mulAvxGFNI_4x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x4(SB), $0-88 + // Loading 10 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R8 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R8 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, DX + +mulAvxGFNI_4x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x4_loop + VZEROUPPER + +mulAvxGFNI_4x4_end: + RET + +// func mulGFNI_4x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x4_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x4_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), DI + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, DI + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, CX + +mulGFNI_4x4_64Xor_loop: + // Load 4 outputs + VMOVDQU64 (R8), Z16 + VMOVDQU64 (R9), Z17 + VMOVDQU64 (R10), Z18 + VMOVDQU64 (DI), Z19 + + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z20 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z1, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z2, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z3, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (BX), Z20 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z4, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z5, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z6, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z7, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (SI), Z20 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z9, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z10, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z11, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (CX), Z20 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z12, Z20, Z21 + VXORPD Z16, Z21, Z16 + VGF2P8AFFINEQB $0x00, Z13, Z20, Z21 + VXORPD Z17, Z21, Z17 + VGF2P8AFFINEQB $0x00, Z14, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z15, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Store 4 outputs + VMOVDQU64 Z16, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z17, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z18, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z19, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x4_64Xor_loop + VZEROUPPER + +mulGFNI_4x4_64Xor_end: + RET + +// func mulAvxGFNI_4x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x4Xor(SB), $0-88 + // Loading 10 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R8 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R8 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, DX + +mulAvxGFNI_4x4Xor_loop: + // Load 4 outputs + VMOVDQU (R9), Y10 + VMOVDQU (R10), Y11 + VMOVDQU (R11), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x4Xor_loop + VZEROUPPER + +mulAvxGFNI_4x4Xor_end: + RET + +// func mulGFNI_4x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x5_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 27 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x5_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), DI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, DI + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, CX + +mulGFNI_4x5_64_loop: + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (DX), Z25 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z25, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z25, Z24 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (BX), Z25 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z5, Z25, Z26 + VXORPD Z20, Z26, Z20 + VGF2P8AFFINEQB $0x00, Z6, Z25, Z26 + VXORPD Z21, Z26, Z21 + VGF2P8AFFINEQB $0x00, Z7, Z25, Z26 + VXORPD Z22, Z26, Z22 + VGF2P8AFFINEQB $0x00, Z8, Z25, Z26 + VXORPD Z23, Z26, Z23 + VGF2P8AFFINEQB $0x00, Z9, Z25, Z26 + VXORPD Z24, Z26, Z24 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (SI), Z25 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z25, Z26 + VXORPD Z20, Z26, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z25, Z26 + VXORPD Z21, Z26, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z25, Z26 + VXORPD Z22, Z26, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z25, Z26 + VXORPD Z23, Z26, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z25, Z26 + VXORPD Z24, Z26, Z24 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (CX), Z25 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z15, Z25, Z26 + VXORPD Z20, Z26, Z20 + VGF2P8AFFINEQB $0x00, Z16, Z25, Z26 + VXORPD Z21, Z26, Z21 + VGF2P8AFFINEQB $0x00, Z17, Z25, Z26 + VXORPD Z22, Z26, Z22 + VGF2P8AFFINEQB $0x00, Z18, Z25, Z26 + VXORPD Z23, Z26, Z23 + VGF2P8AFFINEQB $0x00, Z19, Z25, Z26 + VXORPD Z24, Z26, Z24 + + // Store 5 outputs + VMOVDQU64 Z20, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z21, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z22, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z23, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z24, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x5_64_loop + VZEROUPPER + +mulGFNI_4x5_64_end: + RET + +// func mulAvxGFNI_4x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x5(SB), $0-88 + // Loading 9 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 27 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R8 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R8 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, DX + +mulAvxGFNI_4x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x5_loop + VZEROUPPER + +mulAvxGFNI_4x5_end: + RET + +// func mulGFNI_4x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x5_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 27 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x5_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), DI + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, DI + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, CX + +mulGFNI_4x5_64Xor_loop: + // Load 5 outputs + VMOVDQU64 (R8), Z20 + VMOVDQU64 (R9), Z21 + VMOVDQU64 (R10), Z22 + VMOVDQU64 (R11), Z23 + VMOVDQU64 (DI), Z24 + + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (DX), Z25 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z25, Z26 + VXORPD Z20, Z26, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z25, Z26 + VXORPD Z21, Z26, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z25, Z26 + VXORPD Z22, Z26, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z25, Z26 + VXORPD Z23, Z26, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z25, Z26 + VXORPD Z24, Z26, Z24 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (BX), Z25 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z5, Z25, Z26 + VXORPD Z20, Z26, Z20 + VGF2P8AFFINEQB $0x00, Z6, Z25, Z26 + VXORPD Z21, Z26, Z21 + VGF2P8AFFINEQB $0x00, Z7, Z25, Z26 + VXORPD Z22, Z26, Z22 + VGF2P8AFFINEQB $0x00, Z8, Z25, Z26 + VXORPD Z23, Z26, Z23 + VGF2P8AFFINEQB $0x00, Z9, Z25, Z26 + VXORPD Z24, Z26, Z24 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (SI), Z25 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z25, Z26 + VXORPD Z20, Z26, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z25, Z26 + VXORPD Z21, Z26, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z25, Z26 + VXORPD Z22, Z26, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z25, Z26 + VXORPD Z23, Z26, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z25, Z26 + VXORPD Z24, Z26, Z24 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (CX), Z25 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z15, Z25, Z26 + VXORPD Z20, Z26, Z20 + VGF2P8AFFINEQB $0x00, Z16, Z25, Z26 + VXORPD Z21, Z26, Z21 + VGF2P8AFFINEQB $0x00, Z17, Z25, Z26 + VXORPD Z22, Z26, Z22 + VGF2P8AFFINEQB $0x00, Z18, Z25, Z26 + VXORPD Z23, Z26, Z23 + VGF2P8AFFINEQB $0x00, Z19, Z25, Z26 + VXORPD Z24, Z26, Z24 + + // Store 5 outputs + VMOVDQU64 Z20, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z21, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z22, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z23, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z24, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x5_64Xor_loop + VZEROUPPER + +mulGFNI_4x5_64Xor_end: + RET + +// func mulAvxGFNI_4x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x5Xor(SB), $0-88 + // Loading 9 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 27 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R8 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R8 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, DX + +mulAvxGFNI_4x5Xor_loop: + // Load 5 outputs + VMOVDQU (R9), Y9 + VMOVDQU (R10), Y10 + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R9) + ADDQ $0x20, R9 + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x5Xor_loop + VZEROUPPER + +mulAvxGFNI_4x5Xor_end: + RET + +// func mulGFNI_4x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x6_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x6_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), DI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, DI + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, CX + +mulGFNI_4x6_64_loop: + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (CX), Z30 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + VMOVDQU64 Z24, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z25, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z26, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z27, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z28, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z29, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x6_64_loop + VZEROUPPER + +mulGFNI_4x6_64_end: + RET + +// func mulAvxGFNI_4x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x6(SB), $0-88 + // Loading 8 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R8 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R8 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, DX + +mulAvxGFNI_4x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x6_loop + VZEROUPPER + +mulAvxGFNI_4x6_end: + RET + +// func mulGFNI_4x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x6_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x6_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), DI + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, DI + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, CX + +mulGFNI_4x6_64Xor_loop: + // Load 6 outputs + VMOVDQU64 (R8), Z24 + VMOVDQU64 (R9), Z25 + VMOVDQU64 (R10), Z26 + VMOVDQU64 (R11), Z27 + VMOVDQU64 (R12), Z28 + VMOVDQU64 (DI), Z29 + + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (CX), Z30 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + VMOVDQU64 Z24, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z25, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z26, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z27, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z28, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z29, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x6_64Xor_loop + VZEROUPPER + +mulGFNI_4x6_64Xor_end: + RET + +// func mulAvxGFNI_4x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x6Xor(SB), $0-88 + // Loading 8 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R8 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R8 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, DX + +mulAvxGFNI_4x6Xor_loop: + // Load 6 outputs + VMOVDQU (R9), Y8 + VMOVDQU (R10), Y9 + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R9) + ADDQ $0x20, R9 + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x6Xor_loop + VZEROUPPER + +mulAvxGFNI_4x6Xor_end: + RET + +// func mulGFNI_4x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x7_64(SB), $0-88 + // Loading 23 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x7_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R8 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R8 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, DX + +mulGFNI_4x7_64_loop: + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + VMOVDQU64 Z23, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x7_64_loop + VZEROUPPER + +mulGFNI_4x7_64_end: + RET + +// func mulAvxGFNI_4x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x7(SB), $0-88 + // Loading 7 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R8 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R8 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, DX + +mulAvxGFNI_4x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x7_loop + VZEROUPPER + +mulAvxGFNI_4x7_end: + RET + +// func mulGFNI_4x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x7_64Xor(SB), $0-88 + // Loading 23 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x7_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R8 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R8 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, DX + +mulGFNI_4x7_64Xor_loop: + // Load 7 outputs + VMOVDQU64 (R9), Z23 + VMOVDQU64 (R10), Z24 + VMOVDQU64 (R11), Z25 + VMOVDQU64 (R12), Z26 + VMOVDQU64 (R13), Z27 + VMOVDQU64 (R14), Z28 + VMOVDQU64 (R8), Z29 + + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + VMOVDQU64 Z23, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x7_64Xor_loop + VZEROUPPER + +mulGFNI_4x7_64Xor_end: + RET + +// func mulAvxGFNI_4x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x7Xor(SB), $0-88 + // Loading 7 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R8 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R8 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, DX + +mulAvxGFNI_4x7Xor_loop: + // Load 7 outputs + VMOVDQU (R9), Y7 + VMOVDQU (R10), Y8 + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R9) + ADDQ $0x20, R9 + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x7Xor_loop + VZEROUPPER + +mulAvxGFNI_4x7Xor_end: + RET + +// func mulGFNI_4x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x8_64(SB), $8-88 + // Loading 22 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x8_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, DX + +mulGFNI_4x8_64_loop: + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + VMOVDQU64 Z22, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x8_64_loop + VZEROUPPER + +mulGFNI_4x8_64_end: + RET + +// func mulAvxGFNI_4x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x8(SB), $8-88 + // Loading 6 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, DX + +mulAvxGFNI_4x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x8_loop + VZEROUPPER + +mulAvxGFNI_4x8_end: + RET + +// func mulGFNI_4x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x8_64Xor(SB), $8-88 + // Loading 22 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x8_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, DX + +mulGFNI_4x8_64Xor_loop: + // Load 8 outputs + VMOVDQU64 (R9), Z22 + VMOVDQU64 (R10), Z23 + VMOVDQU64 (R11), Z24 + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R8), Z29 + + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + VMOVDQU64 Z22, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_4x8_64Xor_loop + VZEROUPPER + +mulGFNI_4x8_64Xor_end: + RET + +// func mulAvxGFNI_4x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x8Xor(SB), $8-88 + // Loading 6 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, DX + +mulAvxGFNI_4x8Xor_loop: + // Load 8 outputs + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_4x8Xor_loop + VZEROUPPER + +mulAvxGFNI_4x8Xor_end: + RET + +// func mulGFNI_4x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x9_64(SB), $8-88 + // Loading 21 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x9_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), AX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_4x9_64_loop: + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + VMOVDQU64 Z21, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z22, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_4x9_64_loop + VZEROUPPER + +mulGFNI_4x9_64_end: + RET + +// func mulAvxGFNI_4x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x9(SB), $8-88 + // Loading 5 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), AX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_4x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_4x9_loop + VZEROUPPER + +mulAvxGFNI_4x9_end: + RET + +// func mulGFNI_4x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x9_64Xor(SB), $8-88 + // Loading 21 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x9_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), AX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_4x9_64Xor_loop: + // Load 9 outputs + VMOVDQU64 (R8), Z21 + VMOVDQU64 (R9), Z22 + VMOVDQU64 (R10), Z23 + VMOVDQU64 (R11), Z24 + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (DI), Z29 + + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + VMOVDQU64 Z21, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z22, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (DI) + ADDQ $0x40, DI + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_4x9_64Xor_loop + VZEROUPPER + +mulGFNI_4x9_64Xor_end: + RET + +// func mulAvxGFNI_4x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x9Xor(SB), $8-88 + // Loading 5 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), AX + MOVQ out_base+48(FP), DI + MOVQ out_base+48(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), R15 + MOVQ 192(DI), DI + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, DI + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_4x9Xor_loop: + // Load 9 outputs + VMOVDQU (R8), Y5 + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (DI), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (DI) + ADDQ $0x20, DI + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_4x9Xor_loop + VZEROUPPER + +mulAvxGFNI_4x9Xor_end: + RET + +// func mulGFNI_4x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x10_64(SB), $0-88 + // Loading 20 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x10_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ start+72(FP), R9 + + // Add start offset to input + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, DX + +mulGFNI_4x10_64_loop: + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R8), R10 + VMOVDQU64 Z20, (R10)(R9*1) + MOVQ 24(R8), R10 + VMOVDQU64 Z21, (R10)(R9*1) + MOVQ 48(R8), R10 + VMOVDQU64 Z22, (R10)(R9*1) + MOVQ 72(R8), R10 + VMOVDQU64 Z23, (R10)(R9*1) + MOVQ 96(R8), R10 + VMOVDQU64 Z24, (R10)(R9*1) + MOVQ 120(R8), R10 + VMOVDQU64 Z25, (R10)(R9*1) + MOVQ 144(R8), R10 + VMOVDQU64 Z26, (R10)(R9*1) + MOVQ 168(R8), R10 + VMOVDQU64 Z27, (R10)(R9*1) + MOVQ 192(R8), R10 + VMOVDQU64 Z28, (R10)(R9*1) + MOVQ 216(R8), R10 + VMOVDQU64 Z29, (R10)(R9*1) + + // Prepare for next loop + ADDQ $0x40, R9 + DECQ AX + JNZ mulGFNI_4x10_64_loop + VZEROUPPER + +mulGFNI_4x10_64_end: + RET + +// func mulAvxGFNI_4x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x10(SB), $0-88 + // Loading 4 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ start+72(FP), R9 + + // Add start offset to input + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, DX + +mulAvxGFNI_4x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R8), R10 + VMOVDQU Y4, (R10)(R9*1) + MOVQ 24(R8), R10 + VMOVDQU Y5, (R10)(R9*1) + MOVQ 48(R8), R10 + VMOVDQU Y6, (R10)(R9*1) + MOVQ 72(R8), R10 + VMOVDQU Y7, (R10)(R9*1) + MOVQ 96(R8), R10 + VMOVDQU Y8, (R10)(R9*1) + MOVQ 120(R8), R10 + VMOVDQU Y9, (R10)(R9*1) + MOVQ 144(R8), R10 + VMOVDQU Y10, (R10)(R9*1) + MOVQ 168(R8), R10 + VMOVDQU Y11, (R10)(R9*1) + MOVQ 192(R8), R10 + VMOVDQU Y12, (R10)(R9*1) + MOVQ 216(R8), R10 + VMOVDQU Y13, (R10)(R9*1) + + // Prepare for next loop + ADDQ $0x20, R9 + DECQ AX + JNZ mulAvxGFNI_4x10_loop + VZEROUPPER + +mulAvxGFNI_4x10_end: + RET + +// func mulGFNI_4x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_4x10_64Xor(SB), $0-88 + // Loading 20 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_4x10_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ start+72(FP), R9 + + // Add start offset to input + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, DX + +mulGFNI_4x10_64Xor_loop: + // Load 10 outputs + MOVQ (R8), R10 + VMOVDQU64 (R10)(R9*1), Z20 + MOVQ 24(R8), R10 + VMOVDQU64 (R10)(R9*1), Z21 + MOVQ 48(R8), R10 + VMOVDQU64 (R10)(R9*1), Z22 + MOVQ 72(R8), R10 + VMOVDQU64 (R10)(R9*1), Z23 + MOVQ 96(R8), R10 + VMOVDQU64 (R10)(R9*1), Z24 + MOVQ 120(R8), R10 + VMOVDQU64 (R10)(R9*1), Z25 + MOVQ 144(R8), R10 + VMOVDQU64 (R10)(R9*1), Z26 + MOVQ 168(R8), R10 + VMOVDQU64 (R10)(R9*1), Z27 + MOVQ 192(R8), R10 + VMOVDQU64 (R10)(R9*1), Z28 + MOVQ 216(R8), R10 + VMOVDQU64 (R10)(R9*1), Z29 + + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R8), R10 + VMOVDQU64 Z20, (R10)(R9*1) + MOVQ 24(R8), R10 + VMOVDQU64 Z21, (R10)(R9*1) + MOVQ 48(R8), R10 + VMOVDQU64 Z22, (R10)(R9*1) + MOVQ 72(R8), R10 + VMOVDQU64 Z23, (R10)(R9*1) + MOVQ 96(R8), R10 + VMOVDQU64 Z24, (R10)(R9*1) + MOVQ 120(R8), R10 + VMOVDQU64 Z25, (R10)(R9*1) + MOVQ 144(R8), R10 + VMOVDQU64 Z26, (R10)(R9*1) + MOVQ 168(R8), R10 + VMOVDQU64 Z27, (R10)(R9*1) + MOVQ 192(R8), R10 + VMOVDQU64 Z28, (R10)(R9*1) + MOVQ 216(R8), R10 + VMOVDQU64 Z29, (R10)(R9*1) + + // Prepare for next loop + ADDQ $0x40, R9 + DECQ AX + JNZ mulGFNI_4x10_64Xor_loop + VZEROUPPER + +mulGFNI_4x10_64Xor_end: + RET + +// func mulAvxGFNI_4x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_4x10Xor(SB), $0-88 + // Loading 4 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_4x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), DX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ start+72(FP), R9 + + // Add start offset to input + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, DX + +mulAvxGFNI_4x10Xor_loop: + // Load 10 outputs + MOVQ (R8), R10 + VMOVDQU (R10)(R9*1), Y4 + MOVQ 24(R8), R10 + VMOVDQU (R10)(R9*1), Y5 + MOVQ 48(R8), R10 + VMOVDQU (R10)(R9*1), Y6 + MOVQ 72(R8), R10 + VMOVDQU (R10)(R9*1), Y7 + MOVQ 96(R8), R10 + VMOVDQU (R10)(R9*1), Y8 + MOVQ 120(R8), R10 + VMOVDQU (R10)(R9*1), Y9 + MOVQ 144(R8), R10 + VMOVDQU (R10)(R9*1), Y10 + MOVQ 168(R8), R10 + VMOVDQU (R10)(R9*1), Y11 + MOVQ 192(R8), R10 + VMOVDQU (R10)(R9*1), Y12 + MOVQ 216(R8), R10 + VMOVDQU (R10)(R9*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R8), R10 + VMOVDQU Y4, (R10)(R9*1) + MOVQ 24(R8), R10 + VMOVDQU Y5, (R10)(R9*1) + MOVQ 48(R8), R10 + VMOVDQU Y6, (R10)(R9*1) + MOVQ 72(R8), R10 + VMOVDQU Y7, (R10)(R9*1) + MOVQ 96(R8), R10 + VMOVDQU Y8, (R10)(R9*1) + MOVQ 120(R8), R10 + VMOVDQU Y9, (R10)(R9*1) + MOVQ 144(R8), R10 + VMOVDQU Y10, (R10)(R9*1) + MOVQ 168(R8), R10 + VMOVDQU Y11, (R10)(R9*1) + MOVQ 192(R8), R10 + VMOVDQU Y12, (R10)(R9*1) + MOVQ 216(R8), R10 + VMOVDQU Y13, (R10)(R9*1) + + // Prepare for next loop + ADDQ $0x20, R9 + DECQ AX + JNZ mulAvxGFNI_4x10Xor_loop + VZEROUPPER + +mulAvxGFNI_4x10Xor_end: + RET + +// func mulGFNI_5x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x1_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x1_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R8 + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, R8 + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, CX + +mulGFNI_5x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z6 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z6, Z5 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z6 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z6, Z6 + VXORPD Z5, Z6, Z5 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z6 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z6, Z6 + VXORPD Z5, Z6, Z5 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z6 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z6, Z6 + VXORPD Z5, Z6, Z5 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (CX), Z6 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z4, Z6, Z6 + VXORPD Z5, Z6, Z5 + + // Store 1 outputs + VMOVDQU64 Z5, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x1_64_loop + VZEROUPPER + +mulGFNI_5x1_64_end: + RET + +// func mulAvxGFNI_5x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R8 + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, R8 + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, CX + +mulAvxGFNI_5x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y6 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y6, Y5 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y6 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y6 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y6 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (CX), Y6 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Store 1 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x1_loop + VZEROUPPER + +mulAvxGFNI_5x1_end: + RET + +// func mulGFNI_5x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x1_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x1_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R8 + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, R8 + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, CX + +mulGFNI_5x1_64Xor_loop: + // Load 1 outputs + VMOVDQU64 (R8), Z5 + + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z6 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z6, Z6 + VXORPD Z5, Z6, Z5 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z6 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z6, Z6 + VXORPD Z5, Z6, Z5 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z6 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z6, Z6 + VXORPD Z5, Z6, Z5 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z6 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z6, Z6 + VXORPD Z5, Z6, Z5 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (CX), Z6 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z4, Z6, Z6 + VXORPD Z5, Z6, Z5 + + // Store 1 outputs + VMOVDQU64 Z5, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x1_64Xor_loop + VZEROUPPER + +mulGFNI_5x1_64Xor_end: + RET + +// func mulAvxGFNI_5x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R8 + MOVQ start+72(FP), R9 + + // Add start offset to output + ADDQ R9, R8 + + // Add start offset to input + ADDQ R9, DX + ADDQ R9, BX + ADDQ R9, SI + ADDQ R9, DI + ADDQ R9, CX + +mulAvxGFNI_5x1Xor_loop: + // Load 1 outputs + VMOVDQU (R8), Y5 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y6 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y6 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y6 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y6 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (CX), Y6 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y4, Y6, Y6 + VXORPD Y5, Y6, Y5 + + // Store 1 outputs + VMOVDQU Y5, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x1Xor_loop + VZEROUPPER + +mulAvxGFNI_5x1Xor_end: + RET + +// func mulGFNI_5x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x2_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x2_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R8 + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R9 + ADDQ R10, R8 + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, CX + +mulGFNI_5x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z12 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z12, Z10 + VGF2P8AFFINEQB $0x00, Z1, Z12, Z11 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z12 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z3, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z12 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z5, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z12 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z7, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (CX), Z12 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z8, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z9, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Store 2 outputs + VMOVDQU64 Z10, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z11, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x2_64_loop + VZEROUPPER + +mulGFNI_5x2_64_end: + RET + +// func mulAvxGFNI_5x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R8 + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R9 + ADDQ R10, R8 + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, CX + +mulAvxGFNI_5x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y11 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y12 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (SI), Y12 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (DI), Y12 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y8, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 2 outputs + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x2_loop + VZEROUPPER + +mulAvxGFNI_5x2_end: + RET + +// func mulGFNI_5x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x2_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x2_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R8 + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R9 + ADDQ R10, R8 + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, CX + +mulGFNI_5x2_64Xor_loop: + // Load 2 outputs + VMOVDQU64 (R9), Z10 + VMOVDQU64 (R8), Z11 + + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z12 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z1, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z12 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z3, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z12 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z5, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z12 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z7, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (CX), Z12 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z8, Z12, Z13 + VXORPD Z10, Z13, Z10 + VGF2P8AFFINEQB $0x00, Z9, Z12, Z13 + VXORPD Z11, Z13, Z11 + + // Store 2 outputs + VMOVDQU64 Z10, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z11, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x2_64Xor_loop + VZEROUPPER + +mulGFNI_5x2_64Xor_end: + RET + +// func mulAvxGFNI_5x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x2Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R8 + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R9 + ADDQ R10, R8 + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, CX + +mulAvxGFNI_5x2Xor_loop: + // Load 2 outputs + VMOVDQU (R9), Y10 + VMOVDQU (R8), Y11 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y12 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y12 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (SI), Y12 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (DI), Y12 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (CX), Y12 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y8, Y12, Y13 + VXORPD Y10, Y13, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y12, Y13 + VXORPD Y11, Y13, Y11 + + // Store 2 outputs + VMOVDQU Y10, (R9) + ADDQ $0x20, R9 + VMOVDQU Y11, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x2Xor_loop + VZEROUPPER + +mulAvxGFNI_5x2Xor_end: + RET + +// func mulGFNI_5x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x3_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x3_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R8 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, R8 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, CX + +mulGFNI_5x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z18 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z18, Z15 + VGF2P8AFFINEQB $0x00, Z1, Z18, Z16 + VGF2P8AFFINEQB $0x00, Z2, Z18, Z17 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z18 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z4, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z5, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z18 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z7, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z8, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z18 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z10, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z11, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (CX), Z18 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z12, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z13, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z14, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Store 3 outputs + VMOVDQU64 Z15, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z16, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z17, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x3_64_loop + VZEROUPPER + +mulGFNI_5x3_64_end: + RET + +// func mulAvxGFNI_5x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x3(SB), $0-88 + // Loading 11 of 15 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R9 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R9 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, DX + +mulAvxGFNI_5x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x3_loop + VZEROUPPER + +mulAvxGFNI_5x3_end: + RET + +// func mulGFNI_5x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x3_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x3_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R8 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R9 + ADDQ R11, R10 + ADDQ R11, R8 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, CX + +mulGFNI_5x3_64Xor_loop: + // Load 3 outputs + VMOVDQU64 (R9), Z15 + VMOVDQU64 (R10), Z16 + VMOVDQU64 (R8), Z17 + + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z18 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z1, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z2, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z18 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z4, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z5, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z18 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z7, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z8, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z18 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z10, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z11, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (CX), Z18 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z12, Z18, Z19 + VXORPD Z15, Z19, Z15 + VGF2P8AFFINEQB $0x00, Z13, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z14, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Store 3 outputs + VMOVDQU64 Z15, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z16, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z17, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x3_64Xor_loop + VZEROUPPER + +mulGFNI_5x3_64Xor_end: + RET + +// func mulAvxGFNI_5x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x3Xor(SB), $0-88 + // Loading 11 of 15 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R9 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R9 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, DX + +mulAvxGFNI_5x3Xor_loop: + // Load 3 outputs + VMOVDQU (R10), Y11 + VMOVDQU (R11), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R10) + ADDQ $0x20, R10 + VMOVDQU Y12, (R11) + ADDQ $0x20, R11 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x3Xor_loop + VZEROUPPER + +mulAvxGFNI_5x3Xor_end: + RET + +// func mulGFNI_5x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x4_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x4_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R8 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R8 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, CX + +mulGFNI_5x4_64_loop: + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z24 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z24, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z24, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z24, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z24, Z23 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (BX), Z24 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z4, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z5, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z6, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z7, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (SI), Z24 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z9, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (DI), Z24 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z13, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z14, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z15, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (CX), Z24 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z16, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z17, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z18, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z19, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Store 4 outputs + VMOVDQU64 Z20, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z21, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z22, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z23, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x4_64_loop + VZEROUPPER + +mulGFNI_5x4_64_end: + RET + +// func mulAvxGFNI_5x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x4(SB), $0-88 + // Loading 10 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R9 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R9 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, DX + +mulAvxGFNI_5x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x4_loop + VZEROUPPER + +mulAvxGFNI_5x4_end: + RET + +// func mulGFNI_5x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x4_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x4_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R8 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R8 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, CX + +mulGFNI_5x4_64Xor_loop: + // Load 4 outputs + VMOVDQU64 (R9), Z20 + VMOVDQU64 (R10), Z21 + VMOVDQU64 (R11), Z22 + VMOVDQU64 (R8), Z23 + + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z24 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (BX), Z24 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z4, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z5, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z6, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z7, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (SI), Z24 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z9, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (DI), Z24 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z13, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z14, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z15, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (CX), Z24 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z16, Z24, Z25 + VXORPD Z20, Z25, Z20 + VGF2P8AFFINEQB $0x00, Z17, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z18, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z19, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Store 4 outputs + VMOVDQU64 Z20, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z21, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z22, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z23, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x4_64Xor_loop + VZEROUPPER + +mulGFNI_5x4_64Xor_end: + RET + +// func mulAvxGFNI_5x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x4Xor(SB), $0-88 + // Loading 10 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R9 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R9 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, DX + +mulAvxGFNI_5x4Xor_loop: + // Load 4 outputs + VMOVDQU (R10), Y10 + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R10) + ADDQ $0x20, R10 + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x4Xor_loop + VZEROUPPER + +mulAvxGFNI_5x4Xor_end: + RET + +// func mulGFNI_5x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x5_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x5_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R8 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R8 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, CX + +mulGFNI_5x5_64_loop: + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (CX), Z30 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + VMOVDQU64 Z25, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z26, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z27, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z28, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z29, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x5_64_loop + VZEROUPPER + +mulGFNI_5x5_64_end: + RET + +// func mulAvxGFNI_5x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x5(SB), $0-88 + // Loading 9 of 25 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R9 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R9 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, DX + +mulAvxGFNI_5x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x5_loop + VZEROUPPER + +mulAvxGFNI_5x5_end: + RET + +// func mulGFNI_5x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x5_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x5_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R8 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R8 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, CX + +mulGFNI_5x5_64Xor_loop: + // Load 5 outputs + VMOVDQU64 (R9), Z25 + VMOVDQU64 (R10), Z26 + VMOVDQU64 (R11), Z27 + VMOVDQU64 (R12), Z28 + VMOVDQU64 (R8), Z29 + + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (CX), Z30 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + VMOVDQU64 Z25, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z26, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z27, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z28, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z29, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x5_64Xor_loop + VZEROUPPER + +mulGFNI_5x5_64Xor_end: + RET + +// func mulAvxGFNI_5x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x5Xor(SB), $0-88 + // Loading 9 of 25 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R9 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R9 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, DX + +mulAvxGFNI_5x5Xor_loop: + // Load 5 outputs + VMOVDQU (R10), Y9 + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R10) + ADDQ $0x20, R10 + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x5Xor_loop + VZEROUPPER + +mulAvxGFNI_5x5Xor_end: + RET + +// func mulGFNI_5x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x6_64(SB), $0-88 + // Loading 24 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x6_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R9 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R9 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, DX + +mulGFNI_5x6_64_loop: + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x6_64_loop + VZEROUPPER + +mulGFNI_5x6_64_end: + RET + +// func mulAvxGFNI_5x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x6(SB), $0-88 + // Loading 8 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R9 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R9 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, DX + +mulAvxGFNI_5x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x6_loop + VZEROUPPER + +mulAvxGFNI_5x6_end: + RET + +// func mulGFNI_5x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x6_64Xor(SB), $0-88 + // Loading 24 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x6_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R9 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R9 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, DX + +mulGFNI_5x6_64Xor_loop: + // Load 6 outputs + VMOVDQU64 (R10), Z24 + VMOVDQU64 (R11), Z25 + VMOVDQU64 (R12), Z26 + VMOVDQU64 (R13), Z27 + VMOVDQU64 (R14), Z28 + VMOVDQU64 (R9), Z29 + + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x6_64Xor_loop + VZEROUPPER + +mulGFNI_5x6_64Xor_end: + RET + +// func mulAvxGFNI_5x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x6Xor(SB), $0-88 + // Loading 8 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R9 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R9 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, DX + +mulAvxGFNI_5x6Xor_loop: + // Load 6 outputs + VMOVDQU (R10), Y8 + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R10) + ADDQ $0x20, R10 + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x6Xor_loop + VZEROUPPER + +mulAvxGFNI_5x6Xor_end: + RET + +// func mulGFNI_5x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x7_64(SB), $8-88 + // Loading 23 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x7_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, DX + +mulGFNI_5x7_64_loop: + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x7_64_loop + VZEROUPPER + +mulGFNI_5x7_64_end: + RET + +// func mulAvxGFNI_5x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x7(SB), $8-88 + // Loading 7 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, DX + +mulAvxGFNI_5x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x7_loop + VZEROUPPER + +mulAvxGFNI_5x7_end: + RET + +// func mulGFNI_5x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x7_64Xor(SB), $8-88 + // Loading 23 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x7_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, DX + +mulGFNI_5x7_64Xor_loop: + // Load 7 outputs + VMOVDQU64 (R10), Z23 + VMOVDQU64 (R11), Z24 + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R9), Z29 + + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_5x7_64Xor_loop + VZEROUPPER + +mulGFNI_5x7_64Xor_end: + RET + +// func mulAvxGFNI_5x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x7Xor(SB), $8-88 + // Loading 7 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, DX + +mulAvxGFNI_5x7Xor_loop: + // Load 7 outputs + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_5x7Xor_loop + VZEROUPPER + +mulAvxGFNI_5x7Xor_end: + RET + +// func mulGFNI_5x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x8_64(SB), $8-88 + // Loading 22 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x8_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), AX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_5x8_64_loop: + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + VMOVDQU64 Z22, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_5x8_64_loop + VZEROUPPER + +mulGFNI_5x8_64_end: + RET + +// func mulAvxGFNI_5x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x8(SB), $8-88 + // Loading 6 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), AX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_5x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_5x8_loop + VZEROUPPER + +mulAvxGFNI_5x8_end: + RET + +// func mulGFNI_5x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x8_64Xor(SB), $8-88 + // Loading 22 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x8_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), AX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_5x8_64Xor_loop: + // Load 8 outputs + VMOVDQU64 (R9), Z22 + VMOVDQU64 (R10), Z23 + VMOVDQU64 (R11), Z24 + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R8), Z29 + + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + VMOVDQU64 Z22, (R9) + ADDQ $0x40, R9 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R8) + ADDQ $0x40, R8 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_5x8_64Xor_loop + VZEROUPPER + +mulGFNI_5x8_64Xor_end: + RET + +// func mulAvxGFNI_5x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x8Xor(SB), $8-88 + // Loading 6 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), AX + MOVQ out_base+48(FP), R8 + MOVQ out_base+48(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R15 + MOVQ 168(R8), R8 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R8 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_5x8Xor_loop: + // Load 8 outputs + VMOVDQU (R9), Y6 + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R8), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R8) + ADDQ $0x20, R8 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_5x8Xor_loop + VZEROUPPER + +mulAvxGFNI_5x8Xor_end: + RET + +// func mulGFNI_5x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x9_64(SB), $0-88 + // Loading 21 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x9_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulGFNI_5x9_64_loop: + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R9), R11 + VMOVDQU64 Z21, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU64 Z22, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU64 Z23, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU64 Z24, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU64 Z25, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU64 Z26, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU64 Z27, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU64 Z28, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU64 Z29, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x40, R10 + DECQ AX + JNZ mulGFNI_5x9_64_loop + VZEROUPPER + +mulGFNI_5x9_64_end: + RET + +// func mulAvxGFNI_5x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x9(SB), $0-88 + // Loading 5 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulAvxGFNI_5x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R9), R11 + VMOVDQU Y5, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU Y6, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU Y7, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU Y8, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU Y9, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU Y10, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU Y11, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU Y12, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU Y13, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxGFNI_5x9_loop + VZEROUPPER + +mulAvxGFNI_5x9_end: + RET + +// func mulGFNI_5x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x9_64Xor(SB), $0-88 + // Loading 21 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x9_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulGFNI_5x9_64Xor_loop: + // Load 9 outputs + MOVQ (R9), R11 + VMOVDQU64 (R11)(R10*1), Z21 + MOVQ 24(R9), R11 + VMOVDQU64 (R11)(R10*1), Z22 + MOVQ 48(R9), R11 + VMOVDQU64 (R11)(R10*1), Z23 + MOVQ 72(R9), R11 + VMOVDQU64 (R11)(R10*1), Z24 + MOVQ 96(R9), R11 + VMOVDQU64 (R11)(R10*1), Z25 + MOVQ 120(R9), R11 + VMOVDQU64 (R11)(R10*1), Z26 + MOVQ 144(R9), R11 + VMOVDQU64 (R11)(R10*1), Z27 + MOVQ 168(R9), R11 + VMOVDQU64 (R11)(R10*1), Z28 + MOVQ 192(R9), R11 + VMOVDQU64 (R11)(R10*1), Z29 + + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R9), R11 + VMOVDQU64 Z21, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU64 Z22, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU64 Z23, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU64 Z24, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU64 Z25, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU64 Z26, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU64 Z27, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU64 Z28, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU64 Z29, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x40, R10 + DECQ AX + JNZ mulGFNI_5x9_64Xor_loop + VZEROUPPER + +mulGFNI_5x9_64Xor_end: + RET + +// func mulAvxGFNI_5x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x9Xor(SB), $0-88 + // Loading 5 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulAvxGFNI_5x9Xor_loop: + // Load 9 outputs + MOVQ (R9), R11 + VMOVDQU (R11)(R10*1), Y5 + MOVQ 24(R9), R11 + VMOVDQU (R11)(R10*1), Y6 + MOVQ 48(R9), R11 + VMOVDQU (R11)(R10*1), Y7 + MOVQ 72(R9), R11 + VMOVDQU (R11)(R10*1), Y8 + MOVQ 96(R9), R11 + VMOVDQU (R11)(R10*1), Y9 + MOVQ 120(R9), R11 + VMOVDQU (R11)(R10*1), Y10 + MOVQ 144(R9), R11 + VMOVDQU (R11)(R10*1), Y11 + MOVQ 168(R9), R11 + VMOVDQU (R11)(R10*1), Y12 + MOVQ 192(R9), R11 + VMOVDQU (R11)(R10*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R9), R11 + VMOVDQU Y5, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU Y6, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU Y7, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU Y8, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU Y9, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU Y10, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU Y11, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU Y12, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU Y13, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxGFNI_5x9Xor_loop + VZEROUPPER + +mulAvxGFNI_5x9Xor_end: + RET + +// func mulGFNI_5x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x10_64(SB), $0-88 + // Loading 20 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x10_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulGFNI_5x10_64_loop: + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R9), R11 + VMOVDQU64 Z20, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU64 Z21, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU64 Z22, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU64 Z23, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU64 Z24, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU64 Z25, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU64 Z26, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU64 Z27, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU64 Z28, (R11)(R10*1) + MOVQ 216(R9), R11 + VMOVDQU64 Z29, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x40, R10 + DECQ AX + JNZ mulGFNI_5x10_64_loop + VZEROUPPER + +mulGFNI_5x10_64_end: + RET + +// func mulAvxGFNI_5x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x10(SB), $0-88 + // Loading 4 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulAvxGFNI_5x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R9), R11 + VMOVDQU Y4, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU Y5, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU Y6, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU Y7, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU Y8, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU Y9, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU Y10, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU Y11, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU Y12, (R11)(R10*1) + MOVQ 216(R9), R11 + VMOVDQU Y13, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxGFNI_5x10_loop + VZEROUPPER + +mulAvxGFNI_5x10_end: + RET + +// func mulGFNI_5x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_5x10_64Xor(SB), $0-88 + // Loading 20 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_5x10_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulGFNI_5x10_64Xor_loop: + // Load 10 outputs + MOVQ (R9), R11 + VMOVDQU64 (R11)(R10*1), Z20 + MOVQ 24(R9), R11 + VMOVDQU64 (R11)(R10*1), Z21 + MOVQ 48(R9), R11 + VMOVDQU64 (R11)(R10*1), Z22 + MOVQ 72(R9), R11 + VMOVDQU64 (R11)(R10*1), Z23 + MOVQ 96(R9), R11 + VMOVDQU64 (R11)(R10*1), Z24 + MOVQ 120(R9), R11 + VMOVDQU64 (R11)(R10*1), Z25 + MOVQ 144(R9), R11 + VMOVDQU64 (R11)(R10*1), Z26 + MOVQ 168(R9), R11 + VMOVDQU64 (R11)(R10*1), Z27 + MOVQ 192(R9), R11 + VMOVDQU64 (R11)(R10*1), Z28 + MOVQ 216(R9), R11 + VMOVDQU64 (R11)(R10*1), Z29 + + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R9), R11 + VMOVDQU64 Z20, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU64 Z21, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU64 Z22, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU64 Z23, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU64 Z24, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU64 Z25, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU64 Z26, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU64 Z27, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU64 Z28, (R11)(R10*1) + MOVQ 216(R9), R11 + VMOVDQU64 Z29, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x40, R10 + DECQ AX + JNZ mulGFNI_5x10_64Xor_loop + VZEROUPPER + +mulGFNI_5x10_64Xor_end: + RET + +// func mulAvxGFNI_5x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_5x10Xor(SB), $0-88 + // Loading 4 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_5x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), DX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ start+72(FP), R10 + + // Add start offset to input + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, DX + +mulAvxGFNI_5x10Xor_loop: + // Load 10 outputs + MOVQ (R9), R11 + VMOVDQU (R11)(R10*1), Y4 + MOVQ 24(R9), R11 + VMOVDQU (R11)(R10*1), Y5 + MOVQ 48(R9), R11 + VMOVDQU (R11)(R10*1), Y6 + MOVQ 72(R9), R11 + VMOVDQU (R11)(R10*1), Y7 + MOVQ 96(R9), R11 + VMOVDQU (R11)(R10*1), Y8 + MOVQ 120(R9), R11 + VMOVDQU (R11)(R10*1), Y9 + MOVQ 144(R9), R11 + VMOVDQU (R11)(R10*1), Y10 + MOVQ 168(R9), R11 + VMOVDQU (R11)(R10*1), Y11 + MOVQ 192(R9), R11 + VMOVDQU (R11)(R10*1), Y12 + MOVQ 216(R9), R11 + VMOVDQU (R11)(R10*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R9), R11 + VMOVDQU Y4, (R11)(R10*1) + MOVQ 24(R9), R11 + VMOVDQU Y5, (R11)(R10*1) + MOVQ 48(R9), R11 + VMOVDQU Y6, (R11)(R10*1) + MOVQ 72(R9), R11 + VMOVDQU Y7, (R11)(R10*1) + MOVQ 96(R9), R11 + VMOVDQU Y8, (R11)(R10*1) + MOVQ 120(R9), R11 + VMOVDQU Y9, (R11)(R10*1) + MOVQ 144(R9), R11 + VMOVDQU Y10, (R11)(R10*1) + MOVQ 168(R9), R11 + VMOVDQU Y11, (R11)(R10*1) + MOVQ 192(R9), R11 + VMOVDQU Y12, (R11)(R10*1) + MOVQ 216(R9), R11 + VMOVDQU Y13, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxGFNI_5x10Xor_loop + VZEROUPPER + +mulAvxGFNI_5x10Xor_end: + RET + +// func mulGFNI_6x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x1_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 9 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x1_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R9 + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R9 + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, CX + +mulGFNI_6x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z7 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z7, Z6 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z7 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z7 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z7 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (R8), Z7 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z4, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Load and process 64 bytes from input 5 to 1 outputs + VMOVDQU64 (CX), Z7 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z5, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Store 1 outputs + VMOVDQU64 Z6, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x1_64_loop + VZEROUPPER + +mulGFNI_6x1_64_end: + RET + +// func mulAvxGFNI_6x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 9 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R9 + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R9 + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, CX + +mulAvxGFNI_6x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y7 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y7, Y6 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y7 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y7 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y7 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y7 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (CX), Y7 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y5, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Store 1 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x1_loop + VZEROUPPER + +mulAvxGFNI_6x1_end: + RET + +// func mulGFNI_6x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x1_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 9 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x1_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R9 + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R9 + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, CX + +mulGFNI_6x1_64Xor_loop: + // Load 1 outputs + VMOVDQU64 (R9), Z6 + + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z7 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z7 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z7 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z7 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (R8), Z7 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z4, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Load and process 64 bytes from input 5 to 1 outputs + VMOVDQU64 (CX), Z7 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z5, Z7, Z7 + VXORPD Z6, Z7, Z6 + + // Store 1 outputs + VMOVDQU64 Z6, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x1_64Xor_loop + VZEROUPPER + +mulGFNI_6x1_64Xor_end: + RET + +// func mulAvxGFNI_6x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 9 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R9 + MOVQ start+72(FP), R10 + + // Add start offset to output + ADDQ R10, R9 + + // Add start offset to input + ADDQ R10, DX + ADDQ R10, BX + ADDQ R10, SI + ADDQ R10, DI + ADDQ R10, R8 + ADDQ R10, CX + +mulAvxGFNI_6x1Xor_loop: + // Load 1 outputs + VMOVDQU (R9), Y6 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y7 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y7 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y7 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y7 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y7 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (CX), Y7 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y5, Y7, Y7 + VXORPD Y6, Y7, Y6 + + // Store 1 outputs + VMOVDQU Y6, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x1Xor_loop + VZEROUPPER + +mulAvxGFNI_6x1Xor_end: + RET + +// func mulGFNI_6x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x2_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x2_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R9 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + ADDQ R11, R9 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, CX + +mulGFNI_6x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z14 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z14, Z12 + VGF2P8AFFINEQB $0x00, Z1, Z14, Z13 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z14 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z3, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z14 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z5, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z14 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z7, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (R8), Z14 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z8, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z9, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Load and process 64 bytes from input 5 to 2 outputs + VMOVDQU64 (CX), Z14 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z10, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z11, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Store 2 outputs + VMOVDQU64 Z12, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z13, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x2_64_loop + VZEROUPPER + +mulGFNI_6x2_64_end: + RET + +// func mulAvxGFNI_6x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x2(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R9 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + ADDQ R11, R9 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, CX + +mulAvxGFNI_6x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (CX), Y14 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x2_loop + VZEROUPPER + +mulAvxGFNI_6x2_end: + RET + +// func mulGFNI_6x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x2_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x2_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R9 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + ADDQ R11, R9 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, CX + +mulGFNI_6x2_64Xor_loop: + // Load 2 outputs + VMOVDQU64 (R10), Z12 + VMOVDQU64 (R9), Z13 + + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z14 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z1, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z14 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z3, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z14 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z5, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z14 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z7, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (R8), Z14 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z8, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z9, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Load and process 64 bytes from input 5 to 2 outputs + VMOVDQU64 (CX), Z14 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z10, Z14, Z15 + VXORPD Z12, Z15, Z12 + VGF2P8AFFINEQB $0x00, Z11, Z14, Z15 + VXORPD Z13, Z15, Z13 + + // Store 2 outputs + VMOVDQU64 Z12, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z13, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x2_64Xor_loop + VZEROUPPER + +mulGFNI_6x2_64Xor_end: + RET + +// func mulAvxGFNI_6x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x2Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R9 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + ADDQ R11, R9 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, CX + +mulAvxGFNI_6x2Xor_loop: + // Load 2 outputs + VMOVDQU (R10), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (CX), Y14 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R10) + ADDQ $0x20, R10 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x2Xor_loop + VZEROUPPER + +mulAvxGFNI_6x2Xor_end: + RET + +// func mulGFNI_6x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x3_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x3_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R9 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R9 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, CX + +mulGFNI_6x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z21 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z1, Z21, Z19 + VGF2P8AFFINEQB $0x00, Z2, Z21, Z20 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z21 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z4, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z5, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z21 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z7, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z8, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z21 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z10, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z11, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (R8), Z21 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z13, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z14, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Load and process 64 bytes from input 5 to 3 outputs + VMOVDQU64 (CX), Z21 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z15, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z16, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z17, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Store 3 outputs + VMOVDQU64 Z18, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z19, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z20, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x3_64_loop + VZEROUPPER + +mulGFNI_6x3_64_end: + RET + +// func mulAvxGFNI_6x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x3(SB), $0-88 + // Loading 11 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R10 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R10 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, DX + +mulAvxGFNI_6x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x3_loop + VZEROUPPER + +mulAvxGFNI_6x3_end: + RET + +// func mulGFNI_6x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x3_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x3_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R9 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R10 + ADDQ R12, R11 + ADDQ R12, R9 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, CX + +mulGFNI_6x3_64Xor_loop: + // Load 3 outputs + VMOVDQU64 (R10), Z18 + VMOVDQU64 (R11), Z19 + VMOVDQU64 (R9), Z20 + + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z21 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z1, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z2, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z21 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z4, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z5, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z21 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z7, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z8, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z21 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z10, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z11, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (R8), Z21 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z13, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z14, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Load and process 64 bytes from input 5 to 3 outputs + VMOVDQU64 (CX), Z21 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z15, Z21, Z22 + VXORPD Z18, Z22, Z18 + VGF2P8AFFINEQB $0x00, Z16, Z21, Z22 + VXORPD Z19, Z22, Z19 + VGF2P8AFFINEQB $0x00, Z17, Z21, Z22 + VXORPD Z20, Z22, Z20 + + // Store 3 outputs + VMOVDQU64 Z18, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z19, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z20, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x3_64Xor_loop + VZEROUPPER + +mulGFNI_6x3_64Xor_end: + RET + +// func mulAvxGFNI_6x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x3Xor(SB), $0-88 + // Loading 11 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R10 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R10 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, DX + +mulAvxGFNI_6x3Xor_loop: + // Load 3 outputs + VMOVDQU (R11), Y11 + VMOVDQU (R12), Y12 + VMOVDQU (R10), Y13 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R11) + ADDQ $0x20, R11 + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x3Xor_loop + VZEROUPPER + +mulAvxGFNI_6x3Xor_end: + RET + +// func mulGFNI_6x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x4_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x4_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R9 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R9 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, CX + +mulGFNI_6x4_64_loop: + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z28 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z28, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z28, Z27 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (BX), Z28 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z4, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (SI), Z28 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (DI), Z28 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (R8), Z28 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z16, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z17, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z18, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z19, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 5 to 4 outputs + VMOVDQU64 (CX), Z28 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z20, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z21, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z22, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z23, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Store 4 outputs + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x4_64_loop + VZEROUPPER + +mulGFNI_6x4_64_end: + RET + +// func mulAvxGFNI_6x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x4(SB), $0-88 + // Loading 10 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R10 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R10 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, DX + +mulAvxGFNI_6x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x4_loop + VZEROUPPER + +mulAvxGFNI_6x4_end: + RET + +// func mulGFNI_6x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x4_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x4_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R9 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R9 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, CX + +mulGFNI_6x4_64Xor_loop: + // Load 4 outputs + VMOVDQU64 (R10), Z24 + VMOVDQU64 (R11), Z25 + VMOVDQU64 (R12), Z26 + VMOVDQU64 (R9), Z27 + + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z28 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (BX), Z28 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z4, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (SI), Z28 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (DI), Z28 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (R8), Z28 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z16, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z17, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z18, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z19, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Load and process 64 bytes from input 5 to 4 outputs + VMOVDQU64 (CX), Z28 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z20, Z28, Z29 + VXORPD Z24, Z29, Z24 + VGF2P8AFFINEQB $0x00, Z21, Z28, Z29 + VXORPD Z25, Z29, Z25 + VGF2P8AFFINEQB $0x00, Z22, Z28, Z29 + VXORPD Z26, Z29, Z26 + VGF2P8AFFINEQB $0x00, Z23, Z28, Z29 + VXORPD Z27, Z29, Z27 + + // Store 4 outputs + VMOVDQU64 Z24, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x4_64Xor_loop + VZEROUPPER + +mulGFNI_6x4_64Xor_end: + RET + +// func mulAvxGFNI_6x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x4Xor(SB), $0-88 + // Loading 10 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R10 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R10 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, DX + +mulAvxGFNI_6x4Xor_loop: + // Load 4 outputs + VMOVDQU (R11), Y10 + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (R10), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R11) + ADDQ $0x20, R11 + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x4Xor_loop + VZEROUPPER + +mulAvxGFNI_6x4Xor_end: + RET + +// func mulGFNI_6x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x5_64(SB), $0-88 + // Loading 25 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x5_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R10 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R10 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, DX + +mulGFNI_6x5_64_loop: + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x5_64_loop + VZEROUPPER + +mulGFNI_6x5_64_end: + RET + +// func mulAvxGFNI_6x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x5(SB), $0-88 + // Loading 9 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R10 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R10 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, DX + +mulAvxGFNI_6x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x5_loop + VZEROUPPER + +mulAvxGFNI_6x5_end: + RET + +// func mulGFNI_6x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x5_64Xor(SB), $0-88 + // Loading 25 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x5_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R10 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R10 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, DX + +mulGFNI_6x5_64Xor_loop: + // Load 5 outputs + VMOVDQU64 (R11), Z25 + VMOVDQU64 (R12), Z26 + VMOVDQU64 (R13), Z27 + VMOVDQU64 (R14), Z28 + VMOVDQU64 (R10), Z29 + + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + VMOVDQU64 Z25, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x5_64Xor_loop + VZEROUPPER + +mulGFNI_6x5_64Xor_end: + RET + +// func mulAvxGFNI_6x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x5Xor(SB), $0-88 + // Loading 9 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 37 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R10 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R10 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, DX + +mulAvxGFNI_6x5Xor_loop: + // Load 5 outputs + VMOVDQU (R11), Y9 + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (R10), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R11) + ADDQ $0x20, R11 + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x5Xor_loop + VZEROUPPER + +mulAvxGFNI_6x5Xor_end: + RET + +// func mulGFNI_6x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x6_64(SB), $8-88 + // Loading 24 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x6_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, DX + +mulGFNI_6x6_64_loop: + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x6_64_loop + VZEROUPPER + +mulGFNI_6x6_64_end: + RET + +// func mulAvxGFNI_6x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x6(SB), $8-88 + // Loading 8 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, DX + +mulAvxGFNI_6x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x6_loop + VZEROUPPER + +mulAvxGFNI_6x6_end: + RET + +// func mulGFNI_6x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x6_64Xor(SB), $8-88 + // Loading 24 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x6_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, DX + +mulGFNI_6x6_64Xor_loop: + // Load 6 outputs + VMOVDQU64 (R11), Z24 + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R10), Z29 + + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_6x6_64Xor_loop + VZEROUPPER + +mulGFNI_6x6_64Xor_end: + RET + +// func mulAvxGFNI_6x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x6Xor(SB), $8-88 + // Loading 8 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, DX + +mulAvxGFNI_6x6Xor_loop: + // Load 6 outputs + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R10), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_6x6Xor_loop + VZEROUPPER + +mulAvxGFNI_6x6Xor_end: + RET + +// func mulGFNI_6x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x7_64(SB), $8-88 + // Loading 23 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 51 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x7_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), AX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_6x7_64_loop: + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 7 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_6x7_64_loop + VZEROUPPER + +mulGFNI_6x7_64_end: + RET + +// func mulAvxGFNI_6x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x7(SB), $8-88 + // Loading 7 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 51 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), AX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_6x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_6x7_loop + VZEROUPPER + +mulAvxGFNI_6x7_end: + RET + +// func mulGFNI_6x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x7_64Xor(SB), $8-88 + // Loading 23 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 51 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x7_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), AX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_6x7_64Xor_loop: + // Load 7 outputs + VMOVDQU64 (R10), Z23 + VMOVDQU64 (R11), Z24 + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R9), Z29 + + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 7 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R9) + ADDQ $0x40, R9 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_6x7_64Xor_loop + VZEROUPPER + +mulGFNI_6x7_64Xor_end: + RET + +// func mulAvxGFNI_6x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x7Xor(SB), $8-88 + // Loading 7 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 51 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), AX + MOVQ out_base+48(FP), R9 + MOVQ out_base+48(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R15 + MOVQ 144(R9), R9 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R9 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_6x7Xor_loop: + // Load 7 outputs + VMOVDQU (R10), Y7 + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R9), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R9) + ADDQ $0x20, R9 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_6x7Xor_loop + VZEROUPPER + +mulAvxGFNI_6x7Xor_end: + RET + +// func mulGFNI_6x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x8_64(SB), $0-88 + // Loading 22 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x8_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulGFNI_6x8_64_loop: + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + MOVQ (R10), R12 + VMOVDQU64 Z22, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU64 Z23, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU64 Z24, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU64 Z25, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU64 Z26, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU64 Z27, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU64 Z28, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU64 Z29, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x40, R11 + DECQ AX + JNZ mulGFNI_6x8_64_loop + VZEROUPPER + +mulGFNI_6x8_64_end: + RET + +// func mulAvxGFNI_6x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x8(SB), $0-88 + // Loading 6 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x8_loop + VZEROUPPER + +mulAvxGFNI_6x8_end: + RET + +// func mulGFNI_6x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x8_64Xor(SB), $0-88 + // Loading 22 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x8_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulGFNI_6x8_64Xor_loop: + // Load 8 outputs + MOVQ (R10), R12 + VMOVDQU64 (R12)(R11*1), Z22 + MOVQ 24(R10), R12 + VMOVDQU64 (R12)(R11*1), Z23 + MOVQ 48(R10), R12 + VMOVDQU64 (R12)(R11*1), Z24 + MOVQ 72(R10), R12 + VMOVDQU64 (R12)(R11*1), Z25 + MOVQ 96(R10), R12 + VMOVDQU64 (R12)(R11*1), Z26 + MOVQ 120(R10), R12 + VMOVDQU64 (R12)(R11*1), Z27 + MOVQ 144(R10), R12 + VMOVDQU64 (R12)(R11*1), Z28 + MOVQ 168(R10), R12 + VMOVDQU64 (R12)(R11*1), Z29 + + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + MOVQ (R10), R12 + VMOVDQU64 Z22, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU64 Z23, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU64 Z24, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU64 Z25, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU64 Z26, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU64 Z27, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU64 Z28, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU64 Z29, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x40, R11 + DECQ AX + JNZ mulGFNI_6x8_64Xor_loop + VZEROUPPER + +mulGFNI_6x8_64Xor_end: + RET + +// func mulAvxGFNI_6x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x8Xor(SB), $0-88 + // Loading 6 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x8Xor_loop: + // Load 8 outputs + MOVQ (R10), R12 + VMOVDQU (R12)(R11*1), Y6 + MOVQ 24(R10), R12 + VMOVDQU (R12)(R11*1), Y7 + MOVQ 48(R10), R12 + VMOVDQU (R12)(R11*1), Y8 + MOVQ 72(R10), R12 + VMOVDQU (R12)(R11*1), Y9 + MOVQ 96(R10), R12 + VMOVDQU (R12)(R11*1), Y10 + MOVQ 120(R10), R12 + VMOVDQU (R12)(R11*1), Y11 + MOVQ 144(R10), R12 + VMOVDQU (R12)(R11*1), Y12 + MOVQ 168(R10), R12 + VMOVDQU (R12)(R11*1), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x8Xor_loop + VZEROUPPER + +mulAvxGFNI_6x8Xor_end: + RET + +// func mulGFNI_6x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x9_64(SB), $0-88 + // Loading 21 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x9_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulGFNI_6x9_64_loop: + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R10), R12 + VMOVDQU64 Z21, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU64 Z22, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU64 Z23, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU64 Z24, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU64 Z25, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU64 Z26, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU64 Z27, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU64 Z28, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU64 Z29, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x40, R11 + DECQ AX + JNZ mulGFNI_6x9_64_loop + VZEROUPPER + +mulGFNI_6x9_64_end: + RET + +// func mulAvxGFNI_6x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x9(SB), $0-88 + // Loading 5 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R10), R12 + VMOVDQU Y5, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x9_loop + VZEROUPPER + +mulAvxGFNI_6x9_end: + RET + +// func mulGFNI_6x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x9_64Xor(SB), $0-88 + // Loading 21 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x9_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulGFNI_6x9_64Xor_loop: + // Load 9 outputs + MOVQ (R10), R12 + VMOVDQU64 (R12)(R11*1), Z21 + MOVQ 24(R10), R12 + VMOVDQU64 (R12)(R11*1), Z22 + MOVQ 48(R10), R12 + VMOVDQU64 (R12)(R11*1), Z23 + MOVQ 72(R10), R12 + VMOVDQU64 (R12)(R11*1), Z24 + MOVQ 96(R10), R12 + VMOVDQU64 (R12)(R11*1), Z25 + MOVQ 120(R10), R12 + VMOVDQU64 (R12)(R11*1), Z26 + MOVQ 144(R10), R12 + VMOVDQU64 (R12)(R11*1), Z27 + MOVQ 168(R10), R12 + VMOVDQU64 (R12)(R11*1), Z28 + MOVQ 192(R10), R12 + VMOVDQU64 (R12)(R11*1), Z29 + + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R10), R12 + VMOVDQU64 Z21, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU64 Z22, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU64 Z23, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU64 Z24, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU64 Z25, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU64 Z26, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU64 Z27, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU64 Z28, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU64 Z29, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x40, R11 + DECQ AX + JNZ mulGFNI_6x9_64Xor_loop + VZEROUPPER + +mulGFNI_6x9_64Xor_end: + RET + +// func mulAvxGFNI_6x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x9Xor(SB), $0-88 + // Loading 5 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x9Xor_loop: + // Load 9 outputs + MOVQ (R10), R12 + VMOVDQU (R12)(R11*1), Y5 + MOVQ 24(R10), R12 + VMOVDQU (R12)(R11*1), Y6 + MOVQ 48(R10), R12 + VMOVDQU (R12)(R11*1), Y7 + MOVQ 72(R10), R12 + VMOVDQU (R12)(R11*1), Y8 + MOVQ 96(R10), R12 + VMOVDQU (R12)(R11*1), Y9 + MOVQ 120(R10), R12 + VMOVDQU (R12)(R11*1), Y10 + MOVQ 144(R10), R12 + VMOVDQU (R12)(R11*1), Y11 + MOVQ 168(R10), R12 + VMOVDQU (R12)(R11*1), Y12 + MOVQ 192(R10), R12 + VMOVDQU (R12)(R11*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R10), R12 + VMOVDQU Y5, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x9Xor_loop + VZEROUPPER + +mulAvxGFNI_6x9Xor_end: + RET + +// func mulGFNI_6x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x10_64(SB), $0-88 + // Loading 20 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x10_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulGFNI_6x10_64_loop: + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R10), R12 + VMOVDQU64 Z20, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU64 Z21, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU64 Z22, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU64 Z23, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU64 Z24, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU64 Z25, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU64 Z26, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU64 Z27, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU64 Z28, (R12)(R11*1) + MOVQ 216(R10), R12 + VMOVDQU64 Z29, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x40, R11 + DECQ AX + JNZ mulGFNI_6x10_64_loop + VZEROUPPER + +mulGFNI_6x10_64_end: + RET + +// func mulAvxGFNI_6x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x10(SB), $0-88 + // Loading 4 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R10), R12 + VMOVDQU Y4, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y5, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 216(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x10_loop + VZEROUPPER + +mulAvxGFNI_6x10_end: + RET + +// func mulGFNI_6x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_6x10_64Xor(SB), $0-88 + // Loading 20 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_6x10_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulGFNI_6x10_64Xor_loop: + // Load 10 outputs + MOVQ (R10), R12 + VMOVDQU64 (R12)(R11*1), Z20 + MOVQ 24(R10), R12 + VMOVDQU64 (R12)(R11*1), Z21 + MOVQ 48(R10), R12 + VMOVDQU64 (R12)(R11*1), Z22 + MOVQ 72(R10), R12 + VMOVDQU64 (R12)(R11*1), Z23 + MOVQ 96(R10), R12 + VMOVDQU64 (R12)(R11*1), Z24 + MOVQ 120(R10), R12 + VMOVDQU64 (R12)(R11*1), Z25 + MOVQ 144(R10), R12 + VMOVDQU64 (R12)(R11*1), Z26 + MOVQ 168(R10), R12 + VMOVDQU64 (R12)(R11*1), Z27 + MOVQ 192(R10), R12 + VMOVDQU64 (R12)(R11*1), Z28 + MOVQ 216(R10), R12 + VMOVDQU64 (R12)(R11*1), Z29 + + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R10), R12 + VMOVDQU64 Z20, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU64 Z21, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU64 Z22, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU64 Z23, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU64 Z24, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU64 Z25, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU64 Z26, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU64 Z27, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU64 Z28, (R12)(R11*1) + MOVQ 216(R10), R12 + VMOVDQU64 Z29, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x40, R11 + DECQ AX + JNZ mulGFNI_6x10_64Xor_loop + VZEROUPPER + +mulGFNI_6x10_64Xor_end: + RET + +// func mulAvxGFNI_6x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_6x10Xor(SB), $0-88 + // Loading 4 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_6x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), DX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ start+72(FP), R11 + + // Add start offset to input + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, DX + +mulAvxGFNI_6x10Xor_loop: + // Load 10 outputs + MOVQ (R10), R12 + VMOVDQU (R12)(R11*1), Y4 + MOVQ 24(R10), R12 + VMOVDQU (R12)(R11*1), Y5 + MOVQ 48(R10), R12 + VMOVDQU (R12)(R11*1), Y6 + MOVQ 72(R10), R12 + VMOVDQU (R12)(R11*1), Y7 + MOVQ 96(R10), R12 + VMOVDQU (R12)(R11*1), Y8 + MOVQ 120(R10), R12 + VMOVDQU (R12)(R11*1), Y9 + MOVQ 144(R10), R12 + VMOVDQU (R12)(R11*1), Y10 + MOVQ 168(R10), R12 + VMOVDQU (R12)(R11*1), Y11 + MOVQ 192(R10), R12 + VMOVDQU (R12)(R11*1), Y12 + MOVQ 216(R10), R12 + VMOVDQU (R12)(R11*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R10), R12 + VMOVDQU Y4, (R12)(R11*1) + MOVQ 24(R10), R12 + VMOVDQU Y5, (R12)(R11*1) + MOVQ 48(R10), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 72(R10), R12 + VMOVDQU Y7, (R12)(R11*1) + MOVQ 96(R10), R12 + VMOVDQU Y8, (R12)(R11*1) + MOVQ 120(R10), R12 + VMOVDQU Y9, (R12)(R11*1) + MOVQ 144(R10), R12 + VMOVDQU Y10, (R12)(R11*1) + MOVQ 168(R10), R12 + VMOVDQU Y11, (R12)(R11*1) + MOVQ 192(R10), R12 + VMOVDQU Y12, (R12)(R11*1) + MOVQ 216(R10), R12 + VMOVDQU Y13, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxGFNI_6x10Xor_loop + VZEROUPPER + +mulAvxGFNI_6x10Xor_end: + RET + +// func mulGFNI_7x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x1_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x1_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), CX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R10 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, CX + +mulGFNI_7x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z8 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z8, Z7 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z8 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z8 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z8 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (R8), Z8 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z4, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 5 to 1 outputs + VMOVDQU64 (R9), Z8 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z5, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 6 to 1 outputs + VMOVDQU64 (CX), Z8 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z6, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Store 1 outputs + VMOVDQU64 Z7, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_7x1_64_loop + VZEROUPPER + +mulGFNI_7x1_64_end: + RET + +// func mulAvxGFNI_7x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), CX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R10 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, CX + +mulAvxGFNI_7x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y8 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y8, Y7 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y8 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y8 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y8 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y8 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y8 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (CX), Y8 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Store 1 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x1_loop + VZEROUPPER + +mulAvxGFNI_7x1_end: + RET + +// func mulGFNI_7x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x1_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x1_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), CX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R10 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, CX + +mulGFNI_7x1_64Xor_loop: + // Load 1 outputs + VMOVDQU64 (R10), Z7 + + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z8 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z8 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z8 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z8 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (R8), Z8 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z4, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 5 to 1 outputs + VMOVDQU64 (R9), Z8 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z5, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Load and process 64 bytes from input 6 to 1 outputs + VMOVDQU64 (CX), Z8 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z6, Z8, Z8 + VXORPD Z7, Z8, Z7 + + // Store 1 outputs + VMOVDQU64 Z7, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_7x1_64Xor_loop + VZEROUPPER + +mulGFNI_7x1_64Xor_end: + RET + +// func mulAvxGFNI_7x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), CX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R10 + MOVQ start+72(FP), R11 + + // Add start offset to output + ADDQ R11, R10 + + // Add start offset to input + ADDQ R11, DX + ADDQ R11, BX + ADDQ R11, SI + ADDQ R11, DI + ADDQ R11, R8 + ADDQ R11, R9 + ADDQ R11, CX + +mulAvxGFNI_7x1Xor_loop: + // Load 1 outputs + VMOVDQU (R10), Y7 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y8 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y8 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y8 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y8 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y8 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y8 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (CX), Y8 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y6, Y8, Y8 + VXORPD Y7, Y8, Y7 + + // Store 1 outputs + VMOVDQU Y7, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x1Xor_loop + VZEROUPPER + +mulAvxGFNI_7x1Xor_end: + RET + +// func mulGFNI_7x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x2_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x2_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), CX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R10 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R11 + ADDQ R12, R10 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, CX + +mulGFNI_7x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z16 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z16, Z14 + VGF2P8AFFINEQB $0x00, Z1, Z16, Z15 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z16 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z3, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z16 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z5, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z16 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z7, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (R8), Z16 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z8, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z9, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 5 to 2 outputs + VMOVDQU64 (R9), Z16 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z10, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z11, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 6 to 2 outputs + VMOVDQU64 (CX), Z16 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z12, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z13, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Store 2 outputs + VMOVDQU64 Z14, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z15, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_7x2_64_loop + VZEROUPPER + +mulGFNI_7x2_64_end: + RET + +// func mulAvxGFNI_7x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x2(SB), $0-88 + // Loading 12 of 14 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R11 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + ADDQ R13, R11 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, DX + +mulAvxGFNI_7x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x2_loop + VZEROUPPER + +mulAvxGFNI_7x2_end: + RET + +// func mulGFNI_7x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x2_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x2_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), CX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R10 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R11 + ADDQ R12, R10 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, CX + +mulGFNI_7x2_64Xor_loop: + // Load 2 outputs + VMOVDQU64 (R11), Z14 + VMOVDQU64 (R10), Z15 + + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z16 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z1, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z16 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z3, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z16 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z5, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z16 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z7, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (R8), Z16 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z8, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z9, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 5 to 2 outputs + VMOVDQU64 (R9), Z16 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z10, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z11, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Load and process 64 bytes from input 6 to 2 outputs + VMOVDQU64 (CX), Z16 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z12, Z16, Z17 + VXORPD Z14, Z17, Z14 + VGF2P8AFFINEQB $0x00, Z13, Z16, Z17 + VXORPD Z15, Z17, Z15 + + // Store 2 outputs + VMOVDQU64 Z14, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z15, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_7x2_64Xor_loop + VZEROUPPER + +mulGFNI_7x2_64Xor_end: + RET + +// func mulAvxGFNI_7x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x2Xor(SB), $0-88 + // Loading 12 of 14 tables to registers + // Destination kept in GP registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R11 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + ADDQ R13, R11 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, DX + +mulAvxGFNI_7x2Xor_loop: + // Load 2 outputs + VMOVDQU (R12), Y12 + VMOVDQU (R11), Y13 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R12) + ADDQ $0x20, R12 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x2Xor_loop + VZEROUPPER + +mulAvxGFNI_7x2Xor_end: + RET + +// func mulGFNI_7x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x3_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x3_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), CX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R10 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R10 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, CX + +mulGFNI_7x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z24 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z24, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z24, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z24, Z23 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z24 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z4, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z5, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z24 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z7, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z8, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z24 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (R8), Z24 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z13, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z14, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 5 to 3 outputs + VMOVDQU64 (R9), Z24 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z15, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z16, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 6 to 3 outputs + VMOVDQU64 (CX), Z24 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z18, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Store 3 outputs + VMOVDQU64 Z21, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z22, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_7x3_64_loop + VZEROUPPER + +mulGFNI_7x3_64_end: + RET + +// func mulAvxGFNI_7x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x3(SB), $0-88 + // Loading 11 of 21 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R11 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R11 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, DX + +mulAvxGFNI_7x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x3_loop + VZEROUPPER + +mulAvxGFNI_7x3_end: + RET + +// func mulGFNI_7x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x3_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x3_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), CX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R10 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R11 + ADDQ R13, R12 + ADDQ R13, R10 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, CX + +mulGFNI_7x3_64Xor_loop: + // Load 3 outputs + VMOVDQU64 (R11), Z21 + VMOVDQU64 (R12), Z22 + VMOVDQU64 (R10), Z23 + + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z24 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z24 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z4, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z5, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z24 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z7, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z8, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z24 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (R8), Z24 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z13, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z14, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 5 to 3 outputs + VMOVDQU64 (R9), Z24 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z15, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z16, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Load and process 64 bytes from input 6 to 3 outputs + VMOVDQU64 (CX), Z24 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z18, Z24, Z25 + VXORPD Z21, Z25, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z24, Z25 + VXORPD Z22, Z25, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z24, Z25 + VXORPD Z23, Z25, Z23 + + // Store 3 outputs + VMOVDQU64 Z21, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z22, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z23, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_7x3_64Xor_loop + VZEROUPPER + +mulGFNI_7x3_64Xor_end: + RET + +// func mulAvxGFNI_7x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x3Xor(SB), $0-88 + // Loading 11 of 21 tables to registers + // Destination kept in GP registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R11 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R11 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, DX + +mulAvxGFNI_7x3Xor_loop: + // Load 3 outputs + VMOVDQU (R12), Y11 + VMOVDQU (R13), Y12 + VMOVDQU (R11), Y13 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R12) + ADDQ $0x20, R12 + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x3Xor_loop + VZEROUPPER + +mulAvxGFNI_7x3Xor_end: + RET + +// func mulGFNI_7x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x4_64(SB), $0-88 + // Loading 26 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x4_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R11 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R11 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, DX + +mulGFNI_7x4_64_loop: + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z29 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 4 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 4 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 4 outputs + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_7x4_64_loop + VZEROUPPER + +mulGFNI_7x4_64_end: + RET + +// func mulAvxGFNI_7x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x4(SB), $0-88 + // Loading 10 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R11 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R11 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, DX + +mulAvxGFNI_7x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x4_loop + VZEROUPPER + +mulAvxGFNI_7x4_end: + RET + +// func mulGFNI_7x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x4_64Xor(SB), $0-88 + // Loading 26 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x4_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R11 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R11 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, DX + +mulGFNI_7x4_64Xor_loop: + // Load 4 outputs + VMOVDQU64 (R12), Z26 + VMOVDQU64 (R13), Z27 + VMOVDQU64 (R14), Z28 + VMOVDQU64 (R11), Z29 + + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 4 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 4 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 4 outputs + VMOVDQU64 Z26, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_7x4_64Xor_loop + VZEROUPPER + +mulGFNI_7x4_64Xor_end: + RET + +// func mulAvxGFNI_7x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x4Xor(SB), $0-88 + // Loading 10 of 28 tables to registers + // Destination kept in GP registers + // Full registers estimated 34 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R11 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R11 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, DX + +mulAvxGFNI_7x4Xor_loop: + // Load 4 outputs + VMOVDQU (R12), Y10 + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (R11), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R12) + ADDQ $0x20, R12 + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x4Xor_loop + VZEROUPPER + +mulAvxGFNI_7x4Xor_end: + RET + +// func mulGFNI_7x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x5_64(SB), $8-88 + // Loading 25 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x5_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, DX + +mulGFNI_7x5_64_loop: + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 5 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_7x5_64_loop + VZEROUPPER + +mulGFNI_7x5_64_end: + RET + +// func mulAvxGFNI_7x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x5(SB), $8-88 + // Loading 9 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, DX + +mulAvxGFNI_7x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x5_loop + VZEROUPPER + +mulAvxGFNI_7x5_end: + RET + +// func mulGFNI_7x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x5_64Xor(SB), $8-88 + // Loading 25 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x5_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, DX + +mulGFNI_7x5_64Xor_loop: + // Load 5 outputs + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R11), Z29 + + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 5 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_7x5_64Xor_loop + VZEROUPPER + +mulGFNI_7x5_64Xor_end: + RET + +// func mulAvxGFNI_7x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x5Xor(SB), $8-88 + // Loading 9 of 35 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, DX + +mulAvxGFNI_7x5Xor_loop: + // Load 5 outputs + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R11), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_7x5Xor_loop + VZEROUPPER + +mulAvxGFNI_7x5Xor_end: + RET + +// func mulGFNI_7x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x6_64(SB), $8-88 + // Loading 24 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x6_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), AX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_7x6_64_loop: + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 6 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 6 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_7x6_64_loop + VZEROUPPER + +mulGFNI_7x6_64_end: + RET + +// func mulAvxGFNI_7x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x6(SB), $8-88 + // Loading 8 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), AX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_7x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_7x6_loop + VZEROUPPER + +mulAvxGFNI_7x6_end: + RET + +// func mulGFNI_7x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x6_64Xor(SB), $8-88 + // Loading 24 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x6_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), AX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_7x6_64Xor_loop: + // Load 6 outputs + VMOVDQU64 (R11), Z24 + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R10), Z29 + + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 6 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 6 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + VMOVDQU64 Z24, (R11) + ADDQ $0x40, R11 + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R10) + ADDQ $0x40, R10 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_7x6_64Xor_loop + VZEROUPPER + +mulGFNI_7x6_64Xor_end: + RET + +// func mulAvxGFNI_7x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x6Xor(SB), $8-88 + // Loading 8 of 42 tables to registers + // Destination kept in GP registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), AX + MOVQ out_base+48(FP), R10 + MOVQ out_base+48(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R15 + MOVQ 120(R10), R10 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R10 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_7x6Xor_loop: + // Load 6 outputs + VMOVDQU (R11), Y8 + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R10), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R10) + ADDQ $0x20, R10 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_7x6Xor_loop + VZEROUPPER + +mulAvxGFNI_7x6Xor_end: + RET + +// func mulGFNI_7x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x7_64(SB), $0-88 + // Loading 23 of 49 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x7_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulGFNI_7x7_64_loop: + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 7 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + MOVQ (R11), R13 + VMOVDQU64 Z23, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU64 Z24, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU64 Z25, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU64 Z26, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU64 Z27, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU64 Z28, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU64 Z29, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x40, R12 + DECQ AX + JNZ mulGFNI_7x7_64_loop + VZEROUPPER + +mulGFNI_7x7_64_end: + RET + +// func mulAvxGFNI_7x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x7(SB), $0-88 + // Loading 7 of 49 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x7_loop + VZEROUPPER + +mulAvxGFNI_7x7_end: + RET + +// func mulGFNI_7x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x7_64Xor(SB), $0-88 + // Loading 23 of 49 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x7_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulGFNI_7x7_64Xor_loop: + // Load 7 outputs + MOVQ (R11), R13 + VMOVDQU64 (R13)(R12*1), Z23 + MOVQ 24(R11), R13 + VMOVDQU64 (R13)(R12*1), Z24 + MOVQ 48(R11), R13 + VMOVDQU64 (R13)(R12*1), Z25 + MOVQ 72(R11), R13 + VMOVDQU64 (R13)(R12*1), Z26 + MOVQ 96(R11), R13 + VMOVDQU64 (R13)(R12*1), Z27 + MOVQ 120(R11), R13 + VMOVDQU64 (R13)(R12*1), Z28 + MOVQ 144(R11), R13 + VMOVDQU64 (R13)(R12*1), Z29 + + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 7 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + MOVQ (R11), R13 + VMOVDQU64 Z23, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU64 Z24, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU64 Z25, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU64 Z26, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU64 Z27, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU64 Z28, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU64 Z29, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x40, R12 + DECQ AX + JNZ mulGFNI_7x7_64Xor_loop + VZEROUPPER + +mulGFNI_7x7_64Xor_end: + RET + +// func mulAvxGFNI_7x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x7Xor(SB), $0-88 + // Loading 7 of 49 tables to registers + // Destination kept on stack + // Full registers estimated 58 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x7Xor_loop: + // Load 7 outputs + MOVQ (R11), R13 + VMOVDQU (R13)(R12*1), Y7 + MOVQ 24(R11), R13 + VMOVDQU (R13)(R12*1), Y8 + MOVQ 48(R11), R13 + VMOVDQU (R13)(R12*1), Y9 + MOVQ 72(R11), R13 + VMOVDQU (R13)(R12*1), Y10 + MOVQ 96(R11), R13 + VMOVDQU (R13)(R12*1), Y11 + MOVQ 120(R11), R13 + VMOVDQU (R13)(R12*1), Y12 + MOVQ 144(R11), R13 + VMOVDQU (R13)(R12*1), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x7Xor_loop + VZEROUPPER + +mulAvxGFNI_7x7Xor_end: + RET + +// func mulGFNI_7x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x8_64(SB), $0-88 + // Loading 22 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 66 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x8_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulGFNI_7x8_64_loop: + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 8 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + MOVQ (R11), R13 + VMOVDQU64 Z22, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU64 Z23, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU64 Z24, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU64 Z25, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU64 Z26, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU64 Z27, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU64 Z28, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU64 Z29, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x40, R12 + DECQ AX + JNZ mulGFNI_7x8_64_loop + VZEROUPPER + +mulGFNI_7x8_64_end: + RET + +// func mulAvxGFNI_7x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x8(SB), $0-88 + // Loading 6 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 66 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x8_loop + VZEROUPPER + +mulAvxGFNI_7x8_end: + RET + +// func mulGFNI_7x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x8_64Xor(SB), $0-88 + // Loading 22 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 66 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x8_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulGFNI_7x8_64Xor_loop: + // Load 8 outputs + MOVQ (R11), R13 + VMOVDQU64 (R13)(R12*1), Z22 + MOVQ 24(R11), R13 + VMOVDQU64 (R13)(R12*1), Z23 + MOVQ 48(R11), R13 + VMOVDQU64 (R13)(R12*1), Z24 + MOVQ 72(R11), R13 + VMOVDQU64 (R13)(R12*1), Z25 + MOVQ 96(R11), R13 + VMOVDQU64 (R13)(R12*1), Z26 + MOVQ 120(R11), R13 + VMOVDQU64 (R13)(R12*1), Z27 + MOVQ 144(R11), R13 + VMOVDQU64 (R13)(R12*1), Z28 + MOVQ 168(R11), R13 + VMOVDQU64 (R13)(R12*1), Z29 + + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 8 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + MOVQ (R11), R13 + VMOVDQU64 Z22, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU64 Z23, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU64 Z24, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU64 Z25, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU64 Z26, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU64 Z27, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU64 Z28, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU64 Z29, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x40, R12 + DECQ AX + JNZ mulGFNI_7x8_64Xor_loop + VZEROUPPER + +mulGFNI_7x8_64Xor_end: + RET + +// func mulAvxGFNI_7x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x8Xor(SB), $0-88 + // Loading 6 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 66 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x8Xor_loop: + // Load 8 outputs + MOVQ (R11), R13 + VMOVDQU (R13)(R12*1), Y6 + MOVQ 24(R11), R13 + VMOVDQU (R13)(R12*1), Y7 + MOVQ 48(R11), R13 + VMOVDQU (R13)(R12*1), Y8 + MOVQ 72(R11), R13 + VMOVDQU (R13)(R12*1), Y9 + MOVQ 96(R11), R13 + VMOVDQU (R13)(R12*1), Y10 + MOVQ 120(R11), R13 + VMOVDQU (R13)(R12*1), Y11 + MOVQ 144(R11), R13 + VMOVDQU (R13)(R12*1), Y12 + MOVQ 168(R11), R13 + VMOVDQU (R13)(R12*1), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x8Xor_loop + VZEROUPPER + +mulAvxGFNI_7x8Xor_end: + RET + +// func mulGFNI_7x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x9_64(SB), $0-88 + // Loading 21 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x9_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulGFNI_7x9_64_loop: + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 9 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R11), R13 + VMOVDQU64 Z21, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU64 Z22, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU64 Z23, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU64 Z24, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU64 Z25, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU64 Z26, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU64 Z27, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU64 Z28, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU64 Z29, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x40, R12 + DECQ AX + JNZ mulGFNI_7x9_64_loop + VZEROUPPER + +mulGFNI_7x9_64_end: + RET + +// func mulAvxGFNI_7x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x9(SB), $0-88 + // Loading 5 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R11), R13 + VMOVDQU Y5, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x9_loop + VZEROUPPER + +mulAvxGFNI_7x9_end: + RET + +// func mulGFNI_7x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x9_64Xor(SB), $0-88 + // Loading 21 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x9_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulGFNI_7x9_64Xor_loop: + // Load 9 outputs + MOVQ (R11), R13 + VMOVDQU64 (R13)(R12*1), Z21 + MOVQ 24(R11), R13 + VMOVDQU64 (R13)(R12*1), Z22 + MOVQ 48(R11), R13 + VMOVDQU64 (R13)(R12*1), Z23 + MOVQ 72(R11), R13 + VMOVDQU64 (R13)(R12*1), Z24 + MOVQ 96(R11), R13 + VMOVDQU64 (R13)(R12*1), Z25 + MOVQ 120(R11), R13 + VMOVDQU64 (R13)(R12*1), Z26 + MOVQ 144(R11), R13 + VMOVDQU64 (R13)(R12*1), Z27 + MOVQ 168(R11), R13 + VMOVDQU64 (R13)(R12*1), Z28 + MOVQ 192(R11), R13 + VMOVDQU64 (R13)(R12*1), Z29 + + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 9 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R11), R13 + VMOVDQU64 Z21, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU64 Z22, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU64 Z23, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU64 Z24, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU64 Z25, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU64 Z26, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU64 Z27, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU64 Z28, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU64 Z29, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x40, R12 + DECQ AX + JNZ mulGFNI_7x9_64Xor_loop + VZEROUPPER + +mulGFNI_7x9_64Xor_end: + RET + +// func mulAvxGFNI_7x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x9Xor(SB), $0-88 + // Loading 5 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x9Xor_loop: + // Load 9 outputs + MOVQ (R11), R13 + VMOVDQU (R13)(R12*1), Y5 + MOVQ 24(R11), R13 + VMOVDQU (R13)(R12*1), Y6 + MOVQ 48(R11), R13 + VMOVDQU (R13)(R12*1), Y7 + MOVQ 72(R11), R13 + VMOVDQU (R13)(R12*1), Y8 + MOVQ 96(R11), R13 + VMOVDQU (R13)(R12*1), Y9 + MOVQ 120(R11), R13 + VMOVDQU (R13)(R12*1), Y10 + MOVQ 144(R11), R13 + VMOVDQU (R13)(R12*1), Y11 + MOVQ 168(R11), R13 + VMOVDQU (R13)(R12*1), Y12 + MOVQ 192(R11), R13 + VMOVDQU (R13)(R12*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R11), R13 + VMOVDQU Y5, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x9Xor_loop + VZEROUPPER + +mulAvxGFNI_7x9Xor_end: + RET + +// func mulGFNI_7x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x10_64(SB), $0-88 + // Loading 20 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x10_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulGFNI_7x10_64_loop: + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 10 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R11), R13 + VMOVDQU64 Z20, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU64 Z21, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU64 Z22, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU64 Z23, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU64 Z24, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU64 Z25, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU64 Z26, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU64 Z27, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU64 Z28, (R13)(R12*1) + MOVQ 216(R11), R13 + VMOVDQU64 Z29, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x40, R12 + DECQ AX + JNZ mulGFNI_7x10_64_loop + VZEROUPPER + +mulGFNI_7x10_64_end: + RET + +// func mulAvxGFNI_7x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x10(SB), $0-88 + // Loading 4 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R11), R13 + VMOVDQU Y4, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y5, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 216(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x10_loop + VZEROUPPER + +mulAvxGFNI_7x10_end: + RET + +// func mulGFNI_7x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_7x10_64Xor(SB), $0-88 + // Loading 20 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_7x10_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulGFNI_7x10_64Xor_loop: + // Load 10 outputs + MOVQ (R11), R13 + VMOVDQU64 (R13)(R12*1), Z20 + MOVQ 24(R11), R13 + VMOVDQU64 (R13)(R12*1), Z21 + MOVQ 48(R11), R13 + VMOVDQU64 (R13)(R12*1), Z22 + MOVQ 72(R11), R13 + VMOVDQU64 (R13)(R12*1), Z23 + MOVQ 96(R11), R13 + VMOVDQU64 (R13)(R12*1), Z24 + MOVQ 120(R11), R13 + VMOVDQU64 (R13)(R12*1), Z25 + MOVQ 144(R11), R13 + VMOVDQU64 (R13)(R12*1), Z26 + MOVQ 168(R11), R13 + VMOVDQU64 (R13)(R12*1), Z27 + MOVQ 192(R11), R13 + VMOVDQU64 (R13)(R12*1), Z28 + MOVQ 216(R11), R13 + VMOVDQU64 (R13)(R12*1), Z29 + + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 10 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R11), R13 + VMOVDQU64 Z20, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU64 Z21, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU64 Z22, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU64 Z23, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU64 Z24, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU64 Z25, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU64 Z26, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU64 Z27, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU64 Z28, (R13)(R12*1) + MOVQ 216(R11), R13 + VMOVDQU64 Z29, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x40, R12 + DECQ AX + JNZ mulGFNI_7x10_64Xor_loop + VZEROUPPER + +mulGFNI_7x10_64Xor_end: + RET + +// func mulAvxGFNI_7x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_7x10Xor(SB), $0-88 + // Loading 4 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_7x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), DX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ start+72(FP), R12 + + // Add start offset to input + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, DX + +mulAvxGFNI_7x10Xor_loop: + // Load 10 outputs + MOVQ (R11), R13 + VMOVDQU (R13)(R12*1), Y4 + MOVQ 24(R11), R13 + VMOVDQU (R13)(R12*1), Y5 + MOVQ 48(R11), R13 + VMOVDQU (R13)(R12*1), Y6 + MOVQ 72(R11), R13 + VMOVDQU (R13)(R12*1), Y7 + MOVQ 96(R11), R13 + VMOVDQU (R13)(R12*1), Y8 + MOVQ 120(R11), R13 + VMOVDQU (R13)(R12*1), Y9 + MOVQ 144(R11), R13 + VMOVDQU (R13)(R12*1), Y10 + MOVQ 168(R11), R13 + VMOVDQU (R13)(R12*1), Y11 + MOVQ 192(R11), R13 + VMOVDQU (R13)(R12*1), Y12 + MOVQ 216(R11), R13 + VMOVDQU (R13)(R12*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R11), R13 + VMOVDQU Y4, (R13)(R12*1) + MOVQ 24(R11), R13 + VMOVDQU Y5, (R13)(R12*1) + MOVQ 48(R11), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 72(R11), R13 + VMOVDQU Y7, (R13)(R12*1) + MOVQ 96(R11), R13 + VMOVDQU Y8, (R13)(R12*1) + MOVQ 120(R11), R13 + VMOVDQU Y9, (R13)(R12*1) + MOVQ 144(R11), R13 + VMOVDQU Y10, (R13)(R12*1) + MOVQ 168(R11), R13 + VMOVDQU Y11, (R13)(R12*1) + MOVQ 192(R11), R13 + VMOVDQU Y12, (R13)(R12*1) + MOVQ 216(R11), R13 + VMOVDQU Y13, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxGFNI_7x10Xor_loop + VZEROUPPER + +mulAvxGFNI_7x10Xor_end: + RET + +// func mulGFNI_8x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x1_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x1_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), CX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R11 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R11 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, CX + +mulGFNI_8x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z9 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z9, Z8 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z9 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z9 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z9 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (R8), Z9 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z4, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 5 to 1 outputs + VMOVDQU64 (R9), Z9 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z5, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 6 to 1 outputs + VMOVDQU64 (R10), Z9 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z6, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 7 to 1 outputs + VMOVDQU64 (CX), Z9 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z7, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Store 1 outputs + VMOVDQU64 Z8, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_8x1_64_loop + VZEROUPPER + +mulGFNI_8x1_64_end: + RET + +// func mulAvxGFNI_8x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), CX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R11 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R11 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, CX + +mulAvxGFNI_8x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y9 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y9, Y8 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y9 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y9 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y9 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y9 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y9 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R10), Y9 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (CX), Y9 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y7, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Store 1 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x1_loop + VZEROUPPER + +mulAvxGFNI_8x1_end: + RET + +// func mulGFNI_8x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x1_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x1_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), CX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R11 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R11 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, CX + +mulGFNI_8x1_64Xor_loop: + // Load 1 outputs + VMOVDQU64 (R11), Z8 + + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z9 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z9 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z9 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z9 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (R8), Z9 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z4, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 5 to 1 outputs + VMOVDQU64 (R9), Z9 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z5, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 6 to 1 outputs + VMOVDQU64 (R10), Z9 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z6, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Load and process 64 bytes from input 7 to 1 outputs + VMOVDQU64 (CX), Z9 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z7, Z9, Z9 + VXORPD Z8, Z9, Z8 + + // Store 1 outputs + VMOVDQU64 Z8, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_8x1_64Xor_loop + VZEROUPPER + +mulGFNI_8x1_64Xor_end: + RET + +// func mulAvxGFNI_8x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), CX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R11 + MOVQ start+72(FP), R12 + + // Add start offset to output + ADDQ R12, R11 + + // Add start offset to input + ADDQ R12, DX + ADDQ R12, BX + ADDQ R12, SI + ADDQ R12, DI + ADDQ R12, R8 + ADDQ R12, R9 + ADDQ R12, R10 + ADDQ R12, CX + +mulAvxGFNI_8x1Xor_loop: + // Load 1 outputs + VMOVDQU (R11), Y8 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y9 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y9 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y9 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y9 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y9 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y9 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R10), Y9 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (CX), Y9 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y7, Y9, Y9 + VXORPD Y8, Y9, Y8 + + // Store 1 outputs + VMOVDQU Y8, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x1Xor_loop + VZEROUPPER + +mulAvxGFNI_8x1Xor_end: + RET + +// func mulGFNI_8x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x2_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x2_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), CX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R11 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + ADDQ R13, R11 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, CX + +mulGFNI_8x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z18 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z18, Z16 + VGF2P8AFFINEQB $0x00, Z1, Z18, Z17 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z18 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z3, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z18 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z5, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z18 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z7, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (R8), Z18 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z8, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z9, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 5 to 2 outputs + VMOVDQU64 (R9), Z18 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z10, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z11, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 6 to 2 outputs + VMOVDQU64 (R10), Z18 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z12, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z13, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 7 to 2 outputs + VMOVDQU64 (CX), Z18 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z14, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z15, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Store 2 outputs + VMOVDQU64 Z16, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z17, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_8x2_64_loop + VZEROUPPER + +mulGFNI_8x2_64_end: + RET + +// func mulAvxGFNI_8x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x2(SB), $0-88 + // Loading 12 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R12 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R13 + ADDQ R14, R12 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, DX + +mulAvxGFNI_8x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x2_loop + VZEROUPPER + +mulAvxGFNI_8x2_end: + RET + +// func mulGFNI_8x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x2_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x2_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), CX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R11 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + ADDQ R13, R11 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, CX + +mulGFNI_8x2_64Xor_loop: + // Load 2 outputs + VMOVDQU64 (R12), Z16 + VMOVDQU64 (R11), Z17 + + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z18 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z1, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z18 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z3, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z18 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z5, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z18 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z7, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (R8), Z18 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z8, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z9, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 5 to 2 outputs + VMOVDQU64 (R9), Z18 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z10, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z11, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 6 to 2 outputs + VMOVDQU64 (R10), Z18 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z12, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z13, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Load and process 64 bytes from input 7 to 2 outputs + VMOVDQU64 (CX), Z18 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z14, Z18, Z19 + VXORPD Z16, Z19, Z16 + VGF2P8AFFINEQB $0x00, Z15, Z18, Z19 + VXORPD Z17, Z19, Z17 + + // Store 2 outputs + VMOVDQU64 Z16, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z17, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_8x2_64Xor_loop + VZEROUPPER + +mulGFNI_8x2_64Xor_end: + RET + +// func mulAvxGFNI_8x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x2Xor(SB), $0-88 + // Loading 12 of 16 tables to registers + // Destination kept in GP registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R12 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R13 + ADDQ R14, R12 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, DX + +mulAvxGFNI_8x2Xor_loop: + // Load 2 outputs + VMOVDQU (R13), Y12 + VMOVDQU (R12), Y13 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R13) + ADDQ $0x20, R13 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x2Xor_loop + VZEROUPPER + +mulAvxGFNI_8x2Xor_end: + RET + +// func mulGFNI_8x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x3_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x3_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), CX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R11 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R11 + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, CX + +mulGFNI_8x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z27 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z27, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z27, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z27, Z26 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z27 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z27 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z27 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z10, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (R8), Z27 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 5 to 3 outputs + VMOVDQU64 (R9), Z27 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z15, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 6 to 3 outputs + VMOVDQU64 (R10), Z27 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z18, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 7 to 3 outputs + VMOVDQU64 (CX), Z27 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z21, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z22, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z23, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Store 3 outputs + VMOVDQU64 Z24, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z25, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z26, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_8x3_64_loop + VZEROUPPER + +mulGFNI_8x3_64_end: + RET + +// func mulAvxGFNI_8x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x3(SB), $0-88 + // Loading 11 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R12 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R12 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, DX + +mulAvxGFNI_8x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x3_loop + VZEROUPPER + +mulAvxGFNI_8x3_end: + RET + +// func mulGFNI_8x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x3_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x3_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), CX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R11 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R12 + ADDQ R14, R13 + ADDQ R14, R11 + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, CX + +mulGFNI_8x3_64Xor_loop: + // Load 3 outputs + VMOVDQU64 (R12), Z24 + VMOVDQU64 (R13), Z25 + VMOVDQU64 (R11), Z26 + + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z27 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z27 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z27 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z27 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z10, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (R8), Z27 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 5 to 3 outputs + VMOVDQU64 (R9), Z27 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z15, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 6 to 3 outputs + VMOVDQU64 (R10), Z27 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z18, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Load and process 64 bytes from input 7 to 3 outputs + VMOVDQU64 (CX), Z27 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z21, Z27, Z28 + VXORPD Z24, Z28, Z24 + VGF2P8AFFINEQB $0x00, Z22, Z27, Z28 + VXORPD Z25, Z28, Z25 + VGF2P8AFFINEQB $0x00, Z23, Z27, Z28 + VXORPD Z26, Z28, Z26 + + // Store 3 outputs + VMOVDQU64 Z24, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z25, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z26, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_8x3_64Xor_loop + VZEROUPPER + +mulGFNI_8x3_64Xor_end: + RET + +// func mulAvxGFNI_8x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x3Xor(SB), $0-88 + // Loading 11 of 24 tables to registers + // Destination kept in GP registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R12 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R12 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, DX + +mulAvxGFNI_8x3Xor_loop: + // Load 3 outputs + VMOVDQU (R13), Y11 + VMOVDQU (R14), Y12 + VMOVDQU (R12), Y13 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R13) + ADDQ $0x20, R13 + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x3Xor_loop + VZEROUPPER + +mulAvxGFNI_8x3Xor_end: + RET + +// func mulGFNI_8x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x4_64(SB), $8-88 + // Loading 26 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x4_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, DX + +mulGFNI_8x4_64_loop: + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z29 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 4 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 4 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 4 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 4 outputs + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R12) + ADDQ $0x40, R12 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_8x4_64_loop + VZEROUPPER + +mulGFNI_8x4_64_end: + RET + +// func mulAvxGFNI_8x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x4(SB), $8-88 + // Loading 10 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, DX + +mulAvxGFNI_8x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x4_loop + VZEROUPPER + +mulAvxGFNI_8x4_end: + RET + +// func mulGFNI_8x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x4_64Xor(SB), $8-88 + // Loading 26 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x4_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, DX + +mulGFNI_8x4_64Xor_loop: + // Load 4 outputs + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R12), Z29 + + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 4 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 4 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 4 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 4 outputs + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R12) + ADDQ $0x40, R12 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_8x4_64Xor_loop + VZEROUPPER + +mulGFNI_8x4_64Xor_end: + RET + +// func mulAvxGFNI_8x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x4Xor(SB), $8-88 + // Loading 10 of 32 tables to registers + // Destination kept in GP registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, DX + +mulAvxGFNI_8x4Xor_loop: + // Load 4 outputs + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R12), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_8x4Xor_loop + VZEROUPPER + +mulAvxGFNI_8x4Xor_end: + RET + +// func mulGFNI_8x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x5_64(SB), $8-88 + // Loading 25 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x5_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), AX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_8x5_64_loop: + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 5 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 5 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 5 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_8x5_64_loop + VZEROUPPER + +mulGFNI_8x5_64_end: + RET + +// func mulAvxGFNI_8x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x5(SB), $8-88 + // Loading 9 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), AX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_8x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_8x5_loop + VZEROUPPER + +mulAvxGFNI_8x5_end: + RET + +// func mulGFNI_8x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x5_64Xor(SB), $8-88 + // Loading 25 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x5_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), AX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_8x5_64Xor_loop: + // Load 5 outputs + VMOVDQU64 (R12), Z25 + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R11), Z29 + + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 5 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 5 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 5 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + VMOVDQU64 Z25, (R12) + ADDQ $0x40, R12 + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R11) + ADDQ $0x40, R11 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_8x5_64Xor_loop + VZEROUPPER + +mulGFNI_8x5_64Xor_end: + RET + +// func mulAvxGFNI_8x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x5Xor(SB), $8-88 + // Loading 9 of 40 tables to registers + // Destination kept in GP registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), AX + MOVQ out_base+48(FP), R11 + MOVQ out_base+48(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R15 + MOVQ 96(R11), R11 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R11 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_8x5Xor_loop: + // Load 5 outputs + VMOVDQU (R12), Y9 + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R11), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R11) + ADDQ $0x20, R11 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_8x5Xor_loop + VZEROUPPER + +mulAvxGFNI_8x5Xor_end: + RET + +// func mulGFNI_8x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x6_64(SB), $0-88 + // Loading 24 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x6_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulGFNI_8x6_64_loop: + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 6 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 6 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + MOVQ (R12), R14 + VMOVDQU64 Z24, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU64 Z25, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU64 Z26, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU64 Z27, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU64 Z28, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU64 Z29, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x40, R13 + DECQ AX + JNZ mulGFNI_8x6_64_loop + VZEROUPPER + +mulGFNI_8x6_64_end: + RET + +// func mulAvxGFNI_8x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x6(SB), $0-88 + // Loading 8 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x6_loop + VZEROUPPER + +mulAvxGFNI_8x6_end: + RET + +// func mulGFNI_8x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x6_64Xor(SB), $0-88 + // Loading 24 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x6_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulGFNI_8x6_64Xor_loop: + // Load 6 outputs + MOVQ (R12), R14 + VMOVDQU64 (R14)(R13*1), Z24 + MOVQ 24(R12), R14 + VMOVDQU64 (R14)(R13*1), Z25 + MOVQ 48(R12), R14 + VMOVDQU64 (R14)(R13*1), Z26 + MOVQ 72(R12), R14 + VMOVDQU64 (R14)(R13*1), Z27 + MOVQ 96(R12), R14 + VMOVDQU64 (R14)(R13*1), Z28 + MOVQ 120(R12), R14 + VMOVDQU64 (R14)(R13*1), Z29 + + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 6 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 6 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + MOVQ (R12), R14 + VMOVDQU64 Z24, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU64 Z25, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU64 Z26, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU64 Z27, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU64 Z28, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU64 Z29, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x40, R13 + DECQ AX + JNZ mulGFNI_8x6_64Xor_loop + VZEROUPPER + +mulGFNI_8x6_64Xor_end: + RET + +// func mulAvxGFNI_8x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x6Xor(SB), $0-88 + // Loading 8 of 48 tables to registers + // Destination kept on stack + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x6Xor_loop: + // Load 6 outputs + MOVQ (R12), R14 + VMOVDQU (R14)(R13*1), Y8 + MOVQ 24(R12), R14 + VMOVDQU (R14)(R13*1), Y9 + MOVQ 48(R12), R14 + VMOVDQU (R14)(R13*1), Y10 + MOVQ 72(R12), R14 + VMOVDQU (R14)(R13*1), Y11 + MOVQ 96(R12), R14 + VMOVDQU (R14)(R13*1), Y12 + MOVQ 120(R12), R14 + VMOVDQU (R14)(R13*1), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x6Xor_loop + VZEROUPPER + +mulAvxGFNI_8x6Xor_end: + RET + +// func mulGFNI_8x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x7_64(SB), $0-88 + // Loading 23 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x7_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulGFNI_8x7_64_loop: + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 7 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 7 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + MOVQ (R12), R14 + VMOVDQU64 Z23, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU64 Z24, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU64 Z25, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU64 Z26, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU64 Z27, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU64 Z28, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU64 Z29, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x40, R13 + DECQ AX + JNZ mulGFNI_8x7_64_loop + VZEROUPPER + +mulGFNI_8x7_64_end: + RET + +// func mulAvxGFNI_8x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x7(SB), $0-88 + // Loading 7 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x7_loop + VZEROUPPER + +mulAvxGFNI_8x7_end: + RET + +// func mulGFNI_8x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x7_64Xor(SB), $0-88 + // Loading 23 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x7_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulGFNI_8x7_64Xor_loop: + // Load 7 outputs + MOVQ (R12), R14 + VMOVDQU64 (R14)(R13*1), Z23 + MOVQ 24(R12), R14 + VMOVDQU64 (R14)(R13*1), Z24 + MOVQ 48(R12), R14 + VMOVDQU64 (R14)(R13*1), Z25 + MOVQ 72(R12), R14 + VMOVDQU64 (R14)(R13*1), Z26 + MOVQ 96(R12), R14 + VMOVDQU64 (R14)(R13*1), Z27 + MOVQ 120(R12), R14 + VMOVDQU64 (R14)(R13*1), Z28 + MOVQ 144(R12), R14 + VMOVDQU64 (R14)(R13*1), Z29 + + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 7 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 7 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + MOVQ (R12), R14 + VMOVDQU64 Z23, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU64 Z24, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU64 Z25, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU64 Z26, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU64 Z27, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU64 Z28, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU64 Z29, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x40, R13 + DECQ AX + JNZ mulGFNI_8x7_64Xor_loop + VZEROUPPER + +mulGFNI_8x7_64Xor_end: + RET + +// func mulAvxGFNI_8x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x7Xor(SB), $0-88 + // Loading 7 of 56 tables to registers + // Destination kept on stack + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x7Xor_loop: + // Load 7 outputs + MOVQ (R12), R14 + VMOVDQU (R14)(R13*1), Y7 + MOVQ 24(R12), R14 + VMOVDQU (R14)(R13*1), Y8 + MOVQ 48(R12), R14 + VMOVDQU (R14)(R13*1), Y9 + MOVQ 72(R12), R14 + VMOVDQU (R14)(R13*1), Y10 + MOVQ 96(R12), R14 + VMOVDQU (R14)(R13*1), Y11 + MOVQ 120(R12), R14 + VMOVDQU (R14)(R13*1), Y12 + MOVQ 144(R12), R14 + VMOVDQU (R14)(R13*1), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x7Xor_loop + VZEROUPPER + +mulAvxGFNI_8x7Xor_end: + RET + +// func mulGFNI_8x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x8_64(SB), $0-88 + // Loading 22 of 64 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x8_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulGFNI_8x8_64_loop: + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 8 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 8 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + MOVQ (R12), R14 + VMOVDQU64 Z22, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU64 Z23, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU64 Z24, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU64 Z25, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU64 Z26, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU64 Z27, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU64 Z28, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU64 Z29, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x40, R13 + DECQ AX + JNZ mulGFNI_8x8_64_loop + VZEROUPPER + +mulGFNI_8x8_64_end: + RET + +// func mulAvxGFNI_8x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x8(SB), $0-88 + // Loading 6 of 64 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x8_loop + VZEROUPPER + +mulAvxGFNI_8x8_end: + RET + +// func mulGFNI_8x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x8_64Xor(SB), $0-88 + // Loading 22 of 64 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x8_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulGFNI_8x8_64Xor_loop: + // Load 8 outputs + MOVQ (R12), R14 + VMOVDQU64 (R14)(R13*1), Z22 + MOVQ 24(R12), R14 + VMOVDQU64 (R14)(R13*1), Z23 + MOVQ 48(R12), R14 + VMOVDQU64 (R14)(R13*1), Z24 + MOVQ 72(R12), R14 + VMOVDQU64 (R14)(R13*1), Z25 + MOVQ 96(R12), R14 + VMOVDQU64 (R14)(R13*1), Z26 + MOVQ 120(R12), R14 + VMOVDQU64 (R14)(R13*1), Z27 + MOVQ 144(R12), R14 + VMOVDQU64 (R14)(R13*1), Z28 + MOVQ 168(R12), R14 + VMOVDQU64 (R14)(R13*1), Z29 + + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 8 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 8 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + MOVQ (R12), R14 + VMOVDQU64 Z22, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU64 Z23, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU64 Z24, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU64 Z25, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU64 Z26, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU64 Z27, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU64 Z28, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU64 Z29, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x40, R13 + DECQ AX + JNZ mulGFNI_8x8_64Xor_loop + VZEROUPPER + +mulGFNI_8x8_64Xor_end: + RET + +// func mulAvxGFNI_8x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x8Xor(SB), $0-88 + // Loading 6 of 64 tables to registers + // Destination kept on stack + // Full registers estimated 74 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x8Xor_loop: + // Load 8 outputs + MOVQ (R12), R14 + VMOVDQU (R14)(R13*1), Y6 + MOVQ 24(R12), R14 + VMOVDQU (R14)(R13*1), Y7 + MOVQ 48(R12), R14 + VMOVDQU (R14)(R13*1), Y8 + MOVQ 72(R12), R14 + VMOVDQU (R14)(R13*1), Y9 + MOVQ 96(R12), R14 + VMOVDQU (R14)(R13*1), Y10 + MOVQ 120(R12), R14 + VMOVDQU (R14)(R13*1), Y11 + MOVQ 144(R12), R14 + VMOVDQU (R14)(R13*1), Y12 + MOVQ 168(R12), R14 + VMOVDQU (R14)(R13*1), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x8Xor_loop + VZEROUPPER + +mulAvxGFNI_8x8Xor_end: + RET + +// func mulGFNI_8x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x9_64(SB), $0-88 + // Loading 21 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 83 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x9_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulGFNI_8x9_64_loop: + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 9 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 9 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R12), R14 + VMOVDQU64 Z21, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU64 Z22, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU64 Z23, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU64 Z24, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU64 Z25, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU64 Z26, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU64 Z27, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU64 Z28, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU64 Z29, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x40, R13 + DECQ AX + JNZ mulGFNI_8x9_64_loop + VZEROUPPER + +mulGFNI_8x9_64_end: + RET + +// func mulAvxGFNI_8x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x9(SB), $0-88 + // Loading 5 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 83 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R12), R14 + VMOVDQU Y5, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x9_loop + VZEROUPPER + +mulAvxGFNI_8x9_end: + RET + +// func mulGFNI_8x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x9_64Xor(SB), $0-88 + // Loading 21 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 83 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x9_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulGFNI_8x9_64Xor_loop: + // Load 9 outputs + MOVQ (R12), R14 + VMOVDQU64 (R14)(R13*1), Z21 + MOVQ 24(R12), R14 + VMOVDQU64 (R14)(R13*1), Z22 + MOVQ 48(R12), R14 + VMOVDQU64 (R14)(R13*1), Z23 + MOVQ 72(R12), R14 + VMOVDQU64 (R14)(R13*1), Z24 + MOVQ 96(R12), R14 + VMOVDQU64 (R14)(R13*1), Z25 + MOVQ 120(R12), R14 + VMOVDQU64 (R14)(R13*1), Z26 + MOVQ 144(R12), R14 + VMOVDQU64 (R14)(R13*1), Z27 + MOVQ 168(R12), R14 + VMOVDQU64 (R14)(R13*1), Z28 + MOVQ 192(R12), R14 + VMOVDQU64 (R14)(R13*1), Z29 + + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 9 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 9 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R12), R14 + VMOVDQU64 Z21, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU64 Z22, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU64 Z23, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU64 Z24, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU64 Z25, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU64 Z26, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU64 Z27, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU64 Z28, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU64 Z29, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x40, R13 + DECQ AX + JNZ mulGFNI_8x9_64Xor_loop + VZEROUPPER + +mulGFNI_8x9_64Xor_end: + RET + +// func mulAvxGFNI_8x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x9Xor(SB), $0-88 + // Loading 5 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 83 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x9Xor_loop: + // Load 9 outputs + MOVQ (R12), R14 + VMOVDQU (R14)(R13*1), Y5 + MOVQ 24(R12), R14 + VMOVDQU (R14)(R13*1), Y6 + MOVQ 48(R12), R14 + VMOVDQU (R14)(R13*1), Y7 + MOVQ 72(R12), R14 + VMOVDQU (R14)(R13*1), Y8 + MOVQ 96(R12), R14 + VMOVDQU (R14)(R13*1), Y9 + MOVQ 120(R12), R14 + VMOVDQU (R14)(R13*1), Y10 + MOVQ 144(R12), R14 + VMOVDQU (R14)(R13*1), Y11 + MOVQ 168(R12), R14 + VMOVDQU (R14)(R13*1), Y12 + MOVQ 192(R12), R14 + VMOVDQU (R14)(R13*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R12), R14 + VMOVDQU Y5, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x9Xor_loop + VZEROUPPER + +mulAvxGFNI_8x9Xor_end: + RET + +// func mulGFNI_8x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x10_64(SB), $0-88 + // Loading 20 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x10_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulGFNI_8x10_64_loop: + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 10 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 10 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R12), R14 + VMOVDQU64 Z20, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU64 Z21, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU64 Z22, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU64 Z23, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU64 Z24, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU64 Z25, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU64 Z26, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU64 Z27, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU64 Z28, (R14)(R13*1) + MOVQ 216(R12), R14 + VMOVDQU64 Z29, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x40, R13 + DECQ AX + JNZ mulGFNI_8x10_64_loop + VZEROUPPER + +mulGFNI_8x10_64_end: + RET + +// func mulAvxGFNI_8x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x10(SB), $0-88 + // Loading 4 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R12), R14 + VMOVDQU Y4, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y5, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 216(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x10_loop + VZEROUPPER + +mulAvxGFNI_8x10_end: + RET + +// func mulGFNI_8x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_8x10_64Xor(SB), $0-88 + // Loading 20 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_8x10_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulGFNI_8x10_64Xor_loop: + // Load 10 outputs + MOVQ (R12), R14 + VMOVDQU64 (R14)(R13*1), Z20 + MOVQ 24(R12), R14 + VMOVDQU64 (R14)(R13*1), Z21 + MOVQ 48(R12), R14 + VMOVDQU64 (R14)(R13*1), Z22 + MOVQ 72(R12), R14 + VMOVDQU64 (R14)(R13*1), Z23 + MOVQ 96(R12), R14 + VMOVDQU64 (R14)(R13*1), Z24 + MOVQ 120(R12), R14 + VMOVDQU64 (R14)(R13*1), Z25 + MOVQ 144(R12), R14 + VMOVDQU64 (R14)(R13*1), Z26 + MOVQ 168(R12), R14 + VMOVDQU64 (R14)(R13*1), Z27 + MOVQ 192(R12), R14 + VMOVDQU64 (R14)(R13*1), Z28 + MOVQ 216(R12), R14 + VMOVDQU64 (R14)(R13*1), Z29 + + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 10 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 10 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R12), R14 + VMOVDQU64 Z20, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU64 Z21, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU64 Z22, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU64 Z23, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU64 Z24, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU64 Z25, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU64 Z26, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU64 Z27, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU64 Z28, (R14)(R13*1) + MOVQ 216(R12), R14 + VMOVDQU64 Z29, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x40, R13 + DECQ AX + JNZ mulGFNI_8x10_64Xor_loop + VZEROUPPER + +mulGFNI_8x10_64Xor_end: + RET + +// func mulAvxGFNI_8x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_8x10Xor(SB), $0-88 + // Loading 4 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_8x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), DX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ start+72(FP), R13 + + // Add start offset to input + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, DX + +mulAvxGFNI_8x10Xor_loop: + // Load 10 outputs + MOVQ (R12), R14 + VMOVDQU (R14)(R13*1), Y4 + MOVQ 24(R12), R14 + VMOVDQU (R14)(R13*1), Y5 + MOVQ 48(R12), R14 + VMOVDQU (R14)(R13*1), Y6 + MOVQ 72(R12), R14 + VMOVDQU (R14)(R13*1), Y7 + MOVQ 96(R12), R14 + VMOVDQU (R14)(R13*1), Y8 + MOVQ 120(R12), R14 + VMOVDQU (R14)(R13*1), Y9 + MOVQ 144(R12), R14 + VMOVDQU (R14)(R13*1), Y10 + MOVQ 168(R12), R14 + VMOVDQU (R14)(R13*1), Y11 + MOVQ 192(R12), R14 + VMOVDQU (R14)(R13*1), Y12 + MOVQ 216(R12), R14 + VMOVDQU (R14)(R13*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R12), R14 + VMOVDQU Y4, (R14)(R13*1) + MOVQ 24(R12), R14 + VMOVDQU Y5, (R14)(R13*1) + MOVQ 48(R12), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 72(R12), R14 + VMOVDQU Y7, (R14)(R13*1) + MOVQ 96(R12), R14 + VMOVDQU Y8, (R14)(R13*1) + MOVQ 120(R12), R14 + VMOVDQU Y9, (R14)(R13*1) + MOVQ 144(R12), R14 + VMOVDQU Y10, (R14)(R13*1) + MOVQ 168(R12), R14 + VMOVDQU Y11, (R14)(R13*1) + MOVQ 192(R12), R14 + VMOVDQU Y12, (R14)(R13*1) + MOVQ 216(R12), R14 + VMOVDQU Y13, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxGFNI_8x10Xor_loop + VZEROUPPER + +mulAvxGFNI_8x10Xor_end: + RET + +// func mulGFNI_9x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x1_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x1_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), CX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R12 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, CX + +mulGFNI_9x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z10 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z10, Z9 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z10 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z10 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z10 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (R8), Z10 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z4, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 5 to 1 outputs + VMOVDQU64 (R9), Z10 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z5, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 6 to 1 outputs + VMOVDQU64 (R10), Z10 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z6, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 7 to 1 outputs + VMOVDQU64 (R11), Z10 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z7, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 8 to 1 outputs + VMOVDQU64 (CX), Z10 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z8, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Store 1 outputs + VMOVDQU64 Z9, (R12) + ADDQ $0x40, R12 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_9x1_64_loop + VZEROUPPER + +mulGFNI_9x1_64_end: + RET + +// func mulAvxGFNI_9x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), CX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R12 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, CX + +mulAvxGFNI_9x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y10 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y10, Y9 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y10 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y10 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y10 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y10 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y10 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R10), Y10 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (R11), Y10 + ADDQ $0x20, R11 + VGF2P8AFFINEQB $0x00, Y7, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 8 to 1 outputs + VMOVDQU (CX), Y10 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y8, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Store 1 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_9x1_loop + VZEROUPPER + +mulAvxGFNI_9x1_end: + RET + +// func mulGFNI_9x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x1_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x1_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), CX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R12 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, CX + +mulGFNI_9x1_64Xor_loop: + // Load 1 outputs + VMOVDQU64 (R12), Z9 + + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z10 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z10 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z10 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z10 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (R8), Z10 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z4, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 5 to 1 outputs + VMOVDQU64 (R9), Z10 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z5, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 6 to 1 outputs + VMOVDQU64 (R10), Z10 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z6, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 7 to 1 outputs + VMOVDQU64 (R11), Z10 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z7, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Load and process 64 bytes from input 8 to 1 outputs + VMOVDQU64 (CX), Z10 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z8, Z10, Z10 + VXORPD Z9, Z10, Z9 + + // Store 1 outputs + VMOVDQU64 Z9, (R12) + ADDQ $0x40, R12 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_9x1_64Xor_loop + VZEROUPPER + +mulGFNI_9x1_64Xor_end: + RET + +// func mulAvxGFNI_9x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 12 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), CX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R12 + MOVQ start+72(FP), R13 + + // Add start offset to output + ADDQ R13, R12 + + // Add start offset to input + ADDQ R13, DX + ADDQ R13, BX + ADDQ R13, SI + ADDQ R13, DI + ADDQ R13, R8 + ADDQ R13, R9 + ADDQ R13, R10 + ADDQ R13, R11 + ADDQ R13, CX + +mulAvxGFNI_9x1Xor_loop: + // Load 1 outputs + VMOVDQU (R12), Y9 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y10 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y10 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y10 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y10 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y10 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y10 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R10), Y10 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (R11), Y10 + ADDQ $0x20, R11 + VGF2P8AFFINEQB $0x00, Y7, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Load and process 32 bytes from input 8 to 1 outputs + VMOVDQU (CX), Y10 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y8, Y10, Y10 + VXORPD Y9, Y10, Y9 + + // Store 1 outputs + VMOVDQU Y9, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_9x1Xor_loop + VZEROUPPER + +mulAvxGFNI_9x1Xor_end: + RET + +// func mulGFNI_9x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x2_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x2_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), CX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R12 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R13 + ADDQ R14, R12 + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, CX + +mulGFNI_9x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z20 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z20, Z18 + VGF2P8AFFINEQB $0x00, Z1, Z20, Z19 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z20 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z3, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z20 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z5, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z20 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z7, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (R8), Z20 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z8, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z9, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 5 to 2 outputs + VMOVDQU64 (R9), Z20 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z10, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z11, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 6 to 2 outputs + VMOVDQU64 (R10), Z20 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z12, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z13, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 7 to 2 outputs + VMOVDQU64 (R11), Z20 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z14, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z15, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 8 to 2 outputs + VMOVDQU64 (CX), Z20 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z16, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z17, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Store 2 outputs + VMOVDQU64 Z18, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z19, (R12) + ADDQ $0x40, R12 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_9x2_64_loop + VZEROUPPER + +mulGFNI_9x2_64_end: + RET + +// func mulAvxGFNI_9x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x2(SB), $0-88 + // Loading 12 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R13 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R14 + ADDQ R15, R13 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, DX + +mulAvxGFNI_9x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_9x2_loop + VZEROUPPER + +mulAvxGFNI_9x2_end: + RET + +// func mulGFNI_9x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x2_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x2_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), CX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R12 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R13 + ADDQ R14, R12 + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, CX + +mulGFNI_9x2_64Xor_loop: + // Load 2 outputs + VMOVDQU64 (R13), Z18 + VMOVDQU64 (R12), Z19 + + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z20 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z1, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z20 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z3, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z20 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z5, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z20 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z7, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (R8), Z20 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z8, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z9, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 5 to 2 outputs + VMOVDQU64 (R9), Z20 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z10, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z11, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 6 to 2 outputs + VMOVDQU64 (R10), Z20 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z12, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z13, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 7 to 2 outputs + VMOVDQU64 (R11), Z20 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z14, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z15, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Load and process 64 bytes from input 8 to 2 outputs + VMOVDQU64 (CX), Z20 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z16, Z20, Z21 + VXORPD Z18, Z21, Z18 + VGF2P8AFFINEQB $0x00, Z17, Z20, Z21 + VXORPD Z19, Z21, Z19 + + // Store 2 outputs + VMOVDQU64 Z18, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z19, (R12) + ADDQ $0x40, R12 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_9x2_64Xor_loop + VZEROUPPER + +mulGFNI_9x2_64Xor_end: + RET + +// func mulAvxGFNI_9x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x2Xor(SB), $0-88 + // Loading 12 of 18 tables to registers + // Destination kept in GP registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R13 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R14 + ADDQ R15, R13 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, DX + +mulAvxGFNI_9x2Xor_loop: + // Load 2 outputs + VMOVDQU (R14), Y12 + VMOVDQU (R13), Y13 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R14) + ADDQ $0x20, R14 + VMOVDQU Y13, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_9x2Xor_loop + VZEROUPPER + +mulAvxGFNI_9x2Xor_end: + RET + +// func mulGFNI_9x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x3_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x3_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + VBROADCASTF32X2 208(CX), Z26 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), CX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R12 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R12 + + // Add start offset to input + ADDQ R15, DX + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, CX + +mulGFNI_9x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z29 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 3 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 3 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 3 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 3 outputs + VMOVDQU64 (CX), Z30 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z26, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 3 outputs + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (R12) + ADDQ $0x40, R12 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_9x3_64_loop + VZEROUPPER + +mulGFNI_9x3_64_end: + RET + +// func mulAvxGFNI_9x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x3(SB), $8-88 + // Loading 11 of 27 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R15 + MOVQ 48(R13), R13 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R13 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, DX + +mulAvxGFNI_9x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_9x3_loop + VZEROUPPER + +mulAvxGFNI_9x3_end: + RET + +// func mulGFNI_9x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x3_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x3_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + VBROADCASTF32X2 208(CX), Z26 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), CX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R12 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R13 + ADDQ R15, R14 + ADDQ R15, R12 + + // Add start offset to input + ADDQ R15, DX + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, CX + +mulGFNI_9x3_64Xor_loop: + // Load 3 outputs + VMOVDQU64 (R13), Z27 + VMOVDQU64 (R14), Z28 + VMOVDQU64 (R12), Z29 + + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 3 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 3 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 3 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 3 outputs + VMOVDQU64 (CX), Z30 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z26, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 3 outputs + VMOVDQU64 Z27, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z28, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z29, (R12) + ADDQ $0x40, R12 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_9x3_64Xor_loop + VZEROUPPER + +mulGFNI_9x3_64Xor_end: + RET + +// func mulAvxGFNI_9x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x3Xor(SB), $8-88 + // Loading 11 of 27 tables to registers + // Destination kept in GP registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R15 + MOVQ 48(R13), R13 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R13 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, DX + +mulAvxGFNI_9x3Xor_loop: + // Load 3 outputs + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R13), Y13 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_9x3Xor_loop + VZEROUPPER + +mulAvxGFNI_9x3Xor_end: + RET + +// func mulGFNI_9x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x4_64(SB), $8-88 + // Loading 26 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x4_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), AX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_9x4_64_loop: + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z29 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 4 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 4 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 4 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 4 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 4 outputs + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R12) + ADDQ $0x40, R12 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_9x4_64_loop + VZEROUPPER + +mulGFNI_9x4_64_end: + RET + +// func mulAvxGFNI_9x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x4(SB), $8-88 + // Loading 10 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), AX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_9x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 4 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_9x4_loop + VZEROUPPER + +mulAvxGFNI_9x4_end: + RET + +// func mulGFNI_9x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x4_64Xor(SB), $8-88 + // Loading 26 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x4_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), AX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_9x4_64Xor_loop: + // Load 4 outputs + VMOVDQU64 (R13), Z26 + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R12), Z29 + + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 4 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 4 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 4 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 4 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 4 outputs + VMOVDQU64 Z26, (R13) + ADDQ $0x40, R13 + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R12) + ADDQ $0x40, R12 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_9x4_64Xor_loop + VZEROUPPER + +mulGFNI_9x4_64Xor_end: + RET + +// func mulAvxGFNI_9x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x4Xor(SB), $8-88 + // Loading 10 of 36 tables to registers + // Destination kept in GP registers + // Full registers estimated 42 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), AX + MOVQ out_base+48(FP), R12 + MOVQ out_base+48(FP), R12 + MOVQ (R12), R13 + MOVQ 24(R12), R14 + MOVQ 48(R12), R15 + MOVQ 72(R12), R12 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R13 + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R12 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_9x4Xor_loop: + // Load 4 outputs + VMOVDQU (R13), Y10 + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R12), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 4 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R12) + ADDQ $0x20, R12 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_9x4Xor_loop + VZEROUPPER + +mulAvxGFNI_9x4Xor_end: + RET + +// func mulGFNI_9x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x5_64(SB), $0-88 + // Loading 25 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x5_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x5_64_loop: + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 5 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 5 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 5 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + MOVQ (R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x5_64_loop + VZEROUPPER + +mulGFNI_9x5_64_end: + RET + +// func mulAvxGFNI_9x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x5(SB), $0-88 + // Loading 9 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + MOVQ (R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x5_loop + VZEROUPPER + +mulAvxGFNI_9x5_end: + RET + +// func mulGFNI_9x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x5_64Xor(SB), $0-88 + // Loading 25 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x5_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x5_64Xor_loop: + // Load 5 outputs + MOVQ (R13), R15 + VMOVDQU64 (R15)(R14*1), Z25 + MOVQ 24(R13), R15 + VMOVDQU64 (R15)(R14*1), Z26 + MOVQ 48(R13), R15 + VMOVDQU64 (R15)(R14*1), Z27 + MOVQ 72(R13), R15 + VMOVDQU64 (R15)(R14*1), Z28 + MOVQ 96(R13), R15 + VMOVDQU64 (R15)(R14*1), Z29 + + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 5 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 5 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 5 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + MOVQ (R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x5_64Xor_loop + VZEROUPPER + +mulGFNI_9x5_64Xor_end: + RET + +// func mulAvxGFNI_9x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x5Xor(SB), $0-88 + // Loading 9 of 45 tables to registers + // Destination kept on stack + // Full registers estimated 52 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x5Xor_loop: + // Load 5 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + MOVQ (R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x5Xor_loop + VZEROUPPER + +mulAvxGFNI_9x5Xor_end: + RET + +// func mulGFNI_9x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x6_64(SB), $0-88 + // Loading 24 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x6_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x6_64_loop: + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 6 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 6 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 6 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + MOVQ (R13), R15 + VMOVDQU64 Z24, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x6_64_loop + VZEROUPPER + +mulGFNI_9x6_64_end: + RET + +// func mulAvxGFNI_9x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x6(SB), $0-88 + // Loading 8 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x6_loop + VZEROUPPER + +mulAvxGFNI_9x6_end: + RET + +// func mulGFNI_9x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x6_64Xor(SB), $0-88 + // Loading 24 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x6_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x6_64Xor_loop: + // Load 6 outputs + MOVQ (R13), R15 + VMOVDQU64 (R15)(R14*1), Z24 + MOVQ 24(R13), R15 + VMOVDQU64 (R15)(R14*1), Z25 + MOVQ 48(R13), R15 + VMOVDQU64 (R15)(R14*1), Z26 + MOVQ 72(R13), R15 + VMOVDQU64 (R15)(R14*1), Z27 + MOVQ 96(R13), R15 + VMOVDQU64 (R15)(R14*1), Z28 + MOVQ 120(R13), R15 + VMOVDQU64 (R15)(R14*1), Z29 + + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 6 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 6 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 6 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + MOVQ (R13), R15 + VMOVDQU64 Z24, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x6_64Xor_loop + VZEROUPPER + +mulGFNI_9x6_64Xor_end: + RET + +// func mulAvxGFNI_9x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x6Xor(SB), $0-88 + // Loading 8 of 54 tables to registers + // Destination kept on stack + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x6Xor_loop: + // Load 6 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y8 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 120(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x6Xor_loop + VZEROUPPER + +mulAvxGFNI_9x6Xor_end: + RET + +// func mulGFNI_9x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x7_64(SB), $0-88 + // Loading 23 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x7_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x7_64_loop: + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 7 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 7 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 7 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + MOVQ (R13), R15 + VMOVDQU64 Z23, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z24, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x7_64_loop + VZEROUPPER + +mulGFNI_9x7_64_end: + RET + +// func mulAvxGFNI_9x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x7(SB), $0-88 + // Loading 7 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x7_loop + VZEROUPPER + +mulAvxGFNI_9x7_end: + RET + +// func mulGFNI_9x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x7_64Xor(SB), $0-88 + // Loading 23 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x7_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x7_64Xor_loop: + // Load 7 outputs + MOVQ (R13), R15 + VMOVDQU64 (R15)(R14*1), Z23 + MOVQ 24(R13), R15 + VMOVDQU64 (R15)(R14*1), Z24 + MOVQ 48(R13), R15 + VMOVDQU64 (R15)(R14*1), Z25 + MOVQ 72(R13), R15 + VMOVDQU64 (R15)(R14*1), Z26 + MOVQ 96(R13), R15 + VMOVDQU64 (R15)(R14*1), Z27 + MOVQ 120(R13), R15 + VMOVDQU64 (R15)(R14*1), Z28 + MOVQ 144(R13), R15 + VMOVDQU64 (R15)(R14*1), Z29 + + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 7 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 7 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 7 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + MOVQ (R13), R15 + VMOVDQU64 Z23, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z24, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x7_64Xor_loop + VZEROUPPER + +mulGFNI_9x7_64Xor_end: + RET + +// func mulAvxGFNI_9x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x7Xor(SB), $0-88 + // Loading 7 of 63 tables to registers + // Destination kept on stack + // Full registers estimated 72 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x7Xor_loop: + // Load 7 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y7 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y8 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 120(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 144(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x7Xor_loop + VZEROUPPER + +mulAvxGFNI_9x7Xor_end: + RET + +// func mulGFNI_9x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x8_64(SB), $0-88 + // Loading 22 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x8_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x8_64_loop: + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 8 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 8 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 8 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + MOVQ (R13), R15 + VMOVDQU64 Z22, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z23, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z24, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x8_64_loop + VZEROUPPER + +mulGFNI_9x8_64_end: + RET + +// func mulAvxGFNI_9x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x8(SB), $0-88 + // Loading 6 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x8_loop + VZEROUPPER + +mulAvxGFNI_9x8_end: + RET + +// func mulGFNI_9x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x8_64Xor(SB), $0-88 + // Loading 22 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x8_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x8_64Xor_loop: + // Load 8 outputs + MOVQ (R13), R15 + VMOVDQU64 (R15)(R14*1), Z22 + MOVQ 24(R13), R15 + VMOVDQU64 (R15)(R14*1), Z23 + MOVQ 48(R13), R15 + VMOVDQU64 (R15)(R14*1), Z24 + MOVQ 72(R13), R15 + VMOVDQU64 (R15)(R14*1), Z25 + MOVQ 96(R13), R15 + VMOVDQU64 (R15)(R14*1), Z26 + MOVQ 120(R13), R15 + VMOVDQU64 (R15)(R14*1), Z27 + MOVQ 144(R13), R15 + VMOVDQU64 (R15)(R14*1), Z28 + MOVQ 168(R13), R15 + VMOVDQU64 (R15)(R14*1), Z29 + + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 8 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 8 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 8 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + MOVQ (R13), R15 + VMOVDQU64 Z22, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z23, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z24, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x8_64Xor_loop + VZEROUPPER + +mulGFNI_9x8_64Xor_end: + RET + +// func mulAvxGFNI_9x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x8Xor(SB), $0-88 + // Loading 6 of 72 tables to registers + // Destination kept on stack + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x8Xor_loop: + // Load 8 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y6 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y7 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y8 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 120(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 144(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 168(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x8Xor_loop + VZEROUPPER + +mulAvxGFNI_9x8Xor_end: + RET + +// func mulGFNI_9x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x9_64(SB), $0-88 + // Loading 21 of 81 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x9_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x9_64_loop: + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 9 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 9 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 9 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R13), R15 + VMOVDQU64 Z21, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z22, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z23, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z24, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x9_64_loop + VZEROUPPER + +mulGFNI_9x9_64_end: + RET + +// func mulAvxGFNI_9x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x9(SB), $0-88 + // Loading 5 of 81 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R13), R15 + VMOVDQU Y5, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x9_loop + VZEROUPPER + +mulAvxGFNI_9x9_end: + RET + +// func mulGFNI_9x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x9_64Xor(SB), $0-88 + // Loading 21 of 81 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x9_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x9_64Xor_loop: + // Load 9 outputs + MOVQ (R13), R15 + VMOVDQU64 (R15)(R14*1), Z21 + MOVQ 24(R13), R15 + VMOVDQU64 (R15)(R14*1), Z22 + MOVQ 48(R13), R15 + VMOVDQU64 (R15)(R14*1), Z23 + MOVQ 72(R13), R15 + VMOVDQU64 (R15)(R14*1), Z24 + MOVQ 96(R13), R15 + VMOVDQU64 (R15)(R14*1), Z25 + MOVQ 120(R13), R15 + VMOVDQU64 (R15)(R14*1), Z26 + MOVQ 144(R13), R15 + VMOVDQU64 (R15)(R14*1), Z27 + MOVQ 168(R13), R15 + VMOVDQU64 (R15)(R14*1), Z28 + MOVQ 192(R13), R15 + VMOVDQU64 (R15)(R14*1), Z29 + + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 9 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 9 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 9 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R13), R15 + VMOVDQU64 Z21, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z22, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z23, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z24, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x9_64Xor_loop + VZEROUPPER + +mulGFNI_9x9_64Xor_end: + RET + +// func mulAvxGFNI_9x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x9Xor(SB), $0-88 + // Loading 5 of 81 tables to registers + // Destination kept on stack + // Full registers estimated 92 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x9Xor_loop: + // Load 9 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y5 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y6 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y7 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y8 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 120(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 144(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 168(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 192(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R13), R15 + VMOVDQU Y5, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x9Xor_loop + VZEROUPPER + +mulAvxGFNI_9x9Xor_end: + RET + +// func mulGFNI_9x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x10_64(SB), $0-88 + // Loading 20 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 102 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x10_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x10_64_loop: + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 10 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 10 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 10 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R13), R15 + VMOVDQU64 Z20, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z21, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z22, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z23, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z24, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 216(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x10_64_loop + VZEROUPPER + +mulGFNI_9x10_64_end: + RET + +// func mulAvxGFNI_9x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x10(SB), $0-88 + // Loading 4 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 102 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R13), R15 + VMOVDQU Y4, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y5, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 216(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x10_loop + VZEROUPPER + +mulAvxGFNI_9x10_end: + RET + +// func mulGFNI_9x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_9x10_64Xor(SB), $0-88 + // Loading 20 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 102 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_9x10_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulGFNI_9x10_64Xor_loop: + // Load 10 outputs + MOVQ (R13), R15 + VMOVDQU64 (R15)(R14*1), Z20 + MOVQ 24(R13), R15 + VMOVDQU64 (R15)(R14*1), Z21 + MOVQ 48(R13), R15 + VMOVDQU64 (R15)(R14*1), Z22 + MOVQ 72(R13), R15 + VMOVDQU64 (R15)(R14*1), Z23 + MOVQ 96(R13), R15 + VMOVDQU64 (R15)(R14*1), Z24 + MOVQ 120(R13), R15 + VMOVDQU64 (R15)(R14*1), Z25 + MOVQ 144(R13), R15 + VMOVDQU64 (R15)(R14*1), Z26 + MOVQ 168(R13), R15 + VMOVDQU64 (R15)(R14*1), Z27 + MOVQ 192(R13), R15 + VMOVDQU64 (R15)(R14*1), Z28 + MOVQ 216(R13), R15 + VMOVDQU64 (R15)(R14*1), Z29 + + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 10 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 10 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 10 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R13), R15 + VMOVDQU64 Z20, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU64 Z21, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU64 Z22, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU64 Z23, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU64 Z24, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU64 Z25, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU64 Z26, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU64 Z27, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU64 Z28, (R15)(R14*1) + MOVQ 216(R13), R15 + VMOVDQU64 Z29, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x40, R14 + DECQ AX + JNZ mulGFNI_9x10_64Xor_loop + VZEROUPPER + +mulGFNI_9x10_64Xor_end: + RET + +// func mulAvxGFNI_9x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_9x10Xor(SB), $0-88 + // Loading 4 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 102 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_9x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), DX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ start+72(FP), R14 + + // Add start offset to input + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, DX + +mulAvxGFNI_9x10Xor_loop: + // Load 10 outputs + MOVQ (R13), R15 + VMOVDQU (R15)(R14*1), Y4 + MOVQ 24(R13), R15 + VMOVDQU (R15)(R14*1), Y5 + MOVQ 48(R13), R15 + VMOVDQU (R15)(R14*1), Y6 + MOVQ 72(R13), R15 + VMOVDQU (R15)(R14*1), Y7 + MOVQ 96(R13), R15 + VMOVDQU (R15)(R14*1), Y8 + MOVQ 120(R13), R15 + VMOVDQU (R15)(R14*1), Y9 + MOVQ 144(R13), R15 + VMOVDQU (R15)(R14*1), Y10 + MOVQ 168(R13), R15 + VMOVDQU (R15)(R14*1), Y11 + MOVQ 192(R13), R15 + VMOVDQU (R15)(R14*1), Y12 + MOVQ 216(R13), R15 + VMOVDQU (R15)(R14*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R13), R15 + VMOVDQU Y4, (R15)(R14*1) + MOVQ 24(R13), R15 + VMOVDQU Y5, (R15)(R14*1) + MOVQ 48(R13), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 72(R13), R15 + VMOVDQU Y7, (R15)(R14*1) + MOVQ 96(R13), R15 + VMOVDQU Y8, (R15)(R14*1) + MOVQ 120(R13), R15 + VMOVDQU Y9, (R15)(R14*1) + MOVQ 144(R13), R15 + VMOVDQU Y10, (R15)(R14*1) + MOVQ 168(R13), R15 + VMOVDQU Y11, (R15)(R14*1) + MOVQ 192(R13), R15 + VMOVDQU Y12, (R15)(R14*1) + MOVQ 216(R13), R15 + VMOVDQU Y13, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxGFNI_9x10Xor_loop + VZEROUPPER + +mulAvxGFNI_9x10Xor_end: + RET + +// func mulGFNI_10x1_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x1_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 13 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x1_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), R12 + MOVQ 216(CX), CX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R13 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R13 + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, CX + +mulGFNI_10x1_64_loop: + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z11 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z11, Z10 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z11 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z11 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z11 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (R8), Z11 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z4, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 5 to 1 outputs + VMOVDQU64 (R9), Z11 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z5, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 6 to 1 outputs + VMOVDQU64 (R10), Z11 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z6, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 7 to 1 outputs + VMOVDQU64 (R11), Z11 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z7, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 8 to 1 outputs + VMOVDQU64 (R12), Z11 + ADDQ $0x40, R12 + VGF2P8AFFINEQB $0x00, Z8, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 9 to 1 outputs + VMOVDQU64 (CX), Z11 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z9, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Store 1 outputs + VMOVDQU64 Z10, (R13) + ADDQ $0x40, R13 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_10x1_64_loop + VZEROUPPER + +mulGFNI_10x1_64_end: + RET + +// func mulAvxGFNI_10x1(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x1(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 13 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x1_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), R12 + MOVQ 216(CX), CX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R13 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R13 + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, CX + +mulAvxGFNI_10x1_loop: + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y11 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y11, Y10 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y11 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y11 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y11 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y11 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y11 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R10), Y11 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (R11), Y11 + ADDQ $0x20, R11 + VGF2P8AFFINEQB $0x00, Y7, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 8 to 1 outputs + VMOVDQU (R12), Y11 + ADDQ $0x20, R12 + VGF2P8AFFINEQB $0x00, Y8, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 9 to 1 outputs + VMOVDQU (CX), Y11 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y9, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Store 1 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_10x1_loop + VZEROUPPER + +mulAvxGFNI_10x1_end: + RET + +// func mulGFNI_10x1_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x1_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 13 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x1_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), R12 + MOVQ 216(CX), CX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R13 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R13 + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, CX + +mulGFNI_10x1_64Xor_loop: + // Load 1 outputs + VMOVDQU64 (R13), Z10 + + // Load and process 64 bytes from input 0 to 1 outputs + VMOVDQU64 (DX), Z11 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 1 to 1 outputs + VMOVDQU64 (BX), Z11 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z1, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 2 to 1 outputs + VMOVDQU64 (SI), Z11 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z2, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 3 to 1 outputs + VMOVDQU64 (DI), Z11 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z3, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 4 to 1 outputs + VMOVDQU64 (R8), Z11 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z4, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 5 to 1 outputs + VMOVDQU64 (R9), Z11 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z5, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 6 to 1 outputs + VMOVDQU64 (R10), Z11 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z6, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 7 to 1 outputs + VMOVDQU64 (R11), Z11 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z7, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 8 to 1 outputs + VMOVDQU64 (R12), Z11 + ADDQ $0x40, R12 + VGF2P8AFFINEQB $0x00, Z8, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Load and process 64 bytes from input 9 to 1 outputs + VMOVDQU64 (CX), Z11 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z9, Z11, Z11 + VXORPD Z10, Z11, Z10 + + // Store 1 outputs + VMOVDQU64 Z10, (R13) + ADDQ $0x40, R13 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_10x1_64Xor_loop + VZEROUPPER + +mulGFNI_10x1_64Xor_end: + RET + +// func mulAvxGFNI_10x1Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x1Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 13 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x1Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), R12 + MOVQ 216(CX), CX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R13 + MOVQ start+72(FP), R14 + + // Add start offset to output + ADDQ R14, R13 + + // Add start offset to input + ADDQ R14, DX + ADDQ R14, BX + ADDQ R14, SI + ADDQ R14, DI + ADDQ R14, R8 + ADDQ R14, R9 + ADDQ R14, R10 + ADDQ R14, R11 + ADDQ R14, R12 + ADDQ R14, CX + +mulAvxGFNI_10x1Xor_loop: + // Load 1 outputs + VMOVDQU (R13), Y10 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (DX), Y11 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BX), Y11 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y1, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI), Y11 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI), Y11 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y3, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8), Y11 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y4, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R9), Y11 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y5, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R10), Y11 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y6, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (R11), Y11 + ADDQ $0x20, R11 + VGF2P8AFFINEQB $0x00, Y7, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 8 to 1 outputs + VMOVDQU (R12), Y11 + ADDQ $0x20, R12 + VGF2P8AFFINEQB $0x00, Y8, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Load and process 32 bytes from input 9 to 1 outputs + VMOVDQU (CX), Y11 + ADDQ $0x20, CX + VGF2P8AFFINEQB $0x00, Y9, Y11, Y11 + VXORPD Y10, Y11, Y10 + + // Store 1 outputs + VMOVDQU Y10, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_10x1Xor_loop + VZEROUPPER + +mulAvxGFNI_10x1Xor_end: + RET + +// func mulGFNI_10x2_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x2_64(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 24 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x2_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), R12 + MOVQ 216(CX), CX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R13 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R14 + ADDQ R15, R13 + + // Add start offset to input + ADDQ R15, DX + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, CX + +mulGFNI_10x2_64_loop: + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z22 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z22, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z22, Z21 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z22 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z3, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z22 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z5, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z22 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z7, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (R8), Z22 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z8, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z9, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 5 to 2 outputs + VMOVDQU64 (R9), Z22 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z10, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 6 to 2 outputs + VMOVDQU64 (R10), Z22 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z12, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z13, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 7 to 2 outputs + VMOVDQU64 (R11), Z22 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z14, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z15, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 8 to 2 outputs + VMOVDQU64 (R12), Z22 + ADDQ $0x40, R12 + VGF2P8AFFINEQB $0x00, Z16, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z17, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 9 to 2 outputs + VMOVDQU64 (CX), Z22 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z18, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z19, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Store 2 outputs + VMOVDQU64 Z20, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z21, (R13) + ADDQ $0x40, R13 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_10x2_64_loop + VZEROUPPER + +mulGFNI_10x2_64_end: + RET + +// func mulAvxGFNI_10x2(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x2(SB), $8-88 + // Loading 12 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 24 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x2_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ (R14), R15 + MOVQ 24(R14), R14 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R15 + ADDQ BP, R14 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, DX + +mulAvxGFNI_10x2_loop: + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 2 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R14) + ADDQ $0x20, R14 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_10x2_loop + VZEROUPPER + +mulAvxGFNI_10x2_end: + RET + +// func mulGFNI_10x2_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x2_64Xor(SB), $0-88 + // Loading all tables to registers + // Destination kept in GP registers + // Full registers estimated 24 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x2_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), CX + MOVQ (CX), DX + MOVQ 24(CX), BX + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), R9 + MOVQ 144(CX), R10 + MOVQ 168(CX), R11 + MOVQ 192(CX), R12 + MOVQ 216(CX), CX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R13 + MOVQ start+72(FP), R15 + + // Add start offset to output + ADDQ R15, R14 + ADDQ R15, R13 + + // Add start offset to input + ADDQ R15, DX + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, CX + +mulGFNI_10x2_64Xor_loop: + // Load 2 outputs + VMOVDQU64 (R14), Z20 + VMOVDQU64 (R13), Z21 + + // Load and process 64 bytes from input 0 to 2 outputs + VMOVDQU64 (DX), Z22 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 1 to 2 outputs + VMOVDQU64 (BX), Z22 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z2, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z3, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 2 to 2 outputs + VMOVDQU64 (SI), Z22 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z5, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 3 to 2 outputs + VMOVDQU64 (DI), Z22 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z6, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z7, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 4 to 2 outputs + VMOVDQU64 (R8), Z22 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z8, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z9, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 5 to 2 outputs + VMOVDQU64 (R9), Z22 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z10, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 6 to 2 outputs + VMOVDQU64 (R10), Z22 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z12, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z13, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 7 to 2 outputs + VMOVDQU64 (R11), Z22 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z14, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z15, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 8 to 2 outputs + VMOVDQU64 (R12), Z22 + ADDQ $0x40, R12 + VGF2P8AFFINEQB $0x00, Z16, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z17, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Load and process 64 bytes from input 9 to 2 outputs + VMOVDQU64 (CX), Z22 + ADDQ $0x40, CX + VGF2P8AFFINEQB $0x00, Z18, Z22, Z23 + VXORPD Z20, Z23, Z20 + VGF2P8AFFINEQB $0x00, Z19, Z22, Z23 + VXORPD Z21, Z23, Z21 + + // Store 2 outputs + VMOVDQU64 Z20, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z21, (R13) + ADDQ $0x40, R13 + + // Prepare for next loop + DECQ AX + JNZ mulGFNI_10x2_64Xor_loop + VZEROUPPER + +mulGFNI_10x2_64Xor_end: + RET + +// func mulAvxGFNI_10x2Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x2Xor(SB), $8-88 + // Loading 12 of 20 tables to registers + // Destination kept in GP registers + // Full registers estimated 24 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x2Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + VBROADCASTSD 88(CX), Y11 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ (R14), R15 + MOVQ 24(R14), R14 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R15 + ADDQ BP, R14 + + // Add start offset to input + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, R13 + ADDQ BP, DX + +mulAvxGFNI_10x2Xor_loop: + // Load 2 outputs + VMOVDQU (R15), Y12 + VMOVDQU (R14), Y13 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 2 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 2 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 2 outputs + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R14) + ADDQ $0x20, R14 + + // Prepare for next loop + DECQ AX + JNZ mulAvxGFNI_10x2Xor_loop + VZEROUPPER + +mulAvxGFNI_10x2Xor_end: + RET + +// func mulGFNI_10x3_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x3_64(SB), $8-88 + // Loading 27 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 35 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x3_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + VBROADCASTF32X2 208(CX), Z26 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), R12 + MOVQ 216(AX), AX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R15 + MOVQ 48(R13), R13 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R13 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_10x3_64_loop: + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z29 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 3 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 3 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 3 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 3 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z26, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 3 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 3 outputs + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R13) + ADDQ $0x40, R13 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_10x3_64_loop + VZEROUPPER + +mulGFNI_10x3_64_end: + RET + +// func mulAvxGFNI_10x3(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x3(SB), $8-88 + // Loading 11 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 35 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x3_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), R12 + MOVQ 216(AX), AX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R15 + MOVQ 48(R13), R13 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R13 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_10x3_loop: + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 3 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 3 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_10x3_loop + VZEROUPPER + +mulAvxGFNI_10x3_end: + RET + +// func mulGFNI_10x3_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x3_64Xor(SB), $8-88 + // Loading 27 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 35 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x3_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + VBROADCASTF32X2 208(CX), Z26 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), R12 + MOVQ 216(AX), AX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R15 + MOVQ 48(R13), R13 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R13 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x06, BP + +mulGFNI_10x3_64Xor_loop: + // Load 3 outputs + VMOVDQU64 (R14), Z27 + VMOVDQU64 (R15), Z28 + VMOVDQU64 (R13), Z29 + + // Load and process 64 bytes from input 0 to 3 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 3 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 3 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 3 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 3 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 3 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 3 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 3 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 3 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z26, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 3 outputs + VMOVDQU64 (AX), Z30 + ADDQ $0x40, AX + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 3 outputs + VMOVDQU64 Z27, (R14) + ADDQ $0x40, R14 + VMOVDQU64 Z28, (R15) + ADDQ $0x40, R15 + VMOVDQU64 Z29, (R13) + ADDQ $0x40, R13 + + // Prepare for next loop + DECQ BP + JNZ mulGFNI_10x3_64Xor_loop + VZEROUPPER + +mulGFNI_10x3_64Xor_end: + RET + +// func mulAvxGFNI_10x3Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x3Xor(SB), $8-88 + // Loading 11 of 30 tables to registers + // Destination kept in GP registers + // Full registers estimated 35 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x3Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + VBROADCASTSD 80(CX), Y10 + MOVQ in_base+24(FP), AX + MOVQ (AX), DX + MOVQ 24(AX), BX + MOVQ 48(AX), SI + MOVQ 72(AX), DI + MOVQ 96(AX), R8 + MOVQ 120(AX), R9 + MOVQ 144(AX), R10 + MOVQ 168(AX), R11 + MOVQ 192(AX), R12 + MOVQ 216(AX), AX + MOVQ out_base+48(FP), R13 + MOVQ out_base+48(FP), R13 + MOVQ (R13), R14 + MOVQ 24(R13), R15 + MOVQ 48(R13), R13 + MOVQ start+72(FP), BP + + // Add start offset to output + ADDQ BP, R14 + ADDQ BP, R15 + ADDQ BP, R13 + + // Add start offset to input + ADDQ BP, DX + ADDQ BP, BX + ADDQ BP, SI + ADDQ BP, DI + ADDQ BP, R8 + ADDQ BP, R9 + ADDQ BP, R10 + ADDQ BP, R11 + ADDQ BP, R12 + ADDQ BP, AX + + // Reload length to save a register + MOVQ n+80(FP), BP + SHRQ $0x05, BP + +mulAvxGFNI_10x3Xor_loop: + // Load 3 outputs + VMOVDQU (R14), Y11 + VMOVDQU (R15), Y12 + VMOVDQU (R13), Y13 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 3 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 3 outputs + VMOVDQU (AX), Y14 + ADDQ $0x20, AX + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 3 outputs + VMOVDQU Y11, (R14) + ADDQ $0x20, R14 + VMOVDQU Y12, (R15) + ADDQ $0x20, R15 + VMOVDQU Y13, (R13) + ADDQ $0x20, R13 + + // Prepare for next loop + DECQ BP + JNZ mulAvxGFNI_10x3Xor_loop + VZEROUPPER + +mulAvxGFNI_10x3Xor_end: + RET + +// func mulGFNI_10x4_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x4_64(SB), $8-88 + // Loading 26 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 46 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x4_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x4_64_loop: + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z29 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 4 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 4 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 4 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 4 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 4 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 4 outputs + MOVQ (R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x4_64_loop + VZEROUPPER + +mulGFNI_10x4_64_end: + RET + +// func mulAvxGFNI_10x4(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x4(SB), $8-88 + // Loading 10 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 46 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x4_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x4_loop: + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 4 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + MOVQ (R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x4_loop + VZEROUPPER + +mulAvxGFNI_10x4_end: + RET + +// func mulGFNI_10x4_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x4_64Xor(SB), $8-88 + // Loading 26 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 46 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x4_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + VBROADCASTF32X2 200(CX), Z25 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x4_64Xor_loop: + // Load 4 outputs + MOVQ (R14), BP + VMOVDQU64 (BP)(R15*1), Z26 + MOVQ 24(R14), BP + VMOVDQU64 (BP)(R15*1), Z27 + MOVQ 48(R14), BP + VMOVDQU64 (BP)(R15*1), Z28 + MOVQ 72(R14), BP + VMOVDQU64 (BP)(R15*1), Z29 + + // Load and process 64 bytes from input 0 to 4 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 4 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 4 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 4 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 4 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 4 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 4 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z25, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 4 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 4 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 4 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 4 outputs + MOVQ (R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x4_64Xor_loop + VZEROUPPER + +mulGFNI_10x4_64Xor_end: + RET + +// func mulAvxGFNI_10x4Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x4Xor(SB), $8-88 + // Loading 10 of 40 tables to registers + // Destination kept on stack + // Full registers estimated 46 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x4Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + VBROADCASTSD 72(CX), Y9 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x4Xor_loop: + // Load 4 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 4 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 4 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 4 outputs + MOVQ (R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x4Xor_loop + VZEROUPPER + +mulAvxGFNI_10x4Xor_end: + RET + +// func mulGFNI_10x5_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x5_64(SB), $8-88 + // Loading 25 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 57 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x5_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x5_64_loop: + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 5 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 5 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 5 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 5 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + MOVQ (R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x5_64_loop + VZEROUPPER + +mulGFNI_10x5_64_end: + RET + +// func mulAvxGFNI_10x5(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x5(SB), $8-88 + // Loading 9 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 57 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x5_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x5_loop: + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 5 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + MOVQ (R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x5_loop + VZEROUPPER + +mulAvxGFNI_10x5_end: + RET + +// func mulGFNI_10x5_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x5_64Xor(SB), $8-88 + // Loading 25 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 57 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x5_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + VBROADCASTF32X2 192(CX), Z24 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x5_64Xor_loop: + // Load 5 outputs + MOVQ (R14), BP + VMOVDQU64 (BP)(R15*1), Z25 + MOVQ 24(R14), BP + VMOVDQU64 (BP)(R15*1), Z26 + MOVQ 48(R14), BP + VMOVDQU64 (BP)(R15*1), Z27 + MOVQ 72(R14), BP + VMOVDQU64 (BP)(R15*1), Z28 + MOVQ 96(R14), BP + VMOVDQU64 (BP)(R15*1), Z29 + + // Load and process 64 bytes from input 0 to 5 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 5 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 5 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 5 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 5 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z24, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 5 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 5 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 5 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 5 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 5 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 5 outputs + MOVQ (R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x5_64Xor_loop + VZEROUPPER + +mulGFNI_10x5_64Xor_end: + RET + +// func mulAvxGFNI_10x5Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x5Xor(SB), $8-88 + // Loading 9 of 50 tables to registers + // Destination kept on stack + // Full registers estimated 57 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x5Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + VBROADCASTSD 64(CX), Y8 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x5Xor_loop: + // Load 5 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 5 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 5 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 5 outputs + MOVQ (R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x5Xor_loop + VZEROUPPER + +mulAvxGFNI_10x5Xor_end: + RET + +// func mulGFNI_10x6_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x6_64(SB), $8-88 + // Loading 24 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 68 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x6_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x6_64_loop: + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 6 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 6 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 6 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 6 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + MOVQ (R14), BP + VMOVDQU64 Z24, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x6_64_loop + VZEROUPPER + +mulGFNI_10x6_64_end: + RET + +// func mulAvxGFNI_10x6(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x6(SB), $8-88 + // Loading 8 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 68 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x6_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x6_loop: + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 6 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x6_loop + VZEROUPPER + +mulAvxGFNI_10x6_end: + RET + +// func mulGFNI_10x6_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x6_64Xor(SB), $8-88 + // Loading 24 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 68 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x6_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + VBROADCASTF32X2 184(CX), Z23 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x6_64Xor_loop: + // Load 6 outputs + MOVQ (R14), BP + VMOVDQU64 (BP)(R15*1), Z24 + MOVQ 24(R14), BP + VMOVDQU64 (BP)(R15*1), Z25 + MOVQ 48(R14), BP + VMOVDQU64 (BP)(R15*1), Z26 + MOVQ 72(R14), BP + VMOVDQU64 (BP)(R15*1), Z27 + MOVQ 96(R14), BP + VMOVDQU64 (BP)(R15*1), Z28 + MOVQ 120(R14), BP + VMOVDQU64 (BP)(R15*1), Z29 + + // Load and process 64 bytes from input 0 to 6 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 6 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 6 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 6 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z23, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 6 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 6 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 6 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 6 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 6 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 6 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 6 outputs + MOVQ (R14), BP + VMOVDQU64 Z24, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x6_64Xor_loop + VZEROUPPER + +mulGFNI_10x6_64Xor_end: + RET + +// func mulAvxGFNI_10x6Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x6Xor(SB), $8-88 + // Loading 8 of 60 tables to registers + // Destination kept on stack + // Full registers estimated 68 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x6Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + VBROADCASTSD 56(CX), Y7 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x6Xor_loop: + // Load 6 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y8 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 120(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y7, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 6 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 6 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 6 outputs + MOVQ (R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x6Xor_loop + VZEROUPPER + +mulAvxGFNI_10x6Xor_end: + RET + +// func mulGFNI_10x7_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x7_64(SB), $8-88 + // Loading 23 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 79 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x7_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x7_64_loop: + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 7 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 7 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 7 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 7 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + MOVQ (R14), BP + VMOVDQU64 Z23, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z24, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x7_64_loop + VZEROUPPER + +mulGFNI_10x7_64_end: + RET + +// func mulAvxGFNI_10x7(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x7(SB), $8-88 + // Loading 7 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 79 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x7_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x7_loop: + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 7 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x7_loop + VZEROUPPER + +mulAvxGFNI_10x7_end: + RET + +// func mulGFNI_10x7_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x7_64Xor(SB), $8-88 + // Loading 23 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 79 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x7_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + VBROADCASTF32X2 176(CX), Z22 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x7_64Xor_loop: + // Load 7 outputs + MOVQ (R14), BP + VMOVDQU64 (BP)(R15*1), Z23 + MOVQ 24(R14), BP + VMOVDQU64 (BP)(R15*1), Z24 + MOVQ 48(R14), BP + VMOVDQU64 (BP)(R15*1), Z25 + MOVQ 72(R14), BP + VMOVDQU64 (BP)(R15*1), Z26 + MOVQ 96(R14), BP + VMOVDQU64 (BP)(R15*1), Z27 + MOVQ 120(R14), BP + VMOVDQU64 (BP)(R15*1), Z28 + MOVQ 144(R14), BP + VMOVDQU64 (BP)(R15*1), Z29 + + // Load and process 64 bytes from input 0 to 7 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 7 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 7 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 7 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z22, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 7 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 7 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 7 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 7 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 7 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 7 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 7 outputs + MOVQ (R14), BP + VMOVDQU64 Z23, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z24, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x7_64Xor_loop + VZEROUPPER + +mulGFNI_10x7_64Xor_end: + RET + +// func mulAvxGFNI_10x7Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x7Xor(SB), $8-88 + // Loading 7 of 70 tables to registers + // Destination kept on stack + // Full registers estimated 79 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x7Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + VBROADCASTSD 48(CX), Y6 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x7Xor_loop: + // Load 7 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y7 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y8 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 120(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 144(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y11, Y15, Y11 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y12, Y15, Y12 + VGF2P8AFFINEQB $0x00, Y6, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 7 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 7 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 7 outputs + MOVQ (R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x7Xor_loop + VZEROUPPER + +mulAvxGFNI_10x7Xor_end: + RET + +// func mulGFNI_10x8_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x8_64(SB), $8-88 + // Loading 22 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 90 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x8_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x8_64_loop: + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 8 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 8 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 8 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 8 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + MOVQ (R14), BP + VMOVDQU64 Z22, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z23, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z24, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x8_64_loop + VZEROUPPER + +mulGFNI_10x8_64_end: + RET + +// func mulAvxGFNI_10x8(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x8(SB), $8-88 + // Loading 6 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 90 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x8_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x8_loop: + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y11 + VBROADCASTSD 48(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 56(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 8 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x8_loop + VZEROUPPER + +mulAvxGFNI_10x8_end: + RET + +// func mulGFNI_10x8_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x8_64Xor(SB), $8-88 + // Loading 22 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 90 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x8_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + VBROADCASTF32X2 168(CX), Z21 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x8_64Xor_loop: + // Load 8 outputs + MOVQ (R14), BP + VMOVDQU64 (BP)(R15*1), Z22 + MOVQ 24(R14), BP + VMOVDQU64 (BP)(R15*1), Z23 + MOVQ 48(R14), BP + VMOVDQU64 (BP)(R15*1), Z24 + MOVQ 72(R14), BP + VMOVDQU64 (BP)(R15*1), Z25 + MOVQ 96(R14), BP + VMOVDQU64 (BP)(R15*1), Z26 + MOVQ 120(R14), BP + VMOVDQU64 (BP)(R15*1), Z27 + MOVQ 144(R14), BP + VMOVDQU64 (BP)(R15*1), Z28 + MOVQ 168(R14), BP + VMOVDQU64 (BP)(R15*1), Z29 + + // Load and process 64 bytes from input 0 to 8 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 8 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 8 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z21, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 8 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 8 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 8 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 8 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 8 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 8 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 8 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 8 outputs + MOVQ (R14), BP + VMOVDQU64 Z22, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z23, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z24, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x8_64Xor_loop + VZEROUPPER + +mulGFNI_10x8_64Xor_end: + RET + +// func mulAvxGFNI_10x8Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x8Xor(SB), $8-88 + // Loading 6 of 80 tables to registers + // Destination kept on stack + // Full registers estimated 90 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x8Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + VBROADCASTSD 40(CX), Y5 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x8Xor_loop: + // Load 8 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y6 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y7 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y8 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 120(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 144(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 168(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y9, Y15, Y9 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y10, Y15, Y10 + VGF2P8AFFINEQB $0x00, Y5, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 8 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 8 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 8 outputs + MOVQ (R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x8Xor_loop + VZEROUPPER + +mulAvxGFNI_10x8Xor_end: + RET + +// func mulGFNI_10x9_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x9_64(SB), $8-88 + // Loading 21 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 101 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x9_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x9_64_loop: + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 9 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 9 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 9 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 9 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R14), BP + VMOVDQU64 Z21, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z22, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z23, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z24, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x9_64_loop + VZEROUPPER + +mulGFNI_10x9_64_end: + RET + +// func mulAvxGFNI_10x9(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x9(SB), $8-88 + // Loading 5 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 101 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x9_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x9_loop: + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y9 + VBROADCASTSD 40(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 48(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 56(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 64(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 9 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R14), BP + VMOVDQU Y5, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x9_loop + VZEROUPPER + +mulAvxGFNI_10x9_end: + RET + +// func mulGFNI_10x9_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x9_64Xor(SB), $8-88 + // Loading 21 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 101 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x9_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + VBROADCASTF32X2 160(CX), Z20 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x9_64Xor_loop: + // Load 9 outputs + MOVQ (R14), BP + VMOVDQU64 (BP)(R15*1), Z21 + MOVQ 24(R14), BP + VMOVDQU64 (BP)(R15*1), Z22 + MOVQ 48(R14), BP + VMOVDQU64 (BP)(R15*1), Z23 + MOVQ 72(R14), BP + VMOVDQU64 (BP)(R15*1), Z24 + MOVQ 96(R14), BP + VMOVDQU64 (BP)(R15*1), Z25 + MOVQ 120(R14), BP + VMOVDQU64 (BP)(R15*1), Z26 + MOVQ 144(R14), BP + VMOVDQU64 (BP)(R15*1), Z27 + MOVQ 168(R14), BP + VMOVDQU64 (BP)(R15*1), Z28 + MOVQ 192(R14), BP + VMOVDQU64 (BP)(R15*1), Z29 + + // Load and process 64 bytes from input 0 to 9 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 9 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 9 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z20, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 9 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 9 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 9 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 9 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 9 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 9 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 9 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 9 outputs + MOVQ (R14), BP + VMOVDQU64 Z21, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z22, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z23, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z24, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x9_64Xor_loop + VZEROUPPER + +mulGFNI_10x9_64Xor_end: + RET + +// func mulAvxGFNI_10x9Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x9Xor(SB), $8-88 + // Loading 5 of 90 tables to registers + // Destination kept on stack + // Full registers estimated 101 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x9Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + VBROADCASTSD 32(CX), Y4 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x9Xor_loop: + // Load 9 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y5 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y6 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y7 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y8 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 120(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 144(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 168(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 192(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 9 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y7, Y15, Y7 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y8, Y15, Y8 + VGF2P8AFFINEQB $0x00, Y4, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 9 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 9 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 9 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 9 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 9 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 9 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 9 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 9 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 9 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 9 outputs + MOVQ (R14), BP + VMOVDQU Y5, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x9Xor_loop + VZEROUPPER + +mulAvxGFNI_10x9Xor_end: + RET + +// func mulGFNI_10x10_64(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x10_64(SB), $8-88 + // Loading 20 of 100 tables to registers + // Destination kept on stack + // Full registers estimated 112 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x10_64_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x10_64_loop: + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 10 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 10 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 10 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 10 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 720(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 728(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 736(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 744(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 752(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 760(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 768(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 776(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 784(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 792(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R14), BP + VMOVDQU64 Z20, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z21, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z22, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z23, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z24, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 216(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x10_64_loop + VZEROUPPER + +mulGFNI_10x10_64_end: + RET + +// func mulAvxGFNI_10x10(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x10(SB), $8-88 + // Loading 4 of 100 tables to registers + // Destination kept on stack + // Full registers estimated 112 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x10_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x10_loop: + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y7 + VBROADCASTSD 32(CX), Y8 + VGF2P8AFFINEQB $0x00, Y8, Y14, Y8 + VBROADCASTSD 40(CX), Y9 + VGF2P8AFFINEQB $0x00, Y9, Y14, Y9 + VBROADCASTSD 48(CX), Y10 + VGF2P8AFFINEQB $0x00, Y10, Y14, Y10 + VBROADCASTSD 56(CX), Y11 + VGF2P8AFFINEQB $0x00, Y11, Y14, Y11 + VBROADCASTSD 64(CX), Y12 + VGF2P8AFFINEQB $0x00, Y12, Y14, Y12 + VBROADCASTSD 72(CX), Y13 + VGF2P8AFFINEQB $0x00, Y13, Y14, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 10 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 720(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 728(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 736(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 744(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 752(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 760(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 768(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 776(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 784(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 792(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R14), BP + VMOVDQU Y4, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y5, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 216(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x10_loop + VZEROUPPER + +mulAvxGFNI_10x10_end: + RET + +// func mulGFNI_10x10_64Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·mulGFNI_10x10_64Xor(SB), $8-88 + // Loading 20 of 100 tables to registers + // Destination kept on stack + // Full registers estimated 112 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x06, AX + TESTQ AX, AX + JZ mulGFNI_10x10_64Xor_end + VBROADCASTF32X2 (CX), Z0 + VBROADCASTF32X2 8(CX), Z1 + VBROADCASTF32X2 16(CX), Z2 + VBROADCASTF32X2 24(CX), Z3 + VBROADCASTF32X2 32(CX), Z4 + VBROADCASTF32X2 40(CX), Z5 + VBROADCASTF32X2 48(CX), Z6 + VBROADCASTF32X2 56(CX), Z7 + VBROADCASTF32X2 64(CX), Z8 + VBROADCASTF32X2 72(CX), Z9 + VBROADCASTF32X2 80(CX), Z10 + VBROADCASTF32X2 88(CX), Z11 + VBROADCASTF32X2 96(CX), Z12 + VBROADCASTF32X2 104(CX), Z13 + VBROADCASTF32X2 112(CX), Z14 + VBROADCASTF32X2 120(CX), Z15 + VBROADCASTF32X2 128(CX), Z16 + VBROADCASTF32X2 136(CX), Z17 + VBROADCASTF32X2 144(CX), Z18 + VBROADCASTF32X2 152(CX), Z19 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulGFNI_10x10_64Xor_loop: + // Load 10 outputs + MOVQ (R14), BP + VMOVDQU64 (BP)(R15*1), Z20 + MOVQ 24(R14), BP + VMOVDQU64 (BP)(R15*1), Z21 + MOVQ 48(R14), BP + VMOVDQU64 (BP)(R15*1), Z22 + MOVQ 72(R14), BP + VMOVDQU64 (BP)(R15*1), Z23 + MOVQ 96(R14), BP + VMOVDQU64 (BP)(R15*1), Z24 + MOVQ 120(R14), BP + VMOVDQU64 (BP)(R15*1), Z25 + MOVQ 144(R14), BP + VMOVDQU64 (BP)(R15*1), Z26 + MOVQ 168(R14), BP + VMOVDQU64 (BP)(R15*1), Z27 + MOVQ 192(R14), BP + VMOVDQU64 (BP)(R15*1), Z28 + MOVQ 216(R14), BP + VMOVDQU64 (BP)(R15*1), Z29 + + // Load and process 64 bytes from input 0 to 10 outputs + VMOVDQU64 (BX), Z30 + ADDQ $0x40, BX + VGF2P8AFFINEQB $0x00, Z0, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z1, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z2, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z3, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z4, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z5, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z6, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z7, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z8, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z9, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 1 to 10 outputs + VMOVDQU64 (SI), Z30 + ADDQ $0x40, SI + VGF2P8AFFINEQB $0x00, Z10, Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB $0x00, Z11, Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB $0x00, Z12, Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB $0x00, Z13, Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB $0x00, Z14, Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB $0x00, Z15, Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB $0x00, Z16, Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB $0x00, Z17, Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB $0x00, Z18, Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB $0x00, Z19, Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 2 to 10 outputs + VMOVDQU64 (DI), Z30 + ADDQ $0x40, DI + VGF2P8AFFINEQB.BCST $0x00, 160(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 168(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 176(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 184(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 192(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 200(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 208(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 216(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 224(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 232(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 3 to 10 outputs + VMOVDQU64 (R8), Z30 + ADDQ $0x40, R8 + VGF2P8AFFINEQB.BCST $0x00, 240(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 248(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 256(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 264(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 272(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 280(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 288(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 296(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 304(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 312(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 4 to 10 outputs + VMOVDQU64 (R9), Z30 + ADDQ $0x40, R9 + VGF2P8AFFINEQB.BCST $0x00, 320(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 328(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 336(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 344(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 352(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 360(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 368(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 376(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 384(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 392(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 5 to 10 outputs + VMOVDQU64 (R10), Z30 + ADDQ $0x40, R10 + VGF2P8AFFINEQB.BCST $0x00, 400(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 408(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 416(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 424(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 432(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 440(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 448(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 456(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 464(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 472(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 6 to 10 outputs + VMOVDQU64 (R11), Z30 + ADDQ $0x40, R11 + VGF2P8AFFINEQB.BCST $0x00, 480(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 488(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 496(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 504(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 512(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 520(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 528(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 536(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 544(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 552(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 7 to 10 outputs + VMOVDQU64 (R12), Z30 + ADDQ $0x40, R12 + VGF2P8AFFINEQB.BCST $0x00, 560(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 568(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 576(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 584(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 592(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 600(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 608(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 616(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 624(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 632(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 8 to 10 outputs + VMOVDQU64 (R13), Z30 + ADDQ $0x40, R13 + VGF2P8AFFINEQB.BCST $0x00, 640(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 648(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 656(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 664(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 672(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 680(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 688(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 696(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 704(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 712(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Load and process 64 bytes from input 9 to 10 outputs + VMOVDQU64 (DX), Z30 + ADDQ $0x40, DX + VGF2P8AFFINEQB.BCST $0x00, 720(CX), Z30, Z31 + VXORPD Z20, Z31, Z20 + VGF2P8AFFINEQB.BCST $0x00, 728(CX), Z30, Z31 + VXORPD Z21, Z31, Z21 + VGF2P8AFFINEQB.BCST $0x00, 736(CX), Z30, Z31 + VXORPD Z22, Z31, Z22 + VGF2P8AFFINEQB.BCST $0x00, 744(CX), Z30, Z31 + VXORPD Z23, Z31, Z23 + VGF2P8AFFINEQB.BCST $0x00, 752(CX), Z30, Z31 + VXORPD Z24, Z31, Z24 + VGF2P8AFFINEQB.BCST $0x00, 760(CX), Z30, Z31 + VXORPD Z25, Z31, Z25 + VGF2P8AFFINEQB.BCST $0x00, 768(CX), Z30, Z31 + VXORPD Z26, Z31, Z26 + VGF2P8AFFINEQB.BCST $0x00, 776(CX), Z30, Z31 + VXORPD Z27, Z31, Z27 + VGF2P8AFFINEQB.BCST $0x00, 784(CX), Z30, Z31 + VXORPD Z28, Z31, Z28 + VGF2P8AFFINEQB.BCST $0x00, 792(CX), Z30, Z31 + VXORPD Z29, Z31, Z29 + + // Store 10 outputs + MOVQ (R14), BP + VMOVDQU64 Z20, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU64 Z21, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU64 Z22, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU64 Z23, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU64 Z24, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU64 Z25, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU64 Z26, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU64 Z27, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU64 Z28, (BP)(R15*1) + MOVQ 216(R14), BP + VMOVDQU64 Z29, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x40, R15 + DECQ AX + JNZ mulGFNI_10x10_64Xor_loop + VZEROUPPER + +mulGFNI_10x10_64Xor_end: + RET + +// func mulAvxGFNI_10x10Xor(matrix []uint64, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, GFNI +TEXT ·mulAvxGFNI_10x10Xor(SB), $8-88 + // Loading 4 of 100 tables to registers + // Destination kept on stack + // Full registers estimated 112 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxGFNI_10x10Xor_end + VBROADCASTSD (CX), Y0 + VBROADCASTSD 8(CX), Y1 + VBROADCASTSD 16(CX), Y2 + VBROADCASTSD 24(CX), Y3 + MOVQ in_base+24(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), SI + MOVQ 48(DX), DI + MOVQ 72(DX), R8 + MOVQ 96(DX), R9 + MOVQ 120(DX), R10 + MOVQ 144(DX), R11 + MOVQ 168(DX), R12 + MOVQ 192(DX), R13 + MOVQ 216(DX), DX + MOVQ out_base+48(FP), R14 + MOVQ out_base+48(FP), R14 + MOVQ start+72(FP), R15 + + // Add start offset to input + ADDQ R15, BX + ADDQ R15, SI + ADDQ R15, DI + ADDQ R15, R8 + ADDQ R15, R9 + ADDQ R15, R10 + ADDQ R15, R11 + ADDQ R15, R12 + ADDQ R15, R13 + ADDQ R15, DX + +mulAvxGFNI_10x10Xor_loop: + // Load 10 outputs + MOVQ (R14), BP + VMOVDQU (BP)(R15*1), Y4 + MOVQ 24(R14), BP + VMOVDQU (BP)(R15*1), Y5 + MOVQ 48(R14), BP + VMOVDQU (BP)(R15*1), Y6 + MOVQ 72(R14), BP + VMOVDQU (BP)(R15*1), Y7 + MOVQ 96(R14), BP + VMOVDQU (BP)(R15*1), Y8 + MOVQ 120(R14), BP + VMOVDQU (BP)(R15*1), Y9 + MOVQ 144(R14), BP + VMOVDQU (BP)(R15*1), Y10 + MOVQ 168(R14), BP + VMOVDQU (BP)(R15*1), Y11 + MOVQ 192(R14), BP + VMOVDQU (BP)(R15*1), Y12 + MOVQ 216(R14), BP + VMOVDQU (BP)(R15*1), Y13 + + // Load and process 32 bytes from input 0 to 10 outputs + VMOVDQU (BX), Y14 + ADDQ $0x20, BX + VGF2P8AFFINEQB $0x00, Y0, Y14, Y15 + VXORPD Y4, Y15, Y4 + VGF2P8AFFINEQB $0x00, Y1, Y14, Y15 + VXORPD Y5, Y15, Y5 + VGF2P8AFFINEQB $0x00, Y2, Y14, Y15 + VXORPD Y6, Y15, Y6 + VGF2P8AFFINEQB $0x00, Y3, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 32(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 40(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 48(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 56(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 64(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 72(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 1 to 10 outputs + VMOVDQU (SI), Y14 + ADDQ $0x20, SI + VBROADCASTSD 80(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 88(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 96(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 104(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 112(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 120(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 128(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 136(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 144(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 152(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 2 to 10 outputs + VMOVDQU (DI), Y14 + ADDQ $0x20, DI + VBROADCASTSD 160(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 168(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 176(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 184(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 192(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 200(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 208(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 216(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 224(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 232(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 3 to 10 outputs + VMOVDQU (R8), Y14 + ADDQ $0x20, R8 + VBROADCASTSD 240(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 248(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 256(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 264(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 272(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 280(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 288(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 296(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 304(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 312(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 4 to 10 outputs + VMOVDQU (R9), Y14 + ADDQ $0x20, R9 + VBROADCASTSD 320(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 328(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 336(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 344(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 352(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 360(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 368(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 376(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 384(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 392(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 5 to 10 outputs + VMOVDQU (R10), Y14 + ADDQ $0x20, R10 + VBROADCASTSD 400(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 408(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 416(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 424(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 432(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 440(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 448(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 456(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 464(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 472(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 6 to 10 outputs + VMOVDQU (R11), Y14 + ADDQ $0x20, R11 + VBROADCASTSD 480(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 488(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 496(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 504(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 512(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 520(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 528(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 536(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 544(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 552(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 7 to 10 outputs + VMOVDQU (R12), Y14 + ADDQ $0x20, R12 + VBROADCASTSD 560(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 568(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 576(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 584(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 592(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 600(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 608(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 616(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 624(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 632(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 8 to 10 outputs + VMOVDQU (R13), Y14 + ADDQ $0x20, R13 + VBROADCASTSD 640(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 648(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 656(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 664(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 672(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 680(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 688(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 696(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 704(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 712(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Load and process 32 bytes from input 9 to 10 outputs + VMOVDQU (DX), Y14 + ADDQ $0x20, DX + VBROADCASTSD 720(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y4, Y15, Y4 + VBROADCASTSD 728(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y5, Y15, Y5 + VBROADCASTSD 736(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y6, Y15, Y6 + VBROADCASTSD 744(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y7, Y15, Y7 + VBROADCASTSD 752(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y8, Y15, Y8 + VBROADCASTSD 760(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y9, Y15, Y9 + VBROADCASTSD 768(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y10, Y15, Y10 + VBROADCASTSD 776(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y11, Y15, Y11 + VBROADCASTSD 784(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y12, Y15, Y12 + VBROADCASTSD 792(CX), Y15 + VGF2P8AFFINEQB $0x00, Y15, Y14, Y15 + VXORPD Y13, Y15, Y13 + + // Store 10 outputs + MOVQ (R14), BP + VMOVDQU Y4, (BP)(R15*1) + MOVQ 24(R14), BP + VMOVDQU Y5, (BP)(R15*1) + MOVQ 48(R14), BP + VMOVDQU Y6, (BP)(R15*1) + MOVQ 72(R14), BP + VMOVDQU Y7, (BP)(R15*1) + MOVQ 96(R14), BP + VMOVDQU Y8, (BP)(R15*1) + MOVQ 120(R14), BP + VMOVDQU Y9, (BP)(R15*1) + MOVQ 144(R14), BP + VMOVDQU Y10, (BP)(R15*1) + MOVQ 168(R14), BP + VMOVDQU Y11, (BP)(R15*1) + MOVQ 192(R14), BP + VMOVDQU Y12, (BP)(R15*1) + MOVQ 216(R14), BP + VMOVDQU Y13, (BP)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxGFNI_10x10Xor_loop + VZEROUPPER + +mulAvxGFNI_10x10Xor_end: + RET + +// func ifftDIT48_gfni_0(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·ifftDIT48_gfni_0(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t01+32(FP), Z0 + VBROADCASTF32X2 t23+40(FP), Z1 + VBROADCASTF32X2 t02+48(FP), Z2 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z3 + VMOVDQU64 (DI), Z4 + VMOVDQU64 (R8), Z5 + VMOVDQU64 (AX), Z6 + VXORPD Z4, Z3, Z4 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z4, Z7 + VXORPD Z3, Z7, Z3 + VXORPD Z5, Z6, Z6 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z6, Z7 + VPTERNLOGD $0x96, Z7, Z3, Z5 + VXORPD Z4, Z6, Z6 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z2, Z5, Z7 + VXORPD Z3, Z7, Z3 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z2, Z6, Z7 + VXORPD Z4, Z7, Z4 + VMOVDQU64 Z3, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z4, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z5, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z6, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func fftDIT48_gfni_0(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·fftDIT48_gfni_0(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t01+32(FP), Z0 + VBROADCASTF32X2 t23+40(FP), Z1 + VBROADCASTF32X2 t02+48(FP), Z2 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z3 + VMOVDQU64 (DI), Z4 + VMOVDQU64 (R8), Z5 + VMOVDQU64 (AX), Z6 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z2, Z5, Z7 + VXORPD Z3, Z7, Z3 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z2, Z6, Z7 + VXORPD Z4, Z7, Z4 + VXORPD Z3, Z5, Z5 + VXORPD Z4, Z6, Z6 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z4, Z7 + VXORPD Z3, Z7, Z3 + VXORPD Z4, Z3, Z4 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z6, Z7 + VXORPD Z5, Z7, Z5 + VXORPD Z5, Z6, Z6 + VMOVDQU64 Z3, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z4, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z5, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z6, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func ifftDIT48_gfni_1(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·ifftDIT48_gfni_1(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t23+40(FP), Z0 + VBROADCASTF32X2 t02+48(FP), Z1 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z2 + VMOVDQU64 (DI), Z3 + VMOVDQU64 (R8), Z4 + VMOVDQU64 (AX), Z5 + VXORPD Z3, Z2, Z3 + VXORPD Z4, Z5, Z5 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z5, Z6 + VPTERNLOGD $0x96, Z6, Z2, Z4 + VXORPD Z3, Z5, Z5 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z4, Z6 + VXORPD Z2, Z6, Z2 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z5, Z6 + VXORPD Z3, Z6, Z3 + VMOVDQU64 Z2, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z3, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z4, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z5, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func fftDIT48_gfni_1(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·fftDIT48_gfni_1(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t01+32(FP), Z0 + VBROADCASTF32X2 t23+40(FP), Z1 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z2 + VMOVDQU64 (DI), Z3 + VMOVDQU64 (R8), Z4 + VMOVDQU64 (AX), Z5 + VXORPD Z2, Z4, Z4 + VXORPD Z3, Z5, Z5 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z3, Z6 + VXORPD Z2, Z6, Z2 + VXORPD Z3, Z2, Z3 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z5, Z6 + VXORPD Z4, Z6, Z4 + VXORPD Z4, Z5, Z5 + VMOVDQU64 Z2, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z3, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z4, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z5, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func ifftDIT48_gfni_2(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·ifftDIT48_gfni_2(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t01+32(FP), Z0 + VBROADCASTF32X2 t02+48(FP), Z1 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z2 + VMOVDQU64 (DI), Z3 + VMOVDQU64 (R8), Z4 + VMOVDQU64 (AX), Z5 + VXORPD Z3, Z2, Z3 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z3, Z6 + VXORPD Z2, Z6, Z2 + VXORPD Z4, Z5, Z5 + VXORPD Z2, Z4, Z4 + VXORPD Z3, Z5, Z5 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z4, Z6 + VXORPD Z2, Z6, Z2 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z5, Z6 + VXORPD Z3, Z6, Z3 + VMOVDQU64 Z2, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z3, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z4, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z5, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func fftDIT48_gfni_2(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·fftDIT48_gfni_2(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t23+40(FP), Z0 + VBROADCASTF32X2 t02+48(FP), Z1 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z2 + VMOVDQU64 (DI), Z3 + VMOVDQU64 (R8), Z4 + VMOVDQU64 (AX), Z5 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z4, Z6 + VXORPD Z2, Z6, Z2 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z5, Z6 + VXORPD Z3, Z6, Z3 + VXORPD Z2, Z4, Z4 + VXORPD Z3, Z5, Z5 + VXORPD Z3, Z2, Z3 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z5, Z6 + VXORPD Z4, Z6, Z4 + VXORPD Z4, Z5, Z5 + VMOVDQU64 Z2, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z3, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z4, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z5, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func ifftDIT48_gfni_3(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·ifftDIT48_gfni_3(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t02+48(FP), Z0 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z1 + VMOVDQU64 (DI), Z2 + VMOVDQU64 (R8), Z3 + VMOVDQU64 (AX), Z4 + VXORPD Z2, Z1, Z2 + VXORPD Z3, Z4, Z4 + VXORPD Z1, Z3, Z3 + VXORPD Z2, Z4, Z4 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z3, Z5 + VXORPD Z1, Z5, Z1 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z4, Z5 + VXORPD Z2, Z5, Z2 + VMOVDQU64 Z1, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z2, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z3, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z4, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func fftDIT48_gfni_3(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·fftDIT48_gfni_3(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t23+40(FP), Z0 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z1 + VMOVDQU64 (DI), Z2 + VMOVDQU64 (R8), Z3 + VMOVDQU64 (AX), Z4 + VXORPD Z1, Z3, Z3 + VXORPD Z2, Z4, Z4 + VXORPD Z2, Z1, Z2 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z4, Z5 + VXORPD Z3, Z5, Z3 + VXORPD Z3, Z4, Z4 + VMOVDQU64 Z1, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z2, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z3, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z4, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func ifftDIT48_gfni_4(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·ifftDIT48_gfni_4(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t01+32(FP), Z0 + VBROADCASTF32X2 t23+40(FP), Z1 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z2 + VMOVDQU64 (DI), Z3 + VMOVDQU64 (R8), Z4 + VMOVDQU64 (AX), Z5 + VXORPD Z3, Z2, Z3 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z3, Z6 + VXORPD Z2, Z6, Z2 + VXORPD Z4, Z5, Z5 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z5, Z6 + VPTERNLOGD $0x96, Z6, Z2, Z4 + VXORPD Z3, Z5, Z5 + VMOVDQU64 Z2, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z3, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z4, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z5, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func fftDIT48_gfni_4(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·fftDIT48_gfni_4(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t01+32(FP), Z0 + VBROADCASTF32X2 t02+48(FP), Z1 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z2 + VMOVDQU64 (DI), Z3 + VMOVDQU64 (R8), Z4 + VMOVDQU64 (AX), Z5 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z4, Z6 + VXORPD Z2, Z6, Z2 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z1, Z5, Z6 + VXORPD Z3, Z6, Z3 + VXORPD Z2, Z4, Z4 + VXORPD Z3, Z5, Z5 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z3, Z6 + VXORPD Z2, Z6, Z2 + VXORPD Z3, Z2, Z3 + VXORPD Z4, Z5, Z5 + VMOVDQU64 Z2, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z3, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z4, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z5, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func ifftDIT48_gfni_5(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·ifftDIT48_gfni_5(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t23+40(FP), Z0 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z1 + VMOVDQU64 (DI), Z2 + VMOVDQU64 (R8), Z3 + VMOVDQU64 (AX), Z4 + VXORPD Z2, Z1, Z2 + VXORPD Z3, Z4, Z4 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z4, Z5 + VPTERNLOGD $0x96, Z5, Z1, Z3 + VXORPD Z2, Z4, Z4 + VMOVDQU64 Z1, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z2, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z3, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z4, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func fftDIT48_gfni_5(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·fftDIT48_gfni_5(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t01+32(FP), Z0 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z1 + VMOVDQU64 (DI), Z2 + VMOVDQU64 (R8), Z3 + VMOVDQU64 (AX), Z4 + VXORPD Z1, Z3, Z3 + VXORPD Z2, Z4, Z4 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z2, Z5 + VXORPD Z1, Z5, Z1 + VXORPD Z2, Z1, Z2 + VXORPD Z3, Z4, Z4 + VMOVDQU64 Z1, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z2, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z3, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z4, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func ifftDIT48_gfni_6(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·ifftDIT48_gfni_6(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t01+32(FP), Z0 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z1 + VMOVDQU64 (DI), Z2 + VMOVDQU64 (R8), Z3 + VMOVDQU64 (AX), Z4 + VXORPD Z2, Z1, Z2 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z2, Z5 + VXORPD Z1, Z5, Z1 + VXORPD Z3, Z4, Z4 + VXORPD Z1, Z3, Z3 + VXORPD Z2, Z4, Z4 + VMOVDQU64 Z1, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z2, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z3, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z4, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func fftDIT48_gfni_6(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F, GFNI +TEXT ·fftDIT48_gfni_6(SB), NOSPLIT, $0-56 + VBROADCASTF32X2 t02+48(FP), Z0 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z1 + VMOVDQU64 (DI), Z2 + VMOVDQU64 (R8), Z3 + VMOVDQU64 (AX), Z4 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z3, Z5 + VXORPD Z1, Z5, Z1 + + // LEO_MULADD_512 + VGF2P8AFFINEQB $0x00, Z0, Z4, Z5 + VXORPD Z2, Z5, Z2 + VXORPD Z1, Z3, Z3 + VXORPD Z2, Z4, Z4 + VXORPD Z2, Z1, Z2 + VXORPD Z3, Z4, Z4 + VMOVDQU64 Z1, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z2, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z3, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z4, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func ifftDIT48_gfni_7(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F +TEXT ·ifftDIT48_gfni_7(SB), NOSPLIT, $0-56 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z0 + VMOVDQU64 (DI), Z1 + VMOVDQU64 (R8), Z2 + VMOVDQU64 (AX), Z3 + VXORPD Z1, Z0, Z1 + VXORPD Z2, Z3, Z3 + VXORPD Z0, Z2, Z2 + VXORPD Z1, Z3, Z3 + VMOVDQU64 Z0, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z1, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z2, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z3, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET + +// func fftDIT48_gfni_7(work [][]byte, dist int, t01 uint64, t23 uint64, t02 uint64) +// Requires: AVX, AVX512DQ, AVX512F +TEXT ·fftDIT48_gfni_7(SB), NOSPLIT, $0-56 + MOVQ dist+24(FP), AX + MOVQ work_base+0(FP), CX + MOVQ 8(CX), DX + XORQ BX, BX + MOVQ (CX)(BX*1), SI + ADDQ AX, BX + MOVQ (CX)(BX*1), DI + ADDQ AX, BX + MOVQ (CX)(BX*1), R8 + ADDQ AX, BX + MOVQ (CX)(BX*1), AX + +loop: + VMOVDQU64 (SI), Z0 + VMOVDQU64 (DI), Z1 + VMOVDQU64 (R8), Z2 + VMOVDQU64 (AX), Z3 + VXORPD Z0, Z2, Z2 + VXORPD Z1, Z3, Z3 + VXORPD Z1, Z0, Z1 + VXORPD Z2, Z3, Z3 + VMOVDQU64 Z0, (SI) + ADDQ $0x40, SI + VMOVDQU64 Z1, (DI) + ADDQ $0x40, DI + VMOVDQU64 Z2, (R8) + ADDQ $0x40, R8 + VMOVDQU64 Z3, (AX) + ADDQ $0x40, AX + SUBQ $0x40, DX + JA loop + VZEROUPPER + RET diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go index ffc1bb1c..f9c36e29 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go @@ -1,7 +1,7 @@ // Code generated by command: go generate gen.go. DO NOT EDIT. -//go:build !appengine && !noasm && gc && !nogen -// +build !appengine,!noasm,gc,!nogen +//go:build !appengine && !noasm && gc && !nogen && !nopshufb +// +build !appengine,!noasm,gc,!nogen,!nopshufb package reedsolomon @@ -10,691 +10,734 @@ import ( ) const ( - avx2CodeGen = true - maxAvx2Inputs = 10 - maxAvx2Outputs = 10 - minAvx2Size = 64 - avxSizeMask = maxInt - (minAvx2Size - 1) + codeGen = true + codeGenMaxGoroutines = 8 + codeGenMaxInputs = 10 + codeGenMaxOutputs = 10 + minCodeGenSize = 64 ) -func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { - n := (stop - start) & avxSizeMask +var ( + fAvx2 = galMulSlicesAvx2 + fAvx2Xor = galMulSlicesAvx2Xor + fGFNI = galMulSlicesGFNI + fGFNIXor = galMulSlicesGFNIXor + fAvxGFNI = galMulSlicesAvxGFNI + fAvxGFNIXor = galMulSlicesAvxGFNIXor +) + +func (r *reedSolomon) hasCodeGen(byteCount int, inputs, outputs int) (_, _ *func(matrix []byte, in, out [][]byte, start, stop int) int, ok bool) { + return &fAvx2, &fAvx2Xor, codeGen && pshufb && r.o.useAVX2 && + byteCount >= codeGenMinSize && inputs+outputs >= codeGenMinShards && + inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs +} + +func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(matrix []uint64, in, out [][]byte, start, stop int) int, ok bool) { + if r.o.useAvx512GFNI { + return &fGFNI, &fGFNIXor, codeGen && + byteCount >= codeGenMinSize && inputs+outputs >= codeGenMinShards && + inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs + } + return &fAvxGFNI, &fAvxGFNIXor, codeGen && r.o.useAvxGNFI && + byteCount >= codeGenMinSize && inputs+outputs >= codeGenMinShards && + inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs +} + +func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(in) { case 1: switch len(out) { case 1: mulAvxTwo_1x1_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_1x2_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_1x3_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_1x4(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_1x5(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_1x6(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_1x7(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_1x8(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_1x9(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_1x10(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 2: switch len(out) { case 1: mulAvxTwo_2x1_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_2x2_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_2x3_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_2x4(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_2x5(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_2x6(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_2x7(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_2x8(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_2x9(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_2x10(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 3: switch len(out) { case 1: mulAvxTwo_3x1_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_3x2_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_3x3_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_3x4(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_3x5(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_3x6(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_3x7(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_3x8(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_3x9(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_3x10(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 4: switch len(out) { case 1: mulAvxTwo_4x1_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_4x2_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_4x3_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_4x4(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_4x5(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_4x6(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_4x7(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_4x8(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_4x9(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_4x10(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 5: switch len(out) { case 1: mulAvxTwo_5x1_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_5x2_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_5x3_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_5x4(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_5x5(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_5x6(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_5x7(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_5x8(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_5x9(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_5x10(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 6: switch len(out) { case 1: mulAvxTwo_6x1_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_6x2_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_6x3_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_6x4(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_6x5(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_6x6(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_6x7(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_6x8(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_6x9(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_6x10(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 7: switch len(out) { case 1: mulAvxTwo_7x1_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_7x2_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_7x3_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_7x4(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_7x5(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_7x6(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_7x7(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_7x8(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_7x9(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_7x10(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 8: switch len(out) { case 1: mulAvxTwo_8x1_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_8x2_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_8x3_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_8x4(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_8x5(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_8x6(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_8x7(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_8x8(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_8x9(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_8x10(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 9: switch len(out) { case 1: mulAvxTwo_9x1_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_9x2_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_9x3_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_9x4(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_9x5(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_9x6(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_9x7(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_9x8(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_9x9(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_9x10(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 10: switch len(out) { case 1: mulAvxTwo_10x1_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_10x2_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_10x3_64(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_10x4(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_10x5(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_10x6(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_10x7(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_10x8(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_10x9(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_10x10(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } } panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) } -func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { - n := (stop - start) & avxSizeMask +func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } switch len(in) { case 1: switch len(out) { case 1: mulAvxTwo_1x1_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_1x2_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_1x3_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_1x4Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_1x5Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_1x6Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_1x7Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_1x8Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_1x9Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_1x10Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 2: switch len(out) { case 1: mulAvxTwo_2x1_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_2x2_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_2x3_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_2x4Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_2x5Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_2x6Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_2x7Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_2x8Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_2x9Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_2x10Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 3: switch len(out) { case 1: mulAvxTwo_3x1_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_3x2_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_3x3_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_3x4Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_3x5Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_3x6Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_3x7Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_3x8Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_3x9Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_3x10Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 4: switch len(out) { case 1: mulAvxTwo_4x1_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_4x2_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_4x3_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_4x4Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_4x5Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_4x6Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_4x7Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_4x8Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_4x9Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_4x10Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 5: switch len(out) { case 1: mulAvxTwo_5x1_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_5x2_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_5x3_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_5x4Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_5x5Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_5x6Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_5x7Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_5x8Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_5x9Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_5x10Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 6: switch len(out) { case 1: mulAvxTwo_6x1_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_6x2_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_6x3_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_6x4Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_6x5Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_6x6Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_6x7Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_6x8Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_6x9Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_6x10Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 7: switch len(out) { case 1: mulAvxTwo_7x1_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_7x2_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_7x3_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_7x4Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_7x5Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_7x6Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_7x7Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_7x8Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_7x9Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_7x10Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 8: switch len(out) { case 1: mulAvxTwo_8x1_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_8x2_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_8x3_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_8x4Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_8x5Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_8x6Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_8x7Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_8x8Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_8x9Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_8x10Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 9: switch len(out) { case 1: mulAvxTwo_9x1_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_9x2_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_9x3_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_9x4Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_9x5Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_9x6Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_9x7Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_9x8Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_9x9Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_9x10Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } case 10: switch len(out) { case 1: mulAvxTwo_10x1_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 2: mulAvxTwo_10x2_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 3: mulAvxTwo_10x3_64Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 63) case 4: mulAvxTwo_10x4Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 5: mulAvxTwo_10x5Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 6: mulAvxTwo_10x6Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 7: mulAvxTwo_10x7Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 8: mulAvxTwo_10x8Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 9: mulAvxTwo_10x9Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) case 10: mulAvxTwo_10x10Xor(matrix, in, out, start, n) - return n + return n & (maxInt - 31) } } panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) } func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { - n := (stop - start) & avxSizeMask + n := (stop - start) & (maxInt - (64 - 1)) + + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } switch len(in) { case 1: @@ -1032,7 +1075,12 @@ func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { } func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { - n := (stop - start) & avxSizeMask + n := (stop - start) & (maxInt - (64 - 1)) + + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } switch len(in) { case 1: @@ -1368,3 +1416,689 @@ func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int } panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) } + +func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { + n := (stop - start) & (maxInt - (32 - 1)) + + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + + switch len(in) { + case 1: + switch len(out) { + case 1: + mulAvxGFNI_1x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_1x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_1x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_1x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_1x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_1x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_1x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_1x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_1x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_1x10(matrix, in, out, start, n) + return n + } + case 2: + switch len(out) { + case 1: + mulAvxGFNI_2x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_2x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_2x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_2x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_2x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_2x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_2x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_2x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_2x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_2x10(matrix, in, out, start, n) + return n + } + case 3: + switch len(out) { + case 1: + mulAvxGFNI_3x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_3x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_3x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_3x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_3x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_3x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_3x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_3x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_3x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_3x10(matrix, in, out, start, n) + return n + } + case 4: + switch len(out) { + case 1: + mulAvxGFNI_4x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_4x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_4x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_4x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_4x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_4x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_4x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_4x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_4x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_4x10(matrix, in, out, start, n) + return n + } + case 5: + switch len(out) { + case 1: + mulAvxGFNI_5x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_5x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_5x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_5x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_5x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_5x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_5x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_5x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_5x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_5x10(matrix, in, out, start, n) + return n + } + case 6: + switch len(out) { + case 1: + mulAvxGFNI_6x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_6x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_6x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_6x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_6x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_6x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_6x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_6x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_6x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_6x10(matrix, in, out, start, n) + return n + } + case 7: + switch len(out) { + case 1: + mulAvxGFNI_7x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_7x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_7x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_7x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_7x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_7x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_7x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_7x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_7x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_7x10(matrix, in, out, start, n) + return n + } + case 8: + switch len(out) { + case 1: + mulAvxGFNI_8x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_8x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_8x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_8x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_8x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_8x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_8x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_8x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_8x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_8x10(matrix, in, out, start, n) + return n + } + case 9: + switch len(out) { + case 1: + mulAvxGFNI_9x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_9x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_9x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_9x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_9x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_9x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_9x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_9x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_9x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_9x10(matrix, in, out, start, n) + return n + } + case 10: + switch len(out) { + case 1: + mulAvxGFNI_10x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_10x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_10x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_10x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_10x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_10x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_10x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_10x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_10x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_10x10(matrix, in, out, start, n) + return n + } + } + panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) +} + +func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { + n := (stop - start) & (maxInt - (32 - 1)) + + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + + switch len(in) { + case 1: + switch len(out) { + case 1: + mulAvxGFNI_1x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_1x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_1x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_1x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_1x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_1x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_1x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_1x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_1x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_1x10Xor(matrix, in, out, start, n) + return n + } + case 2: + switch len(out) { + case 1: + mulAvxGFNI_2x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_2x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_2x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_2x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_2x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_2x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_2x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_2x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_2x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_2x10Xor(matrix, in, out, start, n) + return n + } + case 3: + switch len(out) { + case 1: + mulAvxGFNI_3x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_3x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_3x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_3x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_3x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_3x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_3x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_3x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_3x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_3x10Xor(matrix, in, out, start, n) + return n + } + case 4: + switch len(out) { + case 1: + mulAvxGFNI_4x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_4x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_4x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_4x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_4x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_4x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_4x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_4x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_4x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_4x10Xor(matrix, in, out, start, n) + return n + } + case 5: + switch len(out) { + case 1: + mulAvxGFNI_5x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_5x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_5x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_5x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_5x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_5x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_5x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_5x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_5x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_5x10Xor(matrix, in, out, start, n) + return n + } + case 6: + switch len(out) { + case 1: + mulAvxGFNI_6x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_6x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_6x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_6x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_6x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_6x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_6x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_6x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_6x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_6x10Xor(matrix, in, out, start, n) + return n + } + case 7: + switch len(out) { + case 1: + mulAvxGFNI_7x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_7x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_7x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_7x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_7x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_7x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_7x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_7x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_7x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_7x10Xor(matrix, in, out, start, n) + return n + } + case 8: + switch len(out) { + case 1: + mulAvxGFNI_8x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_8x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_8x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_8x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_8x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_8x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_8x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_8x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_8x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_8x10Xor(matrix, in, out, start, n) + return n + } + case 9: + switch len(out) { + case 1: + mulAvxGFNI_9x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_9x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_9x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_9x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_9x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_9x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_9x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_9x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_9x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_9x10Xor(matrix, in, out, start, n) + return n + } + case 10: + switch len(out) { + case 1: + mulAvxGFNI_10x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_10x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_10x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_10x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_10x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_10x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_10x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_10x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_10x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_10x10Xor(matrix, in, out, start, n) + return n + } + } + panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_arm64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_arm64.go new file mode 100644 index 00000000..656e0621 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_arm64.go @@ -0,0 +1,219 @@ +//go:build !appengine && !noasm && gc && !nogen && !nopshufb +// +build !appengine,!noasm,gc,!nogen,!nopshufb + +package reedsolomon + +import ( + "fmt" +) + +const ( + codeGen = true + codeGenMaxGoroutines = 16 + codeGenMaxInputs = 10 + codeGenMaxOutputs = 10 + minCodeGenSize = 64 +) + +var ( + fSve = galMulSlicesSve + fSveXor = galMulSlicesSveXor + fNeon = galMulSlicesNeon + fNeonXor = galMulSlicesNeonXor +) + +func (r *reedSolomon) hasCodeGen(byteCount int, inputs, outputs int) (_, _ *func(matrix []byte, in, out [][]byte, start, stop int) int, ok bool) { + if r.o.useSVE { + return &fSve, &fSveXor, codeGen && pshufb && + byteCount >= codeGenMinSize && inputs+outputs >= codeGenMinShards && + inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs + } + return &fNeon, &fNeonXor, codeGen && pshufb && r.o.useNEON && + byteCount >= codeGenMinSize && inputs+outputs >= codeGenMinShards && + inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs +} + +func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(matrix []uint64, in, out [][]byte, start, stop int) int, ok bool) { + return nil, nil, false +} + +// galMulSlicesSve +func galMulSlicesSve(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } + // fmt.Println(len(in), len(out)) + switch len(out) { + case 1: + mulSve_10x1_64(matrix, in, out, start, n) + return n & (maxInt - 63) + case 2: + mulSve_10x2_64(matrix, in, out, start, n) + return n & (maxInt - 63) + case 3: + mulSve_10x3_64(matrix, in, out, start, n) + return n & (maxInt - 63) + case 4: + mulSve_10x4(matrix, in, out, start, n) + return n & (maxInt - 31) + case 5: + mulSve_10x5(matrix, in, out, start, n) + return n & (maxInt - 31) + case 6: + mulSve_10x6(matrix, in, out, start, n) + return n & (maxInt - 31) + case 7: + mulSve_10x7(matrix, in, out, start, n) + return n & (maxInt - 31) + case 8: + mulSve_10x8(matrix, in, out, start, n) + return n & (maxInt - 31) + case 9: + mulSve_10x9(matrix, in, out, start, n) + return n & (maxInt - 31) + case 10: + mulSve_10x10(matrix, in, out, start, n) + return n & (maxInt - 31) + } + panic(fmt.Sprintf("ARM SVE: unhandled size: %dx%d", len(in), len(out))) +} + +// galMulSlicesSveXor +func galMulSlicesSveXor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = (stop - start) + + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } + + switch len(out) { + case 1: + mulSve_10x1_64Xor(matrix, in, out, start, n) + return n & (maxInt - 63) + case 2: + mulSve_10x2_64Xor(matrix, in, out, start, n) + return n & (maxInt - 63) + case 3: + mulSve_10x3_64Xor(matrix, in, out, start, n) + return n & (maxInt - 63) + case 4: + mulSve_10x4Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 5: + mulSve_10x5Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 6: + mulSve_10x6Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 7: + mulSve_10x7Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 8: + mulSve_10x8Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 9: + mulSve_10x9Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 10: + mulSve_10x10Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + } + panic(fmt.Sprintf("ARM SVE: unhandled size: %dx%d", len(in), len(out))) +} + +// galMulSlicesNeon +func galMulSlicesNeon(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = stop - start + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } + + switch len(out) { + case 1: + mulNeon_10x1_64(matrix, in, out, start, n) + return n & (maxInt - 63) + case 2: + mulNeon_10x2_64(matrix, in, out, start, n) + return n & (maxInt - 63) + case 3: + mulNeon_10x3_64(matrix, in, out, start, n) + return n & (maxInt - 63) + case 4: + mulNeon_10x4(matrix, in, out, start, n) + return n & (maxInt - 31) + case 5: + mulNeon_10x5(matrix, in, out, start, n) + return n & (maxInt - 31) + case 6: + mulNeon_10x6(matrix, in, out, start, n) + return n & (maxInt - 31) + case 7: + mulNeon_10x7(matrix, in, out, start, n) + return n & (maxInt - 31) + case 8: + mulNeon_10x8(matrix, in, out, start, n) + return n & (maxInt - 31) + case 9: + mulNeon_10x9(matrix, in, out, start, n) + return n & (maxInt - 31) + case 10: + mulNeon_10x10(matrix, in, out, start, n) + return n & (maxInt - 31) + } + panic(fmt.Sprintf("ARM NEON: unhandled size: %dx%d", len(in), len(out))) +} + +// galMulSlicesNeonXor +func galMulSlicesNeonXor(matrix []byte, in, out [][]byte, start, stop int) (n int) { + n = (stop - start) + if raceEnabled { + defer func() { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + }() + } + switch len(out) { + case 1: + mulNeon_10x1_64Xor(matrix, in, out, start, n) + return n & (maxInt - 63) + case 2: + mulNeon_10x2_64Xor(matrix, in, out, start, n) + return n & (maxInt - 63) + case 3: + mulNeon_10x3_64Xor(matrix, in, out, start, n) + return n & (maxInt - 63) + case 4: + mulNeon_10x4Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 5: + mulNeon_10x5Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 6: + mulNeon_10x6Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 7: + mulNeon_10x7Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 8: + mulNeon_10x8Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 9: + mulNeon_10x9Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + case 10: + mulNeon_10x10Xor(matrix, in, out, start, n) + return n & (maxInt - 31) + } + panic(fmt.Sprintf("ARM NEON: unhandled size: %dx%d", len(in), len(out))) +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_amd64.go new file mode 100644 index 00000000..3ac349d3 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_amd64.go @@ -0,0 +1,1415 @@ +// Code generated by command: go generate gen.go. DO NOT EDIT. + +//go:build !appengine && !noasm && gc && !nogen && nopshufb +// +build !appengine,!noasm,gc,!nogen,nopshufb + +package reedsolomon + +import ( + "fmt" +) + +const ( + codeGen = true + codeGenMaxGoroutines = 8 + codeGenMaxInputs = 10 + codeGenMaxOutputs = 10 + minCodeGenSize = 64 +) + +var ( + fGFNI = galMulSlicesGFNI + fGFNIXor = galMulSlicesGFNIXor + fAvxGFNI = galMulSlicesAvxGFNI + fAvxGFNIXor = galMulSlicesAvxGFNIXor +) + +func (r *reedSolomon) hasCodeGen(byteCount int, inputs, outputs int) (_, _ *func(matrix []byte, in, out [][]byte, start, stop int) int, ok bool) { + return nil, nil, false // no code generation for generic case (only GFNI cases) +} + +func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(matrix []uint64, in, out [][]byte, start, stop int) int, ok bool) { + if r.o.useAvx512GFNI { + return &fGFNI, &fGFNIXor, codeGen && + byteCount >= codeGenMinSize && inputs+outputs >= codeGenMinShards && + inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs + } + return &fAvxGFNI, &fAvxGFNIXor, codeGen && r.o.useAvxGNFI && + byteCount >= codeGenMinSize && inputs+outputs >= codeGenMinShards && + inputs <= codeGenMaxInputs && outputs <= codeGenMaxOutputs +} + +func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { panic(`no pshufb`) } +func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int { panic(`no pshufb`) } + +func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { + n := (stop - start) & (maxInt - (64 - 1)) + + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + + switch len(in) { + case 1: + switch len(out) { + case 1: + mulGFNI_1x1_64(matrix, in, out, start, n) + return n + case 2: + mulGFNI_1x2_64(matrix, in, out, start, n) + return n + case 3: + mulGFNI_1x3_64(matrix, in, out, start, n) + return n + case 4: + mulGFNI_1x4_64(matrix, in, out, start, n) + return n + case 5: + mulGFNI_1x5_64(matrix, in, out, start, n) + return n + case 6: + mulGFNI_1x6_64(matrix, in, out, start, n) + return n + case 7: + mulGFNI_1x7_64(matrix, in, out, start, n) + return n + case 8: + mulGFNI_1x8_64(matrix, in, out, start, n) + return n + case 9: + mulGFNI_1x9_64(matrix, in, out, start, n) + return n + case 10: + mulGFNI_1x10_64(matrix, in, out, start, n) + return n + } + case 2: + switch len(out) { + case 1: + mulGFNI_2x1_64(matrix, in, out, start, n) + return n + case 2: + mulGFNI_2x2_64(matrix, in, out, start, n) + return n + case 3: + mulGFNI_2x3_64(matrix, in, out, start, n) + return n + case 4: + mulGFNI_2x4_64(matrix, in, out, start, n) + return n + case 5: + mulGFNI_2x5_64(matrix, in, out, start, n) + return n + case 6: + mulGFNI_2x6_64(matrix, in, out, start, n) + return n + case 7: + mulGFNI_2x7_64(matrix, in, out, start, n) + return n + case 8: + mulGFNI_2x8_64(matrix, in, out, start, n) + return n + case 9: + mulGFNI_2x9_64(matrix, in, out, start, n) + return n + case 10: + mulGFNI_2x10_64(matrix, in, out, start, n) + return n + } + case 3: + switch len(out) { + case 1: + mulGFNI_3x1_64(matrix, in, out, start, n) + return n + case 2: + mulGFNI_3x2_64(matrix, in, out, start, n) + return n + case 3: + mulGFNI_3x3_64(matrix, in, out, start, n) + return n + case 4: + mulGFNI_3x4_64(matrix, in, out, start, n) + return n + case 5: + mulGFNI_3x5_64(matrix, in, out, start, n) + return n + case 6: + mulGFNI_3x6_64(matrix, in, out, start, n) + return n + case 7: + mulGFNI_3x7_64(matrix, in, out, start, n) + return n + case 8: + mulGFNI_3x8_64(matrix, in, out, start, n) + return n + case 9: + mulGFNI_3x9_64(matrix, in, out, start, n) + return n + case 10: + mulGFNI_3x10_64(matrix, in, out, start, n) + return n + } + case 4: + switch len(out) { + case 1: + mulGFNI_4x1_64(matrix, in, out, start, n) + return n + case 2: + mulGFNI_4x2_64(matrix, in, out, start, n) + return n + case 3: + mulGFNI_4x3_64(matrix, in, out, start, n) + return n + case 4: + mulGFNI_4x4_64(matrix, in, out, start, n) + return n + case 5: + mulGFNI_4x5_64(matrix, in, out, start, n) + return n + case 6: + mulGFNI_4x6_64(matrix, in, out, start, n) + return n + case 7: + mulGFNI_4x7_64(matrix, in, out, start, n) + return n + case 8: + mulGFNI_4x8_64(matrix, in, out, start, n) + return n + case 9: + mulGFNI_4x9_64(matrix, in, out, start, n) + return n + case 10: + mulGFNI_4x10_64(matrix, in, out, start, n) + return n + } + case 5: + switch len(out) { + case 1: + mulGFNI_5x1_64(matrix, in, out, start, n) + return n + case 2: + mulGFNI_5x2_64(matrix, in, out, start, n) + return n + case 3: + mulGFNI_5x3_64(matrix, in, out, start, n) + return n + case 4: + mulGFNI_5x4_64(matrix, in, out, start, n) + return n + case 5: + mulGFNI_5x5_64(matrix, in, out, start, n) + return n + case 6: + mulGFNI_5x6_64(matrix, in, out, start, n) + return n + case 7: + mulGFNI_5x7_64(matrix, in, out, start, n) + return n + case 8: + mulGFNI_5x8_64(matrix, in, out, start, n) + return n + case 9: + mulGFNI_5x9_64(matrix, in, out, start, n) + return n + case 10: + mulGFNI_5x10_64(matrix, in, out, start, n) + return n + } + case 6: + switch len(out) { + case 1: + mulGFNI_6x1_64(matrix, in, out, start, n) + return n + case 2: + mulGFNI_6x2_64(matrix, in, out, start, n) + return n + case 3: + mulGFNI_6x3_64(matrix, in, out, start, n) + return n + case 4: + mulGFNI_6x4_64(matrix, in, out, start, n) + return n + case 5: + mulGFNI_6x5_64(matrix, in, out, start, n) + return n + case 6: + mulGFNI_6x6_64(matrix, in, out, start, n) + return n + case 7: + mulGFNI_6x7_64(matrix, in, out, start, n) + return n + case 8: + mulGFNI_6x8_64(matrix, in, out, start, n) + return n + case 9: + mulGFNI_6x9_64(matrix, in, out, start, n) + return n + case 10: + mulGFNI_6x10_64(matrix, in, out, start, n) + return n + } + case 7: + switch len(out) { + case 1: + mulGFNI_7x1_64(matrix, in, out, start, n) + return n + case 2: + mulGFNI_7x2_64(matrix, in, out, start, n) + return n + case 3: + mulGFNI_7x3_64(matrix, in, out, start, n) + return n + case 4: + mulGFNI_7x4_64(matrix, in, out, start, n) + return n + case 5: + mulGFNI_7x5_64(matrix, in, out, start, n) + return n + case 6: + mulGFNI_7x6_64(matrix, in, out, start, n) + return n + case 7: + mulGFNI_7x7_64(matrix, in, out, start, n) + return n + case 8: + mulGFNI_7x8_64(matrix, in, out, start, n) + return n + case 9: + mulGFNI_7x9_64(matrix, in, out, start, n) + return n + case 10: + mulGFNI_7x10_64(matrix, in, out, start, n) + return n + } + case 8: + switch len(out) { + case 1: + mulGFNI_8x1_64(matrix, in, out, start, n) + return n + case 2: + mulGFNI_8x2_64(matrix, in, out, start, n) + return n + case 3: + mulGFNI_8x3_64(matrix, in, out, start, n) + return n + case 4: + mulGFNI_8x4_64(matrix, in, out, start, n) + return n + case 5: + mulGFNI_8x5_64(matrix, in, out, start, n) + return n + case 6: + mulGFNI_8x6_64(matrix, in, out, start, n) + return n + case 7: + mulGFNI_8x7_64(matrix, in, out, start, n) + return n + case 8: + mulGFNI_8x8_64(matrix, in, out, start, n) + return n + case 9: + mulGFNI_8x9_64(matrix, in, out, start, n) + return n + case 10: + mulGFNI_8x10_64(matrix, in, out, start, n) + return n + } + case 9: + switch len(out) { + case 1: + mulGFNI_9x1_64(matrix, in, out, start, n) + return n + case 2: + mulGFNI_9x2_64(matrix, in, out, start, n) + return n + case 3: + mulGFNI_9x3_64(matrix, in, out, start, n) + return n + case 4: + mulGFNI_9x4_64(matrix, in, out, start, n) + return n + case 5: + mulGFNI_9x5_64(matrix, in, out, start, n) + return n + case 6: + mulGFNI_9x6_64(matrix, in, out, start, n) + return n + case 7: + mulGFNI_9x7_64(matrix, in, out, start, n) + return n + case 8: + mulGFNI_9x8_64(matrix, in, out, start, n) + return n + case 9: + mulGFNI_9x9_64(matrix, in, out, start, n) + return n + case 10: + mulGFNI_9x10_64(matrix, in, out, start, n) + return n + } + case 10: + switch len(out) { + case 1: + mulGFNI_10x1_64(matrix, in, out, start, n) + return n + case 2: + mulGFNI_10x2_64(matrix, in, out, start, n) + return n + case 3: + mulGFNI_10x3_64(matrix, in, out, start, n) + return n + case 4: + mulGFNI_10x4_64(matrix, in, out, start, n) + return n + case 5: + mulGFNI_10x5_64(matrix, in, out, start, n) + return n + case 6: + mulGFNI_10x6_64(matrix, in, out, start, n) + return n + case 7: + mulGFNI_10x7_64(matrix, in, out, start, n) + return n + case 8: + mulGFNI_10x8_64(matrix, in, out, start, n) + return n + case 9: + mulGFNI_10x9_64(matrix, in, out, start, n) + return n + case 10: + mulGFNI_10x10_64(matrix, in, out, start, n) + return n + } + } + panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) +} + +func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { + n := (stop - start) & (maxInt - (64 - 1)) + + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + + switch len(in) { + case 1: + switch len(out) { + case 1: + mulGFNI_1x1_64Xor(matrix, in, out, start, n) + return n + case 2: + mulGFNI_1x2_64Xor(matrix, in, out, start, n) + return n + case 3: + mulGFNI_1x3_64Xor(matrix, in, out, start, n) + return n + case 4: + mulGFNI_1x4_64Xor(matrix, in, out, start, n) + return n + case 5: + mulGFNI_1x5_64Xor(matrix, in, out, start, n) + return n + case 6: + mulGFNI_1x6_64Xor(matrix, in, out, start, n) + return n + case 7: + mulGFNI_1x7_64Xor(matrix, in, out, start, n) + return n + case 8: + mulGFNI_1x8_64Xor(matrix, in, out, start, n) + return n + case 9: + mulGFNI_1x9_64Xor(matrix, in, out, start, n) + return n + case 10: + mulGFNI_1x10_64Xor(matrix, in, out, start, n) + return n + } + case 2: + switch len(out) { + case 1: + mulGFNI_2x1_64Xor(matrix, in, out, start, n) + return n + case 2: + mulGFNI_2x2_64Xor(matrix, in, out, start, n) + return n + case 3: + mulGFNI_2x3_64Xor(matrix, in, out, start, n) + return n + case 4: + mulGFNI_2x4_64Xor(matrix, in, out, start, n) + return n + case 5: + mulGFNI_2x5_64Xor(matrix, in, out, start, n) + return n + case 6: + mulGFNI_2x6_64Xor(matrix, in, out, start, n) + return n + case 7: + mulGFNI_2x7_64Xor(matrix, in, out, start, n) + return n + case 8: + mulGFNI_2x8_64Xor(matrix, in, out, start, n) + return n + case 9: + mulGFNI_2x9_64Xor(matrix, in, out, start, n) + return n + case 10: + mulGFNI_2x10_64Xor(matrix, in, out, start, n) + return n + } + case 3: + switch len(out) { + case 1: + mulGFNI_3x1_64Xor(matrix, in, out, start, n) + return n + case 2: + mulGFNI_3x2_64Xor(matrix, in, out, start, n) + return n + case 3: + mulGFNI_3x3_64Xor(matrix, in, out, start, n) + return n + case 4: + mulGFNI_3x4_64Xor(matrix, in, out, start, n) + return n + case 5: + mulGFNI_3x5_64Xor(matrix, in, out, start, n) + return n + case 6: + mulGFNI_3x6_64Xor(matrix, in, out, start, n) + return n + case 7: + mulGFNI_3x7_64Xor(matrix, in, out, start, n) + return n + case 8: + mulGFNI_3x8_64Xor(matrix, in, out, start, n) + return n + case 9: + mulGFNI_3x9_64Xor(matrix, in, out, start, n) + return n + case 10: + mulGFNI_3x10_64Xor(matrix, in, out, start, n) + return n + } + case 4: + switch len(out) { + case 1: + mulGFNI_4x1_64Xor(matrix, in, out, start, n) + return n + case 2: + mulGFNI_4x2_64Xor(matrix, in, out, start, n) + return n + case 3: + mulGFNI_4x3_64Xor(matrix, in, out, start, n) + return n + case 4: + mulGFNI_4x4_64Xor(matrix, in, out, start, n) + return n + case 5: + mulGFNI_4x5_64Xor(matrix, in, out, start, n) + return n + case 6: + mulGFNI_4x6_64Xor(matrix, in, out, start, n) + return n + case 7: + mulGFNI_4x7_64Xor(matrix, in, out, start, n) + return n + case 8: + mulGFNI_4x8_64Xor(matrix, in, out, start, n) + return n + case 9: + mulGFNI_4x9_64Xor(matrix, in, out, start, n) + return n + case 10: + mulGFNI_4x10_64Xor(matrix, in, out, start, n) + return n + } + case 5: + switch len(out) { + case 1: + mulGFNI_5x1_64Xor(matrix, in, out, start, n) + return n + case 2: + mulGFNI_5x2_64Xor(matrix, in, out, start, n) + return n + case 3: + mulGFNI_5x3_64Xor(matrix, in, out, start, n) + return n + case 4: + mulGFNI_5x4_64Xor(matrix, in, out, start, n) + return n + case 5: + mulGFNI_5x5_64Xor(matrix, in, out, start, n) + return n + case 6: + mulGFNI_5x6_64Xor(matrix, in, out, start, n) + return n + case 7: + mulGFNI_5x7_64Xor(matrix, in, out, start, n) + return n + case 8: + mulGFNI_5x8_64Xor(matrix, in, out, start, n) + return n + case 9: + mulGFNI_5x9_64Xor(matrix, in, out, start, n) + return n + case 10: + mulGFNI_5x10_64Xor(matrix, in, out, start, n) + return n + } + case 6: + switch len(out) { + case 1: + mulGFNI_6x1_64Xor(matrix, in, out, start, n) + return n + case 2: + mulGFNI_6x2_64Xor(matrix, in, out, start, n) + return n + case 3: + mulGFNI_6x3_64Xor(matrix, in, out, start, n) + return n + case 4: + mulGFNI_6x4_64Xor(matrix, in, out, start, n) + return n + case 5: + mulGFNI_6x5_64Xor(matrix, in, out, start, n) + return n + case 6: + mulGFNI_6x6_64Xor(matrix, in, out, start, n) + return n + case 7: + mulGFNI_6x7_64Xor(matrix, in, out, start, n) + return n + case 8: + mulGFNI_6x8_64Xor(matrix, in, out, start, n) + return n + case 9: + mulGFNI_6x9_64Xor(matrix, in, out, start, n) + return n + case 10: + mulGFNI_6x10_64Xor(matrix, in, out, start, n) + return n + } + case 7: + switch len(out) { + case 1: + mulGFNI_7x1_64Xor(matrix, in, out, start, n) + return n + case 2: + mulGFNI_7x2_64Xor(matrix, in, out, start, n) + return n + case 3: + mulGFNI_7x3_64Xor(matrix, in, out, start, n) + return n + case 4: + mulGFNI_7x4_64Xor(matrix, in, out, start, n) + return n + case 5: + mulGFNI_7x5_64Xor(matrix, in, out, start, n) + return n + case 6: + mulGFNI_7x6_64Xor(matrix, in, out, start, n) + return n + case 7: + mulGFNI_7x7_64Xor(matrix, in, out, start, n) + return n + case 8: + mulGFNI_7x8_64Xor(matrix, in, out, start, n) + return n + case 9: + mulGFNI_7x9_64Xor(matrix, in, out, start, n) + return n + case 10: + mulGFNI_7x10_64Xor(matrix, in, out, start, n) + return n + } + case 8: + switch len(out) { + case 1: + mulGFNI_8x1_64Xor(matrix, in, out, start, n) + return n + case 2: + mulGFNI_8x2_64Xor(matrix, in, out, start, n) + return n + case 3: + mulGFNI_8x3_64Xor(matrix, in, out, start, n) + return n + case 4: + mulGFNI_8x4_64Xor(matrix, in, out, start, n) + return n + case 5: + mulGFNI_8x5_64Xor(matrix, in, out, start, n) + return n + case 6: + mulGFNI_8x6_64Xor(matrix, in, out, start, n) + return n + case 7: + mulGFNI_8x7_64Xor(matrix, in, out, start, n) + return n + case 8: + mulGFNI_8x8_64Xor(matrix, in, out, start, n) + return n + case 9: + mulGFNI_8x9_64Xor(matrix, in, out, start, n) + return n + case 10: + mulGFNI_8x10_64Xor(matrix, in, out, start, n) + return n + } + case 9: + switch len(out) { + case 1: + mulGFNI_9x1_64Xor(matrix, in, out, start, n) + return n + case 2: + mulGFNI_9x2_64Xor(matrix, in, out, start, n) + return n + case 3: + mulGFNI_9x3_64Xor(matrix, in, out, start, n) + return n + case 4: + mulGFNI_9x4_64Xor(matrix, in, out, start, n) + return n + case 5: + mulGFNI_9x5_64Xor(matrix, in, out, start, n) + return n + case 6: + mulGFNI_9x6_64Xor(matrix, in, out, start, n) + return n + case 7: + mulGFNI_9x7_64Xor(matrix, in, out, start, n) + return n + case 8: + mulGFNI_9x8_64Xor(matrix, in, out, start, n) + return n + case 9: + mulGFNI_9x9_64Xor(matrix, in, out, start, n) + return n + case 10: + mulGFNI_9x10_64Xor(matrix, in, out, start, n) + return n + } + case 10: + switch len(out) { + case 1: + mulGFNI_10x1_64Xor(matrix, in, out, start, n) + return n + case 2: + mulGFNI_10x2_64Xor(matrix, in, out, start, n) + return n + case 3: + mulGFNI_10x3_64Xor(matrix, in, out, start, n) + return n + case 4: + mulGFNI_10x4_64Xor(matrix, in, out, start, n) + return n + case 5: + mulGFNI_10x5_64Xor(matrix, in, out, start, n) + return n + case 6: + mulGFNI_10x6_64Xor(matrix, in, out, start, n) + return n + case 7: + mulGFNI_10x7_64Xor(matrix, in, out, start, n) + return n + case 8: + mulGFNI_10x8_64Xor(matrix, in, out, start, n) + return n + case 9: + mulGFNI_10x9_64Xor(matrix, in, out, start, n) + return n + case 10: + mulGFNI_10x10_64Xor(matrix, in, out, start, n) + return n + } + } + panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) +} + +func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int { + n := (stop - start) & (maxInt - (32 - 1)) + + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + + switch len(in) { + case 1: + switch len(out) { + case 1: + mulAvxGFNI_1x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_1x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_1x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_1x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_1x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_1x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_1x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_1x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_1x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_1x10(matrix, in, out, start, n) + return n + } + case 2: + switch len(out) { + case 1: + mulAvxGFNI_2x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_2x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_2x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_2x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_2x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_2x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_2x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_2x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_2x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_2x10(matrix, in, out, start, n) + return n + } + case 3: + switch len(out) { + case 1: + mulAvxGFNI_3x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_3x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_3x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_3x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_3x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_3x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_3x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_3x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_3x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_3x10(matrix, in, out, start, n) + return n + } + case 4: + switch len(out) { + case 1: + mulAvxGFNI_4x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_4x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_4x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_4x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_4x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_4x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_4x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_4x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_4x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_4x10(matrix, in, out, start, n) + return n + } + case 5: + switch len(out) { + case 1: + mulAvxGFNI_5x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_5x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_5x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_5x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_5x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_5x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_5x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_5x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_5x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_5x10(matrix, in, out, start, n) + return n + } + case 6: + switch len(out) { + case 1: + mulAvxGFNI_6x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_6x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_6x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_6x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_6x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_6x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_6x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_6x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_6x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_6x10(matrix, in, out, start, n) + return n + } + case 7: + switch len(out) { + case 1: + mulAvxGFNI_7x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_7x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_7x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_7x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_7x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_7x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_7x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_7x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_7x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_7x10(matrix, in, out, start, n) + return n + } + case 8: + switch len(out) { + case 1: + mulAvxGFNI_8x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_8x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_8x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_8x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_8x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_8x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_8x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_8x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_8x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_8x10(matrix, in, out, start, n) + return n + } + case 9: + switch len(out) { + case 1: + mulAvxGFNI_9x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_9x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_9x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_9x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_9x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_9x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_9x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_9x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_9x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_9x10(matrix, in, out, start, n) + return n + } + case 10: + switch len(out) { + case 1: + mulAvxGFNI_10x1(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_10x2(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_10x3(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_10x4(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_10x5(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_10x6(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_10x7(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_10x8(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_10x9(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_10x10(matrix, in, out, start, n) + return n + } + } + panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) +} + +func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int { + n := (stop - start) & (maxInt - (32 - 1)) + + if raceEnabled { + raceReadSlices(in, start, n) + raceWriteSlices(out, start, n) + } + + switch len(in) { + case 1: + switch len(out) { + case 1: + mulAvxGFNI_1x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_1x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_1x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_1x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_1x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_1x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_1x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_1x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_1x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_1x10Xor(matrix, in, out, start, n) + return n + } + case 2: + switch len(out) { + case 1: + mulAvxGFNI_2x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_2x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_2x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_2x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_2x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_2x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_2x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_2x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_2x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_2x10Xor(matrix, in, out, start, n) + return n + } + case 3: + switch len(out) { + case 1: + mulAvxGFNI_3x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_3x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_3x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_3x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_3x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_3x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_3x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_3x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_3x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_3x10Xor(matrix, in, out, start, n) + return n + } + case 4: + switch len(out) { + case 1: + mulAvxGFNI_4x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_4x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_4x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_4x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_4x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_4x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_4x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_4x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_4x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_4x10Xor(matrix, in, out, start, n) + return n + } + case 5: + switch len(out) { + case 1: + mulAvxGFNI_5x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_5x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_5x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_5x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_5x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_5x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_5x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_5x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_5x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_5x10Xor(matrix, in, out, start, n) + return n + } + case 6: + switch len(out) { + case 1: + mulAvxGFNI_6x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_6x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_6x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_6x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_6x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_6x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_6x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_6x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_6x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_6x10Xor(matrix, in, out, start, n) + return n + } + case 7: + switch len(out) { + case 1: + mulAvxGFNI_7x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_7x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_7x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_7x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_7x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_7x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_7x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_7x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_7x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_7x10Xor(matrix, in, out, start, n) + return n + } + case 8: + switch len(out) { + case 1: + mulAvxGFNI_8x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_8x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_8x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_8x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_8x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_8x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_8x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_8x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_8x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_8x10Xor(matrix, in, out, start, n) + return n + } + case 9: + switch len(out) { + case 1: + mulAvxGFNI_9x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_9x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_9x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_9x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_9x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_9x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_9x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_9x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_9x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_9x10Xor(matrix, in, out, start, n) + return n + } + case 10: + switch len(out) { + case 1: + mulAvxGFNI_10x1Xor(matrix, in, out, start, n) + return n + case 2: + mulAvxGFNI_10x2Xor(matrix, in, out, start, n) + return n + case 3: + mulAvxGFNI_10x3Xor(matrix, in, out, start, n) + return n + case 4: + mulAvxGFNI_10x4Xor(matrix, in, out, start, n) + return n + case 5: + mulAvxGFNI_10x5Xor(matrix, in, out, start, n) + return n + case 6: + mulAvxGFNI_10x6Xor(matrix, in, out, start, n) + return n + case 7: + mulAvxGFNI_10x7Xor(matrix, in, out, start, n) + return n + case 8: + mulAvxGFNI_10x8Xor(matrix, in, out, start, n) + return n + case 9: + mulAvxGFNI_10x9Xor(matrix, in, out, start, n) + return n + case 10: + mulAvxGFNI_10x10Xor(matrix, in, out, start, n) + return n + } + } + panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_arm64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_arm64.go new file mode 100644 index 00000000..db2aaa61 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_arm64.go @@ -0,0 +1,22 @@ +// Code generated by command: go generate gen.go. DO NOT EDIT. + +//go:build !appengine && !noasm && gc && !nogen && nopshufb +// +build !appengine,!noasm,gc,!nogen,nopshufb + +package reedsolomon + +const ( + codeGen = false + codeGenMaxGoroutines = 16 + codeGenMaxInputs = 10 + codeGenMaxOutputs = 10 + minCodeGenSize = 64 +) + +func (r *reedSolomon) hasCodeGen(byteCount int, inputs, outputs int) (_, _ *func(matrix []byte, in, out [][]byte, start, stop int) int, ok bool) { + return nil, nil, false +} + +func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) (_, _ *func(matrix []uint64, in, out [][]byte, start, stop int) int, ok bool) { + return nil, nil, false +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go index 9043601a..fb5a3b65 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go +++ b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go @@ -1,12 +1,11 @@ -//go:build (!amd64 || noasm || appengine || gccgo) && (!arm64 || noasm || appengine || gccgo) && (!ppc64le || noasm || appengine || gccgo) -// +build !amd64 noasm appengine gccgo -// +build !arm64 noasm appengine gccgo -// +build !ppc64le noasm appengine gccgo +//go:build (!amd64 || noasm || appengine || gccgo) && (!arm64 || noasm || appengine || gccgo || nopshufb) && (!ppc64le || noasm || appengine || gccgo || nopshufb) // Copyright 2015, Klaus Post, see LICENSE for details. package reedsolomon +const pshufb = false + func galMulSlice(c byte, in, out []byte, o *options) { out = out[:len(in)] if c == 1 { @@ -31,11 +30,6 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { } } -// simple slice xor -func sliceXor(in, out []byte, o *options) { - sliceXorGo(in, out, o) -} - func init() { defaultOptions.useAVX512 = false } diff --git a/vendor/github.com/klauspost/reedsolomon/galois_nopshufb_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_nopshufb_amd64.go new file mode 100644 index 00000000..89c74e24 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_nopshufb_amd64.go @@ -0,0 +1,146 @@ +// Copyright 2015, Klaus Post, see LICENSE for details + +//go:build nopshufb && !noasm + +package reedsolomon + +// bigSwitchover is the size where 64 bytes are processed per loop. +const bigSwitchover = 128 + +const pshufb = false + +// simple slice xor +func sliceXor(in, out []byte, o *options) { + if o.useSSE2 { + if len(in) >= bigSwitchover { + if o.useAVX2 { + avx2XorSlice_64(in, out) + done := (len(in) >> 6) << 6 + in = in[done:] + out = out[done:] + } else { + sSE2XorSlice_64(in, out) + done := (len(in) >> 6) << 6 + in = in[done:] + out = out[done:] + } + } + if len(in) >= 16 { + sSE2XorSlice(in, out) + done := (len(in) >> 4) << 4 + in = in[done:] + out = out[done:] + } + } else { + sliceXorGo(in, out, o) + return + } + out = out[:len(in)] + for i := range in { + out[i] ^= in[i] + } +} + +func galMulSlice(c byte, in, out []byte, o *options) { + out = out[:len(in)] + if c == 1 { + copy(out, in) + return + } + mt := mulTable[c][:256] + for len(in) >= 4 { + ii := (*[4]byte)(in) + oo := (*[4]byte)(out) + oo[0] = mt[ii[0]] + oo[1] = mt[ii[1]] + oo[2] = mt[ii[2]] + oo[3] = mt[ii[3]] + in = in[4:] + out = out[4:] + } + for n, input := range in { + out[n] = mt[input] + } +} + +func galMulSliceXor(c byte, in, out []byte, o *options) { + out = out[:len(in)] + if c == 1 { + sliceXor(in, out, o) + return + } + mt := mulTable[c][:256] + for len(in) >= 4 { + ii := (*[4]byte)(in) + oo := (*[4]byte)(out) + oo[0] ^= mt[ii[0]] + oo[1] ^= mt[ii[1]] + oo[2] ^= mt[ii[2]] + oo[3] ^= mt[ii[3]] + in = in[4:] + out = out[4:] + } + for n, input := range in { + out[n] ^= mt[input] + } +} + +func init() { + defaultOptions.useAVX512 = false +} + +// 4-way butterfly +func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) { + ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o) +} + +// 4-way butterfly +func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) { + ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o) +} + +// 4-way butterfly +func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) { + fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o) +} + +// 4-way butterfly +func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) { + fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o) +} + +// 2-way butterfly forward +func fftDIT2(x, y []byte, log_m ffe, o *options) { + // Reference version: + refMulAdd(x, y, log_m) + sliceXor(x, y, o) +} + +// 2-way butterfly forward +func fftDIT28(x, y []byte, log_m ffe8, o *options) { + // Reference version: + refMulAdd8(x, y, log_m) + sliceXor(x, y, o) +} + +// 2-way butterfly inverse +func ifftDIT2(x, y []byte, log_m ffe, o *options) { + // Reference version: + sliceXor(x, y, o) + refMulAdd(x, y, log_m) +} + +// 2-way butterfly inverse +func ifftDIT28(x, y []byte, log_m ffe8, o *options) { + // Reference version: + sliceXor(x, y, o) + refMulAdd8(x, y, log_m) +} + +func mulgf16(x, y []byte, log_m ffe, o *options) { + refMul(x, y, log_m) +} + +func mulgf8(x, y []byte, log_m ffe8, o *options) { + refMul8(x, y, log_m) +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go deleted file mode 100644 index e67905b1..00000000 --- a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go +++ /dev/null @@ -1,14 +0,0 @@ -//go:build !amd64 || noasm || appengine || gccgo -// +build !amd64 noasm appengine gccgo - -// Copyright 2020, Klaus Post, see LICENSE for details. - -package reedsolomon - -func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, byteCount int) { - panic("codeSomeShardsAvx512 should not be called if built without asm") -} - -func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, byteCount int) { - panic("codeSomeShardsAvx512P should not be called if built without asm") -} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go index 8cd7b52b..c4c80351 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go +++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go @@ -1,11 +1,12 @@ -//go:build !noasm && !appengine && !gccgo -// +build !noasm,!appengine,!gccgo +//go:build !noasm && !appengine && !gccgo && !nopshufb // Copyright 2015, Klaus Post, see LICENSE for details. // Copyright 2018, Minio, Inc. package reedsolomon +const pshufb = true + //go:noescape func galMulPpc(low, high, in, out []byte) @@ -66,11 +67,6 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { } } -// slice galois add -func sliceXor(in, out []byte, o *options) { - sliceXorGo(in, out, o) -} - // 4-way butterfly func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) { ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o) diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s index 7213c61b..c585c2b6 100644 --- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s +++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s @@ -1,6 +1,7 @@ //+build !noasm //+build !appengine //+build !gccgo +//+build !pshufb // Copyright 2015, Klaus Post, see LICENSE for details. // Copyright 2018, Minio, Inc. diff --git a/vendor/github.com/klauspost/reedsolomon/leopard.go b/vendor/github.com/klauspost/reedsolomon/leopard.go index 16bec4b9..adf72c8f 100644 --- a/vendor/github.com/klauspost/reedsolomon/leopard.go +++ b/vendor/github.com/klauspost/reedsolomon/leopard.go @@ -303,7 +303,7 @@ func (r *leopardFF16) Split(data []byte) ([][]byte, error) { // Copy partial shards copyFrom := data[perShard*fullShards : dataLen] for i := range padding { - if len(copyFrom) <= 0 { + if len(copyFrom) == 0 { break } copyFrom = copyFrom[copy(padding[i], copyFrom):] @@ -333,7 +333,10 @@ func (r *leopardFF16) Split(data []byte) ([][]byte, error) { } func (r *leopardFF16) ReconstructSome(shards [][]byte, required []bool) error { - return r.ReconstructData(shards) + if len(required) == r.totalShards { + return r.reconstruct(shards, true) + } + return r.reconstruct(shards, false) } func (r *leopardFF16) Reconstruct(shards [][]byte) error { @@ -448,13 +451,13 @@ func (r *leopardFF16) reconstruct(shards [][]byte, recoverAll bool) error { } // Evaluate error locator polynomial - fwht(&errLocs, order, m+r.dataShards) + fwht(&errLocs, m+r.dataShards) for i := 0; i < order; i++ { errLocs[i] = ffe((uint(errLocs[i]) * uint(logWalsh[i])) % modulus) } - fwht(&errLocs, order, order) + fwht(&errLocs, order) var work [][]byte if w, ok := r.workPool.Get().([][]byte); ok { @@ -860,11 +863,11 @@ func ceilPow2(n int) int { // Decimation in time (DIT) Fast Walsh-Hadamard Transform // Unrolls pairs of layers to perform cross-layer operations in registers // mtrunc: Number of elements that are non-zero at the front of data -func fwht(data *[order]ffe, m, mtrunc int) { +func fwht(data *[order]ffe, mtrunc int) { // Decimation in time: Unroll 2 layers at a time dist := 1 dist4 := 4 - for dist4 <= m { + for dist4 <= order { // For each set of dist*4 elements: for r := 0; r < mtrunc; r += dist4 { // For each set of dist elements: @@ -895,14 +898,6 @@ func fwht(data *[order]ffe, m, mtrunc int) { dist = dist4 dist4 <<= 2 } - - // If there is one layer left: - if dist < m { - dist := uint16(dist) - for i := uint16(0); i < dist; i++ { - fwht2(&data[i], &data[i+dist]) - } - } } func fwht4(data []ffe, s int) { @@ -1033,7 +1028,7 @@ func initFFTSkew() { } logWalsh[0] = 0 - fwht(logWalsh, order, order) + fwht(logWalsh, order) } func initMul16LUT() { diff --git a/vendor/github.com/klauspost/reedsolomon/leopard8.go b/vendor/github.com/klauspost/reedsolomon/leopard8.go index 31c97ea3..cd0a23ee 100644 --- a/vendor/github.com/klauspost/reedsolomon/leopard8.go +++ b/vendor/github.com/klauspost/reedsolomon/leopard8.go @@ -344,7 +344,7 @@ func (r *leopardFF8) Split(data []byte) ([][]byte, error) { // Copy partial shards copyFrom := data[perShard*fullShards : dataLen] for i := range padding { - if len(copyFrom) <= 0 { + if len(copyFrom) == 0 { break } copyFrom = copyFrom[copy(padding[i], copyFrom):] @@ -369,7 +369,10 @@ func (r *leopardFF8) Split(data []byte) ([][]byte, error) { } func (r *leopardFF8) ReconstructSome(shards [][]byte, required []bool) error { - return r.ReconstructData(shards) + if len(required) == r.totalShards { + return r.reconstruct(shards, true) + } + return r.reconstruct(shards, false) } func (r *leopardFF8) Reconstruct(shards [][]byte) error { @@ -506,13 +509,13 @@ func (r *leopardFF8) reconstruct(shards [][]byte, recoverAll bool) error { } // Evaluate error locator polynomial8 - fwht8(&errLocs, order8, m+r.dataShards) + fwht8(&errLocs, m+r.dataShards) for i := 0; i < order8; i++ { errLocs[i] = ffe8((uint(errLocs[i]) * uint(logWalsh8[i])) % modulus8) } - fwht8(&errLocs, order8, order8) + fwht8(&errLocs, order8) if r.inversion != nil { c := leopardGF8cache{ @@ -940,11 +943,11 @@ func subMod8(a, b ffe8) ffe8 { // Decimation in time (DIT) Fast Walsh-Hadamard Transform // Unrolls pairs of layers to perform cross-layer operations in registers // mtrunc: Number of elements that are non-zero at the front of data -func fwht8(data *[order8]ffe8, m, mtrunc int) { +func fwht8(data *[order8]ffe8, mtrunc int) { // Decimation in time: Unroll 2 layers at a time dist := 1 dist4 := 4 - for dist4 <= m { + for dist4 <= order8 { // For each set of dist*4 elements: for r := 0; r < mtrunc; r += dist4 { // For each set of dist elements: @@ -975,14 +978,6 @@ func fwht8(data *[order8]ffe8, m, mtrunc int) { dist = dist4 dist4 <<= 2 } - - // If there is one layer left: - if dist < m { - dist := uint16(dist) - for i := uint16(0); i < dist; i++ { - fwht28(&data[i], &data[i+dist]) - } - } } func fwht48(data []ffe8, s int) { @@ -1110,7 +1105,7 @@ func initFFTSkew8() { } logWalsh8[0] = 0 - fwht8(logWalsh8, order8, order8) + fwht8(logWalsh8, order8) } func initMul8LUT() { diff --git a/vendor/github.com/klauspost/reedsolomon/matrix.go b/vendor/github.com/klauspost/reedsolomon/matrix.go index 497a3d9a..bfdcca66 100644 --- a/vendor/github.com/klauspost/reedsolomon/matrix.go +++ b/vendor/github.com/klauspost/reedsolomon/matrix.go @@ -67,11 +67,11 @@ var errColSizeMismatch = errors.New("column size is not the same for all rows") func (m matrix) Check() error { rows := len(m) - if rows <= 0 { + if rows == 0 { return errInvalidRowSize } cols := len(m[0]) - if cols <= 0 { + if cols == 0 { return errInvalidColSize } @@ -231,7 +231,7 @@ func (m matrix) gaussianElimination() error { } // Scale to 1. if m[r][r] != 1 { - scale := galDivide(1, m[r][r]) + scale := galOneOver(m[r][r]) for c := 0; c < columns; c++ { m[r][c] = galMultiply(m[r][c], scale) } diff --git a/vendor/github.com/klauspost/reedsolomon/options.go b/vendor/github.com/klauspost/reedsolomon/options.go index f74fe00f..cde25556 100644 --- a/vendor/github.com/klauspost/reedsolomon/options.go +++ b/vendor/github.com/klauspost/reedsolomon/options.go @@ -2,6 +2,7 @@ package reedsolomon import ( "runtime" + "strings" "github.com/klauspost/cpuid/v2" ) @@ -15,15 +16,24 @@ type options struct { shardSize int perRound int - useGFNI, useAVX512, useAVX2, useSSSE3, useSSE2 bool - useJerasureMatrix bool - usePAR1Matrix bool - useCauchy bool - fastOneParity bool - inversionCache bool - forcedInversionCache bool - customMatrix [][]byte - withLeopard leopardMode + useAvxGNFI, + useAvx512GFNI, + useAVX512, + useAVX2, + useSSSE3, + useSSE2, + useNEON, + useSVE bool + vectorLength int + + useJerasureMatrix bool + usePAR1Matrix bool + useCauchy bool + fastOneParity bool + inversionCache bool + forcedInversionCache bool + customMatrix [][]byte + withLeopard leopardMode // stream options concReads bool @@ -38,11 +48,15 @@ var defaultOptions = options{ inversionCache: true, // Detect CPU capabilities. - useSSSE3: cpuid.CPU.Supports(cpuid.SSSE3), - useSSE2: cpuid.CPU.Supports(cpuid.SSE2), - useAVX2: cpuid.CPU.Supports(cpuid.AVX2), - useAVX512: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512BW, cpuid.AVX512VL), - useGFNI: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.GFNI, cpuid.AVX512DQ), + useSSSE3: cpuid.CPU.Supports(cpuid.SSSE3), + useSSE2: cpuid.CPU.Supports(cpuid.SSE2), + useAVX2: cpuid.CPU.Supports(cpuid.AVX2), + useAVX512: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512BW, cpuid.AVX512VL), + useAvx512GFNI: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.GFNI, cpuid.AVX512DQ), + useAvxGNFI: cpuid.CPU.Supports(cpuid.AVX, cpuid.GFNI), + useNEON: cpuid.CPU.Supports(cpuid.ASIMD), + useSVE: cpuid.CPU.Supports(cpuid.SVE), + vectorLength: 32, // default vector length is 32 bytes (256 bits) for AVX2 code gen } // leopardMode controls the use of leopard GF in encoding and decoding. @@ -159,10 +173,14 @@ func WithSSSE3(enabled bool) Option { } // WithAVX2 allows to enable/disable AVX2 instructions. -// If not set, AVX2 will be turned on or off automatically based on CPU ID information. +// If not set, AVX will be turned on or off automatically based on CPU ID information. +// This will also disable AVX GFNI instructions. func WithAVX2(enabled bool) Option { return func(o *options) { o.useAVX2 = enabled + if o.useAvxGNFI { + o.useAvxGNFI = enabled + } } } @@ -178,7 +196,7 @@ func WithSSE2(enabled bool) Option { func WithAVX512(enabled bool) Option { return func(o *options) { o.useAVX512 = enabled - o.useGFNI = enabled + o.useAvx512GFNI = enabled } } @@ -186,7 +204,15 @@ func WithAVX512(enabled bool) Option { // If not set, GFNI will be turned on or off automatically based on CPU ID information. func WithGFNI(enabled bool) Option { return func(o *options) { - o.useGFNI = enabled + o.useAvx512GFNI = enabled + } +} + +// WithAVXGFNI allows to enable/disable GFNI with AVX instructions. +// If not set, GFNI will be turned on or off automatically based on CPU ID information. +func WithAVXGFNI(enabled bool) Option { + return func(o *options) { + o.useAvxGNFI = enabled } } @@ -275,3 +301,34 @@ func WithLeopardGF(enabled bool) Option { } } } + +func (o *options) cpuOptions() string { + var res []string + if o.useSSE2 { + res = append(res, "SSE2") + } + if o.useAVX2 { + res = append(res, "AVX2") + } + if o.useSSSE3 { + res = append(res, "SSSE3") + } + if o.useAVX512 { + res = append(res, "AVX512") + } + if o.useAvx512GFNI { + res = append(res, "AVX512+GFNI") + } + if o.useAvxGNFI { + res = append(res, "AVX+GFNI") + } + if o.useSVE { + res = append(res, "ARM+SVE") + } else if o.useNEON { + res = append(res, "ARM+NEON") + } + if len(res) == 0 { + return "pure Go" + } + return strings.Join(res, ",") +} diff --git a/vendor/github.com/klauspost/reedsolomon/race.go b/vendor/github.com/klauspost/reedsolomon/race.go new file mode 100644 index 00000000..4f2c0b69 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/race.go @@ -0,0 +1,61 @@ +// Copyright (c) 2024+ Klaus Post. See LICENSE for license + +//go:build race + +package reedsolomon + +import ( + "runtime" + "unsafe" +) + +const raceEnabled = true + +func raceReadSlice[T any](s []T) { + if len(s) == 0 { + return + } + runtime.RaceReadRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) +} + +func raceWriteSlice[T any](s []T) { + if len(s) == 0 { + return + } + runtime.RaceWriteRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) +} + +func raceReadSlices[T any](s [][]T, start, n int) { + if len(s) == 0 { + return + } + runtime.RaceReadRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) + for _, v := range s { + if len(v) == 0 { + continue + } + n := n + if n < 0 { + n = len(v) - start + } + runtime.RaceReadRange(unsafe.Pointer(&v[start]), n*int(unsafe.Sizeof(v[0]))) + } +} + +func raceWriteSlices[T any](s [][]T, start, n int) { + if len(s) == 0 { + return + } + runtime.RaceReadRange(unsafe.Pointer(&s[0]), len(s)*int(unsafe.Sizeof(s[0]))) + + for _, v := range s { + if len(v) == 0 { + continue + } + n := n + if n < 0 { + n = len(v) - start + } + runtime.RaceWriteRange(unsafe.Pointer(&v[start]), n*int(unsafe.Sizeof(v[0]))) + } +} diff --git a/vendor/github.com/klauspost/reedsolomon/race_none.go b/vendor/github.com/klauspost/reedsolomon/race_none.go new file mode 100644 index 00000000..c7d05f28 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/race_none.go @@ -0,0 +1,17 @@ +// Copyright (c) 2024+ Klaus Post. See LICENSE for license + +//go:build !race + +package reedsolomon + +const raceEnabled = false + +func raceReadSlice[T any](s []T) { +} + +func raceWriteSlice[T any](s []T) { +} + +func raceReadSlices[T any](s [][]T, start, n int) {} + +func raceWriteSlices[T any](s [][]T, start, n int) {} diff --git a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go index 20e39748..443543f5 100644 --- a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go +++ b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go @@ -77,11 +77,12 @@ type Encoder interface { // calling the Verify function is likely to fail. ReconstructData(shards [][]byte) error - // ReconstructSome will recreate only requested data shards, if possible. + // ReconstructSome will recreate only requested shards, if possible. // // Given a list of shards, some of which contain data, fills in the - // data shards indicated by true values in the "required" parameter. - // The length of "required" array must be equal to DataShards. + // shards indicated by true values in the "required" parameter. + // The length of the "required" array must be equal to either Shards or DataShards. + // If the length is equal to DataShards, the reconstruction of parity shards will be ignored. // // The length of "shards" array must be equal to Shards. // You indicate that a shard is missing by setting it to nil or zero-length. @@ -152,9 +153,8 @@ type Extensions interface { } const ( - avx2CodeGenMinSize = 64 - avx2CodeGenMinShards = 3 - avx2CodeGenMaxGoroutines = 8 + codeGenMinSize = 64 + codeGenMinShards = 3 gfniCodeGenMaxGoroutines = 4 intSize = 32 << (^uint(0) >> 63) // 32 or 64 @@ -283,7 +283,7 @@ func buildMatrixJerasure(dataShards, totalShards int) (matrix, error) { // Multiply by the inverted matrix (same as vm.Multiply(vm[0:dataShards].Invert())) if vm[i][i] != 1 { // Make vm[i][i] = 1 by dividing the column by vm[i][i] - tmp := galDivide(1, vm[i][i]) + tmp := galOneOver(vm[i][i]) for j := 0; j < totalShards; j++ { vm[j][i] = galMultiply(vm[j][i], tmp) } @@ -303,7 +303,7 @@ func buildMatrixJerasure(dataShards, totalShards int) (matrix, error) { for j := 0; j < dataShards; j++ { tmp := vm[dataShards][j] if tmp != 1 { - tmp = galDivide(1, tmp) + tmp = galOneOver(tmp) for i := dataShards; i < totalShards; i++ { vm[i][j] = galMultiply(vm[i][j], tmp) } @@ -314,7 +314,7 @@ func buildMatrixJerasure(dataShards, totalShards int) (matrix, error) { for i := dataShards + 1; i < totalShards; i++ { tmp := vm[i][0] if tmp != 1 { - tmp = galDivide(1, tmp) + tmp = galOneOver(tmp) for j := 0; j < dataShards; j++ { vm[i][j] = galMultiply(vm[i][j], tmp) } @@ -481,21 +481,23 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) { r.o.perRound = 128 << 10 } + _, _, useCodeGen := r.hasCodeGen(codeGenMinSize, codeGenMaxInputs, codeGenMaxOutputs) + divide := parityShards + 1 - if avx2CodeGen && r.o.useAVX2 && (dataShards > maxAvx2Inputs || parityShards > maxAvx2Outputs) { + if codeGen && useCodeGen && (dataShards > codeGenMaxInputs || parityShards > codeGenMaxOutputs) { // Base on L1 cache if we have many inputs. r.o.perRound = cpuid.CPU.Cache.L1D if r.o.perRound < 32<<10 { r.o.perRound = 32 << 10 } divide = 0 - if dataShards > maxAvx2Inputs { - divide += maxAvx2Inputs + if dataShards > codeGenMaxInputs { + divide += codeGenMaxInputs } else { divide += dataShards } - if parityShards > maxAvx2Inputs { - divide += maxAvx2Outputs + if parityShards > codeGenMaxInputs { + divide += codeGenMaxOutputs } else { divide += parityShards } @@ -554,11 +556,11 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) { // Generated AVX2 does not need data to stay in L1 cache between runs. // We will be purely limited by RAM speed. - if r.canAVX2C(avx2CodeGenMinSize, maxAvx2Inputs, maxAvx2Outputs) && r.o.maxGoroutines > avx2CodeGenMaxGoroutines { - r.o.maxGoroutines = avx2CodeGenMaxGoroutines + if useCodeGen && r.o.maxGoroutines > codeGenMaxGoroutines { + r.o.maxGoroutines = codeGenMaxGoroutines } - if r.canGFNI(avx2CodeGenMinSize, maxAvx2Inputs, maxAvx2Outputs) && r.o.maxGoroutines > gfniCodeGenMaxGoroutines { + if _, _, useGFNI := r.canGFNI(codeGenMinSize, codeGenMaxInputs, codeGenMaxOutputs); useGFNI && r.o.maxGoroutines > gfniCodeGenMaxGoroutines { r.o.maxGoroutines = gfniCodeGenMaxGoroutines } @@ -576,7 +578,7 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) { r.parity[i] = r.m[dataShards+i] } - if avx2CodeGen && r.o.useAVX2 { + if codeGen /* && r.o.useAVX2 */ { sz := r.dataShards * r.parityShards * 2 * 32 r.mPool.New = func() interface{} { return AllocAligned(1, sz)[0] @@ -652,15 +654,15 @@ func (r *reedSolomon) EncodeIdx(dataShard []byte, idx int, parity [][]byte) erro return ErrShardSize } - if avx2CodeGen && len(dataShard) >= r.o.perRound && len(parity) >= avx2CodeGenMinShards && (r.o.useAVX2 || r.o.useGFNI) { + if codeGen && len(dataShard) >= r.o.perRound && len(parity) >= codeGenMinShards && (pshufb || r.o.useAvx512GFNI || r.o.useAvxGNFI) { m := make([][]byte, r.parityShards) for iRow := range m { m[iRow] = r.parity[iRow][idx : idx+1] } - if r.o.useGFNI { - r.codeSomeShardsGFNI(m, [][]byte{dataShard}, parity, len(dataShard), false) + if r.o.useAvx512GFNI || r.o.useAvxGNFI { + r.codeSomeShardsGFNI(m, [][]byte{dataShard}, parity, len(dataShard), false, nil, nil) } else { - r.codeSomeShardsAVXP(m, [][]byte{dataShard}, parity, len(dataShard), false) + r.codeSomeShardsAVXP(m, [][]byte{dataShard}, parity, len(dataShard), false, nil, nil) } return nil } @@ -802,18 +804,6 @@ func (r *reedSolomon) Verify(shards [][]byte) (bool, error) { return r.checkSomeShards(r.parity, shards[:r.dataShards], toCheck[:r.parityShards], len(shards[0])), nil } -func (r *reedSolomon) canAVX2C(byteCount int, inputs, outputs int) bool { - return avx2CodeGen && r.o.useAVX2 && - byteCount >= avx2CodeGenMinSize && inputs+outputs >= avx2CodeGenMinShards && - inputs <= maxAvx2Inputs && outputs <= maxAvx2Outputs -} - -func (r *reedSolomon) canGFNI(byteCount int, inputs, outputs int) bool { - return avx2CodeGen && r.o.useGFNI && - byteCount >= avx2CodeGenMinSize && inputs+outputs >= avx2CodeGenMinShards && - inputs <= maxAvx2Inputs && outputs <= maxAvx2Outputs -} - // Multiplies a subset of rows from a coding matrix by a full set of // input totalShards to produce some output totalShards. // 'matrixRows' is The rows from the matrix to use. @@ -837,18 +827,18 @@ func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, byteC if end > len(inputs[0]) { end = len(inputs[0]) } - if r.canGFNI(byteCount, len(inputs), len(outputs)) { - var gfni [maxAvx2Inputs * maxAvx2Outputs]uint64 + if galMulGFNI, galMulGFNIXor, useGFNI := r.canGFNI(byteCount, len(inputs), len(outputs)); useGFNI { + var gfni [codeGenMaxInputs * codeGenMaxOutputs]uint64 m := genGFNIMatrix(matrixRows, len(inputs), 0, len(outputs), gfni[:]) - start += galMulSlicesGFNI(m, inputs, outputs, 0, byteCount) + start += (*galMulGFNI)(m, inputs, outputs, 0, byteCount) end = len(inputs[0]) - } else if r.canAVX2C(byteCount, len(inputs), len(outputs)) { - m := genAvx2Matrix(matrixRows, len(inputs), 0, len(outputs), r.getTmpSlice()) - start += galMulSlicesAvx2(m, inputs, outputs, 0, byteCount) + } else if galMulGen, _, ok := r.hasCodeGen(byteCount, len(inputs), len(outputs)); ok { + m := genCodeGenMatrix(matrixRows, len(inputs), 0, len(outputs), r.o.vectorLength, r.getTmpSlice()) + start += (*galMulGen)(m, inputs, outputs, 0, byteCount) r.putTmpSlice(m) end = len(inputs[0]) - } else if len(inputs)+len(outputs) > avx2CodeGenMinShards && r.canAVX2C(byteCount, maxAvx2Inputs, maxAvx2Outputs) { - var gfni [maxAvx2Inputs * maxAvx2Outputs]uint64 + } else if galMulGen, galMulGenXor, ok := r.hasCodeGen(byteCount, codeGenMaxInputs, codeGenMaxOutputs); len(inputs)+len(outputs) > codeGenMinShards && ok { + var gfni [codeGenMaxInputs * codeGenMaxOutputs]uint64 end = len(inputs[0]) inIdx := 0 m := r.getTmpSlice() @@ -856,32 +846,31 @@ func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, byteC ins := inputs for len(ins) > 0 { inPer := ins - if len(inPer) > maxAvx2Inputs { - inPer = inPer[:maxAvx2Inputs] + if len(inPer) > codeGenMaxInputs { + inPer = inPer[:codeGenMaxInputs] } outs := outputs outIdx := 0 for len(outs) > 0 { outPer := outs - if len(outPer) > maxAvx2Outputs { - outPer = outPer[:maxAvx2Outputs] + if len(outPer) > codeGenMaxOutputs { + outPer = outPer[:codeGenMaxOutputs] } - if r.o.useGFNI { + if useGFNI { m := genGFNIMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), gfni[:]) if inIdx == 0 { - galMulSlicesGFNI(m, inPer, outPer, 0, byteCount) + start = (*galMulGFNI)(m, inPer, outPer, 0, byteCount) } else { - galMulSlicesGFNIXor(m, inPer, outPer, 0, byteCount) + start = (*galMulGFNIXor)(m, inPer, outPer, 0, byteCount) } } else { - m = genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), m) + m = genCodeGenMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), r.o.vectorLength, m) if inIdx == 0 { - galMulSlicesAvx2(m, inPer, outPer, 0, byteCount) + start = (*galMulGen)(m, inPer, outPer, 0, byteCount) } else { - galMulSlicesAvx2Xor(m, inPer, outPer, 0, byteCount) + start = (*galMulGenXor)(m, inPer, outPer, 0, byteCount) } } - start = byteCount & avxSizeMask outIdx += len(outPer) outs = outs[len(outPer):] } @@ -917,27 +906,27 @@ func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, byte var wg sync.WaitGroup gor := r.o.maxGoroutines - var avx2Matrix []byte + var genMatrix []byte var gfniMatrix []uint64 - useAvx2 := r.canAVX2C(byteCount, len(inputs), len(outputs)) - useGFNI := r.canGFNI(byteCount, len(inputs), len(outputs)) + galMulGen, _, useCodeGen := r.hasCodeGen(byteCount, len(inputs), len(outputs)) + galMulGFNI, _, useGFNI := r.canGFNI(byteCount, len(inputs), len(outputs)) if useGFNI { - var tmp [maxAvx2Inputs * maxAvx2Outputs]uint64 + var tmp [codeGenMaxInputs * codeGenMaxOutputs]uint64 gfniMatrix = genGFNIMatrix(matrixRows, len(inputs), 0, len(outputs), tmp[:]) - } else if useAvx2 { - avx2Matrix = genAvx2Matrix(matrixRows, len(inputs), 0, len(outputs), r.getTmpSlice()) - defer r.putTmpSlice(avx2Matrix) - } else if r.o.useGFNI && byteCount < 10<<20 && len(inputs)+len(outputs) > avx2CodeGenMinShards && - r.canGFNI(byteCount/4, maxAvx2Inputs, maxAvx2Outputs) { + } else if useCodeGen { + genMatrix = genCodeGenMatrix(matrixRows, len(inputs), 0, len(outputs), r.o.vectorLength, r.getTmpSlice()) + defer r.putTmpSlice(genMatrix) + } else if galMulGFNI, galMulGFNIXor, useGFNI := r.canGFNI(byteCount/4, codeGenMaxInputs, codeGenMaxOutputs); useGFNI && + byteCount < 10<<20 && len(inputs)+len(outputs) > codeGenMinShards { // It appears there is a switchover point at around 10MB where // Regular processing is faster... - r.codeSomeShardsGFNI(matrixRows, inputs, outputs, byteCount, true) + r.codeSomeShardsGFNI(matrixRows, inputs, outputs, byteCount, true, galMulGFNI, galMulGFNIXor) return - } else if r.o.useAVX2 && byteCount < 10<<20 && len(inputs)+len(outputs) > avx2CodeGenMinShards && - r.canAVX2C(byteCount/4, maxAvx2Inputs, maxAvx2Outputs) { + } else if galMulGen, galMulGenXor, ok := r.hasCodeGen(byteCount/4, codeGenMaxInputs, codeGenMaxOutputs); ok && + byteCount < 10<<20 && len(inputs)+len(outputs) > codeGenMinShards { // It appears there is a switchover point at around 10MB where // Regular processing is faster... - r.codeSomeShardsAVXP(matrixRows, inputs, outputs, byteCount, true) + r.codeSomeShardsAVXP(matrixRows, inputs, outputs, byteCount, true, galMulGen, galMulGenXor) return } @@ -949,9 +938,9 @@ func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, byte exec := func(start, stop int) { if stop-start >= 64 { if useGFNI { - start += galMulSlicesGFNI(gfniMatrix, inputs, outputs, start, stop) - } else if useAvx2 { - start += galMulSlicesAvx2(avx2Matrix, inputs, outputs, start, stop) + start += (*galMulGFNI)(gfniMatrix, inputs, outputs, start, stop) + } else if useCodeGen { + start += (*galMulGen)(genMatrix, inputs, outputs, start, stop) } } @@ -1002,7 +991,7 @@ func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, byte // Perform the same as codeSomeShards, but split the workload into // several goroutines. // If clear is set, the first write will overwrite the output. -func (r *reedSolomon) codeSomeShardsAVXP(matrixRows, inputs, outputs [][]byte, byteCount int, clear bool) { +func (r *reedSolomon) codeSomeShardsAVXP(matrixRows, inputs, outputs [][]byte, byteCount int, clear bool, galMulGen, galMulGenXor *func(matrix []byte, in [][]byte, out [][]byte, start int, stop int) int) { var wg sync.WaitGroup gor := r.o.maxGoroutines @@ -1013,7 +1002,7 @@ func (r *reedSolomon) codeSomeShardsAVXP(matrixRows, inputs, outputs [][]byte, b first bool } // Make a plan... - plan := make([]state, 0, ((len(inputs)+maxAvx2Inputs-1)/maxAvx2Inputs)*((len(outputs)+maxAvx2Outputs-1)/maxAvx2Outputs)) + plan := make([]state, 0, ((len(inputs)+codeGenMaxInputs-1)/codeGenMaxInputs)*((len(outputs)+codeGenMaxOutputs-1)/codeGenMaxOutputs)) tmp := r.getTmpSlice() defer r.putTmpSlice(tmp) @@ -1025,18 +1014,18 @@ func (r *reedSolomon) codeSomeShardsAVXP(matrixRows, inputs, outputs [][]byte, b ins := inputs for len(ins) > 0 { inPer := ins - if len(inPer) > maxAvx2Inputs { - inPer = inPer[:maxAvx2Inputs] + if len(inPer) > codeGenMaxInputs { + inPer = inPer[:codeGenMaxInputs] } outs := outputs outIdx := 0 for len(outs) > 0 { outPer := outs - if len(outPer) > maxAvx2Outputs { - outPer = outPer[:maxAvx2Outputs] + if len(outPer) > codeGenMaxOutputs { + outPer = outPer[:codeGenMaxOutputs] } // Generate local matrix - m := genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), tmp) + m := genCodeGenMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), r.o.vectorLength, tmp) tmp = tmp[len(m):] plan = append(plan, state{ input: inPer, @@ -1055,19 +1044,19 @@ func (r *reedSolomon) codeSomeShardsAVXP(matrixRows, inputs, outputs [][]byte, b outIdx := 0 for len(outs) > 0 { outPer := outs - if len(outPer) > maxAvx2Outputs { - outPer = outPer[:maxAvx2Outputs] + if len(outPer) > codeGenMaxOutputs { + outPer = outPer[:codeGenMaxOutputs] } inIdx := 0 ins := inputs for len(ins) > 0 { inPer := ins - if len(inPer) > maxAvx2Inputs { - inPer = inPer[:maxAvx2Inputs] + if len(inPer) > codeGenMaxInputs { + inPer = inPer[:codeGenMaxInputs] } // Generate local matrix - m := genAvx2Matrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), tmp) + m := genCodeGenMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), r.o.vectorLength, tmp) tmp = tmp[len(m):] //fmt.Println("bytes:", len(inPer)*r.o.perRound, "out:", len(outPer)*r.o.perRound) plan = append(plan, state{ @@ -1096,16 +1085,17 @@ func (r *reedSolomon) codeSomeShardsAVXP(matrixRows, inputs, outputs [][]byte, b lstop = stop } for lstart < stop { - if lstop-lstart >= minAvx2Size { + if galMulGen != nil && galMulGenXor != nil && lstop-lstart >= minCodeGenSize { // Execute plan... + var n int for _, p := range plan { if p.first { - galMulSlicesAvx2(p.m, p.input, p.output, lstart, lstop) + n = (*galMulGen)(p.m, p.input, p.output, lstart, lstop) } else { - galMulSlicesAvx2Xor(p.m, p.input, p.output, lstart, lstop) + n = (*galMulGenXor)(p.m, p.input, p.output, lstart, lstop) } } - lstart += (lstop - lstart) & avxSizeMask + lstart += n if lstart == lstop { lstop += r.o.perRound if lstop > stop { @@ -1156,7 +1146,7 @@ func (r *reedSolomon) codeSomeShardsAVXP(matrixRows, inputs, outputs [][]byte, b // Perform the same as codeSomeShards, but split the workload into // several goroutines. // If clear is set, the first write will overwrite the output. -func (r *reedSolomon) codeSomeShardsGFNI(matrixRows, inputs, outputs [][]byte, byteCount int, clear bool) { +func (r *reedSolomon) codeSomeShardsGFNI(matrixRows, inputs, outputs [][]byte, byteCount int, clear bool, galMulGFNI, galMulGFNIXor *func(matrix []uint64, in, out [][]byte, start, stop int) int) { var wg sync.WaitGroup gor := r.o.maxGoroutines @@ -1167,7 +1157,7 @@ func (r *reedSolomon) codeSomeShardsGFNI(matrixRows, inputs, outputs [][]byte, b first bool } // Make a plan... - plan := make([]state, 0, ((len(inputs)+maxAvx2Inputs-1)/maxAvx2Inputs)*((len(outputs)+maxAvx2Outputs-1)/maxAvx2Outputs)) + plan := make([]state, 0, ((len(inputs)+codeGenMaxInputs-1)/codeGenMaxInputs)*((len(outputs)+codeGenMaxOutputs-1)/codeGenMaxOutputs)) // Flips between input first to output first. // We put the smallest data load in the inner loop. @@ -1176,15 +1166,15 @@ func (r *reedSolomon) codeSomeShardsGFNI(matrixRows, inputs, outputs [][]byte, b ins := inputs for len(ins) > 0 { inPer := ins - if len(inPer) > maxAvx2Inputs { - inPer = inPer[:maxAvx2Inputs] + if len(inPer) > codeGenMaxInputs { + inPer = inPer[:codeGenMaxInputs] } outs := outputs outIdx := 0 for len(outs) > 0 { outPer := outs - if len(outPer) > maxAvx2Outputs { - outPer = outPer[:maxAvx2Outputs] + if len(outPer) > codeGenMaxOutputs { + outPer = outPer[:codeGenMaxOutputs] } // Generate local matrix m := genGFNIMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), make([]uint64, len(inPer)*len(outPer))) @@ -1205,16 +1195,16 @@ func (r *reedSolomon) codeSomeShardsGFNI(matrixRows, inputs, outputs [][]byte, b outIdx := 0 for len(outs) > 0 { outPer := outs - if len(outPer) > maxAvx2Outputs { - outPer = outPer[:maxAvx2Outputs] + if len(outPer) > codeGenMaxOutputs { + outPer = outPer[:codeGenMaxOutputs] } inIdx := 0 ins := inputs for len(ins) > 0 { inPer := ins - if len(inPer) > maxAvx2Inputs { - inPer = inPer[:maxAvx2Inputs] + if len(inPer) > codeGenMaxInputs { + inPer = inPer[:codeGenMaxInputs] } // Generate local matrix m := genGFNIMatrix(matrixRows[outIdx:], len(inPer), inIdx, len(outPer), make([]uint64, len(inPer)*len(outPer))) @@ -1245,16 +1235,17 @@ func (r *reedSolomon) codeSomeShardsGFNI(matrixRows, inputs, outputs [][]byte, b lstop = stop } for lstart < stop { - if lstop-lstart >= minAvx2Size { + if galMulGFNI != nil && galMulGFNIXor != nil && lstop-lstart >= minCodeGenSize { // Execute plan... + var n int for _, p := range plan { if p.first { - galMulSlicesGFNI(p.m, p.input, p.output, lstart, lstop) + n = (*galMulGFNI)(p.m, p.input, p.output, lstart, lstop) } else { - galMulSlicesGFNIXor(p.m, p.input, p.output, lstart, lstop) + n = (*galMulGFNIXor)(p.m, p.input, p.output, lstart, lstop) } } - lstart += (lstop - lstart) & avxSizeMask + lstart += n if lstart == lstop { lstop += r.o.perRound if lstop > stop { @@ -1402,13 +1393,14 @@ func (r *reedSolomon) ReconstructData(shards [][]byte) error { return r.reconstruct(shards, true, nil) } -// ReconstructSome will recreate only requested data shards, if possible. +// ReconstructSome will recreate only requested shards, if possible. // // Given a list of shards, some of which contain data, fills in the -// data shards indicated by true values in the "required" parameter. -// The length of "required" array must be equal to dataShards. +// shards indicated by true values in the "required" parameter. +// The length of the "required" array must be equal to either Shards or DataShards. +// If the length is equal to DataShards, the reconstruction of parity shards will be ignored. // -// The length of "shards" array must be equal to shards. +// The length of "shards" array must be equal to Shards. // You indicate that a shard is missing by setting it to nil or zero-length. // If a shard is zero-length but has sufficient capacity, that memory will // be used, otherwise a new []byte will be allocated. @@ -1419,6 +1411,9 @@ func (r *reedSolomon) ReconstructData(shards [][]byte) error { // As the reconstructed shard set may contain missing parity shards, // calling the Verify function is likely to fail. func (r *reedSolomon) ReconstructSome(shards [][]byte, required []bool) error { + if len(required) == r.totalShards { + return r.reconstruct(shards, false, required) + } return r.reconstruct(shards, true, required) } @@ -1633,7 +1628,7 @@ func (r *reedSolomon) Split(data []byte) ([][]byte, error) { // Copy partial shards copyFrom := data[perShard*fullShards : dataLen] for i := range padding { - if len(copyFrom) <= 0 { + if len(copyFrom) == 0 { break } copyFrom = copyFrom[copy(padding[i], copyFrom):] diff --git a/vendor/github.com/klauspost/reedsolomon/xor_arm64.go b/vendor/github.com/klauspost/reedsolomon/xor_arm64.go new file mode 100644 index 00000000..ffda8884 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/xor_arm64.go @@ -0,0 +1,23 @@ +//go:build !noasm && !appengine && !gccgo + +package reedsolomon + +//go:noescape +func xorSliceNEON(in, out []byte) + +// simple slice xor +func sliceXor(in, out []byte, o *options) { + done := (len(in) >> 5) << 5 + if raceEnabled { + raceWriteSlice(out[:done]) + raceReadSlice(in[:done]) + } + xorSliceNEON(in, out) + + remain := len(in) - done + if remain > 0 { + for i := done; i < len(in); i++ { + out[i] ^= in[i] + } + } +} diff --git a/vendor/github.com/klauspost/reedsolomon/xor_arm64.s b/vendor/github.com/klauspost/reedsolomon/xor_arm64.s new file mode 100644 index 00000000..56298731 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/xor_arm64.s @@ -0,0 +1,29 @@ +//+build !noasm +//+build !appengine +//+build !gccgo + +// func xorSliceNEON(in, out []byte) +TEXT ·xorSliceNEON(SB), 7, $0 + MOVD in_base+0(FP), R1 + MOVD in_len+8(FP), R2 // length of message + MOVD out_base+24(FP), R5 + SUBS $32, R2 + BMI completeXor + +loopXor: + // Main loop + VLD1.P 32(R1), [V0.B16, V1.B16] + VLD1 (R5), [V20.B16, V21.B16] + + VEOR V20.B16, V0.B16, V4.B16 + VEOR V21.B16, V1.B16, V5.B16 + + // Store result + VST1.P [V4.D2, V5.D2], 32(R5) + + SUBS $32, R2 + BPL loopXor + +completeXor: + RET + diff --git a/vendor/github.com/klauspost/reedsolomon/xor_noasm.go b/vendor/github.com/klauspost/reedsolomon/xor_noasm.go new file mode 100644 index 00000000..d3e29f90 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/xor_noasm.go @@ -0,0 +1,7 @@ +//go:build noasm || gccgo || appengine || (!amd64 && !arm64) + +package reedsolomon + +func sliceXor(in, out []byte, o *options) { + sliceXorGo(in, out, o) +} diff --git a/vendor/github.com/leodido/go-urn/.gitignore b/vendor/github.com/leodido/go-urn/.gitignore index 89d4bc55..427454f8 100644 --- a/vendor/github.com/leodido/go-urn/.gitignore +++ b/vendor/github.com/leodido/go-urn/.gitignore @@ -9,4 +9,5 @@ *.txt vendor/ -/removecomments \ No newline at end of file +/removecomments +/snake2camel \ No newline at end of file diff --git a/vendor/github.com/leodido/go-urn/README.md b/vendor/github.com/leodido/go-urn/README.md index 731eecbb..619475bf 100644 --- a/vendor/github.com/leodido/go-urn/README.md +++ b/vendor/github.com/leodido/go-urn/README.md @@ -2,21 +2,34 @@ **A parser for URNs**. -> As seen on [RFC 2141](https://tools.ietf.org/html/rfc2141#ref-1). +> As seen on [RFC 2141](https://datatracker.ietf.org/doc/html/rfc2141), [RFC 7643](https://datatracker.ietf.org/doc/html/rfc7643#section-10), and on [RFC 8141](https://datatracker.ietf.org/doc/html/rfc8141). [API documentation](https://godoc.org/github.com/leodido/go-urn). +Starting with version 1.3 this library also supports [RFC 7643 SCIM URNs](https://datatracker.ietf.org/doc/html/rfc7643#section-10). + +Starting with version 1.4 this library also supports [RFC 8141 URNs (2017)](https://datatracker.ietf.org/doc/html/rfc8141). + ## Installation ``` go get github.com/leodido/go-urn ``` +## Features + +1. RFC 2141 URNs parsing (default) +2. RFC 8141 URNs parsing (supersedes RFC 2141) +3. RFC 7643 SCIM URNs parsing +4. Normalization as per RFCs +5. Lexical equivalence as per RFCs +6. Precise, fine-grained errors + ## Performances This implementation results to be really fast. -Usually below ½ microsecond on my machine[1](#mymachine). +Usually below 400 ns on my machine[1](#mymachine). Notice it also performs, while parsing: @@ -24,35 +37,64 @@ Notice it also performs, while parsing: 2. specific-string normalization ``` -ok/00/urn:a:b______________________________________/-4 20000000 265 ns/op 182 B/op 6 allocs/op -ok/01/URN:foo:a123,456_____________________________/-4 30000000 296 ns/op 200 B/op 6 allocs/op -ok/02/urn:foo:a123%2c456___________________________/-4 20000000 331 ns/op 208 B/op 6 allocs/op -ok/03/urn:ietf:params:scim:schemas:core:2.0:User___/-4 20000000 430 ns/op 280 B/op 6 allocs/op -ok/04/urn:ietf:params:scim:schemas:extension:enterp/-4 20000000 411 ns/op 312 B/op 6 allocs/op -ok/05/urn:ietf:params:scim:schemas:extension:enterp/-4 20000000 472 ns/op 344 B/op 6 allocs/op -ok/06/urn:burnout:nss______________________________/-4 30000000 257 ns/op 192 B/op 6 allocs/op -ok/07/urn:abcdefghilmnopqrstuvzabcdefghilm:x_______/-4 20000000 375 ns/op 213 B/op 6 allocs/op -ok/08/urn:urnurnurn:urn____________________________/-4 30000000 265 ns/op 197 B/op 6 allocs/op -ok/09/urn:ciao:@!=%2c(xyz)+a,b.*@g=$_'_____________/-4 20000000 307 ns/op 248 B/op 6 allocs/op -ok/10/URN:x:abc%1dz%2f%3az_________________________/-4 30000000 259 ns/op 212 B/op 6 allocs/op -no/11/URN:-xxx:x___________________________________/-4 20000000 445 ns/op 320 B/op 6 allocs/op -no/12/urn::colon:nss_______________________________/-4 20000000 461 ns/op 320 B/op 6 allocs/op -no/13/urn:abcdefghilmnopqrstuvzabcdefghilmn:specifi/-4 10000000 660 ns/op 320 B/op 6 allocs/op -no/14/URN:a!?:x____________________________________/-4 20000000 507 ns/op 320 B/op 6 allocs/op -no/15/urn:urn:NSS__________________________________/-4 20000000 429 ns/op 288 B/op 6 allocs/op -no/16/urn:white_space:NSS__________________________/-4 20000000 482 ns/op 320 B/op 6 allocs/op -no/17/urn:concat:no_spaces_________________________/-4 20000000 539 ns/op 328 B/op 7 allocs/op -no/18/urn:a:/______________________________________/-4 20000000 470 ns/op 320 B/op 7 allocs/op -no/19/urn:UrN:NSS__________________________________/-4 20000000 399 ns/op 288 B/op 6 allocs/op +ok/00/urn:a:b______________________________________/-10 51372006 109.0 ns/op 275 B/op 3 allocs/op +ok/01/URN:foo:a123,456_____________________________/-10 36024072 160.8 ns/op 296 B/op 6 allocs/op +ok/02/urn:foo:a123%2C456___________________________/-10 31901007 188.4 ns/op 320 B/op 7 allocs/op +ok/03/urn:ietf:params:scim:schemas:core:2.0:User___/-10 22736756 266.6 ns/op 376 B/op 6 allocs/op +ok/04/urn:ietf:params:scim:schemas:extension:enterp/-10 18291859 335.2 ns/op 408 B/op 6 allocs/op +ok/05/urn:ietf:params:scim:schemas:extension:enterp/-10 15283087 379.4 ns/op 440 B/op 6 allocs/op +ok/06/urn:burnout:nss______________________________/-10 39407593 155.1 ns/op 288 B/op 6 allocs/op +ok/07/urn:abcdefghilmnopqrstuvzabcdefghilm:x_______/-10 27832718 211.4 ns/op 307 B/op 4 allocs/op +ok/08/urn:urnurnurn:urn____________________________/-10 33269596 168.1 ns/op 293 B/op 6 allocs/op +ok/09/urn:ciao:!!*_________________________________/-10 41100675 148.8 ns/op 288 B/op 6 allocs/op +ok/10/urn:ciao:=@__________________________________/-10 37214253 149.7 ns/op 284 B/op 6 allocs/op +ok/11/urn:ciao:@!=%2C(xyz)+a,b.*@g=$_'_____________/-10 26534240 229.8 ns/op 336 B/op 7 allocs/op +ok/12/URN:x:abc%1Dz%2F%3az_________________________/-10 28166396 211.8 ns/op 336 B/op 7 allocs/op +no/13/URN:---xxx:x_________________________________/-10 23635159 255.6 ns/op 419 B/op 5 allocs/op +no/14/urn::colon:nss_______________________________/-10 23594779 258.4 ns/op 419 B/op 5 allocs/op +no/15/URN:@,:x_____________________________________/-10 23742535 261.5 ns/op 419 B/op 5 allocs/op +no/16/URN:URN:NSS__________________________________/-10 27432714 223.3 ns/op 371 B/op 5 allocs/op +no/17/urn:UrN:NSS__________________________________/-10 26922117 224.9 ns/op 371 B/op 5 allocs/op +no/18/urn:a:%______________________________________/-10 24926733 224.6 ns/op 371 B/op 5 allocs/op +no/19/urn:urn:NSS__________________________________/-10 27652641 220.7 ns/op 371 B/op 5 allocs/op ``` ---- - -* [1]: Intel Core i7-7600U CPU @ 2.80GHz +* [1]: Apple M1 Pro ---- ## Example + +For more examples take a look at the [examples file](examples_test.go). + + +```go +package main + +import ( + "fmt" + "github.com/leodido/go-urn" +) + +func main() { + var uid = "URN:foo:a123,456" + + // Parse the input string as a RFC 2141 URN only + u, e := urn.NewMachine().Parse(uid) + if e != nil { + fmt.Errorf(err) + + return + } + + fmt.Println(u.ID) + fmt.Println(u.SS) + + // Output: + // foo + // a123,456 +} +``` + ```go package main @@ -64,6 +106,7 @@ import ( func main() { var uid = "URN:foo:a123,456" + // Parse the input string as a RFC 2141 URN only u, ok := urn.Parse([]byte(uid)) if !ok { panic("error parsing urn") @@ -78,4 +121,33 @@ func main() { } ``` -[![Analytics](https://ga-beacon.appspot.com/UA-49657176-1/go-urn?flat)](https://github.com/igrigorik/ga-beacon) \ No newline at end of file +```go +package main + +import ( + "fmt" + "github.com/leodido/go-urn" +) + +func main() { + input := "urn:ietf:params:scim:api:messages:2.0:ListResponse" + + // Parsing the input string as a RFC 7643 SCIM URN + u, ok := urn.Parse([]byte(input), urn.WithParsingMode(urn.RFC7643Only)) + if !ok { + panic("error parsing urn") + } + + fmt.Println(u.IsSCIM()) + scim := u.SCIM() + fmt.Println(scim.Type.String()) + fmt.Println(scim.Name) + fmt.Println(scim.Other) + + // Output: + // true + // api + // messages + // 2.0:ListResponse +} +``` \ No newline at end of file diff --git a/vendor/github.com/leodido/go-urn/kind.go b/vendor/github.com/leodido/go-urn/kind.go new file mode 100644 index 00000000..f5e140f0 --- /dev/null +++ b/vendor/github.com/leodido/go-urn/kind.go @@ -0,0 +1,10 @@ +package urn + +type Kind int + +const ( + NONE Kind = iota + RFC2141 + RFC7643 + RFC8141 +) diff --git a/vendor/github.com/leodido/go-urn/machine.go b/vendor/github.com/leodido/go-urn/machine.go index f8d57b41..aec1ba69 100644 --- a/vendor/github.com/leodido/go-urn/machine.go +++ b/vendor/github.com/leodido/go-urn/machine.go @@ -2,27 +2,98 @@ package urn import ( "fmt" + + scimschema "github.com/leodido/go-urn/scim/schema" ) var ( - errPrefix = "expecting the prefix to be the \"urn\" string (whatever case) [col %d]" - errIdentifier = "expecting the identifier to be string (1..31 alnum chars, also containing dashes but not at its start) [col %d]" - errSpecificString = "expecting the specific string to be a string containing alnum, hex, or others ([()+,-.:=@;$_!*']) chars [col %d]" - errNoUrnWithinID = "expecting the identifier to not contain the \"urn\" reserved string [col %d]" - errHex = "expecting the specific string hex chars to be well-formed (%%alnum{2}) [col %d]" - errParse = "parsing error [col %d]" + errPrefix = "expecting the prefix to be the \"urn\" string (whatever case) [col %d]" + errIdentifier = "expecting the identifier to be string (1..31 alnum chars, also containing dashes but not at its beginning) [col %d]" + errSpecificString = "expecting the specific string to be a string containing alnum, hex, or others ([()+,-.:=@;$_!*']) chars [col %d]" + errNoUrnWithinID = "expecting the identifier to not contain the \"urn\" reserved string [col %d]" + errHex = "expecting the percent encoded chars to be well-formed (%%alnum{2}) [col %d]" + errSCIMNamespace = "expecing the SCIM namespace identifier (ietf:params:scim) [col %d]" + errSCIMType = "expecting a correct SCIM type (schemas, api, param) [col %d]" + errSCIMName = "expecting one or more alnum char in the SCIM name part [col %d]" + errSCIMOther = "expecting a well-formed other SCIM part [col %d]" + errSCIMOtherIncomplete = "expecting a not empty SCIM other part after colon [col %d]" + err8141InformalID = "informal URN namespace must be in the form urn-[1-9][0-9] [col %d]" + err8141SpecificString = "expecting the specific string to contain alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] not in first position) chars [col %d]" + err8141Identifier = "expecting the indentifier to be a string with (length 2 to 32 chars) containing alnum (or dashes) not starting or ending with a dash [col %d]" + err8141RComponentStart = "expecting only one r-component (starting with the ?+ sequence) [col %d]" + err8141QComponentStart = "expecting only one q-component (starting with the ?= sequence) [col %d]" + err8141MalformedRComp = "expecting a non-empty r-component containing alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] but not at its beginning) [col %d]" + err8141MalformedQComp = "expecting a non-empty q-component containing alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] but not at its beginning) [col %d]" ) +var _toStateActions []byte = []byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 33, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, +} + +var _eofActions []byte = []byte{ + 0, 1, 1, 1, 1, 4, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 8, 9, + 9, 4, 4, 11, 1, 1, 1, 1, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 14, 14, 14, 14, 16, 18, 20, + 20, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 1, 1, 1, 1, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 24, 24, 25, 25, 0, 26, 28, + 28, 29, 29, 30, 30, 26, 26, 31, + 31, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 21, + 21, 22, 22, 22, 34, 34, 35, 37, + 37, 38, 40, 41, 41, 38, 42, 42, + 42, 44, 42, 48, 48, 48, 50, 44, + 50, 0, +} const start int = 1 -const firstFinal int = 44 +const firstFinal int = 172 -const enFail int = 46 +const enScimOnly int = 44 +const enRfc8141Only int = 83 +const enFail int = 193 const enMain int = 1 // Machine is the interface representing the FSM type Machine interface { Error() error Parse(input []byte) (*URN, error) + WithParsingMode(ParsingMode) } type machine struct { @@ -30,12 +101,24 @@ type machine struct { cs int p, pe, eof, pb int err error - tolower []int + startParsingAt int + parsingMode ParsingMode + parsingModeSet bool } // NewMachine creates a new FSM able to parse RFC 2141 strings. -func NewMachine() Machine { - m := &machine{} +func NewMachine(options ...Option) Machine { + m := &machine{ + parsingModeSet: false, + } + + for _, o := range options { + o(m) + } + // Set default parsing mode + if !m.parsingModeSet { + m.WithParsingMode(DefaultParsingMode) + } return m } @@ -51,7 +134,7 @@ func (m *machine) text() []byte { return m.data[m.pb:m.p] } -// Parse parses the input byte array as a RFC 2141 string. +// Parse parses the input byte array as a RFC 2141 or RFC7643 string. func (m *machine) Parse(input []byte) (*URN, error) { m.data = input m.p = 0 @@ -59,1619 +142,4881 @@ func (m *machine) Parse(input []byte) (*URN, error) { m.pe = len(input) m.eof = len(input) m.err = nil - m.tolower = []int{} - output := &URN{} - { - m.cs = start + m.cs = m.startParsingAt + output := &URN{ + tolower: []int{}, } { if (m.p) == (m.pe) { goto _testEof } + if m.cs == 0 { + goto _out + } + _resume: switch m.cs { case 1: - goto stCase1 + switch (m.data)[(m.p)] { + case 85: + goto tr1 + case 117: + goto tr1 + } + goto tr0 case 0: - goto stCase0 + goto _out case 2: - goto stCase2 + switch (m.data)[(m.p)] { + case 82: + goto tr2 + case 114: + goto tr2 + } + goto tr0 case 3: - goto stCase3 + switch (m.data)[(m.p)] { + case 78: + goto tr3 + case 110: + goto tr3 + } + goto tr0 case 4: - goto stCase4 + if (m.data)[(m.p)] == 58 { + goto tr4 + } + goto tr0 case 5: - goto stCase5 + switch (m.data)[(m.p)] { + case 85: + goto tr7 + case 117: + goto tr7 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr6 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr6 + } + default: + goto tr6 + } + goto tr5 case 6: - goto stCase6 + switch (m.data)[(m.p)] { + case 45: + goto tr9 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr9 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr9 + } + default: + goto tr9 + } + goto tr8 case 7: - goto stCase7 + switch (m.data)[(m.p)] { + case 45: + goto tr11 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr11 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr11 + } + default: + goto tr11 + } + goto tr8 case 8: - goto stCase8 + switch (m.data)[(m.p)] { + case 45: + goto tr12 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr12 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr12 + } + default: + goto tr12 + } + goto tr8 case 9: - goto stCase9 + switch (m.data)[(m.p)] { + case 45: + goto tr13 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr13 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr13 + } + default: + goto tr13 + } + goto tr8 case 10: - goto stCase10 + switch (m.data)[(m.p)] { + case 45: + goto tr14 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr14 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr14 + } + default: + goto tr14 + } + goto tr8 case 11: - goto stCase11 + switch (m.data)[(m.p)] { + case 45: + goto tr15 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr15 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr15 + } + default: + goto tr15 + } + goto tr8 case 12: - goto stCase12 + switch (m.data)[(m.p)] { + case 45: + goto tr16 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr16 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr16 + } + default: + goto tr16 + } + goto tr8 case 13: - goto stCase13 + switch (m.data)[(m.p)] { + case 45: + goto tr17 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr17 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr17 + } + default: + goto tr17 + } + goto tr8 case 14: - goto stCase14 + switch (m.data)[(m.p)] { + case 45: + goto tr18 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr18 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr18 + } + default: + goto tr18 + } + goto tr8 case 15: - goto stCase15 + switch (m.data)[(m.p)] { + case 45: + goto tr19 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr19 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr19 + } + default: + goto tr19 + } + goto tr8 case 16: - goto stCase16 + switch (m.data)[(m.p)] { + case 45: + goto tr20 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr20 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr20 + } + default: + goto tr20 + } + goto tr8 case 17: - goto stCase17 + switch (m.data)[(m.p)] { + case 45: + goto tr21 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr21 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr21 + } + default: + goto tr21 + } + goto tr8 case 18: - goto stCase18 + switch (m.data)[(m.p)] { + case 45: + goto tr22 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr22 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr22 + } + default: + goto tr22 + } + goto tr8 case 19: - goto stCase19 + switch (m.data)[(m.p)] { + case 45: + goto tr23 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr23 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr23 + } + default: + goto tr23 + } + goto tr8 case 20: - goto stCase20 + switch (m.data)[(m.p)] { + case 45: + goto tr24 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr24 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr24 + } + default: + goto tr24 + } + goto tr8 case 21: - goto stCase21 + switch (m.data)[(m.p)] { + case 45: + goto tr25 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr25 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr25 + } + default: + goto tr25 + } + goto tr8 case 22: - goto stCase22 + switch (m.data)[(m.p)] { + case 45: + goto tr26 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr26 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr26 + } + default: + goto tr26 + } + goto tr8 case 23: - goto stCase23 + switch (m.data)[(m.p)] { + case 45: + goto tr27 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr27 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr27 + } + default: + goto tr27 + } + goto tr8 case 24: - goto stCase24 + switch (m.data)[(m.p)] { + case 45: + goto tr28 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr28 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr28 + } + default: + goto tr28 + } + goto tr8 case 25: - goto stCase25 + switch (m.data)[(m.p)] { + case 45: + goto tr29 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr29 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr29 + } + default: + goto tr29 + } + goto tr8 case 26: - goto stCase26 + switch (m.data)[(m.p)] { + case 45: + goto tr30 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr30 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr30 + } + default: + goto tr30 + } + goto tr8 case 27: - goto stCase27 + switch (m.data)[(m.p)] { + case 45: + goto tr31 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr31 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr31 + } + default: + goto tr31 + } + goto tr8 case 28: - goto stCase28 + switch (m.data)[(m.p)] { + case 45: + goto tr32 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr32 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr32 + } + default: + goto tr32 + } + goto tr8 case 29: - goto stCase29 + switch (m.data)[(m.p)] { + case 45: + goto tr33 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr33 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr33 + } + default: + goto tr33 + } + goto tr8 case 30: - goto stCase30 + switch (m.data)[(m.p)] { + case 45: + goto tr34 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr34 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr34 + } + default: + goto tr34 + } + goto tr8 case 31: - goto stCase31 + switch (m.data)[(m.p)] { + case 45: + goto tr35 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr35 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr35 + } + default: + goto tr35 + } + goto tr8 case 32: - goto stCase32 + switch (m.data)[(m.p)] { + case 45: + goto tr36 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr36 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr36 + } + default: + goto tr36 + } + goto tr8 case 33: - goto stCase33 + switch (m.data)[(m.p)] { + case 45: + goto tr37 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr37 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr37 + } + default: + goto tr37 + } + goto tr8 case 34: - goto stCase34 + switch (m.data)[(m.p)] { + case 45: + goto tr38 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr38 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr38 + } + default: + goto tr38 + } + goto tr8 case 35: - goto stCase35 + switch (m.data)[(m.p)] { + case 45: + goto tr39 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr39 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr39 + } + default: + goto tr39 + } + goto tr8 case 36: - goto stCase36 + switch (m.data)[(m.p)] { + case 45: + goto tr40 + case 58: + goto tr10 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr40 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr40 + } + default: + goto tr40 + } + goto tr8 case 37: - goto stCase37 + if (m.data)[(m.p)] == 58 { + goto tr10 + } + goto tr8 case 38: - goto stCase38 - case 44: - goto stCase44 + switch (m.data)[(m.p)] { + case 33: + goto tr42 + case 36: + goto tr42 + case 37: + goto tr43 + case 61: + goto tr42 + case 95: + goto tr42 + } + switch { + case (m.data)[(m.p)] < 48: + if 39 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr42 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr42 + } + case (m.data)[(m.p)] >= 64: + goto tr42 + } + default: + goto tr42 + } + goto tr41 + case 172: + switch (m.data)[(m.p)] { + case 33: + goto tr212 + case 36: + goto tr212 + case 37: + goto tr213 + case 61: + goto tr212 + case 95: + goto tr212 + } + switch { + case (m.data)[(m.p)] < 48: + if 39 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr212 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr212 + } + case (m.data)[(m.p)] >= 64: + goto tr212 + } + default: + goto tr212 + } + goto tr41 case 39: - goto stCase39 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr45 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr45 + } + default: + goto tr46 + } + goto tr44 case 40: - goto stCase40 - case 45: - goto stCase45 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr47 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr47 + } + default: + goto tr48 + } + goto tr44 + case 173: + switch (m.data)[(m.p)] { + case 33: + goto tr212 + case 36: + goto tr212 + case 37: + goto tr213 + case 61: + goto tr212 + case 95: + goto tr212 + } + switch { + case (m.data)[(m.p)] < 48: + if 39 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr212 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr212 + } + case (m.data)[(m.p)] >= 64: + goto tr212 + } + default: + goto tr212 + } + goto tr44 case 41: - goto stCase41 - case 42: - goto stCase42 - case 43: - goto stCase43 - case 46: - goto stCase46 - } - goto stOut - stCase1: - switch (m.data)[(m.p)] { - case 85: - goto tr1 - case 117: - goto tr1 - } - goto tr0 - tr0: - - m.err = fmt.Errorf(errParse, m.p) - (m.p)-- - - { - goto st46 - } - - goto st0 - tr3: - - m.err = fmt.Errorf(errPrefix, m.p) - (m.p)-- - - { - goto st46 - } - - m.err = fmt.Errorf(errParse, m.p) - (m.p)-- - - { - goto st46 - } - - goto st0 - tr6: - - m.err = fmt.Errorf(errIdentifier, m.p) - (m.p)-- - - { - goto st46 - } - - m.err = fmt.Errorf(errParse, m.p) - (m.p)-- - - { - goto st46 - } - - goto st0 - tr41: - - m.err = fmt.Errorf(errSpecificString, m.p) - (m.p)-- - - { - goto st46 - } - - m.err = fmt.Errorf(errParse, m.p) - (m.p)-- - - { - goto st46 - } - - goto st0 - tr44: - - m.err = fmt.Errorf(errHex, m.p) - (m.p)-- - - { - goto st46 - } - - m.err = fmt.Errorf(errSpecificString, m.p) - (m.p)-- - - { - goto st46 - } - - m.err = fmt.Errorf(errParse, m.p) - (m.p)-- - - { - goto st46 - } - - goto st0 - tr50: - - m.err = fmt.Errorf(errPrefix, m.p) - (m.p)-- - - { - goto st46 - } - - m.err = fmt.Errorf(errIdentifier, m.p) - (m.p)-- - - { - goto st46 - } - - m.err = fmt.Errorf(errParse, m.p) - (m.p)-- - - { - goto st46 - } - - goto st0 - tr52: - - m.err = fmt.Errorf(errNoUrnWithinID, m.p) - (m.p)-- - - { - goto st46 - } - - m.err = fmt.Errorf(errIdentifier, m.p) - (m.p)-- - - { - goto st46 - } - - m.err = fmt.Errorf(errParse, m.p) - (m.p)-- - - { - goto st46 - } - - goto st0 - stCase0: - st0: - m.cs = 0 - goto _out - tr1: - - m.pb = m.p - - goto st2 - st2: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof2 - } - stCase2: - switch (m.data)[(m.p)] { - case 82: - goto st3 - case 114: - goto st3 - } - goto tr0 - st3: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof3 - } - stCase3: - switch (m.data)[(m.p)] { - case 78: - goto st4 - case 110: - goto st4 - } - goto tr3 - st4: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof4 - } - stCase4: - if (m.data)[(m.p)] == 58 { + switch (m.data)[(m.p)] { + case 45: + goto tr9 + case 58: + goto tr10 + case 82: + goto tr49 + case 114: + goto tr49 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr9 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr9 + } + default: + goto tr9 + } goto tr5 - } - goto tr0 - tr5: - - output.prefix = string(m.text()) - - goto st5 - st5: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof5 - } - stCase5: - switch (m.data)[(m.p)] { - case 85: - goto tr8 - case 117: - goto tr8 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto tr7 + case 42: + switch (m.data)[(m.p)] { + case 45: + goto tr11 + case 58: + goto tr10 + case 78: + goto tr50 + case 110: + goto tr50 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto tr7 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr11 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr11 + } + default: + goto tr11 } - default: - goto tr7 - } - goto tr6 - tr7: - - m.pb = m.p - - goto st6 - st6: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof6 - } - stCase6: - switch (m.data)[(m.p)] { - case 45: - goto st7 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st7 + goto tr5 + case 43: + if (m.data)[(m.p)] == 45 { + goto tr12 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr12 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr12 + } + default: + goto tr12 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st7 + goto tr51 + case 44: + switch (m.data)[(m.p)] { + case 85: + goto tr52 + case 117: + goto tr52 } - default: - goto st7 - } - goto tr6 - st7: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof7 - } - stCase7: - switch (m.data)[(m.p)] { + goto tr0 case 45: - goto st8 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st8 + switch (m.data)[(m.p)] { + case 82: + goto tr53 + case 114: + goto tr53 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st8 + goto tr0 + case 46: + switch (m.data)[(m.p)] { + case 78: + goto tr54 + case 110: + goto tr54 } - default: - goto st8 - } - goto tr6 - st8: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof8 - } - stCase8: - switch (m.data)[(m.p)] { - case 45: - goto st9 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st9 + goto tr0 + case 47: + if (m.data)[(m.p)] == 58 { + goto tr55 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st9 + goto tr0 + case 48: + if (m.data)[(m.p)] == 105 { + goto tr57 } - default: - goto st9 - } - goto tr6 - st9: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof9 - } - stCase9: - switch (m.data)[(m.p)] { - case 45: - goto st10 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st10 + goto tr56 + case 49: + if (m.data)[(m.p)] == 101 { + goto tr58 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st10 + goto tr56 + case 50: + if (m.data)[(m.p)] == 116 { + goto tr59 } - default: - goto st10 - } - goto tr6 - st10: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof10 - } - stCase10: - switch (m.data)[(m.p)] { - case 45: - goto st11 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st11 + goto tr56 + case 51: + if (m.data)[(m.p)] == 102 { + goto tr60 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st11 + goto tr56 + case 52: + if (m.data)[(m.p)] == 58 { + goto tr61 } - default: - goto st11 - } - goto tr6 - st11: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof11 - } - stCase11: - switch (m.data)[(m.p)] { - case 45: - goto st12 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st12 + goto tr56 + case 53: + if (m.data)[(m.p)] == 112 { + goto tr62 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st12 + goto tr56 + case 54: + if (m.data)[(m.p)] == 97 { + goto tr63 } - default: - goto st12 - } - goto tr6 - st12: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof12 - } - stCase12: - switch (m.data)[(m.p)] { - case 45: - goto st13 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st13 + goto tr56 + case 55: + if (m.data)[(m.p)] == 114 { + goto tr64 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st13 + goto tr56 + case 56: + if (m.data)[(m.p)] == 97 { + goto tr65 } - default: - goto st13 - } - goto tr6 - st13: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof13 - } - stCase13: - switch (m.data)[(m.p)] { - case 45: - goto st14 + goto tr56 + case 57: + if (m.data)[(m.p)] == 109 { + goto tr66 + } + goto tr56 case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st14 + if (m.data)[(m.p)] == 115 { + goto tr67 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st14 + goto tr56 + case 59: + if (m.data)[(m.p)] == 58 { + goto tr68 } - default: - goto st14 - } - goto tr6 - st14: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof14 - } - stCase14: - switch (m.data)[(m.p)] { - case 45: - goto st15 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st15 + goto tr56 + case 60: + if (m.data)[(m.p)] == 115 { + goto tr69 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st15 + goto tr56 + case 61: + if (m.data)[(m.p)] == 99 { + goto tr70 } - default: - goto st15 - } - goto tr6 - st15: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof15 - } - stCase15: - switch (m.data)[(m.p)] { - case 45: - goto st16 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st16 + goto tr56 + case 62: + if (m.data)[(m.p)] == 105 { + goto tr71 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st16 + goto tr56 + case 63: + if (m.data)[(m.p)] == 109 { + goto tr72 } - default: - goto st16 - } - goto tr6 - st16: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof16 - } - stCase16: - switch (m.data)[(m.p)] { - case 45: - goto st17 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st17 + goto tr56 + case 64: + if (m.data)[(m.p)] == 58 { + goto tr73 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st17 + goto tr56 + case 65: + switch (m.data)[(m.p)] { + case 97: + goto tr75 + case 112: + goto tr76 + case 115: + goto tr77 } - default: - goto st17 - } - goto tr6 - st17: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof17 - } - stCase17: - switch (m.data)[(m.p)] { - case 45: - goto st18 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st18 + goto tr74 + case 66: + if (m.data)[(m.p)] == 112 { + goto tr78 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st18 + goto tr74 + case 67: + if (m.data)[(m.p)] == 105 { + goto tr79 } - default: - goto st18 - } - goto tr6 - st18: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof18 - } - stCase18: - switch (m.data)[(m.p)] { - case 45: - goto st19 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st19 + goto tr74 + case 68: + if (m.data)[(m.p)] == 58 { + goto tr80 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st19 + goto tr74 + case 69: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr82 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr82 + } + default: + goto tr82 } - default: - goto st19 - } - goto tr6 - st19: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof19 - } - stCase19: - switch (m.data)[(m.p)] { - case 45: - goto st20 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st20 + goto tr81 + case 174: + if (m.data)[(m.p)] == 58 { + goto tr215 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st20 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr214 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr214 + } + default: + goto tr214 } - default: - goto st20 - } - goto tr6 - st20: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof20 - } - stCase20: - switch (m.data)[(m.p)] { - case 45: - goto st21 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st21 + goto tr81 + case 70: + switch (m.data)[(m.p)] { + case 33: + goto tr84 + case 36: + goto tr84 + case 37: + goto tr85 + case 61: + goto tr84 + case 95: + goto tr84 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st21 + switch { + case (m.data)[(m.p)] < 48: + if 39 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr84 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr84 + } + case (m.data)[(m.p)] >= 64: + goto tr84 + } + default: + goto tr84 } - default: - goto st21 - } - goto tr6 - st21: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof21 - } - stCase21: - switch (m.data)[(m.p)] { - case 45: - goto st22 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st22 + goto tr83 + case 175: + switch (m.data)[(m.p)] { + case 33: + goto tr216 + case 36: + goto tr216 + case 37: + goto tr217 + case 61: + goto tr216 + case 95: + goto tr216 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st22 + switch { + case (m.data)[(m.p)] < 48: + if 39 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr216 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr216 + } + case (m.data)[(m.p)] >= 64: + goto tr216 + } + default: + goto tr216 } - default: - goto st22 - } - goto tr6 - st22: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof22 - } - stCase22: - switch (m.data)[(m.p)] { - case 45: - goto st23 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st23 + goto tr83 + case 71: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr87 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr87 + } + default: + goto tr88 + } + goto tr86 + case 72: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr89 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr89 + } + default: + goto tr90 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st23 + goto tr86 + case 176: + switch (m.data)[(m.p)] { + case 33: + goto tr216 + case 36: + goto tr216 + case 37: + goto tr217 + case 61: + goto tr216 + case 95: + goto tr216 } - default: - goto st23 - } - goto tr6 - st23: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof23 - } - stCase23: - switch (m.data)[(m.p)] { - case 45: - goto st24 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st24 + switch { + case (m.data)[(m.p)] < 48: + if 39 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr216 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr216 + } + case (m.data)[(m.p)] >= 64: + goto tr216 + } + default: + goto tr216 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st24 + goto tr86 + case 73: + if (m.data)[(m.p)] == 97 { + goto tr91 } - default: - goto st24 - } - goto tr6 - st24: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof24 - } - stCase24: - switch (m.data)[(m.p)] { - case 45: - goto st25 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st25 + goto tr74 + case 74: + if (m.data)[(m.p)] == 114 { + goto tr92 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st25 + goto tr74 + case 75: + if (m.data)[(m.p)] == 97 { + goto tr93 } - default: - goto st25 - } - goto tr6 - st25: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof25 - } - stCase25: - switch (m.data)[(m.p)] { - case 45: - goto st26 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st26 + goto tr74 + case 76: + if (m.data)[(m.p)] == 109 { + goto tr79 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st26 + goto tr74 + case 77: + if (m.data)[(m.p)] == 99 { + goto tr94 } - default: - goto st26 - } - goto tr6 - st26: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof26 - } - stCase26: - switch (m.data)[(m.p)] { - case 45: - goto st27 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st27 + goto tr74 + case 78: + if (m.data)[(m.p)] == 104 { + goto tr95 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st27 + goto tr74 + case 79: + if (m.data)[(m.p)] == 101 { + goto tr96 } - default: - goto st27 - } - goto tr6 - st27: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof27 - } - stCase27: - switch (m.data)[(m.p)] { - case 45: - goto st28 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st28 + goto tr74 + case 80: + if (m.data)[(m.p)] == 109 { + goto tr97 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st28 + goto tr74 + case 81: + if (m.data)[(m.p)] == 97 { + goto tr98 } - default: - goto st28 - } - goto tr6 - st28: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof28 - } - stCase28: - switch (m.data)[(m.p)] { - case 45: - goto st29 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st29 + goto tr74 + case 82: + if (m.data)[(m.p)] == 115 { + goto tr79 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st29 + goto tr74 + case 83: + switch (m.data)[(m.p)] { + case 85: + goto tr99 + case 117: + goto tr99 } - default: - goto st29 - } - goto tr6 - st29: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof29 - } - stCase29: - switch (m.data)[(m.p)] { - case 45: - goto st30 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st30 + goto tr0 + case 84: + switch (m.data)[(m.p)] { + case 82: + goto tr100 + case 114: + goto tr100 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st30 + goto tr0 + case 85: + switch (m.data)[(m.p)] { + case 78: + goto tr101 + case 110: + goto tr101 } - default: - goto st30 - } - goto tr6 - st30: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof30 - } - stCase30: - switch (m.data)[(m.p)] { - case 45: - goto st31 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st31 + goto tr0 + case 86: + if (m.data)[(m.p)] == 58 { + goto tr102 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st31 + goto tr0 + case 87: + switch (m.data)[(m.p)] { + case 85: + goto tr105 + case 117: + goto tr105 } - default: - goto st31 - } - goto tr6 - st31: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof31 - } - stCase31: - switch (m.data)[(m.p)] { - case 45: - goto st32 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st32 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr104 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr104 + } + default: + goto tr104 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st32 + goto tr103 + case 88: + if (m.data)[(m.p)] == 45 { + goto tr107 } - default: - goto st32 - } - goto tr6 - st32: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof32 - } - stCase32: - switch (m.data)[(m.p)] { - case 45: - goto st33 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st33 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr108 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr108 + } + default: + goto tr108 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st33 + goto tr106 + case 89: + if (m.data)[(m.p)] == 45 { + goto tr109 } - default: - goto st33 - } - goto tr6 - st33: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof33 - } - stCase33: - switch (m.data)[(m.p)] { - case 45: - goto st34 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st34 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr110 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr110 + } + default: + goto tr110 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st34 + goto tr106 + case 90: + if (m.data)[(m.p)] == 45 { + goto tr111 } - default: - goto st34 - } - goto tr6 - st34: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof34 - } - stCase34: - switch (m.data)[(m.p)] { - case 45: - goto st35 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st35 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr112 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr112 + } + default: + goto tr112 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st35 + goto tr106 + case 91: + if (m.data)[(m.p)] == 45 { + goto tr113 } - default: - goto st35 - } - goto tr6 - st35: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof35 - } - stCase35: - switch (m.data)[(m.p)] { - case 45: - goto st36 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st36 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr114 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr114 + } + default: + goto tr114 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st36 + goto tr106 + case 92: + if (m.data)[(m.p)] == 45 { + goto tr115 } - default: - goto st36 - } - goto tr6 - st36: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof36 - } - stCase36: - switch (m.data)[(m.p)] { - case 45: - goto st37 - case 58: - goto tr10 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st37 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr116 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr116 + } + default: + goto tr116 } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st37 + goto tr106 + case 93: + if (m.data)[(m.p)] == 45 { + goto tr117 } - default: - goto st37 - } - goto tr6 - st37: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof37 - } - stCase37: - if (m.data)[(m.p)] == 58 { - goto tr10 - } - goto tr6 - tr10: - - output.ID = string(m.text()) - - goto st38 - st38: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof38 - } - stCase38: - switch (m.data)[(m.p)] { - case 33: - goto tr42 - case 36: - goto tr42 - case 37: - goto tr43 - case 61: - goto tr42 - case 95: - goto tr42 - } - switch { - case (m.data)[(m.p)] < 48: - if 39 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { - goto tr42 + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr118 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr118 + } + default: + goto tr118 + } + goto tr106 + case 94: + if (m.data)[(m.p)] == 45 { + goto tr119 } - case (m.data)[(m.p)] > 59: switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr120 + } case (m.data)[(m.p)] > 90: if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto tr42 + goto tr120 } - case (m.data)[(m.p)] >= 64: - goto tr42 + default: + goto tr120 } - default: - goto tr42 - } - goto tr41 - tr42: - - m.pb = m.p - - goto st44 - st44: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof44 - } - stCase44: - switch (m.data)[(m.p)] { - case 33: - goto st44 - case 36: - goto st44 - case 37: - goto st39 - case 61: - goto st44 + goto tr106 case 95: - goto st44 - } - switch { - case (m.data)[(m.p)] < 48: - if 39 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { - goto st44 + if (m.data)[(m.p)] == 45 { + goto tr121 } - case (m.data)[(m.p)] > 59: switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr122 + } case (m.data)[(m.p)] > 90: if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st44 + goto tr122 } - case (m.data)[(m.p)] >= 64: - goto st44 + default: + goto tr122 } - default: - goto st44 - } - goto tr41 - tr43: + goto tr106 + case 96: + if (m.data)[(m.p)] == 45 { + goto tr123 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr124 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr124 + } + default: + goto tr124 + } + goto tr106 + case 97: + if (m.data)[(m.p)] == 45 { + goto tr125 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr126 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr126 + } + default: + goto tr126 + } + goto tr106 + case 98: + if (m.data)[(m.p)] == 45 { + goto tr127 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr128 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr128 + } + default: + goto tr128 + } + goto tr106 + case 99: + if (m.data)[(m.p)] == 45 { + goto tr129 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr130 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr130 + } + default: + goto tr130 + } + goto tr106 + case 100: + if (m.data)[(m.p)] == 45 { + goto tr131 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr132 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr132 + } + default: + goto tr132 + } + goto tr106 + case 101: + if (m.data)[(m.p)] == 45 { + goto tr133 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr134 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr134 + } + default: + goto tr134 + } + goto tr106 + case 102: + if (m.data)[(m.p)] == 45 { + goto tr135 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr136 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr136 + } + default: + goto tr136 + } + goto tr106 + case 103: + if (m.data)[(m.p)] == 45 { + goto tr137 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr138 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr138 + } + default: + goto tr138 + } + goto tr106 + case 104: + if (m.data)[(m.p)] == 45 { + goto tr139 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr140 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr140 + } + default: + goto tr140 + } + goto tr106 + case 105: + if (m.data)[(m.p)] == 45 { + goto tr141 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr142 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr142 + } + default: + goto tr142 + } + goto tr106 + case 106: + if (m.data)[(m.p)] == 45 { + goto tr143 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr144 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr144 + } + default: + goto tr144 + } + goto tr106 + case 107: + if (m.data)[(m.p)] == 45 { + goto tr145 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr146 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr146 + } + default: + goto tr146 + } + goto tr106 + case 108: + if (m.data)[(m.p)] == 45 { + goto tr147 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr148 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr148 + } + default: + goto tr148 + } + goto tr106 + case 109: + if (m.data)[(m.p)] == 45 { + goto tr149 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr150 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr150 + } + default: + goto tr150 + } + goto tr106 + case 110: + if (m.data)[(m.p)] == 45 { + goto tr151 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr152 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr152 + } + default: + goto tr152 + } + goto tr106 + case 111: + if (m.data)[(m.p)] == 45 { + goto tr153 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr154 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr154 + } + default: + goto tr154 + } + goto tr106 + case 112: + if (m.data)[(m.p)] == 45 { + goto tr155 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr156 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr156 + } + default: + goto tr156 + } + goto tr106 + case 113: + if (m.data)[(m.p)] == 45 { + goto tr157 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr158 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr158 + } + default: + goto tr158 + } + goto tr106 + case 114: + if (m.data)[(m.p)] == 45 { + goto tr159 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr160 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr160 + } + default: + goto tr160 + } + goto tr106 + case 115: + if (m.data)[(m.p)] == 45 { + goto tr161 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr162 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr162 + } + default: + goto tr162 + } + goto tr106 + case 116: + if (m.data)[(m.p)] == 45 { + goto tr163 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr164 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr164 + } + default: + goto tr164 + } + goto tr106 + case 117: + if (m.data)[(m.p)] == 45 { + goto tr165 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr166 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr166 + } + default: + goto tr166 + } + goto tr106 + case 118: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr167 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr167 + } + default: + goto tr167 + } + goto tr106 + case 119: + if (m.data)[(m.p)] == 58 { + goto tr168 + } + goto tr106 + case 120: + switch (m.data)[(m.p)] { + case 33: + goto tr170 + case 37: + goto tr171 + case 61: + goto tr170 + case 95: + goto tr170 + case 126: + goto tr170 + } + switch { + case (m.data)[(m.p)] < 48: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr170 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr170 + } + case (m.data)[(m.p)] >= 64: + goto tr170 + } + default: + goto tr170 + } + goto tr169 + case 177: + switch (m.data)[(m.p)] { + case 33: + goto tr218 + case 35: + goto tr219 + case 37: + goto tr220 + case 61: + goto tr218 + case 63: + goto tr221 + case 95: + goto tr218 + case 126: + goto tr218 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr218 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr218 + } + default: + goto tr218 + } + goto tr169 + case 178: + switch (m.data)[(m.p)] { + case 33: + goto tr222 + case 37: + goto tr223 + case 61: + goto tr222 + case 95: + goto tr222 + case 126: + goto tr222 + } + switch { + case (m.data)[(m.p)] < 63: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr222 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr222 + } + default: + goto tr222 + } + goto tr183 + case 179: + switch (m.data)[(m.p)] { + case 33: + goto tr224 + case 37: + goto tr225 + case 61: + goto tr224 + case 95: + goto tr224 + case 126: + goto tr224 + } + switch { + case (m.data)[(m.p)] < 63: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr224 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr224 + } + default: + goto tr224 + } + goto tr183 + case 121: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr173 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr173 + } + default: + goto tr174 + } + goto tr172 + case 122: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr175 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr175 + } + default: + goto tr176 + } + goto tr172 + case 180: + switch (m.data)[(m.p)] { + case 33: + goto tr224 + case 37: + goto tr225 + case 61: + goto tr224 + case 95: + goto tr224 + case 126: + goto tr224 + } + switch { + case (m.data)[(m.p)] < 63: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr224 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr224 + } + default: + goto tr224 + } + goto tr172 + case 123: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr178 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr178 + } + default: + goto tr179 + } + goto tr177 + case 124: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr180 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr180 + } + default: + goto tr181 + } + goto tr177 + case 181: + switch (m.data)[(m.p)] { + case 33: + goto tr218 + case 35: + goto tr219 + case 37: + goto tr220 + case 61: + goto tr218 + case 63: + goto tr221 + case 95: + goto tr218 + case 126: + goto tr218 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr218 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr218 + } + default: + goto tr218 + } + goto tr177 + case 125: + switch (m.data)[(m.p)] { + case 43: + goto tr182 + case 61: + goto tr184 + } + goto tr183 + case 126: + switch (m.data)[(m.p)] { + case 33: + goto tr186 + case 37: + goto tr187 + case 61: + goto tr186 + case 63: + goto tr188 + case 95: + goto tr186 + case 126: + goto tr186 + } + switch { + case (m.data)[(m.p)] < 48: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr186 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr186 + } + case (m.data)[(m.p)] >= 64: + goto tr186 + } + default: + goto tr186 + } + goto tr185 + case 182: + switch (m.data)[(m.p)] { + case 33: + goto tr226 + case 35: + goto tr227 + case 37: + goto tr228 + case 61: + goto tr226 + case 63: + goto tr229 + case 95: + goto tr226 + case 126: + goto tr226 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr226 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr226 + } + default: + goto tr226 + } + goto tr185 + case 127: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr190 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr190 + } + default: + goto tr191 + } + goto tr189 + case 128: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr192 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr192 + } + default: + goto tr193 + } + goto tr189 + case 183: + switch (m.data)[(m.p)] { + case 33: + goto tr226 + case 35: + goto tr227 + case 37: + goto tr228 + case 61: + goto tr226 + case 63: + goto tr229 + case 95: + goto tr226 + case 126: + goto tr226 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr226 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr226 + } + default: + goto tr226 + } + goto tr189 + case 184: + switch (m.data)[(m.p)] { + case 33: + goto tr226 + case 35: + goto tr227 + case 37: + goto tr228 + case 43: + goto tr230 + case 61: + goto tr231 + case 63: + goto tr229 + case 95: + goto tr226 + case 126: + goto tr226 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr226 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr226 + } + default: + goto tr226 + } + goto tr185 + case 185: + switch (m.data)[(m.p)] { + case 33: + goto tr232 + case 35: + goto tr233 + case 37: + goto tr234 + case 47: + goto tr226 + case 61: + goto tr232 + case 63: + goto tr235 + case 95: + goto tr232 + case 126: + goto tr232 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr232 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr232 + } + default: + goto tr232 + } + goto tr185 + case 186: + switch (m.data)[(m.p)] { + case 33: + goto tr204 + case 35: + goto tr227 + case 37: + goto tr237 + case 47: + goto tr226 + case 61: + goto tr204 + case 63: + goto tr229 + case 95: + goto tr204 + case 126: + goto tr204 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr204 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr204 + } + default: + goto tr204 + } + goto tr236 + case 187: + switch (m.data)[(m.p)] { + case 33: + goto tr238 + case 35: + goto tr239 + case 37: + goto tr240 + case 61: + goto tr238 + case 63: + goto tr241 + case 95: + goto tr238 + case 126: + goto tr238 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr238 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr238 + } + default: + goto tr238 + } + goto tr203 + case 129: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr195 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr195 + } + default: + goto tr196 + } + goto tr194 + case 130: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr197 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr197 + } + default: + goto tr198 + } + goto tr194 + case 188: + switch (m.data)[(m.p)] { + case 33: + goto tr238 + case 35: + goto tr239 + case 37: + goto tr240 + case 61: + goto tr238 + case 63: + goto tr241 + case 95: + goto tr238 + case 126: + goto tr238 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr238 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr238 + } + default: + goto tr238 + } + goto tr194 + case 189: + switch (m.data)[(m.p)] { + case 33: + goto tr238 + case 35: + goto tr239 + case 37: + goto tr240 + case 61: + goto tr242 + case 63: + goto tr241 + case 95: + goto tr238 + case 126: + goto tr238 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr238 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr238 + } + default: + goto tr238 + } + goto tr203 + case 190: + switch (m.data)[(m.p)] { + case 33: + goto tr243 + case 35: + goto tr244 + case 37: + goto tr245 + case 47: + goto tr238 + case 61: + goto tr243 + case 63: + goto tr246 + case 95: + goto tr243 + case 126: + goto tr243 + } + switch { + case (m.data)[(m.p)] < 64: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 59 { + goto tr243 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr243 + } + default: + goto tr243 + } + goto tr203 + case 131: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr200 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr200 + } + default: + goto tr201 + } + goto tr199 + case 132: + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr197 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr197 + } + default: + goto tr198 + } + goto tr199 + case 133: + if (m.data)[(m.p)] == 43 { + goto tr202 + } + goto tr185 + case 191: + switch (m.data)[(m.p)] { + case 33: + goto tr232 + case 35: + goto tr233 + case 37: + goto tr234 + case 61: + goto tr232 + case 63: + goto tr247 + case 95: + goto tr232 + case 126: + goto tr232 + } + switch { + case (m.data)[(m.p)] < 48: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr232 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr232 + } + case (m.data)[(m.p)] >= 64: + goto tr232 + } + default: + goto tr232 + } + goto tr185 + case 134: + switch (m.data)[(m.p)] { + case 43: + goto tr202 + case 61: + goto tr184 + } + goto tr185 + case 135: + switch (m.data)[(m.p)] { + case 33: + goto tr204 + case 37: + goto tr205 + case 61: + goto tr204 + case 63: + goto tr206 + case 95: + goto tr204 + case 126: + goto tr204 + } + switch { + case (m.data)[(m.p)] < 48: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr204 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr204 + } + case (m.data)[(m.p)] >= 64: + goto tr204 + } + default: + goto tr204 + } + goto tr203 + case 136: + if (m.data)[(m.p)] == 61 { + goto tr207 + } + goto tr203 + case 192: + switch (m.data)[(m.p)] { + case 33: + goto tr243 + case 35: + goto tr244 + case 37: + goto tr245 + case 61: + goto tr243 + case 63: + goto tr248 + case 95: + goto tr243 + case 126: + goto tr243 + } + switch { + case (m.data)[(m.p)] < 48: + if 36 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { + goto tr243 + } + case (m.data)[(m.p)] > 59: + switch { + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr243 + } + case (m.data)[(m.p)] >= 64: + goto tr243 + } + default: + goto tr243 + } + goto tr203 + case 137: + if (m.data)[(m.p)] == 58 { + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr167 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr167 + } + default: + goto tr167 + } + goto tr106 + case 138: + switch (m.data)[(m.p)] { + case 45: + goto tr165 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr166 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr166 + } + default: + goto tr166 + } + goto tr106 + case 139: + switch (m.data)[(m.p)] { + case 45: + goto tr163 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr164 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr164 + } + default: + goto tr164 + } + goto tr106 + case 140: + switch (m.data)[(m.p)] { + case 45: + goto tr161 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr162 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr162 + } + default: + goto tr162 + } + goto tr106 + case 141: + switch (m.data)[(m.p)] { + case 45: + goto tr159 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr160 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr160 + } + default: + goto tr160 + } + goto tr106 + case 142: + switch (m.data)[(m.p)] { + case 45: + goto tr157 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr158 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr158 + } + default: + goto tr158 + } + goto tr106 + case 143: + switch (m.data)[(m.p)] { + case 45: + goto tr155 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr156 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr156 + } + default: + goto tr156 + } + goto tr106 + case 144: + switch (m.data)[(m.p)] { + case 45: + goto tr153 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr154 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr154 + } + default: + goto tr154 + } + goto tr106 + case 145: + switch (m.data)[(m.p)] { + case 45: + goto tr151 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr152 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr152 + } + default: + goto tr152 + } + goto tr106 + case 146: + switch (m.data)[(m.p)] { + case 45: + goto tr149 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr150 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr150 + } + default: + goto tr150 + } + goto tr106 + case 147: + switch (m.data)[(m.p)] { + case 45: + goto tr147 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr148 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr148 + } + default: + goto tr148 + } + goto tr106 + case 148: + switch (m.data)[(m.p)] { + case 45: + goto tr145 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr146 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr146 + } + default: + goto tr146 + } + goto tr106 + case 149: + switch (m.data)[(m.p)] { + case 45: + goto tr143 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr144 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr144 + } + default: + goto tr144 + } + goto tr106 + case 150: + switch (m.data)[(m.p)] { + case 45: + goto tr141 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr142 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr142 + } + default: + goto tr142 + } + goto tr106 + case 151: + switch (m.data)[(m.p)] { + case 45: + goto tr139 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr140 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr140 + } + default: + goto tr140 + } + goto tr106 + case 152: + switch (m.data)[(m.p)] { + case 45: + goto tr137 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr138 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr138 + } + default: + goto tr138 + } + goto tr106 + case 153: + switch (m.data)[(m.p)] { + case 45: + goto tr135 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr136 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr136 + } + default: + goto tr136 + } + goto tr106 + case 154: + switch (m.data)[(m.p)] { + case 45: + goto tr133 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr134 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr134 + } + default: + goto tr134 + } + goto tr106 + case 155: + switch (m.data)[(m.p)] { + case 45: + goto tr131 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr132 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr132 + } + default: + goto tr132 + } + goto tr106 + case 156: + switch (m.data)[(m.p)] { + case 45: + goto tr129 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr130 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr130 + } + default: + goto tr130 + } + goto tr106 + case 157: + switch (m.data)[(m.p)] { + case 45: + goto tr127 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr128 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr128 + } + default: + goto tr128 + } + goto tr106 + case 158: + switch (m.data)[(m.p)] { + case 45: + goto tr125 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr126 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr126 + } + default: + goto tr126 + } + goto tr106 + case 159: + switch (m.data)[(m.p)] { + case 45: + goto tr123 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr124 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr124 + } + default: + goto tr124 + } + goto tr106 + case 160: + switch (m.data)[(m.p)] { + case 45: + goto tr121 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr122 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr122 + } + default: + goto tr122 + } + goto tr106 + case 161: + switch (m.data)[(m.p)] { + case 45: + goto tr119 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr120 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr120 + } + default: + goto tr120 + } + goto tr106 + case 162: + switch (m.data)[(m.p)] { + case 45: + goto tr117 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr118 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr118 + } + default: + goto tr118 + } + goto tr106 + case 163: + switch (m.data)[(m.p)] { + case 45: + goto tr115 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr116 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr116 + } + default: + goto tr116 + } + goto tr106 + case 164: + switch (m.data)[(m.p)] { + case 45: + goto tr113 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr114 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr114 + } + default: + goto tr114 + } + goto tr106 + case 165: + switch (m.data)[(m.p)] { + case 45: + goto tr111 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr112 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr112 + } + default: + goto tr112 + } + goto tr106 + case 166: + switch (m.data)[(m.p)] { + case 45: + goto tr109 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr110 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr110 + } + default: + goto tr110 + } + goto tr106 + case 167: + switch (m.data)[(m.p)] { + case 45: + goto tr107 + case 82: + goto tr208 + case 114: + goto tr208 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr108 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr108 + } + default: + goto tr108 + } + goto tr103 + case 168: + switch (m.data)[(m.p)] { + case 45: + goto tr109 + case 58: + goto tr168 + case 78: + goto tr209 + case 110: + goto tr209 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr110 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr110 + } + default: + goto tr110 + } + goto tr103 + case 169: + switch (m.data)[(m.p)] { + case 45: + goto tr210 + case 58: + goto tr168 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr112 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr112 + } + default: + goto tr112 + } + goto tr106 + case 170: + switch (m.data)[(m.p)] { + case 45: + goto tr113 + case 48: + goto tr211 + } + switch { + case (m.data)[(m.p)] < 65: + if 49 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr114 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr211 + } + default: + goto tr211 + } + goto tr106 + case 171: + if (m.data)[(m.p)] == 45 { + goto tr115 + } + switch { + case (m.data)[(m.p)] < 65: + if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { + goto tr116 + } + case (m.data)[(m.p)] > 90: + if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { + goto tr116 + } + default: + goto tr116 + } + goto tr106 + case 193: + switch (m.data)[(m.p)] { + case 10: + goto tr183 + case 13: + goto tr183 + } + goto tr249 + } + + tr183: + m.cs = 0 + goto _again + tr0: + m.cs = 0 + goto f0 + tr5: + m.cs = 0 + goto f3 + tr8: + m.cs = 0 + goto f5 + tr41: + m.cs = 0 + goto f7 + tr44: + m.cs = 0 + goto f8 + tr51: + m.cs = 0 + goto f10 + tr56: + m.cs = 0 + goto f11 + tr74: + m.cs = 0 + goto f13 + tr81: + m.cs = 0 + goto f15 + tr83: + m.cs = 0 + goto f17 + tr86: + m.cs = 0 + goto f19 + tr103: + m.cs = 0 + goto f20 + tr106: + m.cs = 0 + goto f21 + tr169: + m.cs = 0 + goto f22 + tr172: + m.cs = 0 + goto f23 + tr177: + m.cs = 0 + goto f24 + tr185: + m.cs = 0 + goto f25 + tr189: + m.cs = 0 + goto f27 + tr194: + m.cs = 0 + goto f28 + tr199: + m.cs = 0 + goto f29 + tr203: + m.cs = 0 + goto f30 + tr236: + m.cs = 0 + goto f46 + tr1: + m.cs = 2 + goto f1 + tr2: + m.cs = 3 + goto _again + tr3: + m.cs = 4 + goto _again + tr4: + m.cs = 5 + goto f2 + tr6: + m.cs = 6 + goto f4 + tr9: + m.cs = 7 + goto _again + tr11: + m.cs = 8 + goto _again + tr12: + m.cs = 9 + goto _again + tr13: + m.cs = 10 + goto _again + tr14: + m.cs = 11 + goto _again + tr15: + m.cs = 12 + goto _again + tr16: + m.cs = 13 + goto _again + tr17: + m.cs = 14 + goto _again + tr18: + m.cs = 15 + goto _again + tr19: + m.cs = 16 + goto _again + tr20: + m.cs = 17 + goto _again + tr21: + m.cs = 18 + goto _again + tr22: + m.cs = 19 + goto _again + tr23: + m.cs = 20 + goto _again + tr24: + m.cs = 21 + goto _again + tr25: + m.cs = 22 + goto _again + tr26: + m.cs = 23 + goto _again + tr27: + m.cs = 24 + goto _again + tr28: + m.cs = 25 + goto _again + tr29: + m.cs = 26 + goto _again + tr30: + m.cs = 27 + goto _again + tr31: + m.cs = 28 + goto _again + tr32: + m.cs = 29 + goto _again + tr33: + m.cs = 30 + goto _again + tr34: + m.cs = 31 + goto _again + tr35: + m.cs = 32 + goto _again + tr36: + m.cs = 33 + goto _again + tr37: + m.cs = 34 + goto _again + tr38: + m.cs = 35 + goto _again + tr39: + m.cs = 36 + goto _again + tr40: + m.cs = 37 + goto _again + tr10: + m.cs = 38 + goto f6 + tr213: + m.cs = 39 + goto _again + tr43: + m.cs = 39 + goto f4 + tr45: + m.cs = 40 + goto _again + tr46: + m.cs = 40 + goto f9 + tr7: + m.cs = 41 + goto f1 + tr49: + m.cs = 42 + goto _again + tr50: + m.cs = 43 + goto _again + tr52: + m.cs = 45 + goto f1 + tr53: + m.cs = 46 + goto _again + tr54: + m.cs = 47 + goto _again + tr55: + m.cs = 48 + goto f2 + tr57: + m.cs = 49 + goto f4 + tr58: + m.cs = 50 + goto _again + tr59: + m.cs = 51 + goto _again + tr60: + m.cs = 52 + goto _again + tr61: + m.cs = 53 + goto _again + tr62: + m.cs = 54 + goto _again + tr63: + m.cs = 55 + goto _again + tr64: + m.cs = 56 + goto _again + tr65: + m.cs = 57 + goto _again + tr66: + m.cs = 58 + goto _again + tr67: + m.cs = 59 + goto _again + tr68: + m.cs = 60 + goto _again + tr69: + m.cs = 61 + goto _again + tr70: + m.cs = 62 + goto _again + tr71: + m.cs = 63 + goto _again + tr72: + m.cs = 64 + goto _again + tr73: + m.cs = 65 + goto f12 + tr75: + m.cs = 66 + goto f4 + tr78: + m.cs = 67 + goto _again + tr79: + m.cs = 68 + goto _again + tr80: + m.cs = 69 + goto f14 + tr215: + m.cs = 70 + goto f35 + tr217: + m.cs = 71 + goto _again + tr85: + m.cs = 71 + goto f18 + tr87: + m.cs = 72 + goto _again + tr88: + m.cs = 72 + goto f9 + tr76: + m.cs = 73 + goto f4 + tr91: + m.cs = 74 + goto _again + tr92: + m.cs = 75 + goto _again + tr93: + m.cs = 76 + goto _again + tr77: + m.cs = 77 + goto f4 + tr94: + m.cs = 78 + goto _again + tr95: + m.cs = 79 + goto _again + tr96: + m.cs = 80 + goto _again + tr97: + m.cs = 81 + goto _again + tr98: + m.cs = 82 + goto _again + tr99: + m.cs = 84 + goto f1 + tr100: + m.cs = 85 + goto _again + tr101: + m.cs = 86 + goto _again + tr102: + m.cs = 87 + goto f2 + tr104: + m.cs = 88 + goto f4 + tr107: + m.cs = 89 + goto _again + tr109: + m.cs = 90 + goto _again + tr111: + m.cs = 91 + goto _again + tr113: + m.cs = 92 + goto _again + tr115: + m.cs = 93 + goto _again + tr117: + m.cs = 94 + goto _again + tr119: + m.cs = 95 + goto _again + tr121: + m.cs = 96 + goto _again + tr123: + m.cs = 97 + goto _again + tr125: + m.cs = 98 + goto _again + tr127: + m.cs = 99 + goto _again + tr129: + m.cs = 100 + goto _again + tr131: + m.cs = 101 + goto _again + tr133: + m.cs = 102 + goto _again + tr135: + m.cs = 103 + goto _again + tr137: + m.cs = 104 + goto _again + tr139: + m.cs = 105 + goto _again + tr141: + m.cs = 106 + goto _again + tr143: + m.cs = 107 + goto _again + tr145: + m.cs = 108 + goto _again + tr147: + m.cs = 109 + goto _again + tr149: + m.cs = 110 + goto _again + tr151: + m.cs = 111 + goto _again + tr153: + m.cs = 112 + goto _again + tr155: + m.cs = 113 + goto _again + tr157: + m.cs = 114 + goto _again + tr159: + m.cs = 115 + goto _again + tr161: + m.cs = 116 + goto _again + tr163: + m.cs = 117 + goto _again + tr165: + m.cs = 118 + goto _again + tr167: + m.cs = 119 + goto _again + tr168: + m.cs = 120 + goto f6 + tr225: + m.cs = 121 + goto _again + tr223: + m.cs = 121 + goto f4 + tr173: + m.cs = 122 + goto _again + tr174: + m.cs = 122 + goto f9 + tr220: + m.cs = 123 + goto _again + tr171: + m.cs = 123 + goto f4 + tr178: + m.cs = 124 + goto _again + tr179: + m.cs = 124 + goto f9 + tr221: + m.cs = 125 + goto f38 + tr182: + m.cs = 126 + goto _again + tr228: + m.cs = 127 + goto _again + tr187: + m.cs = 127 + goto f26 + tr234: + m.cs = 127 + goto f44 + tr190: + m.cs = 128 + goto _again + tr191: + m.cs = 128 + goto f9 + tr240: + m.cs = 129 + goto _again + tr205: + m.cs = 129 + goto f31 + tr245: + m.cs = 129 + goto f50 + tr195: + m.cs = 130 + goto _again + tr196: + m.cs = 130 + goto f9 + tr237: + m.cs = 131 + goto f31 + tr200: + m.cs = 132 + goto _again + tr201: + m.cs = 132 + goto f9 + tr188: + m.cs = 133 + goto f26 + tr247: + m.cs = 134 + goto f45 + tr184: + m.cs = 135 + goto _again + tr206: + m.cs = 136 + goto f31 + tr248: + m.cs = 136 + goto f50 + tr166: + m.cs = 137 + goto _again + tr164: + m.cs = 138 + goto _again + tr162: + m.cs = 139 + goto _again + tr160: + m.cs = 140 + goto _again + tr158: + m.cs = 141 + goto _again + tr156: + m.cs = 142 + goto _again + tr154: + m.cs = 143 + goto _again + tr152: + m.cs = 144 + goto _again + tr150: + m.cs = 145 + goto _again + tr148: + m.cs = 146 + goto _again + tr146: + m.cs = 147 + goto _again + tr144: + m.cs = 148 + goto _again + tr142: + m.cs = 149 + goto _again + tr140: + m.cs = 150 + goto _again + tr138: + m.cs = 151 + goto _again + tr136: + m.cs = 152 + goto _again + tr134: + m.cs = 153 + goto _again + tr132: + m.cs = 154 + goto _again + tr130: + m.cs = 155 + goto _again + tr128: + m.cs = 156 + goto _again + tr126: + m.cs = 157 + goto _again + tr124: + m.cs = 158 + goto _again + tr122: + m.cs = 159 + goto _again + tr120: + m.cs = 160 + goto _again + tr118: + m.cs = 161 + goto _again + tr116: + m.cs = 162 + goto _again + tr114: + m.cs = 163 + goto _again + tr112: + m.cs = 164 + goto _again + tr110: + m.cs = 165 + goto _again + tr108: + m.cs = 166 + goto _again + tr105: + m.cs = 167 + goto f1 + tr208: + m.cs = 168 + goto _again + tr209: + m.cs = 169 + goto _again + tr210: + m.cs = 170 + goto f2 + tr211: + m.cs = 171 + goto _again + tr212: + m.cs = 172 + goto _again + tr42: + m.cs = 172 + goto f4 + tr47: + m.cs = 173 + goto _again + tr48: + m.cs = 173 + goto f9 + tr214: + m.cs = 174 + goto _again + tr82: + m.cs = 174 + goto f16 + tr216: + m.cs = 175 + goto _again + tr84: + m.cs = 175 + goto f18 + tr89: + m.cs = 176 + goto _again + tr90: + m.cs = 176 + goto f9 + tr218: + m.cs = 177 + goto _again + tr170: + m.cs = 177 + goto f4 + tr219: + m.cs = 178 + goto f38 + tr227: + m.cs = 178 + goto f42 + tr233: + m.cs = 178 + goto f45 + tr239: + m.cs = 178 + goto f48 + tr244: + m.cs = 178 + goto f51 + tr224: + m.cs = 179 + goto _again + tr222: + m.cs = 179 + goto f4 + tr175: + m.cs = 180 + goto _again + tr176: + m.cs = 180 + goto f9 + tr180: + m.cs = 181 + goto _again + tr181: + m.cs = 181 + goto f9 + tr226: + m.cs = 182 + goto _again + tr186: + m.cs = 182 + goto f26 + tr232: + m.cs = 182 + goto f44 + tr192: + m.cs = 183 + goto _again + tr193: + m.cs = 183 + goto f9 + tr229: + m.cs = 184 + goto f42 + tr235: + m.cs = 184 + goto f45 + tr230: + m.cs = 185 + goto _again + tr231: + m.cs = 186 + goto _again + tr238: + m.cs = 187 + goto _again + tr204: + m.cs = 187 + goto f31 + tr243: + m.cs = 187 + goto f50 + tr197: + m.cs = 188 + goto _again + tr198: + m.cs = 188 + goto f9 + tr241: + m.cs = 189 + goto _again + tr246: + m.cs = 189 + goto f50 + tr242: + m.cs = 190 + goto _again + tr202: + m.cs = 191 + goto _again + tr207: + m.cs = 192 + goto _again + tr249: + m.cs = 193 + goto _again + + f4: + + m.pb = m.p + + goto _again + f9: + + // List of positions in the buffer to later lowercase + output.tolower = append(output.tolower, m.p-m.pb) + + goto _again + f2: + + output.prefix = string(m.text()) + + goto _again + f6: + + output.ID = string(m.text()) + + goto _again + f38: + + output.SS = string(m.text()) + // Iterate upper letters lowering them + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] + 32 + } + output.norm = string(m.text()) + // Revert the buffer to the original + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] - 32 + } + + goto _again + f0: + + m.err = fmt.Errorf(errPrefix, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f5: + + m.err = fmt.Errorf(errIdentifier, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f7: + + m.err = fmt.Errorf(errSpecificString, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f23: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } + + goto _again + f11: + + m.err = fmt.Errorf(errSCIMNamespace, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f13: + + m.err = fmt.Errorf(errSCIMType, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f15: + + m.err = fmt.Errorf(errSCIMName, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f17: + + if m.p == m.pe { + m.err = fmt.Errorf(errSCIMOtherIncomplete, m.p-1) + } else { + m.err = fmt.Errorf(errSCIMOther, m.p) + } + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f14: + + output.scim.Type = scimschema.TypeFromString(string(m.text())) + + goto _again + f16: + + output.scim.pos = m.p + + goto _again + f35: + + output.scim.Name = string(m.data[output.scim.pos:m.p]) + + goto _again + f18: + + output.scim.pos = m.p + + goto _again + f22: + + m.err = fmt.Errorf(err8141SpecificString, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f21: + + m.err = fmt.Errorf(err8141Identifier, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f42: + + output.rComponent = string(m.text()) + + goto _again + f48: + + output.qComponent = string(m.text()) + + goto _again + f44: + + if output.rStart { + m.err = fmt.Errorf(err8141RComponentStart, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } + output.rStart = true + + goto _again + f50: + + if output.qStart { + m.err = fmt.Errorf(err8141QComponentStart, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } + output.qStart = true + + goto _again + f25: + + m.err = fmt.Errorf(err8141MalformedRComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f30: + + m.err = fmt.Errorf(err8141MalformedQComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f1: + + m.pb = m.p + + if m.parsingMode != RFC8141Only { + // Throw an error when: + // - we are entering here matching the the prefix in the namespace identifier part + // - looking ahead (3 chars) we find a colon + if pos := m.p + 3; pos < m.pe && m.data[pos] == 58 && output.prefix != "" { + m.err = fmt.Errorf(errNoUrnWithinID, pos) + (m.p)-- + + m.cs = 193 + goto _again + + } + } + + goto _again + f12: + + output.ID = string(m.text()) + + output.scim = &SCIM{} + + goto _again + f3: + + m.err = fmt.Errorf(errIdentifier, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + m.err = fmt.Errorf(errPrefix, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f10: + + m.err = fmt.Errorf(errIdentifier, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + m.err = fmt.Errorf(errNoUrnWithinID, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f8: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } + + m.err = fmt.Errorf(errSpecificString, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f19: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } + + if m.p == m.pe { + m.err = fmt.Errorf(errSCIMOtherIncomplete, m.p-1) + } else { + m.err = fmt.Errorf(errSCIMOther, m.p) + } + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f24: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } + + m.err = fmt.Errorf(err8141SpecificString, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f27: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } + + m.err = fmt.Errorf(err8141MalformedRComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f28: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } + + m.err = fmt.Errorf(err8141MalformedQComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f20: + + m.err = fmt.Errorf(err8141Identifier, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + m.err = fmt.Errorf(errPrefix, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f26: + + if output.rStart { + m.err = fmt.Errorf(err8141RComponentStart, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } + output.rStart = true + + m.pb = m.p + + goto _again + f45: - m.pb = m.p + if output.rStart { + m.err = fmt.Errorf(err8141RComponentStart, m.p) + (m.p)-- + + m.cs = 193 + goto _again - goto st39 - st39: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof39 - } - stCase39: - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st40 - } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st40 - } - default: - goto tr46 } - goto tr44 - tr46: + output.rStart = true - m.tolower = append(m.tolower, m.p-m.pb) + output.rComponent = string(m.text()) - goto st40 - st40: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof40 - } - stCase40: - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st45 - } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st45 - } - default: - goto tr48 - } - goto tr44 - tr48: + goto _again + f31: - m.tolower = append(m.tolower, m.p-m.pb) + if output.qStart { + m.err = fmt.Errorf(err8141QComponentStart, m.p) + (m.p)-- + + m.cs = 193 + goto _again - goto st45 - st45: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof45 - } - stCase45: - switch (m.data)[(m.p)] { - case 33: - goto st44 - case 36: - goto st44 - case 37: - goto st39 - case 61: - goto st44 - case 95: - goto st44 - } - switch { - case (m.data)[(m.p)] < 48: - if 39 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 46 { - goto st44 - } - case (m.data)[(m.p)] > 59: - switch { - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st44 - } - case (m.data)[(m.p)] >= 64: - goto st44 - } - default: - goto st44 } - goto tr44 - tr8: + output.qStart = true m.pb = m.p - goto st41 - st41: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof41 - } - stCase41: - switch (m.data)[(m.p)] { - case 45: - goto st7 - case 58: - goto tr10 - case 82: - goto st42 - case 114: - goto st42 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st7 - } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st7 - } - default: - goto st7 - } - goto tr6 - st42: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof42 - } - stCase42: - switch (m.data)[(m.p)] { - case 45: - goto st8 - case 58: - goto tr10 - case 78: - goto st43 - case 110: - goto st43 - } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st8 - } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st8 - } - default: - goto st8 - } - goto tr50 - st43: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof43 + goto _again + f51: + + if output.qStart { + m.err = fmt.Errorf(err8141QComponentStart, m.p) + (m.p)-- + + m.cs = 193 + goto _again + } - stCase43: - if (m.data)[(m.p)] == 45 { - goto st9 + output.qStart = true + + output.qComponent = string(m.text()) + + goto _again + f46: + + m.err = fmt.Errorf(err8141MalformedRComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + m.err = fmt.Errorf(err8141MalformedQComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + f29: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + } - switch { - case (m.data)[(m.p)] < 65: - if 48 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 57 { - goto st9 - } - case (m.data)[(m.p)] > 90: - if 97 <= (m.data)[(m.p)] && (m.data)[(m.p)] <= 122 { - goto st9 - } - default: - goto st9 + + m.err = fmt.Errorf(err8141MalformedRComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + m.err = fmt.Errorf(err8141MalformedQComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + goto _again + + _again: + switch _toStateActions[m.cs] { + case 33: + + (m.p)-- + + m.err = fmt.Errorf(err8141InformalID, m.p) + m.cs = 193 + goto _again } - goto tr52 - st46: - if (m.p)++; (m.p) == (m.pe) { - goto _testEof46 + + if m.cs == 0 { + goto _out } - stCase46: - switch (m.data)[(m.p)] { - case 10: - goto st0 - case 13: - goto st0 + if (m.p)++; (m.p) != (m.pe) { + goto _resume } - goto st46 - stOut: - _testEof2: - m.cs = 2 - goto _testEof - _testEof3: - m.cs = 3 - goto _testEof - _testEof4: - m.cs = 4 - goto _testEof - _testEof5: - m.cs = 5 - goto _testEof - _testEof6: - m.cs = 6 - goto _testEof - _testEof7: - m.cs = 7 - goto _testEof - _testEof8: - m.cs = 8 - goto _testEof - _testEof9: - m.cs = 9 - goto _testEof - _testEof10: - m.cs = 10 - goto _testEof - _testEof11: - m.cs = 11 - goto _testEof - _testEof12: - m.cs = 12 - goto _testEof - _testEof13: - m.cs = 13 - goto _testEof - _testEof14: - m.cs = 14 - goto _testEof - _testEof15: - m.cs = 15 - goto _testEof - _testEof16: - m.cs = 16 - goto _testEof - _testEof17: - m.cs = 17 - goto _testEof - _testEof18: - m.cs = 18 - goto _testEof - _testEof19: - m.cs = 19 - goto _testEof - _testEof20: - m.cs = 20 - goto _testEof - _testEof21: - m.cs = 21 - goto _testEof - _testEof22: - m.cs = 22 - goto _testEof - _testEof23: - m.cs = 23 - goto _testEof - _testEof24: - m.cs = 24 - goto _testEof - _testEof25: - m.cs = 25 - goto _testEof - _testEof26: - m.cs = 26 - goto _testEof - _testEof27: - m.cs = 27 - goto _testEof - _testEof28: - m.cs = 28 - goto _testEof - _testEof29: - m.cs = 29 - goto _testEof - _testEof30: - m.cs = 30 - goto _testEof - _testEof31: - m.cs = 31 - goto _testEof - _testEof32: - m.cs = 32 - goto _testEof - _testEof33: - m.cs = 33 - goto _testEof - _testEof34: - m.cs = 34 - goto _testEof - _testEof35: - m.cs = 35 - goto _testEof - _testEof36: - m.cs = 36 - goto _testEof - _testEof37: - m.cs = 37 - goto _testEof - _testEof38: - m.cs = 38 - goto _testEof - _testEof44: - m.cs = 44 - goto _testEof - _testEof39: - m.cs = 39 - goto _testEof - _testEof40: - m.cs = 40 - goto _testEof - _testEof45: - m.cs = 45 - goto _testEof - _testEof41: - m.cs = 41 - goto _testEof - _testEof42: - m.cs = 42 - goto _testEof - _testEof43: - m.cs = 43 - goto _testEof - _testEof46: - m.cs = 46 - goto _testEof - _testEof: { } if (m.p) == (m.eof) { - switch m.cs { - case 44, 45: + switch _eofActions[m.cs] { + case 1: - raw := m.text() - output.SS = string(raw) - // Iterate upper letters lowering them - for _, i := range m.tolower { - raw[i] = raw[i] + 32 - } - output.norm = string(raw) + m.err = fmt.Errorf(errPrefix, m.p) + (m.p)-- - case 1, 2, 4: + m.cs = 193 + goto _again - m.err = fmt.Errorf(errParse, m.p) + case 6: + + m.err = fmt.Errorf(errIdentifier, m.p) (m.p)-- - { - goto st46 - } + m.cs = 193 + goto _again - case 3: + case 8: - m.err = fmt.Errorf(errPrefix, m.p) + m.err = fmt.Errorf(errSpecificString, m.p) (m.p)-- - { - goto st46 + m.cs = 193 + goto _again + + case 24: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + } - m.err = fmt.Errorf(errParse, m.p) + case 12: + + m.err = fmt.Errorf(errSCIMNamespace, m.p) (m.p)-- - { - goto st46 - } + m.cs = 193 + goto _again - case 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 41: + case 14: - m.err = fmt.Errorf(errIdentifier, m.p) + m.err = fmt.Errorf(errSCIMType, m.p) (m.p)-- - { - goto st46 - } + m.cs = 193 + goto _again + + case 16: - m.err = fmt.Errorf(errParse, m.p) + m.err = fmt.Errorf(errSCIMName, m.p) (m.p)-- - { - goto st46 + m.cs = 193 + goto _again + + case 18: + + if m.p == m.pe { + m.err = fmt.Errorf(errSCIMOtherIncomplete, m.p-1) + } else { + m.err = fmt.Errorf(errSCIMOther, m.p) } + (m.p)-- - case 38: + m.cs = 193 + goto _again - m.err = fmt.Errorf(errSpecificString, m.p) + case 23: + + m.err = fmt.Errorf(err8141SpecificString, m.p) (m.p)-- - { - goto st46 - } + m.cs = 193 + goto _again + + case 22: - m.err = fmt.Errorf(errParse, m.p) + m.err = fmt.Errorf(err8141Identifier, m.p) (m.p)-- - { - goto st46 - } + m.cs = 193 + goto _again - case 42: + case 26: - m.err = fmt.Errorf(errPrefix, m.p) + m.err = fmt.Errorf(err8141MalformedRComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + case 31: + + m.err = fmt.Errorf(err8141MalformedQComp, m.p) (m.p)-- - { - goto st46 + m.cs = 193 + goto _again + + case 34: + + output.SS = string(m.text()) + // Iterate upper letters lowering them + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] + 32 + } + output.norm = string(m.text()) + // Revert the buffer to the original + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] - 32 + } + + output.kind = RFC2141 + + case 38: + + output.SS = string(m.text()) + // Iterate upper letters lowering them + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] + 32 + } + output.norm = string(m.text()) + // Revert the buffer to the original + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] - 32 } + output.kind = RFC8141 + + case 4: + m.err = fmt.Errorf(errIdentifier, m.p) (m.p)-- - { - goto st46 - } + m.cs = 193 + goto _again - m.err = fmt.Errorf(errParse, m.p) + m.err = fmt.Errorf(errPrefix, m.p) (m.p)-- - { - goto st46 - } + m.cs = 193 + goto _again - case 43: + case 11: + + m.err = fmt.Errorf(errIdentifier, m.p) + (m.p)-- + + m.cs = 193 + goto _again m.err = fmt.Errorf(errNoUrnWithinID, m.p) (m.p)-- - { - goto st46 + m.cs = 193 + goto _again + + case 9: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + } - m.err = fmt.Errorf(errIdentifier, m.p) + m.err = fmt.Errorf(errSpecificString, m.p) (m.p)-- - { - goto st46 + m.cs = 193 + goto _again + + case 20: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + } - m.err = fmt.Errorf(errParse, m.p) + if m.p == m.pe { + m.err = fmt.Errorf(errSCIMOtherIncomplete, m.p-1) + } else { + m.err = fmt.Errorf(errSCIMOther, m.p) + } (m.p)-- - { - goto st46 + m.cs = 193 + goto _again + + case 25: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + } - case 39, 40: + m.err = fmt.Errorf(err8141SpecificString, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + case 28: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } - m.err = fmt.Errorf(errHex, m.p) + m.err = fmt.Errorf(err8141MalformedRComp, m.p) (m.p)-- - { - goto st46 + m.cs = 193 + goto _again + + case 29: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + } - m.err = fmt.Errorf(errSpecificString, m.p) + m.err = fmt.Errorf(err8141MalformedQComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + case 21: + + m.err = fmt.Errorf(err8141Identifier, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + m.err = fmt.Errorf(errPrefix, m.p) (m.p)-- - { - goto st46 + m.cs = 193 + goto _again + + case 42: + + output.rComponent = string(m.text()) + + output.kind = RFC8141 + + case 48: + + output.qComponent = string(m.text()) + + output.kind = RFC8141 + + case 41: + + output.fComponent = string(m.text()) + + output.kind = RFC8141 + + case 40: + + m.pb = m.p + + output.fComponent = string(m.text()) + + output.kind = RFC8141 + + case 30: + + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + (m.p)-- + + m.cs = 193 + goto _again + } - m.err = fmt.Errorf(errParse, m.p) + m.err = fmt.Errorf(err8141MalformedRComp, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + m.err = fmt.Errorf(err8141MalformedQComp, m.p) (m.p)-- - { - goto st46 + m.cs = 193 + goto _again + + case 35: + + output.scim.Name = string(m.data[output.scim.pos:m.p]) + + output.SS = string(m.text()) + // Iterate upper letters lowering them + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] + 32 + } + output.norm = string(m.text()) + // Revert the buffer to the original + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] - 32 + } + + output.kind = RFC7643 + + case 37: + + output.scim.Other = string(m.data[output.scim.pos:m.p]) + + output.SS = string(m.text()) + // Iterate upper letters lowering them + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] + 32 + } + output.norm = string(m.text()) + // Revert the buffer to the original + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] - 32 + } + + output.kind = RFC7643 + + case 44: + + if output.rStart { + m.err = fmt.Errorf(err8141RComponentStart, m.p) + (m.p)-- + + m.cs = 193 + goto _again + + } + output.rStart = true + + output.rComponent = string(m.text()) + + output.kind = RFC8141 + + case 50: + + if output.qStart { + m.err = fmt.Errorf(err8141QComponentStart, m.p) + (m.p)-- + + m.cs = 193 + goto _again + } + output.qStart = true + + output.qComponent = string(m.text()) + + output.kind = RFC8141 } } @@ -1686,3 +5031,16 @@ func (m *machine) Parse(input []byte) (*URN, error) { return output, nil } + +func (m *machine) WithParsingMode(x ParsingMode) { + m.parsingMode = x + switch m.parsingMode { + case RFC2141Only: + m.startParsingAt = enMain + case RFC8141Only: + m.startParsingAt = enRfc8141Only + case RFC7643Only: + m.startParsingAt = enScimOnly + } + m.parsingModeSet = true +} diff --git a/vendor/github.com/leodido/go-urn/machine.go.rl b/vendor/github.com/leodido/go-urn/machine.go.rl index 3bc05a65..0a174219 100644 --- a/vendor/github.com/leodido/go-urn/machine.go.rl +++ b/vendor/github.com/leodido/go-urn/machine.go.rl @@ -2,15 +2,28 @@ package urn import ( "fmt" + + scimschema "github.com/leodido/go-urn/scim/schema" ) var ( - errPrefix = "expecting the prefix to be the \"urn\" string (whatever case) [col %d]" - errIdentifier = "expecting the identifier to be string (1..31 alnum chars, also containing dashes but not at its start) [col %d]" - errSpecificString = "expecting the specific string to be a string containing alnum, hex, or others ([()+,-.:=@;$_!*']) chars [col %d]" - errNoUrnWithinID = "expecting the identifier to not contain the \"urn\" reserved string [col %d]" - errHex = "expecting the specific string hex chars to be well-formed (%%alnum{2}) [col %d]" - errParse = "parsing error [col %d]" + errPrefix = "expecting the prefix to be the \"urn\" string (whatever case) [col %d]" + errIdentifier = "expecting the identifier to be string (1..31 alnum chars, also containing dashes but not at its beginning) [col %d]" + errSpecificString = "expecting the specific string to be a string containing alnum, hex, or others ([()+,-.:=@;$_!*']) chars [col %d]" + errNoUrnWithinID = "expecting the identifier to not contain the \"urn\" reserved string [col %d]" + errHex = "expecting the percent encoded chars to be well-formed (%%alnum{2}) [col %d]" + errSCIMNamespace = "expecing the SCIM namespace identifier (ietf:params:scim) [col %d]" + errSCIMType = "expecting a correct SCIM type (schemas, api, param) [col %d]" + errSCIMName = "expecting one or more alnum char in the SCIM name part [col %d]" + errSCIMOther = "expecting a well-formed other SCIM part [col %d]" + errSCIMOtherIncomplete = "expecting a not empty SCIM other part after colon [col %d]" + err8141InformalID = "informal URN namespace must be in the form urn-[1-9][0-9] [col %d]" + err8141SpecificString = "expecting the specific string to contain alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] not in first position) chars [col %d]" + err8141Identifier = "expecting the indentifier to be a string with (length 2 to 32 chars) containing alnum (or dashes) not starting or ending with a dash [col %d]" + err8141RComponentStart = "expecting only one r-component (starting with the ?+ sequence) [col %d]" + err8141QComponentStart = "expecting only one q-component (starting with the ?= sequence) [col %d]" + err8141MalformedRComp = "expecting a non-empty r-component containing alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] but not at its beginning) [col %d]" + err8141MalformedQComp = "expecting a non-empty q-component containing alnum, hex, or others ([~&()+,-.:=@;$_!*'] or [/?] but not at its beginning) [col %d]" ) %%{ @@ -24,25 +37,42 @@ action mark { } action tolower { - m.tolower = append(m.tolower, m.p - m.pb) + // List of positions in the buffer to later lowercase + output.tolower = append(output.tolower, m.p - m.pb) } action set_pre { output.prefix = string(m.text()) } +action throw_pre_urn_err { + if m.parsingMode != RFC8141Only { + // Throw an error when: + // - we are entering here matching the the prefix in the namespace identifier part + // - looking ahead (3 chars) we find a colon + if pos := m.p + 3; pos < m.pe && m.data[pos] == 58 && output.prefix != "" { + m.err = fmt.Errorf(errNoUrnWithinID, pos) + fhold; + fgoto fail; + } + } +} + action set_nid { output.ID = string(m.text()) } action set_nss { - raw := m.text() - output.SS = string(raw) + output.SS = string(m.text()) // Iterate upper letters lowering them - for _, i := range m.tolower { - raw[i] = raw[i] + 32 + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] + 32 + } + output.norm = string(m.text()) + // Revert the buffer to the original + for _, i := range output.tolower { + m.data[m.pb+i] = m.data[m.pb+i] - 32 } - output.norm = string(raw) } action err_pre { @@ -70,30 +100,200 @@ action err_urn { } action err_hex { - m.err = fmt.Errorf(errHex, m.p) + if m.parsingMode == RFC2141Only || m.parsingMode == RFC8141Only { + m.err = fmt.Errorf(errHex, m.p) + fhold; + fgoto fail; + } +} + +action base_type { + output.kind = RFC2141; +} + +pre = ([uU] @err(err_pre) [rR] @err(err_pre) [nN] @err(err_pre)) >mark >throw_pre_urn_err %set_pre; + +nid = (alnum >mark (alnum | '-'){0,31}) $err(err_nid) %set_nid; + +hex = '%' (digit | lower | upper >tolower){2} $err(err_hex); + +sss = (alnum | [()+,\-.:=@;$_!*']); + +nss = (sss | hex)+ $err(err_nss); + +nid_not_urn = (nid - pre %err(err_urn)); + +urn = pre ':' @err(err_pre) (nid_not_urn ':' nss >mark %set_nss) %eof(base_type); + +### SCIM BEG + +action err_scim_nid { + m.err = fmt.Errorf(errSCIMNamespace, m.p) fhold; fgoto fail; } -action err_parse { - m.err = fmt.Errorf(errParse, m.p) +action err_scim_type { + m.err = fmt.Errorf(errSCIMType, m.p) fhold; fgoto fail; } -pre = ([uU][rR][nN] @err(err_pre)) >mark %set_pre; +action err_scim_name { + m.err = fmt.Errorf(errSCIMName, m.p) + fhold; + fgoto fail; +} -nid = (alnum >mark (alnum | '-'){0,31}) %set_nid; +action err_scim_other { + if m.p == m.pe { + m.err = fmt.Errorf(errSCIMOtherIncomplete, m.p-1) + } else { + m.err = fmt.Errorf(errSCIMOther, m.p) + } + fhold; + fgoto fail; +} -hex = '%' (digit | lower | upper >tolower){2} $err(err_hex); +action scim_type { + output.kind = RFC7643; +} -sss = (alnum | [()+,\-.:=@;$_!*']); +action create_scim { + output.scim = &SCIM{}; +} -nss = (sss | hex)+ $err(err_nss); +action set_scim_type { + output.scim.Type = scimschema.TypeFromString(string(m.text())) +} + +action mark_scim_name { + output.scim.pos = m.p +} + +action set_scim_name { + output.scim.Name = string(m.data[output.scim.pos:m.p]) +} + +action mark_scim_other { + output.scim.pos = m.p +} + +action set_scim_other { + output.scim.Other = string(m.data[output.scim.pos:m.p]) +} + +scim_nid = 'ietf:params:scim' >mark %set_nid %create_scim $err(err_scim_nid); + +scim_other = ':' (sss | hex)+ >mark_scim_other %set_scim_other $err(err_scim_other); + +scim_name = (alnum)+ >mark_scim_name %set_scim_name $err(err_scim_name); + +scim_type = ('schemas' | 'api' | 'param') >mark %set_scim_type $err(err_scim_type); + +scim_only := pre ':' @err(err_pre) (scim_nid ':' scim_type ':' scim_name scim_other? %set_nss) %eof(scim_type); + +### SCIM END + +### 8141 BEG + +action err_nss_8141 { + m.err = fmt.Errorf(err8141SpecificString, m.p) + fhold; + fgoto fail; +} + +action err_nid_8141 { + m.err = fmt.Errorf(err8141Identifier, m.p) + fhold; + fgoto fail; +} + +action rfc8141_type { + output.kind = RFC8141; +} + +action set_r_component { + output.rComponent = string(m.text()) +} + +action set_q_component { + output.qComponent = string(m.text()) +} + +action set_f_component { + output.fComponent = string(m.text()) +} + +action informal_nid_match { + fhold; + m.err = fmt.Errorf(err8141InformalID, m.p); + fgoto fail; +} + +action mark_r_start { + if output.rStart { + m.err = fmt.Errorf(err8141RComponentStart, m.p) + fhold; + fgoto fail; + } + output.rStart = true +} + +action mark_q_start { + if output.qStart { + m.err = fmt.Errorf(err8141QComponentStart, m.p) + fhold; + fgoto fail; + } + output.qStart = true +} + +action err_malformed_r_component { + m.err = fmt.Errorf(err8141MalformedRComp, m.p) + fhold; + fgoto fail; +} + +action err_malformed_q_component { + m.err = fmt.Errorf(err8141MalformedQComp, m.p) + fhold; + fgoto fail; +} + +pchar = (sss | '~' | '&' | hex); + +component = pchar (pchar | '/' | '?')*; + +r_start = ('?+') %mark_r_start; + +r_component = r_start <: (r_start | component)+ $err(err_malformed_r_component) >mark %set_r_component; + +q_start = ('?=') %mark_q_start; + +q_component = q_start <: (q_start | component)+ $err(err_malformed_q_component) >mark %set_q_component; + +rq_components = (r_component :>> q_component? | q_component); + +fragment = (pchar | '/' | '?')*; + +f_component = '#' fragment >mark %set_f_component; + +nss_rfc8141 = (pchar >mark (pchar | '/')*) $err(err_nss_8141) %set_nss; + +nid_rfc8141 = (alnum >mark (alnum | '-'){0,30} alnum) $err(err_nid_8141) %set_nid; + +informal_id = pre ('-' [a-zA-z0] %to(informal_nid_match)); + +nid_rfc8141_not_urn = (nid_rfc8141 - informal_id?); + +rfc8141_only := pre ':' @err(err_pre) nid_rfc8141_not_urn ':' nss_rfc8141 rq_components? f_component? %eof(rfc8141_type); + +### 8141 END fail := (any - [\n\r])* @err{ fgoto main; }; -main := (pre ':' (nid - pre %err(err_urn)) $err(err_nid) ':' nss >mark %set_nss) $err(err_parse); +main := urn; }%% @@ -103,6 +303,7 @@ main := (pre ':' (nid - pre %err(err_urn)) $err(err_nid) ':' nss >mark %set_nss) type Machine interface { Error() error Parse(input []byte) (*URN, error) + WithParsingMode(ParsingMode) } type machine struct { @@ -110,12 +311,24 @@ type machine struct { cs int p, pe, eof, pb int err error - tolower []int + startParsingAt int + parsingMode ParsingMode + parsingModeSet bool } // NewMachine creates a new FSM able to parse RFC 2141 strings. -func NewMachine() Machine { - m := &machine{} +func NewMachine(options ...Option) Machine { + m := &machine{ + parsingModeSet: false, + } + + for _, o := range options { + o(m) + } + // Set default parsing mode + if !m.parsingModeSet { + m.WithParsingMode(DefaultParsingMode) + } %% access m.; %% variable p m.p; @@ -137,7 +350,7 @@ func (m *machine) text() []byte { return m.data[m.pb:m.p] } -// Parse parses the input byte array as a RFC 2141 string. +// Parse parses the input byte array as a RFC 2141 or RFC7643 string. func (m *machine) Parse(input []byte) (*URN, error) { m.data = input m.p = 0 @@ -145,10 +358,11 @@ func (m *machine) Parse(input []byte) (*URN, error) { m.pe = len(input) m.eof = len(input) m.err = nil - m.tolower = []int{} - output := &URN{} + m.cs = m.startParsingAt + output := &URN{ + tolower: []int{}, + } - %% write init; %% write exec; if m.cs < first_final || m.cs == en_fail { @@ -157,3 +371,16 @@ func (m *machine) Parse(input []byte) (*URN, error) { return output, nil } + +func (m *machine) WithParsingMode(x ParsingMode) { + m.parsingMode = x + switch m.parsingMode { + case RFC2141Only: + m.startParsingAt = en_main + case RFC8141Only: + m.startParsingAt = en_rfc8141_only + case RFC7643Only: + m.startParsingAt = en_scim_only + } + m.parsingModeSet = true +} \ No newline at end of file diff --git a/vendor/github.com/leodido/go-urn/makefile b/vendor/github.com/leodido/go-urn/makefile index df87cdc6..68d5dd0f 100644 --- a/vendor/github.com/leodido/go-urn/makefile +++ b/vendor/github.com/leodido/go-urn/makefile @@ -15,18 +15,24 @@ clean: .PHONY: images images: docs/urn.png +.PHONY: snake2camel +snake2camel: + @cd ./tools/snake2camel; go build -o ../../snake2camel . + .PHONY: removecomments removecomments: @cd ./tools/removecomments; go build -o ../../removecomments . machine.go: machine.go.rl +machine.go: snake2camel + machine.go: removecomments machine.go: - $(RAGEL) -Z -G2 -e -o $@ $< + $(RAGEL) -Z -G1 -e -o $@ $< @./removecomments $@ - $(MAKE) -s file=$@ snake2camel + @./snake2camel $@ $(GOFMT) $@ docs/urn.dot: machine.go.rl @@ -41,13 +47,5 @@ bench: *_test.go machine.go go test -bench=. -benchmem -benchtime=5s ./... .PHONY: tests -tests: *_test.go +tests: *_test.go $(GO_TEST) ./... - -.PHONY: snake2camel -snake2camel: - @awk -i inplace '{ \ - while ( match($$0, /(.*)([a-z]+[0-9]*)_([a-zA-Z0-9])(.*)/, cap) ) \ - $$0 = cap[1] cap[2] toupper(cap[3]) cap[4]; \ - print \ - }' $(file) diff --git a/vendor/github.com/leodido/go-urn/options.go b/vendor/github.com/leodido/go-urn/options.go new file mode 100644 index 00000000..c543835a --- /dev/null +++ b/vendor/github.com/leodido/go-urn/options.go @@ -0,0 +1,9 @@ +package urn + +type Option func(Machine) + +func WithParsingMode(mode ParsingMode) Option { + return func(m Machine) { + m.WithParsingMode(mode) + } +} diff --git a/vendor/github.com/leodido/go-urn/parsing_mode.go b/vendor/github.com/leodido/go-urn/parsing_mode.go new file mode 100644 index 00000000..fce5aadc --- /dev/null +++ b/vendor/github.com/leodido/go-urn/parsing_mode.go @@ -0,0 +1,12 @@ +package urn + +type ParsingMode int + +const ( + Default ParsingMode = iota + RFC2141Only + RFC7643Only + RFC8141Only +) + +const DefaultParsingMode = RFC2141Only diff --git a/vendor/github.com/leodido/go-urn/scim.go b/vendor/github.com/leodido/go-urn/scim.go new file mode 100644 index 00000000..f6b7aefb --- /dev/null +++ b/vendor/github.com/leodido/go-urn/scim.go @@ -0,0 +1,48 @@ +package urn + +import ( + "encoding/json" + "fmt" + + scimschema "github.com/leodido/go-urn/scim/schema" +) + +const errInvalidSCIMURN = "invalid SCIM URN: %s" + +type SCIM struct { + Type scimschema.Type + Name string + Other string + pos int +} + +func (s SCIM) MarshalJSON() ([]byte, error) { + return json.Marshal(s.String()) +} + +func (s *SCIM) UnmarshalJSON(bytes []byte) error { + var str string + if err := json.Unmarshal(bytes, &str); err != nil { + return err + } + // Parse as SCIM + value, ok := Parse([]byte(str), WithParsingMode(RFC7643Only)) + if !ok { + return fmt.Errorf(errInvalidSCIMURN, str) + } + if value.RFC() != RFC7643 { + return fmt.Errorf(errInvalidSCIMURN, str) + } + *s = *value.SCIM() + + return nil +} + +func (s *SCIM) String() string { + ret := fmt.Sprintf("urn:ietf:params:scim:%s:%s", s.Type.String(), s.Name) + if s.Other != "" { + ret += fmt.Sprintf(":%s", s.Other) + } + + return ret +} diff --git a/vendor/github.com/leodido/go-urn/scim/schema/type.go b/vendor/github.com/leodido/go-urn/scim/schema/type.go new file mode 100644 index 00000000..13491823 --- /dev/null +++ b/vendor/github.com/leodido/go-urn/scim/schema/type.go @@ -0,0 +1,36 @@ +package scimschema + +type Type int + +const ( + Unsupported Type = iota + Schemas + API + Param +) + +func (t Type) String() string { + switch t { + case Schemas: + return "schemas" + case API: + return "api" + case Param: + return "param" + } + + return "" +} + +func TypeFromString(input string) Type { + switch input { + case "schemas": + return Schemas + case "api": + return API + case "param": + return Param + } + + return Unsupported +} diff --git a/vendor/github.com/leodido/go-urn/urn.go b/vendor/github.com/leodido/go-urn/urn.go index d51a6c91..894d6258 100644 --- a/vendor/github.com/leodido/go-urn/urn.go +++ b/vendor/github.com/leodido/go-urn/urn.go @@ -16,10 +16,18 @@ const errInvalidURN = "invalid URN: %s" // // Details at https://tools.ietf.org/html/rfc2141. type URN struct { - prefix string // Static prefix. Equal to "urn" when empty. - ID string // Namespace identifier - SS string // Namespace specific string - norm string // Normalized namespace specific string + prefix string // Static prefix. Equal to "urn" when empty. + ID string // Namespace identifier (NID) + SS string // Namespace specific string (NSS) + norm string // Normalized namespace specific string + kind Kind + scim *SCIM + rComponent string // RFC8141 + qComponent string // RFC8141 + fComponent string // RFC8141 + rStart bool // RFC8141 + qStart bool // RFC8141 + tolower []int } // Normalize turns the receiving URN into its norm version. @@ -30,12 +38,21 @@ func (u *URN) Normalize() *URN { prefix: "urn", ID: strings.ToLower(u.ID), SS: u.norm, + // rComponent: u.rComponent, + // qComponent: u.qComponent, + // fComponent: u.fComponent, } } // Equal checks the lexical equivalence of the current URN with another one. func (u *URN) Equal(x *URN) bool { - return *u.Normalize() == *x.Normalize() + if x == nil { + return false + } + nu := u.Normalize() + nx := x.Normalize() + + return nu.prefix == nx.prefix && nu.ID == nx.ID && nu.SS == nx.SS } // String reassembles the URN into a valid URN string. @@ -51,14 +68,23 @@ func (u *URN) String() string { res += "urn" } res += u.prefix + ":" + u.ID + ":" + u.SS + if u.rComponent != "" { + res += "?+" + u.rComponent + } + if u.qComponent != "" { + res += "?=" + u.qComponent + } + if u.fComponent != "" { + res += "#" + u.fComponent + } } return res } -// Parse is responsible to create an URN instance from a byte array matching the correct URN syntax. -func Parse(u []byte) (*URN, bool) { - urn, err := NewMachine().Parse(u) +// Parse is responsible to create an URN instance from a byte array matching the correct URN syntax (RFC 2141). +func Parse(u []byte, options ...Option) (*URN, bool) { + urn, err := NewMachine(options...).Parse(u) if err != nil { return nil, false } @@ -71,7 +97,7 @@ func (u URN) MarshalJSON() ([]byte, error) { return json.Marshal(u.String()) } -// MarshalJSON unmarshals a URN from JSON string form (e.g. `"urn:oid:1.2.3.4"`). +// UnmarshalJSON unmarshals a URN from JSON string form (e.g. `"urn:oid:1.2.3.4"`). func (u *URN) UnmarshalJSON(bytes []byte) error { var str string if err := json.Unmarshal(bytes, &str); err != nil { @@ -82,5 +108,34 @@ func (u *URN) UnmarshalJSON(bytes []byte) error { } else { *u = *value } + return nil -} \ No newline at end of file +} + +func (u *URN) IsSCIM() bool { + return u.kind == RFC7643 +} + +func (u *URN) SCIM() *SCIM { + if u.kind != RFC7643 { + return nil + } + + return u.scim +} + +func (u *URN) RFC() Kind { + return u.kind +} + +func (u *URN) FComponent() string { + return u.fComponent +} + +func (u *URN) QComponent() string { + return u.qComponent +} + +func (u *URN) RComponent() string { + return u.rComponent +} diff --git a/vendor/github.com/leodido/go-urn/urn8141.go b/vendor/github.com/leodido/go-urn/urn8141.go new file mode 100644 index 00000000..da4dd062 --- /dev/null +++ b/vendor/github.com/leodido/go-urn/urn8141.go @@ -0,0 +1,30 @@ +package urn + +import ( + "encoding/json" + "fmt" +) + +const errInvalidURN8141 = "invalid URN per RFC 8141: %s" + +type URN8141 struct { + *URN +} + +func (u URN8141) MarshalJSON() ([]byte, error) { + return json.Marshal(u.String()) +} + +func (u *URN8141) UnmarshalJSON(bytes []byte) error { + var str string + if err := json.Unmarshal(bytes, &str); err != nil { + return err + } + if value, ok := Parse([]byte(str), WithParsingMode(RFC8141Only)); !ok { + return fmt.Errorf(errInvalidURN8141, str) + } else { + *u = URN8141{value} + } + + return nil +} diff --git a/vendor/github.com/mattn/go-isatty/isatty_bsd.go b/vendor/github.com/mattn/go-isatty/isatty_bsd.go index d569c0c9..d0ea68f4 100644 --- a/vendor/github.com/mattn/go-isatty/isatty_bsd.go +++ b/vendor/github.com/mattn/go-isatty/isatty_bsd.go @@ -1,6 +1,7 @@ -//go:build (darwin || freebsd || openbsd || netbsd || dragonfly || hurd) && !appengine +//go:build (darwin || freebsd || openbsd || netbsd || dragonfly || hurd) && !appengine && !tinygo // +build darwin freebsd openbsd netbsd dragonfly hurd // +build !appengine +// +build !tinygo package isatty diff --git a/vendor/github.com/mattn/go-isatty/isatty_others.go b/vendor/github.com/mattn/go-isatty/isatty_others.go index 31503226..7402e061 100644 --- a/vendor/github.com/mattn/go-isatty/isatty_others.go +++ b/vendor/github.com/mattn/go-isatty/isatty_others.go @@ -1,5 +1,6 @@ -//go:build appengine || js || nacl || wasm -// +build appengine js nacl wasm +//go:build (appengine || js || nacl || tinygo || wasm) && !windows +// +build appengine js nacl tinygo wasm +// +build !windows package isatty diff --git a/vendor/github.com/mattn/go-isatty/isatty_tcgets.go b/vendor/github.com/mattn/go-isatty/isatty_tcgets.go index 67787657..0337d8cf 100644 --- a/vendor/github.com/mattn/go-isatty/isatty_tcgets.go +++ b/vendor/github.com/mattn/go-isatty/isatty_tcgets.go @@ -1,6 +1,7 @@ -//go:build (linux || aix || zos) && !appengine +//go:build (linux || aix || zos) && !appengine && !tinygo // +build linux aix zos // +build !appengine +// +build !tinygo package isatty diff --git a/vendor/github.com/mazznoer/csscolorparser/.gitignore b/vendor/github.com/mazznoer/csscolorparser/.gitignore new file mode 100644 index 00000000..1aaddb39 --- /dev/null +++ b/vendor/github.com/mazznoer/csscolorparser/.gitignore @@ -0,0 +1,16 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out +*.html + +# Dependency directories (remove the comment below to include it) +# vendor/ diff --git a/vendor/github.com/mazznoer/csscolorparser/CHANGELOG.md b/vendor/github.com/mazznoer/csscolorparser/CHANGELOG.md new file mode 100644 index 00000000..8266a354 --- /dev/null +++ b/vendor/github.com/mazznoer/csscolorparser/CHANGELOG.md @@ -0,0 +1,19 @@ +# Changelog + +## [Unreleased](https://github.com/mazznoer/csscolorparser/compare/v0.1.4...HEAD) + +### Added + +- `Clamp()` + +## v0.1.4 + +### Added + +- Support parsing `oklab()` and `oklch()` color format. +- `FromOklab()`, `FromOklch()`, `FromLinearRgb()`, `FromHsl()`, `FromHsv()`, `FromHwb()` + +### Fixed + +- Update `oklab` formula. + diff --git a/vendor/github.com/mazznoer/csscolorparser/LICENSE b/vendor/github.com/mazznoer/csscolorparser/LICENSE new file mode 100644 index 00000000..1458f1b5 --- /dev/null +++ b/vendor/github.com/mazznoer/csscolorparser/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Nor Khasyatillah + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/mazznoer/csscolorparser/Makefile b/vendor/github.com/mazznoer/csscolorparser/Makefile new file mode 100644 index 00000000..410deb8e --- /dev/null +++ b/vendor/github.com/mazznoer/csscolorparser/Makefile @@ -0,0 +1,11 @@ +SHELL := /bin/bash + +.PHONY: all check test + +all: check test + +check: + go build && go vet && gofmt -s -l . + +test: + go test -v -coverprofile coverage.out && go tool cover -html coverage.out -o coverage.html diff --git a/vendor/github.com/mazznoer/csscolorparser/README.md b/vendor/github.com/mazznoer/csscolorparser/README.md new file mode 100644 index 00000000..9ca797c4 --- /dev/null +++ b/vendor/github.com/mazznoer/csscolorparser/README.md @@ -0,0 +1,78 @@ +# Go Language CSS Color Parser Library + +[![PkgGoDev](https://pkg.go.dev/badge/github.com/mazznoer/csscolorparser)](https://pkg.go.dev/github.com/mazznoer/csscolorparser) +![Build Status](https://github.com/mazznoer/csscolorparser/actions/workflows/go.yml/badge.svg) +[![go report](https://goreportcard.com/badge/github.com/mazznoer/csscolorparser)](https://goreportcard.com/report/github.com/mazznoer/csscolorparser) + +[Go](https://www.golang.org/) library for parsing CSS color string as defined in the W3C's [CSS Color Module Level 4](https://www.w3.org/TR/css-color-4/). + +## Supported Color Format + +* [Named colors](https://www.w3.org/TR/css-color-4/#named-colors) +* RGB hexadecimal (with and without `#` prefix) + * Short format `#rgb` + * Short format with alpha `#rgba` + * Long format `#rrggbb` + * Long format with alpha `#rrggbbaa` +* `rgb()` and `rgba()` +* `hsl()` and `hsla()` +* `hwb()` +* `oklab()` +* `oklch()` +* `hwba()`, `hsv()`, `hsva()` - not in CSS standard. + +Not yet supported: `lab()`, `lch()`. + +### Example Color Format + +```css +transparent +lime +#0f0 +#0f0f +#00ff00 +#00ff00ff +rgb(0,255,0) +rgb(0% 100% 0%) +rgb(0 255 0 / 100%) +rgba(0,255,0,1) +hsl(120,100%,50%) +hsl(120deg 100% 50%) +hsl(-240 100% 50%) +hsl(-240deg 100% 50%) +hsl(0.3333turn 100% 50%) +hsl(133.333grad 100% 50%) +hsl(2.0944rad 100% 50%) +hsla(120,100%,50%,100%) +hwb(120 0% 0%) +hwb(480deg 0% 0% / 100%) +hsv(120,100%,100%) +hsv(120deg 100% 100% / 100%) +``` + +## Usage Examples + +```go +import "github.com/mazznoer/csscolorparser" + +c, err := csscolorparser.Parse("gold") + +if err != nil { + panic(err) +} + +fmt.Printf("R:%.3f, G:%.3f, B:%.3f, A:%.3f", c.R, c.G, c.B, c.A) // R:1.000, G:0.843, B:0.000, A:1.000 +fmt.Println(c.RGBA255()) // 255 215 0 255 +fmt.Println(c.HexString()) // #ffd700 +fmt.Println(c.RGBString()) // rgb(255,215,0) +``` + +## Try It Online + +* [Playground 1](https://play.golang.org/p/8KMIc1TLQB0) +* [Playground 2](https://play.golang.org/p/7kb62KSARwa) + +## Similar Projects + +* [csscolorparser](https://github.com/mazznoer/csscolorparser-rs) (Rust) +* [csscolorparser](https://github.com/deanm/css-color-parser-js) (Javascript) diff --git a/vendor/github.com/mazznoer/csscolorparser/colorparser.go b/vendor/github.com/mazznoer/csscolorparser/colorparser.go new file mode 100644 index 00000000..6355b0ff --- /dev/null +++ b/vendor/github.com/mazznoer/csscolorparser/colorparser.go @@ -0,0 +1,480 @@ +// Package csscolorparser provides function for parsing CSS color string as defined in the W3C's CSS color module level 4. +package csscolorparser + +import ( + "fmt" + "math" + "strconv" + "strings" +) + +// Inspired by https://github.com/deanm/css-color-parser-js + +// R, G, B, A values in the range 0..1 +type Color struct { + R, G, B, A float64 +} + +// Implement the Go color.Color interface. +func (c Color) RGBA() (r, g, b, a uint32) { + r = uint32(c.R*c.A*65535 + 0.5) + g = uint32(c.G*c.A*65535 + 0.5) + b = uint32(c.B*c.A*65535 + 0.5) + a = uint32(c.A*65535 + 0.5) + return +} + +// RGBA255 returns R, G, B, A values in the range 0..255 +func (c Color) RGBA255() (r, g, b, a uint8) { + r = uint8(c.R*255 + 0.5) + g = uint8(c.G*255 + 0.5) + b = uint8(c.B*255 + 0.5) + a = uint8(c.A*255 + 0.5) + return +} + +func (c Color) Clamp() Color { + return Color{ + R: math.Max(math.Min(c.R, 1), 0), + G: math.Max(math.Min(c.G, 1), 0), + B: math.Max(math.Min(c.B, 1), 0), + A: math.Max(math.Min(c.A, 1), 0), + } +} + +// HexString returns CSS hexadecimal string. +func (c Color) HexString() string { + r, g, b, a := c.RGBA255() + if a < 255 { + return fmt.Sprintf("#%02x%02x%02x%02x", r, g, b, a) + } + return fmt.Sprintf("#%02x%02x%02x", r, g, b) +} + +// RGBString returns CSS RGB string. +func (c Color) RGBString() string { + r, g, b, _ := c.RGBA255() + if c.A < 1 { + return fmt.Sprintf("rgba(%d,%d,%d,%v)", r, g, b, c.A) + } + return fmt.Sprintf("rgb(%d,%d,%d)", r, g, b) +} + +// Name returns name of this color if its available. +func (c Color) Name() (string, bool) { + r, g, b, _ := c.RGBA255() + rgb := [3]uint8{r, g, b} + for k, v := range namedColors { + if v == rgb { + return k, true + } + } + return "", false +} + +// Implement the Go TextUnmarshaler interface +func (c *Color) UnmarshalText(text []byte) error { + col, err := Parse(string(text)) + if err != nil { + return err + } + c.R = col.R + c.G = col.G + c.B = col.B + c.A = col.A + return nil +} + +// Implement the Go TextMarshaler interface +func (c Color) MarshalText() ([]byte, error) { + return []byte(c.HexString()), nil +} + +func FromHsv(h, s, v, a float64) Color { + r, g, b := hsvToRgb(normalizeAngle(h), clamp0_1(s), clamp0_1(v)) + return Color{r, g, b, clamp0_1(a)} +} + +func FromHsl(h, s, l, a float64) Color { + r, g, b := hslToRgb(normalizeAngle(h), clamp0_1(s), clamp0_1(l)) + return Color{r, g, b, clamp0_1(a)} +} + +func FromHwb(h, w, b, a float64) Color { + r, g, b := hwbToRgb(normalizeAngle(h), clamp0_1(w), clamp0_1(b)) + return Color{r, g, b, clamp0_1(a)} +} + +func fromLinear(x float64) float64 { + if x >= 0.0031308 { + return 1.055*math.Pow(x, 1.0/2.4) - 0.055 + } + return 12.92 * x +} + +func FromLinearRGB(r, g, b, a float64) Color { + return Color{fromLinear(r), fromLinear(g), fromLinear(b), clamp0_1(a)} +} + +func FromOklab(l, a, b, alpha float64) Color { + l_ := math.Pow(l+0.3963377774*a+0.2158037573*b, 3) + m_ := math.Pow(l-0.1055613458*a-0.0638541728*b, 3) + s_ := math.Pow(l-0.0894841775*a-1.2914855480*b, 3) + + R := 4.0767416621*l_ - 3.3077115913*m_ + 0.2309699292*s_ + G := -1.2684380046*l_ + 2.6097574011*m_ - 0.3413193965*s_ + B := -0.0041960863*l_ - 0.7034186147*m_ + 1.7076147010*s_ + + return FromLinearRGB(R, G, B, alpha) +} + +func FromOklch(l, c, h, alpha float64) Color { + return FromOklab(l, c*math.Cos(h), c*math.Sin(h), alpha) +} + +var black = Color{0, 0, 0, 1} + +// Parse parses CSS color string and returns, if successful, a Color. +func Parse(s string) (Color, error) { + input := s + s = strings.TrimSpace(strings.ToLower(s)) + + if s == "transparent" { + return Color{0, 0, 0, 0}, nil + } + + // Predefined name / keyword + c, ok := namedColors[s] + if ok { + return Color{float64(c[0]) / 255, float64(c[1]) / 255, float64(c[2]) / 255, 1}, nil + } + + // Hexadecimal + if strings.HasPrefix(s, "#") { + c, ok := parseHex(s[1:]) + if ok { + return c, nil + } + return black, fmt.Errorf("Invalid hex color, %s", input) + } + + op := strings.Index(s, "(") + + if (op != -1) && strings.HasSuffix(s, ")") { + fname := strings.TrimSpace(s[:op]) + alpha := 1.0 + okA := true + s = s[op+1 : len(s)-1] + s = strings.ReplaceAll(s, ",", " ") + s = strings.ReplaceAll(s, "/", " ") + params := strings.Fields(s) + + if fname == "rgb" || fname == "rgba" { + if len(params) != 3 && len(params) != 4 { + return black, fmt.Errorf("%s() format needs 3 or 4 parameters, %s", fname, input) + } + r, okR, _ := parsePercentOr255(params[0]) + g, okG, _ := parsePercentOr255(params[1]) + b, okB, _ := parsePercentOr255(params[2]) + if len(params) == 4 { + alpha, okA, _ = parsePercentOrFloat(params[3]) + } + if okR && okG && okB && okA { + return Color{ + clamp0_1(r), + clamp0_1(g), + clamp0_1(b), + clamp0_1(alpha), + }, nil + } + return black, fmt.Errorf("Wrong %s() components, %s", fname, input) + + } else if fname == "hsl" || fname == "hsla" { + if len(params) != 3 && len(params) != 4 { + return black, fmt.Errorf("%s() format needs 3 or 4 parameters, %s", fname, input) + } + h, okH := parseAngle(params[0]) + s, okS, _ := parsePercentOrFloat(params[1]) + l, okL, _ := parsePercentOrFloat(params[2]) + if len(params) == 4 { + alpha, okA, _ = parsePercentOrFloat(params[3]) + } + if okH && okS && okL && okA { + return FromHsl(h, s, l, alpha), nil + } + return black, fmt.Errorf("Wrong %s() components, %s", fname, input) + + } else if fname == "hwb" || fname == "hwba" { + if len(params) != 3 && len(params) != 4 { + return black, fmt.Errorf("hwb() format needs 3 or 4 parameters, %s", input) + } + H, okH := parseAngle(params[0]) + W, okW, _ := parsePercentOrFloat(params[1]) + B, okB, _ := parsePercentOrFloat(params[2]) + if len(params) == 4 { + alpha, okA, _ = parsePercentOrFloat(params[3]) + } + if okH && okW && okB && okA { + return FromHwb(H, W, B, alpha), nil + } + return black, fmt.Errorf("Wrong hwb() components, %s", input) + + } else if fname == "hsv" || fname == "hsva" { + if len(params) != 3 && len(params) != 4 { + return black, fmt.Errorf("hsv() format needs 3 or 4 parameters, %s", input) + } + h, okH := parseAngle(params[0]) + s, okS, _ := parsePercentOrFloat(params[1]) + v, okV, _ := parsePercentOrFloat(params[2]) + if len(params) == 4 { + alpha, okA, _ = parsePercentOrFloat(params[3]) + } + if okH && okS && okV && okA { + return FromHsv(h, s, v, alpha), nil + } + return black, fmt.Errorf("Wrong hsv() components, %s", input) + } else if fname == "oklab" { + if len(params) != 3 && len(params) != 4 { + return black, fmt.Errorf("oklab() format needs 3 or 4 parameters, %s", input) + } + l, okL, _ := parsePercentOrFloat(params[0]) + a, okA, fmtA := parsePercentOrFloat(params[1]) + b, okB, fmtB := parsePercentOrFloat(params[2]) + okAlpha := true + if len(params) == 4 { + alpha, okAlpha, _ = parsePercentOrFloat(params[3]) + } + if okL && okA && okB && okAlpha { + if fmtA { + a = remap(a, -1.0, 1.0, -0.4, 0.4) + } + if fmtB { + b = remap(b, -1.0, 1.0, -0.4, 0.4) + } + return FromOklab(math.Max(l, 0), a, b, alpha), nil + } + return black, fmt.Errorf("Wrong oklab() components, %s", input) + } else if fname == "oklch" { + if len(params) != 3 && len(params) != 4 { + return black, fmt.Errorf("oklch() format needs 3 or 4 parameters, %s", input) + } + l, okL, _ := parsePercentOrFloat(params[0]) + c, okC, fmtC := parsePercentOrFloat(params[1]) + h, okH := parseAngle(params[2]) + if len(params) == 4 { + alpha, okA, _ = parsePercentOrFloat(params[3]) + } + if okL && okC && okH && okA { + if fmtC { + c = c * 0.4 + } + return FromOklch(math.Max(l, 0), math.Max(c, 0), h*math.Pi/180, alpha), nil + } + return black, fmt.Errorf("Wrong oklch() components, %s", input) + } + } + + // RGB hexadecimal format without '#' prefix + c2, ok2 := parseHex(s) + if ok2 { + return c2, nil + } + + return black, fmt.Errorf("Invalid color format, %s", input) +} + +// https://stackoverflow.com/questions/54197913/parse-hex-string-to-image-color + +func parseHex(s string) (c Color, ok bool) { + c.A = 1 + ok = true + + hexToByte := func(b byte) byte { + switch { + case b >= '0' && b <= '9': + return b - '0' + case b >= 'a' && b <= 'f': + return b - 'a' + 10 + } + ok = false + return 0 + } + + n := len(s) + if n == 6 || n == 8 { + c.R = float64(hexToByte(s[0])<<4+hexToByte(s[1])) / 255 + c.G = float64(hexToByte(s[2])<<4+hexToByte(s[3])) / 255 + c.B = float64(hexToByte(s[4])<<4+hexToByte(s[5])) / 255 + if n == 8 { + c.A = float64(hexToByte(s[6])<<4+hexToByte(s[7])) / 255 + } + } else if n == 3 || n == 4 { + c.R = float64(hexToByte(s[0])*17) / 255 + c.G = float64(hexToByte(s[1])*17) / 255 + c.B = float64(hexToByte(s[2])*17) / 255 + if n == 4 { + c.A = float64(hexToByte(s[3])*17) / 255 + } + } else { + ok = false + } + return +} + +func modulo(x, y float64) float64 { + return math.Mod(math.Mod(x, y)+y, y) +} + +func hueToRgb(n1, n2, h float64) float64 { + h = modulo(h, 6) + if h < 1 { + return n1 + ((n2 - n1) * h) + } + if h < 3 { + return n2 + } + if h < 4 { + return n1 + ((n2 - n1) * (4 - h)) + } + return n1 +} + +// h = 0..360 +// s, l = 0..1 +// r, g, b = 0..1 +func hslToRgb(h, s, l float64) (r, g, b float64) { + if s == 0 { + return l, l, l + } + var n2 float64 + if l < 0.5 { + n2 = l * (1 + s) + } else { + n2 = l + s - (l * s) + } + n1 := 2*l - n2 + h /= 60 + r = clamp0_1(hueToRgb(n1, n2, h+2)) + g = clamp0_1(hueToRgb(n1, n2, h)) + b = clamp0_1(hueToRgb(n1, n2, h-2)) + return +} + +func hwbToRgb(hue, white, black float64) (r, g, b float64) { + if white+black >= 1 { + gray := white / (white + black) + return gray, gray, gray + } + r, g, b = hslToRgb(hue, 1, 0.5) + r = r*(1-white-black) + white + g = g*(1-white-black) + white + b = b*(1-white-black) + white + return +} + +func hsvToHsl(H, S, V float64) (h, s, l float64) { + h = H + s = S + l = (2 - S) * V / 2 + if l != 0 { + if l == 1 { + s = 0 + } else if l < 0.5 { + s = S * V / (l * 2) + } else { + s = S * V / (2 - l*2) + } + } + return +} + +func hsvToRgb(H, S, V float64) (r, g, b float64) { + h, s, l := hsvToHsl(H, S, V) + return hslToRgb(h, s, l) +} + +func clamp0_1(t float64) float64 { + if t < 0 { + return 0 + } + if t > 1 { + return 1 + } + return t +} + +func parseFloat(s string) (float64, bool) { + f, err := strconv.ParseFloat(strings.TrimSpace(s), 64) + return f, err == nil +} + +// Returns (result, ok?, percentage?) +func parsePercentOrFloat(s string) (float64, bool, bool) { + if strings.HasSuffix(s, "%") { + f, ok := parseFloat(s[:len(s)-1]) + if ok { + return f / 100, true, true + } + return 0, false, true + } + f, ok := parseFloat(s) + return f, ok, false +} + +// Returns (result, ok?, percentage?) +func parsePercentOr255(s string) (float64, bool, bool) { + if strings.HasSuffix(s, "%") { + f, ok := parseFloat(s[:len(s)-1]) + if ok { + return f / 100, true, true + } + return 0, false, true + } + f, ok := parseFloat(s) + if ok { + return f / 255, true, false + } + return 0, false, false +} + +// Result angle in degrees (not normalized) +func parseAngle(s string) (float64, bool) { + if strings.HasSuffix(s, "deg") { + return parseFloat(s[:len(s)-3]) + } + if strings.HasSuffix(s, "grad") { + f, ok := parseFloat(s[:len(s)-4]) + if ok { + return f / 400 * 360, true + } + return 0, false + } + if strings.HasSuffix(s, "rad") { + f, ok := parseFloat(s[:len(s)-3]) + if ok { + return f / math.Pi * 180, true + } + return 0, false + } + if strings.HasSuffix(s, "turn") { + f, ok := parseFloat(s[:len(s)-4]) + if ok { + return f * 360, true + } + return 0, false + } + return parseFloat(s) +} + +func normalizeAngle(t float64) float64 { + t = math.Mod(t, 360) + if t < 0 { + t += 360 + } + return t +} + +// Map t which is in range [a, b] to range [c, d] +func remap(t, a, b, c, d float64) float64 { + return (t-a)*((d-c)/(b-a)) + c +} diff --git a/vendor/github.com/mazznoer/csscolorparser/named_colors.go b/vendor/github.com/mazznoer/csscolorparser/named_colors.go new file mode 100644 index 00000000..0a60c322 --- /dev/null +++ b/vendor/github.com/mazznoer/csscolorparser/named_colors.go @@ -0,0 +1,152 @@ +package csscolorparser + +var namedColors = map[string][3]uint8{ + "aliceblue": {240, 248, 255}, + "antiquewhite": {250, 235, 215}, + "aqua": {0, 255, 255}, + "aquamarine": {127, 255, 212}, + "azure": {240, 255, 255}, + "beige": {245, 245, 220}, + "bisque": {255, 228, 196}, + "black": {0, 0, 0}, + "blanchedalmond": {255, 235, 205}, + "blue": {0, 0, 255}, + "blueviolet": {138, 43, 226}, + "brown": {165, 42, 42}, + "burlywood": {222, 184, 135}, + "cadetblue": {95, 158, 160}, + "chartreuse": {127, 255, 0}, + "chocolate": {210, 105, 30}, + "coral": {255, 127, 80}, + "cornflowerblue": {100, 149, 237}, + "cornsilk": {255, 248, 220}, + "crimson": {220, 20, 60}, + "cyan": {0, 255, 255}, + "darkblue": {0, 0, 139}, + "darkcyan": {0, 139, 139}, + "darkgoldenrod": {184, 134, 11}, + "darkgray": {169, 169, 169}, + "darkgreen": {0, 100, 0}, + "darkgrey": {169, 169, 169}, + "darkkhaki": {189, 183, 107}, + "darkmagenta": {139, 0, 139}, + "darkolivegreen": {85, 107, 47}, + "darkorange": {255, 140, 0}, + "darkorchid": {153, 50, 204}, + "darkred": {139, 0, 0}, + "darksalmon": {233, 150, 122}, + "darkseagreen": {143, 188, 143}, + "darkslateblue": {72, 61, 139}, + "darkslategray": {47, 79, 79}, + "darkslategrey": {47, 79, 79}, + "darkturquoise": {0, 206, 209}, + "darkviolet": {148, 0, 211}, + "deeppink": {255, 20, 147}, + "deepskyblue": {0, 191, 255}, + "dimgray": {105, 105, 105}, + "dimgrey": {105, 105, 105}, + "dodgerblue": {30, 144, 255}, + "firebrick": {178, 34, 34}, + "floralwhite": {255, 250, 240}, + "forestgreen": {34, 139, 34}, + "fuchsia": {255, 0, 255}, + "gainsboro": {220, 220, 220}, + "ghostwhite": {248, 248, 255}, + "gold": {255, 215, 0}, + "goldenrod": {218, 165, 32}, + "gray": {128, 128, 128}, + "green": {0, 128, 0}, + "greenyellow": {173, 255, 47}, + "grey": {128, 128, 128}, + "honeydew": {240, 255, 240}, + "hotpink": {255, 105, 180}, + "indianred": {205, 92, 92}, + "indigo": {75, 0, 130}, + "ivory": {255, 255, 240}, + "khaki": {240, 230, 140}, + "lavender": {230, 230, 250}, + "lavenderblush": {255, 240, 245}, + "lawngreen": {124, 252, 0}, + "lemonchiffon": {255, 250, 205}, + "lightblue": {173, 216, 230}, + "lightcoral": {240, 128, 128}, + "lightcyan": {224, 255, 255}, + "lightgoldenrodyellow": {250, 250, 210}, + "lightgray": {211, 211, 211}, + "lightgreen": {144, 238, 144}, + "lightgrey": {211, 211, 211}, + "lightpink": {255, 182, 193}, + "lightsalmon": {255, 160, 122}, + "lightseagreen": {32, 178, 170}, + "lightskyblue": {135, 206, 250}, + "lightslategray": {119, 136, 153}, + "lightslategrey": {119, 136, 153}, + "lightsteelblue": {176, 196, 222}, + "lightyellow": {255, 255, 224}, + "lime": {0, 255, 0}, + "limegreen": {50, 205, 50}, + "linen": {250, 240, 230}, + "magenta": {255, 0, 255}, + "maroon": {128, 0, 0}, + "mediumaquamarine": {102, 205, 170}, + "mediumblue": {0, 0, 205}, + "mediumorchid": {186, 85, 211}, + "mediumpurple": {147, 112, 219}, + "mediumseagreen": {60, 179, 113}, + "mediumslateblue": {123, 104, 238}, + "mediumspringgreen": {0, 250, 154}, + "mediumturquoise": {72, 209, 204}, + "mediumvioletred": {199, 21, 133}, + "midnightblue": {25, 25, 112}, + "mintcream": {245, 255, 250}, + "mistyrose": {255, 228, 225}, + "moccasin": {255, 228, 181}, + "navajowhite": {255, 222, 173}, + "navy": {0, 0, 128}, + "oldlace": {253, 245, 230}, + "olive": {128, 128, 0}, + "olivedrab": {107, 142, 35}, + "orange": {255, 165, 0}, + "orangered": {255, 69, 0}, + "orchid": {218, 112, 214}, + "palegoldenrod": {238, 232, 170}, + "palegreen": {152, 251, 152}, + "paleturquoise": {175, 238, 238}, + "palevioletred": {219, 112, 147}, + "papayawhip": {255, 239, 213}, + "peachpuff": {255, 218, 185}, + "peru": {205, 133, 63}, + "pink": {255, 192, 203}, + "plum": {221, 160, 221}, + "powderblue": {176, 224, 230}, + "purple": {128, 0, 128}, + "rebeccapurple": {102, 51, 153}, + "red": {255, 0, 0}, + "rosybrown": {188, 143, 143}, + "royalblue": {65, 105, 225}, + "saddlebrown": {139, 69, 19}, + "salmon": {250, 128, 114}, + "sandybrown": {244, 164, 96}, + "seagreen": {46, 139, 87}, + "seashell": {255, 245, 238}, + "sienna": {160, 82, 45}, + "silver": {192, 192, 192}, + "skyblue": {135, 206, 235}, + "slateblue": {106, 90, 205}, + "slategray": {112, 128, 144}, + "slategrey": {112, 128, 144}, + "snow": {255, 250, 250}, + "springgreen": {0, 255, 127}, + "steelblue": {70, 130, 180}, + "tan": {210, 180, 140}, + "teal": {0, 128, 128}, + "thistle": {216, 191, 216}, + "tomato": {255, 99, 71}, + "turquoise": {64, 224, 208}, + "violet": {238, 130, 238}, + "wheat": {245, 222, 179}, + "white": {255, 255, 255}, + "whitesmoke": {245, 245, 245}, + "yellow": {255, 255, 0}, + "yellowgreen": {154, 205, 50}, +} diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/build/build_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/build/build_command.go index 5db5d1a7..fd172608 100644 --- a/vendor/github.com/onsi/ginkgo/v2/ginkgo/build/build_command.go +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/build/build_command.go @@ -2,6 +2,8 @@ package build import ( "fmt" + "os" + "path" "github.com/onsi/ginkgo/v2/ginkgo/command" "github.com/onsi/ginkgo/v2/ginkgo/internal" @@ -53,7 +55,18 @@ func buildSpecs(args []string, cliConfig types.CLIConfig, goFlagsConfig types.Go if suite.State.Is(internal.TestSuiteStateFailedToCompile) { fmt.Println(suite.CompilationError.Error()) } else { - fmt.Printf("Compiled %s.test\n", suite.PackageName) + if len(goFlagsConfig.O) == 0 { + goFlagsConfig.O = path.Join(suite.Path, suite.PackageName+".test") + } else { + stat, err := os.Stat(goFlagsConfig.O) + if err != nil { + panic(err) + } + if stat.IsDir() { + goFlagsConfig.O += "/" + suite.PackageName + ".test" + } + } + fmt.Printf("Compiled %s\n", goFlagsConfig.O) } } diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/bootstrap_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/bootstrap_command.go index 73aff0b7..b2dc59be 100644 --- a/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/bootstrap_command.go +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/bootstrap_command.go @@ -7,7 +7,7 @@ import ( "os" "text/template" - sprig "github.com/go-task/slim-sprig" + sprig "github.com/go-task/slim-sprig/v3" "github.com/onsi/ginkgo/v2/ginkgo/command" "github.com/onsi/ginkgo/v2/ginkgo/internal" "github.com/onsi/ginkgo/v2/types" diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_command.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_command.go index be01dec9..cf3b7cb6 100644 --- a/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_command.go +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/generators/generate_command.go @@ -10,7 +10,7 @@ import ( "strings" "text/template" - sprig "github.com/go-task/slim-sprig" + sprig "github.com/go-task/slim-sprig/v3" "github.com/onsi/ginkgo/v2/ginkgo/command" "github.com/onsi/ginkgo/v2/ginkgo/internal" "github.com/onsi/ginkgo/v2/types" @@ -174,6 +174,7 @@ func moduleName(modRoot string) string { if err != nil { return "" } + defer modFile.Close() mod := make([]byte, 128) _, err = modFile.Read(mod) diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/compile.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/compile.go index 86da7340..48827cc5 100644 --- a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/compile.go +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/compile.go @@ -25,6 +25,18 @@ func CompileSuite(suite TestSuite, goFlagsConfig types.GoFlagsConfig) TestSuite return suite } + if len(goFlagsConfig.O) > 0 { + userDefinedPath, err := filepath.Abs(goFlagsConfig.O) + if err != nil { + suite.State = TestSuiteStateFailedToCompile + suite.CompilationError = fmt.Errorf("Failed to compute compilation target path %s:\n%s", goFlagsConfig.O, err.Error()) + return suite + } + path = userDefinedPath + } + + goFlagsConfig.O = path + ginkgoInvocationPath, _ := os.Getwd() ginkgoInvocationPath, _ = filepath.Abs(ginkgoInvocationPath) packagePath := suite.AbsPath() @@ -34,7 +46,7 @@ func CompileSuite(suite TestSuite, goFlagsConfig types.GoFlagsConfig) TestSuite suite.CompilationError = fmt.Errorf("Failed to get relative path from package to the current working directory:\n%s", err.Error()) return suite } - args, err := types.GenerateGoTestCompileArgs(goFlagsConfig, path, "./", pathToInvocationPath) + args, err := types.GenerateGoTestCompileArgs(goFlagsConfig, "./", pathToInvocationPath) if err != nil { suite.State = TestSuiteStateFailedToCompile suite.CompilationError = fmt.Errorf("Failed to generate go test compile flags:\n%s", err.Error()) diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/gocovmerge.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/gocovmerge.go new file mode 100644 index 00000000..3c5079ff --- /dev/null +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/gocovmerge.go @@ -0,0 +1,129 @@ +// Copyright (c) 2015, Wade Simmons +// All rights reserved. + +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: + +// 1. Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. + +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Package gocovmerge takes the results from multiple `go test -coverprofile` +// runs and merges them into one profile + +// this file was originally taken from the gocovmerge project +// see also: https://go.shabbyrobe.org/gocovmerge +package internal + +import ( + "fmt" + "io" + "sort" + + "golang.org/x/tools/cover" +) + +func AddCoverProfile(profiles []*cover.Profile, p *cover.Profile) []*cover.Profile { + i := sort.Search(len(profiles), func(i int) bool { return profiles[i].FileName >= p.FileName }) + if i < len(profiles) && profiles[i].FileName == p.FileName { + MergeCoverProfiles(profiles[i], p) + } else { + profiles = append(profiles, nil) + copy(profiles[i+1:], profiles[i:]) + profiles[i] = p + } + return profiles +} + +func DumpCoverProfiles(profiles []*cover.Profile, out io.Writer) error { + if len(profiles) == 0 { + return nil + } + if _, err := fmt.Fprintf(out, "mode: %s\n", profiles[0].Mode); err != nil { + return err + } + for _, p := range profiles { + for _, b := range p.Blocks { + if _, err := fmt.Fprintf(out, "%s:%d.%d,%d.%d %d %d\n", p.FileName, b.StartLine, b.StartCol, b.EndLine, b.EndCol, b.NumStmt, b.Count); err != nil { + return err + } + } + } + return nil +} + +func MergeCoverProfiles(into *cover.Profile, merge *cover.Profile) error { + if into.Mode != merge.Mode { + return fmt.Errorf("cannot merge profiles with different modes") + } + // Since the blocks are sorted, we can keep track of where the last block + // was inserted and only look at the blocks after that as targets for merge + startIndex := 0 + for _, b := range merge.Blocks { + var err error + startIndex, err = mergeProfileBlock(into, b, startIndex) + if err != nil { + return err + } + } + return nil +} + +func mergeProfileBlock(p *cover.Profile, pb cover.ProfileBlock, startIndex int) (int, error) { + sortFunc := func(i int) bool { + pi := p.Blocks[i+startIndex] + return pi.StartLine >= pb.StartLine && (pi.StartLine != pb.StartLine || pi.StartCol >= pb.StartCol) + } + + i := 0 + if sortFunc(i) != true { + i = sort.Search(len(p.Blocks)-startIndex, sortFunc) + } + + i += startIndex + if i < len(p.Blocks) && p.Blocks[i].StartLine == pb.StartLine && p.Blocks[i].StartCol == pb.StartCol { + if p.Blocks[i].EndLine != pb.EndLine || p.Blocks[i].EndCol != pb.EndCol { + return i, fmt.Errorf("gocovmerge: overlapping merge %v %v %v", p.FileName, p.Blocks[i], pb) + } + switch p.Mode { + case "set": + p.Blocks[i].Count |= pb.Count + case "count", "atomic": + p.Blocks[i].Count += pb.Count + default: + return i, fmt.Errorf("gocovmerge: unsupported covermode '%s'", p.Mode) + } + + } else { + if i > 0 { + pa := p.Blocks[i-1] + if pa.EndLine >= pb.EndLine && (pa.EndLine != pb.EndLine || pa.EndCol > pb.EndCol) { + return i, fmt.Errorf("gocovmerge: overlap before %v %v %v", p.FileName, pa, pb) + } + } + if i < len(p.Blocks)-1 { + pa := p.Blocks[i+1] + if pa.StartLine <= pb.StartLine && (pa.StartLine != pb.StartLine || pa.StartCol < pb.StartCol) { + return i, fmt.Errorf("gocovmerge: overlap after %v %v %v", p.FileName, pa, pb) + } + } + p.Blocks = append(p.Blocks, cover.ProfileBlock{}) + copy(p.Blocks[i+1:], p.Blocks[i:]) + p.Blocks[i] = pb + } + + return i + 1, nil +} diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/profiles_and_reports.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/profiles_and_reports.go index bd3c6d02..8e16d2bb 100644 --- a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/profiles_and_reports.go +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/profiles_and_reports.go @@ -1,7 +1,6 @@ package internal import ( - "bytes" "fmt" "os" "os/exec" @@ -12,6 +11,7 @@ import ( "github.com/google/pprof/profile" "github.com/onsi/ginkgo/v2/reporters" "github.com/onsi/ginkgo/v2/types" + "golang.org/x/tools/cover" ) func AbsPathForGeneratedAsset(assetName string, suite TestSuite, cliConfig types.CLIConfig, process int) string { @@ -144,38 +144,27 @@ func FinalizeProfilesAndReportsForSuites(suites TestSuites, cliConfig types.CLIC return messages, nil } -//loads each profile, combines them, deletes them, stores them in destination +// loads each profile, merges them, deletes them, stores them in destination func MergeAndCleanupCoverProfiles(profiles []string, destination string) error { - combined := &bytes.Buffer{} - modeRegex := regexp.MustCompile(`^mode: .*\n`) - for i, profile := range profiles { - contents, err := os.ReadFile(profile) + var merged []*cover.Profile + for _, file := range profiles { + parsedProfiles, err := cover.ParseProfiles(file) if err != nil { - return fmt.Errorf("Unable to read coverage file %s:\n%s", profile, err.Error()) + return err } - os.Remove(profile) - - // remove the cover mode line from every file - // except the first one - if i > 0 { - contents = modeRegex.ReplaceAll(contents, []byte{}) - } - - _, err = combined.Write(contents) - - // Add a newline to the end of every file if missing. - if err == nil && len(contents) > 0 && contents[len(contents)-1] != '\n' { - _, err = combined.Write([]byte("\n")) - } - - if err != nil { - return fmt.Errorf("Unable to append to coverprofile:\n%s", err.Error()) + os.Remove(file) + for _, p := range parsedProfiles { + merged = AddCoverProfile(merged, p) } } - - err := os.WriteFile(destination, combined.Bytes(), 0666) + dst, err := os.OpenFile(destination, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666) + if err != nil { + return err + } + defer dst.Close() + err = DumpCoverProfiles(merged, dst) if err != nil { - return fmt.Errorf("Unable to create combined cover profile:\n%s", err.Error()) + return err } return nil } @@ -184,7 +173,7 @@ func GetCoverageFromCoverProfile(profile string) (float64, error) { cmd := exec.Command("go", "tool", "cover", "-func", profile) output, err := cmd.CombinedOutput() if err != nil { - return 0, fmt.Errorf("Could not process Coverprofile %s: %s", profile, err.Error()) + return 0, fmt.Errorf("Could not process Coverprofile %s: %s - %s", profile, err.Error(), string(output)) } re := regexp.MustCompile(`total:\s*\(statements\)\s*(\d*\.\d*)\%`) matches := re.FindStringSubmatch(string(output)) @@ -208,6 +197,7 @@ func MergeProfiles(profilePaths []string, destination string) error { return fmt.Errorf("Could not open profile: %s\n%s", profilePath, err.Error()) } prof, err := profile.Parse(proFile) + _ = proFile.Close() if err != nil { return fmt.Errorf("Could not parse profile: %s\n%s", profilePath, err.Error()) } diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/test_suite.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/test_suite.go index 64dcb1b7..df99875b 100644 --- a/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/test_suite.go +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/internal/test_suite.go @@ -7,6 +7,7 @@ import ( "path" "path/filepath" "regexp" + "runtime" "strings" "github.com/onsi/ginkgo/v2/types" @@ -192,7 +193,7 @@ func precompiledTestSuite(path string) (TestSuite, error) { return TestSuite{}, errors.New("this is not a .test binary") } - if filepath.Ext(path) == ".test" && info.Mode()&0111 == 0 { + if filepath.Ext(path) == ".test" && runtime.GOOS != "windows" && info.Mode()&0111 == 0 { return TestSuite{}, errors.New("this is not executable") } @@ -225,7 +226,7 @@ func suitesInDir(dir string, recurse bool) TestSuites { files, _ := os.ReadDir(dir) re := regexp.MustCompile(`^[^._].*_test\.go$`) for _, file := range files { - if !file.IsDir() && re.Match([]byte(file.Name())) { + if !file.IsDir() && re.MatchString(file.Name()) { suite := TestSuite{ Path: relPath(dir), PackageName: packageNameForSuite(dir), @@ -240,7 +241,7 @@ func suitesInDir(dir string, recurse bool) TestSuites { if recurse { re = regexp.MustCompile(`^[._]`) for _, file := range files { - if file.IsDir() && !re.Match([]byte(file.Name())) { + if file.IsDir() && !re.MatchString(file.Name()) { suites = append(suites, suitesInDir(dir+"/"+file.Name(), recurse)...) } } @@ -271,7 +272,7 @@ func filesHaveGinkgoSuite(dir string, files []os.DirEntry) bool { reGinkgo := regexp.MustCompile(`package ginkgo|\/ginkgo"|\/ginkgo\/v2"|\/ginkgo\/v2/dsl/`) for _, file := range files { - if !file.IsDir() && reTestFile.Match([]byte(file.Name())) { + if !file.IsDir() && reTestFile.MatchString(file.Name()) { contents, _ := os.ReadFile(dir + "/" + file.Name()) if reGinkgo.Match(contents) { return true diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/ginkgo.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/ginkgo.go index 958daccb..5d8d00bb 100644 --- a/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/ginkgo.go +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/ginkgo.go @@ -1,10 +1,11 @@ package outline import ( - "github.com/onsi/ginkgo/v2/types" "go/ast" "go/token" "strconv" + + "github.com/onsi/ginkgo/v2/types" ) const ( diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/import.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/import.go index 67ec5ab7..f0a6b5d2 100644 --- a/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/import.go +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/outline/import.go @@ -28,14 +28,7 @@ func packageNameForImport(f *ast.File, path string) *string { } name := spec.Name.String() if name == "" { - // If the package name is not explicitly specified, - // make an educated guess. This is not guaranteed to be correct. - lastSlash := strings.LastIndex(path, "/") - if lastSlash == -1 { - name = path - } else { - name = path[lastSlash+1:] - } + name = "ginkgo" } if name == "." { name = "" diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/dependencies.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/dependencies.go index f5ddff30..a34d9435 100644 --- a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/dependencies.go +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/dependencies.go @@ -78,7 +78,7 @@ func (d Dependencies) resolveAndAdd(deps []string, depth int) { if err != nil { continue } - if !pkg.Goroot && (!ginkgoAndGomegaFilter.Match([]byte(pkg.Dir)) || ginkgoIntegrationTestFilter.Match([]byte(pkg.Dir))) { + if !pkg.Goroot && (!ginkgoAndGomegaFilter.MatchString(pkg.Dir) || ginkgoIntegrationTestFilter.MatchString(pkg.Dir)) { d.addDepIfNotPresent(pkg.Dir, depth) } } diff --git a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hash.go b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hash.go index e9f7ec0c..0e6ae1f2 100644 --- a/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hash.go +++ b/vendor/github.com/onsi/ginkgo/v2/ginkgo/watch/package_hash.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "regexp" + "strings" "time" ) @@ -79,7 +80,11 @@ func (p *PackageHash) computeHashes() (codeHash string, codeModifiedTime time.Ti continue } - if goTestRegExp.Match([]byte(info.Name())) { + if isHiddenFile(info) { + continue + } + + if goTestRegExp.MatchString(info.Name()) { testHash += p.hashForFileInfo(info) if info.ModTime().After(testModifiedTime) { testModifiedTime = info.ModTime() @@ -87,7 +92,7 @@ func (p *PackageHash) computeHashes() (codeHash string, codeModifiedTime time.Ti continue } - if p.watchRegExp.Match([]byte(info.Name())) { + if p.watchRegExp.MatchString(info.Name()) { codeHash += p.hashForFileInfo(info) if info.ModTime().After(codeModifiedTime) { codeModifiedTime = info.ModTime() @@ -103,6 +108,10 @@ func (p *PackageHash) computeHashes() (codeHash string, codeModifiedTime time.Ti return } +func isHiddenFile(info os.FileInfo) bool { + return strings.HasPrefix(info.Name(), ".") || strings.HasPrefix(info.Name(), "_") +} + func (p *PackageHash) hashForFileInfo(info os.FileInfo) string { return fmt.Sprintf("%s_%d_%d", info.Name(), info.Size(), info.ModTime().UnixNano()) } diff --git a/vendor/github.com/onsi/ginkgo/v2/reporters/default_reporter.go b/vendor/github.com/onsi/ginkgo/v2/reporters/default_reporter.go index 56b7be75..48073048 100644 --- a/vendor/github.com/onsi/ginkgo/v2/reporters/default_reporter.go +++ b/vendor/github.com/onsi/ginkgo/v2/reporters/default_reporter.go @@ -182,10 +182,31 @@ func (r *DefaultReporter) WillRun(report types.SpecReport) { r.emitBlock(r.f(r.codeLocationBlock(report, "{{/}}", v.Is(types.VerbosityLevelVeryVerbose), false))) } +func (r *DefaultReporter) wrapTextBlock(sectionName string, fn func()) { + r.emitBlock("\n") + if r.conf.GithubOutput { + r.emitBlock(r.fi(1, "::group::%s", sectionName)) + } else { + r.emitBlock(r.fi(1, "{{gray}}%s >>{{/}}", sectionName)) + } + fn() + if r.conf.GithubOutput { + r.emitBlock(r.fi(1, "::endgroup::")) + } else { + r.emitBlock(r.fi(1, "{{gray}}<< %s{{/}}", sectionName)) + } + +} + func (r *DefaultReporter) DidRun(report types.SpecReport) { v := r.conf.Verbosity() inParallel := report.RunningInParallel + //should we completely omit this spec? + if report.State.Is(types.SpecStateSkipped) && r.conf.SilenceSkips { + return + } + header := r.specDenoter if report.LeafNodeType.Is(types.NodeTypesForSuiteLevelNodes) { header = fmt.Sprintf("[%s]", report.LeafNodeType) @@ -262,9 +283,12 @@ func (r *DefaultReporter) DidRun(report types.SpecReport) { } } - // If we have no content to show, jsut emit the header and return + // If we have no content to show, just emit the header and return if !reportHasContent { r.emit(r.f(highlightColor + header + "{{/}}")) + if r.conf.ForceNewlines { + r.emit("\n") + } return } @@ -283,26 +307,23 @@ func (r *DefaultReporter) DidRun(report types.SpecReport) { //Emit Stdout/Stderr Output if showSeparateStdSection { - r.emitBlock("\n") - r.emitBlock(r.fi(1, "{{gray}}Captured StdOut/StdErr Output >>{{/}}")) - r.emitBlock(r.fi(1, "%s", report.CapturedStdOutErr)) - r.emitBlock(r.fi(1, "{{gray}}<< Captured StdOut/StdErr Output{{/}}")) + r.wrapTextBlock("Captured StdOut/StdErr Output", func() { + r.emitBlock(r.fi(1, "%s", report.CapturedStdOutErr)) + }) } if showSeparateVisibilityAlwaysReportsSection { - r.emitBlock("\n") - r.emitBlock(r.fi(1, "{{gray}}Report Entries >>{{/}}")) - for _, entry := range report.ReportEntries.WithVisibility(types.ReportEntryVisibilityAlways) { - r.emitReportEntry(1, entry) - } - r.emitBlock(r.fi(1, "{{gray}}<< Report Entries{{/}}")) + r.wrapTextBlock("Report Entries", func() { + for _, entry := range report.ReportEntries.WithVisibility(types.ReportEntryVisibilityAlways) { + r.emitReportEntry(1, entry) + } + }) } if showTimeline { - r.emitBlock("\n") - r.emitBlock(r.fi(1, "{{gray}}Timeline >>{{/}}")) - r.emitTimeline(1, report, timeline) - r.emitBlock(r.fi(1, "{{gray}}<< Timeline{{/}}")) + r.wrapTextBlock("Timeline", func() { + r.emitTimeline(1, report, timeline) + }) } // Emit Failure Message @@ -405,7 +426,15 @@ func (r *DefaultReporter) emitShortFailure(indent uint, state types.SpecState, f func (r *DefaultReporter) emitFailure(indent uint, state types.SpecState, failure types.Failure, includeAdditionalFailure bool) { highlightColor := r.highlightColorForState(state) r.emitBlock(r.fi(indent, highlightColor+"[%s] %s{{/}}", r.humanReadableState(state), failure.Message)) - r.emitBlock(r.fi(indent, highlightColor+"In {{bold}}[%s]{{/}}"+highlightColor+" at: {{bold}}%s{{/}} {{gray}}@ %s{{/}}\n", failure.FailureNodeType, failure.Location, failure.TimelineLocation.Time.Format(types.GINKGO_TIME_FORMAT))) + if r.conf.GithubOutput { + level := "error" + if state.Is(types.SpecStateSkipped) { + level = "notice" + } + r.emitBlock(r.fi(indent, "::%s file=%s,line=%d::%s %s", level, failure.Location.FileName, failure.Location.LineNumber, failure.FailureNodeType, failure.TimelineLocation.Time.Format(types.GINKGO_TIME_FORMAT))) + } else { + r.emitBlock(r.fi(indent, highlightColor+"In {{bold}}[%s]{{/}}"+highlightColor+" at: {{bold}}%s{{/}} {{gray}}@ %s{{/}}\n", failure.FailureNodeType, failure.Location, failure.TimelineLocation.Time.Format(types.GINKGO_TIME_FORMAT))) + } if failure.ForwardedPanic != "" { r.emitBlock("\n") r.emitBlock(r.fi(indent, highlightColor+"%s{{/}}", failure.ForwardedPanic)) diff --git a/vendor/github.com/onsi/ginkgo/v2/reporters/json_report.go b/vendor/github.com/onsi/ginkgo/v2/reporters/json_report.go index be506f9b..5d3e8db9 100644 --- a/vendor/github.com/onsi/ginkgo/v2/reporters/json_report.go +++ b/vendor/github.com/onsi/ginkgo/v2/reporters/json_report.go @@ -18,6 +18,7 @@ func GenerateJSONReport(report types.Report, destination string) error { if err != nil { return err } + defer f.Close() enc := json.NewEncoder(f) enc.SetIndent("", " ") err = enc.Encode([]types.Report{ @@ -26,7 +27,7 @@ func GenerateJSONReport(report types.Report, destination string) error { if err != nil { return err } - return f.Close() + return nil } // MergeJSONReports produces a single JSON-formatted report at the passed in destination by merging the JSON-formatted reports provided in sources @@ -57,11 +58,12 @@ func MergeAndCleanupJSONReports(sources []string, destination string) ([]string, if err != nil { return messages, err } + defer f.Close() enc := json.NewEncoder(f) enc.SetIndent("", " ") err = enc.Encode(allReports) if err != nil { return messages, err } - return messages, f.Close() + return messages, nil } diff --git a/vendor/github.com/onsi/ginkgo/v2/reporters/junit_report.go b/vendor/github.com/onsi/ginkgo/v2/reporters/junit_report.go index 81604220..562e0f62 100644 --- a/vendor/github.com/onsi/ginkgo/v2/reporters/junit_report.go +++ b/vendor/github.com/onsi/ginkgo/v2/reporters/junit_report.go @@ -15,6 +15,7 @@ import ( "fmt" "os" "path" + "regexp" "strings" "github.com/onsi/ginkgo/v2/config" @@ -104,6 +105,8 @@ type JUnitProperty struct { Value string `xml:"value,attr"` } +var ownerRE = regexp.MustCompile(`(?i)^owner:(.*)$`) + type JUnitTestCase struct { // Name maps onto the full text of the spec - equivalent to "[SpecReport.LeafNodeType] SpecReport.FullText()" Name string `xml:"name,attr"` @@ -113,6 +116,8 @@ type JUnitTestCase struct { Status string `xml:"status,attr"` // Time is the time in seconds to execute the spec - maps onto SpecReport.RunTime Time float64 `xml:"time,attr"` + // Owner is the owner the spec - is set if a label matching Label("owner:X") is provided. The last matching label is used as the owner, thereby allowing specs to override owners specified in container nodes. + Owner string `xml:"owner,attr,omitempty"` //Skipped is populated with a message if the test was skipped or pending Skipped *JUnitSkipped `xml:"skipped,omitempty"` //Error is populated if the test panicked or was interrupted @@ -172,6 +177,7 @@ func GenerateJUnitReportWithConfig(report types.Report, dst string, config Junit {"FocusFiles", strings.Join(report.SuiteConfig.FocusFiles, ";")}, {"SkipFiles", strings.Join(report.SuiteConfig.SkipFiles, ";")}, {"FailOnPending", fmt.Sprintf("%t", report.SuiteConfig.FailOnPending)}, + {"FailOnEmpty", fmt.Sprintf("%t", report.SuiteConfig.FailOnEmpty)}, {"FailFast", fmt.Sprintf("%t", report.SuiteConfig.FailFast)}, {"FlakeAttempts", fmt.Sprintf("%d", report.SuiteConfig.FlakeAttempts)}, {"DryRun", fmt.Sprintf("%t", report.SuiteConfig.DryRun)}, @@ -195,6 +201,12 @@ func GenerateJUnitReportWithConfig(report types.Report, dst string, config Junit if len(labels) > 0 && !config.OmitSpecLabels { name = name + " [" + strings.Join(labels, ", ") + "]" } + owner := "" + for _, label := range labels { + if matches := ownerRE.FindStringSubmatch(label); len(matches) == 2 { + owner = matches[1] + } + } name = strings.TrimSpace(name) test := JUnitTestCase{ @@ -202,6 +214,7 @@ func GenerateJUnitReportWithConfig(report types.Report, dst string, config Junit Classname: report.SuiteDescription, Status: spec.State.String(), Time: spec.RunTime.Seconds(), + Owner: owner, } if !spec.State.Is(config.OmitTimelinesForSpecState) { test.SystemErr = systemErrForUnstructuredReporters(spec) @@ -312,6 +325,7 @@ func MergeAndCleanupJUnitReports(sources []string, dst string) ([]string, error) continue } err = xml.NewDecoder(f).Decode(&report) + _ = f.Close() if err != nil { messages = append(messages, fmt.Sprintf("Could not decode %s:\n%s", source, err.Error())) continue diff --git a/vendor/github.com/onsi/ginkgo/v2/types/code_location.go b/vendor/github.com/onsi/ginkgo/v2/types/code_location.go index 9cd57681..57e87517 100644 --- a/vendor/github.com/onsi/ginkgo/v2/types/code_location.go +++ b/vendor/github.com/onsi/ginkgo/v2/types/code_location.go @@ -149,7 +149,7 @@ func PruneStack(fullStackTrace string, skip int) string { re := regexp.MustCompile(`\/ginkgo\/|\/pkg\/testing\/|\/pkg\/runtime\/`) for i := 0; i < len(stack)/2; i++ { // We filter out based on the source code file name. - if !re.Match([]byte(stack[i*2+1])) { + if !re.MatchString(stack[i*2+1]) { prunedStack = append(prunedStack, stack[i*2]) prunedStack = append(prunedStack, stack[i*2+1]) } diff --git a/vendor/github.com/onsi/ginkgo/v2/types/config.go b/vendor/github.com/onsi/ginkgo/v2/types/config.go index c88fc85a..97a049e0 100644 --- a/vendor/github.com/onsi/ginkgo/v2/types/config.go +++ b/vendor/github.com/onsi/ginkgo/v2/types/config.go @@ -25,6 +25,7 @@ type SuiteConfig struct { SkipFiles []string LabelFilter string FailOnPending bool + FailOnEmpty bool FailFast bool FlakeAttempts int MustPassRepeatedly int @@ -89,6 +90,9 @@ type ReporterConfig struct { VeryVerbose bool FullTrace bool ShowNodeEvents bool + GithubOutput bool + SilenceSkips bool + ForceNewlines bool JSONReport string JUnitReport string @@ -198,6 +202,7 @@ type GoFlagsConfig struct { A bool ASMFlags string BuildMode string + BuildVCS bool Compiler string GCCGoFlags string GCFlags string @@ -215,6 +220,7 @@ type GoFlagsConfig struct { ToolExec string Work bool X bool + O string } func NewDefaultGoFlagsConfig() GoFlagsConfig { @@ -264,7 +270,7 @@ var FlagSections = GinkgoFlagSections{ // SuiteConfigFlags provides flags for the Ginkgo test process, and CLI var SuiteConfigFlags = GinkgoFlags{ {KeyPath: "S.RandomSeed", Name: "seed", SectionKey: "order", UsageDefaultValue: "randomly generated by Ginkgo", - Usage: "The seed used to randomize the spec suite."}, + Usage: "The seed used to randomize the spec suite.", AlwaysExport: true}, {KeyPath: "S.RandomizeAllSpecs", Name: "randomize-all", SectionKey: "order", DeprecatedName: "randomizeAllSpecs", DeprecatedDocLink: "changed-command-line-flags", Usage: "If set, ginkgo will randomize all specs together. By default, ginkgo only randomizes the top level Describe, Context and When containers."}, @@ -274,6 +280,8 @@ var SuiteConfigFlags = GinkgoFlags{ Usage: "If set, ginkgo will stop running a test suite after a failure occurs."}, {KeyPath: "S.FlakeAttempts", Name: "flake-attempts", SectionKey: "failure", UsageDefaultValue: "0 - failed tests are not retried", DeprecatedName: "flakeAttempts", DeprecatedDocLink: "changed-command-line-flags", Usage: "Make up to this many attempts to run each spec. If any of the attempts succeed, the suite will not be failed."}, + {KeyPath: "S.FailOnEmpty", Name: "fail-on-empty", SectionKey: "failure", + Usage: "If set, ginkgo will mark the test suite as failed if no specs are run."}, {KeyPath: "S.DryRun", Name: "dry-run", SectionKey: "debug", DeprecatedName: "dryRun", DeprecatedDocLink: "changed-command-line-flags", Usage: "If set, ginkgo will walk the test hierarchy without actually running anything. Best paired with -v."}, @@ -331,6 +339,12 @@ var ReporterConfigFlags = GinkgoFlags{ Usage: "If set, default reporter prints out the full stack trace when a failure occurs"}, {KeyPath: "R.ShowNodeEvents", Name: "show-node-events", SectionKey: "output", Usage: "If set, default reporter prints node > Enter and < Exit events when specs fail"}, + {KeyPath: "R.GithubOutput", Name: "github-output", SectionKey: "output", + Usage: "If set, default reporter prints easier to manage output in Github Actions."}, + {KeyPath: "R.SilenceSkips", Name: "silence-skips", SectionKey: "output", + Usage: "If set, default reporter will not print out skipped tests."}, + {KeyPath: "R.ForceNewlines", Name: "force-newlines", SectionKey: "output", + Usage: "If set, default reporter will ensure a newline appears after each test."}, {KeyPath: "R.JSONReport", Name: "json-report", UsageArgument: "filename.json", SectionKey: "output", Usage: "If set, Ginkgo will generate a JSON-formatted test report at the specified location."}, @@ -499,7 +513,7 @@ var GinkgoCLIWatchFlags = GinkgoFlags{ // GoBuildFlags provides flags for the Ginkgo CLI build, run, and watch commands that capture go's build-time flags. These are passed to go test -c by the ginkgo CLI var GoBuildFlags = GinkgoFlags{ {KeyPath: "Go.Race", Name: "race", SectionKey: "code-and-coverage-analysis", - Usage: "enable data race detection. Supported only on linux/amd64, freebsd/amd64, darwin/amd64, windows/amd64, linux/ppc64le and linux/arm64 (only for 48-bit VMA)."}, + Usage: "enable data race detection. Supported on linux/amd64, linux/ppc64le, linux/arm64, linux/s390x, freebsd/amd64, netbsd/amd64, darwin/amd64, darwin/arm64, and windows/amd64."}, {KeyPath: "Go.Vet", Name: "vet", UsageArgument: "list", SectionKey: "code-and-coverage-analysis", Usage: `Configure the invocation of "go vet" during "go test" to use the comma-separated list of vet checks. If list is empty, "go test" runs "go vet" with a curated list of checks believed to be always worth addressing. If list is "off", "go test" does not run "go vet" at all. Available checks can be found by running 'go doc cmd/vet'`}, {KeyPath: "Go.Cover", Name: "cover", SectionKey: "code-and-coverage-analysis", @@ -515,6 +529,8 @@ var GoBuildFlags = GinkgoFlags{ Usage: "arguments to pass on each go tool asm invocation."}, {KeyPath: "Go.BuildMode", Name: "buildmode", UsageArgument: "mode", SectionKey: "go-build", Usage: "build mode to use. See 'go help buildmode' for more."}, + {KeyPath: "Go.BuildVCS", Name: "buildvcs", SectionKey: "go-build", + Usage: "adds version control information."}, {KeyPath: "Go.Compiler", Name: "compiler", UsageArgument: "name", SectionKey: "go-build", Usage: "name of compiler to use, as in runtime.Compiler (gccgo or gc)."}, {KeyPath: "Go.GCCGoFlags", Name: "gccgoflags", UsageArgument: "'[pattern=]arg list'", SectionKey: "go-build", @@ -549,6 +565,8 @@ var GoBuildFlags = GinkgoFlags{ Usage: "print the name of the temporary work directory and do not delete it when exiting."}, {KeyPath: "Go.X", Name: "x", SectionKey: "go-build", Usage: "print the commands."}, + {KeyPath: "Go.O", Name: "o", SectionKey: "go-build", + Usage: "output binary path (including name)."}, } // GoRunFlags provides flags for the Ginkgo CLI run, and watch commands that capture go's run-time flags. These are passed to the compiled test binary by the ginkgo CLI @@ -602,7 +620,7 @@ func VetAndInitializeCLIAndGoConfig(cliConfig CLIConfig, goFlagsConfig GoFlagsCo } // GenerateGoTestCompileArgs is used by the Ginkgo CLI to generate command line arguments to pass to the go test -c command when compiling the test -func GenerateGoTestCompileArgs(goFlagsConfig GoFlagsConfig, destination string, packageToBuild string, pathToInvocationPath string) ([]string, error) { +func GenerateGoTestCompileArgs(goFlagsConfig GoFlagsConfig, packageToBuild string, pathToInvocationPath string) ([]string, error) { // if the user has set the CoverProfile run-time flag make sure to set the build-time cover flag to make sure // the built test binary can generate a coverprofile if goFlagsConfig.CoverProfile != "" { @@ -625,7 +643,7 @@ func GenerateGoTestCompileArgs(goFlagsConfig GoFlagsConfig, destination string, goFlagsConfig.CoverPkg = strings.Join(adjustedCoverPkgs, ",") } - args := []string{"test", "-c", "-o", destination, packageToBuild} + args := []string{"test", "-c", packageToBuild} goArgs, err := GenerateFlagArgs( GoBuildFlags, map[string]interface{}{ diff --git a/vendor/github.com/onsi/ginkgo/v2/types/errors.go b/vendor/github.com/onsi/ginkgo/v2/types/errors.go index 4fbdc3e9..6bb72d00 100644 --- a/vendor/github.com/onsi/ginkgo/v2/types/errors.go +++ b/vendor/github.com/onsi/ginkgo/v2/types/errors.go @@ -505,6 +505,15 @@ func (g ginkgoErrors) IncorrectVariadicParameterTypeToTableFunction(expected, ac } } +func (g ginkgoErrors) ContextsCannotBeUsedInSubtreeTables(cl CodeLocation) error { + return GinkgoError{ + Heading: "Contexts cannot be used in subtree tables", + Message: "You''ve defined a subtree body function that accepts a context but did not provide one in the table entry. Ginkgo SpecContexts can only be passed in to subject and setup nodes - so if you are trying to implement a spec timeout you should request a context in the It function within your subtree body function, not in the subtree body function itself.", + CodeLocation: cl, + DocLink: "table-specs", + } +} + /* Parallel Synchronization errors */ func (g ginkgoErrors) AggregatedReportUnavailableDueToNodeDisappearing() error { diff --git a/vendor/github.com/onsi/ginkgo/v2/types/flags.go b/vendor/github.com/onsi/ginkgo/v2/types/flags.go index 9186ae87..de69f302 100644 --- a/vendor/github.com/onsi/ginkgo/v2/types/flags.go +++ b/vendor/github.com/onsi/ginkgo/v2/types/flags.go @@ -24,7 +24,8 @@ type GinkgoFlag struct { DeprecatedDocLink string DeprecatedVersion string - ExportAs string + ExportAs string + AlwaysExport bool } type GinkgoFlags []GinkgoFlag @@ -431,7 +432,7 @@ func (ssv stringSliceVar) Set(s string) error { return nil } -//given a set of GinkgoFlags and bindings, generate flag arguments suitable to be passed to an application with that set of flags configured. +// given a set of GinkgoFlags and bindings, generate flag arguments suitable to be passed to an application with that set of flags configured. func GenerateFlagArgs(flags GinkgoFlags, bindings interface{}) ([]string, error) { result := []string{} for _, flag := range flags { @@ -451,19 +452,19 @@ func GenerateFlagArgs(flags GinkgoFlags, bindings interface{}) ([]string, error) iface := value.Interface() switch value.Type() { case reflect.TypeOf(string("")): - if iface.(string) != "" { + if iface.(string) != "" || flag.AlwaysExport { result = append(result, fmt.Sprintf("--%s=%s", name, iface)) } case reflect.TypeOf(int64(0)): - if iface.(int64) != 0 { + if iface.(int64) != 0 || flag.AlwaysExport { result = append(result, fmt.Sprintf("--%s=%d", name, iface)) } case reflect.TypeOf(float64(0)): - if iface.(float64) != 0 { + if iface.(float64) != 0 || flag.AlwaysExport { result = append(result, fmt.Sprintf("--%s=%f", name, iface)) } case reflect.TypeOf(int(0)): - if iface.(int) != 0 { + if iface.(int) != 0 || flag.AlwaysExport { result = append(result, fmt.Sprintf("--%s=%d", name, iface)) } case reflect.TypeOf(bool(true)): @@ -471,7 +472,7 @@ func GenerateFlagArgs(flags GinkgoFlags, bindings interface{}) ([]string, error) result = append(result, fmt.Sprintf("--%s", name)) } case reflect.TypeOf(time.Duration(0)): - if iface.(time.Duration) != time.Duration(0) { + if iface.(time.Duration) != time.Duration(0) || flag.AlwaysExport { result = append(result, fmt.Sprintf("--%s=%s", name, iface)) } diff --git a/vendor/github.com/onsi/ginkgo/v2/types/label_filter.go b/vendor/github.com/onsi/ginkgo/v2/types/label_filter.go index b0d3b651..7fdc8aa2 100644 --- a/vendor/github.com/onsi/ginkgo/v2/types/label_filter.go +++ b/vendor/github.com/onsi/ginkgo/v2/types/label_filter.go @@ -45,6 +45,83 @@ func orAction(a, b LabelFilter) LabelFilter { return func(labels []string) bool { return a(labels) || b(labels) } } +func labelSetFor(key string, labels []string) map[string]bool { + key = strings.ToLower(strings.TrimSpace(key)) + out := map[string]bool{} + for _, label := range labels { + components := strings.SplitN(label, ":", 2) + if len(components) < 2 { + continue + } + if key == strings.ToLower(strings.TrimSpace(components[0])) { + out[strings.ToLower(strings.TrimSpace(components[1]))] = true + } + } + + return out +} + +func isEmptyLabelSetAction(key string) LabelFilter { + return func(labels []string) bool { + return len(labelSetFor(key, labels)) == 0 + } +} + +func containsAnyLabelSetAction(key string, expectedValues []string) LabelFilter { + return func(labels []string) bool { + set := labelSetFor(key, labels) + for _, value := range expectedValues { + if set[value] { + return true + } + } + return false + } +} + +func containsAllLabelSetAction(key string, expectedValues []string) LabelFilter { + return func(labels []string) bool { + set := labelSetFor(key, labels) + for _, value := range expectedValues { + if !set[value] { + return false + } + } + return true + } +} + +func consistsOfLabelSetAction(key string, expectedValues []string) LabelFilter { + return func(labels []string) bool { + set := labelSetFor(key, labels) + if len(set) != len(expectedValues) { + return false + } + for _, value := range expectedValues { + if !set[value] { + return false + } + } + return true + } +} + +func isSubsetOfLabelSetAction(key string, expectedValues []string) LabelFilter { + expectedSet := map[string]bool{} + for _, value := range expectedValues { + expectedSet[value] = true + } + return func(labels []string) bool { + set := labelSetFor(key, labels) + for value := range set { + if !expectedSet[value] { + return false + } + } + return true + } +} + type lfToken uint const ( @@ -58,6 +135,9 @@ const ( lfTokenOr lfTokenRegexp lfTokenLabel + lfTokenSetKey + lfTokenSetOperation + lfTokenSetArgument lfTokenEOF ) @@ -71,6 +151,8 @@ func (l lfToken) Precedence() int { return 2 case lfTokenNot: return 3 + case lfTokenSetOperation: + return 4 } return -1 } @@ -93,6 +175,12 @@ func (l lfToken) String() string { return "/regexp/" case lfTokenLabel: return "label" + case lfTokenSetKey: + return "set_key" + case lfTokenSetOperation: + return "set_operation" + case lfTokenSetArgument: + return "set_argument" case lfTokenEOF: return "EOF" } @@ -148,6 +236,35 @@ func (tn *treeNode) constructLabelFilter(input string) (LabelFilter, error) { return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, tn.location, fmt.Sprintf("RegExp compilation error: %s", err)) } return matchLabelRegexAction(re), nil + case lfTokenSetOperation: + tokenSetOperation := strings.ToLower(tn.value) + if tokenSetOperation == "isempty" { + return isEmptyLabelSetAction(tn.leftNode.value), nil + } + if tn.rightNode == nil { + return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, tn.location, fmt.Sprintf("Set operation '%s' is missing an argument.", tn.value)) + } + + rawValues := strings.Split(tn.rightNode.value, ",") + values := make([]string, len(rawValues)) + for i := range rawValues { + values[i] = strings.ToLower(strings.TrimSpace(rawValues[i])) + if strings.ContainsAny(values[i], "&|!,()/") { + return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, tn.rightNode.location, fmt.Sprintf("Invalid label value '%s' in set operation argument.", values[i])) + } else if values[i] == "" { + return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, tn.rightNode.location, "Empty label value in set operation argument.") + } + } + switch tokenSetOperation { + case "containsany": + return containsAnyLabelSetAction(tn.leftNode.value, values), nil + case "containsall": + return containsAllLabelSetAction(tn.leftNode.value, values), nil + case "consistsof": + return consistsOfLabelSetAction(tn.leftNode.value, values), nil + case "issubsetof": + return isSubsetOfLabelSetAction(tn.leftNode.value, values), nil + } } if tn.rightNode == nil { @@ -203,7 +320,17 @@ func (tn *treeNode) toString(indent int) string { return out } +var validSetOperations = map[string]string{ + "containsany": "containsAny", + "containsall": "containsAll", + "consistsof": "consistsOf", + "issubsetof": "isSubsetOf", + "isempty": "isEmpty", +} + func tokenize(input string) func() (*treeNode, error) { + lastToken := lfTokenInvalid + lastValue := "" runes, i := []rune(input), 0 peekIs := func(r rune) bool { @@ -233,6 +360,53 @@ func tokenize(input string) func() (*treeNode, error) { } node := &treeNode{location: i} + defer func() { + lastToken = node.token + lastValue = node.value + }() + + if lastToken == lfTokenSetKey { + //we should get a valid set operation next + value, n := consumeUntil(" )") + if validSetOperations[strings.ToLower(value)] == "" { + return &treeNode{}, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, i, fmt.Sprintf("Invalid set operation '%s'.", value)) + } + i += n + node.token, node.value = lfTokenSetOperation, value + return node, nil + } + if lastToken == lfTokenSetOperation { + //we should get an argument next, if we aren't isempty + var arg = "" + origI := i + if runes[i] == '{' { + i += 1 + value, n := consumeUntil("}") + if i+n >= len(runes) { + return &treeNode{}, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, i-1, "Missing closing '}' in set operation argument?") + } + i += n + 1 + arg = value + } else { + value, n := consumeUntil("&|!,()/") + i += n + arg = strings.TrimSpace(value) + } + if strings.ToLower(lastValue) == "isempty" && arg != "" { + return &treeNode{}, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, origI, fmt.Sprintf("isEmpty does not take arguments, was passed '%s'.", arg)) + } + if arg == "" && strings.ToLower(lastValue) != "isempty" { + if i < len(runes) && runes[i] == '/' { + return &treeNode{}, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, origI, "Set operations do not support regular expressions.") + } else { + return &treeNode{}, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, origI, fmt.Sprintf("Set operation '%s' requires an argument.", lastValue)) + } + } + // note that we sent an empty SetArgument token if we are isempty + node.token, node.value = lfTokenSetArgument, arg + return node, nil + } + switch runes[i] { case '&': if !peekIs('&') { @@ -264,8 +438,38 @@ func tokenize(input string) func() (*treeNode, error) { i += n + 1 node.token, node.value = lfTokenRegexp, value default: - value, n := consumeUntil("&|!,()/") + value, n := consumeUntil("&|!,()/:") i += n + value = strings.TrimSpace(value) + + //are we the beginning of a set operation? + if i < len(runes) && runes[i] == ':' { + if peekIs(' ') { + if value == "" { + return &treeNode{}, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, i, "Missing set key.") + } + i += 1 + //we are the beginning of a set operation + node.token, node.value = lfTokenSetKey, value + return node, nil + } + additionalValue, n := consumeUntil("&|!,()/") + additionalValue = strings.TrimSpace(additionalValue) + if additionalValue == ":" { + return &treeNode{}, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, i, "Missing set operation.") + } + i += n + value += additionalValue + } + + valueToCheckForSetOperation := strings.ToLower(value) + for setOperation := range validSetOperations { + idx := strings.Index(valueToCheckForSetOperation, " "+setOperation) + if idx > 0 { + return &treeNode{}, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, i-n+idx+1, fmt.Sprintf("Looks like you are using the set operator '%s' but did not provide a set key. Did you forget the ':'?", validSetOperations[setOperation])) + } + } + node.token, node.value = lfTokenLabel, strings.TrimSpace(value) } return node, nil @@ -307,7 +511,7 @@ LOOP: switch node.token { case lfTokenEOF: break LOOP - case lfTokenLabel, lfTokenRegexp: + case lfTokenLabel, lfTokenRegexp, lfTokenSetKey: if current.rightNode != nil { return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, node.location, "Found two adjacent labels. You need an operator between them.") } @@ -326,6 +530,18 @@ LOOP: node.setLeftNode(nodeToStealFrom.rightNode) nodeToStealFrom.setRightNode(node) current = node + case lfTokenSetOperation: + if current.rightNode == nil { + return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, node.location, fmt.Sprintf("Set operation '%s' missing left hand operand.", node.value)) + } + node.setLeftNode(current.rightNode) + current.setRightNode(node) + current = node + case lfTokenSetArgument: + if current.rightNode != nil { + return nil, GinkgoErrors.SyntaxErrorParsingLabelFilter(input, node.location, fmt.Sprintf("Unexpected set argument '%s'.", node.token)) + } + current.setRightNode(node) case lfTokenCloseGroup: firstUnmatchedOpenNode := current.firstUnmatchedOpenNode() if firstUnmatchedOpenNode == nil { @@ -354,5 +570,14 @@ func ValidateAndCleanupLabel(label string, cl CodeLocation) (string, error) { if strings.ContainsAny(out, "&|!,()/") { return "", GinkgoErrors.InvalidLabel(label, cl) } + if out[0] == ':' { + return "", GinkgoErrors.InvalidLabel(label, cl) + } + if strings.Contains(out, ":") { + components := strings.SplitN(out, ":", 2) + if len(components) < 2 || components[1] == "" { + return "", GinkgoErrors.InvalidLabel(label, cl) + } + } return out, nil } diff --git a/vendor/github.com/onsi/ginkgo/v2/types/version.go b/vendor/github.com/onsi/ginkgo/v2/types/version.go index 21fb22b6..6dfb25f2 100644 --- a/vendor/github.com/onsi/ginkgo/v2/types/version.go +++ b/vendor/github.com/onsi/ginkgo/v2/types/version.go @@ -1,3 +1,3 @@ package types -const VERSION = "2.12.0" +const VERSION = "2.20.2" diff --git a/vendor/github.com/pelletier/go-toml/v2/.gitignore b/vendor/github.com/pelletier/go-toml/v2/.gitignore index a69e2b0e..4b7c4eda 100644 --- a/vendor/github.com/pelletier/go-toml/v2/.gitignore +++ b/vendor/github.com/pelletier/go-toml/v2/.gitignore @@ -3,4 +3,5 @@ fuzz/ cmd/tomll/tomll cmd/tomljson/tomljson cmd/tomltestgen/tomltestgen -dist \ No newline at end of file +dist +tests/ diff --git a/vendor/github.com/pelletier/go-toml/v2/.goreleaser.yaml b/vendor/github.com/pelletier/go-toml/v2/.goreleaser.yaml index 3aa1840e..ec52857a 100644 --- a/vendor/github.com/pelletier/go-toml/v2/.goreleaser.yaml +++ b/vendor/github.com/pelletier/go-toml/v2/.goreleaser.yaml @@ -1,3 +1,4 @@ +version: 2 before: hooks: - go mod tidy @@ -18,6 +19,7 @@ builds: - linux_amd64 - linux_arm64 - linux_arm + - linux_riscv64 - windows_amd64 - windows_arm64 - windows_arm @@ -37,6 +39,7 @@ builds: - linux_amd64 - linux_arm64 - linux_arm + - linux_riscv64 - windows_amd64 - windows_arm64 - windows_arm @@ -55,6 +58,7 @@ builds: targets: - linux_amd64 - linux_arm64 + - linux_riscv64 - linux_arm - windows_amd64 - windows_arm64 diff --git a/vendor/github.com/pelletier/go-toml/v2/CONTRIBUTING.md b/vendor/github.com/pelletier/go-toml/v2/CONTRIBUTING.md index 04dd12bc..96ecf9e2 100644 --- a/vendor/github.com/pelletier/go-toml/v2/CONTRIBUTING.md +++ b/vendor/github.com/pelletier/go-toml/v2/CONTRIBUTING.md @@ -165,25 +165,22 @@ Checklist: ### New release -1. Decide on the next version number. Use semver. -2. Generate release notes using [`gh`][gh]. Example: +1. Decide on the next version number. Use semver. Review commits since last + version to assess. +2. Tag release. For example: ``` -$ gh api -X POST \ - -F tag_name='v2.0.0-beta.5' \ - -F target_commitish='v2' \ - -F previous_tag_name='v2.0.0-beta.4' \ - --jq '.body' \ - repos/pelletier/go-toml/releases/generate-notes +git checkout v2 +git pull +git tag v2.2.0 +git push --tags ``` -3. Look for "Other changes". That would indicate a pull request not labeled - properly. Tweak labels and pull request titles until changelog looks good for - users. -4. [Draft new release][new-release]. -5. Fill tag and target with the same value used to generate the changelog. -6. Set title to the new tag value. -7. Paste the generated changelog. -8. Check "create discussion", in the "Releases" category. -9. Check pre-release if new version is an alpha or beta. +3. CI automatically builds a draft Github release. Review it and edit as + necessary. Look for "Other changes". That would indicate a pull request not + labeled properly. Tweak labels and pull request titles until changelog looks + good for users. +4. Check "create discussion" box, in the "Releases" category. +5. If new version is an alpha or beta only, check pre-release box. + [issues-tracker]: https://github.com/pelletier/go-toml/issues [bug-report]: https://github.com/pelletier/go-toml/issues/new?template=bug_report.md diff --git a/vendor/github.com/pelletier/go-toml/v2/LICENSE b/vendor/github.com/pelletier/go-toml/v2/LICENSE index 6839d51c..991e2ae9 100644 --- a/vendor/github.com/pelletier/go-toml/v2/LICENSE +++ b/vendor/github.com/pelletier/go-toml/v2/LICENSE @@ -1,6 +1,7 @@ The MIT License (MIT) -Copyright (c) 2013 - 2022 Thomas Pelletier, Eric Anderton +go-toml v2 +Copyright (c) 2021 - 2023 Thomas Pelletier Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/vendor/github.com/pelletier/go-toml/v2/README.md b/vendor/github.com/pelletier/go-toml/v2/README.md index d53f4397..0755e556 100644 --- a/vendor/github.com/pelletier/go-toml/v2/README.md +++ b/vendor/github.com/pelletier/go-toml/v2/README.md @@ -45,16 +45,15 @@ to check for typos. [See example in the documentation][strict]. ### Contextualized errors -When most decoding errors occur, go-toml returns [`DecodeError`][decode-err]), +When most decoding errors occur, go-toml returns [`DecodeError`][decode-err], which contains a human readable contextualized version of the error. For example: ``` -2| key1 = "value1" -3| key2 = "missing2" - | ~~~~ missing field -4| key3 = "missing3" -5| key4 = "value4" +1| [server] +2| path = 100 + | ~~~ cannot decode TOML integer into struct field toml_test.Server.Path of type string +3| port = 50 ``` [decode-err]: https://pkg.go.dev/github.com/pelletier/go-toml/v2#DecodeError @@ -73,15 +72,35 @@ representation. [tlt]: https://pkg.go.dev/github.com/pelletier/go-toml/v2#LocalTime [tldt]: https://pkg.go.dev/github.com/pelletier/go-toml/v2#LocalDateTime +### Commented config + +Since TOML is often used for configuration files, go-toml can emit documents +annotated with [comments and commented-out values][comments-example]. For +example, it can generate the following file: + +```toml +# Host IP to connect to. +host = '127.0.0.1' +# Port of the remote server. +port = 4242 + +# Encryption parameters (optional) +# [TLS] +# cipher = 'AEAD-AES128-GCM-SHA256' +# version = 'TLS 1.3' +``` + +[comments-example]: https://pkg.go.dev/github.com/pelletier/go-toml/v2#example-Marshal-Commented + ## Getting started Given the following struct, let's see how to read it and write it as TOML: ```go type MyConfig struct { - Version int - Name string - Tags []string + Version int + Name string + Tags []string } ``` @@ -100,7 +119,7 @@ tags = ["go", "toml"] var cfg MyConfig err := toml.Unmarshal([]byte(doc), &cfg) if err != nil { - panic(err) + panic(err) } fmt.Println("version:", cfg.Version) fmt.Println("name:", cfg.Name) @@ -121,14 +140,14 @@ as a TOML document: ```go cfg := MyConfig{ - Version: 2, - Name: "go-toml", - Tags: []string{"go", "toml"}, + Version: 2, + Name: "go-toml", + Tags: []string{"go", "toml"}, } b, err := toml.Marshal(cfg) if err != nil { - panic(err) + panic(err) } fmt.Println(string(b)) @@ -156,17 +175,17 @@ the AST level. See https://pkg.go.dev/github.com/pelletier/go-toml/v2/unstable. Execution time speedup compared to other Go TOML libraries: - - - - - - - - - - - + + + + + + + + + + +
Benchmarkgo-toml v1BurntSushi/toml
Marshal/HugoFrontMatter-21.9x1.9x
Marshal/ReferenceFile/map-21.7x1.8x
Marshal/ReferenceFile/struct-22.2x2.5x
Unmarshal/HugoFrontMatter-22.9x2.9x
Unmarshal/ReferenceFile/map-22.6x2.9x
Unmarshal/ReferenceFile/struct-24.4x5.3x
Benchmarkgo-toml v1BurntSushi/toml
Marshal/HugoFrontMatter-21.9x2.2x
Marshal/ReferenceFile/map-21.7x2.1x
Marshal/ReferenceFile/struct-22.2x3.0x
Unmarshal/HugoFrontMatter-22.9x2.7x
Unmarshal/ReferenceFile/map-22.6x2.7x
Unmarshal/ReferenceFile/struct-24.6x5.1x
See more

The table above has the results of the most common use-cases. The table below @@ -174,22 +193,22 @@ contains the results of all benchmarks, including unrealistic ones. It is provided for completeness.

- - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + +
Benchmarkgo-toml v1BurntSushi/toml
Marshal/SimpleDocument/map-21.8x2.9x
Marshal/SimpleDocument/struct-22.7x4.2x
Unmarshal/SimpleDocument/map-24.5x3.1x
Unmarshal/SimpleDocument/struct-26.2x3.9x
UnmarshalDataset/example-23.1x3.5x
UnmarshalDataset/code-22.3x3.1x
UnmarshalDataset/twitter-22.5x2.6x
UnmarshalDataset/citm_catalog-22.1x2.2x
UnmarshalDataset/canada-21.6x1.3x
UnmarshalDataset/config-24.3x3.2x
[Geo mean]2.7x2.8x
Benchmarkgo-toml v1BurntSushi/toml
Marshal/SimpleDocument/map-21.8x2.7x
Marshal/SimpleDocument/struct-22.7x3.8x
Unmarshal/SimpleDocument/map-23.8x3.0x
Unmarshal/SimpleDocument/struct-25.6x4.1x
UnmarshalDataset/example-23.0x3.2x
UnmarshalDataset/code-22.3x2.9x
UnmarshalDataset/twitter-22.6x2.7x
UnmarshalDataset/citm_catalog-22.2x2.3x
UnmarshalDataset/canada-21.8x1.5x
UnmarshalDataset/config-24.1x2.9x
geomean2.7x2.8x

This table can be generated with ./ci.sh benchmark -a -html.

@@ -214,24 +233,24 @@ Go-toml provides three handy command line tools: * `tomljson`: Reads a TOML file and outputs its JSON representation. - ``` - $ go install github.com/pelletier/go-toml/v2/cmd/tomljson@latest - $ tomljson --help - ``` + ``` + $ go install github.com/pelletier/go-toml/v2/cmd/tomljson@latest + $ tomljson --help + ``` * `jsontoml`: Reads a JSON file and outputs a TOML representation. - ``` - $ go install github.com/pelletier/go-toml/v2/cmd/jsontoml@latest - $ jsontoml --help - ``` + ``` + $ go install github.com/pelletier/go-toml/v2/cmd/jsontoml@latest + $ jsontoml --help + ``` * `tomll`: Lints and reformats a TOML file. - ``` - $ go install github.com/pelletier/go-toml/v2/cmd/tomll@latest - $ tomll --help - ``` + ``` + $ go install github.com/pelletier/go-toml/v2/cmd/tomll@latest + $ tomll --help + ``` ### Docker image @@ -242,7 +261,7 @@ Those tools are also available as a [Docker image][docker]. For example, to use docker run -i ghcr.io/pelletier/go-toml:v2 tomljson < example.toml ``` -Multiple versions are availble on [ghcr.io][docker]. +Multiple versions are available on [ghcr.io][docker]. [docker]: https://github.com/pelletier/go-toml/pkgs/container/go-toml @@ -274,16 +293,16 @@ element in the interface to decode the object. For example: ```go type inner struct { - B interface{} + B interface{} } type doc struct { - A interface{} + A interface{} } d := doc{ - A: inner{ - B: "Before", - }, + A: inner{ + B: "Before", + }, } data := ` @@ -322,7 +341,7 @@ contained in the doc is superior to the capacity of the array. For example: ```go type doc struct { - A [2]string + A [2]string } d := doc{} err := toml.Unmarshal([]byte(`A = ["one", "two", "many"]`), &d) @@ -497,27 +516,20 @@ is not necessary anymore. V1 used to provide multiple struct tags: `comment`, `commented`, `multiline`, `toml`, and `omitempty`. To behave more like the standard library, v2 has merged -`toml`, `multiline`, and `omitempty`. For example: +`toml`, `multiline`, `commented`, and `omitempty`. For example: ```go type doc struct { // v1 - F string `toml:"field" multiline:"true" omitempty:"true"` + F string `toml:"field" multiline:"true" omitempty:"true" commented:"true"` // v2 - F string `toml:"field,multiline,omitempty"` + F string `toml:"field,multiline,omitempty,commented"` } ``` Has a result, the `Encoder.SetTag*` methods have been removed, as there is just one tag now. - -#### `commented` tag has been removed - -There is no replacement for the `commented` tag. This feature would be better -suited in a proper document model for go-toml v2, which has been [cut from -scope][nodoc] at the moment. - #### `Encoder.ArraysWithOneElementPerLine` has been renamed The new name is `Encoder.SetArraysMultiline`. The behavior should be the same. @@ -553,10 +565,11 @@ complete solutions exist out there. ## Versioning -Go-toml follows [Semantic Versioning](https://semver.org). The supported version -of [TOML](https://github.com/toml-lang/toml) is indicated at the beginning of -this document. The last two major versions of Go are supported -(see [Go Release Policy](https://golang.org/doc/devel/release.html#policy)). +Expect for parts explicitly marked otherwise, go-toml follows [Semantic +Versioning](https://semver.org). The supported version of +[TOML](https://github.com/toml-lang/toml) is indicated at the beginning of this +document. The last two major versions of Go are supported (see [Go Release +Policy](https://golang.org/doc/devel/release.html#policy)). ## License diff --git a/vendor/github.com/pelletier/go-toml/v2/SECURITY.md b/vendor/github.com/pelletier/go-toml/v2/SECURITY.md index b2f21cfc..d4d554fd 100644 --- a/vendor/github.com/pelletier/go-toml/v2/SECURITY.md +++ b/vendor/github.com/pelletier/go-toml/v2/SECURITY.md @@ -2,9 +2,6 @@ ## Supported Versions -Use this section to tell people about which versions of your project are -currently being supported with security updates. - | Version | Supported | | ---------- | ------------------ | | Latest 2.x | :white_check_mark: | diff --git a/vendor/github.com/pelletier/go-toml/v2/ci.sh b/vendor/github.com/pelletier/go-toml/v2/ci.sh index 9ae8b753..86217a9b 100644 --- a/vendor/github.com/pelletier/go-toml/v2/ci.sh +++ b/vendor/github.com/pelletier/go-toml/v2/ci.sh @@ -77,7 +77,7 @@ cover() { pushd "$dir" go test -covermode=atomic -coverpkg=./... -coverprofile=coverage.out.tmp ./... - cat coverage.out.tmp | grep -v fuzz | grep -v testsuite | grep -v tomltestgen | grep -v gotoml-test-decoder > coverage.out + grep -Ev '(fuzz|testsuite|tomltestgen|gotoml-test-decoder|gotoml-test-encoder)' coverage.out.tmp > coverage.out go tool cover -func=coverage.out echo "Coverage profile for ${branch}: ${dir}/coverage.out" >&2 popd @@ -152,7 +152,7 @@ bench() { fi export GOMAXPROCS=2 - nice -n -19 taskset --cpu-list 0,1 go test '-bench=^Benchmark(Un)?[mM]arshal' -count=5 -run=Nothing ./... | tee "${out}" + go test '-bench=^Benchmark(Un)?[mM]arshal' -count=10 -run=Nothing ./... | tee "${out}" popd if [ "${branch}" != "HEAD" ]; then @@ -161,10 +161,12 @@ bench() { } fmktemp() { - if mktemp --version|grep GNU >/dev/null; then - mktemp --suffix=-$1; + if mktemp --version &> /dev/null; then + # GNU + mktemp --suffix=-$1 else - mktemp -t $1; + # BSD + mktemp -t $1 fi } @@ -184,12 +186,14 @@ with open(sys.argv[1]) as f: lines.append(line.split(',')) results = [] -for line in reversed(lines[1:]): +for line in reversed(lines[2:]): + if len(line) < 8 or line[0] == "": + continue v2 = float(line[1]) results.append([ line[0].replace("-32", ""), "%.1fx" % (float(line[3])/v2), # v1 - "%.1fx" % (float(line[5])/v2), # bs + "%.1fx" % (float(line[7])/v2), # bs ]) # move geomean to the end results.append(results[0]) @@ -260,10 +264,10 @@ benchmark() { if [ "$1" = "-html" ]; then tmpcsv=`fmktemp csv` - benchstat -csv -geomean go-toml-v2.txt go-toml-v1.txt bs-toml.txt > $tmpcsv + benchstat -format csv go-toml-v2.txt go-toml-v1.txt bs-toml.txt > $tmpcsv benchstathtml $tmpcsv else - benchstat -geomean go-toml-v2.txt go-toml-v1.txt bs-toml.txt + benchstat go-toml-v2.txt go-toml-v1.txt bs-toml.txt fi rm -f go-toml-v2.txt go-toml-v1.txt bs-toml.txt diff --git a/vendor/github.com/pelletier/go-toml/v2/decode.go b/vendor/github.com/pelletier/go-toml/v2/decode.go index 3a860d0f..f0ec3b17 100644 --- a/vendor/github.com/pelletier/go-toml/v2/decode.go +++ b/vendor/github.com/pelletier/go-toml/v2/decode.go @@ -318,7 +318,7 @@ func parseFloat(b []byte) (float64, error) { if cleaned[0] == '+' || cleaned[0] == '-' { start = 1 } - if cleaned[start] == '0' && isDigit(cleaned[start+1]) { + if cleaned[start] == '0' && len(cleaned) > start+1 && isDigit(cleaned[start+1]) { return 0, unstable.NewParserError(b, "float integer part cannot have leading zeroes") } diff --git a/vendor/github.com/pelletier/go-toml/v2/internal/tracker/seen.go b/vendor/github.com/pelletier/go-toml/v2/internal/tracker/seen.go index 40e23f83..76df2d5b 100644 --- a/vendor/github.com/pelletier/go-toml/v2/internal/tracker/seen.go +++ b/vendor/github.com/pelletier/go-toml/v2/internal/tracker/seen.go @@ -57,7 +57,11 @@ type SeenTracker struct { currentIdx int } -var pool sync.Pool +var pool = sync.Pool{ + New: func() interface{} { + return &SeenTracker{} + }, +} func (s *SeenTracker) reset() { // Always contains a root element at index 0. @@ -149,8 +153,9 @@ func (s *SeenTracker) setExplicitFlag(parentIdx int) { // CheckExpression takes a top-level node and checks that it does not contain // keys that have been seen in previous calls, and validates that types are -// consistent. -func (s *SeenTracker) CheckExpression(node *unstable.Node) error { +// consistent. It returns true if it is the first time this node's key is seen. +// Useful to clear array tables on first use. +func (s *SeenTracker) CheckExpression(node *unstable.Node) (bool, error) { if s.entries == nil { s.reset() } @@ -166,7 +171,7 @@ func (s *SeenTracker) CheckExpression(node *unstable.Node) error { } } -func (s *SeenTracker) checkTable(node *unstable.Node) error { +func (s *SeenTracker) checkTable(node *unstable.Node) (bool, error) { if s.currentIdx >= 0 { s.setExplicitFlag(s.currentIdx) } @@ -192,7 +197,7 @@ func (s *SeenTracker) checkTable(node *unstable.Node) error { } else { entry := s.entries[idx] if entry.kind == valueKind { - return fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind) + return false, fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind) } } parentIdx = idx @@ -201,25 +206,27 @@ func (s *SeenTracker) checkTable(node *unstable.Node) error { k := it.Node().Data idx := s.find(parentIdx, k) + first := false if idx >= 0 { kind := s.entries[idx].kind if kind != tableKind { - return fmt.Errorf("toml: key %s should be a table, not a %s", string(k), kind) + return false, fmt.Errorf("toml: key %s should be a table, not a %s", string(k), kind) } if s.entries[idx].explicit { - return fmt.Errorf("toml: table %s already exists", string(k)) + return false, fmt.Errorf("toml: table %s already exists", string(k)) } s.entries[idx].explicit = true } else { idx = s.create(parentIdx, k, tableKind, true, false) + first = true } s.currentIdx = idx - return nil + return first, nil } -func (s *SeenTracker) checkArrayTable(node *unstable.Node) error { +func (s *SeenTracker) checkArrayTable(node *unstable.Node) (bool, error) { if s.currentIdx >= 0 { s.setExplicitFlag(s.currentIdx) } @@ -242,7 +249,7 @@ func (s *SeenTracker) checkArrayTable(node *unstable.Node) error { } else { entry := s.entries[idx] if entry.kind == valueKind { - return fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind) + return false, fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind) } } @@ -252,22 +259,23 @@ func (s *SeenTracker) checkArrayTable(node *unstable.Node) error { k := it.Node().Data idx := s.find(parentIdx, k) - if idx >= 0 { + firstTime := idx < 0 + if firstTime { + idx = s.create(parentIdx, k, arrayTableKind, true, false) + } else { kind := s.entries[idx].kind if kind != arrayTableKind { - return fmt.Errorf("toml: key %s already exists as a %s, but should be an array table", kind, string(k)) + return false, fmt.Errorf("toml: key %s already exists as a %s, but should be an array table", kind, string(k)) } s.clear(idx) - } else { - idx = s.create(parentIdx, k, arrayTableKind, true, false) } s.currentIdx = idx - return nil + return firstTime, nil } -func (s *SeenTracker) checkKeyValue(node *unstable.Node) error { +func (s *SeenTracker) checkKeyValue(node *unstable.Node) (bool, error) { parentIdx := s.currentIdx it := node.Key() @@ -281,11 +289,11 @@ func (s *SeenTracker) checkKeyValue(node *unstable.Node) error { } else { entry := s.entries[idx] if it.IsLast() { - return fmt.Errorf("toml: key %s is already defined", string(k)) + return false, fmt.Errorf("toml: key %s is already defined", string(k)) } else if entry.kind != tableKind { - return fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind) + return false, fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind) } else if entry.explicit { - return fmt.Errorf("toml: cannot redefine table %s that has already been explicitly defined", string(k)) + return false, fmt.Errorf("toml: cannot redefine table %s that has already been explicitly defined", string(k)) } } @@ -303,45 +311,39 @@ func (s *SeenTracker) checkKeyValue(node *unstable.Node) error { return s.checkArray(value) } - return nil + return false, nil } -func (s *SeenTracker) checkArray(node *unstable.Node) error { +func (s *SeenTracker) checkArray(node *unstable.Node) (first bool, err error) { it := node.Children() for it.Next() { n := it.Node() switch n.Kind { case unstable.InlineTable: - err := s.checkInlineTable(n) + first, err = s.checkInlineTable(n) if err != nil { - return err + return false, err } case unstable.Array: - err := s.checkArray(n) + first, err = s.checkArray(n) if err != nil { - return err + return false, err } } } - return nil + return first, nil } -func (s *SeenTracker) checkInlineTable(node *unstable.Node) error { - if pool.New == nil { - pool.New = func() interface{} { - return &SeenTracker{} - } - } - +func (s *SeenTracker) checkInlineTable(node *unstable.Node) (first bool, err error) { s = pool.Get().(*SeenTracker) s.reset() it := node.Children() for it.Next() { n := it.Node() - err := s.checkKeyValue(n) + first, err = s.checkKeyValue(n) if err != nil { - return err + return false, err } } @@ -352,5 +354,5 @@ func (s *SeenTracker) checkInlineTable(node *unstable.Node) error { // redefinition of its keys: check* functions cannot walk into // a value. pool.Put(s) - return nil + return first, nil } diff --git a/vendor/github.com/pelletier/go-toml/v2/marshaler.go b/vendor/github.com/pelletier/go-toml/v2/marshaler.go index 83875260..161acd93 100644 --- a/vendor/github.com/pelletier/go-toml/v2/marshaler.go +++ b/vendor/github.com/pelletier/go-toml/v2/marshaler.go @@ -3,11 +3,12 @@ package toml import ( "bytes" "encoding" + "encoding/json" "fmt" "io" "math" "reflect" - "sort" + "slices" "strconv" "strings" "time" @@ -37,10 +38,11 @@ type Encoder struct { w io.Writer // global settings - tablesInline bool - arraysMultiline bool - indentSymbol string - indentTables bool + tablesInline bool + arraysMultiline bool + indentSymbol string + indentTables bool + marshalJsonNumbers bool } // NewEncoder returns a new Encoder that writes to w. @@ -87,6 +89,17 @@ func (enc *Encoder) SetIndentTables(indent bool) *Encoder { return enc } +// SetMarshalJsonNumbers forces the encoder to serialize `json.Number` as a +// float or integer instead of relying on TextMarshaler to emit a string. +// +// *Unstable:* This method does not follow the compatibility guarantees of +// semver. It can be changed or removed without a new major version being +// issued. +func (enc *Encoder) SetMarshalJsonNumbers(indent bool) *Encoder { + enc.marshalJsonNumbers = indent + return enc +} + // Encode writes a TOML representation of v to the stream. // // If v cannot be represented to TOML it returns an error. @@ -148,6 +161,9 @@ func (enc *Encoder) SetIndentTables(indent bool) *Encoder { // // The "omitempty" option prevents empty values or groups from being emitted. // +// The "commented" option prefixes the value and all its children with a comment +// symbol. +// // In addition to the "toml" tag struct tag, a "comment" tag can be used to emit // a TOML comment before the value being annotated. Comments are ignored inside // inline tables. For array tables, the comment is only present before the first @@ -180,6 +196,7 @@ func (enc *Encoder) Encode(v interface{}) error { type valueOptions struct { multiline bool omitempty bool + commented bool comment string } @@ -205,6 +222,9 @@ type encoderCtx struct { // Indentation level indent int + // Prefix the current value with a comment. + commented bool + // Options coming from struct tags options valueOptions } @@ -245,10 +265,22 @@ func (enc *Encoder) encode(b []byte, ctx encoderCtx, v reflect.Value) ([]byte, e return append(b, x.String()...), nil case LocalDateTime: return append(b, x.String()...), nil + case json.Number: + if enc.marshalJsonNumbers { + if x == "" { /// Useful zero value. + return append(b, "0"...), nil + } else if v, err := x.Int64(); err == nil { + return enc.encode(b, ctx, reflect.ValueOf(v)) + } else if f, err := x.Float64(); err == nil { + return enc.encode(b, ctx, reflect.ValueOf(f)) + } else { + return nil, fmt.Errorf("toml: unable to convert %q to int64 or float64", x) + } + } } hasTextMarshaler := v.Type().Implements(textMarshalerType) - if hasTextMarshaler || (v.CanAddr() && reflect.PtrTo(v.Type()).Implements(textMarshalerType)) { + if hasTextMarshaler || (v.CanAddr() && reflect.PointerTo(v.Type()).Implements(textMarshalerType)) { if !hasTextMarshaler { v = v.Addr() } @@ -357,6 +389,7 @@ func (enc *Encoder) encodeKv(b []byte, ctx encoderCtx, options valueOptions, v r if !ctx.inline { b = enc.encodeComment(ctx.indent, options.comment, b) + b = enc.commented(ctx.commented, b) b = enc.indent(ctx.indent, b) } @@ -378,6 +411,13 @@ func (enc *Encoder) encodeKv(b []byte, ctx encoderCtx, options valueOptions, v r return b, nil } +func (enc *Encoder) commented(commented bool, b []byte) []byte { + if commented { + return append(b, "# "...) + } + return b +} + func isEmptyValue(v reflect.Value) bool { switch v.Kind() { case reflect.Struct: @@ -526,6 +566,8 @@ func (enc *Encoder) encodeTableHeader(ctx encoderCtx, b []byte) ([]byte, error) b = enc.encodeComment(ctx.indent, ctx.options.comment, b) + b = enc.commented(ctx.commented, b) + b = enc.indent(ctx.indent, b) b = append(b, '[') @@ -589,6 +631,18 @@ func (enc *Encoder) keyToString(k reflect.Value) (string, error) { return "", fmt.Errorf("toml: error marshalling key %v from text: %w", k, err) } return string(keyB), nil + + case keyType.Kind() == reflect.Int || keyType.Kind() == reflect.Int8 || keyType.Kind() == reflect.Int16 || keyType.Kind() == reflect.Int32 || keyType.Kind() == reflect.Int64: + return strconv.FormatInt(k.Int(), 10), nil + + case keyType.Kind() == reflect.Uint || keyType.Kind() == reflect.Uint8 || keyType.Kind() == reflect.Uint16 || keyType.Kind() == reflect.Uint32 || keyType.Kind() == reflect.Uint64: + return strconv.FormatUint(k.Uint(), 10), nil + + case keyType.Kind() == reflect.Float32: + return strconv.FormatFloat(k.Float(), 'f', -1, 32), nil + + case keyType.Kind() == reflect.Float64: + return strconv.FormatFloat(k.Float(), 'f', -1, 64), nil } return "", fmt.Errorf("toml: type %s is not supported as a map key", keyType.Kind()) } @@ -626,8 +680,8 @@ func (enc *Encoder) encodeMap(b []byte, ctx encoderCtx, v reflect.Value) ([]byte } func sortEntriesByKey(e []entry) { - sort.Slice(e, func(i, j int) bool { - return e[i].Key < e[j].Key + slices.SortFunc(e, func(a, b entry) int { + return strings.Compare(a.Key, b.Key) }) } @@ -690,6 +744,8 @@ func walkStruct(ctx encoderCtx, t *table, v reflect.Value) { if fieldType.Anonymous { if fieldType.Type.Kind() == reflect.Struct { walkStruct(ctx, t, f) + } else if fieldType.Type.Kind() == reflect.Ptr && !f.IsNil() && f.Elem().Kind() == reflect.Struct { + walkStruct(ctx, t, f.Elem()) } continue } else { @@ -704,6 +760,7 @@ func walkStruct(ctx encoderCtx, t *table, v reflect.Value) { options := valueOptions{ multiline: opts.multiline, omitempty: opts.omitempty, + commented: opts.commented, comment: fieldType.Tag.Get("comment"), } @@ -763,6 +820,7 @@ type tagOptions struct { multiline bool inline bool omitempty bool + commented bool } func parseTag(tag string) (string, tagOptions) { @@ -790,6 +848,8 @@ func parseTag(tag string) (string, tagOptions) { opts.inline = true case "omitempty": opts.omitempty = true + case "commented": + opts.commented = true } } @@ -825,8 +885,10 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro hasNonEmptyKV = true ctx.setKey(kv.Key) + ctx2 := ctx + ctx2.commented = kv.Options.commented || ctx2.commented - b, err = enc.encodeKv(b, ctx, kv.Options, kv.Value) + b, err = enc.encodeKv(b, ctx2, kv.Options, kv.Value) if err != nil { return nil, err } @@ -851,8 +913,10 @@ func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, erro ctx.setKey(table.Key) ctx.options = table.Options + ctx2 := ctx + ctx2.commented = ctx2.commented || ctx.options.commented - b, err = enc.encode(b, ctx, table.Value) + b, err = enc.encode(b, ctx2, table.Value) if err != nil { return nil, err } @@ -899,7 +963,7 @@ func willConvertToTable(ctx encoderCtx, v reflect.Value) bool { if !v.IsValid() { return false } - if v.Type() == timeType || v.Type().Implements(textMarshalerType) || (v.Kind() != reflect.Ptr && v.CanAddr() && reflect.PtrTo(v.Type()).Implements(textMarshalerType)) { + if v.Type() == timeType || v.Type().Implements(textMarshalerType) || (v.Kind() != reflect.Ptr && v.CanAddr() && reflect.PointerTo(v.Type()).Implements(textMarshalerType)) { return false } @@ -970,6 +1034,13 @@ func (enc *Encoder) encodeSliceAsArrayTable(b []byte, ctx encoderCtx, v reflect. ctx.shiftKey() scratch := make([]byte, 0, 64) + + scratch = enc.commented(ctx.commented, scratch) + + if enc.indentTables { + scratch = enc.indent(ctx.indent, scratch) + } + scratch = append(scratch, "[["...) for i, k := range ctx.parentKey { @@ -985,6 +1056,10 @@ func (enc *Encoder) encodeSliceAsArrayTable(b []byte, ctx encoderCtx, v reflect. b = enc.encodeComment(ctx.indent, ctx.options.comment, b) + if enc.indentTables { + ctx.indent++ + } + for i := 0; i < v.Len(); i++ { if i != 0 { b = append(b, "\n"...) diff --git a/vendor/github.com/pelletier/go-toml/v2/unmarshaler.go b/vendor/github.com/pelletier/go-toml/v2/unmarshaler.go index 5cede081..c3df8bee 100644 --- a/vendor/github.com/pelletier/go-toml/v2/unmarshaler.go +++ b/vendor/github.com/pelletier/go-toml/v2/unmarshaler.go @@ -5,9 +5,9 @@ import ( "errors" "fmt" "io" - "io/ioutil" "math" "reflect" + "strconv" "strings" "sync/atomic" "time" @@ -21,10 +21,8 @@ import ( // // It is a shortcut for Decoder.Decode() with the default options. func Unmarshal(data []byte, v interface{}) error { - p := unstable.Parser{} - p.Reset(data) - d := decoder{p: &p} - + d := decoder{} + d.p.Reset(data) return d.FromParser(v) } @@ -35,6 +33,9 @@ type Decoder struct { // global settings strict bool + + // toggles unmarshaler interface + unmarshalerInterface bool } // NewDecoder creates a new Decoder that will read from r. @@ -54,6 +55,24 @@ func (d *Decoder) DisallowUnknownFields() *Decoder { return d } +// EnableUnmarshalerInterface allows to enable unmarshaler interface. +// +// With this feature enabled, types implementing the unstable/Unmarshaler +// interface can be decoded from any structure of the document. It allows types +// that don't have a straightfoward TOML representation to provide their own +// decoding logic. +// +// Currently, types can only decode from a single value. Tables and array tables +// are not supported. +// +// *Unstable:* This method does not follow the compatibility guarantees of +// semver. It can be changed or removed without a new major version being +// issued. +func (d *Decoder) EnableUnmarshalerInterface() *Decoder { + d.unmarshalerInterface = true + return d +} + // Decode the whole content of r into v. // // By default, values in the document that don't exist in the target Go value @@ -96,26 +115,25 @@ func (d *Decoder) DisallowUnknownFields() *Decoder { // Inline Table -> same as Table // Array of Tables -> same as Array and Table func (d *Decoder) Decode(v interface{}) error { - b, err := ioutil.ReadAll(d.r) + b, err := io.ReadAll(d.r) if err != nil { return fmt.Errorf("toml: %w", err) } - p := unstable.Parser{} - p.Reset(b) dec := decoder{ - p: &p, strict: strict{ Enabled: d.strict, }, + unmarshalerInterface: d.unmarshalerInterface, } + dec.p.Reset(b) return dec.FromParser(v) } type decoder struct { // Which parser instance in use for this decoding session. - p *unstable.Parser + p unstable.Parser // Flag indicating that the current expression is stashed. // If set to true, calling nextExpr will not actually pull a new expression @@ -127,6 +145,10 @@ type decoder struct { // need to be skipped. skipUntilTable bool + // Flag indicating that the current array/slice table should be cleared because + // it is the first encounter of an array table. + clearArrayTable bool + // Tracks position in Go arrays. // This is used when decoding [[array tables]] into Go arrays. Given array // tables are separate TOML expression, we need to keep track of where we @@ -139,6 +161,9 @@ type decoder struct { // Strict mode strict strict + // Flag that enables/disables unmarshaler interface. + unmarshalerInterface bool + // Current context for the error. errorContext *errorContext } @@ -149,12 +174,16 @@ type errorContext struct { } func (d *decoder) typeMismatchError(toml string, target reflect.Type) error { + return fmt.Errorf("toml: %s", d.typeMismatchString(toml, target)) +} + +func (d *decoder) typeMismatchString(toml string, target reflect.Type) string { if d.errorContext != nil && d.errorContext.Struct != nil { ctx := d.errorContext f := ctx.Struct.FieldByIndex(ctx.Field) - return fmt.Errorf("toml: cannot decode TOML %s into struct field %s.%s of type %s", toml, ctx.Struct, f.Name, f.Type) + return fmt.Sprintf("cannot decode TOML %s into struct field %s.%s of type %s", toml, ctx.Struct, f.Name, f.Type) } - return fmt.Errorf("toml: cannot decode TOML %s into a Go value of type %s", toml, target) + return fmt.Sprintf("cannot decode TOML %s into a Go value of type %s", toml, target) } func (d *decoder) expr() *unstable.Node { @@ -242,9 +271,10 @@ Rules for the unmarshal code: func (d *decoder) handleRootExpression(expr *unstable.Node, v reflect.Value) error { var x reflect.Value var err error + var first bool // used for to clear array tables on first use if !(d.skipUntilTable && expr.Kind == unstable.KeyValue) { - err = d.seen.CheckExpression(expr) + first, err = d.seen.CheckExpression(expr) if err != nil { return err } @@ -263,6 +293,7 @@ func (d *decoder) handleRootExpression(expr *unstable.Node, v reflect.Value) err case unstable.ArrayTable: d.skipUntilTable = false d.strict.EnterArrayTable(expr) + d.clearArrayTable = first x, err = d.handleArrayTable(expr.Key(), v) default: panic(fmt.Errorf("parser should not permit expression of kind %s at document root", expr.Kind)) @@ -303,6 +334,10 @@ func (d *decoder) handleArrayTableCollectionLast(key unstable.Iterator, v reflec reflect.Copy(nelem, elem) elem = nelem } + if d.clearArrayTable && elem.Len() > 0 { + elem.SetLen(0) + d.clearArrayTable = false + } } return d.handleArrayTableCollectionLast(key, elem) case reflect.Ptr: @@ -321,6 +356,10 @@ func (d *decoder) handleArrayTableCollectionLast(key unstable.Iterator, v reflec return v, nil case reflect.Slice: + if d.clearArrayTable && v.Len() > 0 { + v.SetLen(0) + d.clearArrayTable = false + } elemType := v.Type().Elem() var elem reflect.Value if elemType.Kind() == reflect.Interface { @@ -572,7 +611,7 @@ func (d *decoder) handleKeyValues(v reflect.Value) (reflect.Value, error) { break } - err := d.seen.CheckExpression(expr) + _, err := d.seen.CheckExpression(expr) if err != nil { return reflect.Value{}, err } @@ -630,6 +669,14 @@ func (d *decoder) handleValue(value *unstable.Node, v reflect.Value) error { v = initAndDereferencePointer(v) } + if d.unmarshalerInterface { + if v.CanAddr() && v.Addr().CanInterface() { + if outi, ok := v.Addr().Interface().(unstable.Unmarshaler); ok { + return outi.UnmarshalTOML(value) + } + } + } + ok, err := d.tryTextUnmarshaler(value, v) if ok || err != nil { return err @@ -963,7 +1010,7 @@ func (d *decoder) unmarshalInteger(value *unstable.Node, v reflect.Value) error case reflect.Interface: r = reflect.ValueOf(i) default: - return d.typeMismatchError("integer", v.Type()) + return unstable.NewParserError(d.p.Raw(value.Raw), d.typeMismatchString("integer", v.Type())) } if !r.Type().AssignableTo(v.Type()) { @@ -982,7 +1029,7 @@ func (d *decoder) unmarshalString(value *unstable.Node, v reflect.Value) error { case reflect.Interface: v.Set(reflect.ValueOf(string(value.Data))) default: - return unstable.NewParserError(d.p.Raw(value.Raw), "cannot store TOML string into a Go %s", v.Kind()) + return unstable.NewParserError(d.p.Raw(value.Raw), d.typeMismatchString("string", v.Type())) } return nil @@ -1027,12 +1074,39 @@ func (d *decoder) keyFromData(keyType reflect.Type, data []byte) (reflect.Value, } return mk, nil - case reflect.PtrTo(keyType).Implements(textUnmarshalerType): + case reflect.PointerTo(keyType).Implements(textUnmarshalerType): mk := reflect.New(keyType) if err := mk.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { return reflect.Value{}, fmt.Errorf("toml: error unmarshalling key type %s from text: %w", stringType, err) } return mk.Elem(), nil + + case keyType.Kind() == reflect.Int || keyType.Kind() == reflect.Int8 || keyType.Kind() == reflect.Int16 || keyType.Kind() == reflect.Int32 || keyType.Kind() == reflect.Int64: + key, err := strconv.ParseInt(string(data), 10, 64) + if err != nil { + return reflect.Value{}, fmt.Errorf("toml: error parsing key of type %s from integer: %w", stringType, err) + } + return reflect.ValueOf(key).Convert(keyType), nil + case keyType.Kind() == reflect.Uint || keyType.Kind() == reflect.Uint8 || keyType.Kind() == reflect.Uint16 || keyType.Kind() == reflect.Uint32 || keyType.Kind() == reflect.Uint64: + key, err := strconv.ParseUint(string(data), 10, 64) + if err != nil { + return reflect.Value{}, fmt.Errorf("toml: error parsing key of type %s from unsigned integer: %w", stringType, err) + } + return reflect.ValueOf(key).Convert(keyType), nil + + case keyType.Kind() == reflect.Float32: + key, err := strconv.ParseFloat(string(data), 32) + if err != nil { + return reflect.Value{}, fmt.Errorf("toml: error parsing key of type %s from float: %w", stringType, err) + } + return reflect.ValueOf(float32(key)), nil + + case keyType.Kind() == reflect.Float64: + key, err := strconv.ParseFloat(string(data), 64) + if err != nil { + return reflect.Value{}, fmt.Errorf("toml: error parsing key of type %s from float: %w", stringType, err) + } + return reflect.ValueOf(float64(key)), nil } return reflect.Value{}, fmt.Errorf("toml: cannot convert map key of type %s to expected type %s", stringType, keyType) } @@ -1093,9 +1167,9 @@ func (d *decoder) handleKeyValuePart(key unstable.Iterator, value *unstable.Node f := fieldByIndex(v, path) - if !f.CanSet() { - // If the field is not settable, need to take a slower path and make a copy of - // the struct itself to a new location. + if !f.CanAddr() { + // If the field is not addressable, need to take a slower path and + // make a copy of the struct itself to a new location. nvp := reflect.New(v.Type()) nvp.Elem().Set(v) v = nvp.Elem() diff --git a/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go b/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go index a8eb0529..50358a44 100644 --- a/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go +++ b/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go @@ -1013,6 +1013,7 @@ func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) return p.builder.Push(Node{ Kind: Float, Data: b[:3], + Raw: p.Range(b[:3]), }), b[3:], nil case 'n': if !scanFollowsNan(b) { @@ -1022,6 +1023,7 @@ func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) return p.builder.Push(Node{ Kind: Float, Data: b[:3], + Raw: p.Range(b[:3]), }), b[3:], nil case '+', '-': return p.scanIntOrFloat(b) @@ -1146,6 +1148,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { return p.builder.Push(Node{ Kind: Integer, Data: b[:i], + Raw: p.Range(b[:i]), }), b[i:], nil } @@ -1169,6 +1172,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { return p.builder.Push(Node{ Kind: Float, Data: b[:i+3], + Raw: p.Range(b[:i+3]), }), b[i+3:], nil } @@ -1180,6 +1184,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { return p.builder.Push(Node{ Kind: Float, Data: b[:i+3], + Raw: p.Range(b[:i+3]), }), b[i+3:], nil } @@ -1202,6 +1207,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { return p.builder.Push(Node{ Kind: kind, Data: b[:i], + Raw: p.Range(b[:i]), }), b[i:], nil } diff --git a/vendor/github.com/pelletier/go-toml/v2/unstable/unmarshaler.go b/vendor/github.com/pelletier/go-toml/v2/unstable/unmarshaler.go new file mode 100644 index 00000000..00cfd6de --- /dev/null +++ b/vendor/github.com/pelletier/go-toml/v2/unstable/unmarshaler.go @@ -0,0 +1,7 @@ +package unstable + +// The Unmarshaler interface may be implemented by types to customize their +// behavior when being unmarshaled from a TOML document. +type Unmarshaler interface { + UnmarshalTOML(value *Node) error +} diff --git a/vendor/github.com/pires/go-proxyproto/header.go b/vendor/github.com/pires/go-proxyproto/header.go index 81ebeb38..209c2ccf 100644 --- a/vendor/github.com/pires/go-proxyproto/header.go +++ b/vendor/github.com/pires/go-proxyproto/header.go @@ -155,11 +155,11 @@ func (header *Header) EqualsTo(otherHeader *Header) bool { if otherHeader == nil { return false } - // TLVs only exist for version 2 - if header.Version == 2 && !bytes.Equal(header.rawTLVs, otherHeader.rawTLVs) { + if header.Version != otherHeader.Version || header.Command != otherHeader.Command || header.TransportProtocol != otherHeader.TransportProtocol { return false } - if header.Version != otherHeader.Version || header.Command != otherHeader.Command || header.TransportProtocol != otherHeader.TransportProtocol { + // TLVs only exist for version 2 + if header.Version == 2 && !bytes.Equal(header.rawTLVs, otherHeader.rawTLVs) { return false } // Return early for header with LOCAL command, which contains no address information diff --git a/vendor/github.com/pires/go-proxyproto/policy.go b/vendor/github.com/pires/go-proxyproto/policy.go index 71ad62ba..ebef8b98 100644 --- a/vendor/github.com/pires/go-proxyproto/policy.go +++ b/vendor/github.com/pires/go-proxyproto/policy.go @@ -14,6 +14,21 @@ import ( // In case an error is returned the connection is denied. type PolicyFunc func(upstream net.Addr) (Policy, error) +// ConnPolicyFunc can be used to decide whether to trust the PROXY info +// based on connection policy options. If set, the connecting addresses +// (remote and local) are passed in as argument. +// +// See below for the different policies. +// +// In case an error is returned the connection is denied. +type ConnPolicyFunc func(connPolicyOptions ConnPolicyOptions) (Policy, error) + +// ConnPolicyOptions contains the remote and local addresses of a connection. +type ConnPolicyOptions struct { + Upstream net.Addr + Downstream net.Addr +} + // Policy defines how a connection with a PROXY header address is treated. type Policy int @@ -32,8 +47,31 @@ const ( // a PROXY header is not present, subsequent reads do not. It is the task // of the code using the connection to handle that case properly. REQUIRE + // SKIP accepts a connection without requiring the PROXY header + // Note: an example usage can be found in the SkipProxyHeaderForCIDR + // function. + SKIP ) +// SkipProxyHeaderForCIDR returns a PolicyFunc which can be used to accept a +// connection from a skipHeaderCIDR without requiring a PROXY header, e.g. +// Kubernetes pods local traffic. The def is a policy to use when an upstream +// address doesn't match the skipHeaderCIDR. +func SkipProxyHeaderForCIDR(skipHeaderCIDR *net.IPNet, def Policy) PolicyFunc { + return func(upstream net.Addr) (Policy, error) { + ip, err := ipFromAddr(upstream) + if err != nil { + return def, err + } + + if skipHeaderCIDR != nil && skipHeaderCIDR.Contains(ip) { + return SKIP, nil + } + + return def, nil + } +} + // WithPolicy adds given policy to a connection when passed as option to NewConn() func WithPolicy(p Policy) func(*Conn) { return func(c *Conn) { @@ -147,3 +185,22 @@ func ipFromAddr(upstream net.Addr) (net.IP, error) { return upstreamIP, nil } + +// IgnoreProxyHeaderNotOnInterface retuns a ConnPolicyFunc which can be used to +// decide whether to use or ignore PROXY headers depending on the connection +// being made on a specific interface. This policy can be used when the server +// is bound to multiple interfaces but wants to allow on only one interface. +func IgnoreProxyHeaderNotOnInterface(allowedIP net.IP) ConnPolicyFunc { + return func(connOpts ConnPolicyOptions) (Policy, error) { + ip, err := ipFromAddr(connOpts.Downstream) + if err != nil { + return REJECT, err + } + + if allowedIP.Equal(ip) { + return USE, nil + } + + return IGNORE, nil + } +} diff --git a/vendor/github.com/pires/go-proxyproto/protocol.go b/vendor/github.com/pires/go-proxyproto/protocol.go index 6f5641d4..270b90d2 100644 --- a/vendor/github.com/pires/go-proxyproto/protocol.go +++ b/vendor/github.com/pires/go-proxyproto/protocol.go @@ -2,6 +2,8 @@ package proxyproto import ( "bufio" + "errors" + "fmt" "io" "net" "sync" @@ -9,11 +11,17 @@ import ( "time" ) -// DefaultReadHeaderTimeout is how long header processing waits for header to -// be read from the wire, if Listener.ReaderHeaderTimeout is not set. -// It's kept as a global variable so to make it easier to find and override, -// e.g. go build -ldflags -X "github.com/pires/go-proxyproto.DefaultReadHeaderTimeout=1s" -var DefaultReadHeaderTimeout = 200 * time.Millisecond +var ( + // DefaultReadHeaderTimeout is how long header processing waits for header to + // be read from the wire, if Listener.ReaderHeaderTimeout is not set. + // It's kept as a global variable so to make it easier to find and override, + // e.g. go build -ldflags -X "github.com/pires/go-proxyproto.DefaultReadHeaderTimeout=1s" + DefaultReadHeaderTimeout = 10 * time.Second + + // ErrInvalidUpstream should be returned when an upstream connection address + // is not trusted, and therefore is invalid. + ErrInvalidUpstream = fmt.Errorf("proxyproto: upstream connection address not trusted for PROXY information") +) // Listener is used to wrap an underlying listener, // whose connections may be using the HAProxy Proxy Protocol. @@ -22,9 +30,14 @@ var DefaultReadHeaderTimeout = 200 * time.Millisecond // connections in order to prevent blocking operations. If no ReadHeaderTimeout // is set, a default of 200ms will be used. This can be disabled by setting the // timeout to < 0. +// +// Only one of Policy or ConnPolicy should be provided. If both are provided then +// a panic would occur during accept. type Listener struct { - Listener net.Listener + Listener net.Listener + // Deprecated: use ConnPolicyFunc instead. This will be removed in future release. Policy PolicyFunc + ConnPolicy ConnPolicyFunc ValidateHeader Validator ReadHeaderTimeout time.Duration } @@ -38,10 +51,11 @@ type Conn struct { once sync.Once readErr error conn net.Conn - Validate Validator bufReader *bufio.Reader + reader io.Reader header *Header ProxyHeaderPolicy Policy + Validate Validator readHeaderTimeout time.Duration } @@ -58,39 +72,70 @@ func ValidateHeader(v Validator) func(*Conn) { } } -// Accept waits for and returns the next connection to the listener. -func (p *Listener) Accept() (net.Conn, error) { - // Get the underlying connection - conn, err := p.Listener.Accept() - if err != nil { - return nil, err +// SetReadHeaderTimeout sets the readHeaderTimeout for a connection when passed as option to NewConn() +func SetReadHeaderTimeout(t time.Duration) func(*Conn) { + return func(c *Conn) { + if t >= 0 { + c.readHeaderTimeout = t + } } +} - proxyHeaderPolicy := USE - if p.Policy != nil { - proxyHeaderPolicy, err = p.Policy(conn.RemoteAddr()) +// Accept waits for and returns the next valid connection to the listener. +func (p *Listener) Accept() (net.Conn, error) { + for { + // Get the underlying connection + conn, err := p.Listener.Accept() if err != nil { - // can't decide the policy, we can't accept the connection - conn.Close() return nil, err } - } - newConn := NewConn( - conn, - WithPolicy(proxyHeaderPolicy), - ValidateHeader(p.ValidateHeader), - ) + proxyHeaderPolicy := USE + if p.Policy != nil && p.ConnPolicy != nil { + panic("only one of policy or connpolicy must be provided.") + } + if p.Policy != nil || p.ConnPolicy != nil { + if p.Policy != nil { + proxyHeaderPolicy, err = p.Policy(conn.RemoteAddr()) + } else { + proxyHeaderPolicy, err = p.ConnPolicy(ConnPolicyOptions{ + Upstream: conn.RemoteAddr(), + Downstream: conn.LocalAddr(), + }) + } + if err != nil { + // can't decide the policy, we can't accept the connection + conn.Close() - // If the ReadHeaderTimeout for the listener is unset, use the default timeout. - if p.ReadHeaderTimeout == 0 { - p.ReadHeaderTimeout = DefaultReadHeaderTimeout - } + if errors.Is(err, ErrInvalidUpstream) { + // keep listening for other connections + continue + } + + return nil, err + } + // Handle a connection as a regular one + if proxyHeaderPolicy == SKIP { + return conn, nil + } + } - // Set the readHeaderTimeout of the new conn to the value of the listener - newConn.readHeaderTimeout = p.ReadHeaderTimeout + newConn := NewConn( + conn, + WithPolicy(proxyHeaderPolicy), + ValidateHeader(p.ValidateHeader), + ) - return newConn, nil + // If the ReadHeaderTimeout for the listener is unset, use the default timeout. + if p.ReadHeaderTimeout == 0 { + p.ReadHeaderTimeout = DefaultReadHeaderTimeout + } + + // Set the readHeaderTimeout of the new conn to the value of the listener + newConn.readHeaderTimeout = p.ReadHeaderTimeout + + return newConn, nil + } } // Close closes the underlying listener. @@ -106,8 +151,15 @@ func (p *Listener) Addr() net.Addr { // NewConn is used to wrap a net.Conn that may be speaking // the proxy protocol into a proxyproto.Conn func NewConn(conn net.Conn, opts ...func(*Conn)) *Conn { + // For v1 the header length is at most 108 bytes. + // For v2 the header length is at most 52 bytes plus the length of the TLVs. + // We use 256 bytes to be safe. + const bufSize = 256 + br := bufio.NewReaderSize(conn, bufSize) + pConn := &Conn{ - bufReader: bufio.NewReader(conn), + bufReader: br, + reader: io.MultiReader(br, conn), conn: conn, } @@ -129,7 +181,7 @@ func (p *Conn) Read(b []byte) (int, error) { return 0, p.readErr } - return p.bufReader.Read(b) + return p.reader.Read(b) } // Write wraps original conn.Write @@ -240,7 +292,9 @@ func (p *Conn) readHeader() error { // run on the connection, as we don't want to override the previous // read deadline the user may have used. if p.readHeaderTimeout > 0 { - p.conn.SetReadDeadline(time.Now().Add(p.readHeaderTimeout)) + if err := p.conn.SetReadDeadline(time.Now().Add(p.readHeaderTimeout)); err != nil { + return err + } } header, err := Read(p.bufReader) @@ -255,7 +309,9 @@ func (p *Conn) readHeader() error { if t == nil { t = time.Time{} } - p.conn.SetReadDeadline(t.(time.Time)) + if err := p.conn.SetReadDeadline(t.(time.Time)); err != nil { + return err + } if netErr, ok := err.(net.Error); ok && netErr.Timeout() { err = ErrNoProxyProtocol } @@ -307,5 +363,27 @@ func (p *Conn) WriteTo(w io.Writer) (int64, error) { if p.readErr != nil { return 0, p.readErr } - return p.bufReader.WriteTo(w) + + b := make([]byte, p.bufReader.Buffered()) + if _, err := p.bufReader.Read(b); err != nil { + return 0, err // this should never as we read buffered data + } + + var n int64 + { + nn, err := w.Write(b) + n += int64(nn) + if err != nil { + return n, err + } + } + { + nn, err := io.Copy(w, p.conn) + n += nn + if err != nil { + return n, err + } + } + + return n, nil } diff --git a/vendor/github.com/pires/go-proxyproto/v1.go b/vendor/github.com/pires/go-proxyproto/v1.go index 23de95ec..0d34ba52 100644 --- a/vendor/github.com/pires/go-proxyproto/v1.go +++ b/vendor/github.com/pires/go-proxyproto/v1.go @@ -5,6 +5,7 @@ import ( "bytes" "fmt" "net" + "net/netip" "strconv" "strings" ) @@ -221,11 +222,22 @@ func parseV1PortNumber(portStr string) (int, error) { return port, nil } -func parseV1IPAddress(protocol AddressFamilyAndProtocol, addrStr string) (addr net.IP, err error) { - addr = net.ParseIP(addrStr) - tryV4 := addr.To4() - if (protocol == TCPv4 && tryV4 == nil) || (protocol == TCPv6 && tryV4 != nil) { - err = ErrInvalidAddress +func parseV1IPAddress(protocol AddressFamilyAndProtocol, addrStr string) (net.IP, error) { + addr, err := netip.ParseAddr(addrStr) + if err != nil { + return nil, ErrInvalidAddress } - return + + switch protocol { + case TCPv4: + if addr.Is4() { + return net.IP(addr.AsSlice()), nil + } + case TCPv6: + if addr.Is6() || addr.Is4In6() { + return net.IP(addr.AsSlice()), nil + } + } + + return nil, ErrInvalidAddress } diff --git a/vendor/github.com/quic-go/quic-go/.gitignore b/vendor/github.com/quic-go/quic-go/.gitignore index 3cc06f24..b454729d 100644 --- a/vendor/github.com/quic-go/quic-go/.gitignore +++ b/vendor/github.com/quic-go/quic-go/.gitignore @@ -4,6 +4,7 @@ main mockgen_tmp.go *.qtr *.qlog +*.sqlog *.txt race.[0-9]* diff --git a/vendor/github.com/quic-go/quic-go/.golangci.yml b/vendor/github.com/quic-go/quic-go/.golangci.yml index 469d54cf..63b40cc3 100644 --- a/vendor/github.com/quic-go/quic-go/.golangci.yml +++ b/vendor/github.com/quic-go/quic-go/.golangci.yml @@ -1,21 +1,29 @@ -run: - skip-files: - - internal/handshake/cipher_suite.go linters-settings: misspell: ignore-words: - ect + depguard: + rules: + quicvarint: + list-mode: strict + files: + - "**/github.com/quic-go/quic-go/quicvarint/*" + - "!$test" + allow: + - $gostd linters: disable-all: true enable: - asciicheck + - depguard - exhaustive - exportloopref - goimports - gofmt # redundant, since gofmt *should* be a no-op after gofumpt - gofumpt - gosimple + - govet - ineffassign - misspell - prealloc @@ -24,10 +32,15 @@ linters: - unconvert - unparam - unused - - vet issues: + exclude-files: + - internal/handshake/cipher_suite.go exclude-rules: - path: internal/qtls linters: - depguard + - path: _test\.go + linters: + - exhaustive + - prealloc diff --git a/vendor/github.com/quic-go/quic-go/README.md b/vendor/github.com/quic-go/quic-go/README.md index faba82f3..94823d99 100644 --- a/vendor/github.com/quic-go/quic-go/README.md +++ b/vendor/github.com/quic-go/quic-go/README.md @@ -2,11 +2,12 @@ +[![Documentation](https://img.shields.io/badge/docs-quic--go.net-red?style=flat)](https://quic-go.net/docs/) [![PkgGoDev](https://pkg.go.dev/badge/github.com/quic-go/quic-go)](https://pkg.go.dev/github.com/quic-go/quic-go) [![Code Coverage](https://img.shields.io/codecov/c/github/quic-go/quic-go/master.svg?style=flat-square)](https://codecov.io/gh/quic-go/quic-go/) [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/quic-go.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:quic-go) -quic-go is an implementation of the QUIC protocol ([RFC 9000](https://datatracker.ietf.org/doc/html/rfc9000), [RFC 9001](https://datatracker.ietf.org/doc/html/rfc9001), [RFC 9002](https://datatracker.ietf.org/doc/html/rfc9002)) in Go. It has support for HTTP/3 ([RFC 9114](https://datatracker.ietf.org/doc/html/rfc9114)), including QPACK ([RFC 9204](https://datatracker.ietf.org/doc/html/rfc9204)). +quic-go is an implementation of the QUIC protocol ([RFC 9000](https://datatracker.ietf.org/doc/html/rfc9000), [RFC 9001](https://datatracker.ietf.org/doc/html/rfc9001), [RFC 9002](https://datatracker.ietf.org/doc/html/rfc9002)) in Go. It has support for HTTP/3 ([RFC 9114](https://datatracker.ietf.org/doc/html/rfc9114)), including QPACK ([RFC 9204](https://datatracker.ietf.org/doc/html/rfc9204)) and HTTP Datagrams ([RFC 9297](https://datatracker.ietf.org/doc/html/rfc9297)). In addition to these base RFCs, it also implements the following RFCs: * Unreliable Datagram Extension ([RFC 9221](https://datatracker.ietf.org/doc/html/rfc9221)) @@ -16,207 +17,7 @@ In addition to these base RFCs, it also implements the following RFCs: Support for WebTransport over HTTP/3 ([draft-ietf-webtrans-http3](https://datatracker.ietf.org/doc/draft-ietf-webtrans-http3/)) is implemented in [webtransport-go](https://github.com/quic-go/webtransport-go). -## Using QUIC - -### Running a Server - -The central entry point is the `quic.Transport`. A transport manages QUIC connections running on a single UDP socket. Since QUIC uses Connection IDs, it can demultiplex a listener (accepting incoming connections) and an arbitrary number of outgoing QUIC connections on the same UDP socket. - -```go -udpConn, err := net.ListenUDP("udp4", &net.UDPAddr{Port: 1234}) -// ... error handling -tr := quic.Transport{ - Conn: udpConn, -} -ln, err := tr.Listen(tlsConf, quicConf) -// ... error handling -go func() { - for { - conn, err := ln.Accept() - // ... error handling - // handle the connection, usually in a new Go routine - } -}() -``` - -The listener `ln` can now be used to accept incoming QUIC connections by (repeatedly) calling the `Accept` method (see below for more information on the `quic.Connection`). - -As a shortcut, `quic.Listen` and `quic.ListenAddr` can be used without explicitly initializing a `quic.Transport`: - -``` -ln, err := quic.Listen(udpConn, tlsConf, quicConf) -``` - -When using the shortcut, it's not possible to reuse the same UDP socket for outgoing connections. - -### Running a Client - -As mentioned above, multiple outgoing connections can share a single UDP socket, since QUIC uses Connection IDs to demultiplex connections. - -```go -ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) // 3s handshake timeout -defer cancel() -conn, err := tr.Dial(ctx, , , ) -// ... error handling -``` - -As a shortcut, `quic.Dial` and `quic.DialAddr` can be used without explictly initializing a `quic.Transport`: - -```go -ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) // 3s handshake timeout -defer cancel() -conn, err := quic.Dial(ctx, conn, , , ) -``` - -Just as we saw before when used a similar shortcut to run a server, it's also not possible to reuse the same UDP socket for other outgoing connections, or to listen for incoming connections. - -### Using a QUIC Connection - -#### Accepting Streams - -QUIC is a stream-multiplexed transport. A `quic.Connection` fundamentally differs from the `net.Conn` and the `net.PacketConn` interface defined in the standard library. Data is sent and received on (unidirectional and bidirectional) streams (and, if supported, in [datagrams](#quic-datagrams)), not on the connection itself. The stream state machine is described in detail in [Section 3 of RFC 9000](https://datatracker.ietf.org/doc/html/rfc9000#section-3). - -Note: A unidirectional stream is a stream that the initiator can only write to (`quic.SendStream`), and the receiver can only read from (`quic.ReceiveStream`). A bidirectional stream (`quic.Stream`) allows reading from and writing to for both sides. - -On the receiver side, streams are accepted using the `AcceptStream` (for bidirectional) and `AcceptUniStream` functions. For most user cases, it makes sense to call these functions in a loop: - -```go -for { - str, err := conn.AcceptStream(context.Background()) // for bidirectional streams - // ... error handling - // handle the stream, usually in a new Go routine -} -``` - -These functions return an error when the underlying QUIC connection is closed. - -#### Opening Streams - -There are two slightly different ways to open streams, one synchronous and one (potentially) asynchronous. This API is necessary since the receiver grants us a certain number of streams that we're allowed to open. It may grant us additional streams later on (typically when existing streams are closed), but it means that at the time we want to open a new stream, we might not be able to do so. - -Using the synchronous method `OpenStreamSync` for bidirectional streams, and `OpenUniStreamSync` for unidirectional streams, an application can block until the peer allows opening additional streams. In case that we're allowed to open a new stream, these methods return right away: - -```go -ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) -defer cancel() -str, err := conn.OpenStreamSync(ctx) // wait up to 5s to open a new bidirectional stream -``` - -The asynchronous version never blocks. If it's currently not possible to open a new stream, it returns a `net.Error` timeout error: - -```go -str, err := conn.OpenStream() -if nerr, ok := err.(net.Error); ok && nerr.Timeout() { - // It's currently not possible to open another stream, - // but it might be possible later, once the peer allowed us to do so. -} -``` - -These functions return an error when the underlying QUIC connection is closed. - -#### Using Streams - -Using QUIC streams is pretty straightforward. The `quic.ReceiveStream` implements the `io.Reader` interface, and the `quic.SendStream` implements the `io.Writer` interface. A bidirectional stream (`quic.Stream`) implements both these interfaces. Conceptually, a bidirectional stream can be thought of as the composition of two unidirectional streams in opposite directions. - -Calling `Close` on a `quic.SendStream` or a `quic.Stream` closes the send side of the stream. On the receiver side, this will be surfaced as an `io.EOF` returned from the `io.Reader` once all data has been consumed. Note that for bidirectional streams, `Close` _only_ closes the send side of the stream. It is still possible to read from the stream until the peer closes or resets the stream. - -In case the application wishes to abort sending on a `quic.SendStream` or a `quic.Stream` , it can reset the send side by calling `CancelWrite` with an application-defined error code (an unsigned 62-bit number). On the receiver side, this surfaced as a `quic.StreamError` containing that error code on the `io.Reader`. Note that for bidirectional streams, `CancelWrite` _only_ resets the send side of the stream. It is still possible to read from the stream until the peer closes or resets the stream. - -Conversely, in case the application wishes to abort receiving from a `quic.ReceiveStream` or a `quic.Stream`, it can ask the sender to abort data transmission by calling `CancelRead` with an application-defined error code (an unsigned 62-bit number). On the receiver side, this surfaced as a `quic.StreamError` containing that error code on the `io.Writer`. Note that for bidirectional streams, `CancelWrite` _only_ resets the receive side of the stream. It is still possible to write to the stream. - -A bidirectional stream is only closed once both the read and the write side of the stream have been either closed or reset. Only then the peer is granted a new stream according to the maximum number of concurrent streams configured via `quic.Config.MaxIncomingStreams`. - -### Configuring QUIC - -The `quic.Config` struct passed to both the listen and dial calls (see above) contains a wide range of configuration options for QUIC connections, incl. the ability to fine-tune flow control limits, the number of streams that the peer is allowed to open concurrently, keep-alives, idle timeouts, and many more. Please refer to the documentation for the `quic.Config` for details. - -The `quic.Transport` contains a few configuration options that don't apply to any single QUIC connection, but to all connections handled by that transport. It is highly recommend to set the `StatelessResetToken`, which allows endpoints to quickly recover from crashes / reboots of our node (see [Section 10.3 of RFC 9000](https://datatracker.ietf.org/doc/html/rfc9000#section-10.3)). - -### Closing a Connection - -#### When the remote Peer closes the Connection - -In case the peer closes the QUIC connection, all calls to open streams, accept streams, as well as all methods on streams immediately return an error. Additionally, it is set as cancellation cause of the connection context. Users can use errors assertions to find out what exactly went wrong: - -* `quic.VersionNegotiationError`: Happens during the handshake, if there is no overlap between our and the remote's supported QUIC versions. -* `quic.HandshakeTimeoutError`: Happens if the QUIC handshake doesn't complete within the time specified in `quic.Config.HandshakeTimeout`. -* `quic.IdleTimeoutError`: Happens after completion of the handshake if the connection is idle for longer than the minimum of both peers idle timeouts (as configured by `quic.Config.IdleTimeout`). The connection is considered idle when no stream data (and datagrams, if applicable) are exchanged for that period. The QUIC connection can be instructed to regularly send a packet to prevent a connection from going idle by setting `quic.Config.KeepAlive`. However, this is no guarantee that the peer doesn't suddenly go away (e.g. by abruptly shutting down the node or by crashing), or by a NAT binding expiring, in which case this error might still occur. -* `quic.StatelessResetError`: Happens when the remote peer lost the state required to decrypt the packet. This requires the `quic.Transport.StatelessResetToken` to be configured by the peer. -* `quic.TransportError`: Happens if when the QUIC protocol is violated. Unless the error code is `APPLICATION_ERROR`, this will not happen unless one of the QUIC stacks involved is misbehaving. Please open an issue if you encounter this error. -* `quic.ApplicationError`: Happens when the remote decides to close the connection, see below. - -#### Initiated by the Application - -A `quic.Connection` can be closed using `CloseWithError`: - -```go -conn.CloseWithError(0x42, "error 0x42 occurred") -``` - -Applications can transmit both an error code (an unsigned 62-bit number) as well as a UTF-8 encoded human-readable reason. The error code allows the receiver to learn why the connection was closed, and the reason can be useful for debugging purposes. - -On the receiver side, this is surfaced as a `quic.ApplicationError`. - -### QUIC Datagrams - -Unreliable datagrams are a QUIC extension ([RFC 9221](https://datatracker.ietf.org/doc/html/rfc9221)) that is negotiated during the handshake. Support can be enabled by setting the `quic.Config.EnableDatagram` flag. Note that this doesn't guarantee that the peer also supports datagrams. Whether or not the feature negotiation succeeded can be learned from the `quic.ConnectionState.SupportsDatagrams` obtained from `quic.Connection.ConnectionState()`. - -QUIC DATAGRAMs are a new QUIC frame type sent in QUIC 1-RTT packets (i.e. after completion of the handshake). Therefore, they're end-to-end encrypted and congestion-controlled. However, if a DATAGRAM frame is deemed lost by QUIC's loss detection mechanism, they are not retransmitted. - -Datagrams are sent using the `SendDatagram` method on the `quic.Connection`: - -```go -conn.SendDatagram([]byte("foobar")) -``` - -And received using `ReceiveDatagram`: - -```go -msg, err := conn.ReceiveDatagram() -``` - -Note that this code path is currently not optimized. It works for datagrams that are sent occasionally, but it doesn't achieve the same throughput as writing data on a stream. Please get in touch on issue #3766 if your use case relies on high datagram throughput, or if you'd like to help fix this issue. There are also some restrictions regarding the maximum message size (see #3599). - -### QUIC Event Logging using qlog - -quic-go logs a wide range of events defined in [draft-ietf-quic-qlog-quic-events](https://datatracker.ietf.org/doc/draft-ietf-quic-qlog-quic-events/), providing comprehensive insights in the internals of a QUIC connection. - -qlog files can be processed by a number of 3rd-party tools. [qviz](https://qvis.quictools.info/) has proven very useful for debugging all kinds of QUIC connection failures. - -qlog can be activated by setting the `Tracer` callback on the `Config`. It is called as soon as quic-go decides to start the QUIC handshake on a new connection. -`qlog.DefaultTracer` provides a tracer implementation which writes qlog files to a directory specified by the `QLOGDIR` environment variable, if set. -The default qlog tracer can be used like this: -```go -quic.Config{ - Tracer: qlog.DefaultTracer, -} -``` - -This example creates a new qlog file under `/_.qlog`, e.g. `qlogs/2e0407da_client.qlog`. - - -For custom qlog behavior, `qlog.NewConnectionTracer` can be used. - -## Using HTTP/3 - -### As a server - -See the [example server](example/main.go). Starting a QUIC server is very similar to the standard library http package in Go: - -```go -http.Handle("/", http.FileServer(http.Dir(wwwDir))) -http3.ListenAndServeQUIC("localhost:4242", "/path/to/cert/chain.pem", "/path/to/privkey.pem", nil) -``` - -### As a client - -See the [example client](example/client/main.go). Use a `http3.RoundTripper` as a `Transport` in a `http.Client`. - -```go -http.Client{ - Transport: &http3.RoundTripper{}, -} -``` +Detailed documentation can be found on [quic-go.net](https://quic-go.net/docs/). ## Projects using quic-go diff --git a/vendor/github.com/quic-go/quic-go/client.go b/vendor/github.com/quic-go/quic-go/client.go index 70dd5e19..1c5654f6 100644 --- a/vendor/github.com/quic-go/quic-go/client.go +++ b/vendor/github.com/quic-go/quic-go/client.go @@ -35,7 +35,7 @@ type client struct { conn quicConn tracer *logging.ConnectionTracer - tracingID uint64 + tracingID ConnectionTracingID logger utils.Logger } @@ -191,6 +191,7 @@ func (c *client) dial(ctx context.Context) error { c.logger.Infof("Starting new connection to %s (%s -> %s), source connection ID %s, destination connection ID %s, version %s", c.tlsConf.ServerName, c.sendConn.LocalAddr(), c.sendConn.RemoteAddr(), c.srcConnID, c.destConnID, c.version) c.conn = newClientConnection( + context.WithValue(context.WithoutCancel(ctx), ConnectionTracingKey, c.tracingID), c.sendConn, c.packetHandlers, c.destConnID, @@ -202,7 +203,6 @@ func (c *client) dial(ctx context.Context) error { c.use0RTT, c.hasNegotiatedVersion, c.tracer, - c.tracingID, c.logger, c.version, ) diff --git a/vendor/github.com/quic-go/quic-go/config.go b/vendor/github.com/quic-go/quic-go/config.go index ee032e6e..d42bdc1c 100644 --- a/vendor/github.com/quic-go/quic-go/config.go +++ b/vendor/github.com/quic-go/quic-go/config.go @@ -39,6 +39,12 @@ func validateConfig(config *Config) error { if config.MaxConnectionReceiveWindow > quicvarint.Max { config.MaxConnectionReceiveWindow = quicvarint.Max } + if config.InitialPacketSize > 0 && config.InitialPacketSize < protocol.MinInitialPacketSize { + config.InitialPacketSize = protocol.MinInitialPacketSize + } + if config.InitialPacketSize > protocol.MaxPacketBufferSize { + config.InitialPacketSize = protocol.MaxPacketBufferSize + } // check that all QUIC versions are actually supported for _, v := range config.Versions { if !protocol.IsValidVersion(v) { @@ -94,6 +100,10 @@ func populateConfig(config *Config) *Config { } else if maxIncomingUniStreams < 0 { maxIncomingUniStreams = 0 } + initialPacketSize := config.InitialPacketSize + if initialPacketSize == 0 { + initialPacketSize = protocol.InitialPacketSize + } return &Config{ GetConfigForClient: config.GetConfigForClient, @@ -110,6 +120,7 @@ func populateConfig(config *Config) *Config { MaxIncomingUniStreams: maxIncomingUniStreams, TokenStore: config.TokenStore, EnableDatagrams: config.EnableDatagrams, + InitialPacketSize: initialPacketSize, DisablePathMTUDiscovery: config.DisablePathMTUDiscovery, Allow0RTT: config.Allow0RTT, Tracer: config.Tracer, diff --git a/vendor/github.com/quic-go/quic-go/connection.go b/vendor/github.com/quic-go/quic-go/connection.go index f8bcd613..1411a77b 100644 --- a/vendor/github.com/quic-go/quic-go/connection.go +++ b/vendor/github.com/quic-go/quic-go/connection.go @@ -16,7 +16,6 @@ import ( "github.com/quic-go/quic-go/internal/ackhandler" "github.com/quic-go/quic-go/internal/flowcontrol" "github.com/quic-go/quic-go/internal/handshake" - "github.com/quic-go/quic-go/internal/logutils" "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/internal/qerr" "github.com/quic-go/quic-go/internal/utils" @@ -25,15 +24,10 @@ import ( ) type unpacker interface { - UnpackLongHeader(hdr *wire.Header, rcvTime time.Time, data []byte, v protocol.Version) (*unpackedPacket, error) + UnpackLongHeader(hdr *wire.Header, data []byte) (*unpackedPacket, error) UnpackShortHeader(rcvTime time.Time, data []byte) (protocol.PacketNumber, protocol.PacketNumberLen, protocol.KeyPhaseBit, []byte, error) } -type streamGetter interface { - GetOrOpenReceiveStream(protocol.StreamID) (receiveStreamI, error) - GetOrOpenSendStream(protocol.StreamID) (sendStreamI, error) -} - type streamManager interface { GetOrOpenSendStream(protocol.StreamID) (sendStreamI, error) GetOrOpenReceiveStream(protocol.StreamID) (receiveStreamI, error) @@ -52,13 +46,14 @@ type streamManager interface { } type cryptoStreamHandler interface { - StartHandshake() error + StartHandshake(context.Context) error ChangeConnectionID(protocol.ConnectionID) SetLargest1RTTAcked(protocol.PacketNumber) error SetHandshakeConfirmed() GetSessionTicket() ([]byte, error) NextEvent() handshake.Event DiscardInitialKeys() + HandleMessage([]byte, protocol.EncryptionLevel) error io.Closer ConnectionState() handshake.ConnectionState } @@ -113,8 +108,8 @@ func (e *errCloseForRecreating) Error() string { return "closing connection in order to recreate it" } -var connTracingID uint64 // to be accessed atomically -func nextConnTracingID() uint64 { return atomic.AddUint64(&connTracingID, 1) } +var connTracingID atomic.Uint64 // to be accessed atomically +func nextConnTracingID() ConnectionTracingID { return ConnectionTracingID(connTracingID.Add(1)) } // A Connection is a QUIC connection type connection struct { @@ -144,8 +139,7 @@ type connection struct { sentPacketHandler ackhandler.SentPacketHandler receivedPacketHandler ackhandler.ReceivedPacketHandler retransmissionQueue *retransmissionQueue - framer framer - windowUpdateQueue *windowUpdateQueue + framer *framer connFlowController flowcontrol.ConnectionFlowController tokenStoreKey string // only set for the client tokenGenerator *handshake.TokenGenerator // only set for the server @@ -153,11 +147,13 @@ type connection struct { unpacker unpacker frameParser wire.FrameParser packer packer - mtuDiscoverer mtuDiscoverer // initialized when the handshake completes + mtuDiscoverer mtuDiscoverer // initialized when the transport parameters are received + + maxPayloadSizeEstimate atomic.Uint32 - initialStream cryptoStream - handshakeStream cryptoStream - oneRTTStream cryptoStream // only set for the server + initialStream *cryptoStream + handshakeStream *cryptoStream + oneRTTStream *cryptoStream // only set for the server cryptoStreamHandler cryptoStreamHandler receivedPackets chan receivedPacket @@ -167,10 +163,9 @@ type connection struct { // closeChan is used to notify the run loop that it should terminate closeChan chan closeError - ctx context.Context - ctxCancel context.CancelCauseFunc - handshakeCtx context.Context - handshakeCtxCancel context.CancelFunc + ctx context.Context + ctxCancel context.CancelCauseFunc + handshakeCompleteChan chan struct{} undecryptablePackets []receivedPacket // undecryptable packets, waiting for a change in encryption level undecryptablePacketsToProcess []receivedPacket @@ -220,6 +215,8 @@ var ( ) var newConnection = func( + ctx context.Context, + ctxCancel context.CancelCauseFunc, conn sendConn, runner connRunner, origDestConnID protocol.ConnectionID, @@ -234,11 +231,12 @@ var newConnection = func( tokenGenerator *handshake.TokenGenerator, clientAddressValidated bool, tracer *logging.ConnectionTracer, - tracingID uint64, logger utils.Logger, v protocol.Version, ) quicConn { s := &connection{ + ctx: ctx, + ctxCancel: ctxCancel, conn: conn, config: conf, handshakeDestConnID: destConnID, @@ -273,10 +271,9 @@ var newConnection = func( connIDGenerator, ) s.preSetup() - s.ctx, s.ctxCancel = context.WithCancelCause(context.WithValue(context.Background(), ConnectionTracingKey, tracingID)) s.sentPacketHandler, s.receivedPacketHandler = ackhandler.NewAckHandler( 0, - getMaxPacketSize(s.conn.RemoteAddr()), + protocol.ByteCount(s.config.InitialPacketSize), s.rttStats, clientAddressValidated, s.conn.capabilities().ECN, @@ -284,7 +281,7 @@ var newConnection = func( s.tracer, s.logger, ) - s.mtuDiscoverer = newMTUDiscoverer(s.rttStats, getMaxPacketSize(s.conn.RemoteAddr()), s.sentPacketHandler.SetMaxDatagramSize) + s.maxPayloadSizeEstimate.Store(uint32(estimateMaxPayloadSize(protocol.ByteCount(s.config.InitialPacketSize)))) params := &wire.TransportParameters{ InitialMaxStreamDataBidiLocal: protocol.ByteCount(s.config.InitialStreamReceiveWindow), InitialMaxStreamDataBidiRemote: protocol.ByteCount(s.config.InitialStreamReceiveWindow), @@ -295,6 +292,7 @@ var newConnection = func( MaxUniStreamNum: protocol.StreamNum(s.config.MaxIncomingUniStreams), MaxAckDelay: protocol.MaxAckDelayInclGranularity, AckDelayExponent: protocol.AckDelayExponent, + MaxUDPPayloadSize: protocol.MaxPacketBufferSize, DisableActiveMigration: true, StatelessResetToken: &statelessResetToken, OriginalDestinationConnectionID: origDestConnID, @@ -330,12 +328,13 @@ var newConnection = func( s.cryptoStreamHandler = cs s.packer = newPacketPacker(srcConnID, s.connIDManager.Get, s.initialStream, s.handshakeStream, s.sentPacketHandler, s.retransmissionQueue, cs, s.framer, s.receivedPacketHandler, s.datagramQueue, s.perspective) s.unpacker = newPacketUnpacker(cs, s.srcConnIDLen) - s.cryptoStreamManager = newCryptoStreamManager(cs, s.initialStream, s.handshakeStream, s.oneRTTStream) + s.cryptoStreamManager = newCryptoStreamManager(s.initialStream, s.handshakeStream, s.oneRTTStream) return s } // declare this as a variable, such that we can it mock it in the tests var newClientConnection = func( + ctx context.Context, conn sendConn, runner connRunner, destConnID protocol.ConnectionID, @@ -347,7 +346,6 @@ var newClientConnection = func( enable0RTT bool, hasNegotiatedVersion bool, tracer *logging.ConnectionTracer, - tracingID uint64, logger utils.Logger, v protocol.Version, ) quicConn { @@ -381,11 +379,11 @@ var newClientConnection = func( s.queueControlFrame, connIDGenerator, ) + s.ctx, s.ctxCancel = context.WithCancelCause(ctx) s.preSetup() - s.ctx, s.ctxCancel = context.WithCancelCause(context.WithValue(context.Background(), ConnectionTracingKey, tracingID)) s.sentPacketHandler, s.receivedPacketHandler = ackhandler.NewAckHandler( initialPacketNumber, - getMaxPacketSize(s.conn.RemoteAddr()), + protocol.ByteCount(s.config.InitialPacketSize), s.rttStats, false, // has no effect s.conn.capabilities().ECN, @@ -393,7 +391,7 @@ var newClientConnection = func( s.tracer, s.logger, ) - s.mtuDiscoverer = newMTUDiscoverer(s.rttStats, getMaxPacketSize(s.conn.RemoteAddr()), s.sentPacketHandler.SetMaxDatagramSize) + s.maxPayloadSizeEstimate.Store(uint32(estimateMaxPayloadSize(protocol.ByteCount(s.config.InitialPacketSize)))) oneRTTStream := newCryptoStream() params := &wire.TransportParameters{ InitialMaxStreamDataBidiRemote: protocol.ByteCount(s.config.InitialStreamReceiveWindow), @@ -404,6 +402,7 @@ var newClientConnection = func( MaxBidiStreamNum: protocol.StreamNum(s.config.MaxIncomingStreams), MaxUniStreamNum: protocol.StreamNum(s.config.MaxIncomingUniStreams), MaxAckDelay: protocol.MaxAckDelayInclGranularity, + MaxUDPPayloadSize: protocol.MaxPacketBufferSize, AckDelayExponent: protocol.AckDelayExponent, DisableActiveMigration: true, // For interoperability with quic-go versions before May 2023, this value must be set to a value @@ -433,7 +432,7 @@ var newClientConnection = func( s.version, ) s.cryptoStreamHandler = cs - s.cryptoStreamManager = newCryptoStreamManager(cs, s.initialStream, s.handshakeStream, oneRTTStream) + s.cryptoStreamManager = newCryptoStreamManager(s.initialStream, s.handshakeStream, oneRTTStream) s.unpacker = newPacketUnpacker(cs, s.srcConnIDLen) s.packer = newPacketPacker(srcConnID, s.connIDManager.Get, s.initialStream, s.handshakeStream, s.sentPacketHandler, s.retransmissionQueue, cs, s.framer, s.receivedPacketHandler, s.datagramQueue, s.perspective) if len(tlsConf.ServerName) > 0 { @@ -459,7 +458,6 @@ func (s *connection) preSetup() { s.connFlowController = flowcontrol.NewConnectionFlowController( protocol.ByteCount(s.config.InitialConnectionReceiveWindow), protocol.ByteCount(s.config.MaxConnectionReceiveWindow), - s.onHasConnectionWindowUpdate, func(size protocol.ByteCount) bool { if s.config.AllowConnectionWindowIncrease == nil { return true @@ -471,23 +469,24 @@ func (s *connection) preSetup() { ) s.earlyConnReadyChan = make(chan struct{}) s.streamsMap = newStreamsMap( + s.ctx, s, + s.queueControlFrame, s.newFlowController, uint64(s.config.MaxIncomingStreams), uint64(s.config.MaxIncomingUniStreams), s.perspective, ) - s.framer = newFramer(s.streamsMap) + s.framer = newFramer() s.receivedPackets = make(chan receivedPacket, protocol.MaxConnUnprocessedPackets) s.closeChan = make(chan closeError, 1) s.sendingScheduled = make(chan struct{}, 1) - s.handshakeCtx, s.handshakeCtxCancel = context.WithCancel(context.Background()) + s.handshakeCompleteChan = make(chan struct{}) now := time.Now() s.lastPacketReceivedTime = now s.creationTime = now - s.windowUpdateQueue = newWindowUpdateQueue(s.streamsMap, s.connFlowController, s.framer.QueueControlFrame) s.datagramQueue = newDatagramQueue(s.scheduleSending, s.logger) s.connState.Version = s.version } @@ -495,13 +494,11 @@ func (s *connection) preSetup() { // run the connection main loop func (s *connection) run() error { var closeErr closeError - defer func() { - s.ctxCancel(closeErr.err) - }() + defer func() { s.ctxCancel(closeErr.err) }() s.timer = *newTimer() - if err := s.cryptoStreamHandler.StartHandshake(); err != nil { + if err := s.cryptoStreamHandler.StartHandshake(s.ctx); err != nil { return err } if err := s.handleHandshakeEvents(); err != nil { @@ -662,7 +659,7 @@ func (s *connection) earlyConnReady() <-chan struct{} { } func (s *connection) HandshakeComplete() <-chan struct{} { - return s.handshakeCtx.Done() + return s.handshakeCompleteChan } func (s *connection) Context() context.Context { @@ -702,10 +699,10 @@ func (s *connection) nextKeepAliveTime() time.Time { func (s *connection) maybeResetTimer() { var deadline time.Time if !s.handshakeComplete { - deadline = utils.MinTime( - s.creationTime.Add(s.config.handshakeTimeout()), - s.idleTimeoutStartTime().Add(s.config.HandshakeIdleTimeout), - ) + deadline = s.creationTime.Add(s.config.handshakeTimeout()) + if t := s.idleTimeoutStartTime().Add(s.config.HandshakeIdleTimeout); t.Before(deadline) { + deadline = t + } } else { if keepAliveTime := s.nextKeepAliveTime(); !keepAliveTime.IsZero() { deadline = keepAliveTime @@ -723,11 +720,15 @@ func (s *connection) maybeResetTimer() { } func (s *connection) idleTimeoutStartTime() time.Time { - return utils.MaxTime(s.lastPacketReceivedTime, s.firstAckElicitingPacketAfterIdleSentTime) + startTime := s.lastPacketReceivedTime + if t := s.firstAckElicitingPacketAfterIdleSentTime; t.After(startTime) { + startTime = t + } + return startTime } func (s *connection) handleHandshakeComplete() error { - defer s.handshakeCtxCancel() + defer close(s.handshakeCompleteChan) // Once the handshake completes, we have derived 1-RTT keys. // There's no point in queueing undecryptable packets for later decryption anymore. s.undecryptablePackets = nil @@ -780,11 +781,7 @@ func (s *connection) handleHandshakeConfirmed() error { s.cryptoStreamHandler.SetHandshakeConfirmed() if !s.config.DisablePathMTUDiscovery && s.conn.capabilities().DF { - maxPacketSize := s.peerParams.MaxUDPPayloadSize - if maxPacketSize == 0 { - maxPacketSize = protocol.MaxByteCount - } - s.mtuDiscoverer.Start(min(maxPacketSize, protocol.MaxPacketBufferSize)) + s.mtuDiscoverer.Start() } return nil } @@ -803,13 +800,11 @@ func (s *connection) handlePacketImpl(rp receivedPacket) bool { data := rp.data p := rp for len(data) > 0 { - var destConnID protocol.ConnectionID if counter > 0 { p = *(p.Clone()) p.data = data - var err error - destConnID, err = wire.ParseConnectionID(p.data, s.srcConnIDLen) + destConnID, err := wire.ParseConnectionID(p.data, s.srcConnIDLen) if err != nil { if s.tracer != nil && s.tracer.DroppedPacket != nil { s.tracer.DroppedPacket(logging.PacketTypeNotDetermined, protocol.InvalidPacketNumber, protocol.ByteCount(len(data)), logging.PacketDropHeaderParseError) @@ -869,7 +864,9 @@ func (s *connection) handlePacketImpl(rp receivedPacket) bool { if counter > 0 { p.buffer.Split() } - processed = s.handleShortHeaderPacket(p, destConnID) + if wasProcessed := s.handleShortHeaderPacket(p); wasProcessed { + processed = true + } break } } @@ -878,7 +875,7 @@ func (s *connection) handlePacketImpl(rp receivedPacket) bool { return processed } -func (s *connection) handleShortHeaderPacket(p receivedPacket, destConnID protocol.ConnectionID) bool { +func (s *connection) handleShortHeaderPacket(p receivedPacket) bool { var wasQueued bool defer func() { @@ -888,6 +885,11 @@ func (s *connection) handleShortHeaderPacket(p receivedPacket, destConnID protoc } }() + destConnID, err := wire.ParseConnectionID(p.data, s.srcConnIDLen) + if err != nil { + s.tracer.DroppedPacket(logging.PacketType1RTT, protocol.InvalidPacketNumber, protocol.ByteCount(len(p.data)), logging.PacketDropHeaderParseError) + return false + } pn, pnLen, keyPhase, data, err := s.unpacker.UnpackShortHeader(p.rcvTime, p.data) if err != nil { wasQueued = s.handleUnpackError(err, p, logging.PacketType1RTT) @@ -961,7 +963,7 @@ func (s *connection) handleLongHeaderPacket(p receivedPacket, hdr *wire.Header) return false } - packet, err := s.unpacker.UnpackLongHeader(hdr, p.rcvTime, p.data, s.version) + packet, err := s.unpacker.UnpackLongHeader(hdr, p.data) if err != nil { wasQueued = s.handleUnpackError(err, p, logging.PacketTypeFromHeader(hdr)) return false @@ -1261,7 +1263,7 @@ func (s *connection) handleFrames( isAckEliciting = true } if log != nil { - frames = append(frames, logutils.ConvertFrame(frame)) + frames = append(frames, toLoggingFrame(frame)) } // An error occurred handling a previous frame. // Don't handle the current frame. @@ -1378,6 +1380,15 @@ func (s *connection) handleCryptoFrame(frame *wire.CryptoFrame, encLevel protoco if err := s.cryptoStreamManager.HandleCryptoFrame(frame, encLevel); err != nil { return err } + for { + data := s.cryptoStreamManager.GetCryptoData(encLevel) + if data == nil { + break + } + if err := s.cryptoStreamHandler.HandleMessage(data, encLevel); err != nil { + return err + } + } return s.handleHandshakeEvents() } @@ -1668,10 +1679,8 @@ func (s *connection) dropEncryptionLevel(encLevel protocol.EncryptionLevel) erro s.cryptoStreamHandler.DiscardInitialKeys() case protocol.Encryption0RTT: s.streamsMap.ResetFor0RTT() - if err := s.connFlowController.Reset(); err != nil { - return err - } - return s.framer.Handle0RTTRejection() + s.framer.Handle0RTTRejection() + return s.connFlowController.Reset() } return s.cryptoStreamManager.Drop(encLevel) } @@ -1758,7 +1767,11 @@ func (s *connection) checkTransportParameters(params *wire.TransportParameters) func (s *connection) applyTransportParameters() { params := s.peerParams // Our local idle timeout will always be > 0. - s.idleTimeout = utils.MinNonZeroDuration(s.config.MaxIdleTimeout, params.MaxIdleTimeout) + s.idleTimeout = s.config.MaxIdleTimeout + // If the peer advertised an idle timeout, take the minimum of the values. + if params.MaxIdleTimeout > 0 { + s.idleTimeout = min(s.idleTimeout, params.MaxIdleTimeout) + } s.keepAliveInterval = min(s.config.KeepAlivePeriod, min(s.idleTimeout/2, protocol.MaxKeepAliveInterval)) s.streamsMap.UpdateLimits(params) s.frameParser.SetAckDelayExponent(params.AckDelayExponent) @@ -1773,6 +1786,17 @@ func (s *connection) applyTransportParameters() { // Retire the connection ID. s.connIDManager.AddFromPreferredAddress(params.PreferredAddress.ConnectionID, params.PreferredAddress.StatelessResetToken) } + maxPacketSize := protocol.ByteCount(protocol.MaxPacketBufferSize) + if params.MaxUDPPayloadSize > 0 && params.MaxUDPPayloadSize < maxPacketSize { + maxPacketSize = params.MaxUDPPayloadSize + } + s.mtuDiscoverer = newMTUDiscoverer( + s.rttStats, + protocol.ByteCount(s.config.InitialPacketSize), + maxPacketSize, + s.onMTUIncreased, + s.tracer, + ) } func (s *connection) triggerSending(now time.Time) error { @@ -1855,13 +1879,15 @@ func (s *connection) sendPackets(now time.Time) error { if isBlocked, offset := s.connFlowController.IsNewlyBlocked(); isBlocked { s.framer.QueueControlFrame(&wire.DataBlockedFrame{MaximumData: offset}) } - s.windowUpdateQueue.QueueAll() + if offset := s.connFlowController.GetWindowUpdate(); offset > 0 { + s.framer.QueueControlFrame(&wire.MaxDataFrame{MaximumData: offset}) + } if cf := s.cryptoStreamManager.GetPostHandshakeData(protocol.MaxPostHandshakeCryptoFrameSize); cf != nil { s.queueControlFrame(cf) } if !s.handshakeConfirmed { - packet, err := s.packer.PackCoalescedPacket(false, s.mtuDiscoverer.CurrentSize(), s.version) + packet, err := s.packer.PackCoalescedPacket(false, s.maxPacketSize(), s.version) if err != nil || packet == nil { return err } @@ -1888,7 +1914,7 @@ func (s *connection) sendPacketsWithoutGSO(now time.Time) error { for { buf := getPacketBuffer() ecn := s.sentPacketHandler.ECNMode(true) - if _, err := s.appendOneShortHeaderPacket(buf, s.mtuDiscoverer.CurrentSize(), ecn, now); err != nil { + if _, err := s.appendOneShortHeaderPacket(buf, s.maxPacketSize(), ecn, now); err != nil { if err == errNothingToPack { buf.Release() return nil @@ -1919,7 +1945,7 @@ func (s *connection) sendPacketsWithoutGSO(now time.Time) error { func (s *connection) sendPacketsWithGSO(now time.Time) error { buf := getLargePacketBuffer() - maxSize := s.mtuDiscoverer.CurrentSize() + maxSize := s.maxPacketSize() ecn := s.sentPacketHandler.ECNMode(true) for { @@ -1988,7 +2014,7 @@ func (s *connection) resetPacingDeadline() { func (s *connection) maybeSendAckOnlyPacket(now time.Time) error { if !s.handshakeConfirmed { ecn := s.sentPacketHandler.ECNMode(false) - packet, err := s.packer.PackCoalescedPacket(true, s.mtuDiscoverer.CurrentSize(), s.version) + packet, err := s.packer.PackCoalescedPacket(true, s.maxPacketSize(), s.version) if err != nil { return err } @@ -1999,7 +2025,7 @@ func (s *connection) maybeSendAckOnlyPacket(now time.Time) error { } ecn := s.sentPacketHandler.ECNMode(true) - p, buf, err := s.packer.PackAckOnlyPacket(s.mtuDiscoverer.CurrentSize(), s.version) + p, buf, err := s.packer.PackAckOnlyPacket(s.maxPacketSize(), s.version) if err != nil { if err == errNothingToPack { return nil @@ -2021,7 +2047,7 @@ func (s *connection) sendProbePacket(encLevel protocol.EncryptionLevel, now time break } var err error - packet, err = s.packer.MaybePackProbePacket(encLevel, s.mtuDiscoverer.CurrentSize(), s.version) + packet, err = s.packer.MaybePackProbePacket(encLevel, s.maxPacketSize(), s.version) if err != nil { return err } @@ -2032,7 +2058,7 @@ func (s *connection) sendProbePacket(encLevel protocol.EncryptionLevel, now time if packet == nil { s.retransmissionQueue.AddPing(encLevel) var err error - packet, err = s.packer.MaybePackProbePacket(encLevel, s.mtuDiscoverer.CurrentSize(), s.version) + packet, err = s.packer.MaybePackProbePacket(encLevel, s.maxPacketSize(), s.version) if err != nil { return err } @@ -2111,14 +2137,14 @@ func (s *connection) sendConnectionClose(e error) ([]byte, error) { var transportErr *qerr.TransportError var applicationErr *qerr.ApplicationError if errors.As(e, &transportErr) { - packet, err = s.packer.PackConnectionClose(transportErr, s.mtuDiscoverer.CurrentSize(), s.version) + packet, err = s.packer.PackConnectionClose(transportErr, s.maxPacketSize(), s.version) } else if errors.As(e, &applicationErr) { - packet, err = s.packer.PackApplicationClose(applicationErr, s.mtuDiscoverer.CurrentSize(), s.version) + packet, err = s.packer.PackApplicationClose(applicationErr, s.maxPacketSize(), s.version) } else { packet, err = s.packer.PackConnectionClose(&qerr.TransportError{ ErrorCode: qerr.InternalError, ErrorMessage: fmt.Sprintf("connection BUG: unspecified error type (msg: %s)", e.Error()), - }, s.mtuDiscoverer.CurrentSize(), s.version) + }, s.maxPacketSize(), s.version) } if err != nil { return nil, err @@ -2128,126 +2154,22 @@ func (s *connection) sendConnectionClose(e error) ([]byte, error) { return packet.buffer.Data, s.conn.Write(packet.buffer.Data, 0, ecn) } -func (s *connection) logLongHeaderPacket(p *longHeaderPacket, ecn protocol.ECN) { - // quic-go logging - if s.logger.Debug() { - p.header.Log(s.logger) - if p.ack != nil { - wire.LogFrame(s.logger, p.ack, true) - } - for _, frame := range p.frames { - wire.LogFrame(s.logger, frame.Frame, true) - } - for _, frame := range p.streamFrames { - wire.LogFrame(s.logger, frame.Frame, true) - } - } - - // tracing - if s.tracer != nil && s.tracer.SentLongHeaderPacket != nil { - frames := make([]logging.Frame, 0, len(p.frames)) - for _, f := range p.frames { - frames = append(frames, logutils.ConvertFrame(f.Frame)) - } - for _, f := range p.streamFrames { - frames = append(frames, logutils.ConvertFrame(f.Frame)) +func (s *connection) maxPacketSize() protocol.ByteCount { + if s.mtuDiscoverer == nil { + // Use the configured packet size on the client side. + // If the server sends a max_udp_payload_size that's smaller than this size, we can ignore this: + // Apparently the server still processed the (fully padded) Initial packet anyway. + if s.perspective == protocol.PerspectiveClient { + return protocol.ByteCount(s.config.InitialPacketSize) } - var ack *logging.AckFrame - if p.ack != nil { - ack = logutils.ConvertAckFrame(p.ack) - } - s.tracer.SentLongHeaderPacket(p.header, p.length, ecn, ack, frames) - } -} - -func (s *connection) logShortHeaderPacket( - destConnID protocol.ConnectionID, - ackFrame *wire.AckFrame, - frames []ackhandler.Frame, - streamFrames []ackhandler.StreamFrame, - pn protocol.PacketNumber, - pnLen protocol.PacketNumberLen, - kp protocol.KeyPhaseBit, - ecn protocol.ECN, - size protocol.ByteCount, - isCoalesced bool, -) { - if s.logger.Debug() && !isCoalesced { - s.logger.Debugf("-> Sending packet %d (%d bytes) for connection %s, 1-RTT (ECN: %s)", pn, size, s.logID, ecn) - } - // quic-go logging - if s.logger.Debug() { - wire.LogShortHeader(s.logger, destConnID, pn, pnLen, kp) - if ackFrame != nil { - wire.LogFrame(s.logger, ackFrame, true) - } - for _, f := range frames { - wire.LogFrame(s.logger, f.Frame, true) - } - for _, f := range streamFrames { - wire.LogFrame(s.logger, f.Frame, true) - } - } - - // tracing - if s.tracer != nil && s.tracer.SentShortHeaderPacket != nil { - fs := make([]logging.Frame, 0, len(frames)+len(streamFrames)) - for _, f := range frames { - fs = append(fs, logutils.ConvertFrame(f.Frame)) - } - for _, f := range streamFrames { - fs = append(fs, logutils.ConvertFrame(f.Frame)) - } - var ack *logging.AckFrame - if ackFrame != nil { - ack = logutils.ConvertAckFrame(ackFrame) - } - s.tracer.SentShortHeaderPacket( - &logging.ShortHeader{ - DestConnectionID: destConnID, - PacketNumber: pn, - PacketNumberLen: pnLen, - KeyPhase: kp, - }, - size, - ecn, - ack, - fs, - ) - } -} - -func (s *connection) logCoalescedPacket(packet *coalescedPacket, ecn protocol.ECN) { - if s.logger.Debug() { - // There's a short period between dropping both Initial and Handshake keys and completion of the handshake, - // during which we might call PackCoalescedPacket but just pack a short header packet. - if len(packet.longHdrPackets) == 0 && packet.shortHdrPacket != nil { - s.logShortHeaderPacket( - packet.shortHdrPacket.DestConnID, - packet.shortHdrPacket.Ack, - packet.shortHdrPacket.Frames, - packet.shortHdrPacket.StreamFrames, - packet.shortHdrPacket.PacketNumber, - packet.shortHdrPacket.PacketNumberLen, - packet.shortHdrPacket.KeyPhase, - ecn, - packet.shortHdrPacket.Length, - false, - ) - return - } - if len(packet.longHdrPackets) > 1 { - s.logger.Debugf("-> Sending coalesced packet (%d parts, %d bytes) for connection %s", len(packet.longHdrPackets), packet.buffer.Len(), s.logID) - } else { - s.logger.Debugf("-> Sending packet %d (%d bytes) for connection %s, %s", packet.longHdrPackets[0].header.PacketNumber, packet.buffer.Len(), s.logID, packet.longHdrPackets[0].EncryptionLevel()) - } - } - for _, p := range packet.longHdrPackets { - s.logLongHeaderPacket(p, ecn) - } - if p := packet.shortHdrPacket; p != nil { - s.logShortHeaderPacket(p.DestConnID, p.Ack, p.Frames, p.StreamFrames, p.PacketNumber, p.PacketNumberLen, p.KeyPhase, ecn, p.Length, true) + // On the server side, there's no downside to using 1200 bytes until we received the client's transport + // parameters: + // * If the first packet didn't contain the entire ClientHello, all we can do is ACK that packet. We don't + // need a lot of bytes for that. + // * If it did, we will have processed the transport parameters and initialized the MTU discoverer. + return protocol.MinInitialPacketSize } + return s.mtuDiscoverer.CurrentSize() } // AcceptStream returns the next stream openend by the peer @@ -2291,7 +2213,6 @@ func (s *connection) newFlowController(id protocol.StreamID) flowcontrol.StreamF protocol.ByteCount(s.config.InitialStreamReceiveWindow), protocol.ByteCount(s.config.MaxStreamReceiveWindow), initialSendWindow, - s.onHasStreamWindowUpdate, s.rttStats, s.logger, ) @@ -2330,18 +2251,13 @@ func (s *connection) queueControlFrame(f wire.Frame) { s.scheduleSending() } -func (s *connection) onHasStreamWindowUpdate(id protocol.StreamID) { - s.windowUpdateQueue.AddStream(id) - s.scheduleSending() -} - -func (s *connection) onHasConnectionWindowUpdate() { - s.windowUpdateQueue.AddConnection() +func (s *connection) onHasStreamData(id protocol.StreamID, str sendStreamI) { + s.framer.AddActiveStream(id, str) s.scheduleSending() } -func (s *connection) onHasStreamData(id protocol.StreamID) { - s.framer.AddActiveStream(id) +func (s *connection) onHasStreamControlFrame(id protocol.StreamID, str streamControlFrameGetter) { + s.framer.AddStreamWithControlFrames(id, str) s.scheduleSending() } @@ -2349,6 +2265,12 @@ func (s *connection) onStreamCompleted(id protocol.StreamID) { if err := s.streamsMap.DeleteStream(id); err != nil { s.closeLocal(err) } + s.framer.RemoveActiveStream(id) +} + +func (s *connection) onMTUIncreased(mtu protocol.ByteCount) { + s.maxPayloadSizeEstimate.Store(uint32(estimateMaxPayloadSize(mtu))) + s.sentPacketHandler.SetMaxDatagramSize(mtu) } func (s *connection) SendDatagram(p []byte) error { @@ -2357,10 +2279,14 @@ func (s *connection) SendDatagram(p []byte) error { } f := &wire.DatagramFrame{DataLenPresent: true} - if protocol.ByteCount(len(p)) > f.MaxDataLen(s.peerParams.MaxDatagramFrameSize, s.version) { - return &DatagramTooLargeError{ - PeerMaxDatagramFrameSize: int64(s.peerParams.MaxDatagramFrameSize), - } + // The payload size estimate is conservative. + // Under many circumstances we could send a few more bytes. + maxDataLen := min( + f.MaxDataLen(s.peerParams.MaxDatagramFrameSize, s.version), + protocol.ByteCount(s.maxPayloadSizeEstimate.Load()), + ) + if protocol.ByteCount(len(p)) > maxDataLen { + return &DatagramTooLargeError{MaxDatagramPayloadSize: int64(maxDataLen)} } f.Data = make([]byte, len(p)) copy(f.Data, p) @@ -2386,8 +2312,22 @@ func (s *connection) GetVersion() protocol.Version { return s.version } -func (s *connection) NextConnection() Connection { - <-s.HandshakeComplete() - s.streamsMap.UseResetMaps() - return s +func (s *connection) NextConnection(ctx context.Context) (Connection, error) { + // The handshake might fail after the server rejected 0-RTT. + // This could happen if the Finished message is malformed or never received. + select { + case <-ctx.Done(): + return nil, context.Cause(ctx) + case <-s.Context().Done(): + case <-s.HandshakeComplete(): + s.streamsMap.UseResetMaps() + } + return s, nil +} + +// estimateMaxPayloadSize estimates the maximum payload size for short header packets. +// It is not very sophisticated: it just subtracts the size of header (assuming the maximum +// connection ID length), and the size of the encryption tag. +func estimateMaxPayloadSize(mtu protocol.ByteCount) protocol.ByteCount { + return mtu - 1 /* type byte */ - 20 /* maximum connection ID length */ - 16 /* tag size */ } diff --git a/vendor/github.com/quic-go/quic-go/connection_logging.go b/vendor/github.com/quic-go/quic-go/connection_logging.go new file mode 100644 index 00000000..f75b39f6 --- /dev/null +++ b/vendor/github.com/quic-go/quic-go/connection_logging.go @@ -0,0 +1,173 @@ +package quic + +import ( + "slices" + + "github.com/quic-go/quic-go/internal/ackhandler" + "github.com/quic-go/quic-go/internal/protocol" + "github.com/quic-go/quic-go/internal/wire" + "github.com/quic-go/quic-go/logging" +) + +// ConvertFrame converts a wire.Frame into a logging.Frame. +// This makes it possible for external packages to access the frames. +// Furthermore, it removes the data slices from CRYPTO and STREAM frames. +func toLoggingFrame(frame wire.Frame) logging.Frame { + switch f := frame.(type) { + case *wire.AckFrame: + // We use a pool for ACK frames. + // Implementations of the tracer interface may hold on to frames, so we need to make a copy here. + return toLoggingAckFrame(f) + case *wire.CryptoFrame: + return &logging.CryptoFrame{ + Offset: f.Offset, + Length: protocol.ByteCount(len(f.Data)), + } + case *wire.StreamFrame: + return &logging.StreamFrame{ + StreamID: f.StreamID, + Offset: f.Offset, + Length: f.DataLen(), + Fin: f.Fin, + } + case *wire.DatagramFrame: + return &logging.DatagramFrame{ + Length: logging.ByteCount(len(f.Data)), + } + default: + return logging.Frame(frame) + } +} + +func toLoggingAckFrame(f *wire.AckFrame) *logging.AckFrame { + ack := &logging.AckFrame{ + AckRanges: slices.Clone(f.AckRanges), + DelayTime: f.DelayTime, + ECNCE: f.ECNCE, + ECT0: f.ECT0, + ECT1: f.ECT1, + } + return ack +} + +func (s *connection) logLongHeaderPacket(p *longHeaderPacket, ecn protocol.ECN) { + // quic-go logging + if s.logger.Debug() { + p.header.Log(s.logger) + if p.ack != nil { + wire.LogFrame(s.logger, p.ack, true) + } + for _, frame := range p.frames { + wire.LogFrame(s.logger, frame.Frame, true) + } + for _, frame := range p.streamFrames { + wire.LogFrame(s.logger, frame.Frame, true) + } + } + + // tracing + if s.tracer != nil && s.tracer.SentLongHeaderPacket != nil { + frames := make([]logging.Frame, 0, len(p.frames)) + for _, f := range p.frames { + frames = append(frames, toLoggingFrame(f.Frame)) + } + for _, f := range p.streamFrames { + frames = append(frames, toLoggingFrame(f.Frame)) + } + var ack *logging.AckFrame + if p.ack != nil { + ack = toLoggingAckFrame(p.ack) + } + s.tracer.SentLongHeaderPacket(p.header, p.length, ecn, ack, frames) + } +} + +func (s *connection) logShortHeaderPacket( + destConnID protocol.ConnectionID, + ackFrame *wire.AckFrame, + frames []ackhandler.Frame, + streamFrames []ackhandler.StreamFrame, + pn protocol.PacketNumber, + pnLen protocol.PacketNumberLen, + kp protocol.KeyPhaseBit, + ecn protocol.ECN, + size protocol.ByteCount, + isCoalesced bool, +) { + if s.logger.Debug() && !isCoalesced { + s.logger.Debugf("-> Sending packet %d (%d bytes) for connection %s, 1-RTT (ECN: %s)", pn, size, s.logID, ecn) + } + // quic-go logging + if s.logger.Debug() { + wire.LogShortHeader(s.logger, destConnID, pn, pnLen, kp) + if ackFrame != nil { + wire.LogFrame(s.logger, ackFrame, true) + } + for _, f := range frames { + wire.LogFrame(s.logger, f.Frame, true) + } + for _, f := range streamFrames { + wire.LogFrame(s.logger, f.Frame, true) + } + } + + // tracing + if s.tracer != nil && s.tracer.SentShortHeaderPacket != nil { + fs := make([]logging.Frame, 0, len(frames)+len(streamFrames)) + for _, f := range frames { + fs = append(fs, toLoggingFrame(f.Frame)) + } + for _, f := range streamFrames { + fs = append(fs, toLoggingFrame(f.Frame)) + } + var ack *logging.AckFrame + if ackFrame != nil { + ack = toLoggingAckFrame(ackFrame) + } + s.tracer.SentShortHeaderPacket( + &logging.ShortHeader{ + DestConnectionID: destConnID, + PacketNumber: pn, + PacketNumberLen: pnLen, + KeyPhase: kp, + }, + size, + ecn, + ack, + fs, + ) + } +} + +func (s *connection) logCoalescedPacket(packet *coalescedPacket, ecn protocol.ECN) { + if s.logger.Debug() { + // There's a short period between dropping both Initial and Handshake keys and completion of the handshake, + // during which we might call PackCoalescedPacket but just pack a short header packet. + if len(packet.longHdrPackets) == 0 && packet.shortHdrPacket != nil { + s.logShortHeaderPacket( + packet.shortHdrPacket.DestConnID, + packet.shortHdrPacket.Ack, + packet.shortHdrPacket.Frames, + packet.shortHdrPacket.StreamFrames, + packet.shortHdrPacket.PacketNumber, + packet.shortHdrPacket.PacketNumberLen, + packet.shortHdrPacket.KeyPhase, + ecn, + packet.shortHdrPacket.Length, + false, + ) + return + } + if len(packet.longHdrPackets) > 1 { + s.logger.Debugf("-> Sending coalesced packet (%d parts, %d bytes) for connection %s", len(packet.longHdrPackets), packet.buffer.Len(), s.logID) + } else { + s.logger.Debugf("-> Sending packet %d (%d bytes) for connection %s, %s", packet.longHdrPackets[0].header.PacketNumber, packet.buffer.Len(), s.logID, packet.longHdrPackets[0].EncryptionLevel()) + } + } + for _, p := range packet.longHdrPackets { + s.logLongHeaderPacket(p, ecn) + } + if p := packet.shortHdrPacket; p != nil { + s.logShortHeaderPacket(p.DestConnID, p.Ack, p.Frames, p.StreamFrames, p.PacketNumber, p.PacketNumberLen, p.KeyPhase, ecn, p.Length, true) + } +} diff --git a/vendor/github.com/quic-go/quic-go/crypto_stream.go b/vendor/github.com/quic-go/quic-go/crypto_stream.go index abc7ddcf..9a387baa 100644 --- a/vendor/github.com/quic-go/quic-go/crypto_stream.go +++ b/vendor/github.com/quic-go/quic-go/crypto_stream.go @@ -2,27 +2,14 @@ package quic import ( "fmt" - "io" "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/internal/qerr" "github.com/quic-go/quic-go/internal/wire" ) -type cryptoStream interface { - // for receiving data - HandleCryptoFrame(*wire.CryptoFrame) error - GetCryptoData() []byte - Finish() error - // for sending data - io.Writer - HasData() bool - PopCryptoFrame(protocol.ByteCount) *wire.CryptoFrame -} - -type cryptoStreamImpl struct { - queue *frameSorter - msgBuf []byte +type cryptoStream struct { + queue frameSorter highestOffset protocol.ByteCount finished bool @@ -31,11 +18,11 @@ type cryptoStreamImpl struct { writeBuf []byte } -func newCryptoStream() cryptoStream { - return &cryptoStreamImpl{queue: newFrameSorter()} +func newCryptoStream() *cryptoStream { + return &cryptoStream{queue: *newFrameSorter()} } -func (s *cryptoStreamImpl) HandleCryptoFrame(f *wire.CryptoFrame) error { +func (s *cryptoStream) HandleCryptoFrame(f *wire.CryptoFrame) error { highestOffset := f.Offset + protocol.ByteCount(len(f.Data)) if maxOffset := highestOffset; maxOffset > protocol.MaxCryptoStreamOffset { return &qerr.TransportError{ @@ -56,26 +43,16 @@ func (s *cryptoStreamImpl) HandleCryptoFrame(f *wire.CryptoFrame) error { return nil } s.highestOffset = max(s.highestOffset, highestOffset) - if err := s.queue.Push(f.Data, f.Offset, nil); err != nil { - return err - } - for { - _, data, _ := s.queue.Pop() - if data == nil { - return nil - } - s.msgBuf = append(s.msgBuf, data...) - } + return s.queue.Push(f.Data, f.Offset, nil) } // GetCryptoData retrieves data that was received in CRYPTO frames -func (s *cryptoStreamImpl) GetCryptoData() []byte { - b := s.msgBuf - s.msgBuf = nil - return b +func (s *cryptoStream) GetCryptoData() []byte { + _, data, _ := s.queue.Pop() + return data } -func (s *cryptoStreamImpl) Finish() error { +func (s *cryptoStream) Finish() error { if s.queue.HasMoreData() { return &qerr.TransportError{ ErrorCode: qerr.ProtocolViolation, @@ -87,16 +64,16 @@ func (s *cryptoStreamImpl) Finish() error { } // Writes writes data that should be sent out in CRYPTO frames -func (s *cryptoStreamImpl) Write(p []byte) (int, error) { +func (s *cryptoStream) Write(p []byte) (int, error) { s.writeBuf = append(s.writeBuf, p...) return len(p), nil } -func (s *cryptoStreamImpl) HasData() bool { +func (s *cryptoStream) HasData() bool { return len(s.writeBuf) > 0 } -func (s *cryptoStreamImpl) PopCryptoFrame(maxLen protocol.ByteCount) *wire.CryptoFrame { +func (s *cryptoStream) PopCryptoFrame(maxLen protocol.ByteCount) *wire.CryptoFrame { f := &wire.CryptoFrame{Offset: s.writeOffset} n := min(f.MaxDataLen(maxLen), protocol.ByteCount(len(s.writeBuf))) f.Data = s.writeBuf[:n] diff --git a/vendor/github.com/quic-go/quic-go/crypto_stream_manager.go b/vendor/github.com/quic-go/quic-go/crypto_stream_manager.go index c48e238a..d70b9b00 100644 --- a/vendor/github.com/quic-go/quic-go/crypto_stream_manager.go +++ b/vendor/github.com/quic-go/quic-go/crypto_stream_manager.go @@ -3,32 +3,22 @@ package quic import ( "fmt" - "github.com/quic-go/quic-go/internal/handshake" "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/internal/wire" ) -type cryptoDataHandler interface { - HandleMessage([]byte, protocol.EncryptionLevel) error - NextEvent() handshake.Event -} - type cryptoStreamManager struct { - cryptoHandler cryptoDataHandler - - initialStream cryptoStream - handshakeStream cryptoStream - oneRTTStream cryptoStream + initialStream *cryptoStream + handshakeStream *cryptoStream + oneRTTStream *cryptoStream } func newCryptoStreamManager( - cryptoHandler cryptoDataHandler, - initialStream cryptoStream, - handshakeStream cryptoStream, - oneRTTStream cryptoStream, + initialStream *cryptoStream, + handshakeStream *cryptoStream, + oneRTTStream *cryptoStream, ) *cryptoStreamManager { return &cryptoStreamManager{ - cryptoHandler: cryptoHandler, initialStream: initialStream, handshakeStream: handshakeStream, oneRTTStream: oneRTTStream, @@ -36,7 +26,7 @@ func newCryptoStreamManager( } func (m *cryptoStreamManager) HandleCryptoFrame(frame *wire.CryptoFrame, encLevel protocol.EncryptionLevel) error { - var str cryptoStream + var str *cryptoStream //nolint:exhaustive // CRYPTO frames cannot be sent in 0-RTT packets. switch encLevel { case protocol.EncryptionInitial: @@ -48,18 +38,23 @@ func (m *cryptoStreamManager) HandleCryptoFrame(frame *wire.CryptoFrame, encLeve default: return fmt.Errorf("received CRYPTO frame with unexpected encryption level: %s", encLevel) } - if err := str.HandleCryptoFrame(frame); err != nil { - return err - } - for { - data := str.GetCryptoData() - if data == nil { - return nil - } - if err := m.cryptoHandler.HandleMessage(data, encLevel); err != nil { - return err - } + return str.HandleCryptoFrame(frame) +} + +func (m *cryptoStreamManager) GetCryptoData(encLevel protocol.EncryptionLevel) []byte { + var str *cryptoStream + //nolint:exhaustive // CRYPTO frames cannot be sent in 0-RTT packets. + switch encLevel { + case protocol.EncryptionInitial: + str = m.initialStream + case protocol.EncryptionHandshake: + str = m.handshakeStream + case protocol.Encryption1RTT: + str = m.oneRTTStream + default: + panic(fmt.Sprintf("received CRYPTO frame with unexpected encryption level: %s", encLevel)) } + return str.GetCryptoData() } func (m *cryptoStreamManager) GetPostHandshakeData(maxSize protocol.ByteCount) *wire.CryptoFrame { diff --git a/vendor/github.com/quic-go/quic-go/errors.go b/vendor/github.com/quic-go/quic-go/errors.go index fda3c924..3fe1e0a9 100644 --- a/vendor/github.com/quic-go/quic-go/errors.go +++ b/vendor/github.com/quic-go/quic-go/errors.go @@ -64,7 +64,7 @@ func (e *StreamError) Error() string { // DatagramTooLargeError is returned from Connection.SendDatagram if the payload is too large to be sent. type DatagramTooLargeError struct { - PeerMaxDatagramFrameSize int64 + MaxDatagramPayloadSize int64 } func (e *DatagramTooLargeError) Is(target error) bool { diff --git a/vendor/github.com/quic-go/quic-go/framer.go b/vendor/github.com/quic-go/quic-go/framer.go index 1e6219a4..e162f6b8 100644 --- a/vendor/github.com/quic-go/quic-go/framer.go +++ b/vendor/github.com/quic-go/quic-go/framer.go @@ -1,7 +1,7 @@ package quic import ( - "errors" + "slices" "sync" "github.com/quic-go/quic-go/internal/ackhandler" @@ -11,37 +11,25 @@ import ( "github.com/quic-go/quic-go/quicvarint" ) -type framer interface { - HasData() bool - - QueueControlFrame(wire.Frame) - AppendControlFrames([]ackhandler.Frame, protocol.ByteCount, protocol.Version) ([]ackhandler.Frame, protocol.ByteCount) - - AddActiveStream(protocol.StreamID) - AppendStreamFrames([]ackhandler.StreamFrame, protocol.ByteCount, protocol.Version) ([]ackhandler.StreamFrame, protocol.ByteCount) - - Handle0RTTRejection() error - - // QueuedTooManyControlFrames says if the control frame queue exceeded its maximum queue length. - // This is a hack. - // It is easier to implement than propagating an error return value in QueueControlFrame. - // The correct solution would be to queue frames with their respective structs. - // See https://github.com/quic-go/quic-go/issues/4271 for the queueing of stream-related control frames. - QueuedTooManyControlFrames() bool -} - const ( maxPathResponses = 256 maxControlFrames = 16 << 10 ) -type framerI struct { - mutex sync.Mutex +// This is the largest possible size of a stream-related control frame +// (which is the RESET_STREAM frame). +const maxStreamControlFrameSize = 25 - streamGetter streamGetter +type streamControlFrameGetter interface { + getControlFrame() (_ ackhandler.Frame, ok, hasMore bool) +} - activeStreams map[protocol.StreamID]struct{} - streamQueue ringbuffer.RingBuffer[protocol.StreamID] +type framer struct { + mutex sync.Mutex + + activeStreams map[protocol.StreamID]sendStreamI + streamQueue ringbuffer.RingBuffer[protocol.StreamID] + streamsWithControlFrames map[protocol.StreamID]streamControlFrameGetter controlFrameMutex sync.Mutex controlFrames []wire.Frame @@ -49,16 +37,14 @@ type framerI struct { queuedTooManyControlFrames bool } -var _ framer = &framerI{} - -func newFramer(streamGetter streamGetter) framer { - return &framerI{ - streamGetter: streamGetter, - activeStreams: make(map[protocol.StreamID]struct{}), +func newFramer() *framer { + return &framer{ + activeStreams: make(map[protocol.StreamID]sendStreamI), + streamsWithControlFrames: make(map[protocol.StreamID]streamControlFrameGetter), } } -func (f *framerI) HasData() bool { +func (f *framer) HasData() bool { f.mutex.Lock() hasData := !f.streamQueue.Empty() f.mutex.Unlock() @@ -67,10 +53,10 @@ func (f *framerI) HasData() bool { } f.controlFrameMutex.Lock() defer f.controlFrameMutex.Unlock() - return len(f.controlFrames) > 0 || len(f.pathResponses) > 0 + return len(f.streamsWithControlFrames) > 0 || len(f.controlFrames) > 0 || len(f.pathResponses) > 0 } -func (f *framerI) QueueControlFrame(frame wire.Frame) { +func (f *framer) QueueControlFrame(frame wire.Frame) { f.controlFrameMutex.Lock() defer f.controlFrameMutex.Unlock() @@ -92,7 +78,7 @@ func (f *framerI) QueueControlFrame(frame wire.Frame) { f.controlFrames = append(f.controlFrames, frame) } -func (f *framerI) AppendControlFrames(frames []ackhandler.Frame, maxLen protocol.ByteCount, v protocol.Version) ([]ackhandler.Frame, protocol.ByteCount) { +func (f *framer) AppendControlFrames(frames []ackhandler.Frame, maxLen protocol.ByteCount, v protocol.Version) ([]ackhandler.Frame, protocol.ByteCount) { f.controlFrameMutex.Lock() defer f.controlFrameMutex.Unlock() @@ -108,6 +94,29 @@ func (f *framerI) AppendControlFrames(frames []ackhandler.Frame, maxLen protocol } } + // add stream-related control frames + for id, str := range f.streamsWithControlFrames { + start: + remainingLen := maxLen - length + if remainingLen <= maxStreamControlFrameSize { + break + } + fr, ok, hasMore := str.getControlFrame() + if !hasMore { + delete(f.streamsWithControlFrames, id) + } + if !ok { + continue + } + frames = append(frames, fr) + length += fr.Frame.Length(v) + if hasMore { + // It is rare that a stream has more than one control frame to queue. + // We don't want to spawn another loop for just to cover that case. + goto start + } + } + for len(f.controlFrames) > 0 { frame := f.controlFrames[len(f.controlFrames)-1] frameLen := frame.Length(v) @@ -118,27 +127,51 @@ func (f *framerI) AppendControlFrames(frames []ackhandler.Frame, maxLen protocol length += frameLen f.controlFrames = f.controlFrames[:len(f.controlFrames)-1] } + return frames, length } -func (f *framerI) QueuedTooManyControlFrames() bool { +// QueuedTooManyControlFrames says if the control frame queue exceeded its maximum queue length. +// This is a hack. +// It is easier to implement than propagating an error return value in QueueControlFrame. +// The correct solution would be to queue frames with their respective structs. +// See https://github.com/quic-go/quic-go/issues/4271 for the queueing of stream-related control frames. +func (f *framer) QueuedTooManyControlFrames() bool { return f.queuedTooManyControlFrames } -func (f *framerI) AddActiveStream(id protocol.StreamID) { +func (f *framer) AddActiveStream(id protocol.StreamID, str sendStreamI) { f.mutex.Lock() if _, ok := f.activeStreams[id]; !ok { f.streamQueue.PushBack(id) - f.activeStreams[id] = struct{}{} + f.activeStreams[id] = str } f.mutex.Unlock() } -func (f *framerI) AppendStreamFrames(frames []ackhandler.StreamFrame, maxLen protocol.ByteCount, v protocol.Version) ([]ackhandler.StreamFrame, protocol.ByteCount) { +func (f *framer) AddStreamWithControlFrames(id protocol.StreamID, str streamControlFrameGetter) { + f.controlFrameMutex.Lock() + if _, ok := f.streamsWithControlFrames[id]; !ok { + f.streamsWithControlFrames[id] = str + } + f.controlFrameMutex.Unlock() +} + +// RemoveActiveStream is called when a stream completes. +func (f *framer) RemoveActiveStream(id protocol.StreamID) { + f.mutex.Lock() + delete(f.activeStreams, id) + // We don't delete the stream from the streamQueue, + // since we'd have to iterate over the ringbuffer. + // Instead, we check if the stream is still in activeStreams in AppendStreamFrames. + f.mutex.Unlock() +} + +func (f *framer) AppendStreamFrames(frames []ackhandler.StreamFrame, maxLen protocol.ByteCount, v protocol.Version) ([]ackhandler.StreamFrame, protocol.ByteCount) { startLen := len(frames) var length protocol.ByteCount f.mutex.Lock() - // pop STREAM frames, until less than MinStreamFrameSize bytes are left in the packet + // pop STREAM frames, until less than 128 bytes are left in the packet numActiveStreams := f.streamQueue.Len() for i := 0; i < numActiveStreams; i++ { if protocol.MinStreamFrameSize+length > maxLen { @@ -147,17 +180,16 @@ func (f *framerI) AppendStreamFrames(frames []ackhandler.StreamFrame, maxLen pro id := f.streamQueue.PopFront() // This should never return an error. Better check it anyway. // The stream will only be in the streamQueue, if it enqueued itself there. - str, err := f.streamGetter.GetOrOpenSendStream(id) - // The stream can be nil if it completed after it said it had data. - if str == nil || err != nil { - delete(f.activeStreams, id) + str, ok := f.activeStreams[id] + // The stream might have been removed after being enqueued. + if !ok { continue } remainingLen := maxLen - length // For the last STREAM frame, we'll remove the DataLen field later. // Therefore, we can pretend to have more bytes available when popping // the STREAM frame (which will always have the DataLen set). - remainingLen += quicvarint.Len(uint64(remainingLen)) + remainingLen += protocol.ByteCount(quicvarint.Len(uint64(remainingLen))) frame, ok, hasMoreData := str.popStreamFrame(remainingLen, v) if hasMoreData { // put the stream back in the queue (at the end) f.streamQueue.PushBack(id) @@ -165,7 +197,7 @@ func (f *framerI) AppendStreamFrames(frames []ackhandler.StreamFrame, maxLen pro delete(f.activeStreams, id) } // The frame can be "nil" - // * if the receiveStream was canceled after it said it had data + // * if the stream was canceled after it said it had data // * the remaining size doesn't allow us to add another STREAM frame if !ok { continue @@ -183,11 +215,12 @@ func (f *framerI) AppendStreamFrames(frames []ackhandler.StreamFrame, maxLen pro return frames, length } -func (f *framerI) Handle0RTTRejection() error { +func (f *framer) Handle0RTTRejection() { f.mutex.Lock() defer f.mutex.Unlock() - f.controlFrameMutex.Lock() + defer f.controlFrameMutex.Unlock() + f.streamQueue.Clear() for id := range f.activeStreams { delete(f.activeStreams, id) @@ -195,16 +228,13 @@ func (f *framerI) Handle0RTTRejection() error { var j int for i, frame := range f.controlFrames { switch frame.(type) { - case *wire.MaxDataFrame, *wire.MaxStreamDataFrame, *wire.MaxStreamsFrame: - return errors.New("didn't expect MAX_DATA / MAX_STREAM_DATA / MAX_STREAMS frame to be sent in 0-RTT") - case *wire.DataBlockedFrame, *wire.StreamDataBlockedFrame, *wire.StreamsBlockedFrame: + case *wire.MaxDataFrame, *wire.MaxStreamDataFrame, *wire.MaxStreamsFrame, + *wire.DataBlockedFrame, *wire.StreamDataBlockedFrame, *wire.StreamsBlockedFrame: continue default: f.controlFrames[j] = f.controlFrames[i] j++ } } - f.controlFrames = f.controlFrames[:j] - f.controlFrameMutex.Unlock() - return nil + f.controlFrames = slices.Delete(f.controlFrames, j, len(f.controlFrames)) } diff --git a/vendor/github.com/quic-go/quic-go/interface.go b/vendor/github.com/quic-go/quic-go/interface.go index ca8544d8..2071b596 100644 --- a/vendor/github.com/quic-go/quic-go/interface.go +++ b/vendor/github.com/quic-go/quic-go/interface.go @@ -19,10 +19,6 @@ type StreamID = protocol.StreamID // A Version is a QUIC version number. type Version = protocol.Version -// A VersionNumber is a QUIC version number. -// Deprecated: VersionNumber was renamed to Version. -type VersionNumber = Version - const ( // Version1 is RFC 9000 Version1 = protocol.Version1 @@ -57,8 +53,13 @@ var Err0RTTRejected = errors.New("0-RTT rejected") // ConnectionTracingKey can be used to associate a ConnectionTracer with a Connection. // It is set on the Connection.Context() context, // as well as on the context passed to logging.Tracer.NewConnectionTracer. +// Deprecated: Applications can set their own tracing key using Transport.ConnContext. var ConnectionTracingKey = connTracingCtxKey{} +// ConnectionTracingID is the type of the context value saved under the ConnectionTracingKey. +// Deprecated: Applications can set their own tracing key using Transport.ConnContext. +type ConnectionTracingID uint64 + type connTracingCtxKey struct{} // QUICVersionContextKey can be used to find out the QUIC version of a TLS handshake from the @@ -84,8 +85,8 @@ type ReceiveStream interface { // Read reads data from the stream. // Read can be made to time out and return a net.Error with Timeout() == true // after a fixed time limit; see SetDeadline and SetReadDeadline. - // If the stream was canceled by the peer, the error implements the StreamError - // interface, and Canceled() == true. + // If the stream was canceled by the peer, the error is a StreamError and + // Remote == true. // If the connection was closed due to a timeout, the error satisfies // the net.Error interface, and Timeout() will be true. io.Reader @@ -108,8 +109,8 @@ type SendStream interface { // Write writes data to the stream. // Write can be made to time out and return a net.Error with Timeout() == true // after a fixed time limit; see SetDeadline and SetWriteDeadline. - // If the stream was canceled by the peer, the error implements the StreamError - // interface, and Canceled() == true. + // If the stream was canceled by the peer, the error is a StreamError and + // Remote == true. // If the connection was closed due to a timeout, the error satisfies // the net.Error interface, and Timeout() will be true. io.Writer @@ -121,7 +122,9 @@ type SendStream interface { // CancelWrite aborts sending on this stream. // Data already written, but not yet delivered to the peer is not guaranteed to be delivered reliably. // Write will unblock immediately, and future calls to Write will fail. - // When called multiple times or after closing the stream it is a no-op. + // When called multiple times it is a no-op. + // When called after Close, it aborts delivery. Note that there is no guarantee if + // the peer will receive the FIN or the reset first. CancelWrite(StreamErrorCode) // The Context is canceled as soon as the write-side of the stream is closed. // This happens when Close() or CancelWrite() is called, or when the peer @@ -143,7 +146,7 @@ type SendStream interface { // * TransportError: for errors triggered by the QUIC transport (in many cases a misbehaving peer) // * IdleTimeoutError: when the peer goes away unexpectedly (this is a net.Error timeout error) // * HandshakeTimeoutError: when the cryptographic handshake takes too long (this is a net.Error timeout error) -// * StatelessResetError: when we receive a stateless reset (this is a net.Error temporary error) +// * StatelessResetError: when we receive a stateless reset // * VersionNegotiationError: returned by the client, when there's no version overlap between the peers type Connection interface { // AcceptStream returns the next stream opened by the peer, blocking until one is available. @@ -156,28 +159,29 @@ type Connection interface { AcceptUniStream(context.Context) (ReceiveStream, error) // OpenStream opens a new bidirectional QUIC stream. // There is no signaling to the peer about new streams: - // The peer can only accept the stream after data has been sent on the stream. - // If the error is non-nil, it satisfies the net.Error interface. - // When reaching the peer's stream limit, err.Temporary() will be true. - // If the connection was closed due to a timeout, Timeout() will be true. + // The peer can only accept the stream after data has been sent on the stream, + // or the stream has been reset or closed. + // When reaching the peer's stream limit, it is not possible to open a new stream until the + // peer raises the stream limit. In that case, a StreamLimitReachedError is returned. OpenStream() (Stream, error) // OpenStreamSync opens a new bidirectional QUIC stream. // It blocks until a new stream can be opened. // There is no signaling to the peer about new streams: // The peer can only accept the stream after data has been sent on the stream, // or the stream has been reset or closed. - // If the error is non-nil, it satisfies the net.Error interface. - // If the connection was closed due to a timeout, Timeout() will be true. OpenStreamSync(context.Context) (Stream, error) // OpenUniStream opens a new outgoing unidirectional QUIC stream. - // If the error is non-nil, it satisfies the net.Error interface. - // When reaching the peer's stream limit, Temporary() will be true. - // If the connection was closed due to a timeout, Timeout() will be true. + // There is no signaling to the peer about new streams: + // The peer can only accept the stream after data has been sent on the stream, + // or the stream has been reset or closed. + // When reaching the peer's stream limit, it is not possible to open a new stream until the + // peer raises the stream limit. In that case, a StreamLimitReachedError is returned. OpenUniStream() (SendStream, error) // OpenUniStreamSync opens a new outgoing unidirectional QUIC stream. // It blocks until a new stream can be opened. - // If the error is non-nil, it satisfies the net.Error interface. - // If the connection was closed due to a timeout, Timeout() will be true. + // There is no signaling to the peer about new streams: + // The peer can only accept the stream after data has been sent on the stream, + // or the stream has been reset or closed. OpenUniStreamSync(context.Context) (SendStream, error) // LocalAddr returns the local address. LocalAddr() net.Addr @@ -217,7 +221,7 @@ type EarlyConnection interface { // however the client's identity is only verified once the handshake completes. HandshakeComplete() <-chan struct{} - NextConnection() Connection + NextConnection(context.Context) (Connection, error) } // StatelessResetKey is a key used to derive stateless reset tokens. @@ -320,10 +324,15 @@ type Config struct { // If set to 0, then no keep alive is sent. Otherwise, the keep alive is sent on that period (or at most // every half of MaxIdleTimeout, whichever is smaller). KeepAlivePeriod time.Duration + // InitialPacketSize is the initial size of packets sent. + // It is usually not necessary to manually set this value, + // since Path MTU discovery very quickly finds the path's MTU. + // If set too high, the path might not support packets that large, leading to a timeout of the QUIC handshake. + // Values below 1200 are invalid. + InitialPacketSize uint16 // DisablePathMTUDiscovery disables Path MTU Discovery (RFC 8899). // This allows the sending of QUIC packets that fully utilize the available MTU of the path. // Path MTU discovery is only available on systems that allow setting of the Don't Fragment (DF) bit. - // If unavailable or disabled, packets will be at most 1252 (IPv4) / 1232 (IPv6) bytes in size. DisablePathMTUDiscovery bool // Allow0RTT allows the application to decide if a 0-RTT connection attempt should be accepted. // Only valid for the server. diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_history.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_history.go index 3143bfe1..f9feae1d 100644 --- a/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_history.go +++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/received_packet_history.go @@ -1,10 +1,9 @@ package ackhandler import ( - "sync" + "slices" "github.com/quic-go/quic-go/internal/protocol" - list "github.com/quic-go/quic-go/internal/utils/linkedlist" "github.com/quic-go/quic-go/internal/wire" ) @@ -14,25 +13,17 @@ type interval struct { End protocol.PacketNumber } -var intervalElementPool sync.Pool - -func init() { - intervalElementPool = *list.NewPool[interval]() -} - // The receivedPacketHistory stores if a packet number has already been received. // It generates ACK ranges which can be used to assemble an ACK frame. // It does not store packet contents. type receivedPacketHistory struct { - ranges *list.List[interval] + ranges []interval // maximum length: protocol.MaxNumAckRanges deletedBelow protocol.PacketNumber } func newReceivedPacketHistory() *receivedPacketHistory { - return &receivedPacketHistory{ - ranges: list.NewWithPool[interval](&intervalElementPool), - } + return &receivedPacketHistory{} } // ReceivedPacket registers a packet with PacketNumber p and updates the ranges @@ -41,58 +32,54 @@ func (h *receivedPacketHistory) ReceivedPacket(p protocol.PacketNumber) bool /* if p < h.deletedBelow { return false } + isNew := h.addToRanges(p) - h.maybeDeleteOldRanges() + // Delete old ranges, if we're tracking too many of them. + // This is a DoS defense against a peer that sends us too many gaps. + if len(h.ranges) > protocol.MaxNumAckRanges { + h.ranges = slices.Delete(h.ranges, 0, len(h.ranges)-protocol.MaxNumAckRanges) + } return isNew } func (h *receivedPacketHistory) addToRanges(p protocol.PacketNumber) bool /* is a new packet (and not a duplicate / delayed packet) */ { - if h.ranges.Len() == 0 { - h.ranges.PushBack(interval{Start: p, End: p}) + if len(h.ranges) == 0 { + h.ranges = append(h.ranges, interval{Start: p, End: p}) return true } - for el := h.ranges.Back(); el != nil; el = el.Prev() { + for i := len(h.ranges) - 1; i >= 0; i-- { // p already included in an existing range. Nothing to do here - if p >= el.Value.Start && p <= el.Value.End { + if p >= h.ranges[i].Start && p <= h.ranges[i].End { return false } - if el.Value.End == p-1 { // extend a range at the end - el.Value.End = p + if h.ranges[i].End == p-1 { // extend a range at the end + h.ranges[i].End = p return true } - if el.Value.Start == p+1 { // extend a range at the beginning - el.Value.Start = p + if h.ranges[i].Start == p+1 { // extend a range at the beginning + h.ranges[i].Start = p - prev := el.Prev() - if prev != nil && prev.Value.End+1 == el.Value.Start { // merge two ranges - prev.Value.End = el.Value.End - h.ranges.Remove(el) + if i > 0 && h.ranges[i-1].End+1 == h.ranges[i].Start { // merge two ranges + h.ranges[i-1].End = h.ranges[i].End + h.ranges = slices.Delete(h.ranges, i, i+1) } return true } - // create a new range at the end - if p > el.Value.End { - h.ranges.InsertAfter(interval{Start: p, End: p}, el) + // create a new range after the current one + if p > h.ranges[i].End { + h.ranges = slices.Insert(h.ranges, i+1, interval{Start: p, End: p}) return true } } // create a new range at the beginning - h.ranges.InsertBefore(interval{Start: p, End: p}, h.ranges.Front()) + h.ranges = slices.Insert(h.ranges, 0, interval{Start: p, End: p}) return true } -// Delete old ranges, if we're tracking more than 500 of them. -// This is a DoS defense against a peer that sends us too many gaps. -func (h *receivedPacketHistory) maybeDeleteOldRanges() { - for h.ranges.Len() > protocol.MaxNumAckRanges { - h.ranges.Remove(h.ranges.Front()) - } -} - // DeleteBelow deletes all entries below (but not including) p func (h *receivedPacketHistory) DeleteBelow(p protocol.PacketNumber) { if p < h.deletedBelow { @@ -100,37 +87,39 @@ func (h *receivedPacketHistory) DeleteBelow(p protocol.PacketNumber) { } h.deletedBelow = p - nextEl := h.ranges.Front() - for el := h.ranges.Front(); nextEl != nil; el = nextEl { - nextEl = el.Next() + if len(h.ranges) == 0 { + return + } - if el.Value.End < p { // delete a whole range - h.ranges.Remove(el) - } else if p > el.Value.Start && p <= el.Value.End { - el.Value.Start = p - return + idx := -1 + for i := 0; i < len(h.ranges); i++ { + if h.ranges[i].End < p { // delete a whole range + idx = i + } else if p > h.ranges[i].Start && p <= h.ranges[i].End { + h.ranges[i].Start = p + break } else { // no ranges affected. Nothing to do - return + break } } + if idx >= 0 { + h.ranges = slices.Delete(h.ranges, 0, idx+1) + } } // AppendAckRanges appends to a slice of all AckRanges that can be used in an AckFrame func (h *receivedPacketHistory) AppendAckRanges(ackRanges []wire.AckRange) []wire.AckRange { - if h.ranges.Len() > 0 { - for el := h.ranges.Back(); el != nil; el = el.Prev() { - ackRanges = append(ackRanges, wire.AckRange{Smallest: el.Value.Start, Largest: el.Value.End}) - } + for i := len(h.ranges) - 1; i >= 0; i-- { + ackRanges = append(ackRanges, wire.AckRange{Smallest: h.ranges[i].Start, Largest: h.ranges[i].End}) } return ackRanges } func (h *receivedPacketHistory) GetHighestAckRange() wire.AckRange { ackRange := wire.AckRange{} - if h.ranges.Len() > 0 { - r := h.ranges.Back().Value - ackRange.Smallest = r.Start - ackRange.Largest = r.End + if len(h.ranges) > 0 { + ackRange.Smallest = h.ranges[len(h.ranges)-1].Start + ackRange.Largest = h.ranges[len(h.ranges)-1].End } return ackRange } @@ -139,11 +128,12 @@ func (h *receivedPacketHistory) IsPotentiallyDuplicate(p protocol.PacketNumber) if p < h.deletedBelow { return true } - for el := h.ranges.Back(); el != nil; el = el.Prev() { - if p > el.Value.End { + // Iterating over the slices is faster than using a binary search (using slices.BinarySearchFunc). + for i := len(h.ranges) - 1; i >= 0; i-- { + if p > h.ranges[i].End { return false } - if p <= el.Value.End && p >= el.Value.Start { + if p <= h.ranges[i].End && p >= h.ranges[i].Start { return true } } diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_handler.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_handler.go index 3cef8923..b84f0dcb 100644 --- a/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_handler.go +++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_handler.go @@ -28,7 +28,7 @@ const ( ) type packetNumberSpace struct { - history *sentPacketHistory + history sentPacketHistory pns packetNumberGenerator lossTime time.Time @@ -38,15 +38,15 @@ type packetNumberSpace struct { largestSent protocol.PacketNumber } -func newPacketNumberSpace(initialPN protocol.PacketNumber, skipPNs bool) *packetNumberSpace { +func newPacketNumberSpace(initialPN protocol.PacketNumber, isAppData bool) *packetNumberSpace { var pns packetNumberGenerator - if skipPNs { + if isAppData { pns = newSkippingPacketNumberGenerator(initialPN, protocol.SkipPacketInitialPeriod, protocol.SkipPacketMaxPeriod) } else { pns = newSequentialPacketNumberGenerator(initialPN) } return &packetNumberSpace{ - history: newSentPacketHistory(), + history: *newSentPacketHistory(isAppData), pns: pns, largestSent: protocol.InvalidPacketNumber, largestAcked: protocol.InvalidPacketNumber, @@ -756,7 +756,7 @@ func (h *sentPacketHandler) PeekPacketNumber(encLevel protocol.EncryptionLevel) pnSpace := h.getPacketNumberSpace(encLevel) pn := pnSpace.pns.Peek() // See section 17.1 of RFC 9000. - return pn, protocol.GetPacketNumberLengthForHeader(pn, pnSpace.largestAcked) + return pn, protocol.PacketNumberLengthForHeader(pn, pnSpace.largestAcked) } func (h *sentPacketHandler) PopPacketNumber(encLevel protocol.EncryptionLevel) protocol.PacketNumber { diff --git a/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_history.go b/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_history.go index c14c0f49..9968df6a 100644 --- a/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_history.go +++ b/vendor/github.com/quic-go/quic-go/internal/ackhandler/sent_packet_history.go @@ -14,11 +14,16 @@ type sentPacketHistory struct { highestPacketNumber protocol.PacketNumber } -func newSentPacketHistory() *sentPacketHistory { - return &sentPacketHistory{ - packets: make([]*packet, 0, 32), +func newSentPacketHistory(isAppData bool) *sentPacketHistory { + h := &sentPacketHistory{ highestPacketNumber: protocol.InvalidPacketNumber, } + if isAppData { + h.packets = make([]*packet, 0, 32) + } else { + h.packets = make([]*packet, 0, 6) + } + return h } func (h *sentPacketHistory) checkSequentialPacketNumberUse(pn protocol.PacketNumber) { diff --git a/vendor/github.com/quic-go/quic-go/internal/congestion/cubic.go b/vendor/github.com/quic-go/quic-go/internal/congestion/cubic.go index 4e30de65..b35d40d4 100644 --- a/vendor/github.com/quic-go/quic-go/internal/congestion/cubic.go +++ b/vendor/github.com/quic-go/quic-go/internal/congestion/cubic.go @@ -17,11 +17,11 @@ import ( // 1024*1024^3 (first 1024 is from 0.100^3) // where 0.100 is 100 ms which is the scaling round trip time. const ( - cubeScale = 40 - cubeCongestionWindowScale = 410 - cubeFactor protocol.ByteCount = 1 << cubeScale / cubeCongestionWindowScale / maxDatagramSize + cubeScale = 40 + cubeCongestionWindowScale = 410 + cubeFactor = 1 << cubeScale / cubeCongestionWindowScale / maxDatagramSize // TODO: when re-enabling cubic, make sure to use the actual packet size here - maxDatagramSize = protocol.ByteCount(protocol.InitialPacketSizeIPv4) + maxDatagramSize = protocol.ByteCount(protocol.InitialPacketSize) ) const defaultNumConnections = 1 diff --git a/vendor/github.com/quic-go/quic-go/internal/congestion/cubic_sender.go b/vendor/github.com/quic-go/quic-go/internal/congestion/cubic_sender.go index a1b06ab3..075b08e0 100644 --- a/vendor/github.com/quic-go/quic-go/internal/congestion/cubic_sender.go +++ b/vendor/github.com/quic-go/quic-go/internal/congestion/cubic_sender.go @@ -12,7 +12,7 @@ import ( const ( // maxDatagramSize is the default maximum packet size used in the Linux TCP implementation. // Used in QUIC for congestion window computations in bytes. - initialMaxDatagramSize = protocol.ByteCount(protocol.InitialPacketSizeIPv4) + initialMaxDatagramSize = protocol.ByteCount(protocol.InitialPacketSize) maxBurstPackets = 3 renoBeta = 0.7 // Reno backoff factor. minCongestionWindowPackets = 2 diff --git a/vendor/github.com/quic-go/quic-go/internal/flowcontrol/connection_flow_controller.go b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/connection_flow_controller.go index 8504cdcf..2efcad74 100644 --- a/vendor/github.com/quic-go/quic-go/internal/flowcontrol/connection_flow_controller.go +++ b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/connection_flow_controller.go @@ -12,8 +12,6 @@ import ( type connectionFlowController struct { baseFlowController - - queueWindowUpdate func() } var _ ConnectionFlowController = &connectionFlowController{} @@ -23,7 +21,6 @@ var _ ConnectionFlowController = &connectionFlowController{} func NewConnectionFlowController( receiveWindow protocol.ByteCount, maxReceiveWindow protocol.ByteCount, - queueWindowUpdate func(), allowWindowIncrease func(size protocol.ByteCount) bool, rttStats *utils.RTTStats, logger utils.Logger, @@ -37,7 +34,6 @@ func NewConnectionFlowController( allowWindowIncrease: allowWindowIncrease, logger: logger, }, - queueWindowUpdate: queueWindowUpdate, } } @@ -63,18 +59,14 @@ func (c *connectionFlowController) IncrementHighestReceived(increment protocol.B func (c *connectionFlowController) AddBytesRead(n protocol.ByteCount) { c.mutex.Lock() c.baseFlowController.addBytesRead(n) - shouldQueueWindowUpdate := c.hasWindowUpdate() c.mutex.Unlock() - if shouldQueueWindowUpdate { - c.queueWindowUpdate() - } } func (c *connectionFlowController) GetWindowUpdate() protocol.ByteCount { c.mutex.Lock() oldWindowSize := c.receiveWindowSize offset := c.baseFlowController.getWindowUpdate() - if oldWindowSize < c.receiveWindowSize { + if c.logger.Debug() && oldWindowSize < c.receiveWindowSize { c.logger.Debugf("Increasing receive flow control window for the connection to %d kB", c.receiveWindowSize/(1<<10)) } c.mutex.Unlock() diff --git a/vendor/github.com/quic-go/quic-go/internal/flowcontrol/interface.go b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/interface.go index fc5f9de0..57d12a95 100644 --- a/vendor/github.com/quic-go/quic-go/internal/flowcontrol/interface.go +++ b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/interface.go @@ -8,14 +8,13 @@ type flowController interface { UpdateSendWindow(protocol.ByteCount) (updated bool) AddBytesSent(protocol.ByteCount) // for receiving - AddBytesRead(protocol.ByteCount) GetWindowUpdate() protocol.ByteCount // returns 0 if no update is necessary - IsNewlyBlocked() (bool, protocol.ByteCount) } // A StreamFlowController is a flow controller for a QUIC stream. type StreamFlowController interface { flowController + AddBytesRead(protocol.ByteCount) (shouldQueueWindowUpdate bool) // UpdateHighestReceived is called when a new highest offset is received // final has to be to true if this is the final offset of the stream, // as contained in a STREAM frame with FIN bit, and the RESET_STREAM frame @@ -23,12 +22,15 @@ type StreamFlowController interface { // Abandon is called when reading from the stream is aborted early, // and there won't be any further calls to AddBytesRead. Abandon() + IsNewlyBlocked() bool } // The ConnectionFlowController is the flow controller for the connection. type ConnectionFlowController interface { flowController + AddBytesRead(protocol.ByteCount) Reset() error + IsNewlyBlocked() (bool, protocol.ByteCount) } type connectionFlowControllerI interface { diff --git a/vendor/github.com/quic-go/quic-go/internal/flowcontrol/stream_flow_controller.go b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/stream_flow_controller.go index 1a69fb2b..2d58351c 100644 --- a/vendor/github.com/quic-go/quic-go/internal/flowcontrol/stream_flow_controller.go +++ b/vendor/github.com/quic-go/quic-go/internal/flowcontrol/stream_flow_controller.go @@ -13,8 +13,6 @@ type streamFlowController struct { streamID protocol.StreamID - queueWindowUpdate func() - connection connectionFlowControllerI receivedFinalOffset bool @@ -29,14 +27,12 @@ func NewStreamFlowController( receiveWindow protocol.ByteCount, maxReceiveWindow protocol.ByteCount, initialSendWindow protocol.ByteCount, - queueWindowUpdate func(protocol.StreamID), rttStats *utils.RTTStats, logger utils.Logger, ) StreamFlowController { return &streamFlowController{ - streamID: streamID, - connection: cfc.(connectionFlowControllerI), - queueWindowUpdate: func() { queueWindowUpdate(streamID) }, + streamID: streamID, + connection: cfc.(connectionFlowControllerI), baseFlowController: baseFlowController{ rttStats: rttStats, receiveWindow: receiveWindow, @@ -97,20 +93,19 @@ func (c *streamFlowController) UpdateHighestReceived(offset protocol.ByteCount, return c.connection.IncrementHighestReceived(increment) } -func (c *streamFlowController) AddBytesRead(n protocol.ByteCount) { +func (c *streamFlowController) AddBytesRead(n protocol.ByteCount) (shouldQueueWindowUpdate bool) { c.mutex.Lock() c.baseFlowController.addBytesRead(n) - shouldQueueWindowUpdate := c.shouldQueueWindowUpdate() + shouldQueueWindowUpdate = c.shouldQueueWindowUpdate() c.mutex.Unlock() - if shouldQueueWindowUpdate { - c.queueWindowUpdate() - } c.connection.AddBytesRead(n) + return } func (c *streamFlowController) Abandon() { c.mutex.Lock() unread := c.highestReceived - c.bytesRead + c.bytesRead = c.highestReceived c.mutex.Unlock() if unread > 0 { c.connection.AddBytesRead(unread) @@ -126,6 +121,11 @@ func (c *streamFlowController) SendWindowSize() protocol.ByteCount { return min(c.baseFlowController.sendWindowSize(), c.connection.SendWindowSize()) } +func (c *streamFlowController) IsNewlyBlocked() bool { + blocked, _ := c.baseFlowController.IsNewlyBlocked() + return blocked +} + func (c *streamFlowController) shouldQueueWindowUpdate() bool { return !c.receivedFinalOffset && c.hasWindowUpdate() } diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/crypto_setup.go b/vendor/github.com/quic-go/quic-go/internal/handshake/crypto_setup.go index adf74fe7..c8e6cb33 100644 --- a/vendor/github.com/quic-go/quic-go/internal/handshake/crypto_setup.go +++ b/vendor/github.com/quic-go/quic-go/internal/handshake/crypto_setup.go @@ -1,7 +1,6 @@ package handshake import ( - "bytes" "context" "crypto/tls" "errors" @@ -124,44 +123,12 @@ func NewCryptoSetupServer( ) cs.allow0RTT = allow0RTT - quicConf := &tls.QUICConfig{TLSConfig: tlsConf} - qtls.SetupConfigForServer(quicConf, cs.allow0RTT, cs.getDataForSessionTicket, cs.handleSessionTicket) - addConnToClientHelloInfo(quicConf.TLSConfig, localAddr, remoteAddr) - - cs.tlsConf = quicConf.TLSConfig - cs.conn = tls.QUICServer(quicConf) - + tlsConf = qtls.SetupConfigForServer(tlsConf, localAddr, remoteAddr, cs.getDataForSessionTicket, cs.handleSessionTicket) + cs.tlsConf = tlsConf + cs.conn = tls.QUICServer(&tls.QUICConfig{TLSConfig: tlsConf}) return cs } -// The tls.Config contains two callbacks that pass in a tls.ClientHelloInfo. -// Since crypto/tls doesn't do it, we need to make sure to set the Conn field with a fake net.Conn -// that allows the caller to get the local and the remote address. -func addConnToClientHelloInfo(conf *tls.Config, localAddr, remoteAddr net.Addr) { - if conf.GetConfigForClient != nil { - gcfc := conf.GetConfigForClient - conf.GetConfigForClient = func(info *tls.ClientHelloInfo) (*tls.Config, error) { - info.Conn = &conn{localAddr: localAddr, remoteAddr: remoteAddr} - c, err := gcfc(info) - if c != nil { - c = c.Clone() - // This won't be necessary anymore once https://github.com/golang/go/issues/63722 is accepted. - c.MinVersion = tls.VersionTLS13 - // We're returning a tls.Config here, so we need to apply this recursively. - addConnToClientHelloInfo(c, localAddr, remoteAddr) - } - return c, err - } - } - if conf.GetCertificate != nil { - gc := conf.GetCertificate - conf.GetCertificate = func(info *tls.ClientHelloInfo) (*tls.Certificate, error) { - info.Conn = &conn{localAddr: localAddr, remoteAddr: remoteAddr} - return gc(info) - } - } -} - func newCryptoSetup( connID protocol.ConnectionID, tp *wire.TransportParameters, @@ -204,8 +171,8 @@ func (h *cryptoSetup) SetLargest1RTTAcked(pn protocol.PacketNumber) error { return h.aead.SetLargestAcked(pn) } -func (h *cryptoSetup) StartHandshake() error { - err := h.conn.Start(context.WithValue(context.Background(), QUICVersionContextKey, h.version)) +func (h *cryptoSetup) StartHandshake(ctx context.Context) error { + err := h.conn.Start(context.WithValue(ctx, QUICVersionContextKey, h.version)) if err != nil { return wrapError(err) } @@ -262,6 +229,9 @@ func (h *cryptoSetup) handleMessage(data []byte, encLevel protocol.EncryptionLev } func (h *cryptoSetup) handleEvent(ev tls.QUICEvent) (done bool, err error) { + //nolint:exhaustive + // Go 1.23 added new 0-RTT events, see https://github.com/quic-go/quic-go/issues/4272. + // We will start using these events when dropping support for Go 1.22. switch ev.Kind { case tls.QUICNoEvent: return true, nil @@ -286,7 +256,10 @@ func (h *cryptoSetup) handleEvent(ev tls.QUICEvent) (done bool, err error) { h.handshakeComplete() return false, nil default: - return false, fmt.Errorf("unexpected event: %d", ev.Kind) + // Unknown events should be ignored. + // crypto/tls will ensure that this is safe to do. + // See the discussion following https://github.com/golang/go/issues/68124#issuecomment-2187042510 for details. + return false, nil } } @@ -338,25 +311,26 @@ func (h *cryptoSetup) handleDataFromSessionState(data []byte, earlyData bool) (a return false } -func decodeDataFromSessionState(data []byte, earlyData bool) (time.Duration, *wire.TransportParameters, error) { - r := bytes.NewReader(data) - ver, err := quicvarint.Read(r) +func decodeDataFromSessionState(b []byte, earlyData bool) (time.Duration, *wire.TransportParameters, error) { + ver, l, err := quicvarint.Parse(b) if err != nil { return 0, nil, err } + b = b[l:] if ver != clientSessionStateRevision { return 0, nil, fmt.Errorf("mismatching version. Got %d, expected %d", ver, clientSessionStateRevision) } - rttEncoded, err := quicvarint.Read(r) + rttEncoded, l, err := quicvarint.Parse(b) if err != nil { return 0, nil, err } + b = b[l:] rtt := time.Duration(rttEncoded) * time.Microsecond if !earlyData { return rtt, nil, nil } var tp wire.TransportParameters - if err := tp.UnmarshalFromSessionTicket(r); err != nil { + if err := tp.UnmarshalFromSessionTicket(b); err != nil { return 0, nil, err } return rtt, &tp, nil @@ -376,9 +350,7 @@ func (h *cryptoSetup) getDataForSessionTicket() []byte { // Due to limitations in crypto/tls, it's only possible to generate a single session ticket per connection. // It is only valid for the server. func (h *cryptoSetup) GetSessionTicket() ([]byte, error) { - if err := h.conn.SendSessionTicket(tls.QUICSessionTicketOptions{ - EarlyData: h.allow0RTT, - }); err != nil { + if err := h.conn.SendSessionTicket(tls.QUICSessionTicketOptions{EarlyData: h.allow0RTT}); err != nil { // Session tickets might be disabled by tls.Config.SessionTicketsDisabled. // We can't check h.tlsConfig here, since the actual config might have been obtained from // the GetConfigForClient callback. @@ -655,8 +627,7 @@ func (h *cryptoSetup) ConnectionState() ConnectionState { } func wrapError(err error) error { - // alert 80 is an internal error - if alertErr := tls.AlertError(0); errors.As(err, &alertErr) && alertErr != 80 { + if alertErr := tls.AlertError(0); errors.As(err, &alertErr) { return qerr.NewLocalCryptoError(uint8(alertErr), err) } return &qerr.TransportError{ErrorCode: qerr.InternalError, ErrorMessage: err.Error()} diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/interface.go b/vendor/github.com/quic-go/quic-go/internal/handshake/interface.go index fab224f9..c3a59fcd 100644 --- a/vendor/github.com/quic-go/quic-go/internal/handshake/interface.go +++ b/vendor/github.com/quic-go/quic-go/internal/handshake/interface.go @@ -1,6 +1,7 @@ package handshake import ( + "context" "crypto/tls" "errors" "io" @@ -82,6 +83,29 @@ const ( EventHandshakeComplete ) +func (k EventKind) String() string { + switch k { + case EventNoEvent: + return "EventNoEvent" + case EventWriteInitialData: + return "EventWriteInitialData" + case EventWriteHandshakeData: + return "EventWriteHandshakeData" + case EventReceivedReadKeys: + return "EventReceivedReadKeys" + case EventDiscard0RTTKeys: + return "EventDiscard0RTTKeys" + case EventReceivedTransportParameters: + return "EventReceivedTransportParameters" + case EventRestoredTransportParameters: + return "EventRestoredTransportParameters" + case EventHandshakeComplete: + return "EventHandshakeComplete" + default: + return "Unknown EventKind" + } +} + // Event is a handshake event. type Event struct { Kind EventKind @@ -91,7 +115,7 @@ type Event struct { // CryptoSetup handles the handshake and protecting / unprotecting packets type CryptoSetup interface { - StartHandshake() error + StartHandshake(context.Context) error io.Closer ChangeConnectionID(protocol.ConnectionID) GetSessionTicket() ([]byte, error) diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/session_ticket.go b/vendor/github.com/quic-go/quic-go/internal/handshake/session_ticket.go index 9481af56..b67f0101 100644 --- a/vendor/github.com/quic-go/quic-go/internal/handshake/session_ticket.go +++ b/vendor/github.com/quic-go/quic-go/internal/handshake/session_ticket.go @@ -1,7 +1,6 @@ package handshake import ( - "bytes" "errors" "fmt" "time" @@ -28,25 +27,26 @@ func (t *sessionTicket) Marshal() []byte { } func (t *sessionTicket) Unmarshal(b []byte, using0RTT bool) error { - r := bytes.NewReader(b) - rev, err := quicvarint.Read(r) + rev, l, err := quicvarint.Parse(b) if err != nil { return errors.New("failed to read session ticket revision") } + b = b[l:] if rev != sessionTicketRevision { return fmt.Errorf("unknown session ticket revision: %d", rev) } - rtt, err := quicvarint.Read(r) + rtt, l, err := quicvarint.Parse(b) if err != nil { return errors.New("failed to read RTT") } + b = b[l:] if using0RTT { var tp wire.TransportParameters - if err := tp.UnmarshalFromSessionTicket(r); err != nil { + if err := tp.UnmarshalFromSessionTicket(b); err != nil { return fmt.Errorf("unmarshaling transport parameters from session ticket failed: %s", err.Error()) } t.Parameters = &tp - } else if r.Len() > 0 { + } else if len(b) > 0 { return fmt.Errorf("the session ticket has more bytes than expected") } t.RTT = time.Duration(rtt) * time.Microsecond diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/token_generator.go b/vendor/github.com/quic-go/quic-go/internal/handshake/token_generator.go index 2d91e6b2..84e58cfc 100644 --- a/vendor/github.com/quic-go/quic-go/internal/handshake/token_generator.go +++ b/vendor/github.com/quic-go/quic-go/internal/handshake/token_generator.go @@ -46,7 +46,7 @@ type TokenGenerator struct { // NewTokenGenerator initializes a new TokenGenerator func NewTokenGenerator(key TokenProtectorKey) *TokenGenerator { - return &TokenGenerator{tokenProtector: newTokenProtector(key)} + return &TokenGenerator{tokenProtector: *newTokenProtector(key)} } // NewRetryToken generates a new token for a Retry for a given source address diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/token_protector.go b/vendor/github.com/quic-go/quic-go/internal/handshake/token_protector.go index f3a99e41..15779189 100644 --- a/vendor/github.com/quic-go/quic-go/internal/handshake/token_protector.go +++ b/vendor/github.com/quic-go/quic-go/internal/handshake/token_protector.go @@ -14,28 +14,20 @@ import ( // TokenProtectorKey is the key used to encrypt both Retry and session resumption tokens. type TokenProtectorKey [32]byte -// TokenProtector is used to create and verify a token -type tokenProtector interface { - // NewToken creates a new token - NewToken([]byte) ([]byte, error) - // DecodeToken decodes a token - DecodeToken([]byte) ([]byte, error) -} - const tokenNonceSize = 32 // tokenProtector is used to create and verify a token -type tokenProtectorImpl struct { +type tokenProtector struct { key TokenProtectorKey } // newTokenProtector creates a source for source address tokens -func newTokenProtector(key TokenProtectorKey) tokenProtector { - return &tokenProtectorImpl{key: key} +func newTokenProtector(key TokenProtectorKey) *tokenProtector { + return &tokenProtector{key: key} } // NewToken encodes data into a new token. -func (s *tokenProtectorImpl) NewToken(data []byte) ([]byte, error) { +func (s *tokenProtector) NewToken(data []byte) ([]byte, error) { var nonce [tokenNonceSize]byte if _, err := rand.Read(nonce[:]); err != nil { return nil, err @@ -48,7 +40,7 @@ func (s *tokenProtectorImpl) NewToken(data []byte) ([]byte, error) { } // DecodeToken decodes a token. -func (s *tokenProtectorImpl) DecodeToken(p []byte) ([]byte, error) { +func (s *tokenProtector) DecodeToken(p []byte) ([]byte, error) { if len(p) < tokenNonceSize { return nil, fmt.Errorf("token too short: %d", len(p)) } @@ -60,7 +52,7 @@ func (s *tokenProtectorImpl) DecodeToken(p []byte) ([]byte, error) { return aead.Open(nil, aeadNonce, p[tokenNonceSize:], nil) } -func (s *tokenProtectorImpl) createAEAD(nonce []byte) (cipher.AEAD, []byte, error) { +func (s *tokenProtector) createAEAD(nonce []byte) (cipher.AEAD, []byte, error) { h := hkdf.New(sha256.New, s.key[:], nonce, []byte("quic-go token source")) key := make([]byte, 32) // use a 32 byte key, in order to select AES-256 if _, err := io.ReadFull(h, key); err != nil { diff --git a/vendor/github.com/quic-go/quic-go/internal/logutils/frame.go b/vendor/github.com/quic-go/quic-go/internal/logutils/frame.go deleted file mode 100644 index a6032fc2..00000000 --- a/vendor/github.com/quic-go/quic-go/internal/logutils/frame.go +++ /dev/null @@ -1,50 +0,0 @@ -package logutils - -import ( - "github.com/quic-go/quic-go/internal/protocol" - "github.com/quic-go/quic-go/internal/wire" - "github.com/quic-go/quic-go/logging" -) - -// ConvertFrame converts a wire.Frame into a logging.Frame. -// This makes it possible for external packages to access the frames. -// Furthermore, it removes the data slices from CRYPTO and STREAM frames. -func ConvertFrame(frame wire.Frame) logging.Frame { - switch f := frame.(type) { - case *wire.AckFrame: - // We use a pool for ACK frames. - // Implementations of the tracer interface may hold on to frames, so we need to make a copy here. - return ConvertAckFrame(f) - case *wire.CryptoFrame: - return &logging.CryptoFrame{ - Offset: f.Offset, - Length: protocol.ByteCount(len(f.Data)), - } - case *wire.StreamFrame: - return &logging.StreamFrame{ - StreamID: f.StreamID, - Offset: f.Offset, - Length: f.DataLen(), - Fin: f.Fin, - } - case *wire.DatagramFrame: - return &logging.DatagramFrame{ - Length: logging.ByteCount(len(f.Data)), - } - default: - return logging.Frame(frame) - } -} - -func ConvertAckFrame(f *wire.AckFrame) *logging.AckFrame { - ranges := make([]wire.AckRange, 0, len(f.AckRanges)) - ranges = append(ranges, f.AckRanges...) - ack := &logging.AckFrame{ - AckRanges: ranges, - DelayTime: f.DelayTime, - ECNCE: f.ECNCE, - ECT0: f.ECT0, - ECT1: f.ECT1, - } - return ack -} diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/packet_number.go b/vendor/github.com/quic-go/quic-go/internal/protocol/packet_number.go index bd340161..9422db92 100644 --- a/vendor/github.com/quic-go/quic-go/internal/protocol/packet_number.go +++ b/vendor/github.com/quic-go/quic-go/internal/protocol/packet_number.go @@ -21,58 +21,36 @@ const ( PacketNumberLen4 PacketNumberLen = 4 ) -// DecodePacketNumber calculates the packet number based on the received packet number, its length and the last seen packet number -func DecodePacketNumber( - packetNumberLength PacketNumberLen, - lastPacketNumber PacketNumber, - wirePacketNumber PacketNumber, -) PacketNumber { - var epochDelta PacketNumber - switch packetNumberLength { - case PacketNumberLen1: - epochDelta = PacketNumber(1) << 8 - case PacketNumberLen2: - epochDelta = PacketNumber(1) << 16 - case PacketNumberLen3: - epochDelta = PacketNumber(1) << 24 - case PacketNumberLen4: - epochDelta = PacketNumber(1) << 32 +// DecodePacketNumber calculates the packet number based its length and the last seen packet number +// This function is taken from https://www.rfc-editor.org/rfc/rfc9000.html#section-a.3. +func DecodePacketNumber(length PacketNumberLen, largest PacketNumber, truncated PacketNumber) PacketNumber { + expected := largest + 1 + win := PacketNumber(1 << (length * 8)) + hwin := win / 2 + mask := win - 1 + candidate := (expected & ^mask) | truncated + if candidate <= expected-hwin && candidate < 1<<62-win { + return candidate + win } - epoch := lastPacketNumber & ^(epochDelta - 1) - var prevEpochBegin PacketNumber - if epoch > epochDelta { - prevEpochBegin = epoch - epochDelta + if candidate > expected+hwin && candidate >= win { + return candidate - win } - nextEpochBegin := epoch + epochDelta - return closestTo( - lastPacketNumber+1, - epoch+wirePacketNumber, - closestTo(lastPacketNumber+1, prevEpochBegin+wirePacketNumber, nextEpochBegin+wirePacketNumber), - ) + return candidate } -func closestTo(target, a, b PacketNumber) PacketNumber { - if delta(target, a) < delta(target, b) { - return a - } - return b -} - -func delta(a, b PacketNumber) PacketNumber { - if a < b { - return b - a - } - return a - b -} - -// GetPacketNumberLengthForHeader gets the length of the packet number for the public header +// PacketNumberLengthForHeader gets the length of the packet number for the public header // it never chooses a PacketNumberLen of 1 byte, since this is too short under certain circumstances -func GetPacketNumberLengthForHeader(packetNumber, leastUnacked PacketNumber) PacketNumberLen { - diff := uint64(packetNumber - leastUnacked) - if diff < (1 << (16 - 1)) { +func PacketNumberLengthForHeader(pn, largestAcked PacketNumber) PacketNumberLen { + var numUnacked PacketNumber + if largestAcked == InvalidPacketNumber { + numUnacked = pn + 1 + } else { + numUnacked = pn - largestAcked + } + if numUnacked < 1<<(16-1) { return PacketNumberLen2 } - if diff < (1 << (24 - 1)) { + if numUnacked < 1<<(24-1) { return PacketNumberLen3 } return PacketNumberLen4 diff --git a/vendor/github.com/quic-go/quic-go/internal/protocol/params.go b/vendor/github.com/quic-go/quic-go/internal/protocol/params.go index 487cbc06..7c4d8d4d 100644 --- a/vendor/github.com/quic-go/quic-go/internal/protocol/params.go +++ b/vendor/github.com/quic-go/quic-go/internal/protocol/params.go @@ -3,16 +3,13 @@ package protocol import "time" // DesiredReceiveBufferSize is the kernel UDP receive buffer size that we'd like to use. -const DesiredReceiveBufferSize = (1 << 20) * 2 // 2 MB +const DesiredReceiveBufferSize = (1 << 20) * 7 // 7 MB // DesiredSendBufferSize is the kernel UDP send buffer size that we'd like to use. -const DesiredSendBufferSize = (1 << 20) * 2 // 2 MB +const DesiredSendBufferSize = (1 << 20) * 7 // 7 MB -// InitialPacketSizeIPv4 is the maximum packet size that we use for sending IPv4 packets. -const InitialPacketSizeIPv4 = 1252 - -// InitialPacketSizeIPv6 is the maximum packet size that we use for sending IPv6 packets. -const InitialPacketSizeIPv6 = 1232 +// InitialPacketSize is the initial (before Path MTU discovery) maximum packet size used. +const InitialPacketSize = 1280 // MaxCongestionWindowPackets is the maximum congestion window in packet. const MaxCongestionWindowPackets = 10000 diff --git a/vendor/github.com/quic-go/quic-go/internal/handshake/conn.go b/vendor/github.com/quic-go/quic-go/internal/qtls/conn.go similarity index 97% rename from vendor/github.com/quic-go/quic-go/internal/handshake/conn.go rename to vendor/github.com/quic-go/quic-go/internal/qtls/conn.go index 54af823b..6660ac66 100644 --- a/vendor/github.com/quic-go/quic-go/internal/handshake/conn.go +++ b/vendor/github.com/quic-go/quic-go/internal/qtls/conn.go @@ -1,4 +1,4 @@ -package handshake +package qtls import ( "net" diff --git a/vendor/github.com/quic-go/quic-go/internal/qtls/qtls.go b/vendor/github.com/quic-go/quic-go/internal/qtls/qtls.go index ebcd9d4d..cdfe82a2 100644 --- a/vendor/github.com/quic-go/quic-go/internal/qtls/qtls.go +++ b/vendor/github.com/quic-go/quic-go/internal/qtls/qtls.go @@ -4,20 +4,23 @@ import ( "bytes" "crypto/tls" "fmt" + "net" "github.com/quic-go/quic-go/internal/protocol" ) -func SetupConfigForServer(qconf *tls.QUICConfig, _ bool, getData func() []byte, handleSessionTicket func([]byte, bool) bool) { - conf := qconf.TLSConfig - +func SetupConfigForServer( + conf *tls.Config, + localAddr, remoteAddr net.Addr, + getData func() []byte, + handleSessionTicket func([]byte, bool) bool, +) *tls.Config { // Workaround for https://github.com/golang/go/issues/60506. // This initializes the session tickets _before_ cloning the config. _, _ = conf.DecryptTicket(nil, tls.ConnectionState{}) conf = conf.Clone() conf.MinVersion = tls.VersionTLS13 - qconf.TLSConfig = conf // add callbacks to save transport parameters into the session ticket origWrapSession := conf.WrapSession @@ -58,6 +61,29 @@ func SetupConfigForServer(qconf *tls.QUICConfig, _ bool, getData func() []byte, return state, nil } + // The tls.Config contains two callbacks that pass in a tls.ClientHelloInfo. + // Since crypto/tls doesn't do it, we need to make sure to set the Conn field with a fake net.Conn + // that allows the caller to get the local and the remote address. + if conf.GetConfigForClient != nil { + gcfc := conf.GetConfigForClient + conf.GetConfigForClient = func(info *tls.ClientHelloInfo) (*tls.Config, error) { + info.Conn = &conn{localAddr: localAddr, remoteAddr: remoteAddr} + c, err := gcfc(info) + if c != nil { + // We're returning a tls.Config here, so we need to apply this recursively. + c = SetupConfigForServer(c, localAddr, remoteAddr, getData, handleSessionTicket) + } + return c, err + } + } + if conf.GetCertificate != nil { + gc := conf.GetCertificate + conf.GetCertificate = func(info *tls.ClientHelloInfo) (*tls.Certificate, error) { + info.Conn = &conn{localAddr: localAddr, remoteAddr: remoteAddr} + return gc(info) + } + } + return conf } func SetupConfigForClient( diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/byteorder.go b/vendor/github.com/quic-go/quic-go/internal/utils/byteorder.go deleted file mode 100644 index a9b715e2..00000000 --- a/vendor/github.com/quic-go/quic-go/internal/utils/byteorder.go +++ /dev/null @@ -1,21 +0,0 @@ -package utils - -import ( - "bytes" - "io" -) - -// A ByteOrder specifies how to convert byte sequences into 16-, 32-, or 64-bit unsigned integers. -type ByteOrder interface { - Uint32([]byte) uint32 - Uint24([]byte) uint32 - Uint16([]byte) uint16 - - ReadUint32(io.ByteReader) (uint32, error) - ReadUint24(io.ByteReader) (uint32, error) - ReadUint16(io.ByteReader) (uint16, error) - - WriteUint32(*bytes.Buffer, uint32) - WriteUint24(*bytes.Buffer, uint32) - WriteUint16(*bytes.Buffer, uint16) -} diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/byteorder_big_endian.go b/vendor/github.com/quic-go/quic-go/internal/utils/byteorder_big_endian.go deleted file mode 100644 index 834a711b..00000000 --- a/vendor/github.com/quic-go/quic-go/internal/utils/byteorder_big_endian.go +++ /dev/null @@ -1,103 +0,0 @@ -package utils - -import ( - "bytes" - "encoding/binary" - "io" -) - -// BigEndian is the big-endian implementation of ByteOrder. -var BigEndian ByteOrder = bigEndian{} - -type bigEndian struct{} - -var _ ByteOrder = &bigEndian{} - -// ReadUintN reads N bytes -func (bigEndian) ReadUintN(b io.ByteReader, length uint8) (uint64, error) { - var res uint64 - for i := uint8(0); i < length; i++ { - bt, err := b.ReadByte() - if err != nil { - return 0, err - } - res ^= uint64(bt) << ((length - 1 - i) * 8) - } - return res, nil -} - -// ReadUint32 reads a uint32 -func (bigEndian) ReadUint32(b io.ByteReader) (uint32, error) { - var b1, b2, b3, b4 uint8 - var err error - if b4, err = b.ReadByte(); err != nil { - return 0, err - } - if b3, err = b.ReadByte(); err != nil { - return 0, err - } - if b2, err = b.ReadByte(); err != nil { - return 0, err - } - if b1, err = b.ReadByte(); err != nil { - return 0, err - } - return uint32(b1) + uint32(b2)<<8 + uint32(b3)<<16 + uint32(b4)<<24, nil -} - -// ReadUint24 reads a uint24 -func (bigEndian) ReadUint24(b io.ByteReader) (uint32, error) { - var b1, b2, b3 uint8 - var err error - if b3, err = b.ReadByte(); err != nil { - return 0, err - } - if b2, err = b.ReadByte(); err != nil { - return 0, err - } - if b1, err = b.ReadByte(); err != nil { - return 0, err - } - return uint32(b1) + uint32(b2)<<8 + uint32(b3)<<16, nil -} - -// ReadUint16 reads a uint16 -func (bigEndian) ReadUint16(b io.ByteReader) (uint16, error) { - var b1, b2 uint8 - var err error - if b2, err = b.ReadByte(); err != nil { - return 0, err - } - if b1, err = b.ReadByte(); err != nil { - return 0, err - } - return uint16(b1) + uint16(b2)<<8, nil -} - -func (bigEndian) Uint32(b []byte) uint32 { - return binary.BigEndian.Uint32(b) -} - -func (bigEndian) Uint24(b []byte) uint32 { - _ = b[2] // bounds check hint to compiler; see golang.org/issue/14808 - return uint32(b[2]) | uint32(b[1])<<8 | uint32(b[0])<<16 -} - -func (bigEndian) Uint16(b []byte) uint16 { - return binary.BigEndian.Uint16(b) -} - -// WriteUint32 writes a uint32 -func (bigEndian) WriteUint32(b *bytes.Buffer, i uint32) { - b.Write([]byte{uint8(i >> 24), uint8(i >> 16), uint8(i >> 8), uint8(i)}) -} - -// WriteUint24 writes a uint24 -func (bigEndian) WriteUint24(b *bytes.Buffer, i uint32) { - b.Write([]byte{uint8(i >> 16), uint8(i >> 8), uint8(i)}) -} - -// WriteUint16 writes a uint16 -func (bigEndian) WriteUint16(b *bytes.Buffer, i uint16) { - b.Write([]byte{uint8(i >> 8), uint8(i)}) -} diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/ip.go b/vendor/github.com/quic-go/quic-go/internal/utils/ip.go deleted file mode 100644 index 7ac7ffec..00000000 --- a/vendor/github.com/quic-go/quic-go/internal/utils/ip.go +++ /dev/null @@ -1,10 +0,0 @@ -package utils - -import "net" - -func IsIPv4(ip net.IP) bool { - // If ip is not an IPv4 address, To4 returns nil. - // Note that there might be some corner cases, where this is not correct. - // See https://stackoverflow.com/questions/22751035/golang-distinguish-ipv4-ipv6. - return ip.To4() != nil -} diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/minmax.go b/vendor/github.com/quic-go/quic-go/internal/utils/minmax.go deleted file mode 100644 index 03a9c9a8..00000000 --- a/vendor/github.com/quic-go/quic-go/internal/utils/minmax.go +++ /dev/null @@ -1,36 +0,0 @@ -package utils - -import ( - "math" - "time" -) - -// InfDuration is a duration of infinite length -const InfDuration = time.Duration(math.MaxInt64) - -// MinNonZeroDuration return the minimum duration that's not zero. -func MinNonZeroDuration(a, b time.Duration) time.Duration { - if a == 0 { - return b - } - if b == 0 { - return a - } - return min(a, b) -} - -// MinTime returns the earlier time -func MinTime(a, b time.Time) time.Time { - if a.After(b) { - return b - } - return a -} - -// MaxTime returns the later time -func MaxTime(a, b time.Time) time.Time { - if a.After(b) { - return a - } - return b -} diff --git a/vendor/github.com/quic-go/quic-go/internal/utils/rtt_stats.go b/vendor/github.com/quic-go/quic-go/internal/utils/rtt_stats.go index 463b9542..dcfac67d 100644 --- a/vendor/github.com/quic-go/quic-go/internal/utils/rtt_stats.go +++ b/vendor/github.com/quic-go/quic-go/internal/utils/rtt_stats.go @@ -27,11 +27,6 @@ type RTTStats struct { maxAckDelay time.Duration } -// NewRTTStats makes a properly initialized RTTStats object -func NewRTTStats() *RTTStats { - return &RTTStats{} -} - // MinRTT Returns the minRTT for the entire connection. // May return Zero if no valid updates have occurred. func (r *RTTStats) MinRTT() time.Duration { return r.minRTT } @@ -64,7 +59,7 @@ func (r *RTTStats) PTO(includeMaxAckDelay bool) time.Duration { // UpdateRTT updates the RTT based on a new sample. func (r *RTTStats) UpdateRTT(sendDelta, ackDelay time.Duration, now time.Time) { - if sendDelta == InfDuration || sendDelta <= 0 { + if sendDelta <= 0 { return } @@ -113,19 +108,3 @@ func (r *RTTStats) SetInitialRTT(t time.Duration) { r.smoothedRTT = t r.latestRTT = t } - -// OnConnectionMigration is called when connection migrates and rtt measurement needs to be reset. -func (r *RTTStats) OnConnectionMigration() { - r.latestRTT = 0 - r.minRTT = 0 - r.smoothedRTT = 0 - r.meanDeviation = 0 -} - -// ExpireSmoothedMetrics causes the smoothed_rtt to be increased to the latest_rtt if the latest_rtt -// is larger. The mean deviation is increased to the most recent deviation if -// it's larger. -func (r *RTTStats) ExpireSmoothedMetrics() { - r.meanDeviation = max(r.meanDeviation, (r.smoothedRTT - r.latestRTT).Abs()) - r.smoothedRTT = max(r.smoothedRTT, r.latestRTT) -} diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/ack_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/ack_frame.go index a0f3feb0..8befef4f 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/ack_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/ack_frame.go @@ -1,13 +1,12 @@ package wire import ( - "bytes" "errors" + "math" "sort" "time" "github.com/quic-go/quic-go/internal/protocol" - "github.com/quic-go/quic-go/internal/utils" "github.com/quic-go/quic-go/quicvarint" ) @@ -22,91 +21,101 @@ type AckFrame struct { } // parseAckFrame reads an ACK frame -func parseAckFrame(frame *AckFrame, r *bytes.Reader, typ uint64, ackDelayExponent uint8, _ protocol.Version) error { +func parseAckFrame(frame *AckFrame, b []byte, typ uint64, ackDelayExponent uint8, _ protocol.Version) (int, error) { + startLen := len(b) ecn := typ == ackECNFrameType - la, err := quicvarint.Read(r) + la, l, err := quicvarint.Parse(b) if err != nil { - return err + return 0, replaceUnexpectedEOF(err) } + b = b[l:] largestAcked := protocol.PacketNumber(la) - delay, err := quicvarint.Read(r) + delay, l, err := quicvarint.Parse(b) if err != nil { - return err + return 0, replaceUnexpectedEOF(err) } + b = b[l:] delayTime := time.Duration(delay*1< largestAcked { - return errors.New("invalid first ACK range") + return 0, errors.New("invalid first ACK range") } smallest := largestAcked - ackBlock frame.AckRanges = append(frame.AckRanges, AckRange{Smallest: smallest, Largest: largestAcked}) // read all the other ACK ranges for i := uint64(0); i < numBlocks; i++ { - g, err := quicvarint.Read(r) + g, l, err := quicvarint.Parse(b) if err != nil { - return err + return 0, replaceUnexpectedEOF(err) } + b = b[l:] gap := protocol.PacketNumber(g) if smallest < gap+2 { - return errInvalidAckRanges + return 0, errInvalidAckRanges } largest := smallest - gap - 2 - ab, err := quicvarint.Read(r) + ab, l, err := quicvarint.Parse(b) if err != nil { - return err + return 0, replaceUnexpectedEOF(err) } + b = b[l:] ackBlock := protocol.PacketNumber(ab) if ackBlock > largest { - return errInvalidAckRanges + return 0, errInvalidAckRanges } smallest = largest - ackBlock frame.AckRanges = append(frame.AckRanges, AckRange{Smallest: smallest, Largest: largest}) } if !frame.validateAckRanges() { - return errInvalidAckRanges + return 0, errInvalidAckRanges } if ecn { - ect0, err := quicvarint.Read(r) + ect0, l, err := quicvarint.Parse(b) if err != nil { - return err + return 0, replaceUnexpectedEOF(err) } + b = b[l:] frame.ECT0 = ect0 - ect1, err := quicvarint.Read(r) + ect1, l, err := quicvarint.Parse(b) if err != nil { - return err + return 0, replaceUnexpectedEOF(err) } + b = b[l:] frame.ECT1 = ect1 - ecnce, err := quicvarint.Read(r) + ecnce, l, err := quicvarint.Parse(b) if err != nil { - return err + return 0, replaceUnexpectedEOF(err) } + b = b[l:] frame.ECNCE = ecnce } - return nil + return startLen - len(b), nil } // Append appends an ACK frame. @@ -163,7 +172,7 @@ func (f *AckFrame) Length(_ protocol.Version) protocol.ByteCount { length += quicvarint.Len(f.ECT1) length += quicvarint.Len(f.ECNCE) } - return length + return protocol.ByteCount(length) } // gets the number of ACK ranges that can be encoded @@ -174,7 +183,7 @@ func (f *AckFrame) numEncodableAckRanges() int { for i := 1; i < len(f.AckRanges); i++ { gap, len := f.encodeAckRange(i) rangeLen := quicvarint.Len(gap) + quicvarint.Len(len) - if length+rangeLen > protocol.MaxAckFrameSize { + if protocol.ByteCount(length+rangeLen) > protocol.MaxAckFrameSize { // Writing range i would exceed the MaxAckFrameSize. // So encode one range less than that. return i - 1 diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/connection_close_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/connection_close_frame.go index df362447..be11a1b2 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/connection_close_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/connection_close_frame.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "io" "github.com/quic-go/quic-go/internal/protocol" @@ -16,47 +15,45 @@ type ConnectionCloseFrame struct { ReasonPhrase string } -func parseConnectionCloseFrame(r *bytes.Reader, typ uint64, _ protocol.Version) (*ConnectionCloseFrame, error) { +func parseConnectionCloseFrame(b []byte, typ uint64, _ protocol.Version) (*ConnectionCloseFrame, int, error) { + startLen := len(b) f := &ConnectionCloseFrame{IsApplicationError: typ == applicationCloseFrameType} - ec, err := quicvarint.Read(r) + ec, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } + b = b[l:] f.ErrorCode = ec // read the Frame Type, if this is not an application error if !f.IsApplicationError { - ft, err := quicvarint.Read(r) + ft, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } + b = b[l:] f.FrameType = ft } var reasonPhraseLen uint64 - reasonPhraseLen, err = quicvarint.Read(r) + reasonPhraseLen, l, err = quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } - // shortcut to prevent the unnecessary allocation of dataLen bytes - // if the dataLen is larger than the remaining length of the packet - // reading the whole reason phrase would result in EOF when attempting to READ - if int(reasonPhraseLen) > r.Len() { - return nil, io.EOF + b = b[l:] + if int(reasonPhraseLen) > len(b) { + return nil, 0, io.EOF } reasonPhrase := make([]byte, reasonPhraseLen) - if _, err := io.ReadFull(r, reasonPhrase); err != nil { - // this should never happen, since we already checked the reasonPhraseLen earlier - return nil, err - } + copy(reasonPhrase, b) f.ReasonPhrase = string(reasonPhrase) - return f, nil + return f, startLen - len(b) + int(reasonPhraseLen), nil } // Length of a written frame func (f *ConnectionCloseFrame) Length(protocol.Version) protocol.ByteCount { - length := 1 + quicvarint.Len(f.ErrorCode) + quicvarint.Len(uint64(len(f.ReasonPhrase))) + protocol.ByteCount(len(f.ReasonPhrase)) + length := 1 + protocol.ByteCount(quicvarint.Len(f.ErrorCode)+quicvarint.Len(uint64(len(f.ReasonPhrase)))) + protocol.ByteCount(len(f.ReasonPhrase)) if !f.IsApplicationError { - length += quicvarint.Len(f.FrameType) // for the frame type + length += protocol.ByteCount(quicvarint.Len(f.FrameType)) // for the frame type } return length } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/crypto_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/crypto_frame.go index d4214639..0aa7fe7b 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/crypto_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/crypto_frame.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "io" "github.com/quic-go/quic-go/internal/protocol" @@ -14,28 +13,28 @@ type CryptoFrame struct { Data []byte } -func parseCryptoFrame(r *bytes.Reader, _ protocol.Version) (*CryptoFrame, error) { +func parseCryptoFrame(b []byte, _ protocol.Version) (*CryptoFrame, int, error) { + startLen := len(b) frame := &CryptoFrame{} - offset, err := quicvarint.Read(r) + offset, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } + b = b[l:] frame.Offset = protocol.ByteCount(offset) - dataLen, err := quicvarint.Read(r) + dataLen, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } - if dataLen > uint64(r.Len()) { - return nil, io.EOF + b = b[l:] + if dataLen > uint64(len(b)) { + return nil, 0, io.EOF } if dataLen != 0 { frame.Data = make([]byte, dataLen) - if _, err := io.ReadFull(r, frame.Data); err != nil { - // this should never happen, since we already checked the dataLen earlier - return nil, err - } + copy(frame.Data, b) } - return frame, nil + return frame, startLen - len(b) + int(dataLen), nil } func (f *CryptoFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { @@ -48,14 +47,14 @@ func (f *CryptoFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { // Length of a written frame func (f *CryptoFrame) Length(_ protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(uint64(f.Offset)) + quicvarint.Len(uint64(len(f.Data))) + protocol.ByteCount(len(f.Data)) + return protocol.ByteCount(1 + quicvarint.Len(uint64(f.Offset)) + quicvarint.Len(uint64(len(f.Data))) + len(f.Data)) } // MaxDataLen returns the maximum data length func (f *CryptoFrame) MaxDataLen(maxSize protocol.ByteCount) protocol.ByteCount { // pretend that the data size will be 1 bytes // if it turns out that varint encoding the length will consume 2 bytes, we need to adjust the data length afterwards - headerLen := 1 + quicvarint.Len(uint64(f.Offset)) + 1 + headerLen := protocol.ByteCount(1 + quicvarint.Len(uint64(f.Offset)) + 1) if headerLen > maxSize { return 0 } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/data_blocked_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/data_blocked_frame.go index 8fe2acb5..c97d4c62 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/data_blocked_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/data_blocked_frame.go @@ -1,8 +1,6 @@ package wire import ( - "bytes" - "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/quicvarint" ) @@ -12,12 +10,12 @@ type DataBlockedFrame struct { MaximumData protocol.ByteCount } -func parseDataBlockedFrame(r *bytes.Reader, _ protocol.Version) (*DataBlockedFrame, error) { - offset, err := quicvarint.Read(r) +func parseDataBlockedFrame(b []byte, _ protocol.Version) (*DataBlockedFrame, int, error) { + offset, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } - return &DataBlockedFrame{MaximumData: protocol.ByteCount(offset)}, nil + return &DataBlockedFrame{MaximumData: protocol.ByteCount(offset)}, l, nil } func (f *DataBlockedFrame) Append(b []byte, version protocol.Version) ([]byte, error) { @@ -27,5 +25,5 @@ func (f *DataBlockedFrame) Append(b []byte, version protocol.Version) ([]byte, e // Length of a written frame func (f *DataBlockedFrame) Length(version protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(uint64(f.MaximumData)) + return 1 + protocol.ByteCount(quicvarint.Len(uint64(f.MaximumData))) } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/datagram_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/datagram_frame.go index 8e406f1a..071fda9a 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/datagram_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/datagram_frame.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "io" "github.com/quic-go/quic-go/internal/protocol" @@ -20,29 +19,29 @@ type DatagramFrame struct { Data []byte } -func parseDatagramFrame(r *bytes.Reader, typ uint64, _ protocol.Version) (*DatagramFrame, error) { +func parseDatagramFrame(b []byte, typ uint64, _ protocol.Version) (*DatagramFrame, int, error) { + startLen := len(b) f := &DatagramFrame{} f.DataLenPresent = typ&0x1 > 0 var length uint64 if f.DataLenPresent { var err error - len, err := quicvarint.Read(r) + var l int + length, l, err = quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } - if len > uint64(r.Len()) { - return nil, io.EOF + b = b[l:] + if length > uint64(len(b)) { + return nil, 0, io.EOF } - length = len } else { - length = uint64(r.Len()) + length = uint64(len(b)) } f.Data = make([]byte, length) - if _, err := io.ReadFull(r, f.Data); err != nil { - return nil, err - } - return f, nil + copy(f.Data, b) + return f, startLen - len(b) + int(length), nil } func (f *DatagramFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { @@ -80,7 +79,7 @@ func (f *DatagramFrame) MaxDataLen(maxSize protocol.ByteCount, version protocol. func (f *DatagramFrame) Length(_ protocol.Version) protocol.ByteCount { length := 1 + protocol.ByteCount(len(f.Data)) if f.DataLenPresent { - length += quicvarint.Len(uint64(len(f.Data))) + length += protocol.ByteCount(quicvarint.Len(uint64(len(f.Data)))) } return length } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/extended_header.go b/vendor/github.com/quic-go/quic-go/internal/wire/extended_header.go index e04d91b7..1c6ad991 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/extended_header.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/extended_header.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "encoding/binary" "errors" "fmt" @@ -32,66 +31,23 @@ type ExtendedHeader struct { parsedLen protocol.ByteCount } -func (h *ExtendedHeader) parse(b *bytes.Reader, v protocol.Version) (bool /* reserved bits valid */, error) { - startLen := b.Len() +func (h *ExtendedHeader) parse(data []byte) (bool /* reserved bits valid */, error) { // read the (now unencrypted) first byte - var err error - h.typeByte, err = b.ReadByte() - if err != nil { - return false, err - } - if _, err := b.Seek(int64(h.Header.ParsedLen())-1, io.SeekCurrent); err != nil { - return false, err - } - reservedBitsValid, err := h.parseLongHeader(b, v) - if err != nil { - return false, err + h.typeByte = data[0] + h.PacketNumberLen = protocol.PacketNumberLen(h.typeByte&0x3) + 1 + if protocol.ByteCount(len(data)) < h.Header.ParsedLen()+protocol.ByteCount(h.PacketNumberLen) { + return false, io.EOF } - h.parsedLen = protocol.ByteCount(startLen - b.Len()) - return reservedBitsValid, err -} -func (h *ExtendedHeader) parseLongHeader(b *bytes.Reader, _ protocol.Version) (bool /* reserved bits valid */, error) { - if err := h.readPacketNumber(b); err != nil { - return false, err - } - if h.typeByte&0xc != 0 { - return false, nil + pn, err := readPacketNumber(data[h.Header.ParsedLen():], h.PacketNumberLen) + if err != nil { + return true, nil } - return true, nil -} + h.PacketNumber = pn + reservedBitsValid := h.typeByte&0xc == 0 -func (h *ExtendedHeader) readPacketNumber(b *bytes.Reader) error { - h.PacketNumberLen = protocol.PacketNumberLen(h.typeByte&0x3) + 1 - switch h.PacketNumberLen { - case protocol.PacketNumberLen1: - n, err := b.ReadByte() - if err != nil { - return err - } - h.PacketNumber = protocol.PacketNumber(n) - case protocol.PacketNumberLen2: - n, err := utils.BigEndian.ReadUint16(b) - if err != nil { - return err - } - h.PacketNumber = protocol.PacketNumber(n) - case protocol.PacketNumberLen3: - n, err := utils.BigEndian.ReadUint24(b) - if err != nil { - return err - } - h.PacketNumber = protocol.PacketNumber(n) - case protocol.PacketNumberLen4: - n, err := utils.BigEndian.ReadUint32(b) - if err != nil { - return err - } - h.PacketNumber = protocol.PacketNumber(n) - default: - return fmt.Errorf("invalid packet number length: %d", h.PacketNumberLen) - } - return nil + h.parsedLen = h.Header.ParsedLen() + protocol.ByteCount(h.PacketNumberLen) + return reservedBitsValid, err } // Append appends the Header. @@ -165,7 +121,7 @@ func (h *ExtendedHeader) ParsedLen() protocol.ByteCount { func (h *ExtendedHeader) GetLength(_ protocol.Version) protocol.ByteCount { length := 1 /* type byte */ + 4 /* version */ + 1 /* dest conn ID len */ + protocol.ByteCount(h.DestConnectionID.Len()) + 1 /* src conn ID len */ + protocol.ByteCount(h.SrcConnectionID.Len()) + protocol.ByteCount(h.PacketNumberLen) + 2 /* length */ if h.Type == protocol.PacketTypeInitial { - length += quicvarint.Len(uint64(len(h.Token))) + protocol.ByteCount(len(h.Token)) + length += protocol.ByteCount(quicvarint.Len(uint64(len(h.Token))) + len(h.Token)) } return length } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/frame_parser.go b/vendor/github.com/quic-go/quic-go/internal/wire/frame_parser.go index cf7d4cec..59d41444 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/frame_parser.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/frame_parser.go @@ -1,9 +1,9 @@ package wire import ( - "bytes" "errors" "fmt" + "io" "reflect" "github.com/quic-go/quic-go/internal/protocol" @@ -38,8 +38,6 @@ const ( // The FrameParser parses QUIC frames, one by one. type FrameParser struct { - r bytes.Reader // cached bytes.Reader, so we don't have to repeatedly allocate them - ackDelayExponent uint8 supportsDatagrams bool @@ -51,7 +49,6 @@ type FrameParser struct { // NewFrameParser creates a new frame parser. func NewFrameParser(supportsDatagrams bool) *FrameParser { return &FrameParser{ - r: *bytes.NewReader(nil), supportsDatagrams: supportsDatagrams, ackFrame: &AckFrame{}, } @@ -60,45 +57,46 @@ func NewFrameParser(supportsDatagrams bool) *FrameParser { // ParseNext parses the next frame. // It skips PADDING frames. func (p *FrameParser) ParseNext(data []byte, encLevel protocol.EncryptionLevel, v protocol.Version) (int, Frame, error) { - startLen := len(data) - p.r.Reset(data) - frame, err := p.parseNext(&p.r, encLevel, v) - n := startLen - p.r.Len() - p.r.Reset(nil) - return n, frame, err + frame, l, err := p.parseNext(data, encLevel, v) + return l, frame, err } -func (p *FrameParser) parseNext(r *bytes.Reader, encLevel protocol.EncryptionLevel, v protocol.Version) (Frame, error) { - for r.Len() != 0 { - typ, err := quicvarint.Read(r) +func (p *FrameParser) parseNext(b []byte, encLevel protocol.EncryptionLevel, v protocol.Version) (Frame, int, error) { + var parsed int + for len(b) != 0 { + typ, l, err := quicvarint.Parse(b) + parsed += l if err != nil { - return nil, &qerr.TransportError{ + return nil, parsed, &qerr.TransportError{ ErrorCode: qerr.FrameEncodingError, ErrorMessage: err.Error(), } } + b = b[l:] if typ == 0x0 { // skip PADDING frames continue } - f, err := p.parseFrame(r, typ, encLevel, v) + f, l, err := p.parseFrame(b, typ, encLevel, v) + parsed += l if err != nil { - return nil, &qerr.TransportError{ + return nil, parsed, &qerr.TransportError{ FrameType: typ, ErrorCode: qerr.FrameEncodingError, ErrorMessage: err.Error(), } } - return f, nil + return f, parsed, nil } - return nil, nil + return nil, parsed, nil } -func (p *FrameParser) parseFrame(r *bytes.Reader, typ uint64, encLevel protocol.EncryptionLevel, v protocol.Version) (Frame, error) { +func (p *FrameParser) parseFrame(b []byte, typ uint64, encLevel protocol.EncryptionLevel, v protocol.Version) (Frame, int, error) { var frame Frame var err error + var l int if typ&0xf8 == 0x8 { - frame, err = parseStreamFrame(r, typ, v) + frame, l, err = parseStreamFrame(b, typ, v) } else { switch typ { case pingFrameType: @@ -109,43 +107,43 @@ func (p *FrameParser) parseFrame(r *bytes.Reader, typ uint64, encLevel protocol. ackDelayExponent = protocol.DefaultAckDelayExponent } p.ackFrame.Reset() - err = parseAckFrame(p.ackFrame, r, typ, ackDelayExponent, v) + l, err = parseAckFrame(p.ackFrame, b, typ, ackDelayExponent, v) frame = p.ackFrame case resetStreamFrameType: - frame, err = parseResetStreamFrame(r, v) + frame, l, err = parseResetStreamFrame(b, v) case stopSendingFrameType: - frame, err = parseStopSendingFrame(r, v) + frame, l, err = parseStopSendingFrame(b, v) case cryptoFrameType: - frame, err = parseCryptoFrame(r, v) + frame, l, err = parseCryptoFrame(b, v) case newTokenFrameType: - frame, err = parseNewTokenFrame(r, v) + frame, l, err = parseNewTokenFrame(b, v) case maxDataFrameType: - frame, err = parseMaxDataFrame(r, v) + frame, l, err = parseMaxDataFrame(b, v) case maxStreamDataFrameType: - frame, err = parseMaxStreamDataFrame(r, v) + frame, l, err = parseMaxStreamDataFrame(b, v) case bidiMaxStreamsFrameType, uniMaxStreamsFrameType: - frame, err = parseMaxStreamsFrame(r, typ, v) + frame, l, err = parseMaxStreamsFrame(b, typ, v) case dataBlockedFrameType: - frame, err = parseDataBlockedFrame(r, v) + frame, l, err = parseDataBlockedFrame(b, v) case streamDataBlockedFrameType: - frame, err = parseStreamDataBlockedFrame(r, v) + frame, l, err = parseStreamDataBlockedFrame(b, v) case bidiStreamBlockedFrameType, uniStreamBlockedFrameType: - frame, err = parseStreamsBlockedFrame(r, typ, v) + frame, l, err = parseStreamsBlockedFrame(b, typ, v) case newConnectionIDFrameType: - frame, err = parseNewConnectionIDFrame(r, v) + frame, l, err = parseNewConnectionIDFrame(b, v) case retireConnectionIDFrameType: - frame, err = parseRetireConnectionIDFrame(r, v) + frame, l, err = parseRetireConnectionIDFrame(b, v) case pathChallengeFrameType: - frame, err = parsePathChallengeFrame(r, v) + frame, l, err = parsePathChallengeFrame(b, v) case pathResponseFrameType: - frame, err = parsePathResponseFrame(r, v) + frame, l, err = parsePathResponseFrame(b, v) case connectionCloseFrameType, applicationCloseFrameType: - frame, err = parseConnectionCloseFrame(r, typ, v) + frame, l, err = parseConnectionCloseFrame(b, typ, v) case handshakeDoneFrameType: frame = &HandshakeDoneFrame{} case 0x30, 0x31: if p.supportsDatagrams { - frame, err = parseDatagramFrame(r, typ, v) + frame, l, err = parseDatagramFrame(b, typ, v) break } fallthrough @@ -154,12 +152,12 @@ func (p *FrameParser) parseFrame(r *bytes.Reader, typ uint64, encLevel protocol. } } if err != nil { - return nil, err + return nil, 0, err } if !p.isAllowedAtEncLevel(frame, encLevel) { - return nil, fmt.Errorf("%s not allowed at encryption level %s", reflect.TypeOf(frame).Elem().Name(), encLevel) + return nil, l, fmt.Errorf("%s not allowed at encryption level %s", reflect.TypeOf(frame).Elem().Name(), encLevel) } - return frame, nil + return frame, l, nil } func (p *FrameParser) isAllowedAtEncLevel(f Frame, encLevel protocol.EncryptionLevel) bool { @@ -190,3 +188,10 @@ func (p *FrameParser) isAllowedAtEncLevel(f Frame, encLevel protocol.EncryptionL func (p *FrameParser) SetAckDelayExponent(exp uint8) { p.ackDelayExponent = exp } + +func replaceUnexpectedEOF(e error) error { + if e == io.ErrUnexpectedEOF { + return io.EOF + } + return e +} diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/header.go b/vendor/github.com/quic-go/quic-go/internal/wire/header.go index 29911684..678a04a2 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/header.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/header.go @@ -1,14 +1,12 @@ package wire import ( - "bytes" "encoding/binary" "errors" "fmt" "io" "github.com/quic-go/quic-go/internal/protocol" - "github.com/quic-go/quic-go/internal/utils" "github.com/quic-go/quic-go/quicvarint" ) @@ -41,37 +39,27 @@ func ParseConnectionID(data []byte, shortHeaderConnIDLen int) (protocol.Connecti // https://datatracker.ietf.org/doc/html/rfc8999#section-5.1. // This function should only be called on Long Header packets for which we don't support the version. func ParseArbitraryLenConnectionIDs(data []byte) (bytesParsed int, dest, src protocol.ArbitraryLenConnectionID, _ error) { - r := bytes.NewReader(data) - remaining := r.Len() - src, dest, err := parseArbitraryLenConnectionIDs(r) - return remaining - r.Len(), src, dest, err -} - -func parseArbitraryLenConnectionIDs(r *bytes.Reader) (dest, src protocol.ArbitraryLenConnectionID, _ error) { - r.Seek(5, io.SeekStart) // skip first byte and version field - destConnIDLen, err := r.ReadByte() - if err != nil { - return nil, nil, err + startLen := len(data) + if len(data) < 6 { + return 0, nil, nil, io.EOF } + data = data[5:] // skip first byte and version field + destConnIDLen := data[0] + data = data[1:] destConnID := make(protocol.ArbitraryLenConnectionID, destConnIDLen) - if _, err := io.ReadFull(r, destConnID); err != nil { - if err == io.ErrUnexpectedEOF { - err = io.EOF - } - return nil, nil, err + if len(data) < int(destConnIDLen)+1 { + return 0, nil, nil, io.EOF } - srcConnIDLen, err := r.ReadByte() - if err != nil { - return nil, nil, err + copy(destConnID, data) + data = data[destConnIDLen:] + srcConnIDLen := data[0] + data = data[1:] + if len(data) < int(srcConnIDLen) { + return 0, nil, nil, io.EOF } srcConnID := make(protocol.ArbitraryLenConnectionID, srcConnIDLen) - if _, err := io.ReadFull(r, srcConnID); err != nil { - if err == io.ErrUnexpectedEOF { - err = io.EOF - } - return nil, nil, err - } - return destConnID, srcConnID, nil + copy(srcConnID, data) + return startLen - len(data) + int(srcConnIDLen), destConnID, srcConnID, nil } func IsPotentialQUICPacket(firstByte byte) bool { @@ -139,18 +127,18 @@ type Header struct { parsedLen protocol.ByteCount // how many bytes were read while parsing this header } -// ParsePacket parses a packet. -// If the packet has a long header, the packet is cut according to the length field. -// If we understand the version, the packet is header up unto the packet number. +// ParsePacket parses a long header packet. +// The packet is cut according to the length field. +// If we understand the version, the packet is parsed up unto the packet number. // Otherwise, only the invariant part of the header is parsed. func ParsePacket(data []byte) (*Header, []byte, []byte, error) { if len(data) == 0 || !IsLongHeaderPacket(data[0]) { return nil, nil, nil, errors.New("not a long header packet") } - hdr, err := parseHeader(bytes.NewReader(data)) + hdr, err := parseHeader(data) if err != nil { - if err == ErrUnsupportedVersion { - return hdr, nil, nil, ErrUnsupportedVersion + if errors.Is(err, ErrUnsupportedVersion) { + return hdr, nil, nil, err } return nil, nil, nil, err } @@ -161,55 +149,55 @@ func ParsePacket(data []byte) (*Header, []byte, []byte, error) { return hdr, data[:packetLen], data[packetLen:], nil } -// ParseHeader parses the header. -// For short header packets: up to the packet number. -// For long header packets: +// ParseHeader parses the header: // * if we understand the version: up to the packet number // * if not, only the invariant part of the header -func parseHeader(b *bytes.Reader) (*Header, error) { - startLen := b.Len() - typeByte, err := b.ReadByte() - if err != nil { - return nil, err +func parseHeader(b []byte) (*Header, error) { + if len(b) == 0 { + return nil, io.EOF } + typeByte := b[0] h := &Header{typeByte: typeByte} - err = h.parseLongHeader(b) - h.parsedLen = protocol.ByteCount(startLen - b.Len()) + l, err := h.parseLongHeader(b[1:]) + h.parsedLen = protocol.ByteCount(l) + 1 return h, err } -func (h *Header) parseLongHeader(b *bytes.Reader) error { - v, err := utils.BigEndian.ReadUint32(b) - if err != nil { - return err +func (h *Header) parseLongHeader(b []byte) (int, error) { + startLen := len(b) + if len(b) < 5 { + return 0, io.EOF } - h.Version = protocol.Version(v) + h.Version = protocol.Version(binary.BigEndian.Uint32(b[:4])) if h.Version != 0 && h.typeByte&0x40 == 0 { - return errors.New("not a QUIC packet") + return startLen - len(b), errors.New("not a QUIC packet") } - destConnIDLen, err := b.ReadByte() - if err != nil { - return err + destConnIDLen := int(b[4]) + if destConnIDLen > protocol.MaxConnIDLen { + return startLen - len(b), protocol.ErrInvalidConnectionIDLen } - h.DestConnectionID, err = protocol.ReadConnectionID(b, int(destConnIDLen)) - if err != nil { - return err + b = b[5:] + if len(b) < destConnIDLen+1 { + return startLen - len(b), io.EOF } - srcConnIDLen, err := b.ReadByte() - if err != nil { - return err + h.DestConnectionID = protocol.ParseConnectionID(b[:destConnIDLen]) + srcConnIDLen := int(b[destConnIDLen]) + if srcConnIDLen > protocol.MaxConnIDLen { + return startLen - len(b), protocol.ErrInvalidConnectionIDLen } - h.SrcConnectionID, err = protocol.ReadConnectionID(b, int(srcConnIDLen)) - if err != nil { - return err + b = b[destConnIDLen+1:] + if len(b) < srcConnIDLen { + return startLen - len(b), io.EOF } + h.SrcConnectionID = protocol.ParseConnectionID(b[:srcConnIDLen]) + b = b[srcConnIDLen:] if h.Version == 0 { // version negotiation packet - return nil + return startLen - len(b), nil } // If we don't understand the version, we have no idea how to interpret the rest of the bytes if !protocol.IsSupportedVersion(protocol.SupportedVersions, h.Version) { - return ErrUnsupportedVersion + return startLen - len(b), ErrUnsupportedVersion } if h.Version == protocol.Version2 { @@ -237,38 +225,35 @@ func (h *Header) parseLongHeader(b *bytes.Reader) error { } if h.Type == protocol.PacketTypeRetry { - tokenLen := b.Len() - 16 + tokenLen := len(b) - 16 if tokenLen <= 0 { - return io.EOF + return startLen - len(b), io.EOF } h.Token = make([]byte, tokenLen) - if _, err := io.ReadFull(b, h.Token); err != nil { - return err - } - _, err := b.Seek(16, io.SeekCurrent) - return err + copy(h.Token, b[:tokenLen]) + return startLen - len(b) + tokenLen + 16, nil } if h.Type == protocol.PacketTypeInitial { - tokenLen, err := quicvarint.Read(b) + tokenLen, n, err := quicvarint.Parse(b) if err != nil { - return err + return startLen - len(b), err } - if tokenLen > uint64(b.Len()) { - return io.EOF + b = b[n:] + if tokenLen > uint64(len(b)) { + return startLen - len(b), io.EOF } h.Token = make([]byte, tokenLen) - if _, err := io.ReadFull(b, h.Token); err != nil { - return err - } + copy(h.Token, b[:tokenLen]) + b = b[tokenLen:] } - pl, err := quicvarint.Read(b) + pl, n, err := quicvarint.Parse(b) if err != nil { - return err + return 0, err } h.Length = protocol.ByteCount(pl) - return nil + return startLen - len(b) + n, nil } // ParsedLen returns the number of bytes that were consumed when parsing the header @@ -278,9 +263,9 @@ func (h *Header) ParsedLen() protocol.ByteCount { // ParseExtended parses the version dependent part of the header. // The Reader has to be set such that it points to the first byte of the header. -func (h *Header) ParseExtended(b *bytes.Reader, ver protocol.Version) (*ExtendedHeader, error) { +func (h *Header) ParseExtended(data []byte) (*ExtendedHeader, error) { extHdr := h.toExtendedHeader() - reservedBitsValid, err := extHdr.parse(b, ver) + reservedBitsValid, err := extHdr.parse(data) if err != nil { return nil, err } @@ -298,3 +283,20 @@ func (h *Header) toExtendedHeader() *ExtendedHeader { func (h *Header) PacketType() string { return h.Type.String() } + +func readPacketNumber(data []byte, pnLen protocol.PacketNumberLen) (protocol.PacketNumber, error) { + var pn protocol.PacketNumber + switch pnLen { + case protocol.PacketNumberLen1: + pn = protocol.PacketNumber(data[0]) + case protocol.PacketNumberLen2: + pn = protocol.PacketNumber(binary.BigEndian.Uint16(data[:2])) + case protocol.PacketNumberLen3: + pn = protocol.PacketNumber(uint32(data[2]) + uint32(data[1])<<8 + uint32(data[0])<<16) + case protocol.PacketNumberLen4: + pn = protocol.PacketNumber(binary.BigEndian.Uint32(data[:4])) + default: + return 0, fmt.Errorf("invalid packet number length: %d", pnLen) + } + return pn, nil +} diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/max_data_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/max_data_frame.go index 3dfd7611..5819c027 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/max_data_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/max_data_frame.go @@ -1,8 +1,6 @@ package wire import ( - "bytes" - "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/quicvarint" ) @@ -13,14 +11,14 @@ type MaxDataFrame struct { } // parseMaxDataFrame parses a MAX_DATA frame -func parseMaxDataFrame(r *bytes.Reader, _ protocol.Version) (*MaxDataFrame, error) { +func parseMaxDataFrame(b []byte, _ protocol.Version) (*MaxDataFrame, int, error) { frame := &MaxDataFrame{} - byteOffset, err := quicvarint.Read(r) + byteOffset, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } frame.MaximumData = protocol.ByteCount(byteOffset) - return frame, nil + return frame, l, nil } func (f *MaxDataFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { @@ -31,5 +29,5 @@ func (f *MaxDataFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { // Length of a written frame func (f *MaxDataFrame) Length(_ protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(uint64(f.MaximumData)) + return 1 + protocol.ByteCount(quicvarint.Len(uint64(f.MaximumData))) } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/max_stream_data_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/max_stream_data_frame.go index cb5eab1b..db9091af 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/max_stream_data_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/max_stream_data_frame.go @@ -1,8 +1,6 @@ package wire import ( - "bytes" - "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/quicvarint" ) @@ -13,23 +11,26 @@ type MaxStreamDataFrame struct { MaximumStreamData protocol.ByteCount } -func parseMaxStreamDataFrame(r *bytes.Reader, _ protocol.Version) (*MaxStreamDataFrame, error) { - sid, err := quicvarint.Read(r) +func parseMaxStreamDataFrame(b []byte, _ protocol.Version) (*MaxStreamDataFrame, int, error) { + startLen := len(b) + sid, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } - offset, err := quicvarint.Read(r) + b = b[l:] + offset, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } + b = b[l:] return &MaxStreamDataFrame{ StreamID: protocol.StreamID(sid), MaximumStreamData: protocol.ByteCount(offset), - }, nil + }, startLen - len(b), nil } -func (f *MaxStreamDataFrame) Append(b []byte, version protocol.Version) ([]byte, error) { +func (f *MaxStreamDataFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { b = append(b, maxStreamDataFrameType) b = quicvarint.Append(b, uint64(f.StreamID)) b = quicvarint.Append(b, uint64(f.MaximumStreamData)) @@ -37,6 +38,6 @@ func (f *MaxStreamDataFrame) Append(b []byte, version protocol.Version) ([]byte, } // Length of a written frame -func (f *MaxStreamDataFrame) Length(version protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(uint64(f.StreamID)) + quicvarint.Len(uint64(f.MaximumStreamData)) +func (f *MaxStreamDataFrame) Length(protocol.Version) protocol.ByteCount { + return 1 + protocol.ByteCount(quicvarint.Len(uint64(f.StreamID))+quicvarint.Len(uint64(f.MaximumStreamData))) } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/max_streams_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/max_streams_frame.go index d9029338..a8745bd1 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/max_streams_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/max_streams_frame.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "fmt" "github.com/quic-go/quic-go/internal/protocol" @@ -14,7 +13,7 @@ type MaxStreamsFrame struct { MaxStreamNum protocol.StreamNum } -func parseMaxStreamsFrame(r *bytes.Reader, typ uint64, _ protocol.Version) (*MaxStreamsFrame, error) { +func parseMaxStreamsFrame(b []byte, typ uint64, _ protocol.Version) (*MaxStreamsFrame, int, error) { f := &MaxStreamsFrame{} switch typ { case bidiMaxStreamsFrameType: @@ -22,15 +21,15 @@ func parseMaxStreamsFrame(r *bytes.Reader, typ uint64, _ protocol.Version) (*Max case uniMaxStreamsFrameType: f.Type = protocol.StreamTypeUni } - streamID, err := quicvarint.Read(r) + streamID, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } f.MaxStreamNum = protocol.StreamNum(streamID) if f.MaxStreamNum > protocol.MaxStreamCount { - return nil, fmt.Errorf("%d exceeds the maximum stream count", f.MaxStreamNum) + return nil, 0, fmt.Errorf("%d exceeds the maximum stream count", f.MaxStreamNum) } - return f, nil + return f, l, nil } func (f *MaxStreamsFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { @@ -46,5 +45,5 @@ func (f *MaxStreamsFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { // Length of a written frame func (f *MaxStreamsFrame) Length(protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(uint64(f.MaxStreamNum)) + return 1 + protocol.ByteCount(quicvarint.Len(uint64(f.MaxStreamNum))) } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/new_connection_id_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/new_connection_id_frame.go index afae010a..852d46ef 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/new_connection_id_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/new_connection_id_frame.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "errors" "fmt" "io" @@ -18,43 +17,47 @@ type NewConnectionIDFrame struct { StatelessResetToken protocol.StatelessResetToken } -func parseNewConnectionIDFrame(r *bytes.Reader, _ protocol.Version) (*NewConnectionIDFrame, error) { - seq, err := quicvarint.Read(r) +func parseNewConnectionIDFrame(b []byte, _ protocol.Version) (*NewConnectionIDFrame, int, error) { + startLen := len(b) + seq, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } - ret, err := quicvarint.Read(r) + b = b[l:] + ret, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } + b = b[l:] if ret > seq { //nolint:stylecheck - return nil, fmt.Errorf("Retire Prior To value (%d) larger than Sequence Number (%d)", ret, seq) + return nil, 0, fmt.Errorf("Retire Prior To value (%d) larger than Sequence Number (%d)", ret, seq) } - connIDLen, err := r.ReadByte() - if err != nil { - return nil, err + if len(b) == 0 { + return nil, 0, io.EOF } + connIDLen := int(b[0]) + b = b[1:] if connIDLen == 0 { - return nil, errors.New("invalid zero-length connection ID") + return nil, 0, errors.New("invalid zero-length connection ID") } - connID, err := protocol.ReadConnectionID(r, int(connIDLen)) - if err != nil { - return nil, err + if connIDLen > protocol.MaxConnIDLen { + return nil, 0, protocol.ErrInvalidConnectionIDLen + } + if len(b) < connIDLen { + return nil, 0, io.EOF } frame := &NewConnectionIDFrame{ SequenceNumber: seq, RetirePriorTo: ret, - ConnectionID: connID, + ConnectionID: protocol.ParseConnectionID(b[:connIDLen]), } - if _, err := io.ReadFull(r, frame.StatelessResetToken[:]); err != nil { - if err == io.ErrUnexpectedEOF { - return nil, io.EOF - } - return nil, err + b = b[connIDLen:] + if len(b) < len(frame.StatelessResetToken) { + return nil, 0, io.EOF } - - return frame, nil + copy(frame.StatelessResetToken[:], b) + return frame, startLen - len(b) + len(frame.StatelessResetToken), nil } func (f *NewConnectionIDFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { @@ -73,5 +76,5 @@ func (f *NewConnectionIDFrame) Append(b []byte, _ protocol.Version) ([]byte, err // Length of a written frame func (f *NewConnectionIDFrame) Length(protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(f.SequenceNumber) + quicvarint.Len(f.RetirePriorTo) + 1 /* connection ID length */ + protocol.ByteCount(f.ConnectionID.Len()) + 16 + return 1 + protocol.ByteCount(quicvarint.Len(f.SequenceNumber)+quicvarint.Len(f.RetirePriorTo)+1 /* connection ID length */ +f.ConnectionID.Len()) + 16 } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/new_token_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/new_token_frame.go index 6a2eac94..f1d4d00f 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/new_token_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/new_token_frame.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "errors" "io" @@ -14,22 +13,21 @@ type NewTokenFrame struct { Token []byte } -func parseNewTokenFrame(r *bytes.Reader, _ protocol.Version) (*NewTokenFrame, error) { - tokenLen, err := quicvarint.Read(r) +func parseNewTokenFrame(b []byte, _ protocol.Version) (*NewTokenFrame, int, error) { + tokenLen, l, err := quicvarint.Parse(b) if err != nil { - return nil, err - } - if uint64(r.Len()) < tokenLen { - return nil, io.EOF + return nil, 0, replaceUnexpectedEOF(err) } + b = b[l:] if tokenLen == 0 { - return nil, errors.New("token must not be empty") + return nil, 0, errors.New("token must not be empty") } - token := make([]byte, int(tokenLen)) - if _, err := io.ReadFull(r, token); err != nil { - return nil, err + if uint64(len(b)) < tokenLen { + return nil, 0, io.EOF } - return &NewTokenFrame{Token: token}, nil + token := make([]byte, int(tokenLen)) + copy(token, b) + return &NewTokenFrame{Token: token}, l + int(tokenLen), nil } func (f *NewTokenFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { @@ -41,5 +39,5 @@ func (f *NewTokenFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { // Length of a written frame func (f *NewTokenFrame) Length(protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(uint64(len(f.Token))) + protocol.ByteCount(len(f.Token)) + return 1 + protocol.ByteCount(quicvarint.Len(uint64(len(f.Token)))+len(f.Token)) } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/path_challenge_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/path_challenge_frame.go index 772041ac..2aca989f 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/path_challenge_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/path_challenge_frame.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "io" "github.com/quic-go/quic-go/internal/protocol" @@ -12,15 +11,13 @@ type PathChallengeFrame struct { Data [8]byte } -func parsePathChallengeFrame(r *bytes.Reader, _ protocol.Version) (*PathChallengeFrame, error) { - frame := &PathChallengeFrame{} - if _, err := io.ReadFull(r, frame.Data[:]); err != nil { - if err == io.ErrUnexpectedEOF { - return nil, io.EOF - } - return nil, err +func parsePathChallengeFrame(b []byte, _ protocol.Version) (*PathChallengeFrame, int, error) { + f := &PathChallengeFrame{} + if len(b) < 8 { + return nil, 0, io.EOF } - return frame, nil + copy(f.Data[:], b) + return f, 8, nil } func (f *PathChallengeFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/path_response_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/path_response_frame.go index 86bbe619..76532c85 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/path_response_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/path_response_frame.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "io" "github.com/quic-go/quic-go/internal/protocol" @@ -12,15 +11,13 @@ type PathResponseFrame struct { Data [8]byte } -func parsePathResponseFrame(r *bytes.Reader, _ protocol.Version) (*PathResponseFrame, error) { - frame := &PathResponseFrame{} - if _, err := io.ReadFull(r, frame.Data[:]); err != nil { - if err == io.ErrUnexpectedEOF { - return nil, io.EOF - } - return nil, err +func parsePathResponseFrame(b []byte, _ protocol.Version) (*PathResponseFrame, int, error) { + f := &PathResponseFrame{} + if len(b) < 8 { + return nil, 0, io.EOF } - return frame, nil + copy(f.Data[:], b) + return f, 8, nil } func (f *PathResponseFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/reset_stream_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/reset_stream_frame.go index e60f1db1..a20029af 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/reset_stream_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/reset_stream_frame.go @@ -1,8 +1,6 @@ package wire import ( - "bytes" - "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/internal/qerr" "github.com/quic-go/quic-go/quicvarint" @@ -15,21 +13,24 @@ type ResetStreamFrame struct { FinalSize protocol.ByteCount } -func parseResetStreamFrame(r *bytes.Reader, _ protocol.Version) (*ResetStreamFrame, error) { +func parseResetStreamFrame(b []byte, _ protocol.Version) (*ResetStreamFrame, int, error) { + startLen := len(b) var streamID protocol.StreamID var byteOffset protocol.ByteCount - sid, err := quicvarint.Read(r) + sid, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } + b = b[l:] streamID = protocol.StreamID(sid) - errorCode, err := quicvarint.Read(r) + errorCode, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } - bo, err := quicvarint.Read(r) + b = b[l:] + bo, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } byteOffset = protocol.ByteCount(bo) @@ -37,7 +38,7 @@ func parseResetStreamFrame(r *bytes.Reader, _ protocol.Version) (*ResetStreamFra StreamID: streamID, ErrorCode: qerr.StreamErrorCode(errorCode), FinalSize: byteOffset, - }, nil + }, startLen - len(b) + l, nil } func (f *ResetStreamFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { @@ -49,6 +50,6 @@ func (f *ResetStreamFrame) Append(b []byte, _ protocol.Version) ([]byte, error) } // Length of a written frame -func (f *ResetStreamFrame) Length(version protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(uint64(f.StreamID)) + quicvarint.Len(uint64(f.ErrorCode)) + quicvarint.Len(uint64(f.FinalSize)) +func (f *ResetStreamFrame) Length(protocol.Version) protocol.ByteCount { + return 1 + protocol.ByteCount(quicvarint.Len(uint64(f.StreamID))+quicvarint.Len(uint64(f.ErrorCode))+quicvarint.Len(uint64(f.FinalSize))) } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/retire_connection_id_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/retire_connection_id_frame.go index 98153622..27aeff84 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/retire_connection_id_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/retire_connection_id_frame.go @@ -1,8 +1,6 @@ package wire import ( - "bytes" - "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/quicvarint" ) @@ -12,12 +10,12 @@ type RetireConnectionIDFrame struct { SequenceNumber uint64 } -func parseRetireConnectionIDFrame(r *bytes.Reader, _ protocol.Version) (*RetireConnectionIDFrame, error) { - seq, err := quicvarint.Read(r) +func parseRetireConnectionIDFrame(b []byte, _ protocol.Version) (*RetireConnectionIDFrame, int, error) { + seq, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } - return &RetireConnectionIDFrame{SequenceNumber: seq}, nil + return &RetireConnectionIDFrame{SequenceNumber: seq}, l, nil } func (f *RetireConnectionIDFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { @@ -28,5 +26,5 @@ func (f *RetireConnectionIDFrame) Append(b []byte, _ protocol.Version) ([]byte, // Length of a written frame func (f *RetireConnectionIDFrame) Length(protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(f.SequenceNumber) + return 1 + protocol.ByteCount(quicvarint.Len(f.SequenceNumber)) } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/short_header.go b/vendor/github.com/quic-go/quic-go/internal/wire/short_header.go index 69aa8341..cf2889c5 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/short_header.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/short_header.go @@ -2,7 +2,6 @@ package wire import ( "errors" - "fmt" "io" "github.com/quic-go/quic-go/internal/protocol" @@ -28,25 +27,15 @@ func ParseShortHeader(data []byte, connIDLen int) (length int, _ protocol.Packet } pos := 1 + connIDLen - var pn protocol.PacketNumber - switch pnLen { - case protocol.PacketNumberLen1: - pn = protocol.PacketNumber(data[pos]) - case protocol.PacketNumberLen2: - pn = protocol.PacketNumber(utils.BigEndian.Uint16(data[pos : pos+2])) - case protocol.PacketNumberLen3: - pn = protocol.PacketNumber(utils.BigEndian.Uint24(data[pos : pos+3])) - case protocol.PacketNumberLen4: - pn = protocol.PacketNumber(utils.BigEndian.Uint32(data[pos : pos+4])) - default: - return 0, 0, 0, 0, fmt.Errorf("invalid packet number length: %d", pnLen) + pn, err := readPacketNumber(data[pos:], pnLen) + if err != nil { + return 0, 0, 0, 0, err } kp := protocol.KeyPhaseZero if data[0]&0b100 > 0 { kp = protocol.KeyPhaseOne } - var err error if data[0]&0x18 != 0 { err = ErrInvalidReservedBits } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/stop_sending_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/stop_sending_frame.go index d314a569..a2326f8e 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/stop_sending_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/stop_sending_frame.go @@ -1,8 +1,6 @@ package wire import ( - "bytes" - "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/internal/qerr" "github.com/quic-go/quic-go/quicvarint" @@ -15,25 +13,28 @@ type StopSendingFrame struct { } // parseStopSendingFrame parses a STOP_SENDING frame -func parseStopSendingFrame(r *bytes.Reader, _ protocol.Version) (*StopSendingFrame, error) { - streamID, err := quicvarint.Read(r) +func parseStopSendingFrame(b []byte, _ protocol.Version) (*StopSendingFrame, int, error) { + startLen := len(b) + streamID, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } - errorCode, err := quicvarint.Read(r) + b = b[l:] + errorCode, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } + b = b[l:] return &StopSendingFrame{ StreamID: protocol.StreamID(streamID), ErrorCode: qerr.StreamErrorCode(errorCode), - }, nil + }, startLen - len(b), nil } // Length of a written frame func (f *StopSendingFrame) Length(_ protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(uint64(f.StreamID)) + quicvarint.Len(uint64(f.ErrorCode)) + return 1 + protocol.ByteCount(quicvarint.Len(uint64(f.StreamID))+quicvarint.Len(uint64(f.ErrorCode))) } func (f *StopSendingFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/stream_data_blocked_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/stream_data_blocked_frame.go index f79740f9..3762ec76 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/stream_data_blocked_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/stream_data_blocked_frame.go @@ -1,8 +1,6 @@ package wire import ( - "bytes" - "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/quicvarint" ) @@ -13,20 +11,22 @@ type StreamDataBlockedFrame struct { MaximumStreamData protocol.ByteCount } -func parseStreamDataBlockedFrame(r *bytes.Reader, _ protocol.Version) (*StreamDataBlockedFrame, error) { - sid, err := quicvarint.Read(r) +func parseStreamDataBlockedFrame(b []byte, _ protocol.Version) (*StreamDataBlockedFrame, int, error) { + startLen := len(b) + sid, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } - offset, err := quicvarint.Read(r) + b = b[l:] + offset, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } return &StreamDataBlockedFrame{ StreamID: protocol.StreamID(sid), MaximumStreamData: protocol.ByteCount(offset), - }, nil + }, startLen - len(b) + l, nil } func (f *StreamDataBlockedFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { @@ -37,6 +37,6 @@ func (f *StreamDataBlockedFrame) Append(b []byte, _ protocol.Version) ([]byte, e } // Length of a written frame -func (f *StreamDataBlockedFrame) Length(version protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(uint64(f.StreamID)) + quicvarint.Len(uint64(f.MaximumStreamData)) +func (f *StreamDataBlockedFrame) Length(protocol.Version) protocol.ByteCount { + return 1 + protocol.ByteCount(quicvarint.Len(uint64(f.StreamID))+quicvarint.Len(uint64(f.MaximumStreamData))) } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/stream_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/stream_frame.go index 0f6c00da..f9470ecd 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/stream_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/stream_frame.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "errors" "io" @@ -20,33 +19,41 @@ type StreamFrame struct { fromPool bool } -func parseStreamFrame(r *bytes.Reader, typ uint64, _ protocol.Version) (*StreamFrame, error) { +func parseStreamFrame(b []byte, typ uint64, _ protocol.Version) (*StreamFrame, int, error) { + startLen := len(b) hasOffset := typ&0b100 > 0 fin := typ&0b1 > 0 hasDataLen := typ&0b10 > 0 - streamID, err := quicvarint.Read(r) + streamID, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } + b = b[l:] var offset uint64 if hasOffset { - offset, err = quicvarint.Read(r) + offset, l, err = quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } + b = b[l:] } var dataLen uint64 if hasDataLen { var err error - dataLen, err = quicvarint.Read(r) + var l int + dataLen, l, err = quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) + } + b = b[l:] + if dataLen > uint64(len(b)) { + return nil, 0, io.EOF } } else { // The rest of the packet is data - dataLen = uint64(r.Len()) + dataLen = uint64(len(b)) } var frame *StreamFrame @@ -57,7 +64,7 @@ func parseStreamFrame(r *bytes.Reader, typ uint64, _ protocol.Version) (*StreamF // The STREAM frame can't be larger than the StreamFrame we obtained from the buffer, // since those StreamFrames have a buffer length of the maximum packet size. if dataLen > uint64(cap(frame.Data)) { - return nil, io.EOF + return nil, 0, io.EOF } frame.Data = frame.Data[:dataLen] } @@ -68,17 +75,14 @@ func parseStreamFrame(r *bytes.Reader, typ uint64, _ protocol.Version) (*StreamF frame.DataLenPresent = hasDataLen if dataLen != 0 { - if _, err := io.ReadFull(r, frame.Data); err != nil { - return nil, err - } + copy(frame.Data, b) } if frame.Offset+frame.DataLen() > protocol.MaxByteCount { - return nil, errors.New("stream data overflows maximum offset") + return nil, 0, errors.New("stream data overflows maximum offset") } - return frame, nil + return frame, startLen - len(b) + int(dataLen), nil } -// Write writes a STREAM frame func (f *StreamFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { if len(f.Data) == 0 && !f.Fin { return nil, errors.New("StreamFrame: attempting to write empty frame without FIN") @@ -108,7 +112,7 @@ func (f *StreamFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { } // Length returns the total length of the STREAM frame -func (f *StreamFrame) Length(version protocol.Version) protocol.ByteCount { +func (f *StreamFrame) Length(protocol.Version) protocol.ByteCount { length := 1 + quicvarint.Len(uint64(f.StreamID)) if f.Offset != 0 { length += quicvarint.Len(uint64(f.Offset)) @@ -116,7 +120,7 @@ func (f *StreamFrame) Length(version protocol.Version) protocol.ByteCount { if f.DataLenPresent { length += quicvarint.Len(uint64(f.DataLen())) } - return length + f.DataLen() + return protocol.ByteCount(length) + f.DataLen() } // DataLen gives the length of data in bytes @@ -126,14 +130,14 @@ func (f *StreamFrame) DataLen() protocol.ByteCount { // MaxDataLen returns the maximum data length // If 0 is returned, writing will fail (a STREAM frame must contain at least 1 byte of data). -func (f *StreamFrame) MaxDataLen(maxSize protocol.ByteCount, version protocol.Version) protocol.ByteCount { - headerLen := 1 + quicvarint.Len(uint64(f.StreamID)) +func (f *StreamFrame) MaxDataLen(maxSize protocol.ByteCount, _ protocol.Version) protocol.ByteCount { + headerLen := 1 + protocol.ByteCount(quicvarint.Len(uint64(f.StreamID))) if f.Offset != 0 { - headerLen += quicvarint.Len(uint64(f.Offset)) + headerLen += protocol.ByteCount(quicvarint.Len(uint64(f.Offset))) } if f.DataLenPresent { - // pretend that the data size will be 1 bytes - // if it turns out that varint encoding the length will consume 2 bytes, we need to adjust the data length afterwards + // Pretend that the data size will be 1 byte. + // If it turns out that varint encoding the length will consume 2 bytes, we need to adjust the data length afterward headerLen++ } if headerLen > maxSize { diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/streams_blocked_frame.go b/vendor/github.com/quic-go/quic-go/internal/wire/streams_blocked_frame.go index b24619ab..c946fec3 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/streams_blocked_frame.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/streams_blocked_frame.go @@ -1,7 +1,6 @@ package wire import ( - "bytes" "fmt" "github.com/quic-go/quic-go/internal/protocol" @@ -14,7 +13,7 @@ type StreamsBlockedFrame struct { StreamLimit protocol.StreamNum } -func parseStreamsBlockedFrame(r *bytes.Reader, typ uint64, _ protocol.Version) (*StreamsBlockedFrame, error) { +func parseStreamsBlockedFrame(b []byte, typ uint64, _ protocol.Version) (*StreamsBlockedFrame, int, error) { f := &StreamsBlockedFrame{} switch typ { case bidiStreamBlockedFrameType: @@ -22,15 +21,15 @@ func parseStreamsBlockedFrame(r *bytes.Reader, typ uint64, _ protocol.Version) ( case uniStreamBlockedFrameType: f.Type = protocol.StreamTypeUni } - streamLimit, err := quicvarint.Read(r) + streamLimit, l, err := quicvarint.Parse(b) if err != nil { - return nil, err + return nil, 0, replaceUnexpectedEOF(err) } f.StreamLimit = protocol.StreamNum(streamLimit) if f.StreamLimit > protocol.MaxStreamCount { - return nil, fmt.Errorf("%d exceeds the maximum stream count", f.StreamLimit) + return nil, 0, fmt.Errorf("%d exceeds the maximum stream count", f.StreamLimit) } - return f, nil + return f, l, nil } func (f *StreamsBlockedFrame) Append(b []byte, _ protocol.Version) ([]byte, error) { @@ -46,5 +45,5 @@ func (f *StreamsBlockedFrame) Append(b []byte, _ protocol.Version) ([]byte, erro // Length of a written frame func (f *StreamsBlockedFrame) Length(_ protocol.Version) protocol.ByteCount { - return 1 + quicvarint.Len(uint64(f.StreamLimit)) + return 1 + protocol.ByteCount(quicvarint.Len(uint64(f.StreamLimit))) } diff --git a/vendor/github.com/quic-go/quic-go/internal/wire/transport_parameters.go b/vendor/github.com/quic-go/quic-go/internal/wire/transport_parameters.go index c03be3cd..cee74b8f 100644 --- a/vendor/github.com/quic-go/quic-go/internal/wire/transport_parameters.go +++ b/vendor/github.com/quic-go/quic-go/internal/wire/transport_parameters.go @@ -1,19 +1,17 @@ package wire import ( - "bytes" "crypto/rand" "encoding/binary" "errors" "fmt" "io" "net/netip" - "sort" + "slices" "time" "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/internal/qerr" - "github.com/quic-go/quic-go/internal/utils" "github.com/quic-go/quic-go/quicvarint" ) @@ -89,7 +87,7 @@ type TransportParameters struct { // Unmarshal the transport parameters func (p *TransportParameters) Unmarshal(data []byte, sentBy protocol.Perspective) error { - if err := p.unmarshal(bytes.NewReader(data), sentBy, false); err != nil { + if err := p.unmarshal(data, sentBy, false); err != nil { return &qerr.TransportError{ ErrorCode: qerr.TransportParameterError, ErrorMessage: err.Error(), @@ -98,9 +96,9 @@ func (p *TransportParameters) Unmarshal(data []byte, sentBy protocol.Perspective return nil } -func (p *TransportParameters) unmarshal(r *bytes.Reader, sentBy protocol.Perspective, fromSessionTicket bool) error { +func (p *TransportParameters) unmarshal(b []byte, sentBy protocol.Perspective, fromSessionTicket bool) error { // needed to check that every parameter is only sent at most once - var parameterIDs []transportParameterID + parameterIDs := make([]transportParameterID, 0, 32) var ( readOriginalDestinationConnectionID bool @@ -112,18 +110,20 @@ func (p *TransportParameters) unmarshal(r *bytes.Reader, sentBy protocol.Perspec p.MaxAckDelay = protocol.DefaultMaxAckDelay p.MaxDatagramFrameSize = protocol.InvalidByteCount - for r.Len() > 0 { - paramIDInt, err := quicvarint.Read(r) + for len(b) > 0 { + paramIDInt, l, err := quicvarint.Parse(b) if err != nil { return err } paramID := transportParameterID(paramIDInt) - paramLen, err := quicvarint.Read(r) + b = b[l:] + paramLen, l, err := quicvarint.Parse(b) if err != nil { return err } - if uint64(r.Len()) < paramLen { - return fmt.Errorf("remaining length (%d) smaller than parameter length (%d)", r.Len(), paramLen) + b = b[l:] + if uint64(len(b)) < paramLen { + return fmt.Errorf("remaining length (%d) smaller than parameter length (%d)", len(b), paramLen) } parameterIDs = append(parameterIDs, paramID) switch paramID { @@ -141,16 +141,18 @@ func (p *TransportParameters) unmarshal(r *bytes.Reader, sentBy protocol.Perspec maxAckDelayParameterID, maxDatagramFrameSizeParameterID, ackDelayExponentParameterID: - if err := p.readNumericTransportParameter(r, paramID, int(paramLen)); err != nil { + if err := p.readNumericTransportParameter(b, paramID, int(paramLen)); err != nil { return err } + b = b[paramLen:] case preferredAddressParameterID: if sentBy == protocol.PerspectiveClient { return errors.New("client sent a preferred_address") } - if err := p.readPreferredAddress(r, int(paramLen)); err != nil { + if err := p.readPreferredAddress(b, int(paramLen)); err != nil { return err } + b = b[paramLen:] case disableActiveMigrationParameterID: if paramLen != 0 { return fmt.Errorf("wrong length for disable_active_migration: %d (expected empty)", paramLen) @@ -164,25 +166,41 @@ func (p *TransportParameters) unmarshal(r *bytes.Reader, sentBy protocol.Perspec return fmt.Errorf("wrong length for stateless_reset_token: %d (expected 16)", paramLen) } var token protocol.StatelessResetToken - r.Read(token[:]) + if len(b) < len(token) { + return io.EOF + } + copy(token[:], b) + b = b[len(token):] p.StatelessResetToken = &token case originalDestinationConnectionIDParameterID: if sentBy == protocol.PerspectiveClient { return errors.New("client sent an original_destination_connection_id") } - p.OriginalDestinationConnectionID, _ = protocol.ReadConnectionID(r, int(paramLen)) + if paramLen > protocol.MaxConnIDLen { + return protocol.ErrInvalidConnectionIDLen + } + p.OriginalDestinationConnectionID = protocol.ParseConnectionID(b[:paramLen]) + b = b[paramLen:] readOriginalDestinationConnectionID = true case initialSourceConnectionIDParameterID: - p.InitialSourceConnectionID, _ = protocol.ReadConnectionID(r, int(paramLen)) + if paramLen > protocol.MaxConnIDLen { + return protocol.ErrInvalidConnectionIDLen + } + p.InitialSourceConnectionID = protocol.ParseConnectionID(b[:paramLen]) + b = b[paramLen:] readInitialSourceConnectionID = true case retrySourceConnectionIDParameterID: if sentBy == protocol.PerspectiveClient { return errors.New("client sent a retry_source_connection_id") } - connID, _ := protocol.ReadConnectionID(r, int(paramLen)) + if paramLen > protocol.MaxConnIDLen { + return protocol.ErrInvalidConnectionIDLen + } + connID := protocol.ParseConnectionID(b[:paramLen]) + b = b[paramLen:] p.RetrySourceConnectionID = &connID default: - r.Seek(int64(paramLen), io.SeekCurrent) + b = b[paramLen:] } } @@ -202,7 +220,12 @@ func (p *TransportParameters) unmarshal(r *bytes.Reader, sentBy protocol.Perspec } // check that every transport parameter was sent at most once - sort.Slice(parameterIDs, func(i, j int) bool { return parameterIDs[i] < parameterIDs[j] }) + slices.SortFunc(parameterIDs, func(a, b transportParameterID) int { + if a < b { + return -1 + } + return 1 + }) for i := 0; i < len(parameterIDs)-1; i++ { if parameterIDs[i] == parameterIDs[i+1] { return fmt.Errorf("received duplicate transport parameter %#x", parameterIDs[i]) @@ -212,60 +235,47 @@ func (p *TransportParameters) unmarshal(r *bytes.Reader, sentBy protocol.Perspec return nil } -func (p *TransportParameters) readPreferredAddress(r *bytes.Reader, expectedLen int) error { - remainingLen := r.Len() +func (p *TransportParameters) readPreferredAddress(b []byte, expectedLen int) error { + remainingLen := len(b) pa := &PreferredAddress{} - var ipv4 [4]byte - if _, err := io.ReadFull(r, ipv4[:]); err != nil { - return err - } - port, err := utils.BigEndian.ReadUint16(r) - if err != nil { - return err + if len(b) < 4+2+16+2+1 { + return io.EOF } - pa.IPv4 = netip.AddrPortFrom(netip.AddrFrom4(ipv4), port) + var ipv4 [4]byte + copy(ipv4[:], b[:4]) + port4 := binary.BigEndian.Uint16(b[4:]) + b = b[4+2:] + pa.IPv4 = netip.AddrPortFrom(netip.AddrFrom4(ipv4), port4) var ipv6 [16]byte - if _, err := io.ReadFull(r, ipv6[:]); err != nil { - return err - } - port, err = utils.BigEndian.ReadUint16(r) - if err != nil { - return err - } - pa.IPv6 = netip.AddrPortFrom(netip.AddrFrom16(ipv6), port) - connIDLen, err := r.ReadByte() - if err != nil { - return err - } + copy(ipv6[:], b[:16]) + port6 := binary.BigEndian.Uint16(b[16:]) + pa.IPv6 = netip.AddrPortFrom(netip.AddrFrom16(ipv6), port6) + b = b[16+2:] + connIDLen := int(b[0]) + b = b[1:] if connIDLen == 0 || connIDLen > protocol.MaxConnIDLen { return fmt.Errorf("invalid connection ID length: %d", connIDLen) } - connID, err := protocol.ReadConnectionID(r, int(connIDLen)) - if err != nil { - return err - } - pa.ConnectionID = connID - if _, err := io.ReadFull(r, pa.StatelessResetToken[:]); err != nil { - return err + if len(b) < connIDLen+len(pa.StatelessResetToken) { + return io.EOF } - if bytesRead := remainingLen - r.Len(); bytesRead != expectedLen { + pa.ConnectionID = protocol.ParseConnectionID(b[:connIDLen]) + b = b[connIDLen:] + copy(pa.StatelessResetToken[:], b) + b = b[len(pa.StatelessResetToken):] + if bytesRead := remainingLen - len(b); bytesRead != expectedLen { return fmt.Errorf("expected preferred_address to be %d long, read %d bytes", expectedLen, bytesRead) } p.PreferredAddress = pa return nil } -func (p *TransportParameters) readNumericTransportParameter( - r *bytes.Reader, - paramID transportParameterID, - expectedLen int, -) error { - remainingLen := r.Len() - val, err := quicvarint.Read(r) +func (p *TransportParameters) readNumericTransportParameter(b []byte, paramID transportParameterID, expectedLen int) error { + val, l, err := quicvarint.Parse(b) if err != nil { return fmt.Errorf("error while reading transport parameter %d: %s", paramID, err) } - if remainingLen-r.Len() != expectedLen { + if l != expectedLen { return fmt.Errorf("inconsistent transport parameter length for transport parameter %#x", paramID) } //nolint:exhaustive // This only covers the numeric transport parameters. @@ -292,7 +302,7 @@ func (p *TransportParameters) readNumericTransportParameter( p.MaxIdleTimeout = max(protocol.MinRemoteIdleTimeout, time.Duration(val)*time.Millisecond) case maxUDPPayloadSizeParameterID: if val < 1200 { - return fmt.Errorf("invalid value for max_packet_size: %d (minimum 1200)", val) + return fmt.Errorf("invalid value for max_udp_payload_size: %d (minimum 1200)", val) } p.MaxUDPPayloadSize = protocol.ByteCount(val) case ackDelayExponentParameterID: @@ -347,8 +357,10 @@ func (p *TransportParameters) Marshal(pers protocol.Perspective) []byte { b = p.marshalVarintParam(b, initialMaxStreamsUniParameterID, uint64(p.MaxUniStreamNum)) // idle_timeout b = p.marshalVarintParam(b, maxIdleTimeoutParameterID, uint64(p.MaxIdleTimeout/time.Millisecond)) - // max_packet_size - b = p.marshalVarintParam(b, maxUDPPayloadSizeParameterID, uint64(protocol.MaxPacketBufferSize)) + // max_udp_payload_size + if p.MaxUDPPayloadSize > 0 { + b = p.marshalVarintParam(b, maxUDPPayloadSizeParameterID, uint64(p.MaxUDPPayloadSize)) + } // max_ack_delay // Only send it if is different from the default value. if p.MaxAckDelay != protocol.DefaultMaxAckDelay { @@ -457,15 +469,15 @@ func (p *TransportParameters) MarshalForSessionTicket(b []byte) []byte { } // UnmarshalFromSessionTicket unmarshals transport parameters from a session ticket. -func (p *TransportParameters) UnmarshalFromSessionTicket(r *bytes.Reader) error { - version, err := quicvarint.Read(r) +func (p *TransportParameters) UnmarshalFromSessionTicket(b []byte) error { + version, l, err := quicvarint.Parse(b) if err != nil { return err } if version != transportParameterMarshalingVersion { return fmt.Errorf("unknown transport parameter marshaling version: %d", version) } - return p.unmarshal(r, protocol.PerspectiveServer, true) + return p.unmarshal(b[l:], protocol.PerspectiveServer, true) } // ValidFor0RTT checks if the transport parameters match those saved in the session ticket. diff --git a/vendor/github.com/quic-go/quic-go/logging/connection_tracer.go b/vendor/github.com/quic-go/quic-go/logging/connection_tracer.go index 7f54d6cd..96bf4617 100644 --- a/vendor/github.com/quic-go/quic-go/logging/connection_tracer.go +++ b/vendor/github.com/quic-go/quic-go/logging/connection_tracer.go @@ -8,14 +8,14 @@ import ( // A ConnectionTracer records events. type ConnectionTracer struct { StartedConnection func(local, remote net.Addr, srcConnID, destConnID ConnectionID) - NegotiatedVersion func(chosen VersionNumber, clientVersions, serverVersions []VersionNumber) + NegotiatedVersion func(chosen Version, clientVersions, serverVersions []Version) ClosedConnection func(error) SentTransportParameters func(*TransportParameters) ReceivedTransportParameters func(*TransportParameters) RestoredTransportParameters func(parameters *TransportParameters) // for 0-RTT SentLongHeaderPacket func(*ExtendedHeader, ByteCount, ECN, *AckFrame, []Frame) SentShortHeaderPacket func(*ShortHeader, ByteCount, ECN, *AckFrame, []Frame) - ReceivedVersionNegotiationPacket func(dest, src ArbitraryLenConnectionID, _ []VersionNumber) + ReceivedVersionNegotiationPacket func(dest, src ArbitraryLenConnectionID, _ []Version) ReceivedRetry func(*Header) ReceivedLongHeaderPacket func(*ExtendedHeader, ByteCount, ECN, []Frame) ReceivedShortHeaderPacket func(*ShortHeader, ByteCount, ECN, []Frame) @@ -24,6 +24,7 @@ type ConnectionTracer struct { UpdatedMetrics func(rttStats *RTTStats, cwnd, bytesInFlight ByteCount, packetsInFlight int) AcknowledgedPacket func(EncryptionLevel, PacketNumber) LostPacket func(EncryptionLevel, PacketNumber, PacketLossReason) + UpdatedMTU func(mtu ByteCount, done bool) UpdatedCongestionState func(CongestionState) UpdatedPTOCount func(value uint32) UpdatedKeyFromTLS func(EncryptionLevel, Perspective) @@ -56,7 +57,7 @@ func NewMultiplexedConnectionTracer(tracers ...*ConnectionTracer) *ConnectionTra } } }, - NegotiatedVersion: func(chosen VersionNumber, clientVersions, serverVersions []VersionNumber) { + NegotiatedVersion: func(chosen Version, clientVersions, serverVersions []Version) { for _, t := range tracers { if t.NegotiatedVersion != nil { t.NegotiatedVersion(chosen, clientVersions, serverVersions) @@ -105,7 +106,7 @@ func NewMultiplexedConnectionTracer(tracers ...*ConnectionTracer) *ConnectionTra } } }, - ReceivedVersionNegotiationPacket: func(dest, src ArbitraryLenConnectionID, versions []VersionNumber) { + ReceivedVersionNegotiationPacket: func(dest, src ArbitraryLenConnectionID, versions []Version) { for _, t := range tracers { if t.ReceivedVersionNegotiationPacket != nil { t.ReceivedVersionNegotiationPacket(dest, src, versions) @@ -168,6 +169,13 @@ func NewMultiplexedConnectionTracer(tracers ...*ConnectionTracer) *ConnectionTra } } }, + UpdatedMTU: func(mtu ByteCount, done bool) { + for _, t := range tracers { + if t.UpdatedMTU != nil { + t.UpdatedMTU(mtu, done) + } + } + }, UpdatedCongestionState: func(state CongestionState) { for _, t := range tracers { if t.UpdatedCongestionState != nil { diff --git a/vendor/github.com/quic-go/quic-go/logging/interface.go b/vendor/github.com/quic-go/quic-go/logging/interface.go index a618a189..1f8edb92 100644 --- a/vendor/github.com/quic-go/quic-go/logging/interface.go +++ b/vendor/github.com/quic-go/quic-go/logging/interface.go @@ -36,8 +36,8 @@ type ( StreamNum = protocol.StreamNum // The StreamType is the type of the stream (unidirectional or bidirectional). StreamType = protocol.StreamType - // The VersionNumber is the QUIC version. - VersionNumber = protocol.Version + // The Version is the QUIC version. + Version = protocol.Version // The Header is the QUIC packet header, before removing header protection. Header = wire.Header @@ -72,27 +72,27 @@ const ( const ( // KeyPhaseZero is key phase bit 0 - KeyPhaseZero KeyPhaseBit = protocol.KeyPhaseZero + KeyPhaseZero = protocol.KeyPhaseZero // KeyPhaseOne is key phase bit 1 - KeyPhaseOne KeyPhaseBit = protocol.KeyPhaseOne + KeyPhaseOne = protocol.KeyPhaseOne ) const ( // PerspectiveServer is used for a QUIC server - PerspectiveServer Perspective = protocol.PerspectiveServer + PerspectiveServer = protocol.PerspectiveServer // PerspectiveClient is used for a QUIC client - PerspectiveClient Perspective = protocol.PerspectiveClient + PerspectiveClient = protocol.PerspectiveClient ) const ( // EncryptionInitial is the Initial encryption level - EncryptionInitial EncryptionLevel = protocol.EncryptionInitial + EncryptionInitial = protocol.EncryptionInitial // EncryptionHandshake is the Handshake encryption level - EncryptionHandshake EncryptionLevel = protocol.EncryptionHandshake + EncryptionHandshake = protocol.EncryptionHandshake // Encryption1RTT is the 1-RTT encryption level - Encryption1RTT EncryptionLevel = protocol.Encryption1RTT + Encryption1RTT = protocol.Encryption1RTT // Encryption0RTT is the 0-RTT encryption level - Encryption0RTT EncryptionLevel = protocol.Encryption0RTT + Encryption0RTT = protocol.Encryption0RTT ) const ( diff --git a/vendor/github.com/quic-go/quic-go/logging/tracer.go b/vendor/github.com/quic-go/quic-go/logging/tracer.go index edd85dba..625a809e 100644 --- a/vendor/github.com/quic-go/quic-go/logging/tracer.go +++ b/vendor/github.com/quic-go/quic-go/logging/tracer.go @@ -5,7 +5,7 @@ import "net" // A Tracer traces events. type Tracer struct { SentPacket func(net.Addr, *Header, ByteCount, []Frame) - SentVersionNegotiationPacket func(_ net.Addr, dest, src ArbitraryLenConnectionID, _ []VersionNumber) + SentVersionNegotiationPacket func(_ net.Addr, dest, src ArbitraryLenConnectionID, _ []Version) DroppedPacket func(net.Addr, PacketType, ByteCount, PacketDropReason) Debug func(name, msg string) Close func() @@ -27,7 +27,7 @@ func NewMultiplexedTracer(tracers ...*Tracer) *Tracer { } } }, - SentVersionNegotiationPacket: func(remote net.Addr, dest, src ArbitraryLenConnectionID, versions []VersionNumber) { + SentVersionNegotiationPacket: func(remote net.Addr, dest, src ArbitraryLenConnectionID, versions []Version) { for _, t := range tracers { if t.SentVersionNegotiationPacket != nil { t.SentVersionNegotiationPacket(remote, dest, src, versions) diff --git a/vendor/github.com/quic-go/quic-go/mockgen.go b/vendor/github.com/quic-go/quic-go/mockgen.go index 81cc4a5e..65ec465a 100644 --- a/vendor/github.com/quic-go/quic-go/mockgen.go +++ b/vendor/github.com/quic-go/quic-go/mockgen.go @@ -14,23 +14,17 @@ type Sender = sender //go:generate sh -c "go run go.uber.org/mock/mockgen -typed -build_flags=\"-tags=gomock\" -package quic -self_package github.com/quic-go/quic-go -destination mock_stream_internal_test.go github.com/quic-go/quic-go StreamI" type StreamI = streamI -//go:generate sh -c "go run go.uber.org/mock/mockgen -typed -build_flags=\"-tags=gomock\" -package quic -self_package github.com/quic-go/quic-go -destination mock_crypto_stream_test.go github.com/quic-go/quic-go CryptoStream" -type CryptoStream = cryptoStream - //go:generate sh -c "go run go.uber.org/mock/mockgen -typed -build_flags=\"-tags=gomock\" -package quic -self_package github.com/quic-go/quic-go -destination mock_receive_stream_internal_test.go github.com/quic-go/quic-go ReceiveStreamI" type ReceiveStreamI = receiveStreamI //go:generate sh -c "go run go.uber.org/mock/mockgen -typed -build_flags=\"-tags=gomock\" -package quic -self_package github.com/quic-go/quic-go -destination mock_send_stream_internal_test.go github.com/quic-go/quic-go SendStreamI" type SendStreamI = sendStreamI -//go:generate sh -c "go run go.uber.org/mock/mockgen -typed -build_flags=\"-tags=gomock\" -package quic -self_package github.com/quic-go/quic-go -destination mock_stream_getter_test.go github.com/quic-go/quic-go StreamGetter" -type StreamGetter = streamGetter - //go:generate sh -c "go run go.uber.org/mock/mockgen -typed -build_flags=\"-tags=gomock\" -package quic -self_package github.com/quic-go/quic-go -destination mock_stream_sender_test.go github.com/quic-go/quic-go StreamSender" type StreamSender = streamSender -//go:generate sh -c "go run go.uber.org/mock/mockgen -typed -build_flags=\"-tags=gomock\" -package quic -self_package github.com/quic-go/quic-go -destination mock_crypto_data_handler_test.go github.com/quic-go/quic-go CryptoDataHandler" -type CryptoDataHandler = cryptoDataHandler +//go:generate sh -c "go run go.uber.org/mock/mockgen -typed -build_flags=\"-tags=gomock\" -package quic -self_package github.com/quic-go/quic-go -destination mock_stream_control_frame_getter_test.go github.com/quic-go/quic-go StreamControlFrameGetter" +type StreamControlFrameGetter = streamControlFrameGetter //go:generate sh -c "go run go.uber.org/mock/mockgen -typed -build_flags=\"-tags=gomock\" -package quic -self_package github.com/quic-go/quic-go -destination mock_frame_source_test.go github.com/quic-go/quic-go FrameSource" type FrameSource = frameSource @@ -72,5 +66,4 @@ type PacketHandlerManager = packetHandlerManager // //go:generate sh -c "go run go.uber.org/mock/mockgen -typed -package quic -self_package github.com/quic-go/quic-go -source sys_conn_oob.go -destination mock_batch_conn_test.go -mock_names batchConn=MockBatchConn" -//go:generate sh -c "go run go.uber.org/mock/mockgen -typed -package quic -self_package github.com/quic-go/quic-go -self_package github.com/quic-go/quic-go -destination mock_token_store_test.go github.com/quic-go/quic-go TokenStore" //go:generate sh -c "go run go.uber.org/mock/mockgen -typed -package quic -self_package github.com/quic-go/quic-go -self_package github.com/quic-go/quic-go -destination mock_packetconn_test.go net PacketConn" diff --git a/vendor/github.com/quic-go/quic-go/mtu_discoverer.go b/vendor/github.com/quic-go/quic-go/mtu_discoverer.go index 317b0929..3f3a640a 100644 --- a/vendor/github.com/quic-go/quic-go/mtu_discoverer.go +++ b/vendor/github.com/quic-go/quic-go/mtu_discoverer.go @@ -1,19 +1,19 @@ package quic import ( - "net" "time" "github.com/quic-go/quic-go/internal/ackhandler" "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/internal/utils" "github.com/quic-go/quic-go/internal/wire" + "github.com/quic-go/quic-go/logging" ) type mtuDiscoverer interface { // Start starts the MTU discovery process. // It's unnecessary to call ShouldSendProbe before that. - Start(maxPacketSize protocol.ByteCount) + Start() ShouldSendProbe(now time.Time) bool CurrentSize() protocol.ByteCount GetPing() (ping ackhandler.Frame, datagramSize protocol.ByteCount) @@ -25,54 +25,129 @@ const ( maxMTUDiff = 20 // send a probe packet every mtuProbeDelay RTTs mtuProbeDelay = 5 + // Once maxLostMTUProbes MTU probe packets larger than a certain size are lost, + // MTU discovery won't probe for larger MTUs than this size. + // The algorithm used here is resilient to packet loss of (maxLostMTUProbes - 1) packets. + maxLostMTUProbes = 3 ) -func getMaxPacketSize(addr net.Addr) protocol.ByteCount { - maxSize := protocol.ByteCount(protocol.MinInitialPacketSize) - // If this is not a UDP address, we don't know anything about the MTU. - // Use the minimum size of an Initial packet as the max packet size. - if udpAddr, ok := addr.(*net.UDPAddr); ok { - if utils.IsIPv4(udpAddr.IP) { - maxSize = protocol.InitialPacketSizeIPv4 - } else { - maxSize = protocol.InitialPacketSizeIPv6 - } - } - return maxSize -} +// The Path MTU is found by sending a larger packet every now and then. +// If the packet is acknowledged, we conclude that the path supports this larger packet size. +// If the packet is lost, this can mean one of two things: +// 1. The path doesn't support this larger packet size, or +// 2. The packet was lost due to packet loss, independent of its size. +// The algorithm used here is resilient to packet loss of (maxLostMTUProbes - 1) packets. +// For simplicty, the following example use maxLostMTUProbes = 2. +// +// Initialization: +// |------------------------------------------------------------------------------| +// min max +// +// The first MTU probe packet will have size (min+max)/2. +// Assume that this packet is acknowledged. We can now move the min marker, +// and continue the search in the resulting interval. +// +// If 1st probe packet acknowledged: +// |---------------------------------------|--------------------------------------| +// min max +// +// If 1st probe packet lost: +// |---------------------------------------|--------------------------------------| +// min lost[0] max +// +// We can't conclude that the path doesn't support this packet size, since the loss of the probe +// packet could have been unrelated to the packet size. A larger probe packet will be sent later on. +// After a loss, the next probe packet has size (min+lost[0])/2. +// Now assume this probe packet is acknowledged: +// +// 2nd probe packet acknowledged: +// |------------------|--------------------|--------------------------------------| +// min lost[0] max +// +// First of all, we conclude that the path supports at least this MTU. That's progress! +// Second, we probe a bit more aggressively with the next probe packet: +// After an acknowledgement, the next probe packet has size (min+max)/2. +// This means we'll send a packet larger than the first probe packet (which was lost). +// +// If 3rd probe packet acknowledged: +// |-------------------------------------------------|----------------------------| +// min max +// +// We can conclude that the loss of the 1st probe packet was not due to its size, and +// continue searching in a much smaller interval now. +// +// If 3rd probe packet lost: +// |------------------|--------------------|---------|----------------------------| +// min lost[0] max +// +// Since in our example numPTOProbes = 2, and we lost 2 packets smaller than max, we +// conclude that this packet size is not supported on the path, and reduce the maximum +// value of the search interval. +// +// MTU discovery concludes once the interval min and max has been narrowed down to maxMTUDiff. type mtuFinder struct { lastProbeTime time.Time mtuIncreased func(protocol.ByteCount) rttStats *utils.RTTStats + inFlight protocol.ByteCount // the size of the probe packet currently in flight. InvalidByteCount if none is in flight - current protocol.ByteCount - max protocol.ByteCount // the maximum value, as advertised by the peer (or our maximum size buffer) + min protocol.ByteCount + limit protocol.ByteCount + + // on initialization, we treat the maximum size as the first "lost" packet + lost [maxLostMTUProbes]protocol.ByteCount + lastProbeWasLost bool + + tracer *logging.ConnectionTracer } var _ mtuDiscoverer = &mtuFinder{} -func newMTUDiscoverer(rttStats *utils.RTTStats, start protocol.ByteCount, mtuIncreased func(protocol.ByteCount)) *mtuFinder { - return &mtuFinder{ +func newMTUDiscoverer( + rttStats *utils.RTTStats, + start, max protocol.ByteCount, + mtuIncreased func(protocol.ByteCount), + tracer *logging.ConnectionTracer, +) *mtuFinder { + f := &mtuFinder{ inFlight: protocol.InvalidByteCount, - current: start, + min: start, + limit: max, rttStats: rttStats, mtuIncreased: mtuIncreased, + tracer: tracer, } + for i := range f.lost { + if i == 0 { + f.lost[i] = max + continue + } + f.lost[i] = protocol.InvalidByteCount + } + return f } func (f *mtuFinder) done() bool { - return f.max-f.current <= maxMTUDiff+1 + return f.max()-f.min <= maxMTUDiff+1 } -func (f *mtuFinder) Start(maxPacketSize protocol.ByteCount) { +func (f *mtuFinder) max() protocol.ByteCount { + for i, v := range f.lost { + if v == protocol.InvalidByteCount { + return f.lost[i-1] + } + } + return f.lost[len(f.lost)-1] +} + +func (f *mtuFinder) Start() { f.lastProbeTime = time.Now() // makes sure the first probe packet is not sent immediately - f.max = maxPacketSize } func (f *mtuFinder) ShouldSendProbe(now time.Time) bool { - if f.max == 0 || f.lastProbeTime.IsZero() { + if f.lastProbeTime.IsZero() { return false } if f.inFlight != protocol.InvalidByteCount || f.done() { @@ -82,20 +157,27 @@ func (f *mtuFinder) ShouldSendProbe(now time.Time) bool { } func (f *mtuFinder) GetPing() (ackhandler.Frame, protocol.ByteCount) { - size := (f.max + f.current) / 2 + var size protocol.ByteCount + if f.lastProbeWasLost { + size = (f.min + f.lost[0]) / 2 + } else { + size = (f.min + f.max()) / 2 + } f.lastProbeTime = time.Now() f.inFlight = size return ackhandler.Frame{ Frame: &wire.PingFrame{}, - Handler: (*mtuFinderAckHandler)(f), + Handler: &mtuFinderAckHandler{f}, }, size } func (f *mtuFinder) CurrentSize() protocol.ByteCount { - return f.current + return f.min } -type mtuFinderAckHandler mtuFinder +type mtuFinderAckHandler struct { + *mtuFinder +} var _ ackhandler.FrameHandler = &mtuFinderAckHandler{} @@ -105,7 +187,28 @@ func (h *mtuFinderAckHandler) OnAcked(wire.Frame) { panic("OnAcked callback called although there's no MTU probe packet in flight") } h.inFlight = protocol.InvalidByteCount - h.current = size + h.min = size + h.lastProbeWasLost = false + // remove all values smaller than size from the lost array + var j int + for i, v := range h.lost { + if size < v { + j = i + break + } + } + if j > 0 { + for i := 0; i < len(h.lost); i++ { + if i+j < len(h.lost) { + h.lost[i] = h.lost[i+j] + } else { + h.lost[i] = protocol.InvalidByteCount + } + } + } + if h.tracer != nil && h.tracer.UpdatedMTU != nil { + h.tracer.UpdatedMTU(size, h.done()) + } h.mtuIncreased(size) } @@ -114,6 +217,13 @@ func (h *mtuFinderAckHandler) OnLost(wire.Frame) { if size == protocol.InvalidByteCount { panic("OnLost callback called although there's no MTU probe packet in flight") } - h.max = size + h.lastProbeWasLost = true h.inFlight = protocol.InvalidByteCount + for i, v := range h.lost { + if size < v { + copy(h.lost[i+1:], h.lost[i:]) + h.lost[i] = size + break + } + } } diff --git a/vendor/github.com/quic-go/quic-go/oss-fuzz.sh b/vendor/github.com/quic-go/quic-go/oss-fuzz.sh index 22a577fe..92a57a2c 100644 --- a/vendor/github.com/quic-go/quic-go/oss-fuzz.sh +++ b/vendor/github.com/quic-go/quic-go/oss-fuzz.sh @@ -3,12 +3,12 @@ # Install Go manually, since oss-fuzz ships with an outdated Go version. # See https://github.com/google/oss-fuzz/pull/10643. export CXX="${CXX} -lresolv" # required by Go 1.20 -wget https://go.dev/dl/go1.22.0.linux-amd64.tar.gz \ +wget https://go.dev/dl/go1.23.0.linux-amd64.tar.gz \ && mkdir temp-go \ && rm -rf /root/.go/* \ - && tar -C temp-go/ -xzf go1.22.0.linux-amd64.tar.gz \ + && tar -C temp-go/ -xzf go1.23.0.linux-amd64.tar.gz \ && mv temp-go/go/* /root/.go/ \ - && rm -rf temp-go go1.22.0.linux-amd64.tar.gz + && rm -rf temp-go go1.23.0.linux-amd64.tar.gz ( # fuzz qpack diff --git a/vendor/github.com/quic-go/quic-go/packet_packer.go b/vendor/github.com/quic-go/quic-go/packet_packer.go index e707734f..8b8a03d4 100644 --- a/vendor/github.com/quic-go/quic-go/packet_packer.go +++ b/vendor/github.com/quic-go/quic-go/packet_packer.go @@ -121,8 +121,8 @@ type packetPacker struct { perspective protocol.Perspective cryptoSetup sealingManager - initialStream cryptoStream - handshakeStream cryptoStream + initialStream *cryptoStream + handshakeStream *cryptoStream token []byte @@ -141,7 +141,7 @@ var _ packer = &packetPacker{} func newPacketPacker( srcConnID protocol.ConnectionID, getDestConnID func() protocol.ConnectionID, - initialStream, handshakeStream cryptoStream, + initialStream, handshakeStream *cryptoStream, packetNumberManager packetNumberManager, retransmissionQueue *retransmissionQueue, cryptoSetup sealingManager, @@ -482,7 +482,7 @@ func (p *packetPacker) maybeGetCryptoPacket(maxPacketSize protocol.ByteCount, en return nil, payload{} } - var s cryptoStream + var s *cryptoStream var handler ackhandler.FrameHandler var hasRetransmission bool //nolint:exhaustive // Initial and Handshake are the only two encryption levels here. @@ -645,6 +645,9 @@ func (p *packetPacker) composeNextPacket(maxFrameSize protocol.ByteCount, onlyAc pl.length += lengthAdded // add handlers for the control frames that were added for i := startLen; i < len(pl.frames); i++ { + if pl.frames[i].Handler != nil { + continue + } switch pl.frames[i].Frame.(type) { case *wire.PathChallengeFrame, *wire.PathResponseFrame: // Path probing is currently not supported, therefore we don't need to set the OnAcked callback yet. diff --git a/vendor/github.com/quic-go/quic-go/packet_unpacker.go b/vendor/github.com/quic-go/quic-go/packet_unpacker.go index 1034aab1..9e0fa9d9 100644 --- a/vendor/github.com/quic-go/quic-go/packet_unpacker.go +++ b/vendor/github.com/quic-go/quic-go/packet_unpacker.go @@ -1,7 +1,6 @@ package quic import ( - "bytes" "fmt" "time" @@ -53,7 +52,7 @@ func newPacketUnpacker(cs handshake.CryptoSetup, shortHdrConnIDLen int) *packetU // If the reserved bits are invalid, the error is wire.ErrInvalidReservedBits. // If any other error occurred when parsing the header, the error is of type headerParseError. // If decrypting the payload fails for any reason, the error is the error returned by the AEAD. -func (u *packetUnpacker) UnpackLongHeader(hdr *wire.Header, rcvTime time.Time, data []byte, v protocol.Version) (*unpackedPacket, error) { +func (u *packetUnpacker) UnpackLongHeader(hdr *wire.Header, data []byte) (*unpackedPacket, error) { var encLevel protocol.EncryptionLevel var extHdr *wire.ExtendedHeader var decrypted []byte @@ -65,7 +64,7 @@ func (u *packetUnpacker) UnpackLongHeader(hdr *wire.Header, rcvTime time.Time, d if err != nil { return nil, err } - extHdr, decrypted, err = u.unpackLongHeaderPacket(opener, hdr, data, v) + extHdr, decrypted, err = u.unpackLongHeaderPacket(opener, hdr, data) if err != nil { return nil, err } @@ -75,7 +74,7 @@ func (u *packetUnpacker) UnpackLongHeader(hdr *wire.Header, rcvTime time.Time, d if err != nil { return nil, err } - extHdr, decrypted, err = u.unpackLongHeaderPacket(opener, hdr, data, v) + extHdr, decrypted, err = u.unpackLongHeaderPacket(opener, hdr, data) if err != nil { return nil, err } @@ -85,7 +84,7 @@ func (u *packetUnpacker) UnpackLongHeader(hdr *wire.Header, rcvTime time.Time, d if err != nil { return nil, err } - extHdr, decrypted, err = u.unpackLongHeaderPacket(opener, hdr, data, v) + extHdr, decrypted, err = u.unpackLongHeaderPacket(opener, hdr, data) if err != nil { return nil, err } @@ -125,8 +124,8 @@ func (u *packetUnpacker) UnpackShortHeader(rcvTime time.Time, data []byte) (prot return pn, pnLen, kp, decrypted, nil } -func (u *packetUnpacker) unpackLongHeaderPacket(opener handshake.LongHeaderOpener, hdr *wire.Header, data []byte, v protocol.Version) (*wire.ExtendedHeader, []byte, error) { - extHdr, parseErr := u.unpackLongHeader(opener, hdr, data, v) +func (u *packetUnpacker) unpackLongHeaderPacket(opener handshake.LongHeaderOpener, hdr *wire.Header, data []byte) (*wire.ExtendedHeader, []byte, error) { + extHdr, parseErr := u.unpackLongHeader(opener, hdr, data) // If the reserved bits are set incorrectly, we still need to continue unpacking. // This avoids a timing side-channel, which otherwise might allow an attacker // to gain information about the header encryption. @@ -187,17 +186,15 @@ func (u *packetUnpacker) unpackShortHeader(hd headerDecryptor, data []byte) (int } // The error is either nil, a wire.ErrInvalidReservedBits or of type headerParseError. -func (u *packetUnpacker) unpackLongHeader(hd headerDecryptor, hdr *wire.Header, data []byte, v protocol.Version) (*wire.ExtendedHeader, error) { - extHdr, err := unpackLongHeader(hd, hdr, data, v) +func (u *packetUnpacker) unpackLongHeader(hd headerDecryptor, hdr *wire.Header, data []byte) (*wire.ExtendedHeader, error) { + extHdr, err := unpackLongHeader(hd, hdr, data) if err != nil && err != wire.ErrInvalidReservedBits { return nil, &headerParseError{err: err} } return extHdr, err } -func unpackLongHeader(hd headerDecryptor, hdr *wire.Header, data []byte, v protocol.Version) (*wire.ExtendedHeader, error) { - r := bytes.NewReader(data) - +func unpackLongHeader(hd headerDecryptor, hdr *wire.Header, data []byte) (*wire.ExtendedHeader, error) { hdrLen := hdr.ParsedLen() if protocol.ByteCount(len(data)) < hdrLen+4+16 { //nolint:stylecheck @@ -214,7 +211,7 @@ func unpackLongHeader(hd headerDecryptor, hdr *wire.Header, data []byte, v proto data[hdrLen:hdrLen+4], ) // 3. parse the header (and learn the actual length of the packet number) - extHdr, parseErr := hdr.ParseExtended(r, v) + extHdr, parseErr := hdr.ParseExtended(data) if parseErr != nil && parseErr != wire.ErrInvalidReservedBits { return nil, parseErr } diff --git a/vendor/github.com/quic-go/quic-go/quicvarint/varint.go b/vendor/github.com/quic-go/quic-go/quicvarint/varint.go index 3f12c076..9a22e334 100644 --- a/vendor/github.com/quic-go/quic-go/quicvarint/varint.go +++ b/vendor/github.com/quic-go/quic-go/quicvarint/varint.go @@ -3,8 +3,6 @@ package quicvarint import ( "fmt" "io" - - "github.com/quic-go/quic-go/internal/protocol" ) // taken from the QUIC draft @@ -28,16 +26,16 @@ func Read(r io.ByteReader) (uint64, error) { return 0, err } // the first two bits of the first byte encode the length - len := 1 << ((firstByte & 0xc0) >> 6) + l := 1 << ((firstByte & 0xc0) >> 6) b1 := firstByte & (0xff - 0xc0) - if len == 1 { + if l == 1 { return uint64(b1), nil } b2, err := r.ReadByte() if err != nil { return 0, err } - if len == 2 { + if l == 2 { return uint64(b2) + uint64(b1)<<8, nil } b3, err := r.ReadByte() @@ -48,7 +46,7 @@ func Read(r io.ByteReader) (uint64, error) { if err != nil { return 0, err } - if len == 4 { + if l == 4 { return uint64(b4) + uint64(b3)<<8 + uint64(b2)<<16 + uint64(b1)<<24, nil } b5, err := r.ReadByte() @@ -70,6 +68,31 @@ func Read(r io.ByteReader) (uint64, error) { return uint64(b8) + uint64(b7)<<8 + uint64(b6)<<16 + uint64(b5)<<24 + uint64(b4)<<32 + uint64(b3)<<40 + uint64(b2)<<48 + uint64(b1)<<56, nil } +// Parse reads a number in the QUIC varint format. +// It returns the number of bytes consumed. +func Parse(b []byte) (uint64 /* value */, int /* bytes consumed */, error) { + if len(b) == 0 { + return 0, 0, io.EOF + } + firstByte := b[0] + // the first two bits of the first byte encode the length + l := 1 << ((firstByte & 0xc0) >> 6) + if len(b) < l { + return 0, 0, io.ErrUnexpectedEOF + } + b0 := firstByte & (0xff - 0xc0) + if l == 1 { + return uint64(b0), 1, nil + } + if l == 2 { + return uint64(b[1]) + uint64(b0)<<8, 2, nil + } + if l == 4 { + return uint64(b[3]) + uint64(b[2])<<8 + uint64(b[1])<<16 + uint64(b0)<<24, 4, nil + } + return uint64(b[7]) + uint64(b[6])<<8 + uint64(b[5])<<16 + uint64(b[4])<<24 + uint64(b[3])<<32 + uint64(b[2])<<40 + uint64(b[1])<<48 + uint64(b0)<<56, 8, nil +} + // Append appends i in the QUIC varint format. func Append(b []byte, i uint64) []byte { if i <= maxVarInt1 { @@ -91,7 +114,7 @@ func Append(b []byte, i uint64) []byte { } // AppendWithLen append i in the QUIC varint format with the desired length. -func AppendWithLen(b []byte, i uint64, length protocol.ByteCount) []byte { +func AppendWithLen(b []byte, i uint64, length int) []byte { if length != 1 && length != 2 && length != 4 && length != 8 { panic("invalid varint length") } @@ -109,17 +132,17 @@ func AppendWithLen(b []byte, i uint64, length protocol.ByteCount) []byte { } else if length == 8 { b = append(b, 0b11000000) } - for j := protocol.ByteCount(1); j < length-l; j++ { + for j := 1; j < length-l; j++ { b = append(b, 0) } - for j := protocol.ByteCount(0); j < l; j++ { + for j := 0; j < l; j++ { b = append(b, uint8(i>>(8*(l-1-j)))) } return b } // Len determines the number of bytes that will be needed to write the number i. -func Len(i uint64) protocol.ByteCount { +func Len(i uint64) int { if i <= maxVarInt1 { return 1 } diff --git a/vendor/github.com/quic-go/quic-go/receive_stream.go b/vendor/github.com/quic-go/quic-go/receive_stream.go index 1235ff0e..b8535ef5 100644 --- a/vendor/github.com/quic-go/quic-go/receive_stream.go +++ b/vendor/github.com/quic-go/quic-go/receive_stream.go @@ -6,6 +6,7 @@ import ( "sync" "time" + "github.com/quic-go/quic-go/internal/ackhandler" "github.com/quic-go/quic-go/internal/flowcontrol" "github.com/quic-go/quic-go/internal/protocol" "github.com/quic-go/quic-go/internal/qerr" @@ -19,7 +20,6 @@ type receiveStreamI interface { handleStreamFrame(*wire.StreamFrame) error handleResetStreamFrame(*wire.ResetStreamFrame) error closeForShutdown(error) - getWindowUpdate() protocol.ByteCount } type receiveStream struct { @@ -37,10 +37,17 @@ type receiveStream struct { readPosInFrame int currentFrameIsLast bool // is the currentFrame the last frame on this stream - finRead bool // set once we read a frame with a Fin + queuedStopSending bool + queuedMaxStreamData bool + + // Set once we read the io.EOF or the cancellation error. + // Note that for local cancellations, this doesn't necessarily mean that we know the final offset yet. + errorRead bool + completed bool // set once we've called streamSender.onStreamCompleted + cancelledRemotely bool + cancelledLocally bool + cancelErr *StreamError closeForShutdownErr error - cancelReadErr error - resetRemotelyErr *StreamError readChan chan struct{} readOnce chan struct{} // cap: 1, to protect against concurrent use of Read @@ -50,8 +57,9 @@ type receiveStream struct { } var ( - _ ReceiveStream = &receiveStream{} - _ receiveStreamI = &receiveStream{} + _ ReceiveStream = &receiveStream{} + _ receiveStreamI = &receiveStream{} + _ streamControlFrameGetter = &receiveStream{} ) func newReceiveStream( @@ -83,29 +91,54 @@ func (s *receiveStream) Read(p []byte) (int, error) { defer func() { <-s.readOnce }() s.mutex.Lock() - completed, n, err := s.readImpl(p) + queuedNewControlFrame, n, err := s.readImpl(p) + completed := s.isNewlyCompleted() s.mutex.Unlock() if completed { s.sender.onStreamCompleted(s.streamID) } + if queuedNewControlFrame { + s.sender.onHasStreamControlFrame(s.streamID, s) + } return n, err } -func (s *receiveStream) readImpl(p []byte) (bool /*stream completed */, int, error) { - if s.finRead { - return false, 0, io.EOF +func (s *receiveStream) isNewlyCompleted() bool { + if s.completed { + return false + } + // We need to know the final offset (either via FIN or RESET_STREAM) for flow control accounting. + if s.finalOffset == protocol.MaxByteCount { + return false + } + // We're done with the stream if it was cancelled locally... + if s.cancelledLocally { + s.completed = true + return true + } + // ... or if the error (either io.EOF or the reset error) was read + if s.errorRead { + s.completed = true + return true } - if s.cancelReadErr != nil { - return false, 0, s.cancelReadErr + return false +} + +func (s *receiveStream) readImpl(p []byte) (bool, int, error) { + if s.currentFrameIsLast && s.currentFrame == nil { + s.errorRead = true + return false, 0, io.EOF } - if s.resetRemotelyErr != nil { - return false, 0, s.resetRemotelyErr + if s.cancelledRemotely || s.cancelledLocally { + s.errorRead = true + return false, 0, s.cancelErr } if s.closeForShutdownErr != nil { return false, 0, s.closeForShutdownErr } + var queuedNewControlFrame bool var bytesRead int var deadlineTimer *utils.Timer for bytesRead < len(p) { @@ -113,25 +146,23 @@ func (s *receiveStream) readImpl(p []byte) (bool /*stream completed */, int, err s.dequeueNextFrame() } if s.currentFrame == nil && bytesRead > 0 { - return false, bytesRead, s.closeForShutdownErr + return queuedNewControlFrame, bytesRead, s.closeForShutdownErr } for { // Stop waiting on errors if s.closeForShutdownErr != nil { - return false, bytesRead, s.closeForShutdownErr - } - if s.cancelReadErr != nil { - return false, bytesRead, s.cancelReadErr + return queuedNewControlFrame, bytesRead, s.closeForShutdownErr } - if s.resetRemotelyErr != nil { - return false, bytesRead, s.resetRemotelyErr + if s.cancelledRemotely || s.cancelledLocally { + s.errorRead = true + return queuedNewControlFrame, 0, s.cancelErr } deadline := s.deadline if !deadline.IsZero() { if !time.Now().Before(deadline) { - return false, bytesRead, errDeadline + return queuedNewControlFrame, bytesRead, errDeadline } if deadlineTimer == nil { deadlineTimer = utils.NewTimer() @@ -161,10 +192,10 @@ func (s *receiveStream) readImpl(p []byte) (bool /*stream completed */, int, err } if bytesRead > len(p) { - return false, bytesRead, fmt.Errorf("BUG: bytesRead (%d) > len(p) (%d) in stream.Read", bytesRead, len(p)) + return queuedNewControlFrame, bytesRead, fmt.Errorf("BUG: bytesRead (%d) > len(p) (%d) in stream.Read", bytesRead, len(p)) } if s.readPosInFrame > len(s.currentFrame) { - return false, bytesRead, fmt.Errorf("BUG: readPosInFrame (%d) > frame.DataLen (%d) in stream.Read", s.readPosInFrame, len(s.currentFrame)) + return queuedNewControlFrame, bytesRead, fmt.Errorf("BUG: readPosInFrame (%d) > frame.DataLen (%d) in stream.Read", s.readPosInFrame, len(s.currentFrame)) } m := copy(p[bytesRead:], s.currentFrame[s.readPosInFrame:]) @@ -173,20 +204,23 @@ func (s *receiveStream) readImpl(p []byte) (bool /*stream completed */, int, err // when a RESET_STREAM was received, the flow controller was already // informed about the final byteOffset for this stream - if s.resetRemotelyErr == nil { - s.flowController.AddBytesRead(protocol.ByteCount(m)) + if !s.cancelledRemotely { + if queueMaxStreamData := s.flowController.AddBytesRead(protocol.ByteCount(m)); queueMaxStreamData { + s.queuedMaxStreamData = true + queuedNewControlFrame = true + } } if s.readPosInFrame >= len(s.currentFrame) && s.currentFrameIsLast { - s.finRead = true s.currentFrame = nil if s.currentFrameDone != nil { s.currentFrameDone() } - return true, bytesRead, io.EOF + s.errorRead = true + return queuedNewControlFrame, bytesRead, io.EOF } } - return false, bytesRead, nil + return queuedNewControlFrame, bytesRead, nil } func (s *receiveStream) dequeueNextFrame() { @@ -202,32 +236,40 @@ func (s *receiveStream) dequeueNextFrame() { func (s *receiveStream) CancelRead(errorCode StreamErrorCode) { s.mutex.Lock() - completed := s.cancelReadImpl(errorCode) + queuedNewControlFrame := s.cancelReadImpl(errorCode) + completed := s.isNewlyCompleted() s.mutex.Unlock() + if queuedNewControlFrame { + s.sender.onHasStreamControlFrame(s.streamID, s) + } if completed { s.flowController.Abandon() s.sender.onStreamCompleted(s.streamID) } } -func (s *receiveStream) cancelReadImpl(errorCode qerr.StreamErrorCode) bool /* completed */ { - if s.finRead || s.cancelReadErr != nil || s.resetRemotelyErr != nil { +func (s *receiveStream) cancelReadImpl(errorCode qerr.StreamErrorCode) (queuedNewControlFrame bool) { + if s.cancelledLocally { // duplicate call to CancelRead return false } - s.cancelReadErr = &StreamError{StreamID: s.streamID, ErrorCode: errorCode, Remote: false} + if s.closeForShutdownErr != nil { + return false + } + s.cancelledLocally = true + if s.errorRead || s.cancelledRemotely { + return false + } + s.queuedStopSending = true + s.cancelErr = &StreamError{StreamID: s.streamID, ErrorCode: errorCode, Remote: false} s.signalRead() - s.sender.queueControlFrame(&wire.StopSendingFrame{ - StreamID: s.streamID, - ErrorCode: errorCode, - }) - // We're done with this stream if the final offset was already received. - return s.finalOffset != protocol.MaxByteCount + return true } func (s *receiveStream) handleStreamFrame(frame *wire.StreamFrame) error { s.mutex.Lock() - completed, err := s.handleStreamFrameImpl(frame) + err := s.handleStreamFrameImpl(frame) + completed := s.isNewlyCompleted() s.mutex.Unlock() if completed { @@ -237,59 +279,78 @@ func (s *receiveStream) handleStreamFrame(frame *wire.StreamFrame) error { return err } -func (s *receiveStream) handleStreamFrameImpl(frame *wire.StreamFrame) (bool /* completed */, error) { +func (s *receiveStream) handleStreamFrameImpl(frame *wire.StreamFrame) error { maxOffset := frame.Offset + frame.DataLen() if err := s.flowController.UpdateHighestReceived(maxOffset, frame.Fin); err != nil { - return false, err + return err } - var newlyRcvdFinalOffset bool if frame.Fin { - newlyRcvdFinalOffset = s.finalOffset == protocol.MaxByteCount s.finalOffset = maxOffset } - if s.cancelReadErr != nil { - return newlyRcvdFinalOffset, nil + if s.cancelledLocally { + return nil } if err := s.frameQueue.Push(frame.Data, frame.Offset, frame.PutBack); err != nil { - return false, err + return err } s.signalRead() - return false, nil + return nil } func (s *receiveStream) handleResetStreamFrame(frame *wire.ResetStreamFrame) error { s.mutex.Lock() - completed, err := s.handleResetStreamFrameImpl(frame) + err := s.handleResetStreamFrameImpl(frame) + completed := s.isNewlyCompleted() s.mutex.Unlock() if completed { - s.flowController.Abandon() s.sender.onStreamCompleted(s.streamID) } return err } -func (s *receiveStream) handleResetStreamFrameImpl(frame *wire.ResetStreamFrame) (bool /*completed */, error) { +func (s *receiveStream) handleResetStreamFrameImpl(frame *wire.ResetStreamFrame) error { if s.closeForShutdownErr != nil { - return false, nil + return nil } if err := s.flowController.UpdateHighestReceived(frame.FinalSize, true); err != nil { - return false, err + return err } - newlyRcvdFinalOffset := s.finalOffset == protocol.MaxByteCount s.finalOffset = frame.FinalSize // ignore duplicate RESET_STREAM frames for this stream (after checking their final offset) - if s.resetRemotelyErr != nil { - return false, nil + if s.cancelledRemotely { + return nil } - s.resetRemotelyErr = &StreamError{ - StreamID: s.streamID, - ErrorCode: frame.ErrorCode, - Remote: true, + s.flowController.Abandon() + // don't save the error if the RESET_STREAM frames was received after CancelRead was called + if s.cancelledLocally { + return nil } + s.cancelledRemotely = true + s.cancelErr = &StreamError{StreamID: s.streamID, ErrorCode: frame.ErrorCode, Remote: true} s.signalRead() - return newlyRcvdFinalOffset, nil + return nil +} + +func (s *receiveStream) getControlFrame() (_ ackhandler.Frame, ok, hasMore bool) { + s.mutex.Lock() + defer s.mutex.Unlock() + + if !s.queuedStopSending && !s.queuedMaxStreamData { + return ackhandler.Frame{}, false, false + } + if s.queuedStopSending { + s.queuedStopSending = false + return ackhandler.Frame{ + Frame: &wire.StopSendingFrame{StreamID: s.streamID, ErrorCode: s.cancelErr.ErrorCode}, + }, true, s.queuedMaxStreamData + } + + s.queuedMaxStreamData = false + return ackhandler.Frame{ + Frame: &wire.MaxStreamDataFrame{StreamID: s.streamID, MaximumStreamData: s.flowController.GetWindowUpdate()}, + }, true, false } func (s *receiveStream) SetReadDeadline(t time.Time) error { @@ -310,10 +371,6 @@ func (s *receiveStream) closeForShutdown(err error) { s.signalRead() } -func (s *receiveStream) getWindowUpdate() protocol.ByteCount { - return s.flowController.GetWindowUpdate() -} - // signalRead performs a non-blocking send on the readChan func (s *receiveStream) signalRead() { select { diff --git a/vendor/github.com/quic-go/quic-go/send_stream.go b/vendor/github.com/quic-go/quic-go/send_stream.go index e1ce3e67..699c40ef 100644 --- a/vendor/github.com/quic-go/quic-go/send_stream.go +++ b/vendor/github.com/quic-go/quic-go/send_stream.go @@ -26,7 +26,7 @@ type sendStreamI interface { type sendStream struct { mutex sync.Mutex - numOutstandingFrames int64 + numOutstandingFrames int64 // outstanding STREAM and RESET_STREAM frames retransmissionQueue []*wire.StreamFrame ctx context.Context @@ -37,12 +37,19 @@ type sendStream struct { writeOffset protocol.ByteCount - cancelWriteErr error + cancelWriteErr *StreamError closeForShutdownErr error + queuedResetStreamFrame bool + queuedBlockedFrame bool + finishedWriting bool // set once Close() is called finSent bool // set when a STREAM_FRAME with FIN bit has been sent - completed bool // set when this stream has been reported to the streamSender as completed + // Set when the application knows about the cancellation. + // This can happen because the application called CancelWrite, + // or because Write returned the error (for remote cancellations). + cancellationFlagged bool + completed bool // set when this stream has been reported to the streamSender as completed dataForWriting []byte // during a Write() call, this slice is the part of p that still needs to be sent out nextFrame *wire.StreamFrame @@ -55,11 +62,13 @@ type sendStream struct { } var ( - _ SendStream = &sendStream{} - _ sendStreamI = &sendStream{} + _ SendStream = &sendStream{} + _ sendStreamI = &sendStream{} + _ streamControlFrameGetter = &sendStream{} ) func newSendStream( + ctx context.Context, streamID protocol.StreamID, sender streamSender, flowController flowcontrol.StreamFlowController, @@ -71,7 +80,7 @@ func newSendStream( writeChan: make(chan struct{}, 1), writeOnce: make(chan struct{}, 1), // cap: 1, to protect against concurrent use of Write } - s.ctx, s.ctxCancel = context.WithCancelCause(context.Background()) + s.ctx, s.ctxCancel = context.WithCancelCause(ctx) return s } @@ -86,23 +95,32 @@ func (s *sendStream) Write(p []byte) (int, error) { s.writeOnce <- struct{}{} defer func() { <-s.writeOnce }() + isNewlyCompleted, n, err := s.write(p) + if isNewlyCompleted { + s.sender.onStreamCompleted(s.streamID) + } + return n, err +} + +func (s *sendStream) write(p []byte) (bool /* is newly completed */, int, error) { s.mutex.Lock() defer s.mutex.Unlock() if s.finishedWriting { - return 0, fmt.Errorf("write on closed stream %d", s.streamID) + return false, 0, fmt.Errorf("write on closed stream %d", s.streamID) } if s.cancelWriteErr != nil { - return 0, s.cancelWriteErr + s.cancellationFlagged = true + return s.isNewlyCompleted(), 0, s.cancelWriteErr } if s.closeForShutdownErr != nil { - return 0, s.closeForShutdownErr + return false, 0, s.closeForShutdownErr } if !s.deadline.IsZero() && !time.Now().Before(s.deadline) { - return 0, errDeadline + return false, 0, errDeadline } if len(p) == 0 { - return 0, nil + return false, 0, nil } s.dataForWriting = p @@ -143,7 +161,7 @@ func (s *sendStream) Write(p []byte) (int, error) { if !deadline.IsZero() { if !time.Now().Before(deadline) { s.dataForWriting = nil - return bytesWritten, errDeadline + return false, bytesWritten, errDeadline } if deadlineTimer == nil { deadlineTimer = utils.NewTimer() @@ -158,7 +176,7 @@ func (s *sendStream) Write(p []byte) (int, error) { s.mutex.Unlock() if !notifiedSender { - s.sender.onHasStreamData(s.streamID) // must be called without holding the mutex + s.sender.onHasStreamData(s.streamID, s) // must be called without holding the mutex notifiedSender = true } if copied { @@ -178,14 +196,15 @@ func (s *sendStream) Write(p []byte) (int, error) { } if bytesWritten == len(p) { - return bytesWritten, nil + return false, bytesWritten, nil } if s.closeForShutdownErr != nil { - return bytesWritten, s.closeForShutdownErr + return false, bytesWritten, s.closeForShutdownErr } else if s.cancelWriteErr != nil { - return bytesWritten, s.cancelWriteErr + s.cancellationFlagged = true + return s.isNewlyCompleted(), bytesWritten, s.cancelWriteErr } - return bytesWritten, nil + return false, bytesWritten, nil } func (s *sendStream) canBufferStreamFrame() bool { @@ -200,12 +219,15 @@ func (s *sendStream) canBufferStreamFrame() bool { // maxBytes is the maximum length this frame (including frame header) will have. func (s *sendStream) popStreamFrame(maxBytes protocol.ByteCount, v protocol.Version) (af ackhandler.StreamFrame, ok, hasMore bool) { s.mutex.Lock() - f, hasMoreData := s.popNewOrRetransmittedStreamFrame(maxBytes, v) + f, hasMoreData, queuedControlFrame := s.popNewOrRetransmittedStreamFrame(maxBytes, v) if f != nil { s.numOutstandingFrames++ } s.mutex.Unlock() + if queuedControlFrame { + s.sender.onHasStreamControlFrame(s.streamID, s) + } if f == nil { return ackhandler.StreamFrame{}, false, hasMoreData } @@ -215,20 +237,20 @@ func (s *sendStream) popStreamFrame(maxBytes protocol.ByteCount, v protocol.Vers }, true, hasMoreData } -func (s *sendStream) popNewOrRetransmittedStreamFrame(maxBytes protocol.ByteCount, v protocol.Version) (*wire.StreamFrame, bool /* has more data to send */) { +func (s *sendStream) popNewOrRetransmittedStreamFrame(maxBytes protocol.ByteCount, v protocol.Version) (_ *wire.StreamFrame, hasMoreData, queuedControlFrame bool) { if s.cancelWriteErr != nil || s.closeForShutdownErr != nil { - return nil, false + return nil, false, false } if len(s.retransmissionQueue) > 0 { f, hasMoreRetransmissions := s.maybeGetRetransmission(maxBytes, v) if f != nil || hasMoreRetransmissions { if f == nil { - return nil, true + return nil, true, false } // We always claim that we have more data to send. // This might be incorrect, in which case there'll be a spurious call to popStreamFrame in the future. - return f, true + return f, true, false } } @@ -240,21 +262,18 @@ func (s *sendStream) popNewOrRetransmittedStreamFrame(maxBytes protocol.ByteCoun Offset: s.writeOffset, DataLenPresent: true, Fin: true, - }, false + }, false, false } - return nil, false + return nil, false, false } sendWindow := s.flowController.SendWindowSize() if sendWindow == 0 { - if isBlocked, offset := s.flowController.IsNewlyBlocked(); isBlocked { - s.sender.queueControlFrame(&wire.StreamDataBlockedFrame{ - StreamID: s.streamID, - MaximumStreamData: offset, - }) - return nil, false + if s.flowController.IsNewlyBlocked() { + s.queuedBlockedFrame = true + return nil, false, true } - return nil, true + return nil, true, false } f, hasMoreData := s.popNewStreamFrame(maxBytes, sendWindow, v) @@ -266,7 +285,7 @@ func (s *sendStream) popNewOrRetransmittedStreamFrame(maxBytes protocol.ByteCoun if f.Fin { s.finSent = true } - return f, hasMoreData + return f, hasMoreData, false } func (s *sendStream) popNewStreamFrame(maxBytes, sendWindow protocol.ByteCount, v protocol.Version) (*wire.StreamFrame, bool) { @@ -348,8 +367,24 @@ func (s *sendStream) getDataForWriting(f *wire.StreamFrame, maxBytes protocol.By } func (s *sendStream) isNewlyCompleted() bool { - completed := (s.finSent || s.cancelWriteErr != nil) && s.numOutstandingFrames == 0 && len(s.retransmissionQueue) == 0 - if completed && !s.completed { + if s.completed { + return false + } + // We need to keep the stream around until all frames have been sent and acknowledged. + if s.numOutstandingFrames > 0 || len(s.retransmissionQueue) > 0 || s.queuedResetStreamFrame { + return false + } + // The stream is completed if we sent the FIN. + if s.finSent { + s.completed = true + return true + } + // The stream is also completed if: + // 1. the application called CancelWrite, or + // 2. we received a STOP_SENDING, and + // * the application consumed the error via Write, or + // * the application called Close + if s.cancelWriteErr != nil && (s.cancellationFlagged || s.finishedWriting) { s.completed = true return true } @@ -362,15 +397,23 @@ func (s *sendStream) Close() error { s.mutex.Unlock() return nil } - if s.cancelWriteErr != nil { - s.mutex.Unlock() - return fmt.Errorf("close called for canceled stream %d", s.streamID) - } - s.ctxCancel(nil) s.finishedWriting = true + cancelWriteErr := s.cancelWriteErr + if cancelWriteErr != nil { + s.cancellationFlagged = true + } + completed := s.isNewlyCompleted() s.mutex.Unlock() - s.sender.onHasStreamData(s.streamID) // need to send the FIN, must be called without holding the mutex + if completed { + s.sender.onStreamCompleted(s.streamID) + } + if cancelWriteErr != nil { + return fmt.Errorf("close called for canceled stream %d", s.streamID) + } + s.sender.onHasStreamData(s.streamID, s) // need to send the FIN, must be called without holding the mutex + + s.ctxCancel(nil) return nil } @@ -378,9 +421,26 @@ func (s *sendStream) CancelWrite(errorCode StreamErrorCode) { s.cancelWriteImpl(errorCode, false) } -// must be called after locking the mutex func (s *sendStream) cancelWriteImpl(errorCode qerr.StreamErrorCode, remote bool) { s.mutex.Lock() + if s.closeForShutdownErr != nil { + s.mutex.Unlock() + return + } + if !remote { + s.cancellationFlagged = true + if s.cancelWriteErr != nil { + completed := s.isNewlyCompleted() + s.mutex.Unlock() + // The user has called CancelWrite. If the previous cancellation was + // because of a STOP_SENDING, we don't need to flag the error to the + // user anymore. + if completed { + s.sender.onStreamCompleted(s.streamID) + } + return + } + } if s.cancelWriteErr != nil { s.mutex.Unlock() return @@ -389,18 +449,11 @@ func (s *sendStream) cancelWriteImpl(errorCode qerr.StreamErrorCode, remote bool s.ctxCancel(s.cancelWriteErr) s.numOutstandingFrames = 0 s.retransmissionQueue = nil - newlyCompleted := s.isNewlyCompleted() + s.queuedResetStreamFrame = true s.mutex.Unlock() s.signalWrite() - s.sender.queueControlFrame(&wire.ResetStreamFrame{ - StreamID: s.streamID, - FinalSize: s.writeOffset, - ErrorCode: errorCode, - }) - if newlyCompleted { - s.sender.onStreamCompleted(s.streamID) - } + s.sender.onHasStreamControlFrame(s.streamID, s) } func (s *sendStream) updateSendWindow(limit protocol.ByteCount) { @@ -412,7 +465,7 @@ func (s *sendStream) updateSendWindow(limit protocol.ByteCount) { hasStreamData := s.dataForWriting != nil || s.nextFrame != nil s.mutex.Unlock() if hasStreamData { - s.sender.onHasStreamData(s.streamID) + s.sender.onHasStreamData(s.streamID, s) } } @@ -420,6 +473,32 @@ func (s *sendStream) handleStopSendingFrame(frame *wire.StopSendingFrame) { s.cancelWriteImpl(frame.ErrorCode, true) } +func (s *sendStream) getControlFrame() (_ ackhandler.Frame, ok, hasMore bool) { + s.mutex.Lock() + defer s.mutex.Unlock() + + if !s.queuedBlockedFrame && !s.queuedResetStreamFrame { + return ackhandler.Frame{}, false, false + } + if s.queuedBlockedFrame { + s.queuedBlockedFrame = false + return ackhandler.Frame{ + Frame: &wire.StreamDataBlockedFrame{StreamID: s.streamID, MaximumStreamData: s.writeOffset}, + }, true, s.queuedResetStreamFrame + } + // RESET_STREAM frame + s.queuedResetStreamFrame = false + s.numOutstandingFrames++ + return ackhandler.Frame{ + Frame: &wire.ResetStreamFrame{ + StreamID: s.streamID, + FinalSize: s.writeOffset, + ErrorCode: s.cancelWriteErr.ErrorCode, + }, + Handler: (*sendStreamResetStreamHandler)(s), + }, true, false +} + func (s *sendStream) Context() context.Context { return s.ctx } @@ -437,7 +516,6 @@ func (s *sendStream) SetWriteDeadline(t time.Time) error { // The peer will NOT be informed about this: the stream is closed without sending a FIN or RST. func (s *sendStream) closeForShutdown(err error) { s.mutex.Lock() - s.ctxCancel(err) s.closeForShutdownErr = err s.mutex.Unlock() s.signalWrite() @@ -467,10 +545,10 @@ func (s *sendStreamAckHandler) OnAcked(f wire.Frame) { if s.numOutstandingFrames < 0 { panic("numOutStandingFrames negative") } - newlyCompleted := (*sendStream)(s).isNewlyCompleted() + completed := (*sendStream)(s).isNewlyCompleted() s.mutex.Unlock() - if newlyCompleted { + if completed { s.sender.onStreamCompleted(s.streamID) } } @@ -490,5 +568,30 @@ func (s *sendStreamAckHandler) OnLost(f wire.Frame) { } s.mutex.Unlock() - s.sender.onHasStreamData(s.streamID) + s.sender.onHasStreamData(s.streamID, (*sendStream)(s)) +} + +type sendStreamResetStreamHandler sendStream + +var _ ackhandler.FrameHandler = &sendStreamResetStreamHandler{} + +func (s *sendStreamResetStreamHandler) OnAcked(wire.Frame) { + s.mutex.Lock() + s.numOutstandingFrames-- + if s.numOutstandingFrames < 0 { + panic("numOutStandingFrames negative") + } + completed := (*sendStream)(s).isNewlyCompleted() + s.mutex.Unlock() + + if completed { + s.sender.onStreamCompleted(s.streamID) + } +} + +func (s *sendStreamResetStreamHandler) OnLost(wire.Frame) { + s.mutex.Lock() + s.queuedResetStreamFrame = true + s.mutex.Unlock() + s.sender.onHasStreamControlFrame(s.streamID, (*sendStream)(s)) } diff --git a/vendor/github.com/quic-go/quic-go/server.go b/vendor/github.com/quic-go/quic-go/server.go index afbd18fd..0cf45aca 100644 --- a/vendor/github.com/quic-go/quic-go/server.go +++ b/vendor/github.com/quic-go/quic-go/server.go @@ -18,7 +18,12 @@ import ( ) // ErrServerClosed is returned by the Listener or EarlyListener's Accept method after a call to Close. -var ErrServerClosed = errors.New("quic: server closed") +var ErrServerClosed = errServerClosed{} + +type errServerClosed struct{} + +func (errServerClosed) Error() string { return "quic: server closed" } +func (errServerClosed) Unwrap() error { return net.ErrClosed } // packetHandler handles packets type packetHandler interface { @@ -76,8 +81,12 @@ type baseServer struct { nextZeroRTTCleanup time.Time zeroRTTQueues map[protocol.ConnectionID]*zeroRTTQueue // only initialized if acceptEarlyConns == true + connContext func(context.Context) context.Context + // set as a member, so they can be set in the tests newConn func( + context.Context, + context.CancelCauseFunc, sendConn, connRunner, protocol.ConnectionID, /* original dest connection ID */ @@ -92,7 +101,6 @@ type baseServer struct { *handshake.TokenGenerator, bool, /* client address validated by an address validation token */ *logging.ConnectionTracer, - uint64, utils.Logger, protocol.Version, ) quicConn @@ -231,6 +239,7 @@ func newServer( conn rawConn, connHandler packetHandlerManager, connIDGenerator ConnectionIDGenerator, + connContext func(context.Context) context.Context, tlsConf *tls.Config, config *Config, tracer *logging.Tracer, @@ -243,6 +252,7 @@ func newServer( ) *baseServer { s := &baseServer{ conn: conn, + connContext: connContext, tlsConf: tlsConf, config: config, tokenGenerator: handshake.NewTokenGenerator(tokenGeneratorKey), @@ -631,7 +641,26 @@ func (s *baseServer) handleInitialImpl(p receivedPacket, hdr *wire.Header) error } var conn quicConn - tracingID := nextConnTracingID() + var cancel context.CancelCauseFunc + ctx, cancel1 := context.WithCancelCause(context.Background()) + if s.connContext != nil { + ctx = s.connContext(ctx) + if ctx == nil { + panic("quic: ConnContext returned nil") + } + // There's no guarantee that the application returns a context + // that's derived from the context we passed into ConnContext. + // We need to make sure that both contexts are cancelled. + var cancel2 context.CancelCauseFunc + ctx, cancel2 = context.WithCancelCause(ctx) + cancel = func(cause error) { + cancel1(cause) + cancel2(cause) + } + } else { + cancel = cancel1 + } + ctx = context.WithValue(ctx, ConnectionTracingKey, nextConnTracingID()) var tracer *logging.ConnectionTracer if config.Tracer != nil { // Use the same connection ID that is passed to the client's GetLogWriter callback. @@ -639,7 +668,7 @@ func (s *baseServer) handleInitialImpl(p receivedPacket, hdr *wire.Header) error if origDestConnID.Len() > 0 { connID = origDestConnID } - tracer = config.Tracer(context.WithValue(context.Background(), ConnectionTracingKey, tracingID), protocol.PerspectiveServer, connID) + tracer = config.Tracer(ctx, protocol.PerspectiveServer, connID) } connID, err := s.connIDGenerator.GenerateConnectionID() if err != nil { @@ -647,6 +676,8 @@ func (s *baseServer) handleInitialImpl(p receivedPacket, hdr *wire.Header) error } s.logger.Debugf("Changing connection ID to %s.", connID) conn = s.newConn( + ctx, + cancel, newSendConn(s.conn, p.remoteAddr, p.info, s.logger), s.connHandler, origDestConnID, @@ -661,7 +692,6 @@ func (s *baseServer) handleInitialImpl(p receivedPacket, hdr *wire.Header) error s.tokenGenerator, clientAddrVerified, tracer, - tracingID, s.logger, hdr.Version, ) @@ -778,7 +808,7 @@ func (s *baseServer) maybeSendInvalidToken(p rejectedPacket) { hdr := p.hdr sealer, opener := handshake.NewInitialAEAD(hdr.DestConnectionID, protocol.PerspectiveServer, hdr.Version) data := p.data[:hdr.ParsedLen()+hdr.Length] - extHdr, err := unpackLongHeader(opener, hdr, data, hdr.Version) + extHdr, err := unpackLongHeader(opener, hdr, data) // Only send INVALID_TOKEN if we can unprotect the packet. // This makes sure that we won't send it for packets that were corrupted. if err != nil { diff --git a/vendor/github.com/quic-go/quic-go/stream.go b/vendor/github.com/quic-go/quic-go/stream.go index ce4374d6..1ed26323 100644 --- a/vendor/github.com/quic-go/quic-go/stream.go +++ b/vendor/github.com/quic-go/quic-go/stream.go @@ -1,6 +1,7 @@ package quic import ( + "context" "net" "os" "sync" @@ -23,8 +24,8 @@ var errDeadline net.Error = &deadlineError{} // The streamSender is notified by the stream about various events. type streamSender interface { - queueControlFrame(wire.Frame) - onHasStreamData(protocol.StreamID) + onHasStreamData(protocol.StreamID, sendStreamI) + onHasStreamControlFrame(protocol.StreamID, streamControlFrameGetter) // must be called without holding the mutex that is acquired by closeForShutdown onStreamCompleted(protocol.StreamID) } @@ -33,19 +34,16 @@ type streamSender interface { // This is necessary in order to keep track when both halves have been completed. type uniStreamSender struct { streamSender - onStreamCompletedImpl func() + onStreamCompletedImpl func() + onHasStreamControlFrameImpl func(protocol.StreamID, streamControlFrameGetter) } -func (s *uniStreamSender) queueControlFrame(f wire.Frame) { - s.streamSender.queueControlFrame(f) +func (s *uniStreamSender) onHasStreamData(id protocol.StreamID, str sendStreamI) { + s.streamSender.onHasStreamData(id, str) } - -func (s *uniStreamSender) onHasStreamData(id protocol.StreamID) { - s.streamSender.onHasStreamData(id) -} - -func (s *uniStreamSender) onStreamCompleted(protocol.StreamID) { - s.onStreamCompletedImpl() +func (s *uniStreamSender) onStreamCompleted(protocol.StreamID) { s.onStreamCompletedImpl() } +func (s *uniStreamSender) onHasStreamControlFrame(id protocol.StreamID, str streamControlFrameGetter) { + s.onHasStreamControlFrameImpl(id, str) } var _ streamSender = &uniStreamSender{} @@ -56,7 +54,6 @@ type streamI interface { // for receiving handleStreamFrame(*wire.StreamFrame) error handleResetStreamFrame(*wire.ResetStreamFrame) error - getWindowUpdate() protocol.ByteCount // for sending hasData() bool handleStopSendingFrame(*wire.StopSendingFrame) @@ -82,10 +79,15 @@ type stream struct { sendStreamCompleted bool } -var _ Stream = &stream{} +var ( + _ Stream = &stream{} + _ streamControlFrameGetter = &receiveStream{} +) // newStream creates a new Stream -func newStream(streamID protocol.StreamID, +func newStream( + ctx context.Context, + streamID protocol.StreamID, sender streamSender, flowController flowcontrol.StreamFlowController, ) *stream { @@ -98,8 +100,11 @@ func newStream(streamID protocol.StreamID, s.checkIfCompleted() s.completedMutex.Unlock() }, + onHasStreamControlFrameImpl: func(id protocol.StreamID, str streamControlFrameGetter) { + sender.onHasStreamControlFrame(streamID, s) + }, } - s.sendStream = *newSendStream(streamID, senderForSendStream, flowController) + s.sendStream = *newSendStream(ctx, streamID, senderForSendStream, flowController) senderForReceiveStream := &uniStreamSender{ streamSender: sender, onStreamCompletedImpl: func() { @@ -108,6 +113,9 @@ func newStream(streamID protocol.StreamID, s.checkIfCompleted() s.completedMutex.Unlock() }, + onHasStreamControlFrameImpl: func(id protocol.StreamID, str streamControlFrameGetter) { + sender.onHasStreamControlFrame(streamID, s) + }, } s.receiveStream = *newReceiveStream(streamID, senderForReceiveStream, flowController) return s @@ -123,6 +131,14 @@ func (s *stream) Close() error { return s.sendStream.Close() } +func (s *stream) getControlFrame() (_ ackhandler.Frame, ok, hasMore bool) { + f, ok, _ := s.sendStream.getControlFrame() + if ok { + return f, true, true + } + return s.receiveStream.getControlFrame() +} + func (s *stream) SetDeadline(t time.Time) error { _ = s.SetReadDeadline(t) // SetReadDeadline never errors _ = s.SetWriteDeadline(t) // SetWriteDeadline never errors diff --git a/vendor/github.com/quic-go/quic-go/streams_map.go b/vendor/github.com/quic-go/quic-go/streams_map.go index b1a80eb3..0ce91287 100644 --- a/vendor/github.com/quic-go/quic-go/streams_map.go +++ b/vendor/github.com/quic-go/quic-go/streams_map.go @@ -38,19 +38,31 @@ type streamOpenErr struct{ error } var _ net.Error = &streamOpenErr{} -func (e streamOpenErr) Temporary() bool { return e.error == errTooManyOpenStreams } -func (streamOpenErr) Timeout() bool { return false } +func (streamOpenErr) Timeout() bool { return false } +func (e streamOpenErr) Unwrap() error { return e.error } -// errTooManyOpenStreams is used internally by the outgoing streams maps. -var errTooManyOpenStreams = errors.New("too many open streams") +func (e streamOpenErr) Temporary() bool { + // In older versions of quic-go, the stream limit error was documented to be a net.Error.Temporary. + // This function was since deprecated, but we keep the existing behavior. + return errors.Is(e, &StreamLimitReachedError{}) +} + +// StreamLimitReachedError is returned from Connection.OpenStream and Connection.OpenUniStream +// when it is not possible to open a new stream because the number of opens streams reached +// the peer's stream limit. +type StreamLimitReachedError struct{} + +func (e StreamLimitReachedError) Error() string { return "too many open streams" } type streamsMap struct { + ctx context.Context // not used for cancellations, but carries the values associated with the connection perspective protocol.Perspective maxIncomingBidiStreams uint64 maxIncomingUniStreams uint64 sender streamSender + queueControlFrame func(wire.Frame) newFlowController func(protocol.StreamID) flowcontrol.StreamFlowController mutex sync.Mutex @@ -64,14 +76,18 @@ type streamsMap struct { var _ streamManager = &streamsMap{} func newStreamsMap( + ctx context.Context, sender streamSender, + queueControlFrame func(wire.Frame), newFlowController func(protocol.StreamID) flowcontrol.StreamFlowController, maxIncomingBidiStreams uint64, maxIncomingUniStreams uint64, perspective protocol.Perspective, -) streamManager { +) *streamsMap { m := &streamsMap{ + ctx: ctx, perspective: perspective, + queueControlFrame: queueControlFrame, newFlowController: newFlowController, maxIncomingBidiStreams: maxIncomingBidiStreams, maxIncomingUniStreams: maxIncomingUniStreams, @@ -86,26 +102,26 @@ func (m *streamsMap) initMaps() { protocol.StreamTypeBidi, func(num protocol.StreamNum) streamI { id := num.StreamID(protocol.StreamTypeBidi, m.perspective) - return newStream(id, m.sender, m.newFlowController(id)) + return newStream(m.ctx, id, m.sender, m.newFlowController(id)) }, - m.sender.queueControlFrame, + m.queueControlFrame, ) m.incomingBidiStreams = newIncomingStreamsMap( protocol.StreamTypeBidi, func(num protocol.StreamNum) streamI { id := num.StreamID(protocol.StreamTypeBidi, m.perspective.Opposite()) - return newStream(id, m.sender, m.newFlowController(id)) + return newStream(m.ctx, id, m.sender, m.newFlowController(id)) }, m.maxIncomingBidiStreams, - m.sender.queueControlFrame, + m.queueControlFrame, ) m.outgoingUniStreams = newOutgoingStreamsMap( protocol.StreamTypeUni, func(num protocol.StreamNum) sendStreamI { id := num.StreamID(protocol.StreamTypeUni, m.perspective) - return newSendStream(id, m.sender, m.newFlowController(id)) + return newSendStream(m.ctx, id, m.sender, m.newFlowController(id)) }, - m.sender.queueControlFrame, + m.queueControlFrame, ) m.incomingUniStreams = newIncomingStreamsMap( protocol.StreamTypeUni, @@ -114,7 +130,7 @@ func (m *streamsMap) initMaps() { return newReceiveStream(id, m.sender, m.newFlowController(id)) }, m.maxIncomingUniStreams, - m.sender.queueControlFrame, + m.queueControlFrame, ) } diff --git a/vendor/github.com/quic-go/quic-go/streams_map_outgoing.go b/vendor/github.com/quic-go/quic-go/streams_map_outgoing.go index fd45f4e7..a8d04b04 100644 --- a/vendor/github.com/quic-go/quic-go/streams_map_outgoing.go +++ b/vendor/github.com/quic-go/quic-go/streams_map_outgoing.go @@ -60,7 +60,7 @@ func (m *outgoingStreamsMap[T]) OpenStream() (T, error) { // if there are OpenStreamSync calls waiting, return an error here if len(m.openQueue) > 0 || m.nextStream > m.maxStream { m.maybeSendBlockedFrame() - return *new(T), streamOpenErr{errTooManyOpenStreams} + return *new(T), streamOpenErr{&StreamLimitReachedError{}} } return m.openStream(), nil } diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_helper_darwin.go b/vendor/github.com/quic-go/quic-go/sys_conn_helper_darwin.go index d761072f..545502dd 100644 --- a/vendor/github.com/quic-go/quic-go/sys_conn_helper_darwin.go +++ b/vendor/github.com/quic-go/quic-go/sys_conn_helper_darwin.go @@ -33,4 +33,6 @@ func parseIPv4PktInfo(body []byte) (ip netip.Addr, ifIndex uint32, ok bool) { return netip.AddrFrom4(*(*[4]byte)(body[8:12])), binary.LittleEndian.Uint32(body), true } -func isGSOSupported(syscall.RawConn) bool { return false } +func isGSOEnabled(syscall.RawConn) bool { return false } + +func isECNEnabled() bool { return !isECNDisabledUsingEnv() } diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_helper_freebsd.go b/vendor/github.com/quic-go/quic-go/sys_conn_helper_freebsd.go index a53ca2ea..521f80d4 100644 --- a/vendor/github.com/quic-go/quic-go/sys_conn_helper_freebsd.go +++ b/vendor/github.com/quic-go/quic-go/sys_conn_helper_freebsd.go @@ -28,4 +28,6 @@ func parseIPv4PktInfo(body []byte) (ip netip.Addr, _ uint32, ok bool) { return netip.AddrFrom4(*(*[4]byte)(body)), 0, true } -func isGSOSupported(syscall.RawConn) bool { return false } +func isGSOEnabled(syscall.RawConn) bool { return false } + +func isECNEnabled() bool { return !isECNDisabledUsingEnv() } diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_helper_linux.go b/vendor/github.com/quic-go/quic-go/sys_conn_helper_linux.go index 5fbf34ad..eec12719 100644 --- a/vendor/github.com/quic-go/quic-go/sys_conn_helper_linux.go +++ b/vendor/github.com/quic-go/quic-go/sys_conn_helper_linux.go @@ -23,6 +23,12 @@ const ecnIPv4DataLen = 1 const batchSize = 8 // needs to smaller than MaxUint8 (otherwise the type of oobConn.readPos has to be changed) +var kernelVersionMajor int + +func init() { + kernelVersionMajor, _ = kernelVersion() +} + func forceSetReceiveBuffer(c syscall.RawConn, bytes int) error { var serr error if err := c.Control(func(fd uintptr) { @@ -55,9 +61,12 @@ func parseIPv4PktInfo(body []byte) (ip netip.Addr, ifIndex uint32, ok bool) { return netip.AddrFrom4(*(*[4]byte)(body[8:12])), binary.LittleEndian.Uint32(body), true } -// isGSOSupported tests if the kernel supports GSO. +// isGSOEnabled tests if the kernel supports GSO. // Sending with GSO might still fail later on, if the interface doesn't support it (see isGSOError). -func isGSOSupported(conn syscall.RawConn) bool { +func isGSOEnabled(conn syscall.RawConn) bool { + if kernelVersionMajor < 5 { + return false + } disabled, err := strconv.ParseBool(os.Getenv("QUIC_GO_DISABLE_GSO")) if err == nil && disabled { return false @@ -108,3 +117,40 @@ func isPermissionError(err error) bool { } return false } + +func isECNEnabled() bool { + return kernelVersionMajor >= 5 && !isECNDisabledUsingEnv() +} + +// kernelVersion returns major and minor kernel version numbers, parsed from +// the syscall.Uname's Release field, or 0, 0 if the version can't be obtained +// or parsed. +// +// copied from the standard library's internal/syscall/unix/kernel_version_linux.go +func kernelVersion() (major, minor int) { + var uname syscall.Utsname + if err := syscall.Uname(&uname); err != nil { + return + } + + var ( + values [2]int + value, vi int + ) + for _, c := range uname.Release { + if '0' <= c && c <= '9' { + value = (value * 10) + int(c-'0') + } else { + // Note that we're assuming N.N.N here. + // If we see anything else, we are likely to mis-parse it. + values[vi] = value + vi++ + if vi >= len(values) { + break + } + value = 0 + } + } + + return values[0], values[1] +} diff --git a/vendor/github.com/quic-go/quic-go/sys_conn_oob.go b/vendor/github.com/quic-go/quic-go/sys_conn_oob.go index 64d581c0..a6795ca2 100644 --- a/vendor/github.com/quic-go/quic-go/sys_conn_oob.go +++ b/vendor/github.com/quic-go/quic-go/sys_conn_oob.go @@ -59,7 +59,7 @@ func inspectWriteBuffer(c syscall.RawConn) (int, error) { return size, serr } -func isECNDisabled() bool { +func isECNDisabledUsingEnv() bool { disabled, err := strconv.ParseBool(os.Getenv("QUIC_GO_DISABLE_ECN")) return err == nil && disabled } @@ -147,8 +147,8 @@ func newConn(c OOBCapablePacketConn, supportsDF bool) (*oobConn, error) { readPos: batchSize, cap: connCapabilities{ DF: supportsDF, - GSO: isGSOSupported(rawConn), - ECN: !isECNDisabled(), + GSO: isGSOEnabled(rawConn), + ECN: isECNEnabled(), }, } for i := 0; i < batchSize; i++ { @@ -247,7 +247,7 @@ func (c *oobConn) WritePacket(b []byte, addr net.Addr, packetInfoOOB []byte, gso } if ecn != protocol.ECNUnsupported { if !c.capabilities().ECN { - panic("tried to send a ECN-marked packet although ECN is disabled") + panic("tried to send an ECN-marked packet although ECN is disabled") } if remoteUDPAddr, ok := addr.(*net.UDPAddr); ok { if remoteUDPAddr.IP.To4() != nil { diff --git a/vendor/github.com/quic-go/quic-go/transport.go b/vendor/github.com/quic-go/quic-go/transport.go index ea219c11..059f30f5 100644 --- a/vendor/github.com/quic-go/quic-go/transport.go +++ b/vendor/github.com/quic-go/quic-go/transport.go @@ -89,6 +89,17 @@ type Transport struct { // implementation of this callback (negating its return value). VerifySourceAddress func(net.Addr) bool + // ConnContext is called when the server accepts a new connection. + // The context is closed when the connection is closed, or when the handshake fails for any reason. + // The context returned from the callback is used to derive every other context used during the + // lifetime of the connection: + // * the context passed to crypto/tls (and used on the tls.ClientHelloInfo) + // * the context used in Config.Tracer + // * the context returned from Connection.Context + // * the context returned from SendStream.Context + // It is not used for dialed connections. + ConnContext func(context.Context) context.Context + // A Tracer traces events that don't belong to a single QUIC connection. // Tracer.Close is called when the transport is closed. Tracer *logging.Tracer @@ -168,6 +179,7 @@ func (t *Transport) createServer(tlsConf *tls.Config, conf *Config, allow0RTT bo t.conn, t.handlerMap, t.connIDGenerator, + t.ConnContext, tlsConf, conf, t.Tracer, diff --git a/vendor/github.com/quic-go/quic-go/window_update_queue.go b/vendor/github.com/quic-go/quic-go/window_update_queue.go deleted file mode 100644 index 9ed12143..00000000 --- a/vendor/github.com/quic-go/quic-go/window_update_queue.go +++ /dev/null @@ -1,71 +0,0 @@ -package quic - -import ( - "sync" - - "github.com/quic-go/quic-go/internal/flowcontrol" - "github.com/quic-go/quic-go/internal/protocol" - "github.com/quic-go/quic-go/internal/wire" -) - -type windowUpdateQueue struct { - mutex sync.Mutex - - queue map[protocol.StreamID]struct{} // used as a set - queuedConn bool // connection-level window update - - streamGetter streamGetter - connFlowController flowcontrol.ConnectionFlowController - callback func(wire.Frame) -} - -func newWindowUpdateQueue( - streamGetter streamGetter, - connFC flowcontrol.ConnectionFlowController, - cb func(wire.Frame), -) *windowUpdateQueue { - return &windowUpdateQueue{ - queue: make(map[protocol.StreamID]struct{}), - streamGetter: streamGetter, - connFlowController: connFC, - callback: cb, - } -} - -func (q *windowUpdateQueue) AddStream(id protocol.StreamID) { - q.mutex.Lock() - q.queue[id] = struct{}{} - q.mutex.Unlock() -} - -func (q *windowUpdateQueue) AddConnection() { - q.mutex.Lock() - q.queuedConn = true - q.mutex.Unlock() -} - -func (q *windowUpdateQueue) QueueAll() { - q.mutex.Lock() - // queue a connection-level window update - if q.queuedConn { - q.callback(&wire.MaxDataFrame{MaximumData: q.connFlowController.GetWindowUpdate()}) - q.queuedConn = false - } - // queue all stream-level window updates - for id := range q.queue { - delete(q.queue, id) - str, err := q.streamGetter.GetOrOpenReceiveStream(id) - if err != nil || str == nil { // the stream can be nil if it was completed before dequeing the window update - continue - } - offset := str.getWindowUpdate() - if offset == 0 { // can happen if we received a final offset, right after queueing the window update - continue - } - q.callback(&wire.MaxStreamDataFrame{ - StreamID: id, - MaximumStreamData: offset, - }) - } - q.mutex.Unlock() -} diff --git a/vendor/github.com/rs/cors/README.md b/vendor/github.com/rs/cors/README.md index 0ad3e94e..c7fbea00 100644 --- a/vendor/github.com/rs/cors/README.md +++ b/vendor/github.com/rs/cors/README.md @@ -1,4 +1,4 @@ -# Go CORS handler [![godoc](http://img.shields.io/badge/godoc-reference-blue.svg?style=flat)](https://godoc.org/github.com/rs/cors) [![license](http://img.shields.io/badge/license-MIT-red.svg?style=flat)](https://raw.githubusercontent.com/rs/cors/master/LICENSE) [![build](https://img.shields.io/travis/rs/cors.svg?style=flat)](https://travis-ci.org/rs/cors) [![Coverage](http://gocover.io/_badge/github.com/rs/cors)](http://gocover.io/github.com/rs/cors) +# Go CORS handler [![godoc](http://img.shields.io/badge/godoc-reference-blue.svg?style=flat)](https://godoc.org/github.com/rs/cors) [![license](http://img.shields.io/badge/license-MIT-red.svg?style=flat)](https://raw.githubusercontent.com/rs/cors/master/LICENSE) [![Go Coverage](https://github.com/rs/cors/wiki/coverage.svg)](https://raw.githack.com/wiki/rs/cors/coverage.html) CORS is a `net/http` handler implementing [Cross Origin Resource Sharing W3 specification](http://www.w3.org/TR/cors/) in Golang. @@ -88,11 +88,14 @@ handler = c.Handler(handler) * **AllowedOrigins** `[]string`: A list of origins a cross-domain request can be executed from. If the special `*` value is present in the list, all origins will be allowed. An origin may contain a wildcard (`*`) to replace 0 or more characters (i.e.: `http://*.domain.com`). Usage of wildcards implies a small performance penality. Only one wildcard can be used per origin. The default value is `*`. * **AllowOriginFunc** `func (origin string) bool`: A custom function to validate the origin. It takes the origin as an argument and returns true if allowed, or false otherwise. If this option is set, the content of `AllowedOrigins` is ignored. -* **AllowOriginRequestFunc** `func (r *http.Request, origin string) bool`: A custom function to validate the origin. It takes the HTTP Request object and the origin as argument and returns true if allowed or false otherwise. If this option is set, the content of `AllowedOrigins` and `AllowOriginFunc` is ignored +* **AllowOriginRequestFunc** `func (r *http.Request, origin string) bool`: A custom function to validate the origin. It takes the HTTP Request object and the origin as argument and returns true if allowed or false otherwise. If this option is set, the contents of `AllowedOrigins` and `AllowOriginFunc` are ignored. +Deprecated: use `AllowOriginVaryRequestFunc` instead. +* **AllowOriginVaryRequestFunc** `func(r *http.Request, origin string) (bool, []string)`: A custom function to validate the origin. It takes the HTTP Request object and the origin as argument and returns true if allowed or false otherwise with a list of headers used to take that decision if any so they can be added to the Vary header. If this option is set, the contents of `AllowedOrigins`, `AllowOriginFunc` and `AllowOriginRequestFunc` are ignored. * **AllowedMethods** `[]string`: A list of methods the client is allowed to use with cross-domain requests. Default value is simple methods (`GET` and `POST`). * **AllowedHeaders** `[]string`: A list of non simple headers the client is allowed to use with cross-domain requests. -* **ExposedHeaders** `[]string`: Indicates which headers are safe to expose to the API of a CORS API specification +* **ExposedHeaders** `[]string`: Indicates which headers are safe to expose to the API of a CORS API specification. * **AllowCredentials** `bool`: Indicates whether the request can include user credentials like cookies, HTTP authentication or client side SSL certificates. The default is `false`. +* **AllowPrivateNetwork** `bool`: Indicates whether to accept cross-origin requests over a private network. * **MaxAge** `int`: Indicates how long (in seconds) the results of a preflight request can be cached. The default is `0` which stands for no max age. * **OptionsPassthrough** `bool`: Instructs preflight to let other potential next handlers to process the `OPTIONS` method. Turn this on if your application handles `OPTIONS`. * **OptionsSuccessStatus** `int`: Provides a status code to use for successful OPTIONS requests. Default value is `http.StatusNoContent` (`204`). @@ -102,14 +105,20 @@ See [API documentation](http://godoc.org/github.com/rs/cors) for more info. ## Benchmarks - BenchmarkWithout 20000000 64.6 ns/op 8 B/op 1 allocs/op - BenchmarkDefault 3000000 469 ns/op 114 B/op 2 allocs/op - BenchmarkAllowedOrigin 3000000 608 ns/op 114 B/op 2 allocs/op - BenchmarkPreflight 20000000 73.2 ns/op 0 B/op 0 allocs/op - BenchmarkPreflightHeader 20000000 73.6 ns/op 0 B/op 0 allocs/op - BenchmarkParseHeaderList 2000000 847 ns/op 184 B/op 6 allocs/op - BenchmarkParse…Single 5000000 290 ns/op 32 B/op 3 allocs/op - BenchmarkParse…Normalized 2000000 776 ns/op 160 B/op 6 allocs/op +``` +goos: darwin +goarch: arm64 +pkg: github.com/rs/cors +BenchmarkWithout-10 135325480 8.124 ns/op 0 B/op 0 allocs/op +BenchmarkDefault-10 24082140 51.40 ns/op 0 B/op 0 allocs/op +BenchmarkAllowedOrigin-10 16424518 88.25 ns/op 0 B/op 0 allocs/op +BenchmarkPreflight-10 8010259 147.3 ns/op 0 B/op 0 allocs/op +BenchmarkPreflightHeader-10 6850962 175.0 ns/op 0 B/op 0 allocs/op +BenchmarkWildcard/match-10 253275342 4.714 ns/op 0 B/op 0 allocs/op +BenchmarkWildcard/too_short-10 1000000000 0.6235 ns/op 0 B/op 0 allocs/op +PASS +ok github.com/rs/cors 99.131s +``` ## Licenses diff --git a/vendor/github.com/rs/cors/cors.go b/vendor/github.com/rs/cors/cors.go index 2ce24e3f..724f242a 100644 --- a/vendor/github.com/rs/cors/cors.go +++ b/vendor/github.com/rs/cors/cors.go @@ -4,15 +4,15 @@ as defined by http://www.w3.org/TR/cors/ You can configure it by passing an option struct to cors.New: - c := cors.New(cors.Options{ - AllowedOrigins: []string{"foo.com"}, - AllowedMethods: []string{http.MethodGet, http.MethodPost, http.MethodDelete}, - AllowCredentials: true, - }) + c := cors.New(cors.Options{ + AllowedOrigins: []string{"foo.com"}, + AllowedMethods: []string{http.MethodGet, http.MethodPost, http.MethodDelete}, + AllowCredentials: true, + }) Then insert the handler in the chain: - handler = c.Handler(handler) + handler = c.Handler(handler) See Options documentation for more options. @@ -26,8 +26,14 @@ import ( "os" "strconv" "strings" + + "github.com/rs/cors/internal" ) +var headerVaryOrigin = []string{"Origin"} +var headerOriginAll = []string{"*"} +var headerTrue = []string{"true"} + // Options is a configuration container to setup the CORS middleware. type Options struct { // AllowedOrigins is a list of origins a cross-domain request can be executed from. @@ -37,31 +43,48 @@ type Options struct { // Only one wildcard can be used per origin. // Default value is ["*"] AllowedOrigins []string - // AllowOriginFunc is a custom function to validate the origin. It take the origin - // as argument and returns true if allowed or false otherwise. If this option is - // set, the content of AllowedOrigins is ignored. + // AllowOriginFunc is a custom function to validate the origin. It take the + // origin as argument and returns true if allowed or false otherwise. If + // this option is set, the content of `AllowedOrigins` is ignored. AllowOriginFunc func(origin string) bool - // AllowOriginRequestFunc is a custom function to validate the origin. It takes the HTTP Request object and the origin as - // argument and returns true if allowed or false otherwise. If this option is set, the content of `AllowedOrigins` - // and `AllowOriginFunc` is ignored. + // AllowOriginRequestFunc is a custom function to validate the origin. It + // takes the HTTP Request object and the origin as argument and returns true + // if allowed or false otherwise. If headers are used take the decision, + // consider using AllowOriginVaryRequestFunc instead. If this option is set, + // the contents of `AllowedOrigins`, `AllowOriginFunc` are ignored. + // + // Deprecated: use `AllowOriginVaryRequestFunc` instead. AllowOriginRequestFunc func(r *http.Request, origin string) bool + // AllowOriginVaryRequestFunc is a custom function to validate the origin. + // It takes the HTTP Request object and the origin as argument and returns + // true if allowed or false otherwise with a list of headers used to take + // that decision if any so they can be added to the Vary header. If this + // option is set, the contents of `AllowedOrigins`, `AllowOriginFunc` and + // `AllowOriginRequestFunc` are ignored. + AllowOriginVaryRequestFunc func(r *http.Request, origin string) (bool, []string) // AllowedMethods is a list of methods the client is allowed to use with // cross-domain requests. Default value is simple methods (HEAD, GET and POST). AllowedMethods []string // AllowedHeaders is list of non simple headers the client is allowed to use with // cross-domain requests. // If the special "*" value is present in the list, all headers will be allowed. - // Default value is [] but "Origin" is always appended to the list. + // Default value is []. AllowedHeaders []string // ExposedHeaders indicates which headers are safe to expose to the API of a CORS // API specification ExposedHeaders []string // MaxAge indicates how long (in seconds) the results of a preflight request - // can be cached + // can be cached. Default value is 0, which stands for no + // Access-Control-Max-Age header to be sent back, resulting in browsers + // using their default value (5s by spec). If you need to force a 0 max-age, + // set `MaxAge` to a negative value (ie: -1). MaxAge int // AllowCredentials indicates whether the request can include user credentials like // cookies, HTTP authentication or client side SSL certificates. AllowCredentials bool + // AllowPrivateNetwork indicates whether to accept cross-origin requests over a + // private network. + AllowPrivateNetwork bool // OptionsPassthrough instructs preflight to let other potential next handlers to // process the OPTIONS method. Turn this on if your application handles OPTIONS. OptionsPassthrough bool @@ -70,6 +93,8 @@ type Options struct { OptionsSuccessStatus int // Debugging flag adds additional output to debug server side CORS issues Debug bool + // Adds a custom logger, implies Debug is true + Logger Logger } // Logger generic interface for logger @@ -86,16 +111,19 @@ type Cors struct { // List of allowed origins containing wildcards allowedWOrigins []wildcard // Optional origin validator function - allowOriginFunc func(origin string) bool - // Optional origin validator (with request) function - allowOriginRequestFunc func(r *http.Request, origin string) bool + allowOriginFunc func(r *http.Request, origin string) (bool, []string) // Normalized list of allowed headers - allowedHeaders []string + // Note: the Fetch standard guarantees that CORS-unsafe request-header names + // (i.e. the values listed in the Access-Control-Request-Headers header) + // are unique and sorted; + // see https://fetch.spec.whatwg.org/#cors-unsafe-request-header-names. + allowedHeaders internal.SortedSet // Normalized list of allowed methods allowedMethods []string - // Normalized list of exposed headers + // Pre-computed normalized list of exposed headers exposedHeaders []string - maxAge int + // Pre-computed maxAge header value + maxAge []string // Set to true when allowed origins contains a "*" allowedOriginsAll bool // Set to true when allowed headers contains a "*" @@ -103,38 +131,46 @@ type Cors struct { // Status code to use for successful OPTIONS requests optionsSuccessStatus int allowCredentials bool + allowPrivateNetwork bool optionPassthrough bool + preflightVary []string } // New creates a new Cors handler with the provided options. func New(options Options) *Cors { c := &Cors{ - exposedHeaders: convert(options.ExposedHeaders, http.CanonicalHeaderKey), - allowOriginFunc: options.AllowOriginFunc, - allowOriginRequestFunc: options.AllowOriginRequestFunc, - allowCredentials: options.AllowCredentials, - maxAge: options.MaxAge, - optionPassthrough: options.OptionsPassthrough, + allowCredentials: options.AllowCredentials, + allowPrivateNetwork: options.AllowPrivateNetwork, + optionPassthrough: options.OptionsPassthrough, + Log: options.Logger, } if options.Debug && c.Log == nil { c.Log = log.New(os.Stdout, "[cors] ", log.LstdFlags) } - // Normalize options - // Note: for origins and methods matching, the spec requires a case-sensitive matching. - // As it may error prone, we chose to ignore the spec here. - - // Allowed Origins - if len(options.AllowedOrigins) == 0 { - if options.AllowOriginFunc == nil && options.AllowOriginRequestFunc == nil { + // Allowed origins + switch { + case options.AllowOriginVaryRequestFunc != nil: + c.allowOriginFunc = options.AllowOriginVaryRequestFunc + case options.AllowOriginRequestFunc != nil: + c.allowOriginFunc = func(r *http.Request, origin string) (bool, []string) { + return options.AllowOriginRequestFunc(r, origin), nil + } + case options.AllowOriginFunc != nil: + c.allowOriginFunc = func(r *http.Request, origin string) (bool, []string) { + return options.AllowOriginFunc(origin), nil + } + case len(options.AllowedOrigins) == 0: + if c.allowOriginFunc == nil { // Default is all origins c.allowedOriginsAll = true } - } else { + default: c.allowedOrigins = []string{} c.allowedWOrigins = []wildcard{} for _, origin := range options.AllowedOrigins { - // Normalize + // Note: for origins matching, the spec requires a case-sensitive matching. + // As it may error prone, we chose to ignore the spec here. origin = strings.ToLower(origin) if origin == "*" { // If "*" is present in the list, turn the whole list into a match all @@ -153,16 +189,19 @@ func New(options Options) *Cors { } // Allowed Headers + // Note: the Fetch standard guarantees that CORS-unsafe request-header names + // (i.e. the values listed in the Access-Control-Request-Headers header) + // are lowercase; see https://fetch.spec.whatwg.org/#cors-unsafe-request-header-names. if len(options.AllowedHeaders) == 0 { // Use sensible defaults - c.allowedHeaders = []string{"Origin", "Accept", "Content-Type", "X-Requested-With"} + c.allowedHeaders = internal.NewSortedSet("accept", "content-type", "x-requested-with") } else { - // Origin is always appended as some browsers will always request for this header at preflight - c.allowedHeaders = convert(append(options.AllowedHeaders, "Origin"), http.CanonicalHeaderKey) + normalized := convert(options.AllowedHeaders, strings.ToLower) + c.allowedHeaders = internal.NewSortedSet(normalized...) for _, h := range options.AllowedHeaders { if h == "*" { c.allowedHeadersAll = true - c.allowedHeaders = nil + c.allowedHeaders = internal.SortedSet{} break } } @@ -173,7 +212,7 @@ func New(options Options) *Cors { // Default is spec's "simple" methods c.allowedMethods = []string{http.MethodGet, http.MethodPost, http.MethodHead} } else { - c.allowedMethods = convert(options.AllowedMethods, strings.ToUpper) + c.allowedMethods = options.AllowedMethods } // Options Success Status Code @@ -183,6 +222,25 @@ func New(options Options) *Cors { c.optionsSuccessStatus = options.OptionsSuccessStatus } + // Pre-compute exposed headers header value + if len(options.ExposedHeaders) > 0 { + c.exposedHeaders = []string{strings.Join(convert(options.ExposedHeaders, http.CanonicalHeaderKey), ", ")} + } + + // Pre-compute prefight Vary header to save allocations + if c.allowPrivateNetwork { + c.preflightVary = []string{"Origin, Access-Control-Request-Method, Access-Control-Request-Headers, Access-Control-Request-Private-Network"} + } else { + c.preflightVary = []string{"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"} + } + + // Precompute max-age + if options.MaxAge > 0 { + c.maxAge = []string{strconv.Itoa(options.MaxAge)} + } else if options.MaxAge < 0 { + c.maxAge = []string{"0"} + } + return c } @@ -279,15 +337,21 @@ func (c *Cors) handlePreflight(w http.ResponseWriter, r *http.Request) { // Always set Vary headers // see https://github.com/rs/cors/issues/10, // https://github.com/rs/cors/commit/dbdca4d95feaa7511a46e6f1efb3b3aa505bc43f#commitcomment-12352001 - headers.Add("Vary", "Origin") - headers.Add("Vary", "Access-Control-Request-Method") - headers.Add("Vary", "Access-Control-Request-Headers") + if vary, found := headers["Vary"]; found { + headers["Vary"] = append(vary, c.preflightVary[0]) + } else { + headers["Vary"] = c.preflightVary + } + allowed, additionalVaryHeaders := c.isOriginAllowed(r, origin) + if len(additionalVaryHeaders) > 0 { + headers.Add("Vary", strings.Join(convert(additionalVaryHeaders, http.CanonicalHeaderKey), ", ")) + } if origin == "" { c.logf(" Preflight aborted: empty origin") return } - if !c.isOriginAllowed(r, origin) { + if !allowed { c.logf(" Preflight aborted: origin '%s' not allowed", origin) return } @@ -297,30 +361,37 @@ func (c *Cors) handlePreflight(w http.ResponseWriter, r *http.Request) { c.logf(" Preflight aborted: method '%s' not allowed", reqMethod) return } - reqHeaders := parseHeaderList(r.Header.Get("Access-Control-Request-Headers")) - if !c.areHeadersAllowed(reqHeaders) { + // Note: the Fetch standard guarantees that at most one + // Access-Control-Request-Headers header is present in the preflight request; + // see step 5.2 in https://fetch.spec.whatwg.org/#cors-preflight-fetch-0. + // However, some gateways split that header into multiple headers of the same name; + // see https://github.com/rs/cors/issues/184. + reqHeaders, found := r.Header["Access-Control-Request-Headers"] + if found && !c.allowedHeadersAll && !c.allowedHeaders.Accepts(reqHeaders) { c.logf(" Preflight aborted: headers '%v' not allowed", reqHeaders) return } if c.allowedOriginsAll { - headers.Set("Access-Control-Allow-Origin", "*") + headers["Access-Control-Allow-Origin"] = headerOriginAll } else { - headers.Set("Access-Control-Allow-Origin", origin) + headers["Access-Control-Allow-Origin"] = r.Header["Origin"] } // Spec says: Since the list of methods can be unbounded, simply returning the method indicated // by Access-Control-Request-Method (if supported) can be enough - headers.Set("Access-Control-Allow-Methods", strings.ToUpper(reqMethod)) - if len(reqHeaders) > 0 { - + headers["Access-Control-Allow-Methods"] = r.Header["Access-Control-Request-Method"] + if found && len(reqHeaders[0]) > 0 { // Spec says: Since the list of headers can be unbounded, simply returning supported headers // from Access-Control-Request-Headers can be enough - headers.Set("Access-Control-Allow-Headers", strings.Join(reqHeaders, ", ")) + headers["Access-Control-Allow-Headers"] = reqHeaders } if c.allowCredentials { - headers.Set("Access-Control-Allow-Credentials", "true") + headers["Access-Control-Allow-Credentials"] = headerTrue + } + if c.allowPrivateNetwork && r.Header.Get("Access-Control-Request-Private-Network") == "true" { + headers["Access-Control-Allow-Private-Network"] = headerTrue } - if c.maxAge > 0 { - headers.Set("Access-Control-Max-Age", strconv.Itoa(c.maxAge)) + if len(c.maxAge) > 0 { + headers["Access-Control-Max-Age"] = c.maxAge } c.logf(" Preflight response headers: %v", headers) } @@ -330,13 +401,22 @@ func (c *Cors) handleActualRequest(w http.ResponseWriter, r *http.Request) { headers := w.Header() origin := r.Header.Get("Origin") + allowed, additionalVaryHeaders := c.isOriginAllowed(r, origin) + // Always set Vary, see https://github.com/rs/cors/issues/10 - headers.Add("Vary", "Origin") + if vary := headers["Vary"]; vary == nil { + headers["Vary"] = headerVaryOrigin + } else { + headers["Vary"] = append(vary, headerVaryOrigin[0]) + } + if len(additionalVaryHeaders) > 0 { + headers.Add("Vary", strings.Join(convert(additionalVaryHeaders, http.CanonicalHeaderKey), ", ")) + } if origin == "" { c.logf(" Actual request no headers added: missing origin") return } - if !c.isOriginAllowed(r, origin) { + if !allowed { c.logf(" Actual request no headers added: origin '%s' not allowed", origin) return } @@ -347,19 +427,18 @@ func (c *Cors) handleActualRequest(w http.ResponseWriter, r *http.Request) { // We think it's a nice feature to be able to have control on those methods though. if !c.isMethodAllowed(r.Method) { c.logf(" Actual request no headers added: method '%s' not allowed", r.Method) - return } if c.allowedOriginsAll { - headers.Set("Access-Control-Allow-Origin", "*") + headers["Access-Control-Allow-Origin"] = headerOriginAll } else { - headers.Set("Access-Control-Allow-Origin", origin) + headers["Access-Control-Allow-Origin"] = r.Header["Origin"] } if len(c.exposedHeaders) > 0 { - headers.Set("Access-Control-Expose-Headers", strings.Join(c.exposedHeaders, ", ")) + headers["Access-Control-Expose-Headers"] = c.exposedHeaders } if c.allowCredentials { - headers.Set("Access-Control-Allow-Credentials", "true") + headers["Access-Control-Allow-Credentials"] = headerTrue } c.logf(" Actual response added headers: %v", headers) } @@ -374,33 +453,31 @@ func (c *Cors) logf(format string, a ...interface{}) { // check the Origin of a request. No origin at all is also allowed. func (c *Cors) OriginAllowed(r *http.Request) bool { origin := r.Header.Get("Origin") - return c.isOriginAllowed(r, origin) + allowed, _ := c.isOriginAllowed(r, origin) + return allowed } // isOriginAllowed checks if a given origin is allowed to perform cross-domain requests // on the endpoint -func (c *Cors) isOriginAllowed(r *http.Request, origin string) bool { - if c.allowOriginRequestFunc != nil { - return c.allowOriginRequestFunc(r, origin) - } +func (c *Cors) isOriginAllowed(r *http.Request, origin string) (allowed bool, varyHeaders []string) { if c.allowOriginFunc != nil { - return c.allowOriginFunc(origin) + return c.allowOriginFunc(r, origin) } if c.allowedOriginsAll { - return true + return true, nil } origin = strings.ToLower(origin) for _, o := range c.allowedOrigins { if o == origin { - return true + return true, nil } } for _, w := range c.allowedWOrigins { if w.match(origin) { - return true + return true, nil } } - return false + return false, nil } // isMethodAllowed checks if a given method can be used as part of a cross-domain request @@ -410,7 +487,6 @@ func (c *Cors) isMethodAllowed(method string) bool { // If no method allowed, always return false, even for preflight request return false } - method = strings.ToUpper(method) if method == http.MethodOptions { // Always allow preflight requests return true @@ -422,25 +498,3 @@ func (c *Cors) isMethodAllowed(method string) bool { } return false } - -// areHeadersAllowed checks if a given list of headers are allowed to used within -// a cross-domain request. -func (c *Cors) areHeadersAllowed(requestedHeaders []string) bool { - if c.allowedHeadersAll || len(requestedHeaders) == 0 { - return true - } - for _, header := range requestedHeaders { - header = http.CanonicalHeaderKey(header) - found := false - for _, h := range c.allowedHeaders { - if h == header { - found = true - break - } - } - if !found { - return false - } - } - return true -} diff --git a/vendor/github.com/rs/cors/internal/sortedset.go b/vendor/github.com/rs/cors/internal/sortedset.go new file mode 100644 index 00000000..844f3f9e --- /dev/null +++ b/vendor/github.com/rs/cors/internal/sortedset.go @@ -0,0 +1,201 @@ +// adapted from github.com/jub0bs/cors +package internal + +import ( + "sort" + "strings" +) + +// A SortedSet represents a mathematical set of strings sorted in +// lexicographical order. +// Each element has a unique position ranging from 0 (inclusive) +// to the set's cardinality (exclusive). +// The zero value represents an empty set. +type SortedSet struct { + m map[string]int + maxLen int +} + +// NewSortedSet returns a SortedSet that contains all of elems, +// but no other elements. +func NewSortedSet(elems ...string) SortedSet { + sort.Strings(elems) + m := make(map[string]int) + var maxLen int + i := 0 + for _, s := range elems { + if _, exists := m[s]; exists { + continue + } + m[s] = i + i++ + maxLen = max(maxLen, len(s)) + } + return SortedSet{ + m: m, + maxLen: maxLen, + } +} + +// Size returns the cardinality of set. +func (set SortedSet) Size() int { + return len(set.m) +} + +// String sorts joins the elements of set (in lexicographical order) +// with a comma and returns the resulting string. +func (set SortedSet) String() string { + elems := make([]string, len(set.m)) + for elem, i := range set.m { + elems[i] = elem // safe indexing, by construction of SortedSet + } + return strings.Join(elems, ",") +} + +// Accepts reports whether values is a sequence of list-based field values +// whose elements are +// - all members of set, +// - sorted in lexicographical order, +// - unique. +func (set SortedSet) Accepts(values []string) bool { + var ( // effectively constant + maxLen = maxOWSBytes + set.maxLen + maxOWSBytes + 1 // +1 for comma + ) + var ( + posOfLastNameSeen = -1 + name string + commaFound bool + emptyElements int + ok bool + ) + for _, s := range values { + for { + // As a defense against maliciously long names in s, + // we process only a small number of s's leading bytes per iteration. + name, s, commaFound = cutAtComma(s, maxLen) + name, ok = trimOWS(name, maxOWSBytes) + if !ok { + return false + } + if name == "" { + // RFC 9110 requires recipients to tolerate + // "a reasonable number of empty list elements"; see + // https://httpwg.org/specs/rfc9110.html#abnf.extension.recipient. + emptyElements++ + if emptyElements > maxEmptyElements { + return false + } + if !commaFound { // We have now exhausted the names in s. + break + } + continue + } + pos, ok := set.m[name] + if !ok { + return false + } + // The names in s are expected to be sorted in lexicographical order + // and to each appear at most once. + // Therefore, the positions (in set) of the names that + // appear in s should form a strictly increasing sequence. + // If that's not actually the case, bail out. + if pos <= posOfLastNameSeen { + return false + } + posOfLastNameSeen = pos + if !commaFound { // We have now exhausted the names in s. + break + } + } + } + return true +} + +const ( + maxOWSBytes = 1 // number of leading/trailing OWS bytes tolerated + maxEmptyElements = 16 // number of empty list elements tolerated +) + +func cutAtComma(s string, n int) (before, after string, found bool) { + // Note: this implementation draws inspiration from strings.Cut's. + end := min(len(s), n) + if i := strings.IndexByte(s[:end], ','); i >= 0 { + after = s[i+1:] // deal with this first to save one bounds check + return s[:i], after, true + } + return s, "", false +} + +// TrimOWS trims up to n bytes of [optional whitespace (OWS)] +// from the start of and/or the end of s. +// If no more than n bytes of OWS are found at the start of s +// and no more than n bytes of OWS are found at the end of s, +// it returns the trimmed result and true. +// Otherwise, it returns the original string and false. +// +// [optional whitespace (OWS)]: https://httpwg.org/specs/rfc9110.html#whitespace +func trimOWS(s string, n int) (trimmed string, ok bool) { + if s == "" { + return s, true + } + trimmed, ok = trimRightOWS(s, n) + if !ok { + return s, false + } + trimmed, ok = trimLeftOWS(trimmed, n) + if !ok { + return s, false + } + return trimmed, true +} + +func trimLeftOWS(s string, n int) (string, bool) { + sCopy := s + var i int + for len(s) > 0 { + if i > n { + return sCopy, false + } + if !(s[0] == ' ' || s[0] == '\t') { + break + } + s = s[1:] + i++ + } + return s, true +} + +func trimRightOWS(s string, n int) (string, bool) { + sCopy := s + var i int + for len(s) > 0 { + if i > n { + return sCopy, false + } + last := len(s) - 1 + if !(s[last] == ' ' || s[last] == '\t') { + break + } + s = s[:last] + i++ + } + return s, true +} + +// TODO: when updating go directive to 1.21 or later, +// use min builtin instead. +func min(a, b int) int { + if a < b { + return a + } + return b +} + +// TODO: when updating go directive to 1.21 or later, +// use max builtin instead. +func max(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/vendor/github.com/rs/cors/utils.go b/vendor/github.com/rs/cors/utils.go index 6bb120ca..41b0c283 100644 --- a/vendor/github.com/rs/cors/utils.go +++ b/vendor/github.com/rs/cors/utils.go @@ -1,10 +1,8 @@ package cors -import "strings" - -const toLower = 'a' - 'A' - -type converter func(string) string +import ( + "strings" +) type wildcard struct { prefix string @@ -12,60 +10,16 @@ type wildcard struct { } func (w wildcard) match(s string) bool { - return len(s) >= len(w.prefix)+len(w.suffix) && strings.HasPrefix(s, w.prefix) && strings.HasSuffix(s, w.suffix) + return len(s) >= len(w.prefix)+len(w.suffix) && + strings.HasPrefix(s, w.prefix) && + strings.HasSuffix(s, w.suffix) } // convert converts a list of string using the passed converter function -func convert(s []string, c converter) []string { - out := []string{} - for _, i := range s { - out = append(out, c(i)) +func convert(s []string, f func(string) string) []string { + out := make([]string, len(s)) + for i := range s { + out[i] = f(s[i]) } return out } - -// parseHeaderList tokenize + normalize a string containing a list of headers -func parseHeaderList(headerList string) []string { - l := len(headerList) - h := make([]byte, 0, l) - upper := true - // Estimate the number headers in order to allocate the right splice size - t := 0 - for i := 0; i < l; i++ { - if headerList[i] == ',' { - t++ - } - } - headers := make([]string, 0, t) - for i := 0; i < l; i++ { - b := headerList[i] - switch { - case b >= 'a' && b <= 'z': - if upper { - h = append(h, b-toLower) - } else { - h = append(h, b) - } - case b >= 'A' && b <= 'Z': - if !upper { - h = append(h, b+toLower) - } else { - h = append(h, b) - } - case b == '-' || b == '_' || b == '.' || (b >= '0' && b <= '9'): - h = append(h, b) - } - - if b == ' ' || b == ',' || i == l-1 { - if len(h) > 0 { - // Flush the found header - headers = append(headers, string(h)) - h = h[:0] - upper = true - } - } else { - upper = b == '-' || b == '_' - } - } - return headers -} diff --git a/vendor/github.com/spf13/cobra/.golangci.yml b/vendor/github.com/spf13/cobra/.golangci.yml index 2578d94b..2c8f4808 100644 --- a/vendor/github.com/spf13/cobra/.golangci.yml +++ b/vendor/github.com/spf13/cobra/.golangci.yml @@ -19,44 +19,39 @@ linters: disable-all: true enable: #- bodyclose - - deadcode + # - deadcode ! deprecated since v1.49.0; replaced by 'unused' #- depguard #- dogsled #- dupl - errcheck #- exhaustive #- funlen - - gas #- gochecknoinits - goconst - #- gocritic + - gocritic #- gocyclo - #- gofmt + - gofmt - goimports - - golint #- gomnd #- goprintffuncname - #- gosec - #- gosimple + - gosec + - gosimple - govet - ineffassign - - interfacer #- lll - - maligned - - megacheck - #- misspell + - misspell #- nakedret #- noctx - #- nolintlint + - nolintlint #- rowserrcheck #- scopelint - #- staticcheck - - structcheck - #- stylecheck + - staticcheck + #- structcheck ! deprecated since v1.49.0; replaced by 'unused' + - stylecheck #- typecheck - unconvert #- unparam - #- unused - - varcheck + - unused + # - varcheck ! deprecated since v1.49.0; replaced by 'unused' #- whitespace fast: false diff --git a/vendor/github.com/spf13/cobra/README.md b/vendor/github.com/spf13/cobra/README.md index 592c0b8a..6444f4b7 100644 --- a/vendor/github.com/spf13/cobra/README.md +++ b/vendor/github.com/spf13/cobra/README.md @@ -4,7 +4,7 @@ Cobra is a library for creating powerful modern CLI applications. Cobra is used in many Go projects such as [Kubernetes](https://kubernetes.io/), [Hugo](https://gohugo.io), and [GitHub CLI](https://github.com/cli/cli) to -name a few. [This list](./projects_using_cobra.md) contains a more extensive list of projects using Cobra. +name a few. [This list](site/content/projects_using_cobra.md) contains a more extensive list of projects using Cobra. [![](https://img.shields.io/github/actions/workflow/status/spf13/cobra/test.yml?branch=main&longCache=true&label=Test&logo=github%20actions&logoColor=fff)](https://github.com/spf13/cobra/actions?query=workflow%3ATest) [![Go Reference](https://pkg.go.dev/badge/github.com/spf13/cobra.svg)](https://pkg.go.dev/github.com/spf13/cobra) @@ -80,7 +80,7 @@ which maintains the same interface while adding POSIX compliance. # Installing Using Cobra is easy. First, use `go get` to install the latest version -of the library. +of the library. ``` go get -u github.com/spf13/cobra@latest @@ -105,8 +105,8 @@ go install github.com/spf13/cobra-cli@latest For complete details on using the Cobra-CLI generator, please read [The Cobra Generator README](https://github.com/spf13/cobra-cli/blob/main/README.md) -For complete details on using the Cobra library, please read the [The Cobra User Guide](user_guide.md). +For complete details on using the Cobra library, please read the [The Cobra User Guide](site/content/user_guide.md). # License -Cobra is released under the Apache 2.0 license. See [LICENSE.txt](https://github.com/spf13/cobra/blob/master/LICENSE.txt) +Cobra is released under the Apache 2.0 license. See [LICENSE.txt](LICENSE.txt) diff --git a/vendor/github.com/spf13/cobra/active_help.go b/vendor/github.com/spf13/cobra/active_help.go index 2d023943..25c30e3c 100644 --- a/vendor/github.com/spf13/cobra/active_help.go +++ b/vendor/github.com/spf13/cobra/active_help.go @@ -17,15 +17,14 @@ package cobra import ( "fmt" "os" - "strings" ) const ( activeHelpMarker = "_activeHelp_ " // The below values should not be changed: programs will be using them explicitly // in their user documentation, and users will be using them explicitly. - activeHelpEnvVarSuffix = "_ACTIVE_HELP" - activeHelpGlobalEnvVar = "COBRA_ACTIVE_HELP" + activeHelpEnvVarSuffix = "ACTIVE_HELP" + activeHelpGlobalEnvVar = configEnvVarGlobalPrefix + "_" + activeHelpEnvVarSuffix activeHelpGlobalDisable = "0" ) @@ -42,7 +41,7 @@ func AppendActiveHelp(compArray []string, activeHelpStr string) []string { // GetActiveHelpConfig returns the value of the ActiveHelp environment variable // _ACTIVE_HELP where is the name of the root command in upper -// case, with all - replaced by _. +// case, with all non-ASCII-alphanumeric characters replaced by `_`. // It will always return "0" if the global environment variable COBRA_ACTIVE_HELP // is set to "0". func GetActiveHelpConfig(cmd *Command) string { @@ -55,9 +54,7 @@ func GetActiveHelpConfig(cmd *Command) string { // activeHelpEnvVar returns the name of the program-specific ActiveHelp environment // variable. It has the format _ACTIVE_HELP where is the name of the -// root command in upper case, with all - replaced by _. +// root command in upper case, with all non-ASCII-alphanumeric characters replaced by `_`. func activeHelpEnvVar(name string) string { - // This format should not be changed: users will be using it explicitly. - activeHelpEnvVar := strings.ToUpper(fmt.Sprintf("%s%s", name, activeHelpEnvVarSuffix)) - return strings.ReplaceAll(activeHelpEnvVar, "-", "_") + return configEnvVar(name, activeHelpEnvVarSuffix) } diff --git a/vendor/github.com/spf13/cobra/active_help.md b/vendor/github.com/spf13/cobra/active_help.md deleted file mode 100644 index 5e7f59af..00000000 --- a/vendor/github.com/spf13/cobra/active_help.md +++ /dev/null @@ -1,157 +0,0 @@ -# Active Help - -Active Help is a framework provided by Cobra which allows a program to define messages (hints, warnings, etc) that will be printed during program usage. It aims to make it easier for your users to learn how to use your program. If configured by the program, Active Help is printed when the user triggers shell completion. - -For example, -``` -bash-5.1$ helm repo add [tab] -You must choose a name for the repo you are adding. - -bash-5.1$ bin/helm package [tab] -Please specify the path to the chart to package - -bash-5.1$ bin/helm package [tab][tab] -bin/ internal/ scripts/ pkg/ testdata/ -``` - -**Hint**: A good place to use Active Help messages is when the normal completion system does not provide any suggestions. In such cases, Active Help nicely supplements the normal shell completions to guide the user in knowing what is expected by the program. -## Supported shells - -Active Help is currently only supported for the following shells: -- Bash (using [bash completion V2](shell_completions.md#bash-completion-v2) only). Note that bash 4.4 or higher is required for the prompt to appear when an Active Help message is printed. -- Zsh - -## Adding Active Help messages - -As Active Help uses the shell completion system, the implementation of Active Help messages is done by enhancing custom dynamic completions. If you are not familiar with dynamic completions, please refer to [Shell Completions](shell_completions.md). - -Adding Active Help is done through the use of the `cobra.AppendActiveHelp(...)` function, where the program repeatedly adds Active Help messages to the list of completions. Keep reading for details. - -### Active Help for nouns - -Adding Active Help when completing a noun is done within the `ValidArgsFunction(...)` of a command. Please notice the use of `cobra.AppendActiveHelp(...)` in the following example: - -```go -cmd := &cobra.Command{ - Use: "add [NAME] [URL]", - Short: "add a chart repository", - Args: require.ExactArgs(2), - RunE: func(cmd *cobra.Command, args []string) error { - return addRepo(args) - }, - ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - var comps []string - if len(args) == 0 { - comps = cobra.AppendActiveHelp(comps, "You must choose a name for the repo you are adding") - } else if len(args) == 1 { - comps = cobra.AppendActiveHelp(comps, "You must specify the URL for the repo you are adding") - } else { - comps = cobra.AppendActiveHelp(comps, "This command does not take any more arguments") - } - return comps, cobra.ShellCompDirectiveNoFileComp - }, -} -``` -The example above defines the completions (none, in this specific example) as well as the Active Help messages for the `helm repo add` command. It yields the following behavior: -``` -bash-5.1$ helm repo add [tab] -You must choose a name for the repo you are adding - -bash-5.1$ helm repo add grafana [tab] -You must specify the URL for the repo you are adding - -bash-5.1$ helm repo add grafana https://grafana.github.io/helm-charts [tab] -This command does not take any more arguments -``` -**Hint**: As can be seen in the above example, a good place to use Active Help messages is when the normal completion system does not provide any suggestions. In such cases, Active Help nicely supplements the normal shell completions. - -### Active Help for flags - -Providing Active Help for flags is done in the same fashion as for nouns, but using the completion function registered for the flag. For example: -```go -_ = cmd.RegisterFlagCompletionFunc("version", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - if len(args) != 2 { - return cobra.AppendActiveHelp(nil, "You must first specify the chart to install before the --version flag can be completed"), cobra.ShellCompDirectiveNoFileComp - } - return compVersionFlag(args[1], toComplete) - }) -``` -The example above prints an Active Help message when not enough information was given by the user to complete the `--version` flag. -``` -bash-5.1$ bin/helm install myrelease --version 2.0.[tab] -You must first specify the chart to install before the --version flag can be completed - -bash-5.1$ bin/helm install myrelease bitnami/solr --version 2.0.[tab][tab] -2.0.1 2.0.2 2.0.3 -``` - -## User control of Active Help - -You may want to allow your users to disable Active Help or choose between different levels of Active Help. It is entirely up to the program to define the type of configurability of Active Help that it wants to offer, if any. -Allowing to configure Active Help is entirely optional; you can use Active Help in your program without doing anything about Active Help configuration. - -The way to configure Active Help is to use the program's Active Help environment -variable. That variable is named `_ACTIVE_HELP` where `` is the name of your -program in uppercase with any `-` replaced by an `_`. The variable should be set by the user to whatever -Active Help configuration values are supported by the program. - -For example, say `helm` has chosen to support three levels for Active Help: `on`, `off`, `local`. Then a user -would set the desired behavior to `local` by doing `export HELM_ACTIVE_HELP=local` in their shell. - -For simplicity, when in `cmd.ValidArgsFunction(...)` or a flag's completion function, the program should read the -Active Help configuration using the `cobra.GetActiveHelpConfig(cmd)` function and select what Active Help messages -should or should not be added (instead of reading the environment variable directly). - -For example: -```go -ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - activeHelpLevel := cobra.GetActiveHelpConfig(cmd) - - var comps []string - if len(args) == 0 { - if activeHelpLevel != "off" { - comps = cobra.AppendActiveHelp(comps, "You must choose a name for the repo you are adding") - } - } else if len(args) == 1 { - if activeHelpLevel != "off" { - comps = cobra.AppendActiveHelp(comps, "You must specify the URL for the repo you are adding") - } - } else { - if activeHelpLevel == "local" { - comps = cobra.AppendActiveHelp(comps, "This command does not take any more arguments") - } - } - return comps, cobra.ShellCompDirectiveNoFileComp -}, -``` -**Note 1**: If the `_ACTIVE_HELP` environment variable is set to the string "0", Cobra will automatically disable all Active Help output (even if some output was specified by the program using the `cobra.AppendActiveHelp(...)` function). Using "0" can simplify your code in situations where you want to blindly disable Active Help without having to call `cobra.GetActiveHelpConfig(cmd)` explicitly. - -**Note 2**: If a user wants to disable Active Help for every single program based on Cobra, she can set the environment variable `COBRA_ACTIVE_HELP` to "0". In this case `cobra.GetActiveHelpConfig(cmd)` will return "0" no matter what the variable `_ACTIVE_HELP` is set to. - -**Note 3**: If the user does not set `_ACTIVE_HELP` or `COBRA_ACTIVE_HELP` (which will be a common case), the default value for the Active Help configuration returned by `cobra.GetActiveHelpConfig(cmd)` will be the empty string. -## Active Help with Cobra's default completion command - -Cobra provides a default `completion` command for programs that wish to use it. -When using the default `completion` command, Active Help is configurable in the same -fashion as described above using environment variables. You may wish to document this in more -details for your users. - -## Debugging Active Help - -Debugging your Active Help code is done in the same way as debugging your dynamic completion code, which is with Cobra's hidden `__complete` command. Please refer to [debugging shell completion](shell_completions.md#debugging) for details. - -When debugging with the `__complete` command, if you want to specify different Active Help configurations, you should use the active help environment variable. That variable is named `_ACTIVE_HELP` where any `-` is replaced by an `_`. For example, we can test deactivating some Active Help as shown below: -``` -$ HELM_ACTIVE_HELP=1 bin/helm __complete install wordpress bitnami/h -bitnami/haproxy -bitnami/harbor -_activeHelp_ WARNING: cannot re-use a name that is still in use -:0 -Completion ended with directive: ShellCompDirectiveDefault - -$ HELM_ACTIVE_HELP=0 bin/helm __complete install wordpress bitnami/h -bitnami/haproxy -bitnami/harbor -:0 -Completion ended with directive: ShellCompDirectiveDefault -``` diff --git a/vendor/github.com/spf13/cobra/args.go b/vendor/github.com/spf13/cobra/args.go index e79ec33a..ed1e70ce 100644 --- a/vendor/github.com/spf13/cobra/args.go +++ b/vendor/github.com/spf13/cobra/args.go @@ -52,9 +52,9 @@ func OnlyValidArgs(cmd *Command, args []string) error { if len(cmd.ValidArgs) > 0 { // Remove any description that may be included in ValidArgs. // A description is following a tab character. - var validArgs []string + validArgs := make([]string, 0, len(cmd.ValidArgs)) for _, v := range cmd.ValidArgs { - validArgs = append(validArgs, strings.Split(v, "\t")[0]) + validArgs = append(validArgs, strings.SplitN(v, "\t", 2)[0]) } for _, v := range args { if !stringInSlice(v, validArgs) { diff --git a/vendor/github.com/spf13/cobra/bash_completions.go b/vendor/github.com/spf13/cobra/bash_completions.go index 10c78847..f4d198cb 100644 --- a/vendor/github.com/spf13/cobra/bash_completions.go +++ b/vendor/github.com/spf13/cobra/bash_completions.go @@ -85,7 +85,7 @@ __%[1]s_handle_go_custom_completion() local out requestComp lastParam lastChar comp directive args # Prepare the command to request completions for the program. - # Calling ${words[0]} instead of directly %[1]s allows to handle aliases + # Calling ${words[0]} instead of directly %[1]s allows handling aliases args=("${words[@]:1}") # Disable ActiveHelp which is not supported for bash completion v1 requestComp="%[8]s=0 ${words[0]} %[2]s ${args[*]}" @@ -597,19 +597,16 @@ func writeRequiredFlag(buf io.StringWriter, cmd *Command) { if nonCompletableFlag(flag) { return } - for key := range flag.Annotations { - switch key { - case BashCompOneRequiredFlag: - format := " must_have_one_flag+=(\"--%s" - if flag.Value.Type() != "bool" { - format += "=" - } - format += cbn - WriteStringAndCheck(buf, fmt.Sprintf(format, flag.Name)) - - if len(flag.Shorthand) > 0 { - WriteStringAndCheck(buf, fmt.Sprintf(" must_have_one_flag+=(\"-%s"+cbn, flag.Shorthand)) - } + if _, ok := flag.Annotations[BashCompOneRequiredFlag]; ok { + format := " must_have_one_flag+=(\"--%s" + if flag.Value.Type() != "bool" { + format += "=" + } + format += cbn + WriteStringAndCheck(buf, fmt.Sprintf(format, flag.Name)) + + if len(flag.Shorthand) > 0 { + WriteStringAndCheck(buf, fmt.Sprintf(" must_have_one_flag+=(\"-%s"+cbn, flag.Shorthand)) } } }) @@ -621,7 +618,7 @@ func writeRequiredNouns(buf io.StringWriter, cmd *Command) { for _, value := range cmd.ValidArgs { // Remove any description that may be included following a tab character. // Descriptions are not supported by bash completion. - value = strings.Split(value, "\t")[0] + value = strings.SplitN(value, "\t", 2)[0] WriteStringAndCheck(buf, fmt.Sprintf(" must_have_one_noun+=(%q)\n", value)) } if cmd.ValidArgsFunction != nil { diff --git a/vendor/github.com/spf13/cobra/bash_completions.md b/vendor/github.com/spf13/cobra/bash_completions.md deleted file mode 100644 index 52919b2f..00000000 --- a/vendor/github.com/spf13/cobra/bash_completions.md +++ /dev/null @@ -1,93 +0,0 @@ -# Generating Bash Completions For Your cobra.Command - -Please refer to [Shell Completions](shell_completions.md) for details. - -## Bash legacy dynamic completions - -For backward compatibility, Cobra still supports its legacy dynamic completion solution (described below). Unlike the `ValidArgsFunction` solution, the legacy solution will only work for Bash shell-completion and not for other shells. This legacy solution can be used along-side `ValidArgsFunction` and `RegisterFlagCompletionFunc()`, as long as both solutions are not used for the same command. This provides a path to gradually migrate from the legacy solution to the new solution. - -**Note**: Cobra's default `completion` command uses bash completion V2. If you are currently using Cobra's legacy dynamic completion solution, you should not use the default `completion` command but continue using your own. - -The legacy solution allows you to inject bash functions into the bash completion script. Those bash functions are responsible for providing the completion choices for your own completions. - -Some code that works in kubernetes: - -```bash -const ( - bash_completion_func = `__kubectl_parse_get() -{ - local kubectl_output out - if kubectl_output=$(kubectl get --no-headers "$1" 2>/dev/null); then - out=($(echo "${kubectl_output}" | awk '{print $1}')) - COMPREPLY=( $( compgen -W "${out[*]}" -- "$cur" ) ) - fi -} - -__kubectl_get_resource() -{ - if [[ ${#nouns[@]} -eq 0 ]]; then - return 1 - fi - __kubectl_parse_get ${nouns[${#nouns[@]} -1]} - if [[ $? -eq 0 ]]; then - return 0 - fi -} - -__kubectl_custom_func() { - case ${last_command} in - kubectl_get | kubectl_describe | kubectl_delete | kubectl_stop) - __kubectl_get_resource - return - ;; - *) - ;; - esac -} -`) -``` - -And then I set that in my command definition: - -```go -cmds := &cobra.Command{ - Use: "kubectl", - Short: "kubectl controls the Kubernetes cluster manager", - Long: `kubectl controls the Kubernetes cluster manager. - -Find more information at https://github.com/GoogleCloudPlatform/kubernetes.`, - Run: runHelp, - BashCompletionFunction: bash_completion_func, -} -``` - -The `BashCompletionFunction` option is really only valid/useful on the root command. Doing the above will cause `__kubectl_custom_func()` (`___custom_func()`) to be called when the built in processor was unable to find a solution. In the case of kubernetes a valid command might look something like `kubectl get pod [mypod]`. If you type `kubectl get pod [tab][tab]` the `__kubectl_customc_func()` will run because the cobra.Command only understood "kubectl" and "get." `__kubectl_custom_func()` will see that the cobra.Command is "kubectl_get" and will thus call another helper `__kubectl_get_resource()`. `__kubectl_get_resource` will look at the 'nouns' collected. In our example the only noun will be `pod`. So it will call `__kubectl_parse_get pod`. `__kubectl_parse_get` will actually call out to kubernetes and get any pods. It will then set `COMPREPLY` to valid pods! - -Similarly, for flags: - -```go - annotation := make(map[string][]string) - annotation[cobra.BashCompCustom] = []string{"__kubectl_get_namespaces"} - - flag := &pflag.Flag{ - Name: "namespace", - Usage: usage, - Annotations: annotation, - } - cmd.Flags().AddFlag(flag) -``` - -In addition add the `__kubectl_get_namespaces` implementation in the `BashCompletionFunction` -value, e.g.: - -```bash -__kubectl_get_namespaces() -{ - local template - template="{{ range .items }}{{ .metadata.name }} {{ end }}" - local kubectl_out - if kubectl_out=$(kubectl get -o template --template="${template}" namespace 2>/dev/null); then - COMPREPLY=( $( compgen -W "${kubectl_out}[*]" -- "$cur" ) ) - fi -} -``` diff --git a/vendor/github.com/spf13/cobra/bash_completionsV2.go b/vendor/github.com/spf13/cobra/bash_completionsV2.go index 19b09560..1cce5c32 100644 --- a/vendor/github.com/spf13/cobra/bash_completionsV2.go +++ b/vendor/github.com/spf13/cobra/bash_completionsV2.go @@ -57,7 +57,7 @@ __%[1]s_get_completion_results() { local requestComp lastParam lastChar args # Prepare the command to request completions for the program. - # Calling ${words[0]} instead of directly %[1]s allows to handle aliases + # Calling ${words[0]} instead of directly %[1]s allows handling aliases args=("${words[@]:1}") requestComp="${words[0]} %[2]s ${args[*]}" diff --git a/vendor/github.com/spf13/cobra/cobra.go b/vendor/github.com/spf13/cobra/cobra.go index b07b44a0..e0b0947b 100644 --- a/vendor/github.com/spf13/cobra/cobra.go +++ b/vendor/github.com/spf13/cobra/cobra.go @@ -43,12 +43,13 @@ var initializers []func() var finalizers []func() const ( - defaultPrefixMatching = false - defaultCommandSorting = true - defaultCaseInsensitive = false + defaultPrefixMatching = false + defaultCommandSorting = true + defaultCaseInsensitive = false + defaultTraverseRunHooks = false ) -// EnablePrefixMatching allows to set automatic prefix matching. Automatic prefix matching can be a dangerous thing +// EnablePrefixMatching allows setting automatic prefix matching. Automatic prefix matching can be a dangerous thing // to automatically enable in CLI tools. // Set this to true to enable it. var EnablePrefixMatching = defaultPrefixMatching @@ -60,6 +61,10 @@ var EnableCommandSorting = defaultCommandSorting // EnableCaseInsensitive allows case-insensitive commands names. (case sensitive by default) var EnableCaseInsensitive = defaultCaseInsensitive +// EnableTraverseRunHooks executes persistent pre-run and post-run hooks from all parents. +// By default this is disabled, which means only the first run hook to be found is executed. +var EnableTraverseRunHooks = defaultTraverseRunHooks + // MousetrapHelpText enables an information splash screen on Windows // if the CLI is started from explorer.exe. // To disable the mousetrap, just set this variable to blank string (""). @@ -188,8 +193,6 @@ func ld(s, t string, ignoreCase bool) int { d := make([][]int, len(s)+1) for i := range d { d[i] = make([]int, len(t)+1) - } - for i := range d { d[i][0] = i } for j := range d[0] { diff --git a/vendor/github.com/spf13/cobra/command.go b/vendor/github.com/spf13/cobra/command.go index 01f7c6f1..54748fc6 100644 --- a/vendor/github.com/spf13/cobra/command.go +++ b/vendor/github.com/spf13/cobra/command.go @@ -30,7 +30,10 @@ import ( flag "github.com/spf13/pflag" ) -const FlagSetByCobraAnnotation = "cobra_annotation_flag_set_by_cobra" +const ( + FlagSetByCobraAnnotation = "cobra_annotation_flag_set_by_cobra" + CommandDisplayNameAnnotation = "cobra_annotation_command_display_name" +) // FParseErrWhitelist configures Flag parse errors to be ignored type FParseErrWhitelist flag.ParseErrorsWhitelist @@ -99,7 +102,7 @@ type Command struct { Deprecated string // Annotations are key/value pairs that can be used by applications to identify or - // group commands. + // group commands or set special options. Annotations map[string]string // Version defines the version for this command. If this value is non-empty and the command does not @@ -115,6 +118,8 @@ type Command struct { // * PostRun() // * PersistentPostRun() // All functions get the same args, the arguments after the command name. + // The *PreRun and *PostRun functions will only be executed if the Run function of the current + // command has been declared. // // PersistentPreRun: children of this command will inherit and execute. PersistentPreRun func(cmd *Command, args []string) @@ -149,8 +154,10 @@ type Command struct { // pflags contains persistent flags. pflags *flag.FlagSet // lflags contains local flags. + // This field does not represent internal state, it's used as a cache to optimise LocalFlags function call lflags *flag.FlagSet // iflags contains inherited flags. + // This field does not represent internal state, it's used as a cache to optimise InheritedFlags function call iflags *flag.FlagSet // parentsPflags is all persistent flags of cmd's parents. parentsPflags *flag.FlagSet @@ -181,6 +188,9 @@ type Command struct { // versionTemplate is the version template defined by user. versionTemplate string + // errPrefix is the error message prefix defined by user. + errPrefix string + // inReader is a reader defined by the user that replaces stdin inReader io.Reader // outWriter is a writer defined by the user that replaces stdout @@ -346,6 +356,11 @@ func (c *Command) SetVersionTemplate(s string) { c.versionTemplate = s } +// SetErrPrefix sets error message prefix to be used. Application can use it to set custom prefix. +func (c *Command) SetErrPrefix(s string) { + c.errPrefix = s +} + // SetGlobalNormalizationFunc sets a normalization function to all flag sets and also to child commands. // The user should not have a cyclic dependency on commands. func (c *Command) SetGlobalNormalizationFunc(n func(f *flag.FlagSet, name string) flag.NormalizedName) { @@ -595,6 +610,18 @@ func (c *Command) VersionTemplate() string { ` } +// ErrPrefix return error message prefix for the command +func (c *Command) ErrPrefix() string { + if c.errPrefix != "" { + return c.errPrefix + } + + if c.HasParent() { + return c.parent.ErrPrefix() + } + return "Error:" +} + func hasNoOptDefVal(name string, fs *flag.FlagSet) bool { flag := fs.Lookup(name) if flag == nil { @@ -681,7 +708,7 @@ Loop: // This is not a flag or a flag value. Check to see if it matches what we're looking for, and if so, // return the args, excluding the one at this position. if s == x { - ret := []string{} + ret := make([]string, 0, len(args)-1) ret = append(ret, args[:pos]...) ret = append(ret, args[pos+1:]...) return ret @@ -729,14 +756,14 @@ func (c *Command) findSuggestions(arg string) string { if c.SuggestionsMinimumDistance <= 0 { c.SuggestionsMinimumDistance = 2 } - suggestionsString := "" + var sb strings.Builder if suggestions := c.SuggestionsFor(arg); len(suggestions) > 0 { - suggestionsString += "\n\nDid you mean this?\n" + sb.WriteString("\n\nDid you mean this?\n") for _, s := range suggestions { - suggestionsString += fmt.Sprintf("\t%v\n", s) + _, _ = fmt.Fprintf(&sb, "\t%v\n", s) } } - return suggestionsString + return sb.String() } func (c *Command) findNext(next string) *Command { @@ -752,7 +779,9 @@ func (c *Command) findNext(next string) *Command { } if len(matches) == 1 { - return matches[0] + // Temporarily disable gosec G602, which produces a false positive. + // See https://github.com/securego/gosec/issues/1005. + return matches[0] // #nosec G602 } return nil @@ -846,7 +875,7 @@ func (c *Command) ArgsLenAtDash() int { func (c *Command) execute(a []string) (err error) { if c == nil { - return fmt.Errorf("Called Execute() on a nil Command") + return fmt.Errorf("called Execute() on a nil Command") } if len(c.Deprecated) > 0 { @@ -910,15 +939,31 @@ func (c *Command) execute(a []string) (err error) { return err } + parents := make([]*Command, 0, 5) for p := c; p != nil; p = p.Parent() { + if EnableTraverseRunHooks { + // When EnableTraverseRunHooks is set: + // - Execute all persistent pre-runs from the root parent till this command. + // - Execute all persistent post-runs from this command till the root parent. + parents = append([]*Command{p}, parents...) + } else { + // Otherwise, execute only the first found persistent hook. + parents = append(parents, p) + } + } + for _, p := range parents { if p.PersistentPreRunE != nil { if err := p.PersistentPreRunE(c, argWoFlags); err != nil { return err } - break + if !EnableTraverseRunHooks { + break + } } else if p.PersistentPreRun != nil { p.PersistentPreRun(c, argWoFlags) - break + if !EnableTraverseRunHooks { + break + } } } if c.PreRunE != nil { @@ -955,10 +1000,14 @@ func (c *Command) execute(a []string) (err error) { if err := p.PersistentPostRunE(c, argWoFlags); err != nil { return err } - break + if !EnableTraverseRunHooks { + break + } } else if p.PersistentPostRun != nil { p.PersistentPostRun(c, argWoFlags) - break + if !EnableTraverseRunHooks { + break + } } } @@ -1048,7 +1097,7 @@ func (c *Command) ExecuteC() (cmd *Command, err error) { c = cmd } if !c.SilenceErrors { - c.PrintErrln("Error:", err.Error()) + c.PrintErrln(c.ErrPrefix(), err.Error()) c.PrintErrf("Run '%v --help' for usage.\n", c.CommandPath()) } return c, err @@ -1077,7 +1126,7 @@ func (c *Command) ExecuteC() (cmd *Command, err error) { // If root command has SilenceErrors flagged, // all subcommands should respect it if !cmd.SilenceErrors && !c.SilenceErrors { - c.PrintErrln("Error:", err.Error()) + c.PrintErrln(cmd.ErrPrefix(), err.Error()) } // If root command has SilenceUsage flagged, @@ -1140,10 +1189,11 @@ func (c *Command) InitDefaultHelpFlag() { c.mergePersistentFlags() if c.Flags().Lookup("help") == nil { usage := "help for " - if c.Name() == "" { + name := c.displayName() + if name == "" { usage += "this command" } else { - usage += c.Name() + usage += name } c.Flags().BoolP("help", "h", false, usage) _ = c.Flags().SetAnnotation("help", FlagSetByCobraAnnotation, []string{"true"}) @@ -1189,7 +1239,7 @@ func (c *Command) InitDefaultHelpCmd() { Use: "help [command]", Short: "Help about any command", Long: `Help provides help for any command in the application. -Simply type ` + c.Name() + ` help [path to command] for full details.`, +Simply type ` + c.displayName() + ` help [path to command] for full details.`, ValidArgsFunction: func(c *Command, args []string, toComplete string) ([]string, ShellCompDirective) { var completions []string cmd, _, e := c.Root().Find(args) @@ -1380,16 +1430,24 @@ func (c *Command) CommandPath() string { if c.HasParent() { return c.Parent().CommandPath() + " " + c.Name() } + return c.displayName() +} + +func (c *Command) displayName() string { + if displayName, ok := c.Annotations[CommandDisplayNameAnnotation]; ok { + return displayName + } return c.Name() } // UseLine puts out the full usage for a given command (including parents). func (c *Command) UseLine() string { var useline string + use := strings.Replace(c.Use, c.Name(), c.displayName(), 1) if c.HasParent() { - useline = c.parent.CommandPath() + " " + c.Use + useline = c.parent.CommandPath() + " " + use } else { - useline = c.Use + useline = use } if c.DisableFlagsInUseLine { return useline @@ -1591,7 +1649,7 @@ func (c *Command) GlobalNormalizationFunc() func(f *flag.FlagSet, name string) f // to this command (local and persistent declared here and by all parents). func (c *Command) Flags() *flag.FlagSet { if c.flags == nil { - c.flags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) + c.flags = flag.NewFlagSet(c.displayName(), flag.ContinueOnError) if c.flagErrorBuf == nil { c.flagErrorBuf = new(bytes.Buffer) } @@ -1602,10 +1660,11 @@ func (c *Command) Flags() *flag.FlagSet { } // LocalNonPersistentFlags are flags specific to this command which will NOT persist to subcommands. +// This function does not modify the flags of the current command, it's purpose is to return the current state. func (c *Command) LocalNonPersistentFlags() *flag.FlagSet { persistentFlags := c.PersistentFlags() - out := flag.NewFlagSet(c.Name(), flag.ContinueOnError) + out := flag.NewFlagSet(c.displayName(), flag.ContinueOnError) c.LocalFlags().VisitAll(func(f *flag.Flag) { if persistentFlags.Lookup(f.Name) == nil { out.AddFlag(f) @@ -1615,11 +1674,12 @@ func (c *Command) LocalNonPersistentFlags() *flag.FlagSet { } // LocalFlags returns the local FlagSet specifically set in the current command. +// This function does not modify the flags of the current command, it's purpose is to return the current state. func (c *Command) LocalFlags() *flag.FlagSet { c.mergePersistentFlags() if c.lflags == nil { - c.lflags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) + c.lflags = flag.NewFlagSet(c.displayName(), flag.ContinueOnError) if c.flagErrorBuf == nil { c.flagErrorBuf = new(bytes.Buffer) } @@ -1642,11 +1702,12 @@ func (c *Command) LocalFlags() *flag.FlagSet { } // InheritedFlags returns all flags which were inherited from parent commands. +// This function does not modify the flags of the current command, it's purpose is to return the current state. func (c *Command) InheritedFlags() *flag.FlagSet { c.mergePersistentFlags() if c.iflags == nil { - c.iflags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) + c.iflags = flag.NewFlagSet(c.displayName(), flag.ContinueOnError) if c.flagErrorBuf == nil { c.flagErrorBuf = new(bytes.Buffer) } @@ -1667,6 +1728,7 @@ func (c *Command) InheritedFlags() *flag.FlagSet { } // NonInheritedFlags returns all flags which were not inherited from parent commands. +// This function does not modify the flags of the current command, it's purpose is to return the current state. func (c *Command) NonInheritedFlags() *flag.FlagSet { return c.LocalFlags() } @@ -1674,7 +1736,7 @@ func (c *Command) NonInheritedFlags() *flag.FlagSet { // PersistentFlags returns the persistent FlagSet specifically set in the current command. func (c *Command) PersistentFlags() *flag.FlagSet { if c.pflags == nil { - c.pflags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) + c.pflags = flag.NewFlagSet(c.displayName(), flag.ContinueOnError) if c.flagErrorBuf == nil { c.flagErrorBuf = new(bytes.Buffer) } @@ -1687,9 +1749,9 @@ func (c *Command) PersistentFlags() *flag.FlagSet { func (c *Command) ResetFlags() { c.flagErrorBuf = new(bytes.Buffer) c.flagErrorBuf.Reset() - c.flags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) + c.flags = flag.NewFlagSet(c.displayName(), flag.ContinueOnError) c.flags.SetOutput(c.flagErrorBuf) - c.pflags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) + c.pflags = flag.NewFlagSet(c.displayName(), flag.ContinueOnError) c.pflags.SetOutput(c.flagErrorBuf) c.lflags = nil @@ -1806,7 +1868,7 @@ func (c *Command) mergePersistentFlags() { // If c.parentsPflags == nil, it makes new. func (c *Command) updateParentsPflags() { if c.parentsPflags == nil { - c.parentsPflags = flag.NewFlagSet(c.Name(), flag.ContinueOnError) + c.parentsPflags = flag.NewFlagSet(c.displayName(), flag.ContinueOnError) c.parentsPflags.SetOutput(c.flagErrorBuf) c.parentsPflags.SortFlags = false } diff --git a/vendor/github.com/spf13/cobra/completions.go b/vendor/github.com/spf13/cobra/completions.go index ee38c4d0..c0c08b05 100644 --- a/vendor/github.com/spf13/cobra/completions.go +++ b/vendor/github.com/spf13/cobra/completions.go @@ -17,6 +17,8 @@ package cobra import ( "fmt" "os" + "regexp" + "strconv" "strings" "sync" @@ -145,6 +147,20 @@ func (c *Command) RegisterFlagCompletionFunc(flagName string, f func(cmd *Comman return nil } +// GetFlagCompletionFunc returns the completion function for the given flag of the command, if available. +func (c *Command) GetFlagCompletionFunc(flagName string) (func(*Command, []string, string) ([]string, ShellCompDirective), bool) { + flag := c.Flag(flagName) + if flag == nil { + return nil, false + } + + flagCompletionMutex.RLock() + defer flagCompletionMutex.RUnlock() + + completionFunc, exists := flagCompletionFunctions[flag] + return completionFunc, exists +} + // Returns a string listing the different directive enabled in the specified parameter func (d ShellCompDirective) string() string { var directives []string @@ -197,24 +213,29 @@ func (c *Command) initCompleteCmd(args []string) { // 2- Even without completions, we need to print the directive } - noDescriptions := (cmd.CalledAs() == ShellCompNoDescRequestCmd) + noDescriptions := cmd.CalledAs() == ShellCompNoDescRequestCmd + if !noDescriptions { + if doDescriptions, err := strconv.ParseBool(getEnvConfig(cmd, configEnvVarSuffixDescriptions)); err == nil { + noDescriptions = !doDescriptions + } + } + noActiveHelp := GetActiveHelpConfig(finalCmd) == activeHelpGlobalDisable + out := finalCmd.OutOrStdout() for _, comp := range completions { - if GetActiveHelpConfig(finalCmd) == activeHelpGlobalDisable { - // Remove all activeHelp entries in this case - if strings.HasPrefix(comp, activeHelpMarker) { - continue - } + if noActiveHelp && strings.HasPrefix(comp, activeHelpMarker) { + // Remove all activeHelp entries if it's disabled. + continue } if noDescriptions { // Remove any description that may be included following a tab character. - comp = strings.Split(comp, "\t")[0] + comp = strings.SplitN(comp, "\t", 2)[0] } // Make sure we only write the first line to the output. // This is needed if a description contains a linebreak. // Otherwise the shell scripts will interpret the other lines as new flags // and could therefore provide a wrong completion. - comp = strings.Split(comp, "\n")[0] + comp = strings.SplitN(comp, "\n", 2)[0] // Finally trim the completion. This is especially important to get rid // of a trailing tab when there are no description following it. @@ -223,14 +244,14 @@ func (c *Command) initCompleteCmd(args []string) { // although there is no description). comp = strings.TrimSpace(comp) - // Print each possible completion to stdout for the completion script to consume. - fmt.Fprintln(finalCmd.OutOrStdout(), comp) + // Print each possible completion to the output for the completion script to consume. + fmt.Fprintln(out, comp) } // As the last printout, print the completion directive for the completion script to parse. // The directive integer must be that last character following a single colon (:). // The completion script expects : - fmt.Fprintf(finalCmd.OutOrStdout(), ":%d\n", directive) + fmt.Fprintf(out, ":%d\n", directive) // Print some helpful info to stderr for the user to understand. // Output from stderr must be ignored by the completion script. @@ -277,15 +298,19 @@ func (c *Command) getCompletions(args []string) (*Command, []string, ShellCompDi } if err != nil { // Unable to find the real command. E.g., someInvalidCmd - return c, []string{}, ShellCompDirectiveDefault, fmt.Errorf("Unable to find a command for arguments: %v", trimmedArgs) + return c, []string{}, ShellCompDirectiveDefault, fmt.Errorf("unable to find a command for arguments: %v", trimmedArgs) } finalCmd.ctx = c.ctx // These flags are normally added when `execute()` is called on `finalCmd`, // however, when doing completion, we don't call `finalCmd.execute()`. - // Let's add the --help and --version flag ourselves. - finalCmd.InitDefaultHelpFlag() - finalCmd.InitDefaultVersionFlag() + // Let's add the --help and --version flag ourselves but only if the finalCmd + // has not disabled flag parsing; if flag parsing is disabled, it is up to the + // finalCmd itself to handle the completion of *all* flags. + if !finalCmd.DisableFlagParsing { + finalCmd.InitDefaultHelpFlag() + finalCmd.InitDefaultVersionFlag() + } // Check if we are doing flag value completion before parsing the flags. // This is important because if we are completing a flag value, we need to also @@ -389,6 +414,11 @@ func (c *Command) getCompletions(args []string) (*Command, []string, ShellCompDi finalCmd.InheritedFlags().VisitAll(func(flag *pflag.Flag) { doCompleteFlags(flag) }) + // Try to complete non-inherited flags even if DisableFlagParsing==true. + // This allows programs to tell Cobra about flags for completion even + // if the actual parsing of flags is not done by Cobra. + // For instance, Helm uses this to provide flag name completion for + // some of its plugins. finalCmd.NonInheritedFlags().VisitAll(func(flag *pflag.Flag) { doCompleteFlags(flag) }) @@ -876,3 +906,34 @@ func CompError(msg string) { func CompErrorln(msg string) { CompError(fmt.Sprintf("%s\n", msg)) } + +// These values should not be changed: users will be using them explicitly. +const ( + configEnvVarGlobalPrefix = "COBRA" + configEnvVarSuffixDescriptions = "COMPLETION_DESCRIPTIONS" +) + +var configEnvVarPrefixSubstRegexp = regexp.MustCompile(`[^A-Z0-9_]`) + +// configEnvVar returns the name of the program-specific configuration environment +// variable. It has the format _ where is the name of the +// root command in upper case, with all non-ASCII-alphanumeric characters replaced by `_`. +func configEnvVar(name, suffix string) string { + // This format should not be changed: users will be using it explicitly. + v := strings.ToUpper(fmt.Sprintf("%s_%s", name, suffix)) + v = configEnvVarPrefixSubstRegexp.ReplaceAllString(v, "_") + return v +} + +// getEnvConfig returns the value of the configuration environment variable +// _ where is the name of the root command in upper +// case, with all non-ASCII-alphanumeric characters replaced by `_`. +// If the value is empty or not set, the value of the environment variable +// COBRA_ is returned instead. +func getEnvConfig(cmd *Command, suffix string) string { + v := os.Getenv(configEnvVar(cmd.Root().Name(), suffix)) + if v == "" { + v = os.Getenv(configEnvVar(configEnvVarGlobalPrefix, suffix)) + } + return v +} diff --git a/vendor/github.com/spf13/cobra/fish_completions.go b/vendor/github.com/spf13/cobra/fish_completions.go index 12ca0d2b..12d61b69 100644 --- a/vendor/github.com/spf13/cobra/fish_completions.go +++ b/vendor/github.com/spf13/cobra/fish_completions.go @@ -113,7 +113,7 @@ function __%[1]s_clear_perform_completion_once_result __%[1]s_debug "" __%[1]s_debug "========= clearing previously set __%[1]s_perform_completion_once_result variable ==========" set --erase __%[1]s_perform_completion_once_result - __%[1]s_debug "Succesfully erased the variable __%[1]s_perform_completion_once_result" + __%[1]s_debug "Successfully erased the variable __%[1]s_perform_completion_once_result" end function __%[1]s_requires_order_preservation diff --git a/vendor/github.com/spf13/cobra/fish_completions.md b/vendor/github.com/spf13/cobra/fish_completions.md deleted file mode 100644 index 19b2ed12..00000000 --- a/vendor/github.com/spf13/cobra/fish_completions.md +++ /dev/null @@ -1,4 +0,0 @@ -## Generating Fish Completions For Your cobra.Command - -Please refer to [Shell Completions](shell_completions.md) for details. - diff --git a/vendor/github.com/spf13/cobra/flag_groups.go b/vendor/github.com/spf13/cobra/flag_groups.go index b35fde15..560612fd 100644 --- a/vendor/github.com/spf13/cobra/flag_groups.go +++ b/vendor/github.com/spf13/cobra/flag_groups.go @@ -23,8 +23,9 @@ import ( ) const ( - requiredAsGroup = "cobra_annotation_required_if_others_set" - mutuallyExclusive = "cobra_annotation_mutually_exclusive" + requiredAsGroupAnnotation = "cobra_annotation_required_if_others_set" + oneRequiredAnnotation = "cobra_annotation_one_required" + mutuallyExclusiveAnnotation = "cobra_annotation_mutually_exclusive" ) // MarkFlagsRequiredTogether marks the given flags with annotations so that Cobra errors @@ -36,7 +37,23 @@ func (c *Command) MarkFlagsRequiredTogether(flagNames ...string) { if f == nil { panic(fmt.Sprintf("Failed to find flag %q and mark it as being required in a flag group", v)) } - if err := c.Flags().SetAnnotation(v, requiredAsGroup, append(f.Annotations[requiredAsGroup], strings.Join(flagNames, " "))); err != nil { + if err := c.Flags().SetAnnotation(v, requiredAsGroupAnnotation, append(f.Annotations[requiredAsGroupAnnotation], strings.Join(flagNames, " "))); err != nil { + // Only errs if the flag isn't found. + panic(err) + } + } +} + +// MarkFlagsOneRequired marks the given flags with annotations so that Cobra errors +// if the command is invoked without at least one flag from the given set of flags. +func (c *Command) MarkFlagsOneRequired(flagNames ...string) { + c.mergePersistentFlags() + for _, v := range flagNames { + f := c.Flags().Lookup(v) + if f == nil { + panic(fmt.Sprintf("Failed to find flag %q and mark it as being in a one-required flag group", v)) + } + if err := c.Flags().SetAnnotation(v, oneRequiredAnnotation, append(f.Annotations[oneRequiredAnnotation], strings.Join(flagNames, " "))); err != nil { // Only errs if the flag isn't found. panic(err) } @@ -53,13 +70,13 @@ func (c *Command) MarkFlagsMutuallyExclusive(flagNames ...string) { panic(fmt.Sprintf("Failed to find flag %q and mark it as being in a mutually exclusive flag group", v)) } // Each time this is called is a single new entry; this allows it to be a member of multiple groups if needed. - if err := c.Flags().SetAnnotation(v, mutuallyExclusive, append(f.Annotations[mutuallyExclusive], strings.Join(flagNames, " "))); err != nil { + if err := c.Flags().SetAnnotation(v, mutuallyExclusiveAnnotation, append(f.Annotations[mutuallyExclusiveAnnotation], strings.Join(flagNames, " "))); err != nil { panic(err) } } } -// ValidateFlagGroups validates the mutuallyExclusive/requiredAsGroup logic and returns the +// ValidateFlagGroups validates the mutuallyExclusive/oneRequired/requiredAsGroup logic and returns the // first error encountered. func (c *Command) ValidateFlagGroups() error { if c.DisableFlagParsing { @@ -71,15 +88,20 @@ func (c *Command) ValidateFlagGroups() error { // groupStatus format is the list of flags as a unique ID, // then a map of each flag name and whether it is set or not. groupStatus := map[string]map[string]bool{} + oneRequiredGroupStatus := map[string]map[string]bool{} mutuallyExclusiveGroupStatus := map[string]map[string]bool{} flags.VisitAll(func(pflag *flag.Flag) { - processFlagForGroupAnnotation(flags, pflag, requiredAsGroup, groupStatus) - processFlagForGroupAnnotation(flags, pflag, mutuallyExclusive, mutuallyExclusiveGroupStatus) + processFlagForGroupAnnotation(flags, pflag, requiredAsGroupAnnotation, groupStatus) + processFlagForGroupAnnotation(flags, pflag, oneRequiredAnnotation, oneRequiredGroupStatus) + processFlagForGroupAnnotation(flags, pflag, mutuallyExclusiveAnnotation, mutuallyExclusiveGroupStatus) }) if err := validateRequiredFlagGroups(groupStatus); err != nil { return err } + if err := validateOneRequiredFlagGroups(oneRequiredGroupStatus); err != nil { + return err + } if err := validateExclusiveFlagGroups(mutuallyExclusiveGroupStatus); err != nil { return err } @@ -108,7 +130,7 @@ func processFlagForGroupAnnotation(flags *flag.FlagSet, pflag *flag.Flag, annota continue } - groupStatus[group] = map[string]bool{} + groupStatus[group] = make(map[string]bool, len(flagnames)) for _, name := range flagnames { groupStatus[group][name] = false } @@ -142,6 +164,27 @@ func validateRequiredFlagGroups(data map[string]map[string]bool) error { return nil } +func validateOneRequiredFlagGroups(data map[string]map[string]bool) error { + keys := sortedKeys(data) + for _, flagList := range keys { + flagnameAndStatus := data[flagList] + var set []string + for flagname, isSet := range flagnameAndStatus { + if isSet { + set = append(set, flagname) + } + } + if len(set) >= 1 { + continue + } + + // Sort values, so they can be tested/scripted against consistently. + sort.Strings(set) + return fmt.Errorf("at least one of the flags in the group [%v] is required", flagList) + } + return nil +} + func validateExclusiveFlagGroups(data map[string]map[string]bool) error { keys := sortedKeys(data) for _, flagList := range keys { @@ -176,6 +219,7 @@ func sortedKeys(m map[string]map[string]bool) []string { // enforceFlagGroupsForCompletion will do the following: // - when a flag in a group is present, other flags in the group will be marked required +// - when none of the flags in a one-required group are present, all flags in the group will be marked required // - when a flag in a mutually exclusive group is present, other flags in the group will be marked as hidden // This allows the standard completion logic to behave appropriately for flag groups func (c *Command) enforceFlagGroupsForCompletion() { @@ -185,10 +229,12 @@ func (c *Command) enforceFlagGroupsForCompletion() { flags := c.Flags() groupStatus := map[string]map[string]bool{} + oneRequiredGroupStatus := map[string]map[string]bool{} mutuallyExclusiveGroupStatus := map[string]map[string]bool{} c.Flags().VisitAll(func(pflag *flag.Flag) { - processFlagForGroupAnnotation(flags, pflag, requiredAsGroup, groupStatus) - processFlagForGroupAnnotation(flags, pflag, mutuallyExclusive, mutuallyExclusiveGroupStatus) + processFlagForGroupAnnotation(flags, pflag, requiredAsGroupAnnotation, groupStatus) + processFlagForGroupAnnotation(flags, pflag, oneRequiredAnnotation, oneRequiredGroupStatus) + processFlagForGroupAnnotation(flags, pflag, mutuallyExclusiveAnnotation, mutuallyExclusiveGroupStatus) }) // If a flag that is part of a group is present, we make all the other flags @@ -204,6 +250,26 @@ func (c *Command) enforceFlagGroupsForCompletion() { } } + // If none of the flags of a one-required group are present, we make all the flags + // of that group required so that the shell completion suggests them automatically + for flagList, flagnameAndStatus := range oneRequiredGroupStatus { + isSet := false + + for _, isSet = range flagnameAndStatus { + if isSet { + break + } + } + + // None of the flags of the group are set, mark all flags in the group + // as required + if !isSet { + for _, fName := range strings.Split(flagList, " ") { + _ = c.MarkFlagRequired(fName) + } + } + } + // If a flag that is mutually exclusive to others is present, we hide the other // flags of that group so the shell completion does not suggest them for flagList, flagnameAndStatus := range mutuallyExclusiveGroupStatus { diff --git a/vendor/github.com/spf13/cobra/powershell_completions.go b/vendor/github.com/spf13/cobra/powershell_completions.go index 177d2755..a830b7bc 100644 --- a/vendor/github.com/spf13/cobra/powershell_completions.go +++ b/vendor/github.com/spf13/cobra/powershell_completions.go @@ -28,8 +28,8 @@ import ( func genPowerShellComp(buf io.StringWriter, name string, includeDesc bool) { // Variables should not contain a '-' or ':' character nameForVar := name - nameForVar = strings.Replace(nameForVar, "-", "_", -1) - nameForVar = strings.Replace(nameForVar, ":", "_", -1) + nameForVar = strings.ReplaceAll(nameForVar, "-", "_") + nameForVar = strings.ReplaceAll(nameForVar, ":", "_") compCmd := ShellCompRequestCmd if !includeDesc { @@ -47,7 +47,7 @@ filter __%[1]s_escapeStringWithSpecialChars { `+" $_ -replace '\\s|#|@|\\$|;|,|''|\\{|\\}|\\(|\\)|\"|`|\\||<|>|&','`$&'"+` } -[scriptblock]$__%[2]sCompleterBlock = { +[scriptblock]${__%[2]sCompleterBlock} = { param( $WordToComplete, $CommandAst, @@ -122,7 +122,7 @@ filter __%[1]s_escapeStringWithSpecialChars { __%[1]s_debug "Calling $RequestComp" # First disable ActiveHelp which is not supported for Powershell - $env:%[10]s=0 + ${env:%[10]s}=0 #call the command store the output in $out and redirect stderr and stdout to null # $Out is an array contains each line per element @@ -279,7 +279,7 @@ filter __%[1]s_escapeStringWithSpecialChars { } } -Register-ArgumentCompleter -CommandName '%[1]s' -ScriptBlock $__%[2]sCompleterBlock +Register-ArgumentCompleter -CommandName '%[1]s' -ScriptBlock ${__%[2]sCompleterBlock} `, name, nameForVar, compCmd, ShellCompDirectiveError, ShellCompDirectiveNoSpace, ShellCompDirectiveNoFileComp, ShellCompDirectiveFilterFileExt, ShellCompDirectiveFilterDirs, ShellCompDirectiveKeepOrder, activeHelpEnvVar(name))) diff --git a/vendor/github.com/spf13/cobra/powershell_completions.md b/vendor/github.com/spf13/cobra/powershell_completions.md deleted file mode 100644 index c449f1e5..00000000 --- a/vendor/github.com/spf13/cobra/powershell_completions.md +++ /dev/null @@ -1,3 +0,0 @@ -# Generating PowerShell Completions For Your Own cobra.Command - -Please refer to [Shell Completions](shell_completions.md#powershell-completions) for details. diff --git a/vendor/github.com/spf13/cobra/projects_using_cobra.md b/vendor/github.com/spf13/cobra/projects_using_cobra.md deleted file mode 100644 index 8a291eb2..00000000 --- a/vendor/github.com/spf13/cobra/projects_using_cobra.md +++ /dev/null @@ -1,64 +0,0 @@ -## Projects using Cobra - -- [Allero](https://github.com/allero-io/allero) -- [Arewefastyet](https://benchmark.vitess.io) -- [Arduino CLI](https://github.com/arduino/arduino-cli) -- [Bleve](https://blevesearch.com/) -- [Cilium](https://cilium.io/) -- [CloudQuery](https://github.com/cloudquery/cloudquery) -- [CockroachDB](https://www.cockroachlabs.com/) -- [Constellation](https://github.com/edgelesssys/constellation) -- [Cosmos SDK](https://github.com/cosmos/cosmos-sdk) -- [Datree](https://github.com/datreeio/datree) -- [Delve](https://github.com/derekparker/delve) -- [Docker (distribution)](https://github.com/docker/distribution) -- [Etcd](https://etcd.io/) -- [Gardener](https://github.com/gardener/gardenctl) -- [Giant Swarm's gsctl](https://github.com/giantswarm/gsctl) -- [Git Bump](https://github.com/erdaltsksn/git-bump) -- [GitHub CLI](https://github.com/cli/cli) -- [GitHub Labeler](https://github.com/erdaltsksn/gh-label) -- [Golangci-lint](https://golangci-lint.run) -- [GopherJS](https://github.com/gopherjs/gopherjs) -- [GoReleaser](https://goreleaser.com) -- [Helm](https://helm.sh) -- [Hugo](https://gohugo.io) -- [Infracost](https://github.com/infracost/infracost) -- [Istio](https://istio.io) -- [Kool](https://github.com/kool-dev/kool) -- [Kubernetes](https://kubernetes.io/) -- [Kubescape](https://github.com/kubescape/kubescape) -- [KubeVirt](https://github.com/kubevirt/kubevirt) -- [Linkerd](https://linkerd.io/) -- [Mattermost-server](https://github.com/mattermost/mattermost-server) -- [Mercure](https://mercure.rocks/) -- [Meroxa CLI](https://github.com/meroxa/cli) -- [Metal Stack CLI](https://github.com/metal-stack/metalctl) -- [Moby (former Docker)](https://github.com/moby/moby) -- [Moldy](https://github.com/Moldy-Community/moldy) -- [Multi-gitter](https://github.com/lindell/multi-gitter) -- [Nanobox](https://github.com/nanobox-io/nanobox)/[Nanopack](https://github.com/nanopack) -- [nFPM](https://nfpm.goreleaser.com) -- [Okteto](https://github.com/okteto/okteto) -- [OpenShift](https://www.openshift.com/) -- [Ory Hydra](https://github.com/ory/hydra) -- [Ory Kratos](https://github.com/ory/kratos) -- [Pixie](https://github.com/pixie-io/pixie) -- [Polygon Edge](https://github.com/0xPolygon/polygon-edge) -- [Pouch](https://github.com/alibaba/pouch) -- [ProjectAtomic (enterprise)](https://www.projectatomic.io/) -- [Prototool](https://github.com/uber/prototool) -- [Pulumi](https://www.pulumi.com) -- [QRcp](https://github.com/claudiodangelis/qrcp) -- [Random](https://github.com/erdaltsksn/random) -- [Rclone](https://rclone.org/) -- [Scaleway CLI](https://github.com/scaleway/scaleway-cli) -- [Sia](https://github.com/SiaFoundation/siad) -- [Skaffold](https://skaffold.dev/) -- [Tendermint](https://github.com/tendermint/tendermint) -- [Twitch CLI](https://github.com/twitchdev/twitch-cli) -- [UpCloud CLI (`upctl`)](https://github.com/UpCloudLtd/upcloud-cli) -- [Vitess](https://vitess.io) -- VMware's [Tanzu Community Edition](https://github.com/vmware-tanzu/community-edition) & [Tanzu Framework](https://github.com/vmware-tanzu/tanzu-framework) -- [Werf](https://werf.io/) -- [ZITADEL](https://github.com/zitadel/zitadel) diff --git a/vendor/github.com/spf13/cobra/shell_completions.md b/vendor/github.com/spf13/cobra/shell_completions.md deleted file mode 100644 index 065c0621..00000000 --- a/vendor/github.com/spf13/cobra/shell_completions.md +++ /dev/null @@ -1,576 +0,0 @@ -# Generating shell completions - -Cobra can generate shell completions for multiple shells. -The currently supported shells are: -- Bash -- Zsh -- fish -- PowerShell - -Cobra will automatically provide your program with a fully functional `completion` command, -similarly to how it provides the `help` command. - -## Creating your own completion command - -If you do not wish to use the default `completion` command, you can choose to -provide your own, which will take precedence over the default one. (This also provides -backwards-compatibility with programs that already have their own `completion` command.) - -If you are using the `cobra-cli` generator, -which can be found at [spf13/cobra-cli](https://github.com/spf13/cobra-cli), -you can create a completion command by running - -```bash -cobra-cli add completion -``` -and then modifying the generated `cmd/completion.go` file to look something like this -(writing the shell script to stdout allows the most flexible use): - -```go -var completionCmd = &cobra.Command{ - Use: "completion [bash|zsh|fish|powershell]", - Short: "Generate completion script", - Long: fmt.Sprintf(`To load completions: - -Bash: - - $ source <(%[1]s completion bash) - - # To load completions for each session, execute once: - # Linux: - $ %[1]s completion bash > /etc/bash_completion.d/%[1]s - # macOS: - $ %[1]s completion bash > $(brew --prefix)/etc/bash_completion.d/%[1]s - -Zsh: - - # If shell completion is not already enabled in your environment, - # you will need to enable it. You can execute the following once: - - $ echo "autoload -U compinit; compinit" >> ~/.zshrc - - # To load completions for each session, execute once: - $ %[1]s completion zsh > "${fpath[1]}/_%[1]s" - - # You will need to start a new shell for this setup to take effect. - -fish: - - $ %[1]s completion fish | source - - # To load completions for each session, execute once: - $ %[1]s completion fish > ~/.config/fish/completions/%[1]s.fish - -PowerShell: - - PS> %[1]s completion powershell | Out-String | Invoke-Expression - - # To load completions for every new session, run: - PS> %[1]s completion powershell > %[1]s.ps1 - # and source this file from your PowerShell profile. -`,cmd.Root().Name()), - DisableFlagsInUseLine: true, - ValidArgs: []string{"bash", "zsh", "fish", "powershell"}, - Args: cobra.MatchAll(cobra.ExactArgs(1), cobra.OnlyValidArgs), - Run: func(cmd *cobra.Command, args []string) { - switch args[0] { - case "bash": - cmd.Root().GenBashCompletion(os.Stdout) - case "zsh": - cmd.Root().GenZshCompletion(os.Stdout) - case "fish": - cmd.Root().GenFishCompletion(os.Stdout, true) - case "powershell": - cmd.Root().GenPowerShellCompletionWithDesc(os.Stdout) - } - }, -} -``` - -**Note:** The cobra generator may include messages printed to stdout, for example, if the config file is loaded; this will break the auto-completion script so must be removed. - -## Adapting the default completion command - -Cobra provides a few options for the default `completion` command. To configure such options you must set -the `CompletionOptions` field on the *root* command. - -To tell Cobra *not* to provide the default `completion` command: -``` -rootCmd.CompletionOptions.DisableDefaultCmd = true -``` - -To tell Cobra to mark the default `completion` command as *hidden*: -``` -rootCmd.CompletionOptions.HiddenDefaultCmd = true -``` - -To tell Cobra *not* to provide the user with the `--no-descriptions` flag to the completion sub-commands: -``` -rootCmd.CompletionOptions.DisableNoDescFlag = true -``` - -To tell Cobra to completely disable descriptions for completions: -``` -rootCmd.CompletionOptions.DisableDescriptions = true -``` - -# Customizing completions - -The generated completion scripts will automatically handle completing commands and flags. However, you can make your completions much more powerful by providing information to complete your program's nouns and flag values. - -## Completion of nouns - -### Static completion of nouns - -Cobra allows you to provide a pre-defined list of completion choices for your nouns using the `ValidArgs` field. -For example, if you want `kubectl get [tab][tab]` to show a list of valid "nouns" you have to set them. -Some simplified code from `kubectl get` looks like: - -```go -validArgs = []string{ "pod", "node", "service", "replicationcontroller" } - -cmd := &cobra.Command{ - Use: "get [(-o|--output=)json|yaml|template|...] (RESOURCE [NAME] | RESOURCE/NAME ...)", - Short: "Display one or many resources", - Long: get_long, - Example: get_example, - Run: func(cmd *cobra.Command, args []string) { - cobra.CheckErr(RunGet(f, out, cmd, args)) - }, - ValidArgs: validArgs, -} -``` - -Notice we put the `ValidArgs` field on the `get` sub-command. Doing so will give results like: - -```bash -$ kubectl get [tab][tab] -node pod replicationcontroller service -``` - -#### Aliases for nouns - -If your nouns have aliases, you can define them alongside `ValidArgs` using `ArgAliases`: - -```go -argAliases = []string { "pods", "nodes", "services", "svc", "replicationcontrollers", "rc" } - -cmd := &cobra.Command{ - ... - ValidArgs: validArgs, - ArgAliases: argAliases -} -``` - -The aliases are shown to the user on tab completion only if no completions were found within sub-commands or `ValidArgs`. - -### Dynamic completion of nouns - -In some cases it is not possible to provide a list of completions in advance. Instead, the list of completions must be determined at execution-time. In a similar fashion as for static completions, you can use the `ValidArgsFunction` field to provide a Go function that Cobra will execute when it needs the list of completion choices for the nouns of a command. Note that either `ValidArgs` or `ValidArgsFunction` can be used for a single cobra command, but not both. -Simplified code from `helm status` looks like: - -```go -cmd := &cobra.Command{ - Use: "status RELEASE_NAME", - Short: "Display the status of the named release", - Long: status_long, - RunE: func(cmd *cobra.Command, args []string) { - RunGet(args[0]) - }, - ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - if len(args) != 0 { - return nil, cobra.ShellCompDirectiveNoFileComp - } - return getReleasesFromCluster(toComplete), cobra.ShellCompDirectiveNoFileComp - }, -} -``` -Where `getReleasesFromCluster()` is a Go function that obtains the list of current Helm releases running on the Kubernetes cluster. -Notice we put the `ValidArgsFunction` on the `status` sub-command. Let's assume the Helm releases on the cluster are: `harbor`, `notary`, `rook` and `thanos` then this dynamic completion will give results like: - -```bash -$ helm status [tab][tab] -harbor notary rook thanos -``` -You may have noticed the use of `cobra.ShellCompDirective`. These directives are bit fields allowing to control some shell completion behaviors for your particular completion. You can combine them with the bit-or operator such as `cobra.ShellCompDirectiveNoSpace | cobra.ShellCompDirectiveNoFileComp` -```go -// Indicates that the shell will perform its default behavior after completions -// have been provided (this implies none of the other directives). -ShellCompDirectiveDefault - -// Indicates an error occurred and completions should be ignored. -ShellCompDirectiveError - -// Indicates that the shell should not add a space after the completion, -// even if there is a single completion provided. -ShellCompDirectiveNoSpace - -// Indicates that the shell should not provide file completion even when -// no completion is provided. -ShellCompDirectiveNoFileComp - -// Indicates that the returned completions should be used as file extension filters. -// For example, to complete only files of the form *.json or *.yaml: -// return []string{"yaml", "json"}, ShellCompDirectiveFilterFileExt -// For flags, using MarkFlagFilename() and MarkPersistentFlagFilename() -// is a shortcut to using this directive explicitly. -// -ShellCompDirectiveFilterFileExt - -// Indicates that only directory names should be provided in file completion. -// For example: -// return nil, ShellCompDirectiveFilterDirs -// For flags, using MarkFlagDirname() is a shortcut to using this directive explicitly. -// -// To request directory names within another directory, the returned completions -// should specify a single directory name within which to search. For example, -// to complete directories within "themes/": -// return []string{"themes"}, ShellCompDirectiveFilterDirs -// -ShellCompDirectiveFilterDirs - -// ShellCompDirectiveKeepOrder indicates that the shell should preserve the order -// in which the completions are provided -ShellCompDirectiveKeepOrder -``` - -***Note***: When using the `ValidArgsFunction`, Cobra will call your registered function after having parsed all flags and arguments provided in the command-line. You therefore don't need to do this parsing yourself. For example, when a user calls `helm status --namespace my-rook-ns [tab][tab]`, Cobra will call your registered `ValidArgsFunction` after having parsed the `--namespace` flag, as it would have done when calling the `RunE` function. - -#### Debugging - -Cobra achieves dynamic completion through the use of a hidden command called by the completion script. To debug your Go completion code, you can call this hidden command directly: -```bash -$ helm __complete status har -harbor -:4 -Completion ended with directive: ShellCompDirectiveNoFileComp # This is on stderr -``` -***Important:*** If the noun to complete is empty (when the user has not yet typed any letters of that noun), you must pass an empty parameter to the `__complete` command: -```bash -$ helm __complete status "" -harbor -notary -rook -thanos -:4 -Completion ended with directive: ShellCompDirectiveNoFileComp # This is on stderr -``` -Calling the `__complete` command directly allows you to run the Go debugger to troubleshoot your code. You can also add printouts to your code; Cobra provides the following functions to use for printouts in Go completion code: -```go -// Prints to the completion script debug file (if BASH_COMP_DEBUG_FILE -// is set to a file path) and optionally prints to stderr. -cobra.CompDebug(msg string, printToStdErr bool) { -cobra.CompDebugln(msg string, printToStdErr bool) - -// Prints to the completion script debug file (if BASH_COMP_DEBUG_FILE -// is set to a file path) and to stderr. -cobra.CompError(msg string) -cobra.CompErrorln(msg string) -``` -***Important:*** You should **not** leave traces that print directly to stdout in your completion code as they will be interpreted as completion choices by the completion script. Instead, use the cobra-provided debugging traces functions mentioned above. - -## Completions for flags - -### Mark flags as required - -Most of the time completions will only show sub-commands. But if a flag is required to make a sub-command work, you probably want it to show up when the user types [tab][tab]. You can mark a flag as 'Required' like so: - -```go -cmd.MarkFlagRequired("pod") -cmd.MarkFlagRequired("container") -``` - -and you'll get something like - -```bash -$ kubectl exec [tab][tab] --c --container= -p --pod= -``` - -### Specify dynamic flag completion - -As for nouns, Cobra provides a way of defining dynamic completion of flags. To provide a Go function that Cobra will execute when it needs the list of completion choices for a flag, you must register the function using the `command.RegisterFlagCompletionFunc()` function. - -```go -flagName := "output" -cmd.RegisterFlagCompletionFunc(flagName, func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - return []string{"json", "table", "yaml"}, cobra.ShellCompDirectiveDefault -}) -``` -Notice that calling `RegisterFlagCompletionFunc()` is done through the `command` with which the flag is associated. In our example this dynamic completion will give results like so: - -```bash -$ helm status --output [tab][tab] -json table yaml -``` - -#### Debugging - -You can also easily debug your Go completion code for flags: -```bash -$ helm __complete status --output "" -json -table -yaml -:4 -Completion ended with directive: ShellCompDirectiveNoFileComp # This is on stderr -``` -***Important:*** You should **not** leave traces that print to stdout in your completion code as they will be interpreted as completion choices by the completion script. Instead, use the cobra-provided debugging traces functions mentioned further above. - -### Specify valid filename extensions for flags that take a filename - -To limit completions of flag values to file names with certain extensions you can either use the different `MarkFlagFilename()` functions or a combination of `RegisterFlagCompletionFunc()` and `ShellCompDirectiveFilterFileExt`, like so: -```go -flagName := "output" -cmd.MarkFlagFilename(flagName, "yaml", "json") -``` -or -```go -flagName := "output" -cmd.RegisterFlagCompletionFunc(flagName, func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - return []string{"yaml", "json"}, ShellCompDirectiveFilterFileExt}) -``` - -### Limit flag completions to directory names - -To limit completions of flag values to directory names you can either use the `MarkFlagDirname()` functions or a combination of `RegisterFlagCompletionFunc()` and `ShellCompDirectiveFilterDirs`, like so: -```go -flagName := "output" -cmd.MarkFlagDirname(flagName) -``` -or -```go -flagName := "output" -cmd.RegisterFlagCompletionFunc(flagName, func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - return nil, cobra.ShellCompDirectiveFilterDirs -}) -``` -To limit completions of flag values to directory names *within another directory* you can use a combination of `RegisterFlagCompletionFunc()` and `ShellCompDirectiveFilterDirs` like so: -```go -flagName := "output" -cmd.RegisterFlagCompletionFunc(flagName, func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - return []string{"themes"}, cobra.ShellCompDirectiveFilterDirs -}) -``` -### Descriptions for completions - -Cobra provides support for completion descriptions. Such descriptions are supported for each shell -(however, for bash, it is only available in the [completion V2 version](#bash-completion-v2)). -For commands and flags, Cobra will provide the descriptions automatically, based on usage information. -For example, using zsh: -``` -$ helm s[tab] -search -- search for a keyword in charts -show -- show information of a chart -status -- displays the status of the named release -``` -while using fish: -``` -$ helm s[tab] -search (search for a keyword in charts) show (show information of a chart) status (displays the status of the named release) -``` - -Cobra allows you to add descriptions to your own completions. Simply add the description text after each completion, following a `\t` separator. This technique applies to completions returned by `ValidArgs`, `ValidArgsFunction` and `RegisterFlagCompletionFunc()`. For example: -```go -ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { - return []string{"harbor\tAn image registry", "thanos\tLong-term metrics"}, cobra.ShellCompDirectiveNoFileComp -}} -``` -or -```go -ValidArgs: []string{"bash\tCompletions for bash", "zsh\tCompletions for zsh"} -``` - -If you don't want to show descriptions in the completions, you can add `--no-descriptions` to the default `completion` command to disable them, like: - -```bash -$ source <(helm completion bash) -$ helm completion [tab][tab] -bash (generate autocompletion script for bash) powershell (generate autocompletion script for powershell) -fish (generate autocompletion script for fish) zsh (generate autocompletion script for zsh) - -$ source <(helm completion bash --no-descriptions) -$ helm completion [tab][tab] -bash fish powershell zsh -``` -## Bash completions - -### Dependencies - -The bash completion script generated by Cobra requires the `bash_completion` package. You should update the help text of your completion command to show how to install the `bash_completion` package ([Kubectl docs](https://kubernetes.io/docs/tasks/tools/install-kubectl/#enabling-shell-autocompletion)) - -### Aliases - -You can also configure `bash` aliases for your program and they will also support completions. - -```bash -alias aliasname=origcommand -complete -o default -F __start_origcommand aliasname - -# and now when you run `aliasname` completion will make -# suggestions as it did for `origcommand`. - -$ aliasname -completion firstcommand secondcommand -``` -### Bash legacy dynamic completions - -For backward compatibility, Cobra still supports its bash legacy dynamic completion solution. -Please refer to [Bash Completions](bash_completions.md) for details. - -### Bash completion V2 - -Cobra provides two versions for bash completion. The original bash completion (which started it all!) can be used by calling -`GenBashCompletion()` or `GenBashCompletionFile()`. - -A new V2 bash completion version is also available. This version can be used by calling `GenBashCompletionV2()` or -`GenBashCompletionFileV2()`. The V2 version does **not** support the legacy dynamic completion -(see [Bash Completions](bash_completions.md)) but instead works only with the Go dynamic completion -solution described in this document. -Unless your program already uses the legacy dynamic completion solution, it is recommended that you use the bash -completion V2 solution which provides the following extra features: -- Supports completion descriptions (like the other shells) -- Small completion script of less than 300 lines (v1 generates scripts of thousands of lines; `kubectl` for example has a bash v1 completion script of over 13K lines) -- Streamlined user experience thanks to a completion behavior aligned with the other shells - -`Bash` completion V2 supports descriptions for completions. When calling `GenBashCompletionV2()` or `GenBashCompletionFileV2()` -you must provide these functions with a parameter indicating if the completions should be annotated with a description; Cobra -will provide the description automatically based on usage information. You can choose to make this option configurable by -your users. - -``` -# With descriptions -$ helm s[tab][tab] -search (search for a keyword in charts) status (display the status of the named release) -show (show information of a chart) - -# Without descriptions -$ helm s[tab][tab] -search show status -``` -**Note**: Cobra's default `completion` command uses bash completion V2. If for some reason you need to use bash completion V1, you will need to implement your own `completion` command. -## Zsh completions - -Cobra supports native zsh completion generated from the root `cobra.Command`. -The generated completion script should be put somewhere in your `$fpath` and be named -`_`. You will need to start a new shell for the completions to become available. - -Zsh supports descriptions for completions. Cobra will provide the description automatically, -based on usage information. Cobra provides a way to completely disable such descriptions by -using `GenZshCompletionNoDesc()` or `GenZshCompletionFileNoDesc()`. You can choose to make -this a configurable option to your users. -``` -# With descriptions -$ helm s[tab] -search -- search for a keyword in charts -show -- show information of a chart -status -- displays the status of the named release - -# Without descriptions -$ helm s[tab] -search show status -``` -*Note*: Because of backward-compatibility requirements, we were forced to have a different API to disable completion descriptions between `zsh` and `fish`. - -### Limitations - -* Custom completions implemented in Bash scripting (legacy) are not supported and will be ignored for `zsh` (including the use of the `BashCompCustom` flag annotation). - * You should instead use `ValidArgsFunction` and `RegisterFlagCompletionFunc()` which are portable to the different shells (`bash`, `zsh`, `fish`, `powershell`). -* The function `MarkFlagCustom()` is not supported and will be ignored for `zsh`. - * You should instead use `RegisterFlagCompletionFunc()`. - -### Zsh completions standardization - -Cobra 1.1 standardized its zsh completion support to align it with its other shell completions. Although the API was kept backward-compatible, some small changes in behavior were introduced. -Please refer to [Zsh Completions](zsh_completions.md) for details. - -## fish completions - -Cobra supports native fish completions generated from the root `cobra.Command`. You can use the `command.GenFishCompletion()` or `command.GenFishCompletionFile()` functions. You must provide these functions with a parameter indicating if the completions should be annotated with a description; Cobra will provide the description automatically based on usage information. You can choose to make this option configurable by your users. -``` -# With descriptions -$ helm s[tab] -search (search for a keyword in charts) show (show information of a chart) status (displays the status of the named release) - -# Without descriptions -$ helm s[tab] -search show status -``` -*Note*: Because of backward-compatibility requirements, we were forced to have a different API to disable completion descriptions between `zsh` and `fish`. - -### Limitations - -* Custom completions implemented in bash scripting (legacy) are not supported and will be ignored for `fish` (including the use of the `BashCompCustom` flag annotation). - * You should instead use `ValidArgsFunction` and `RegisterFlagCompletionFunc()` which are portable to the different shells (`bash`, `zsh`, `fish`, `powershell`). -* The function `MarkFlagCustom()` is not supported and will be ignored for `fish`. - * You should instead use `RegisterFlagCompletionFunc()`. -* The following flag completion annotations are not supported and will be ignored for `fish`: - * `BashCompFilenameExt` (filtering by file extension) - * `BashCompSubdirsInDir` (filtering by directory) -* The functions corresponding to the above annotations are consequently not supported and will be ignored for `fish`: - * `MarkFlagFilename()` and `MarkPersistentFlagFilename()` (filtering by file extension) - * `MarkFlagDirname()` and `MarkPersistentFlagDirname()` (filtering by directory) -* Similarly, the following completion directives are not supported and will be ignored for `fish`: - * `ShellCompDirectiveFilterFileExt` (filtering by file extension) - * `ShellCompDirectiveFilterDirs` (filtering by directory) - -## PowerShell completions - -Cobra supports native PowerShell completions generated from the root `cobra.Command`. You can use the `command.GenPowerShellCompletion()` or `command.GenPowerShellCompletionFile()` functions. To include descriptions use `command.GenPowerShellCompletionWithDesc()` and `command.GenPowerShellCompletionFileWithDesc()`. Cobra will provide the description automatically based on usage information. You can choose to make this option configurable by your users. - -The script is designed to support all three PowerShell completion modes: - -* TabCompleteNext (default windows style - on each key press the next option is displayed) -* Complete (works like bash) -* MenuComplete (works like zsh) - -You set the mode with `Set-PSReadLineKeyHandler -Key Tab -Function `. Descriptions are only displayed when using the `Complete` or `MenuComplete` mode. - -Users need PowerShell version 5.0 or above, which comes with Windows 10 and can be downloaded separately for Windows 7 or 8.1. They can then write the completions to a file and source this file from their PowerShell profile, which is referenced by the `$Profile` environment variable. See `Get-Help about_Profiles` for more info about PowerShell profiles. - -``` -# With descriptions and Mode 'Complete' -$ helm s[tab] -search (search for a keyword in charts) show (show information of a chart) status (displays the status of the named release) - -# With descriptions and Mode 'MenuComplete' The description of the current selected value will be displayed below the suggestions. -$ helm s[tab] -search show status - -search for a keyword in charts - -# Without descriptions -$ helm s[tab] -search show status -``` -### Aliases - -You can also configure `powershell` aliases for your program and they will also support completions. - -``` -$ sal aliasname origcommand -$ Register-ArgumentCompleter -CommandName 'aliasname' -ScriptBlock $__origcommandCompleterBlock - -# and now when you run `aliasname` completion will make -# suggestions as it did for `origcommand`. - -$ aliasname -completion firstcommand secondcommand -``` -The name of the completer block variable is of the form `$__CompleterBlock` where every `-` and `:` in the program name have been replaced with `_`, to respect powershell naming syntax. - -### Limitations - -* Custom completions implemented in bash scripting (legacy) are not supported and will be ignored for `powershell` (including the use of the `BashCompCustom` flag annotation). - * You should instead use `ValidArgsFunction` and `RegisterFlagCompletionFunc()` which are portable to the different shells (`bash`, `zsh`, `fish`, `powershell`). -* The function `MarkFlagCustom()` is not supported and will be ignored for `powershell`. - * You should instead use `RegisterFlagCompletionFunc()`. -* The following flag completion annotations are not supported and will be ignored for `powershell`: - * `BashCompFilenameExt` (filtering by file extension) - * `BashCompSubdirsInDir` (filtering by directory) -* The functions corresponding to the above annotations are consequently not supported and will be ignored for `powershell`: - * `MarkFlagFilename()` and `MarkPersistentFlagFilename()` (filtering by file extension) - * `MarkFlagDirname()` and `MarkPersistentFlagDirname()` (filtering by directory) -* Similarly, the following completion directives are not supported and will be ignored for `powershell`: - * `ShellCompDirectiveFilterFileExt` (filtering by file extension) - * `ShellCompDirectiveFilterDirs` (filtering by directory) diff --git a/vendor/github.com/spf13/cobra/user_guide.md b/vendor/github.com/spf13/cobra/user_guide.md deleted file mode 100644 index 85201d84..00000000 --- a/vendor/github.com/spf13/cobra/user_guide.md +++ /dev/null @@ -1,726 +0,0 @@ -# User Guide - -While you are welcome to provide your own organization, typically a Cobra-based -application will follow the following organizational structure: - -``` - ▾ appName/ - ▾ cmd/ - add.go - your.go - commands.go - here.go - main.go -``` - -In a Cobra app, typically the main.go file is very bare. It serves one purpose: initializing Cobra. - -```go -package main - -import ( - "{pathToYourApp}/cmd" -) - -func main() { - cmd.Execute() -} -``` - -## Using the Cobra Generator - -Cobra-CLI is its own program that will create your application and add any -commands you want. It's the easiest way to incorporate Cobra into your application. - -For complete details on using the Cobra generator, please refer to [The Cobra-CLI Generator README](https://github.com/spf13/cobra-cli/blob/main/README.md) - -## Using the Cobra Library - -To manually implement Cobra you need to create a bare main.go file and a rootCmd file. -You will optionally provide additional commands as you see fit. - -### Create rootCmd - -Cobra doesn't require any special constructors. Simply create your commands. - -Ideally you place this in app/cmd/root.go: - -```go -var rootCmd = &cobra.Command{ - Use: "hugo", - Short: "Hugo is a very fast static site generator", - Long: `A Fast and Flexible Static Site Generator built with - love by spf13 and friends in Go. - Complete documentation is available at https://gohugo.io/documentation/`, - Run: func(cmd *cobra.Command, args []string) { - // Do Stuff Here - }, -} - -func Execute() { - if err := rootCmd.Execute(); err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } -} -``` - -You will additionally define flags and handle configuration in your init() function. - -For example cmd/root.go: - -```go -package cmd - -import ( - "fmt" - "os" - - "github.com/spf13/cobra" - "github.com/spf13/viper" -) - -var ( - // Used for flags. - cfgFile string - userLicense string - - rootCmd = &cobra.Command{ - Use: "cobra-cli", - Short: "A generator for Cobra based Applications", - Long: `Cobra is a CLI library for Go that empowers applications. -This application is a tool to generate the needed files -to quickly create a Cobra application.`, - } -) - -// Execute executes the root command. -func Execute() error { - return rootCmd.Execute() -} - -func init() { - cobra.OnInitialize(initConfig) - - rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is $HOME/.cobra.yaml)") - rootCmd.PersistentFlags().StringP("author", "a", "YOUR NAME", "author name for copyright attribution") - rootCmd.PersistentFlags().StringVarP(&userLicense, "license", "l", "", "name of license for the project") - rootCmd.PersistentFlags().Bool("viper", true, "use Viper for configuration") - viper.BindPFlag("author", rootCmd.PersistentFlags().Lookup("author")) - viper.BindPFlag("useViper", rootCmd.PersistentFlags().Lookup("viper")) - viper.SetDefault("author", "NAME HERE ") - viper.SetDefault("license", "apache") - - rootCmd.AddCommand(addCmd) - rootCmd.AddCommand(initCmd) -} - -func initConfig() { - if cfgFile != "" { - // Use config file from the flag. - viper.SetConfigFile(cfgFile) - } else { - // Find home directory. - home, err := os.UserHomeDir() - cobra.CheckErr(err) - - // Search config in home directory with name ".cobra" (without extension). - viper.AddConfigPath(home) - viper.SetConfigType("yaml") - viper.SetConfigName(".cobra") - } - - viper.AutomaticEnv() - - if err := viper.ReadInConfig(); err == nil { - fmt.Println("Using config file:", viper.ConfigFileUsed()) - } -} -``` - -### Create your main.go - -With the root command you need to have your main function execute it. -Execute should be run on the root for clarity, though it can be called on any command. - -In a Cobra app, typically the main.go file is very bare. It serves one purpose: to initialize Cobra. - -```go -package main - -import ( - "{pathToYourApp}/cmd" -) - -func main() { - cmd.Execute() -} -``` - -### Create additional commands - -Additional commands can be defined and typically are each given their own file -inside of the cmd/ directory. - -If you wanted to create a version command you would create cmd/version.go and -populate it with the following: - -```go -package cmd - -import ( - "fmt" - - "github.com/spf13/cobra" -) - -func init() { - rootCmd.AddCommand(versionCmd) -} - -var versionCmd = &cobra.Command{ - Use: "version", - Short: "Print the version number of Hugo", - Long: `All software has versions. This is Hugo's`, - Run: func(cmd *cobra.Command, args []string) { - fmt.Println("Hugo Static Site Generator v0.9 -- HEAD") - }, -} -``` - -### Organizing subcommands - -A command may have subcommands which in turn may have other subcommands. This is achieved by using -`AddCommand`. In some cases, especially in larger applications, each subcommand may be defined in -its own go package. - -The suggested approach is for the parent command to use `AddCommand` to add its most immediate -subcommands. For example, consider the following directory structure: - -```text -├── cmd -│   ├── root.go -│   └── sub1 -│   ├── sub1.go -│   └── sub2 -│   ├── leafA.go -│   ├── leafB.go -│   └── sub2.go -└── main.go -``` - -In this case: - -* The `init` function of `root.go` adds the command defined in `sub1.go` to the root command. -* The `init` function of `sub1.go` adds the command defined in `sub2.go` to the sub1 command. -* The `init` function of `sub2.go` adds the commands defined in `leafA.go` and `leafB.go` to the - sub2 command. - -This approach ensures the subcommands are always included at compile time while avoiding cyclic -references. - -### Returning and handling errors - -If you wish to return an error to the caller of a command, `RunE` can be used. - -```go -package cmd - -import ( - "fmt" - - "github.com/spf13/cobra" -) - -func init() { - rootCmd.AddCommand(tryCmd) -} - -var tryCmd = &cobra.Command{ - Use: "try", - Short: "Try and possibly fail at something", - RunE: func(cmd *cobra.Command, args []string) error { - if err := someFunc(); err != nil { - return err - } - return nil - }, -} -``` - -The error can then be caught at the execute function call. - -## Working with Flags - -Flags provide modifiers to control how the action command operates. - -### Assign flags to a command - -Since the flags are defined and used in different locations, we need to -define a variable outside with the correct scope to assign the flag to -work with. - -```go -var Verbose bool -var Source string -``` - -There are two different approaches to assign a flag. - -### Persistent Flags - -A flag can be 'persistent', meaning that this flag will be available to the -command it's assigned to as well as every command under that command. For -global flags, assign a flag as a persistent flag on the root. - -```go -rootCmd.PersistentFlags().BoolVarP(&Verbose, "verbose", "v", false, "verbose output") -``` - -### Local Flags - -A flag can also be assigned locally, which will only apply to that specific command. - -```go -localCmd.Flags().StringVarP(&Source, "source", "s", "", "Source directory to read from") -``` - -### Local Flag on Parent Commands - -By default, Cobra only parses local flags on the target command, and any local flags on -parent commands are ignored. By enabling `Command.TraverseChildren`, Cobra will -parse local flags on each command before executing the target command. - -```go -command := cobra.Command{ - Use: "print [OPTIONS] [COMMANDS]", - TraverseChildren: true, -} -``` - -### Bind Flags with Config - -You can also bind your flags with [viper](https://github.com/spf13/viper): -```go -var author string - -func init() { - rootCmd.PersistentFlags().StringVar(&author, "author", "YOUR NAME", "Author name for copyright attribution") - viper.BindPFlag("author", rootCmd.PersistentFlags().Lookup("author")) -} -``` - -In this example, the persistent flag `author` is bound with `viper`. -**Note**: the variable `author` will not be set to the value from config, -when the `--author` flag is provided by user. - -More in [viper documentation](https://github.com/spf13/viper#working-with-flags). - -### Required flags - -Flags are optional by default. If instead you wish your command to report an error -when a flag has not been set, mark it as required: -```go -rootCmd.Flags().StringVarP(&Region, "region", "r", "", "AWS region (required)") -rootCmd.MarkFlagRequired("region") -``` - -Or, for persistent flags: -```go -rootCmd.PersistentFlags().StringVarP(&Region, "region", "r", "", "AWS region (required)") -rootCmd.MarkPersistentFlagRequired("region") -``` - -### Flag Groups - -If you have different flags that must be provided together (e.g. if they provide the `--username` flag they MUST provide the `--password` flag as well) then -Cobra can enforce that requirement: -```go -rootCmd.Flags().StringVarP(&u, "username", "u", "", "Username (required if password is set)") -rootCmd.Flags().StringVarP(&pw, "password", "p", "", "Password (required if username is set)") -rootCmd.MarkFlagsRequiredTogether("username", "password") -``` - -You can also prevent different flags from being provided together if they represent mutually -exclusive options such as specifying an output format as either `--json` or `--yaml` but never both: -```go -rootCmd.Flags().BoolVar(&ofJson, "json", false, "Output in JSON") -rootCmd.Flags().BoolVar(&ofYaml, "yaml", false, "Output in YAML") -rootCmd.MarkFlagsMutuallyExclusive("json", "yaml") -``` - -In both of these cases: - - both local and persistent flags can be used - - **NOTE:** the group is only enforced on commands where every flag is defined - - a flag may appear in multiple groups - - a group may contain any number of flags - -## Positional and Custom Arguments - -Validation of positional arguments can be specified using the `Args` field of `Command`. -The following validators are built in: - -- Number of arguments: - - `NoArgs` - report an error if there are any positional args. - - `ArbitraryArgs` - accept any number of args. - - `MinimumNArgs(int)` - report an error if less than N positional args are provided. - - `MaximumNArgs(int)` - report an error if more than N positional args are provided. - - `ExactArgs(int)` - report an error if there are not exactly N positional args. - - `RangeArgs(min, max)` - report an error if the number of args is not between `min` and `max`. -- Content of the arguments: - - `OnlyValidArgs` - report an error if there are any positional args not specified in the `ValidArgs` field of `Command`, which can optionally be set to a list of valid values for positional args. - -If `Args` is undefined or `nil`, it defaults to `ArbitraryArgs`. - -Moreover, `MatchAll(pargs ...PositionalArgs)` enables combining existing checks with arbitrary other checks. -For instance, if you want to report an error if there are not exactly N positional args OR if there are any positional -args that are not in the `ValidArgs` field of `Command`, you can call `MatchAll` on `ExactArgs` and `OnlyValidArgs`, as -shown below: - -```go -var cmd = &cobra.Command{ - Short: "hello", - Args: cobra.MatchAll(cobra.ExactArgs(2), cobra.OnlyValidArgs), - Run: func(cmd *cobra.Command, args []string) { - fmt.Println("Hello, World!") - }, -} -``` - -It is possible to set any custom validator that satisfies `func(cmd *cobra.Command, args []string) error`. -For example: - -```go -var cmd = &cobra.Command{ - Short: "hello", - Args: func(cmd *cobra.Command, args []string) error { - // Optionally run one of the validators provided by cobra - if err := cobra.MinimumNArgs(1)(cmd, args); err != nil { - return err - } - // Run the custom validation logic - if myapp.IsValidColor(args[0]) { - return nil - } - return fmt.Errorf("invalid color specified: %s", args[0]) - }, - Run: func(cmd *cobra.Command, args []string) { - fmt.Println("Hello, World!") - }, -} -``` - -## Example - -In the example below, we have defined three commands. Two are at the top level -and one (cmdTimes) is a child of one of the top commands. In this case the root -is not executable, meaning that a subcommand is required. This is accomplished -by not providing a 'Run' for the 'rootCmd'. - -We have only defined one flag for a single command. - -More documentation about flags is available at https://github.com/spf13/pflag - -```go -package main - -import ( - "fmt" - "strings" - - "github.com/spf13/cobra" -) - -func main() { - var echoTimes int - - var cmdPrint = &cobra.Command{ - Use: "print [string to print]", - Short: "Print anything to the screen", - Long: `print is for printing anything back to the screen. -For many years people have printed back to the screen.`, - Args: cobra.MinimumNArgs(1), - Run: func(cmd *cobra.Command, args []string) { - fmt.Println("Print: " + strings.Join(args, " ")) - }, - } - - var cmdEcho = &cobra.Command{ - Use: "echo [string to echo]", - Short: "Echo anything to the screen", - Long: `echo is for echoing anything back. -Echo works a lot like print, except it has a child command.`, - Args: cobra.MinimumNArgs(1), - Run: func(cmd *cobra.Command, args []string) { - fmt.Println("Echo: " + strings.Join(args, " ")) - }, - } - - var cmdTimes = &cobra.Command{ - Use: "times [string to echo]", - Short: "Echo anything to the screen more times", - Long: `echo things multiple times back to the user by providing -a count and a string.`, - Args: cobra.MinimumNArgs(1), - Run: func(cmd *cobra.Command, args []string) { - for i := 0; i < echoTimes; i++ { - fmt.Println("Echo: " + strings.Join(args, " ")) - } - }, - } - - cmdTimes.Flags().IntVarP(&echoTimes, "times", "t", 1, "times to echo the input") - - var rootCmd = &cobra.Command{Use: "app"} - rootCmd.AddCommand(cmdPrint, cmdEcho) - cmdEcho.AddCommand(cmdTimes) - rootCmd.Execute() -} -``` - -For a more complete example of a larger application, please checkout [Hugo](https://gohugo.io/). - -## Help Command - -Cobra automatically adds a help command to your application when you have subcommands. -This will be called when a user runs 'app help'. Additionally, help will also -support all other commands as input. Say, for instance, you have a command called -'create' without any additional configuration; Cobra will work when 'app help -create' is called. Every command will automatically have the '--help' flag added. - -### Example - -The following output is automatically generated by Cobra. Nothing beyond the -command and flag definitions are needed. - - $ cobra-cli help - - Cobra is a CLI library for Go that empowers applications. - This application is a tool to generate the needed files - to quickly create a Cobra application. - - Usage: - cobra-cli [command] - - Available Commands: - add Add a command to a Cobra Application - completion Generate the autocompletion script for the specified shell - help Help about any command - init Initialize a Cobra Application - - Flags: - -a, --author string author name for copyright attribution (default "YOUR NAME") - --config string config file (default is $HOME/.cobra.yaml) - -h, --help help for cobra-cli - -l, --license string name of license for the project - --viper use Viper for configuration - - Use "cobra-cli [command] --help" for more information about a command. - - -Help is just a command like any other. There is no special logic or behavior -around it. In fact, you can provide your own if you want. - -### Grouping commands in help - -Cobra supports grouping of available commands in the help output. To group commands, each group must be explicitly -defined using `AddGroup()` on the parent command. Then a subcommand can be added to a group using the `GroupID` element -of that subcommand. The groups will appear in the help output in the same order as they are defined using different -calls to `AddGroup()`. If you use the generated `help` or `completion` commands, you can set their group ids using -`SetHelpCommandGroupId()` and `SetCompletionCommandGroupId()` on the root command, respectively. - -### Defining your own help - -You can provide your own Help command or your own template for the default command to use -with the following functions: - -```go -cmd.SetHelpCommand(cmd *Command) -cmd.SetHelpFunc(f func(*Command, []string)) -cmd.SetHelpTemplate(s string) -``` - -The latter two will also apply to any children commands. - -## Usage Message - -When the user provides an invalid flag or invalid command, Cobra responds by -showing the user the 'usage'. - -### Example -You may recognize this from the help above. That's because the default help -embeds the usage as part of its output. - - $ cobra-cli --invalid - Error: unknown flag: --invalid - Usage: - cobra-cli [command] - - Available Commands: - add Add a command to a Cobra Application - completion Generate the autocompletion script for the specified shell - help Help about any command - init Initialize a Cobra Application - - Flags: - -a, --author string author name for copyright attribution (default "YOUR NAME") - --config string config file (default is $HOME/.cobra.yaml) - -h, --help help for cobra-cli - -l, --license string name of license for the project - --viper use Viper for configuration - - Use "cobra [command] --help" for more information about a command. - -### Defining your own usage -You can provide your own usage function or template for Cobra to use. -Like help, the function and template are overridable through public methods: - -```go -cmd.SetUsageFunc(f func(*Command) error) -cmd.SetUsageTemplate(s string) -``` - -## Version Flag - -Cobra adds a top-level '--version' flag if the Version field is set on the root command. -Running an application with the '--version' flag will print the version to stdout using -the version template. The template can be customized using the -`cmd.SetVersionTemplate(s string)` function. - -## PreRun and PostRun Hooks - -It is possible to run functions before or after the main `Run` function of your command. The `PersistentPreRun` and `PreRun` functions will be executed before `Run`. `PersistentPostRun` and `PostRun` will be executed after `Run`. The `Persistent*Run` functions will be inherited by children if they do not declare their own. These functions are run in the following order: - -- `PersistentPreRun` -- `PreRun` -- `Run` -- `PostRun` -- `PersistentPostRun` - -An example of two commands which use all of these features is below. When the subcommand is executed, it will run the root command's `PersistentPreRun` but not the root command's `PersistentPostRun`: - -```go -package main - -import ( - "fmt" - - "github.com/spf13/cobra" -) - -func main() { - - var rootCmd = &cobra.Command{ - Use: "root [sub]", - Short: "My root command", - PersistentPreRun: func(cmd *cobra.Command, args []string) { - fmt.Printf("Inside rootCmd PersistentPreRun with args: %v\n", args) - }, - PreRun: func(cmd *cobra.Command, args []string) { - fmt.Printf("Inside rootCmd PreRun with args: %v\n", args) - }, - Run: func(cmd *cobra.Command, args []string) { - fmt.Printf("Inside rootCmd Run with args: %v\n", args) - }, - PostRun: func(cmd *cobra.Command, args []string) { - fmt.Printf("Inside rootCmd PostRun with args: %v\n", args) - }, - PersistentPostRun: func(cmd *cobra.Command, args []string) { - fmt.Printf("Inside rootCmd PersistentPostRun with args: %v\n", args) - }, - } - - var subCmd = &cobra.Command{ - Use: "sub [no options!]", - Short: "My subcommand", - PreRun: func(cmd *cobra.Command, args []string) { - fmt.Printf("Inside subCmd PreRun with args: %v\n", args) - }, - Run: func(cmd *cobra.Command, args []string) { - fmt.Printf("Inside subCmd Run with args: %v\n", args) - }, - PostRun: func(cmd *cobra.Command, args []string) { - fmt.Printf("Inside subCmd PostRun with args: %v\n", args) - }, - PersistentPostRun: func(cmd *cobra.Command, args []string) { - fmt.Printf("Inside subCmd PersistentPostRun with args: %v\n", args) - }, - } - - rootCmd.AddCommand(subCmd) - - rootCmd.SetArgs([]string{""}) - rootCmd.Execute() - fmt.Println() - rootCmd.SetArgs([]string{"sub", "arg1", "arg2"}) - rootCmd.Execute() -} -``` - -Output: -``` -Inside rootCmd PersistentPreRun with args: [] -Inside rootCmd PreRun with args: [] -Inside rootCmd Run with args: [] -Inside rootCmd PostRun with args: [] -Inside rootCmd PersistentPostRun with args: [] - -Inside rootCmd PersistentPreRun with args: [arg1 arg2] -Inside subCmd PreRun with args: [arg1 arg2] -Inside subCmd Run with args: [arg1 arg2] -Inside subCmd PostRun with args: [arg1 arg2] -Inside subCmd PersistentPostRun with args: [arg1 arg2] -``` - -## Suggestions when "unknown command" happens - -Cobra will print automatic suggestions when "unknown command" errors happen. This allows Cobra to behave similarly to the `git` command when a typo happens. For example: - -``` -$ hugo srever -Error: unknown command "srever" for "hugo" - -Did you mean this? - server - -Run 'hugo --help' for usage. -``` - -Suggestions are automatically generated based on existing subcommands and use an implementation of [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance). Every registered command that matches a minimum distance of 2 (ignoring case) will be displayed as a suggestion. - -If you need to disable suggestions or tweak the string distance in your command, use: - -```go -command.DisableSuggestions = true -``` - -or - -```go -command.SuggestionsMinimumDistance = 1 -``` - -You can also explicitly set names for which a given command will be suggested using the `SuggestFor` attribute. This allows suggestions for strings that are not close in terms of string distance, but make sense in your set of commands but for which -you don't want aliases. Example: - -``` -$ kubectl remove -Error: unknown command "remove" for "kubectl" - -Did you mean this? - delete - -Run 'kubectl help' for usage. -``` - -## Generating documentation for your command - -Cobra can generate documentation based on subcommands, flags, etc. Read more about it in the [docs generation documentation](doc/README.md). - -## Generating shell completions - -Cobra can generate a shell-completion file for the following shells: bash, zsh, fish, PowerShell. If you add more information to your commands, these completions can be amazingly powerful and flexible. Read more about it in [Shell Completions](shell_completions.md). - -## Providing Active Help - -Cobra makes use of the shell-completion system to define a framework allowing you to provide Active Help to your users. Active Help are messages (hints, warnings, etc) printed as the program is being used. Read more about it in [Active Help](active_help.md). diff --git a/vendor/github.com/spf13/cobra/zsh_completions.md b/vendor/github.com/spf13/cobra/zsh_completions.md deleted file mode 100644 index 7cff6178..00000000 --- a/vendor/github.com/spf13/cobra/zsh_completions.md +++ /dev/null @@ -1,48 +0,0 @@ -## Generating Zsh Completion For Your cobra.Command - -Please refer to [Shell Completions](shell_completions.md) for details. - -## Zsh completions standardization - -Cobra 1.1 standardized its zsh completion support to align it with its other shell completions. Although the API was kept backwards-compatible, some small changes in behavior were introduced. - -### Deprecation summary - -See further below for more details on these deprecations. - -* `cmd.MarkZshCompPositionalArgumentFile(pos, []string{})` is no longer needed. It is therefore **deprecated** and silently ignored. -* `cmd.MarkZshCompPositionalArgumentFile(pos, glob[])` is **deprecated** and silently ignored. - * Instead use `ValidArgsFunction` with `ShellCompDirectiveFilterFileExt`. -* `cmd.MarkZshCompPositionalArgumentWords()` is **deprecated** and silently ignored. - * Instead use `ValidArgsFunction`. - -### Behavioral changes - -**Noun completion** -|Old behavior|New behavior| -|---|---| -|No file completion by default (opposite of bash)|File completion by default; use `ValidArgsFunction` with `ShellCompDirectiveNoFileComp` to turn off file completion on a per-argument basis| -|Completion of flag names without the `-` prefix having been typed|Flag names are only completed if the user has typed the first `-`| -`cmd.MarkZshCompPositionalArgumentFile(pos, []string{})` used to turn on file completion on a per-argument position basis|File completion for all arguments by default; `cmd.MarkZshCompPositionalArgumentFile()` is **deprecated** and silently ignored| -|`cmd.MarkZshCompPositionalArgumentFile(pos, glob[])` used to turn on file completion **with glob filtering** on a per-argument position basis (zsh-specific)|`cmd.MarkZshCompPositionalArgumentFile()` is **deprecated** and silently ignored; use `ValidArgsFunction` with `ShellCompDirectiveFilterFileExt` for file **extension** filtering (not full glob filtering)| -|`cmd.MarkZshCompPositionalArgumentWords(pos, words[])` used to provide completion choices on a per-argument position basis (zsh-specific)|`cmd.MarkZshCompPositionalArgumentWords()` is **deprecated** and silently ignored; use `ValidArgsFunction` to achieve the same behavior| - -**Flag-value completion** - -|Old behavior|New behavior| -|---|---| -|No file completion by default (opposite of bash)|File completion by default; use `RegisterFlagCompletionFunc()` with `ShellCompDirectiveNoFileComp` to turn off file completion| -|`cmd.MarkFlagFilename(flag, []string{})` and similar used to turn on file completion|File completion by default; `cmd.MarkFlagFilename(flag, []string{})` no longer needed in this context and silently ignored| -|`cmd.MarkFlagFilename(flag, glob[])` used to turn on file completion **with glob filtering** (syntax of `[]string{"*.yaml", "*.yml"}` incompatible with bash)|Will continue to work, however, support for bash syntax is added and should be used instead so as to work for all shells (`[]string{"yaml", "yml"}`)| -|`cmd.MarkFlagDirname(flag)` only completes directories (zsh-specific)|Has been added for all shells| -|Completion of a flag name does not repeat, unless flag is of type `*Array` or `*Slice` (not supported by bash)|Retained for `zsh` and added to `fish`| -|Completion of a flag name does not provide the `=` form (unlike bash)|Retained for `zsh` and added to `fish`| - -**Improvements** - -* Custom completion support (`ValidArgsFunction` and `RegisterFlagCompletionFunc()`) -* File completion by default if no other completions found -* Handling of required flags -* File extension filtering no longer mutually exclusive with bash usage -* Completion of directory names *within* another directory -* Support for `=` form of flags diff --git a/vendor/github.com/tkrajina/gpxgo/gpx/converters.go b/vendor/github.com/tkrajina/gpxgo/gpx/converters.go index 189dbdb0..2fd7614c 100644 --- a/vendor/github.com/tkrajina/gpxgo/gpx/converters.go +++ b/vendor/github.com/tkrajina/gpxgo/gpx/converters.go @@ -13,7 +13,7 @@ import ( "strings" ) -//defaultCreator contains the original repo path +// defaultCreator contains the original repo path const defaultCreator = "https://github.com/tkrajina/gpxgo" // ---------------------------------------------------------------------------------------------------- @@ -217,8 +217,8 @@ func convertFromGpx10Models(gpx10Doc *gpx10Gpx) *GPX { func convertPointToGpx10(original *GPXPoint) *gpx10GpxPoint { result := new(gpx10GpxPoint) - result.Lat = original.Latitude - result.Lon = original.Longitude + result.Lat = formattedFloat(original.Latitude) + result.Lon = formattedFloat(original.Longitude) result.Ele = original.Elevation result.Timestamp = formatGPXTime(&original.Timestamp) result.MagVar = original.MagneticVariation @@ -261,8 +261,8 @@ func convertPointToGpx10(original *GPXPoint) *gpx10GpxPoint { func convertPointFromGpx10(original *gpx10GpxPoint) *GPXPoint { result := new(GPXPoint) - result.Latitude = original.Lat - result.Longitude = original.Lon + result.Latitude = float64(original.Lat) + result.Longitude = float64(original.Lon) result.Elevation = original.Ele time, _ := parseGPXTime(original.Timestamp) if time != nil { @@ -411,6 +411,9 @@ func convertToGpx11Models(gpxDoc *GPX) (*gpx11Gpx, map[string]string) { gpx11Doc.Extensions = gpxDoc.Extensions gpx11Doc.Extensions.globalNsAttrs = gpxDoc.Attrs.GetNamespaceAttrs() + gpx11Doc.MetadataExtensions = gpxDoc.MetadataExtensions + gpx11Doc.MetadataExtensions.globalNsAttrs = gpxDoc.Attrs.GetNamespaceAttrs() + if len(gpxDoc.Creator) == 0 { gpx11Doc.Creator = defaultCreator } else { @@ -542,6 +545,7 @@ func convertFromGpx11Models(gpx11Doc *gpx11Gpx) *GPX { gpxDoc.Description = gpx11Doc.Desc gpxDoc.AuthorName = gpx11Doc.AuthorName gpxDoc.Extensions = gpx11Doc.Extensions + gpxDoc.MetadataExtensions = gpx11Doc.MetadataExtensions if gpx11Doc.AuthorEmail != nil { gpxDoc.AuthorEmail = gpx11Doc.AuthorEmail.Id + "@" + gpx11Doc.AuthorEmail.Domain @@ -648,8 +652,8 @@ func convertFromGpx11Models(gpx11Doc *gpx11Gpx) *GPX { func convertPointToGpx11(original *GPXPoint) *gpx11GpxPoint { result := new(gpx11GpxPoint) - result.Lat = original.Latitude - result.Lon = original.Longitude + result.Lat = formattedFloat(original.Latitude) + result.Lon = formattedFloat(original.Longitude) result.Ele = original.Elevation result.Timestamp = formatGPXTime(&original.Timestamp) result.MagVar = original.MagneticVariation @@ -693,8 +697,8 @@ func convertPointToGpx11(original *GPXPoint) *gpx11GpxPoint { func convertPointFromGpx11(original *gpx11GpxPoint) *GPXPoint { result := new(GPXPoint) - result.Latitude = original.Lat - result.Longitude = original.Lon + result.Latitude = float64(original.Lat) + result.Longitude = float64(original.Lon) result.Elevation = original.Ele time, _ := parseGPXTime(original.Timestamp) if time != nil { diff --git a/vendor/github.com/tkrajina/gpxgo/gpx/fixedpoint_float64.go b/vendor/github.com/tkrajina/gpxgo/gpx/fixedpoint_float64.go new file mode 100644 index 00000000..3d9cd5d5 --- /dev/null +++ b/vendor/github.com/tkrajina/gpxgo/gpx/fixedpoint_float64.go @@ -0,0 +1,21 @@ +package gpx + +import ( + "encoding/xml" + "strconv" + "strings" +) + +// formattedFloat forces XML attributes to be marshalled as a fixed point decimal with 10 decimal places. +type formattedFloat float64 + +func (f formattedFloat) MarshalXMLAttr(name xml.Name) (xml.Attr, error) { + s := strings.TrimRight(strconv.FormatFloat(float64(f), 'f', 10, 64), "0") + if strings.HasSuffix(s, ".") { + s += "0" + } + return xml.Attr{ + Name: xml.Name{Local: name.Local}, + Value: s, + }, nil +} diff --git a/vendor/github.com/tkrajina/gpxgo/gpx/geo.go b/vendor/github.com/tkrajina/gpxgo/gpx/geo.go index 0d8084c3..4e7129d3 100644 --- a/vendor/github.com/tkrajina/gpxgo/gpx/geo.go +++ b/vendor/github.com/tkrajina/gpxgo/gpx/geo.go @@ -97,7 +97,7 @@ func Length3D(locs []Point) float64 { func CalcMaxSpeed(speedsDistances []SpeedsAndDistances) float64 { lenArrs := len(speedsDistances) - if len(speedsDistances) < 20 { + if len(speedsDistances) < 3 { //log.Println("Segment too small to compute speed, size: ", lenArrs) return 0.0 } @@ -129,6 +129,7 @@ func CalcMaxSpeed(speedsDistances []SpeedsAndDistances) float64 { } speedsSorted := sort.Float64Slice(speeds) + sort.Sort(speedsSorted) if len(speedsSorted) == 0 { return 0 diff --git a/vendor/github.com/tkrajina/gpxgo/gpx/gpx.go b/vendor/github.com/tkrajina/gpxgo/gpx/gpx.go index 84c39d13..30cf2798 100644 --- a/vendor/github.com/tkrajina/gpxgo/gpx/gpx.go +++ b/vendor/github.com/tkrajina/gpxgo/gpx/gpx.go @@ -58,8 +58,8 @@ func GetGpxElementInfo(prefix string, gpxDoc GPXElementInfo) string { // ---------------------------------------------------------------------------------------------------- -//GPX implements one or multiple GPS tracks that can be written to and parsed -//from a gpx file +// GPX implements one or multiple GPS tracks that can be written to and parsed +// from a gpx file type GPX struct { XMLNs string XmlNsXsi string @@ -85,10 +85,11 @@ type GPX struct { Time *time.Time Keywords string - Waypoints []GPXPoint - Routes []GPXRoute - Tracks []GPXTrack - Extensions Extension + Waypoints []GPXPoint + Routes []GPXRoute + Tracks []GPXTrack + Extensions Extension + MetadataExtensions Extension } func (g *GPX) RegisterNamespace(ns, url string) { @@ -127,7 +128,7 @@ func (g *GPX) GetGpxInfo() string { return result } -//GetTrackPointsNo returns the amount of track points of all tracks +// GetTrackPointsNo returns the amount of track points of all tracks func (g *GPX) GetTrackPointsNo() int { result := 0 for _, track := range g.Tracks { @@ -184,7 +185,7 @@ func (g *GPX) Bounds() GpxBounds { return minmax } -//ElevationBounds returns the min and max elevation of all tracks +// ElevationBounds returns the min and max elevation of all tracks func (g *GPX) ElevationBounds() ElevationBounds { minmax := getMaximalElevationBounds() for _, trk := range g.Tracks { @@ -225,7 +226,7 @@ func (g *GPX) MovingData() MovingData { } } -//ReduceTrackPoints reduces the number of track points of all tracks +// ReduceTrackPoints reduces the number of track points of all tracks func (g *GPX) ReduceTrackPoints(maxPointsNo int, minDistanceBetween float64) { pointsNo := g.GetTrackPointsNo() @@ -329,7 +330,7 @@ func (g *GPX) PositionAt(t time.Time) []TrackPosition { return results } -//StoppedPositions returns the positions where there was a stop +// StoppedPositions returns the positions where there was a stop func (g *GPX) StoppedPositions() []TrackPosition { result := make([]TrackPosition, 0) for trackNo, track := range g.Tracks { @@ -442,35 +443,35 @@ func (g *GPX) getPositionsOnTrackWithPrecomputedDistances(location Location, dis return pointLocations } -//ExecuteOnAllPoints executes given function on all points +// ExecuteOnAllPoints executes given function on all points func (g *GPX) ExecuteOnAllPoints(executor func(*GPXPoint)) { g.ExecuteOnWaypoints(executor) g.ExecuteOnRoutePoints(executor) g.ExecuteOnTrackPoints(executor) } -//ExecuteOnWaypoints executes given function on waypoints +// ExecuteOnWaypoints executes given function on waypoints func (g *GPX) ExecuteOnWaypoints(executor func(*GPXPoint)) { for waypointNo := range g.Waypoints { executor(&g.Waypoints[waypointNo]) } } -//ExecuteOnRoutePoints executes given function on route points +// ExecuteOnRoutePoints executes given function on route points func (g *GPX) ExecuteOnRoutePoints(executor func(*GPXPoint)) { for _, route := range g.Routes { route.ExecuteOnPoints(executor) } } -//ExecuteOnTrackPoints executes given function on track points +// ExecuteOnTrackPoints executes given function on track points func (g *GPX) ExecuteOnTrackPoints(executor func(*GPXPoint)) { for _, track := range g.Tracks { track.ExecuteOnPoints(executor) } } -//AddElevation adds elevation on all points (pointElevation = pointElevation + elevation) +// AddElevation adds elevation on all points (pointElevation = pointElevation + elevation) func (g *GPX) AddElevation(elevation float64) { g.ExecuteOnAllPoints(func(point *GPXPoint) { fmt.Println("setting elevation if NotNull for:", point.Elevation) @@ -481,14 +482,14 @@ func (g *GPX) AddElevation(elevation float64) { }) } -//RemoveElevation removes elevation info on all points +// RemoveElevation removes elevation info on all points func (g *GPX) RemoveElevation() { g.ExecuteOnAllPoints(func(point *GPXPoint) { point.Elevation.SetNull() }) } -//ReduceGpxToSingleTrack combines all tracks to a single track +// ReduceGpxToSingleTrack combines all tracks to a single track func (g *GPX) ReduceGpxToSingleTrack() { if len(g.Tracks) <= 1 { return @@ -531,42 +532,42 @@ func (g *GPX) RemoveEmpty() { g.Tracks = nonEmptyTracks } -//SmoothHorizontal smoothes all tracks horizontally +// SmoothHorizontal smoothes all tracks horizontally func (g *GPX) SmoothHorizontal() { for trackNo := range g.Tracks { g.Tracks[trackNo].SmoothHorizontal() } } -//SmoothVertical smoothes all tracks vertically +// SmoothVertical smoothes all tracks vertically func (g *GPX) SmoothVertical() { for trackNo := range g.Tracks { g.Tracks[trackNo].SmoothVertical() } } -//RemoveHorizontalExtremes removes horizontal extremes +// RemoveHorizontalExtremes removes horizontal extremes func (g *GPX) RemoveHorizontalExtremes() { for trackNo := range g.Tracks { g.Tracks[trackNo].RemoveHorizontalExtremes() } } -//RemoveVerticalExtremes removes vertical extremes +// RemoveVerticalExtremes removes vertical extremes func (g *GPX) RemoveVerticalExtremes() { for trackNo := range g.Tracks { g.Tracks[trackNo].RemoveVerticalExtremes() } } -//AddMissingTime adds missing times +// AddMissingTime adds missing times func (g *GPX) AddMissingTime() { for trackNo := range g.Tracks { g.Tracks[trackNo].AddMissingTime() } } -//AppendTrack adds given track +// AppendTrack adds given track func (g *GPX) AppendTrack(t *GPXTrack) { g.Tracks = append(g.Tracks, *t) } @@ -597,19 +598,19 @@ func (g *GPX) AppendPoint(p *GPXPoint) { lastSegment.AppendPoint(p) } -//AppendRoute adds a route +// AppendRoute adds a route func (g *GPX) AppendRoute(r *GPXRoute) { g.Routes = append(g.Routes, *r) } -//AppendWaypoint adds a waypoint +// AppendWaypoint adds a waypoint func (g *GPX) AppendWaypoint(w *GPXPoint) { g.Waypoints = append(g.Waypoints, *w) } // ---------------------------------------------------------------------------------------------------- -//ElevationBounds contains max/min elevation +// ElevationBounds contains max/min elevation type ElevationBounds struct { MinElevation float64 MaxElevation float64 @@ -620,14 +621,14 @@ func (b ElevationBounds) Equals(b2 ElevationBounds) bool { return b.MinElevation == b2.MinElevation && b.MaxElevation == b2.MaxElevation } -//String implements Stringer interface +// String implements Stringer interface func (b *ElevationBounds) String() string { return fmt.Sprintf("Max: %+v Min: %+v", b.MinElevation, b.MaxElevation) } // ---------------------------------------------------------------------------------------------------- -//GpxBounds contains min/max latitude and longitude +// GpxBounds contains min/max latitude and longitude type GpxBounds struct { MinLatitude float64 MaxLatitude float64 @@ -640,7 +641,7 @@ func (b GpxBounds) Equals(b2 GpxBounds) bool { return b.MinLatitude == b2.MinLatitude && b.MaxLatitude == b2.MaxLatitude && b.MinLongitude == b2.MinLongitude && b.MaxLongitude == b2.MaxLongitude } -//String implements Stringer interface +// String implements Stringer interface func (b *GpxBounds) String() string { return fmt.Sprintf("Max: %+v, %+v Min: %+v, %+v", b.MinLatitude, b.MinLongitude, b.MaxLatitude, b.MaxLongitude) } @@ -665,17 +666,17 @@ func (pt Point) Add(latDelta, lonDelta, eleDelta float64) Point { return res } -//GetLatitude returns the latitude +// GetLatitude returns the latitude func (pt *Point) GetLatitude() float64 { return pt.Latitude } -//GetLongitude returns the longititude +// GetLongitude returns the longititude func (pt *Point) GetLongitude() float64 { return pt.Longitude } -//GetElevation returns the elevation +// GetElevation returns the elevation func (pt *Point) GetElevation() NullableFloat64 { return pt.Elevation } @@ -692,13 +693,13 @@ func (pt *Point) Distance3D(pt2 Location) float64 { // ---------------------------------------------------------------------------------------------------- -//TimeBounds contains min/max time +// TimeBounds contains min/max time type TimeBounds struct { StartTime time.Time EndTime time.Time } -//Equals compares to another TimeBounds struct +// Equals compares to another TimeBounds struct func (tb TimeBounds) Equals(tb2 TimeBounds) bool { if tb.StartTime == tb2.StartTime && tb.EndTime == tb2.EndTime { return true @@ -706,20 +707,20 @@ func (tb TimeBounds) Equals(tb2 TimeBounds) bool { return false } -//String implements Stringer interface +// String implements Stringer interface func (tb *TimeBounds) String() string { return fmt.Sprintf("%+v, %+v", tb.StartTime, tb.EndTime) } // ---------------------------------------------------------------------------------------------------- -//UphillDownhill contains uphill/downhill information +// UphillDownhill contains uphill/downhill information type UphillDownhill struct { Uphill float64 Downhill float64 } -//Equals compares to another UphillDownhill struct +// Equals compares to another UphillDownhill struct func (ud UphillDownhill) Equals(ud2 UphillDownhill) bool { if ud.Uphill == ud2.Uphill && ud.Downhill == ud2.Downhill { return true @@ -739,7 +740,7 @@ type TrackPosition struct { // ---------------------------------------------------------------------------------------------------- -//GPXPoint represents a point of the gpx file +// GPXPoint represents a point of the gpx file type GPXPoint struct { Point // TODO @@ -807,7 +808,7 @@ func (pt *GPXPoint) MaxDilutionOfPrecision() float64 { // ---------------------------------------------------------------------------------------------------- -//GPXRoute implements a gpx route +// GPXRoute implements a gpx route type GPXRoute struct { Name string Comment string @@ -852,7 +853,7 @@ func (rte *GPXRoute) Center() (float64, float64) { return sumLat / n, sumLon / n } -//ExecuteOnPoints executes given function on all points of the route +// ExecuteOnPoints executes given function on all points of the route func (rte *GPXRoute) ExecuteOnPoints(executor func(*GPXPoint)) { for pointNo := range rte.Points { executor(&rte.Points[pointNo]) @@ -861,7 +862,7 @@ func (rte *GPXRoute) ExecuteOnPoints(executor func(*GPXPoint)) { // ---------------------------------------------------------------------------------------------------- -//GPXTrackSegment represents a segment of a track +// GPXTrackSegment represents a segment of a track type GPXTrackSegment struct { Points []GPXPoint Extensions Extension @@ -887,7 +888,7 @@ func (seg *GPXTrackSegment) Length3D() float64 { return Length3D(points) } -//GetTrackPointsNo returns the amount of points of the segment +// GetTrackPointsNo returns the amount of points of the segment func (seg *GPXTrackSegment) GetTrackPointsNo() int { return len(seg.Points) } @@ -926,7 +927,7 @@ func (seg *GPXTrackSegment) Bounds() GpxBounds { return minmax } -//ElevationBounds returns the min and max elevation of the segment +// ElevationBounds returns the min and max elevation of the segment func (seg *GPXTrackSegment) ElevationBounds() ElevationBounds { minmax := getMaximalElevationBounds() for _, pt := range seg.Points { @@ -938,8 +939,8 @@ func (seg *GPXTrackSegment) ElevationBounds() ElevationBounds { return minmax } -//HasTimes checks if there are times -//WARNING: currently not implemented! +// HasTimes checks if there are times +// WARNING: currently not implemented! func (seg *GPXTrackSegment) HasTimes() bool { return false /* @@ -1048,14 +1049,14 @@ func (seg *GPXTrackSegment) UphillDownhill() UphillDownhill { return UphillDownhill{Uphill: uphill, Downhill: downhill} } -//ExecuteOnPoints executes given function on segment points +// ExecuteOnPoints executes given function on segment points func (seg *GPXTrackSegment) ExecuteOnPoints(executor func(*GPXPoint)) { for pointNo := range seg.Points { executor(&seg.Points[pointNo]) } } -//ReduceTrackPoints reduces the number of track points of the segment +// ReduceTrackPoints reduces the number of track points of the segment func (seg *GPXTrackSegment) ReduceTrackPoints(minDistance float64) { if minDistance <= 0 { return @@ -1083,7 +1084,7 @@ func (seg *GPXTrackSegment) SimplifyTracks(maxDistance float64) { seg.Points = simplifyPoints(seg.Points, maxDistance) } -//AddElevation adds elevation on segment points (pointElevation = pointElevation + elevation) +// AddElevation adds elevation on segment points (pointElevation = pointElevation + elevation) func (seg *GPXTrackSegment) AddElevation(elevation float64) { for _, point := range seg.Points { if point.Elevation.NotNull() { @@ -1132,7 +1133,7 @@ func (seg *GPXTrackSegment) PositionAt(t time.Time) int { return -1 } -//StoppedPositions returns the positions where there was a stop +// StoppedPositions returns the positions where there was a stop func (seg *GPXTrackSegment) StoppedPositions() []TrackPosition { result := make([]TrackPosition, 0) for pointNo, point := range seg.Points { @@ -1205,22 +1206,22 @@ func (seg *GPXTrackSegment) MovingData() MovingData { } } -//AppendPoint adds a point to the segment +// AppendPoint adds a point to the segment func (seg *GPXTrackSegment) AppendPoint(p *GPXPoint) { seg.Points = append(seg.Points, *p) } -//SmoothVertical smoothes the segment vertically +// SmoothVertical smoothes the segment vertically func (seg *GPXTrackSegment) SmoothVertical() { seg.Points = smoothVertical(seg.Points) } -//SmoothHorizontal smoothes the segment horizontally +// SmoothHorizontal smoothes the segment horizontally func (seg *GPXTrackSegment) SmoothHorizontal() { seg.Points = smoothHorizontal(seg.Points) } -//RemoveVerticalExtremes removes vertical extremes from the segment +// RemoveVerticalExtremes removes vertical extremes from the segment func (seg *GPXTrackSegment) RemoveVerticalExtremes() { if len(seg.Points) < removeExtreemesTreshold { return @@ -1261,7 +1262,7 @@ func (seg *GPXTrackSegment) RemoveVerticalExtremes() { seg.Points = newPoints } -//RemoveHorizontalExtremes removes horizontal extremes from the segment +// RemoveHorizontalExtremes removes horizontal extremes from the segment func (seg *GPXTrackSegment) RemoveHorizontalExtremes() { // Dont't remove extreemes if segment too small if len(seg.Points) < removeExtreemesTreshold { @@ -1305,7 +1306,7 @@ func (seg *GPXTrackSegment) RemoveHorizontalExtremes() { seg.Points = newPoints } -//AddMissingTime adds missing times in the segment +// AddMissingTime adds missing times in the segment func (seg *GPXTrackSegment) AddMissingTime() { emptySegmentStart := -1 for pointNo := range seg.Points { @@ -1351,7 +1352,7 @@ func (seg *GPXTrackSegment) addMissingTimeInSegment(start, end int) { // ---------------------------------------------------------------------------------------------------- -//GPXTrack implements a gpx track +// GPXTrack implements a gpx track type GPXTrack struct { Name string Comment string @@ -1385,7 +1386,7 @@ func (trk *GPXTrack) Length3D() float64 { return l } -//GetTrackPointsNo returns the amount of points on the track +// GetTrackPointsNo returns the amount of points on the track func (trk *GPXTrack) GetTrackPointsNo() int { result := 0 for _, segment := range trk.Segments { @@ -1425,7 +1426,7 @@ func (trk *GPXTrack) Bounds() GpxBounds { return minmax } -//ElevationBounds returns the elevation bouds of the track +// ElevationBounds returns the elevation bouds of the track func (trk *GPXTrack) ElevationBounds() ElevationBounds { minmax := getMaximalElevationBounds() for _, seg := range trk.Segments { @@ -1436,7 +1437,7 @@ func (trk *GPXTrack) ElevationBounds() ElevationBounds { return minmax } -//HasTimes checks if the track has times +// HasTimes checks if the track has times func (trk *GPXTrack) HasTimes() bool { result := true for _, segment := range trk.Segments { @@ -1445,7 +1446,7 @@ func (trk *GPXTrack) HasTimes() bool { return result } -//ReduceTrackPoints reduces the number of points on the track +// ReduceTrackPoints reduces the number of points on the track func (trk *GPXTrack) ReduceTrackPoints(minDistance float64) { for segmentNo := range trk.Segments { trk.Segments[segmentNo].ReduceTrackPoints(minDistance) @@ -1480,14 +1481,14 @@ func (trk *GPXTrack) Split(segNo, ptNo int) { trk.Segments = newSegs } -//ExecuteOnPoints executes given function on track points +// ExecuteOnPoints executes given function on track points func (trk *GPXTrack) ExecuteOnPoints(executor func(*GPXPoint)) { for segmentNo := range trk.Segments { trk.Segments[segmentNo].ExecuteOnPoints(executor) } } -//AddElevation adds elevation on all points of the track (pointElevation = pointElevation + elevation) +// AddElevation adds elevation on all points of the track (pointElevation = pointElevation + elevation) func (trk *GPXTrack) AddElevation(elevation float64) { for segmentNo := range trk.Segments { trk.Segments[segmentNo].AddElevation(elevation) @@ -1606,7 +1607,7 @@ func (trk *GPXTrack) PositionAt(t time.Time) []TrackPosition { return results } -//StoppedPositions returns the positions where there was a stop +// StoppedPositions returns the positions where there was a stop func (trk *GPXTrack) StoppedPositions() []TrackPosition { result := make([]TrackPosition, 0) for segmentNo, segment := range trk.Segments { @@ -1619,40 +1620,40 @@ func (trk *GPXTrack) StoppedPositions() []TrackPosition { return result } -//AppendSegment adds a segment to the track +// AppendSegment adds a segment to the track func (trk *GPXTrack) AppendSegment(s *GPXTrackSegment) { trk.Segments = append(trk.Segments, *s) } -//SmoothVertical smoothes the track vertically +// SmoothVertical smoothes the track vertically func (trk *GPXTrack) SmoothVertical() { for segmentNo := range trk.Segments { trk.Segments[segmentNo].SmoothVertical() } } -//SmoothHorizontal smoothes the track horizontally +// SmoothHorizontal smoothes the track horizontally func (trk *GPXTrack) SmoothHorizontal() { for segmentNo := range trk.Segments { trk.Segments[segmentNo].SmoothHorizontal() } } -//RemoveVerticalExtremes removes vertical extremes +// RemoveVerticalExtremes removes vertical extremes func (trk *GPXTrack) RemoveVerticalExtremes() { for segmentNo := range trk.Segments { trk.Segments[segmentNo].RemoveVerticalExtremes() } } -//RemoveHorizontalExtremes removes horizontal extremes +// RemoveHorizontalExtremes removes horizontal extremes func (trk *GPXTrack) RemoveHorizontalExtremes() { for segmentNo := range trk.Segments { trk.Segments[segmentNo].RemoveHorizontalExtremes() } } -//AddMissingTime adds missing times +// AddMissingTime adds missing times func (trk *GPXTrack) AddMissingTime() { for segmentNo := range trk.Segments { trk.Segments[segmentNo].AddMissingTime() diff --git a/vendor/github.com/tkrajina/gpxgo/gpx/gpx10.go b/vendor/github.com/tkrajina/gpxgo/gpx/gpx10.go index 269f7b2c..d8be15cc 100644 --- a/vendor/github.com/tkrajina/gpxgo/gpx/gpx10.go +++ b/vendor/github.com/tkrajina/gpxgo/gpx/gpx10.go @@ -179,8 +179,8 @@ type gpx10GpxLink struct { * Common struct fields for all points */ type gpx10GpxPoint struct { - Lat float64 `xml:"lat,attr"` - Lon float64 `xml:"lon,attr"` + Lat formattedFloat `xml:"lat,attr"` + Lon formattedFloat `xml:"lon,attr"` // Position info Ele NullableFloat64 `xml:"ele,omitempty"` Timestamp string `xml:"time,omitempty"` diff --git a/vendor/github.com/tkrajina/gpxgo/gpx/gpx11.go b/vendor/github.com/tkrajina/gpxgo/gpx/gpx11.go index 31855b75..94df7f3c 100644 --- a/vendor/github.com/tkrajina/gpxgo/gpx/gpx11.go +++ b/vendor/github.com/tkrajina/gpxgo/gpx/gpx11.go @@ -162,24 +162,25 @@ type gpx11Gpx struct { AuthorName string `xml:"metadata>author>name,omitempty"` AuthorEmail *gpx11GpxEmail `xml:"metadata>author>email,omitempty"` // TODO: There can be more than one link? - AuthorLink *gpx11GpxLink `xml:"metadata>author>link,omitempty"` - Copyright *gpx11GpxCopyright `xml:"metadata>copyright,omitempty"` - Link *gpx11GpxLink `xml:"metadata>link,omitempty"` - Timestamp string `xml:"metadata>time,omitempty"` - Keywords string `xml:"metadata>keywords,omitempty"` - Extensions Extension `xml:"metadata>extensions"` - Bounds *gpx11GpxBounds `xml:"bounds"` - Waypoints []*gpx11GpxPoint `xml:"wpt"` - Routes []*gpx11GpxRte `xml:"rte"` - Tracks []*gpx11GpxTrk `xml:"trk"` + AuthorLink *gpx11GpxLink `xml:"metadata>author>link,omitempty"` + Copyright *gpx11GpxCopyright `xml:"metadata>copyright,omitempty"` + Link *gpx11GpxLink `xml:"metadata>link,omitempty"` + Timestamp string `xml:"metadata>time,omitempty"` + Keywords string `xml:"metadata>keywords,omitempty"` + MetadataExtensions Extension `xml:"metadata>extensions"` + Bounds *gpx11GpxBounds `xml:"bounds"` + Waypoints []*gpx11GpxPoint `xml:"wpt"` + Routes []*gpx11GpxRte `xml:"rte"` + Tracks []*gpx11GpxTrk `xml:"trk"` + Extensions Extension `xml:"extensions"` } type gpx11GpxBounds struct { //XMLName xml.Name `xml:"bounds"` - MinLat float64 `xml:"minlat,attr"` - MaxLat float64 `xml:"maxlat,attr"` - MinLon float64 `xml:"minlon,attr"` - MaxLon float64 `xml:"maxlon,attr"` + MinLat formattedFloat `xml:"minlat,attr"` + MaxLat formattedFloat `xml:"maxlat,attr"` + MinLon formattedFloat `xml:"minlon,attr"` + MaxLon formattedFloat `xml:"maxlon,attr"` } type gpx11GpxCopyright struct { @@ -222,8 +223,8 @@ type gpx11GpxLink struct { * Common struct fields for all points */ type gpx11GpxPoint struct { - Lat float64 `xml:"lat,attr"` - Lon float64 `xml:"lon,attr"` + Lat formattedFloat `xml:"lat,attr"` + Lon formattedFloat `xml:"lon,attr"` // Position info Ele NullableFloat64 `xml:"ele,omitempty"` Timestamp string `xml:"time,omitempty"` diff --git a/vendor/github.com/tkrajina/gpxgo/gpx/nullable_float64.go b/vendor/github.com/tkrajina/gpxgo/gpx/nullable_float64.go index 9db9a06f..8efab86b 100644 --- a/vendor/github.com/tkrajina/gpxgo/gpx/nullable_float64.go +++ b/vendor/github.com/tkrajina/gpxgo/gpx/nullable_float64.go @@ -12,41 +12,41 @@ import ( "strings" ) -//NullableFloat64 implements a nullable float64 +// NullableFloat64 implements a nullable float64 type NullableFloat64 struct { data float64 notNull bool } -//Null checks if value is null +// Null checks if value is null func (n *NullableFloat64) Null() bool { return !n.notNull } -//NotNull checks if value is not null +// NotNull checks if value is not null func (n *NullableFloat64) NotNull() bool { return n.notNull } -//Value returns the value +// Value returns the value func (n *NullableFloat64) Value() float64 { return n.data } -//SetValue sets the value +// SetValue sets the value func (n *NullableFloat64) SetValue(data float64) { n.data = data n.notNull = true } -//SetNull sets the value to null +// SetNull sets the value to null func (n *NullableFloat64) SetNull() { var defaultValue float64 n.data = defaultValue n.notNull = false } -//NewNullableFloat64 creates a new NullableFloat64 +// NewNullableFloat64 creates a new NullableFloat64 func NewNullableFloat64(data float64) *NullableFloat64 { result := new(NullableFloat64) result.data = data @@ -54,7 +54,7 @@ func NewNullableFloat64(data float64) *NullableFloat64 { return result } -//UnmarshalXML implements xml unmarshalling +// UnmarshalXML implements xml unmarshalling func (n *NullableFloat64) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { t, err := d.Token() if err != nil { @@ -74,7 +74,7 @@ func (n *NullableFloat64) UnmarshalXML(d *xml.Decoder, start xml.StartElement) e return nil } -//UnmarshalXMLAttr implements xml attribute unmarshalling +// UnmarshalXMLAttr implements xml attribute unmarshalling func (n *NullableFloat64) UnmarshalXMLAttr(attr xml.Attr) error { strData := strings.Trim(string(attr.Value), " ") value, err := strconv.ParseFloat(strData, 64) @@ -86,7 +86,7 @@ func (n *NullableFloat64) UnmarshalXMLAttr(attr xml.Attr) error { return nil } -//MarshalXML implements xml marshalling +// MarshalXML implements xml marshalling func (n NullableFloat64) MarshalXML(e *xml.Encoder, start xml.StartElement) error { if n.Null() { return nil @@ -104,14 +104,11 @@ func (n NullableFloat64) MarshalXML(e *xml.Encoder, start xml.StartElement) erro return nil } -//MarshalXMLAttr implements xml attribute marshalling +// MarshalXMLAttr implements xml attribute marshalling func (n NullableFloat64) MarshalXMLAttr(name xml.Name) (xml.Attr, error) { var result xml.Attr if n.Null() { return result, nil } - return xml.Attr{ - Name: xml.Name{Local: name.Local}, - Value: fmt.Sprintf("%g", n.Value())}, - nil + return formattedFloat(n.Value()).MarshalXMLAttr(name) } diff --git a/vendor/github.com/tkrajina/gpxgo/gpx/xml.go b/vendor/github.com/tkrajina/gpxgo/gpx/xml.go index 89743fc6..6a28c2cd 100644 --- a/vendor/github.com/tkrajina/gpxgo/gpx/xml.go +++ b/vendor/github.com/tkrajina/gpxgo/gpx/xml.go @@ -9,7 +9,7 @@ import ( "bytes" "encoding/xml" "errors" - "io/ioutil" + "io" "os" "strings" "time" @@ -23,6 +23,7 @@ const formattingTimelayout = "2006-01-02T15:04:05Z" var parsingTimelayouts = []string{ "2006-01-02T15:04:05.000Z", formattingTimelayout, + "2006-01-02T15:04:05+00:00", "2006-01-02T15:04:05", "2006-01-02 15:04:05Z", "2006-01-02 15:04:05", @@ -36,14 +37,14 @@ func init() { */ } -//ToXmlParams contains settings for xml transformation +// ToXmlParams contains settings for xml transformation type ToXmlParams struct { Version string Indent bool } -//ToXml returns the xml representation of the GPX object. -//Params are optional, you can set null to use GPXs Version and no indentation. +// ToXml returns the xml representation of the GPX object. +// Params are optional, you can set null to use GPXs Version and no indentation. func ToXml(g *GPX, params ToXmlParams) ([]byte, error) { version := g.Version if len(params.Version) > 0 { @@ -97,21 +98,22 @@ func guessGPXVersion(bytes []byte) (string, error) { parts := strings.Split(startOfDocument, "") } parts = strings.Split(parts[1], "version=") if len(parts) <= 1 { - return "", errors.New("invalid GPX file, cannot find version") + return "", errors.New("invalid GPX file, cannot find version in ") } - if len(parts[1]) < 10 { - return "", errors.New("invalid GPX file, cannot find version") + version := strings.TrimLeft(parts[1], `'" `) + if strings.HasPrefix(version, "1.0") { + return "1.0", nil + } else if strings.HasPrefix(version, "1.1") { + return "1.1", nil } - result := parts[1][1:4] - - return result, nil + return "", errors.New("invalid GPX file, cannot find version") } func parseGPXTime(timestr string) (*time.Time, error) { @@ -142,7 +144,7 @@ func formatGPXTime(time *time.Time) string { return time.Format(formattingTimelayout) } -//ParseFile parses a gpx file and returns a GPX object +// ParseFile parses a gpx file and returns a GPX object func ParseFile(fileName string) (*GPX, error) { f, err := os.Open(fileName) if err != nil { @@ -151,55 +153,60 @@ func ParseFile(fileName string) (*GPX, error) { defer f.Close() - buf, err := ioutil.ReadAll(f) - if err != nil { - return nil, err - } - - return ParseBytes(buf) + return Parse(f) } -//ParseBytes parses GPX from bytes +// ParseBytes parses GPX from bytes func ParseBytes(buf []byte) (*GPX, error) { + return Parse(bytes.NewReader(buf)) +} - version, err := guessGPXVersion(buf) +func ParseDecoder(decoder *xml.Decoder, initialBytes []byte) (*GPX, error) { + version, err := guessGPXVersion(initialBytes) if err != nil { // Unknown version, try with 1.1 version = "1.1" } - reader := bytes.NewReader(buf) - decoder := xml.NewDecoder(reader) - decoder.CharsetReader = charset.NewReaderLabel - - if version == "1.0" { - + switch version { + case "1.0": g := &gpx10Gpx{} - err = decoder.Decode(&g) if err != nil { return nil, err } - return convertFromGpx10Models(g), nil - } - - if version == "1.1" { - + case "1.1": g := &gpx11Gpx{} - err = decoder.Decode(&g) if err != nil { return nil, err } - return convertFromGpx11Models(g), nil + default: + return nil, errors.New("Invalid version:" + version) } +} + +// Parse parses GPX from io.Reader +func Parse(inReader io.Reader) (*GPX, error) { + // at most 1000 bytes will make guessGPXVersion happy + buf := make([]byte, 1000) + + n, err := inReader.Read(buf) + if err != nil { + return nil, err + } + buf = buf[:n] + + reader := io.MultiReader(bytes.NewReader(buf), inReader) + decoder := xml.NewDecoder(reader) + decoder.CharsetReader = charset.NewReaderLabel - return nil, errors.New("Invalid version:" + version) + return ParseDecoder(decoder, buf) } -//ParseString parses GPX from string +// ParseString parses GPX from string func ParseString(str string) (*GPX, error) { - return ParseBytes([]byte(str)) + return Parse(strings.NewReader(str)) } diff --git a/vendor/github.com/ugorji/go/codec/cbor.go b/vendor/github.com/ugorji/go/codec/cbor.go index 10944487..802b1fc1 100644 --- a/vendor/github.com/ugorji/go/codec/cbor.go +++ b/vendor/github.com/ugorji/go/codec/cbor.go @@ -123,6 +123,11 @@ type cborEncDriver struct { encDriverNoopContainerWriter h *CborHandle + // scratch buffer for: encode time, numbers, etc + // + // RFC3339Nano uses 35 chars: 2006-01-02T15:04:05.999999999Z07:00 + b [40]byte + e Encoder } @@ -204,7 +209,7 @@ func (e *cborEncDriver) EncodeTime(t time.Time) { e.EncodeNil() } else if e.h.TimeRFC3339 { e.encUint(0, cborBaseTag) - e.encStringBytesS(cborBaseString, t.Format(time.RFC3339Nano)) + e.encStringBytesS(cborBaseString, stringView(fmtTime(t, time.RFC3339Nano, e.b[:0]))) } else { e.encUint(1, cborBaseTag) t = t.UTC().Round(time.Microsecond) @@ -427,12 +432,13 @@ func (d *cborDecDriver) decLen() int { return int(d.decUint()) } -func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte) []byte { +func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte, major byte) []byte { d.bdRead = false for !d.CheckBreak() { - if major := d.bd >> 5; major != cborMajorBytes && major != cborMajorString { - d.d.errorf("invalid indefinite string/bytes %x (%s); got major %v, expected %v or %v", - d.bd, cbordesc(d.bd), major, cborMajorBytes, cborMajorString) + chunkMajor := d.bd >> 5 + if chunkMajor != major { + d.d.errorf("malformed indefinite string/bytes %x (%s); contains chunk with major type %v, expected %v", + d.bd, cbordesc(d.bd), chunkMajor, major) } n := uint(d.decLen()) oldLen := uint(len(bs)) @@ -445,6 +451,9 @@ func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte) []byte { bs = bs[:newLen] } d.d.decRd.readb(bs[oldLen:newLen]) + if d.h.ValidateUnicode && major == cborMajorString && !utf8.Valid(bs[oldLen:newLen]) { + d.d.errorf("indefinite-length text string contains chunk that is not a valid utf-8 sequence: 0x%x", bs[oldLen:newLen]) + } d.bdRead = false } d.bdRead = false @@ -580,9 +589,9 @@ func (d *cborDecDriver) DecodeBytes(bs []byte) (bsOut []byte) { d.bdRead = false if bs == nil { d.d.decByteState = decByteStateReuseBuf - return d.decAppendIndefiniteBytes(d.d.b[:0]) + return d.decAppendIndefiniteBytes(d.d.b[:0], d.bd>>5) } - return d.decAppendIndefiniteBytes(bs[:0]) + return d.decAppendIndefiniteBytes(bs[:0], d.bd>>5) } if d.bd == cborBdIndefiniteArray { d.bdRead = false diff --git a/vendor/github.com/ugorji/go/codec/decode.go b/vendor/github.com/ugorji/go/codec/decode.go index d549188d..f98c8ff2 100644 --- a/vendor/github.com/ugorji/go/codec/decode.go +++ b/vendor/github.com/ugorji/go/codec/decode.go @@ -1399,6 +1399,10 @@ func NewDecoderString(s string, h Handle) *Decoder { return NewDecoderBytes(bytesView(s), h) } +func (d *Decoder) HandleName() string { + return d.hh.Name() +} + func (d *Decoder) r() *decRd { return &d.decRd } @@ -1580,14 +1584,9 @@ func (d *Decoder) MustDecode(v interface{}) { d.calls-- } -// Release releases shared (pooled) resources. -// -// It is important to call Release() when done with a Decoder, so those resources -// are released instantly for use by subsequently created Decoders. -// -// By default, Release() is automatically called unless the option ExplicitRelease is set. +// Release is a no-op. // -// Deprecated: Release is a no-op as pooled resources are not used with an Decoder. +// Deprecated: Pooled resources are not used with a Decoder. // This method is kept for compatibility reasons only. func (d *Decoder) Release() { } diff --git a/vendor/github.com/ugorji/go/codec/encode.go b/vendor/github.com/ugorji/go/codec/encode.go index 70361b5c..0e9f0cc0 100644 --- a/vendor/github.com/ugorji/go/codec/encode.go +++ b/vendor/github.com/ugorji/go/codec/encode.go @@ -984,6 +984,10 @@ func NewEncoderBytes(out *[]byte, h Handle) *Encoder { return e } +func (e *Encoder) HandleName() string { + return e.hh.Name() +} + func (e *Encoder) init(h Handle) { initHandle(h) e.err = errEncoderNotInitialized @@ -1150,12 +1154,9 @@ func (e *Encoder) MustEncode(v interface{}) { } } -// Release releases shared (pooled) resources. -// -// It is important to call Release() when done with an Encoder, so those resources -// are released instantly for use by subsequently created Encoders. +// Release is a no-op. // -// Deprecated: Release is a no-op as pooled resources are not used with an Encoder. +// Deprecated: Pooled resources are not used with an Encoder. // This method is kept for compatibility reasons only. func (e *Encoder) Release() { } diff --git a/vendor/github.com/ugorji/go/codec/gen.go b/vendor/github.com/ugorji/go/codec/gen.go index de7ee72f..0026e3e1 100644 --- a/vendor/github.com/ugorji/go/codec/gen.go +++ b/vendor/github.com/ugorji/go/codec/gen.go @@ -8,7 +8,7 @@ package codec import ( "bytes" - "encoding/base64" + "encoding/base32" "errors" "fmt" "go/format" @@ -190,7 +190,11 @@ var ( errGenExpectArrayOrMap = errors.New("unexpected type - expecting array/map/slice") errGenUnexpectedTypeFastpath = errors.New("fast-path: unexpected type - requires map or slice") - genBase64enc = base64.NewEncoding("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789__") + // don't use base64, only 63 characters allowed in valid go identifiers + // ie ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_ + // + // don't use numbers, as a valid go identifer must start with a letter. + genTypenameEnc = base32.NewEncoding("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef") genQNameRegex = regexp.MustCompile(`[A-Za-z_.]+`) ) @@ -2303,8 +2307,6 @@ func genMethodNameT(t reflect.Type, tRef reflect.Type) (n string) { } } else { // best way to get the package name inclusive - // return ptrPfx + strings.Replace(tstr, ".", "_", 1000) - // return ptrPfx + genBase64enc.EncodeToString([]byte(tstr)) if t.Name() != "" && genQNameRegex.MatchString(tstr) { return ptrPfx + strings.Replace(tstr, ".", "_", 1000) } else { @@ -2315,12 +2317,12 @@ func genMethodNameT(t reflect.Type, tRef reflect.Type) (n string) { } } -// genCustomNameForType base64encodes the t.String() value in such a way +// genCustomNameForType base32 encodes the t.String() value in such a way // that it can be used within a function name. func genCustomTypeName(tstr string) string { - len2 := genBase64enc.EncodedLen(len(tstr)) + len2 := genTypenameEnc.EncodedLen(len(tstr)) bufx := make([]byte, len2) - genBase64enc.Encode(bufx, []byte(tstr)) + genTypenameEnc.Encode(bufx, []byte(tstr)) for i := len2 - 1; i >= 0; i-- { if bufx[i] == '=' { len2-- diff --git a/vendor/github.com/ugorji/go/codec/helper.go b/vendor/github.com/ugorji/go/codec/helper.go index 19df27b6..ecd87ba5 100644 --- a/vendor/github.com/ugorji/go/codec/helper.go +++ b/vendor/github.com/ugorji/go/codec/helper.go @@ -110,8 +110,7 @@ package codec // // ------------------------------------------ // Bounds Checking -// - Allow bytesDecReader to incur "bounds check error", and -// recover that as an io.EOF. +// - Allow bytesDecReader to incur "bounds check error", and recover that as an io error. // This allows the bounds check branch to always be taken by the branch predictor, // giving better performance (in theory), while ensuring that the code is shorter. // @@ -857,26 +856,10 @@ type BasicHandle struct { // Once a Handle has been initialized (used), do not modify this option. It will be ignored. TimeNotBuiltin bool - // ExplicitRelease configures whether Release() is implicitly called after an encode or - // decode call. + // ExplicitRelease is ignored and has no effect. // - // If you will hold onto an Encoder or Decoder for re-use, by calling Reset(...) - // on it or calling (Must)Encode repeatedly into a given []byte or io.Writer, - // then you do not want it to be implicitly closed after each Encode/Decode call. - // Doing so will unnecessarily return resources to the shared pool, only for you to - // grab them right after again to do another Encode/Decode call. - // - // Instead, you configure ExplicitRelease=true, and you explicitly call Release() when - // you are truly done. - // - // As an alternative, you can explicitly set a finalizer - so its resources - // are returned to the shared pool before it is garbage-collected. Do it as below: - // runtime.SetFinalizer(e, (*Encoder).Release) - // runtime.SetFinalizer(d, (*Decoder).Release) - // - // Deprecated: This is not longer used as pools are only used for long-lived objects - // which are shared across goroutines. - // Setting this value has no effect. It is maintained for backward compatibility. + // Deprecated: Pools are only used for long-lived objects shared across goroutines. + // It is maintained for backward compatibility. ExplicitRelease bool // ---- cache line @@ -2489,7 +2472,7 @@ func panicValToErr(h errDecorator, v interface{}, err *error) { case runtime.Error: d, dok := h.(*Decoder) if dok && d.bytes && isSliceBoundsError(xerr.Error()) { - *err = io.EOF + *err = io.ErrUnexpectedEOF } else { h.wrapErr(xerr, err) } @@ -2803,7 +2786,7 @@ func freelistCapacity(length int) (capacity int) { // bytesFreelist is a list of byte buffers, sorted by cap. // // In anecdotal testing (running go test -tsd 1..6), we couldn't get -// the length ofthe list > 4 at any time. So we believe a linear search +// the length of the list > 4 at any time. So we believe a linear search // without bounds checking is sufficient. // // Typical usage model: @@ -2821,7 +2804,7 @@ func freelistCapacity(length int) (capacity int) { // v1 := v0 // ... use v1 ... // blist.put(v1) -// if byteSliceAddr(v0) != byteSliceAddr(v1) { +// if !byteSliceSameData(v0, v1) { // blist.put(v0) // } type bytesFreelist [][]byte diff --git a/vendor/github.com/ugorji/go/codec/json.go b/vendor/github.com/ugorji/go/codec/json.go index f7d2343e..569b0cc9 100644 --- a/vendor/github.com/ugorji/go/codec/json.go +++ b/vendor/github.com/ugorji/go/codec/json.go @@ -1301,6 +1301,9 @@ func (d *jsonDecDriver) DecodeNaked() { // Note also that the float values for NaN, +Inf or -Inf are encoded as null, // as suggested by NOTE 4 of the ECMA-262 ECMAScript Language Specification 5.1 edition. // see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf . +// +// Note the following behaviour differences vs std-library encoding/json package: +// - struct field names matched in case-sensitive manner type JsonHandle struct { textEncodingType BasicHandle diff --git a/vendor/github.com/ugorji/go/codec/msgpack.go b/vendor/github.com/ugorji/go/codec/msgpack.go index c8b539d4..c0861df5 100644 --- a/vendor/github.com/ugorji/go/codec/msgpack.go +++ b/vendor/github.com/ugorji/go/codec/msgpack.go @@ -1174,7 +1174,7 @@ func (c *msgpackSpecRpcCodec) ReadRequestBody(body interface{}) error { func (c *msgpackSpecRpcCodec) parseCustomHeader(expectTypeByte byte, msgid *uint64, methodOrError *string) (err error) { if cls := c.cls.load(); cls.closed { - return io.EOF + return io.ErrUnexpectedEOF } // We read the response header by hand diff --git a/vendor/github.com/ugorji/go/codec/reader.go b/vendor/github.com/ugorji/go/codec/reader.go index 3fea9f4c..ec5dac0e 100644 --- a/vendor/github.com/ugorji/go/codec/reader.go +++ b/vendor/github.com/ugorji/go/codec/reader.go @@ -350,9 +350,9 @@ func (z *ioDecReader) unreadn1() { // bytesDecReader is a decReader that reads off a byte slice with zero copying // -// Note: we do not try to convert index'ing out of bounds to an io.EOF. +// Note: we do not try to convert index'ing out of bounds to an io error. // instead, we let it bubble up to the exported Encode/Decode method -// and recover it as an io.EOF. +// and recover it as an io error. // // Every function here MUST defensively check bounds either explicitly // or via a bounds check. diff --git a/vendor/github.com/zcalusic/sysinfo/README.md b/vendor/github.com/zcalusic/sysinfo/README.md index 83cdfe55..7d72066a 100644 --- a/vendor/github.com/zcalusic/sysinfo/README.md +++ b/vendor/github.com/zcalusic/sysinfo/README.md @@ -1,6 +1,5 @@ # Sysinfo -[![Build Status](https://img.shields.io/github/actions/workflow/status/zcalusic/sysinfo/test.yaml)](https://github.com/zcalusic/sysinfo/actions/workflows/test.yaml) [![Go Report Card](https://goreportcard.com/badge/github.com/zcalusic/sysinfo)](https://goreportcard.com/report/github.com/zcalusic/sysinfo) [![GoDoc](https://godoc.org/github.com/zcalusic/sysinfo?status.svg)](https://godoc.org/github.com/zcalusic/sysinfo) [![License](https://img.shields.io/badge/license-MIT-a31f34.svg?maxAge=2592000)](https://github.com/zcalusic/sysinfo/blob/master/LICENSE) @@ -8,8 +7,8 @@ [![Platform](https://img.shields.io/badge/platform-Linux-009bde.svg?maxAge=2592000)](https://www.linuxfoundation.org/) Package sysinfo is a Go library providing Linux OS / kernel / hardware system information. It's completely standalone, -has no dependencies on the host system, doesn't execute external programs, doesn't even import other Go libraries. It -collects only "inventory type" information, things that don't change often. +has no dependencies on the host system and doesn't execute external programs. It collects only "inventory type" +information, things that don't change often. ## Code Example diff --git a/vendor/github.com/zcalusic/sysinfo/cpu.go b/vendor/github.com/zcalusic/sysinfo/cpu.go index cdd3a7c0..11fe17b9 100644 --- a/vendor/github.com/zcalusic/sysinfo/cpu.go +++ b/vendor/github.com/zcalusic/sysinfo/cpu.go @@ -80,12 +80,6 @@ func (si *SysInfo) getCPUInfo() { return } - // getNodeInfo() must have run first, to detect if we're dealing with a virtualized CPU! Detecting number of - // physical processors and/or cores is totally unreliable in virtualized environments, so let's not do it. - if si.Node.Hostname == "" || si.Node.Hypervisor != "" { - return - } - si.CPU.Cpus = uint(len(cpu)) si.CPU.Cores = uint(len(core)) } diff --git a/vendor/github.com/zcalusic/sysinfo/memory.go b/vendor/github.com/zcalusic/sysinfo/memory.go index 20735bc0..f0529258 100644 --- a/vendor/github.com/zcalusic/sysinfo/memory.go +++ b/vendor/github.com/zcalusic/sysinfo/memory.go @@ -7,7 +7,7 @@ package sysinfo import ( "bytes" "encoding/binary" - "io/ioutil" + "os" "strconv" ) @@ -31,7 +31,7 @@ func qword(data []byte, index int) uint64 { } func (si *SysInfo) getMemoryInfo() { - dmi, err := ioutil.ReadFile("/sys/firmware/dmi/tables/DMI") + dmi, err := os.ReadFile("/sys/firmware/dmi/tables/DMI") if err != nil { // Xen hypervisor if targetKB := slurpFile("/sys/devices/system/xen_memory/xen_memory0/target_kb"); targetKB != "" { diff --git a/vendor/github.com/zcalusic/sysinfo/network.go b/vendor/github.com/zcalusic/sysinfo/network.go index a2565c8c..fa24dc6d 100644 --- a/vendor/github.com/zcalusic/sysinfo/network.go +++ b/vendor/github.com/zcalusic/sysinfo/network.go @@ -5,7 +5,6 @@ package sysinfo import ( - "io/ioutil" "os" "path" "strings" @@ -112,7 +111,7 @@ func getSupported(name string) uint32 { func (si *SysInfo) getNetworkInfo() { sysClassNet := "/sys/class/net" - devices, err := ioutil.ReadDir(sysClassNet) + devices, err := os.ReadDir(sysClassNet) if err != nil { return } diff --git a/vendor/github.com/zcalusic/sysinfo/node.go b/vendor/github.com/zcalusic/sysinfo/node.go index 480ac0d0..216ee3ae 100644 --- a/vendor/github.com/zcalusic/sysinfo/node.go +++ b/vendor/github.com/zcalusic/sysinfo/node.go @@ -6,11 +6,8 @@ package sysinfo import ( "bufio" - "crypto/rand" - "fmt" "os" "strings" - "time" ) // Node information. @@ -58,19 +55,6 @@ func (si *SysInfo) getSetMachineID() { si.Node.MachineID = systemdMachineID return } - - // Generate and write fresh new machine ID to both locations, conforming to the DBUS specification: - // https://dbus.freedesktop.org/doc/dbus-specification.html#uuids - - random := make([]byte, 12) - if _, err := rand.Read(random); err != nil { - return - } - newMachineID := fmt.Sprintf("%x%x", random, time.Now().Unix()) - - spewFile(pathSystemdMachineID, newMachineID, 0444) - spewFile(pathDbusMachineID, newMachineID, 0444) - si.Node.MachineID = newMachineID } func (si *SysInfo) getTimezone() { diff --git a/vendor/github.com/zcalusic/sysinfo/os.go b/vendor/github.com/zcalusic/sysinfo/os.go index 82486eda..9d24a43f 100644 --- a/vendor/github.com/zcalusic/sysinfo/os.go +++ b/vendor/github.com/zcalusic/sysinfo/os.go @@ -25,7 +25,9 @@ var ( reID = regexp.MustCompile(`^ID=(.*)$`) reVersionID = regexp.MustCompile(`^VERSION_ID=(.*)$`) reUbuntu = regexp.MustCompile(`[\( ]([\d\.]+)`) + reAlma = regexp.MustCompile(`^AlmaLinux release ([\d\.]+)`) reCentOS = regexp.MustCompile(`^CentOS( Linux)? release ([\d\.]+)`) + reRocky = regexp.MustCompile(`^Rocky Linux release ([\d\.]+)`) reRedHat = regexp.MustCompile(`[\( ]([\d\.]+)`) ) @@ -61,12 +63,29 @@ func (si *SysInfo) getOSInfo() { if m := reUbuntu.FindStringSubmatch(si.OS.Name); m != nil { si.OS.Release = m[1] } + case "almalinux": + if release := slurpFile("/etc/almalinux-release"); release != "" { + if m := reAlma.FindStringSubmatch(release); m != nil { + si.OS.Release = m[1] + } + } + + si.OS.Version = strings.Split(si.OS.Release, ".")[0] case "centos": if release := slurpFile("/etc/centos-release"); release != "" { if m := reCentOS.FindStringSubmatch(release); m != nil { si.OS.Release = m[2] } } + case "rocky": + if release := slurpFile("/etc/rocky-release"); release != "" { + if m := reRocky.FindStringSubmatch(release); m != nil { + si.OS.Release = m[1] + } + } + + si.OS.Version = strings.Split(si.OS.Release, ".")[0] + case "rhel": if release := slurpFile("/etc/redhat-release"); release != "" { if m := reRedHat.FindStringSubmatch(release); m != nil { diff --git a/vendor/github.com/zcalusic/sysinfo/product.go b/vendor/github.com/zcalusic/sysinfo/product.go index 382b56d6..83b0897c 100644 --- a/vendor/github.com/zcalusic/sysinfo/product.go +++ b/vendor/github.com/zcalusic/sysinfo/product.go @@ -4,12 +4,16 @@ package sysinfo +import "github.com/google/uuid" + // Product information. type Product struct { - Name string `json:"name,omitempty"` - Vendor string `json:"vendor,omitempty"` - Version string `json:"version,omitempty"` - Serial string `json:"serial,omitempty"` + Name string `json:"name,omitempty"` + Vendor string `json:"vendor,omitempty"` + Version string `json:"version,omitempty"` + Serial string `json:"serial,omitempty"` + UUID uuid.UUID `json:"uuid,omitempty"` + SKU string `json:"sku,omitempty"` } func (si *SysInfo) getProductInfo() { @@ -17,4 +21,10 @@ func (si *SysInfo) getProductInfo() { si.Product.Vendor = slurpFile("/sys/class/dmi/id/sys_vendor") si.Product.Version = slurpFile("/sys/class/dmi/id/product_version") si.Product.Serial = slurpFile("/sys/class/dmi/id/product_serial") + si.Product.SKU = slurpFile("/sys/class/dmi/id/product_sku") + + uid, err := uuid.Parse(slurpFile("/sys/class/dmi/id/product_uuid")) + if err == nil { + si.Product.UUID = uid + } } diff --git a/vendor/github.com/zcalusic/sysinfo/storage.go b/vendor/github.com/zcalusic/sysinfo/storage.go index 4ce10f2d..eaee7a76 100644 --- a/vendor/github.com/zcalusic/sysinfo/storage.go +++ b/vendor/github.com/zcalusic/sysinfo/storage.go @@ -6,7 +6,6 @@ package sysinfo import ( "bufio" - "io/ioutil" "os" "path" "strconv" @@ -60,7 +59,7 @@ scan: func (si *SysInfo) getStorageInfo() { sysBlock := "/sys/block" - devices, err := ioutil.ReadDir(sysBlock) + devices, err := os.ReadDir(sysBlock) if err != nil { return } diff --git a/vendor/github.com/zcalusic/sysinfo/util.go b/vendor/github.com/zcalusic/sysinfo/util.go index cd499a47..f6c39b35 100644 --- a/vendor/github.com/zcalusic/sysinfo/util.go +++ b/vendor/github.com/zcalusic/sysinfo/util.go @@ -5,14 +5,13 @@ package sysinfo import ( - "io/ioutil" "os" "strings" ) // Read one-liner text files, strip newline. func slurpFile(path string) string { - data, err := ioutil.ReadFile(path) + data, err := os.ReadFile(path) if err != nil { return "" } @@ -22,5 +21,5 @@ func slurpFile(path string) string { // Write one-liner text files, add newline, ignore errors (best effort). func spewFile(path string, data string, perm os.FileMode) { - _ = ioutil.WriteFile(path, []byte(data+"\n"), perm) + _ = os.WriteFile(path, []byte(data+"\n"), perm) } diff --git a/vendor/github.com/zcalusic/sysinfo/version.go b/vendor/github.com/zcalusic/sysinfo/version.go index 1a9bd4b6..9320a72d 100644 --- a/vendor/github.com/zcalusic/sysinfo/version.go +++ b/vendor/github.com/zcalusic/sysinfo/version.go @@ -5,4 +5,4 @@ package sysinfo // Version of the sysinfo library. -const Version = "1.0.1" +const Version = "1.1.2" diff --git a/vendor/go.etcd.io/bbolt/.go-version b/vendor/go.etcd.io/bbolt/.go-version new file mode 100644 index 00000000..013173af --- /dev/null +++ b/vendor/go.etcd.io/bbolt/.go-version @@ -0,0 +1 @@ +1.22.6 diff --git a/vendor/go.etcd.io/bbolt/Makefile b/vendor/go.etcd.io/bbolt/Makefile index 18154c63..21407797 100644 --- a/vendor/go.etcd.io/bbolt/Makefile +++ b/vendor/go.etcd.io/bbolt/Makefile @@ -41,6 +41,15 @@ coverage: TEST_FREELIST_TYPE=array go test -v -timeout 30m \ -coverprofile cover-freelist-array.out -covermode atomic +BOLT_CMD=bbolt + +build: + go build -o bin/${BOLT_CMD} ./cmd/${BOLT_CMD} + +.PHONY: clean +clean: # Clean binaries + rm -f ./bin/${BOLT_CMD} + .PHONY: gofail-enable gofail-enable: install-gofail gofail enable . @@ -61,3 +70,7 @@ test-failpoint: @echo "[failpoint] array freelist test" TEST_FREELIST_TYPE=array go test -v ${TESTFLAGS} -timeout 30m ./tests/failpoint +.PHONY: test-robustness # Running robustness tests requires root permission +test-robustness: + go test -v ${TESTFLAGS} ./tests/dmflakey -test.root + go test -v ${TESTFLAGS} ./tests/robustness -test.root diff --git a/vendor/go.etcd.io/bbolt/README.md b/vendor/go.etcd.io/bbolt/README.md index 2be669a6..495a93ef 100644 --- a/vendor/go.etcd.io/bbolt/README.md +++ b/vendor/go.etcd.io/bbolt/README.md @@ -421,10 +421,19 @@ Prev() Move to the previous key. ``` Each of those functions has a return signature of `(key []byte, value []byte)`. -When you have iterated to the end of the cursor then `Next()` will return a -`nil` key. You must seek to a position using `First()`, `Last()`, or `Seek()` -before calling `Next()` or `Prev()`. If you do not seek to a position then -these functions will return a `nil` key. +You must seek to a position using `First()`, `Last()`, or `Seek()` before calling +`Next()` or `Prev()`. If you do not seek to a position then these functions will +return a `nil` key. + +When you have iterated to the end of the cursor, then `Next()` will return a +`nil` key and the cursor still points to the last element if present. When you +have iterated to the beginning of the cursor, then `Prev()` will return a `nil` +key and the cursor still points to the first element if present. + +If you remove key/value pairs during iteration, the cursor may automatically +move to the next position if present in current node each time removing a key. +When you call `c.Next()` after removing a key, it may skip one key/value pair. +Refer to [pull/611](https://github.com/etcd-io/bbolt/pull/611) to get more detailed info. During iteration, if the key is non-`nil` but the value is `nil`, that means the key refers to a bucket rather than a value. Use `Bucket.Bucket()` to @@ -850,6 +859,12 @@ Here are a few things to note when evaluating and using Bolt: to grow. However, it's important to note that deleting large chunks of data will not allow you to reclaim that space on disk. +* Removing key/values pairs in a bucket during iteration on the bucket using + cursor may not work properly. Each time when removing a key/value pair, the + cursor may automatically move to the next position if present. When users + call `c.Next()` after removing a key, it may skip one key/value pair. + Refer to https://github.com/etcd-io/bbolt/pull/611 for more detailed info. + For more information on page allocation, [see this comment][page-allocation]. [page-allocation]: https://github.com/boltdb/bolt/issues/308#issuecomment-74811638 diff --git a/vendor/go.etcd.io/bbolt/bolt_openbsd.go b/vendor/go.etcd.io/bbolt/bolt_openbsd.go index d7f50358..bf47aa1a 100644 --- a/vendor/go.etcd.io/bbolt/bolt_openbsd.go +++ b/vendor/go.etcd.io/bbolt/bolt_openbsd.go @@ -1,22 +1,11 @@ package bbolt import ( - "syscall" - "unsafe" -) - -const ( - msAsync = 1 << iota // perform asynchronous writes - msSync // perform synchronous writes - msInvalidate // invalidate cached data + "golang.org/x/sys/unix" ) func msync(db *DB) error { - _, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate) - if errno != 0 { - return errno - } - return nil + return unix.Msync(db.data[:db.datasz], unix.MS_INVALIDATE) } func fdatasync(db *DB) error { diff --git a/vendor/go.etcd.io/bbolt/bucket.go b/vendor/go.etcd.io/bbolt/bucket.go index 054467af..f3533d34 100644 --- a/vendor/go.etcd.io/bbolt/bucket.go +++ b/vendor/go.etcd.io/bbolt/bucket.go @@ -162,12 +162,17 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { return nil, ErrBucketNameRequired } + // Insert into node. + // Tip: Use a new variable `newKey` instead of reusing the existing `key` to prevent + // it from being marked as leaking, and accordingly cannot be allocated on stack. + newKey := cloneBytes(key) + // Move cursor to correct position. c := b.Cursor() - k, _, flags := c.seek(key) + k, _, flags := c.seek(newKey) // Return an error if there is an existing key. - if bytes.Equal(key, k) { + if bytes.Equal(newKey, k) { if (flags & bucketLeafFlag) != 0 { return nil, ErrBucketExists } @@ -182,16 +187,14 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { } var value = bucket.write() - // Insert into node. - key = cloneBytes(key) - c.node().put(key, key, value, 0, bucketLeafFlag) + c.node().put(newKey, newKey, value, 0, bucketLeafFlag) // Since subbuckets are not allowed on inline buckets, we need to // dereference the inline page, if it exists. This will cause the bucket // to be treated as a regular, non-inline bucket for the rest of the tx. b.page = nil - return b.Bucket(key), nil + return b.Bucket(newKey), nil } // CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it. @@ -288,18 +291,23 @@ func (b *Bucket) Put(key []byte, value []byte) error { return ErrValueTooLarge } + // Insert into node. + // Tip: Use a new variable `newKey` instead of reusing the existing `key` to prevent + // it from being marked as leaking, and accordingly cannot be allocated on stack. + newKey := cloneBytes(key) + // Move cursor to correct position. c := b.Cursor() - k, _, flags := c.seek(key) + k, _, flags := c.seek(newKey) // Return an error if there is an existing key with a bucket value. - if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 { + if bytes.Equal(newKey, k) && (flags&bucketLeafFlag) != 0 { return ErrIncompatibleValue } - // Insert into node. - key = cloneBytes(key) - c.node().put(key, key, value, 0, 0) + // gofail: var beforeBucketPut struct{} + + c.node().put(newKey, newKey, value, 0, 0) return nil } diff --git a/vendor/go.etcd.io/bbolt/cursor.go b/vendor/go.etcd.io/bbolt/cursor.go index 5dafb0ca..bbfd92a9 100644 --- a/vendor/go.etcd.io/bbolt/cursor.go +++ b/vendor/go.etcd.io/bbolt/cursor.go @@ -71,7 +71,7 @@ func (c *Cursor) Last() (key []byte, value []byte) { // If this is an empty page (calling Delete may result in empty pages) // we call prev to find the last page that is not empty - for len(c.stack) > 0 && c.stack[len(c.stack)-1].count() == 0 { + for len(c.stack) > 1 && c.stack[len(c.stack)-1].count() == 0 { c.prev() } @@ -254,6 +254,15 @@ func (c *Cursor) prev() (key []byte, value []byte, flags uint32) { elem.index-- break } + // If we've hit the beginning, we should stop moving the cursor, + // and stay at the first element, so that users can continue to + // iterate over the elements in reverse direction by calling `Next`. + // We should return nil in such case. + // Refer to https://github.com/etcd-io/bbolt/issues/733 + if len(c.stack) == 1 { + c.first() + return nil, nil, 0 + } c.stack = c.stack[:i] } diff --git a/vendor/go.etcd.io/bbolt/db.go b/vendor/go.etcd.io/bbolt/db.go index c9422127..822798e4 100644 --- a/vendor/go.etcd.io/bbolt/db.go +++ b/vendor/go.etcd.io/bbolt/db.go @@ -57,6 +57,12 @@ const ( // All data access is performed through transactions which can be obtained through the DB. // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called. type DB struct { + // Put `stats` at the first field to ensure it's 64-bit aligned. Note that + // the first word in an allocated struct can be relied upon to be 64-bit + // aligned. Refer to https://pkg.go.dev/sync/atomic#pkg-note-BUG. Also + // refer to discussion in https://github.com/etcd-io/bbolt/issues/577. + stats Stats + // When enabled, the database will perform a Check() after every commit. // A panic is issued if the database is in an inconsistent state. This // flag has a large performance impact so it should only be used for @@ -147,7 +153,6 @@ type DB struct { opened bool rwtx *Tx txs []*Tx - stats Stats freelist *freelist freelistLoad sync.Once @@ -424,7 +429,7 @@ func (db *DB) hasSyncedFreelist() bool { // mmap opens the underlying memory-mapped file and initializes the meta references. // minsz is the minimum size that the new mmap can be. -func (db *DB) mmap(minsz int) error { +func (db *DB) mmap(minsz int) (err error) { db.mmaplock.Lock() defer db.mmaplock.Unlock() @@ -459,17 +464,27 @@ func (db *DB) mmap(minsz int) error { } // Unmap existing data before continuing. - if err := db.munmap(); err != nil { + if err = db.munmap(); err != nil { return err } // Memory-map the data file as a byte slice. // gofail: var mapError string // return errors.New(mapError) - if err := mmap(db, size); err != nil { + if err = mmap(db, size); err != nil { return err } + // Perform unmmap on any error to reset all data fields: + // dataref, data, datasz, meta0 and meta1. + defer func() { + if err != nil { + if unmapErr := db.munmap(); unmapErr != nil { + err = fmt.Errorf("%w; rollback unmap also failed: %v", err, unmapErr) + } + } + }() + if db.Mlock { // Don't allow swapping of data file if err := db.mlock(fileSize); err != nil { @@ -509,7 +524,7 @@ func (db *DB) munmap() error { // gofail: var unmapError string // return errors.New(unmapError) if err := munmap(db); err != nil { - return fmt.Errorf("unmap error: " + err.Error()) + return fmt.Errorf("unmap error: %v", err.Error()) } return nil @@ -553,15 +568,19 @@ func (db *DB) mmapSize(size int) (int, error) { } func (db *DB) munlock(fileSize int) error { + // gofail: var munlockError string + // return errors.New(munlockError) if err := munlock(db, fileSize); err != nil { - return fmt.Errorf("munlock error: " + err.Error()) + return fmt.Errorf("munlock error: %v", err.Error()) } return nil } func (db *DB) mlock(fileSize int) error { + // gofail: var mlockError string + // return errors.New(mlockError) if err := mlock(db, fileSize); err != nil { - return fmt.Errorf("mlock error: " + err.Error()) + return fmt.Errorf("mlock error: %v", err.Error()) } return nil } @@ -649,9 +668,10 @@ func (db *DB) close() error { // Clear ops. db.ops.writeAt = nil + var errs []error // Close the mmap. if err := db.munmap(); err != nil { - return err + errs = append(errs, err) } // Close file handles. @@ -660,18 +680,22 @@ func (db *DB) close() error { if !db.readOnly { // Unlock the file. if err := funlock(db); err != nil { - return fmt.Errorf("bolt.Close(): funlock error: %w", err) + errs = append(errs, fmt.Errorf("bolt.Close(): funlock error: %w", err)) } } // Close the file descriptor. if err := db.file.Close(); err != nil { - return fmt.Errorf("db file close: %s", err) + errs = append(errs, fmt.Errorf("db file close: %w", err)) } db.file = nil } db.path = "" + + if len(errs) > 0 { + return errs[0] + } return nil } @@ -1135,6 +1159,8 @@ func (db *DB) grow(sz int) error { // https://github.com/boltdb/bolt/issues/284 if !db.NoGrowSync && !db.readOnly { if runtime.GOOS != "windows" { + // gofail: var resizeFileError string + // return errors.New(resizeFileError) if err := db.file.Truncate(int64(sz)); err != nil { return fmt.Errorf("file resize error: %s", err) } @@ -1263,6 +1289,12 @@ var DefaultOptions = &Options{ // Stats represents statistics about the database. type Stats struct { + // Put `TxStats` at the first field to ensure it's 64-bit aligned. Note + // that the first word in an allocated struct can be relied upon to be + // 64-bit aligned. Refer to https://pkg.go.dev/sync/atomic#pkg-note-BUG. + // Also refer to discussion in https://github.com/etcd-io/bbolt/issues/577. + TxStats TxStats // global, ongoing stats. + // Freelist stats FreePageN int // total number of free pages on the freelist PendingPageN int // total number of pending pages on the freelist @@ -1272,8 +1304,6 @@ type Stats struct { // Transaction stats TxN int // total number of started read transactions OpenTxN int // number of currently open read transactions - - TxStats TxStats // global, ongoing stats. } // Sub calculates and returns the difference between two sets of database stats. diff --git a/vendor/go.etcd.io/bbolt/freelist.go b/vendor/go.etcd.io/bbolt/freelist.go index 50f2d0e1..dffc7bc7 100644 --- a/vendor/go.etcd.io/bbolt/freelist.go +++ b/vendor/go.etcd.io/bbolt/freelist.go @@ -252,6 +252,14 @@ func (f *freelist) rollback(txid txid) { } // Remove pages from pending list and mark as free if allocated by txid. delete(f.pending, txid) + + // Remove pgids which are allocated by this txid + for pgid, tid := range f.allocs { + if tid == txid { + delete(f.allocs, pgid) + } + } + f.mergeSpans(m) } @@ -282,9 +290,8 @@ func (f *freelist) read(p *page) { if count == 0 { f.ids = nil } else { - var ids []pgid - data := unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), unsafe.Sizeof(ids[0]), idx) - unsafeSlice(unsafe.Pointer(&ids), data, count) + data := unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), unsafe.Sizeof(pgid(0)), idx) + ids := unsafe.Slice((*pgid)(data), count) // copy the ids, so we don't modify on the freelist page directly idsCopy := make([]pgid, count) @@ -322,15 +329,13 @@ func (f *freelist) write(p *page) error { p.count = uint16(l) } else if l < 0xFFFF { p.count = uint16(l) - var ids []pgid data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) - unsafeSlice(unsafe.Pointer(&ids), data, l) + ids := unsafe.Slice((*pgid)(data), l) f.copyall(ids) } else { p.count = 0xFFFF - var ids []pgid data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) - unsafeSlice(unsafe.Pointer(&ids), data, l+1) + ids := unsafe.Slice((*pgid)(data), l+1) ids[0] = pgid(l) f.copyall(ids[1:]) } diff --git a/vendor/go.etcd.io/bbolt/page.go b/vendor/go.etcd.io/bbolt/page.go index 379645c9..bb081b03 100644 --- a/vendor/go.etcd.io/bbolt/page.go +++ b/vendor/go.etcd.io/bbolt/page.go @@ -74,9 +74,8 @@ func (p *page) leafPageElements() []leafPageElement { if p.count == 0 { return nil } - var elems []leafPageElement data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) - unsafeSlice(unsafe.Pointer(&elems), data, int(p.count)) + elems := unsafe.Slice((*leafPageElement)(data), int(p.count)) return elems } @@ -91,9 +90,8 @@ func (p *page) branchPageElements() []branchPageElement { if p.count == 0 { return nil } - var elems []branchPageElement data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) - unsafeSlice(unsafe.Pointer(&elems), data, int(p.count)) + elems := unsafe.Slice((*branchPageElement)(data), int(p.count)) return elems } diff --git a/vendor/go.etcd.io/bbolt/tx.go b/vendor/go.etcd.io/bbolt/tx.go index 2fac8c0a..766395de 100644 --- a/vendor/go.etcd.io/bbolt/tx.go +++ b/vendor/go.etcd.io/bbolt/tx.go @@ -1,6 +1,7 @@ package bbolt import ( + "errors" "fmt" "io" "os" @@ -185,6 +186,10 @@ func (tx *Tx) Commit() error { // If the high water mark has moved up then attempt to grow the database. if tx.meta.pgid > opgid { + _ = errors.New("") + // gofail: var lackOfDiskSpace string + // tx.rollback() + // return errors.New(lackOfDiskSpace) if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { tx.rollback() return err @@ -470,6 +475,7 @@ func (tx *Tx) write() error { // Ignore file sync if flag is set on DB. if !tx.db.NoSync || IgnoreNoSync { + // gofail: var beforeSyncDataPages struct{} if err := fdatasync(tx.db); err != nil { return err } @@ -507,6 +513,7 @@ func (tx *Tx) writeMeta() error { return err } if !tx.db.NoSync || IgnoreNoSync { + // gofail: var beforeSyncMetaPage struct{} if err := fdatasync(tx.db); err != nil { return err } diff --git a/vendor/go.etcd.io/bbolt/unsafe.go b/vendor/go.etcd.io/bbolt/unsafe.go index c0e50375..7745d32c 100644 --- a/vendor/go.etcd.io/bbolt/unsafe.go +++ b/vendor/go.etcd.io/bbolt/unsafe.go @@ -1,7 +1,6 @@ package bbolt import ( - "reflect" "unsafe" ) @@ -26,14 +25,3 @@ func unsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte { // all), so this is believed to be correct. return (*[maxAllocSize]byte)(unsafeAdd(base, offset))[i:j:j] } - -// unsafeSlice modifies the data, len, and cap of a slice variable pointed to by -// the slice parameter. This helper should be used over other direct -// manipulation of reflect.SliceHeader to prevent misuse, namely, converting -// from reflect.SliceHeader to a Go slice type. -func unsafeSlice(slice, data unsafe.Pointer, len int) { - s := (*reflect.SliceHeader)(slice) - s.Data = uintptr(data) - s.Cap = len - s.Len = len -} diff --git a/vendor/go.opencensus.io/Makefile b/vendor/go.opencensus.io/Makefile index b3ce3df3..d896edc9 100644 --- a/vendor/go.opencensus.io/Makefile +++ b/vendor/go.opencensus.io/Makefile @@ -91,7 +91,7 @@ embedmd: .PHONY: install-tools install-tools: - go get -u golang.org/x/lint/golint - go get -u golang.org/x/tools/cmd/cover - go get -u golang.org/x/tools/cmd/goimports - go get -u github.com/rakyll/embedmd + go install golang.org/x/lint/golint@latest + go install golang.org/x/tools/cmd/cover@latest + go install golang.org/x/tools/cmd/goimports@latest + go install github.com/rakyll/embedmd@latest diff --git a/vendor/go.opencensus.io/opencensus.go b/vendor/go.opencensus.io/opencensus.go index e5e4b436..11e31f42 100644 --- a/vendor/go.opencensus.io/opencensus.go +++ b/vendor/go.opencensus.io/opencensus.go @@ -17,5 +17,5 @@ package opencensus // import "go.opencensus.io" // Version is the current release version of OpenCensus in use. func Version() string { - return "0.23.0" + return "0.24.0" } diff --git a/vendor/go.opencensus.io/trace/doc.go b/vendor/go.opencensus.io/trace/doc.go index 04b1ee4f..7a1616a5 100644 --- a/vendor/go.opencensus.io/trace/doc.go +++ b/vendor/go.opencensus.io/trace/doc.go @@ -18,24 +18,23 @@ Package trace contains support for OpenCensus distributed tracing. The following assumes a basic familiarity with OpenCensus concepts. See http://opencensus.io - -Exporting Traces +# Exporting Traces To export collected tracing data, register at least one exporter. You can use one of the provided exporters or write your own. - trace.RegisterExporter(exporter) + trace.RegisterExporter(exporter) By default, traces will be sampled relatively rarely. To change the sampling frequency for your entire program, call ApplyConfig. Use a ProbabilitySampler to sample a subset of traces, or use AlwaysSample to collect a trace on every run: - trace.ApplyConfig(trace.Config{DefaultSampler: trace.AlwaysSample()}) + trace.ApplyConfig(trace.Config{DefaultSampler: trace.AlwaysSample()}) Be careful about using trace.AlwaysSample in a production application with significant traffic: a new trace will be started and exported for every request. -Adding Spans to a Trace +# Adding Spans to a Trace A trace consists of a tree of spans. In Go, the current span is carried in a context.Context. @@ -44,8 +43,8 @@ It is common to want to capture all the activity of a function call in a span. F this to work, the function must take a context.Context as a parameter. Add these two lines to the top of the function: - ctx, span := trace.StartSpan(ctx, "example.com/Run") - defer span.End() + ctx, span := trace.StartSpan(ctx, "example.com/Run") + defer span.End() StartSpan will create a new top-level span if the context doesn't contain another span, otherwise it will create a child span. diff --git a/vendor/go.opencensus.io/trace/lrumap.go b/vendor/go.opencensus.io/trace/lrumap.go index 908c2497..80095a5f 100644 --- a/vendor/go.opencensus.io/trace/lrumap.go +++ b/vendor/go.opencensus.io/trace/lrumap.go @@ -44,7 +44,7 @@ func (lm lruMap) len() int { } func (lm lruMap) keys() []interface{} { - keys := make([]interface{}, len(lm.cacheKeys)) + keys := make([]interface{}, 0, len(lm.cacheKeys)) for k := range lm.cacheKeys { keys = append(keys, k) } diff --git a/vendor/go.opencensus.io/trace/trace_go11.go b/vendor/go.opencensus.io/trace/trace_go11.go index b7d8aaf2..b8fc1e49 100644 --- a/vendor/go.opencensus.io/trace/trace_go11.go +++ b/vendor/go.opencensus.io/trace/trace_go11.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build go1.11 // +build go1.11 package trace diff --git a/vendor/go.opencensus.io/trace/trace_nongo11.go b/vendor/go.opencensus.io/trace/trace_nongo11.go index e2541985..da488fc8 100644 --- a/vendor/go.opencensus.io/trace/trace_nongo11.go +++ b/vendor/go.opencensus.io/trace/trace_nongo11.go @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !go1.11 // +build !go1.11 package trace diff --git a/vendor/go.uber.org/mock/mockgen/deprecated.go b/vendor/go.uber.org/mock/mockgen/deprecated.go new file mode 100644 index 00000000..0b45a2e3 --- /dev/null +++ b/vendor/go.uber.org/mock/mockgen/deprecated.go @@ -0,0 +1,41 @@ +package main + +import ( + "flag" + "log" + "os" +) + +const ( + deprecatedFlagProgOnly = "prog_only" + deprecatedFlagExecOnly = "exec_only" +) + +var ( + _ = flag.Bool("prog_only", false, "DEPRECATED (reflect mode) Only generate the reflection program; write it to stdout and exit.") + _ = flag.String("exec_only", "", "DEPRECATED (reflect mode) If set, execute this reflection program.") +) + +// notifyAboutDeprecatedFlags prints a warning message for a deprecated flags if they are set. +func notifyAboutDeprecatedFlags() { + const resetColorPostfix = "\033[0m" + logger := initWarningLogger() + + flag.Visit(func(f *flag.Flag) { + switch f.Name { + case deprecatedFlagProgOnly: + logger.Println("The -prog_only flag is deprecated and has no effect.", resetColorPostfix) + case deprecatedFlagExecOnly: + logger.Println("The -exec_only flag is deprecated and has no effect.", resetColorPostfix) + } + }) +} + +func initWarningLogger() *log.Logger { + const ( + yellowColor = "\033[33m" + warningPrefix = yellowColor + "WARNING: " + ) + + return log.New(os.Stdout, warningPrefix, log.Ldate|log.Ltime) +} diff --git a/vendor/go.uber.org/mock/mockgen/generic_go118.go b/vendor/go.uber.org/mock/mockgen/generic.go similarity index 84% rename from vendor/go.uber.org/mock/mockgen/generic_go118.go rename to vendor/go.uber.org/mock/mockgen/generic.go index b7b44947..c2289c2a 100644 --- a/vendor/go.uber.org/mock/mockgen/generic_go118.go +++ b/vendor/go.uber.org/mock/mockgen/generic.go @@ -5,9 +5,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build go1.18 -// +build go1.18 - package main import ( @@ -15,7 +12,6 @@ import ( "fmt" "go/ast" "go/token" - "strings" "go.uber.org/mock/mockgen/model" ) @@ -67,29 +63,6 @@ func (p *fileParser) parseGenericType(pkg string, typ ast.Expr, tps map[string]m return nil, nil } -func getIdentTypeParams(decl any) string { - if decl == nil { - return "" - } - ts, ok := decl.(*ast.TypeSpec) - if !ok { - return "" - } - if ts.TypeParams == nil || len(ts.TypeParams.List) == 0 { - return "" - } - var sb strings.Builder - sb.WriteString("[") - for i, v := range ts.TypeParams.List { - if i != 0 { - sb.WriteString(", ") - } - sb.WriteString(v.Names[0].Name) - } - sb.WriteString("]") - return sb.String() -} - func (p *fileParser) parseGenericMethod(field *ast.Field, it *namedInterface, iface *model.Interface, pkg string, tps map[string]model.Type) ([]*model.Method, error) { var indices []ast.Expr var typ ast.Expr diff --git a/vendor/go.uber.org/mock/mockgen/generic_notgo118.go b/vendor/go.uber.org/mock/mockgen/generic_notgo118.go deleted file mode 100644 index 8a779c8b..00000000 --- a/vendor/go.uber.org/mock/mockgen/generic_notgo118.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2022 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !go1.18 -// +build !go1.18 - -package main - -import ( - "fmt" - "go/ast" - - "go.uber.org/mock/mockgen/model" -) - -func getTypeSpecTypeParams(ts *ast.TypeSpec) []*ast.Field { - return nil -} - -func (p *fileParser) parseGenericType(pkg string, typ ast.Expr, tps map[string]model.Type) (model.Type, error) { - return nil, nil -} - -func getIdentTypeParams(decl any) string { - return "" -} - -func (p *fileParser) parseGenericMethod(field *ast.Field, it *namedInterface, iface *model.Interface, pkg string, tps map[string]model.Type) ([]*model.Method, error) { - return nil, fmt.Errorf("don't know how to mock method of type %T", field.Type) -} diff --git a/vendor/go.uber.org/mock/mockgen/gob.go b/vendor/go.uber.org/mock/mockgen/gob.go new file mode 100644 index 00000000..b5ab0661 --- /dev/null +++ b/vendor/go.uber.org/mock/mockgen/gob.go @@ -0,0 +1,21 @@ +package main + +import ( + "encoding/gob" + "os" + + "go.uber.org/mock/mockgen/model" +) + +func gobMode(path string) (*model.Package, error) { + in, err := os.Open(path) + if err != nil { + return nil, err + } + defer in.Close() + var pkg model.Package + if err := gob.NewDecoder(in).Decode(&pkg); err != nil { + return nil, err + } + return &pkg, nil +} diff --git a/vendor/go.uber.org/mock/mockgen/mockgen.go b/vendor/go.uber.org/mock/mockgen/mockgen.go index df7d85f0..b5365de5 100644 --- a/vendor/go.uber.org/mock/mockgen/mockgen.go +++ b/vendor/go.uber.org/mock/mockgen/mockgen.go @@ -59,14 +59,17 @@ var ( mockNames = flag.String("mock_names", "", "Comma-separated interfaceName=mockName pairs of explicit mock names to use. Mock names default to 'Mock'+ interfaceName suffix.") packageOut = flag.String("package", "", "Package of the generated code; defaults to the package of the input with a 'mock_' prefix.") selfPackage = flag.String("self_package", "", "The full package import path for the generated code. The purpose of this flag is to prevent import cycles in the generated code by trying to include its own package. This can happen if the mock's package is set to one of its inputs (usually the main one) and the output is stdio so mockgen cannot detect the final output package. Setting this flag will then tell mockgen which import to exclude.") + writeCmdComment = flag.Bool("write_command_comment", true, "Writes the command used as a comment if true.") writePkgComment = flag.Bool("write_package_comment", true, "Writes package documentation comment (godoc) if true.") - writeSourceComment = flag.Bool("write_source_comment", true, "Writes original file (source mode) or interface names (reflect mode) comment if true.") + writeSourceComment = flag.Bool("write_source_comment", true, "Writes original file (source mode) or interface names (package mode) comment if true.") writeGenerateDirective = flag.Bool("write_generate_directive", false, "Add //go:generate directive to regenerate the mock") copyrightFile = flag.String("copyright_file", "", "Copyright file used to add copyright header") + buildConstraint = flag.String("build_constraint", "", "If non-empty, added as //go:build ") typed = flag.Bool("typed", false, "Generate Type-safe 'Return', 'Do', 'DoAndReturn' function") imports = flag.String("imports", "", "(source mode) Comma-separated name=path pairs of explicit imports to use.") auxFiles = flag.String("aux_files", "", "(source mode) Comma-separated pkg=path pairs of auxiliary Go source files.") - excludeInterfaces = flag.String("exclude_interfaces", "", "Comma-separated names of interfaces to be excluded") + excludeInterfaces = flag.String("exclude_interfaces", "", "(source mode) Comma-separated names of interfaces to be excluded") + modelGob = flag.String("model_gob", "", "Skip package/source loading entirely and use the gob encoded model.Package at the given path") debugParser = flag.Bool("debug_parser", false, "Print out parser results only.") showVersion = flag.Bool("version", false, "Print version.") @@ -76,6 +79,8 @@ func main() { flag.Usage = usage flag.Parse() + notifyAboutDeprecatedFlags() + if *showVersion { printVersion() return @@ -84,7 +89,9 @@ func main() { var pkg *model.Package var err error var packageName string - if *source != "" { + if *modelGob != "" { + pkg, err = gobMode(*modelGob) + } else if *source != "" { pkg, err = sourceMode(*source) } else { if flag.NArg() != 2 { @@ -103,7 +110,8 @@ func main() { log.Fatalf("Parse package name failed: %v", err) } } - pkg, err = reflectMode(packageName, interfaces) + parser := packageModeParser{} + pkg, err = parser.parsePackage(packageName, interfaces) } if err != nil { log.Fatalf("Loading input failed: %v", err) @@ -116,7 +124,7 @@ func main() { outputPackageName := *packageOut if outputPackageName == "" { - // pkg.Name in reflect mode is the base name of the import path, + // pkg.Name in package mode is the base name of the import path, // which might have characters that are illegal to have in package names. outputPackageName = "mock_" + sanitize(pkg.Name) } @@ -142,7 +150,9 @@ func main() { } } - g := new(generator) + g := &generator{ + buildConstraint: *buildConstraint, + } if *source != "" { g.filename = *source } else { @@ -225,20 +235,21 @@ func usage() { flag.PrintDefaults() } -const usageText = `mockgen has two modes of operation: source and reflect. +const usageText = `mockgen has two modes of operation: source and package. Source mode generates mock interfaces from a source file. It is enabled by using the -source flag. Other flags that -may be useful in this mode are -imports and -aux_files. +may be useful in this mode are -imports, -aux_files and -exclude_interfaces. Example: mockgen -source=foo.go [other options] -Reflect mode generates mock interfaces by building a program -that uses reflection to understand interfaces. It is enabled -by passing two non-flag arguments: an import path, and a -comma-separated list of symbols. +Package mode works by specifying the package and interface names. +It is enabled by passing two non-flag arguments: an import path, and a +comma-separated list of symbols. +You can use "." to refer to the current path's package. Example: mockgen database/sql/driver Conn,Driver + mockgen . SomeInterface ` @@ -250,12 +261,13 @@ type generator struct { destination string // may be empty srcPackage, srcInterfaces string // may be empty copyrightHeader string + buildConstraint string // may be empty packageMap map[string]string // map from import path to package name } func (g *generator) p(format string, args ...any) { - fmt.Fprintf(&g.buf, g.indent+format+"\n", args...) + _, _ = fmt.Fprintf(&g.buf, g.indent+format+"\n", args...) } func (g *generator) in() { @@ -305,6 +317,12 @@ func (g *generator) Generate(pkg *model.Package, outputPkgName string, outputPac g.p("") } + if g.buildConstraint != "" { + g.p("//go:build %s", g.buildConstraint) + // https://pkg.go.dev/cmd/go#hdr-Build_constraints:~:text=a%20build%20constraint%20should%20be%20followed%20by%20a%20blank%20line + g.p("") + } + g.p("// Code generated by MockGen. DO NOT EDIT.") if *writeSourceComment { if g.filename != "" { @@ -313,16 +331,18 @@ func (g *generator) Generate(pkg *model.Package, outputPkgName string, outputPac g.p("// Source: %v (interfaces: %v)", g.srcPackage, g.srcInterfaces) } } - g.p("//") - g.p("// Generated by this command:") - g.p("//") - // only log the name of the executable, not the full path - name := filepath.Base(os.Args[0]) - if runtime.GOOS == "windows" { - name = strings.TrimSuffix(name, ".exe") + if *writeCmdComment { + g.p("//") + g.p("// Generated by this command:") + g.p("//") + // only log the name of the executable, not the full path + name := filepath.Base(os.Args[0]) + if runtime.GOOS == "windows" { + name = strings.TrimSuffix(name, ".exe") + } + g.p("//\t%v", strings.Join(append([]string{name}, os.Args[1:]...), " ")) + g.p("//") } - g.p("//\t%v", strings.Join(append([]string{name}, os.Args[1:]...), " ")) - g.p("//") // Get all required imports, and generate unique names for them all. im := pkg.Imports() @@ -392,11 +412,13 @@ func (g *generator) Generate(pkg *model.Package, outputPkgName string, outputPac localNames[pkgName] = true } - if *writePkgComment { - // Ensure there's an empty line before the package to follow the recommendations: - // https://github.com/golang/go/wiki/CodeReviewComments#package-comments - g.p("") + // Ensure there is an empty line between “generated by” block and + // package documentation comments to follow the recommendations: + // https://go.dev/wiki/CodeReviewComments#package-comments + // That is, “generated by” should not be a package comment. + g.p("") + if *writePkgComment { g.p("// Package %v is a generated GoMock package.", outputPkgName) } g.p("package %v", outputPkgName) @@ -472,6 +494,7 @@ func (g *generator) GenerateMockInterface(intf *model.Interface, outputPackagePa g.in() g.p("ctrl *gomock.Controller") g.p("recorder *%vMockRecorder%v", mockType, shortTp) + g.p("isgomock struct{}") g.out() g.p("}") g.p("") @@ -816,7 +839,7 @@ func createPackageMap(importPaths []string) map[string]string { } pkgMap := make(map[string]string) b := bytes.NewBuffer(nil) - args := []string{"list", "-json"} + args := []string{"list", "-json=ImportPath,Name"} args = append(args, importPaths...) cmd := exec.Command("go", args...) cmd.Stdout = b diff --git a/vendor/go.uber.org/mock/mockgen/package_mode.go b/vendor/go.uber.org/mock/mockgen/package_mode.go new file mode 100644 index 00000000..abc9c7a4 --- /dev/null +++ b/vendor/go.uber.org/mock/mockgen/package_mode.go @@ -0,0 +1,358 @@ +package main + +import ( + "errors" + "flag" + "fmt" + "go/types" + "strings" + + "go.uber.org/mock/mockgen/model" + "golang.org/x/tools/go/packages" +) + +var ( + buildFlags = flag.String("build_flags", "", "(package mode) Additional flags for go build.") +) + +type packageModeParser struct { + pkgName string +} + +func (p *packageModeParser) parsePackage(packageName string, ifaces []string) (*model.Package, error) { + p.pkgName = packageName + + pkg, err := p.loadPackage(packageName) + if err != nil { + return nil, fmt.Errorf("load package: %w", err) + } + + interfaces, err := p.extractInterfacesFromPackage(pkg, ifaces) + if err != nil { + return nil, fmt.Errorf("extract interfaces from package: %w", err) + } + + return &model.Package{ + Name: pkg.Types.Name(), + PkgPath: packageName, + Interfaces: interfaces, + }, nil +} + +func (p *packageModeParser) loadPackage(packageName string) (*packages.Package, error) { + var buildFlagsSet []string + if *buildFlags != "" { + buildFlagsSet = strings.Split(*buildFlags, " ") + } + + cfg := &packages.Config{ + Mode: packages.NeedDeps | packages.NeedImports | packages.NeedTypes | packages.NeedTypesInfo | packages.NeedEmbedFiles, + BuildFlags: buildFlagsSet, + } + pkgs, err := packages.Load(cfg, packageName) + if err != nil { + return nil, fmt.Errorf("load packages: %w", err) + } + + if len(pkgs) != 1 { + return nil, fmt.Errorf("packages length must be 1: %d", len(pkgs)) + } + + if len(pkgs[0].Errors) > 0 { + errs := make([]error, len(pkgs[0].Errors)) + for i, err := range pkgs[0].Errors { + errs[i] = err + } + + return nil, errors.Join(errs...) + } + + return pkgs[0], nil +} + +func (p *packageModeParser) extractInterfacesFromPackage(pkg *packages.Package, ifaces []string) ([]*model.Interface, error) { + interfaces := make([]*model.Interface, len(ifaces)) + for i, iface := range ifaces { + obj := pkg.Types.Scope().Lookup(iface) + if obj == nil { + return nil, fmt.Errorf("interface %s does not exist", iface) + } + + modelIface, err := p.parseInterface(obj) + if err != nil { + return nil, newParseTypeError("parse interface", obj.Name(), err) + } + + interfaces[i] = modelIface + } + + return interfaces, nil +} + +func (p *packageModeParser) parseInterface(obj types.Object) (*model.Interface, error) { + named, ok := types.Unalias(obj.Type()).(*types.Named) + if !ok { + return nil, fmt.Errorf("%s is not an interface. it is a %T", obj.Name(), obj.Type().Underlying()) + } + + iface, ok := named.Underlying().(*types.Interface) + if !ok { + return nil, fmt.Errorf("%s is not an interface. it is a %T", obj.Name(), obj.Type().Underlying()) + } + + if p.isConstraint(iface) { + return nil, fmt.Errorf("interface %s is a constraint", obj.Name()) + } + + methods := make([]*model.Method, iface.NumMethods()) + for i := range iface.NumMethods() { + method := iface.Method(i) + typedMethod, ok := method.Type().(*types.Signature) + if !ok { + return nil, fmt.Errorf("method %s is not a signature", method.Name()) + } + + modelFunc, err := p.parseFunc(typedMethod) + if err != nil { + return nil, newParseTypeError("parse method", typedMethod.String(), err) + } + + methods[i] = &model.Method{ + Name: method.Name(), + In: modelFunc.In, + Out: modelFunc.Out, + Variadic: modelFunc.Variadic, + } + } + + if named.TypeParams() == nil { + return &model.Interface{Name: obj.Name(), Methods: methods}, nil + } + + typeParams := make([]*model.Parameter, named.TypeParams().Len()) + for i := range named.TypeParams().Len() { + param := named.TypeParams().At(i) + typeParam, err := p.parseConstraint(param) + if err != nil { + return nil, newParseTypeError("parse type parameter", param.String(), err) + } + + typeParams[i] = &model.Parameter{Name: param.Obj().Name(), Type: typeParam} + } + + return &model.Interface{Name: obj.Name(), Methods: methods, TypeParams: typeParams}, nil +} + +func (o *packageModeParser) isConstraint(t *types.Interface) bool { + for i := range t.NumEmbeddeds() { + embed := t.EmbeddedType(i) + if _, ok := embed.Underlying().(*types.Interface); !ok { + return true + } + } + + return false +} + +func (p *packageModeParser) parseType(t types.Type) (model.Type, error) { + switch t := t.(type) { + case *types.Array: + elementType, err := p.parseType(t.Elem()) + if err != nil { + return nil, newParseTypeError("parse array type", t.Elem().String(), err) + } + return &model.ArrayType{Len: int(t.Len()), Type: elementType}, nil + case *types.Slice: + elementType, err := p.parseType(t.Elem()) + if err != nil { + return nil, newParseTypeError("parse slice type", t.Elem().String(), err) + } + + return &model.ArrayType{Len: -1, Type: elementType}, nil + case *types.Chan: + var dir model.ChanDir + switch t.Dir() { + case types.RecvOnly: + dir = model.RecvDir + case types.SendOnly: + dir = model.SendDir + } + + chanType, err := p.parseType(t.Elem()) + if err != nil { + return nil, newParseTypeError("parse chan type", t.Elem().String(), err) + } + + return &model.ChanType{Dir: dir, Type: chanType}, nil + case *types.Signature: + sig, err := p.parseFunc(t) + if err != nil { + return nil, newParseTypeError("parse signature", t.String(), err) + } + + return sig, nil + case *types.Named, *types.Alias: + object := t.(interface{ Obj() *types.TypeName }) + var pkg string + if object.Obj().Pkg() != nil { + pkg = object.Obj().Pkg().Path() + } + + // TypeArgs method not available for aliases in go1.22 + genericType, ok := t.(interface{ TypeArgs() *types.TypeList }) + if !ok || genericType.TypeArgs() == nil { + return &model.NamedType{ + Package: pkg, + Type: object.Obj().Name(), + }, nil + } + + typeParams := &model.TypeParametersType{TypeParameters: make([]model.Type, genericType.TypeArgs().Len())} + for i := range genericType.TypeArgs().Len() { + typeParam := genericType.TypeArgs().At(i) + typedParam, err := p.parseType(typeParam) + if err != nil { + return nil, newParseTypeError("parse type parameter", typeParam.String(), err) + } + + typeParams.TypeParameters[i] = typedParam + } + + return &model.NamedType{ + Package: pkg, + Type: object.Obj().Name(), + TypeParams: typeParams, + }, nil + case *types.Interface: + if t.Empty() { + return model.PredeclaredType("any"), nil + } + + return nil, fmt.Errorf("cannot handle non-empty unnamed interfaces") + case *types.Map: + key, err := p.parseType(t.Key()) + if err != nil { + return nil, newParseTypeError("parse map key", t.Key().String(), err) + } + value, err := p.parseType(t.Elem()) + if err != nil { + return nil, newParseTypeError("parse map value", t.Elem().String(), err) + } + + return &model.MapType{Key: key, Value: value}, nil + case *types.Pointer: + valueType, err := p.parseType(t.Elem()) + if err != nil { + return nil, newParseTypeError("parse pointer type", t.Elem().String(), err) + } + + return &model.PointerType{Type: valueType}, nil + case *types.Struct: + if t.NumFields() > 0 { + return nil, fmt.Errorf("cannot handle non-empty unnamed structs") + } + + return model.PredeclaredType("struct{}"), nil + case *types.Basic: + return model.PredeclaredType(t.Name()), nil + case *types.Tuple: + panic("tuple field") // TODO + case *types.TypeParam: + return &model.NamedType{Type: t.Obj().Name()}, nil + default: + panic("unknown type") // TODO + } +} + +func (p *packageModeParser) parseFunc(sig *types.Signature) (*model.FuncType, error) { + var variadic *model.Parameter + params := make([]*model.Parameter, 0, sig.Params().Len()) + for i := range sig.Params().Len() { + param := sig.Params().At(i) + + isVariadicParam := i == sig.Params().Len()-1 && sig.Variadic() + parseType := param.Type() + if isVariadicParam { + sliceType, ok := param.Type().(*types.Slice) + if !ok { + return nil, newParseTypeError("variadic parameter is not a slice", param.String(), nil) + } + + parseType = sliceType.Elem() + } + + paramType, err := p.parseType(parseType) + if err != nil { + return nil, newParseTypeError("parse parameter type", parseType.String(), err) + } + + modelParameter := &model.Parameter{Type: paramType, Name: param.Name()} + + if isVariadicParam { + variadic = modelParameter + } else { + params = append(params, modelParameter) + } + } + + if len(params) == 0 { + params = nil + } + + results := make([]*model.Parameter, sig.Results().Len()) + for i := range sig.Results().Len() { + result := sig.Results().At(i) + + resultType, err := p.parseType(result.Type()) + if err != nil { + return nil, newParseTypeError("parse result type", result.Type().String(), err) + } + + results[i] = &model.Parameter{Type: resultType, Name: result.Name()} + } + + if len(results) == 0 { + results = nil + } + + return &model.FuncType{ + In: params, + Out: results, + Variadic: variadic, + }, nil +} + +func (p *packageModeParser) parseConstraint(t *types.TypeParam) (model.Type, error) { + if t == nil { + return nil, fmt.Errorf("nil type param") + } + + typeParam, err := p.parseType(t.Constraint()) + if err != nil { + return nil, newParseTypeError("parse constraint type", t.Constraint().String(), err) + } + + return typeParam, nil +} + +type parseTypeError struct { + message string + typeString string + error error +} + +func newParseTypeError(message string, typeString string, error error) *parseTypeError { + return &parseTypeError{typeString: typeString, error: error, message: message} +} + +func (p parseTypeError) Error() string { + if p.error != nil { + return fmt.Sprintf("%s: error parsing %s: %s", p.message, p.typeString, p.error) + } + + return fmt.Sprintf("%s: error parsing type %s", p.message, p.typeString) +} + +func (p parseTypeError) Unwrap() error { + return p.error +} diff --git a/vendor/go.uber.org/mock/mockgen/reflect.go b/vendor/go.uber.org/mock/mockgen/reflect.go deleted file mode 100644 index ca80ebbb..00000000 --- a/vendor/go.uber.org/mock/mockgen/reflect.go +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright 2012 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -// This file contains the model construction by reflection. - -import ( - "bytes" - "encoding/gob" - "flag" - "fmt" - "go/build" - "io" - "log" - "os" - "os/exec" - "path/filepath" - "runtime" - "strings" - "text/template" - - "go.uber.org/mock/mockgen/model" -) - -var ( - progOnly = flag.Bool("prog_only", false, "(reflect mode) Only generate the reflection program; write it to stdout and exit.") - execOnly = flag.String("exec_only", "", "(reflect mode) If set, execute this reflection program.") - buildFlags = flag.String("build_flags", "", "(reflect mode) Additional flags for go build.") -) - -// reflectMode generates mocks via reflection on an interface. -func reflectMode(importPath string, symbols []string) (*model.Package, error) { - if *execOnly != "" { - return run(*execOnly) - } - - program, err := writeProgram(importPath, symbols) - if err != nil { - return nil, err - } - - if *progOnly { - if _, err := os.Stdout.Write(program); err != nil { - return nil, err - } - os.Exit(0) - } - - wd, _ := os.Getwd() - - // Try to run the reflection program in the current working directory. - if p, err := runInDir(program, wd); err == nil { - return p, nil - } - - // Try to run the program in the same directory as the input package. - if p, err := build.Import(importPath, wd, build.FindOnly); err == nil { - dir := p.Dir - if p, err := runInDir(program, dir); err == nil { - return p, nil - } - } - - // Try to run it in a standard temp directory. - return runInDir(program, "") -} - -func writeProgram(importPath string, symbols []string) ([]byte, error) { - var program bytes.Buffer - data := reflectData{ - ImportPath: importPath, - Symbols: symbols, - } - if err := reflectProgram.Execute(&program, &data); err != nil { - return nil, err - } - return program.Bytes(), nil -} - -// run the given program and parse the output as a model.Package. -func run(program string) (*model.Package, error) { - f, err := os.CreateTemp("", "") - if err != nil { - return nil, err - } - - filename := f.Name() - defer os.Remove(filename) - if err := f.Close(); err != nil { - return nil, err - } - - // Run the program. - cmd := exec.Command(program, "-output", filename) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - return nil, err - } - - f, err = os.Open(filename) - if err != nil { - return nil, err - } - - // Process output. - var pkg model.Package - if err := gob.NewDecoder(f).Decode(&pkg); err != nil { - return nil, err - } - - if err := f.Close(); err != nil { - return nil, err - } - - return &pkg, nil -} - -// runInDir writes the given program into the given dir, runs it there, and -// parses the output as a model.Package. -func runInDir(program []byte, dir string) (*model.Package, error) { - // We use TempDir instead of TempFile so we can control the filename. - tmpDir, err := os.MkdirTemp(dir, "gomock_reflect_") - if err != nil { - return nil, err - } - defer func() { - if err := os.RemoveAll(tmpDir); err != nil { - log.Printf("failed to remove temp directory: %s", err) - } - }() - const progSource = "prog.go" - var progBinary = "prog.bin" - if runtime.GOOS == "windows" { - // Windows won't execute a program unless it has a ".exe" suffix. - progBinary += ".exe" - } - - if err := os.WriteFile(filepath.Join(tmpDir, progSource), program, 0600); err != nil { - return nil, err - } - - cmdArgs := []string{} - cmdArgs = append(cmdArgs, "build") - if *buildFlags != "" { - cmdArgs = append(cmdArgs, strings.Split(*buildFlags, " ")...) - } - cmdArgs = append(cmdArgs, "-o", progBinary, progSource) - - // Build the program. - buf := bytes.NewBuffer(nil) - cmd := exec.Command("go", cmdArgs...) - cmd.Dir = tmpDir - cmd.Stdout = os.Stdout - cmd.Stderr = io.MultiWriter(os.Stderr, buf) - if err := cmd.Run(); err != nil { - sErr := buf.String() - if strings.Contains(sErr, `cannot find package "."`) && - strings.Contains(sErr, "go.uber.org/mock/mockgen/model") { - fmt.Fprint(os.Stderr, "Please reference the steps in the README to fix this error:\n\thttps://go.uber.org/mock#reflect-vendoring-error.\n") - return nil, err - } - return nil, err - } - - return run(filepath.Join(tmpDir, progBinary)) -} - -type reflectData struct { - ImportPath string - Symbols []string -} - -// This program reflects on an interface value, and prints the -// gob encoding of a model.Package to standard output. -// JSON doesn't work because of the model.Type interface. -var reflectProgram = template.Must(template.New("program").Parse(` -// Code generated by MockGen. DO NOT EDIT. -package main - -import ( - "encoding/gob" - "flag" - "fmt" - "os" - "path" - "reflect" - - "go.uber.org/mock/mockgen/model" - - pkg_ {{printf "%q" .ImportPath}} -) - -var output = flag.String("output", "", "The output file name, or empty to use stdout.") - -func main() { - flag.Parse() - - its := []struct{ - sym string - typ reflect.Type - }{ - {{range .Symbols}} - { {{printf "%q" .}}, reflect.TypeOf((*pkg_.{{.}})(nil)).Elem()}, - {{end}} - } - pkg := &model.Package{ - // NOTE: This behaves contrary to documented behaviour if the - // package name is not the final component of the import path. - // The reflect package doesn't expose the package name, though. - Name: path.Base({{printf "%q" .ImportPath}}), - } - - for _, it := range its { - intf, err := model.InterfaceFromInterfaceType(it.typ) - if err != nil { - fmt.Fprintf(os.Stderr, "Reflection: %v\n", err) - os.Exit(1) - } - intf.Name = it.sym - pkg.Interfaces = append(pkg.Interfaces, intf) - } - - outfile := os.Stdout - if len(*output) != 0 { - var err error - outfile, err = os.Create(*output) - if err != nil { - fmt.Fprintf(os.Stderr, "failed to open output file %q", *output) - } - defer func() { - if err := outfile.Close(); err != nil { - fmt.Fprintf(os.Stderr, "failed to close output file %q", *output) - os.Exit(1) - } - }() - } - - if err := gob.NewEncoder(outfile).Encode(pkg); err != nil { - fmt.Fprintf(os.Stderr, "gob encode: %v\n", err) - os.Exit(1) - } -} -`)) diff --git a/vendor/golang.org/x/arch/LICENSE b/vendor/golang.org/x/arch/LICENSE index d29b3726..686d8a91 100644 --- a/vendor/golang.org/x/arch/LICENSE +++ b/vendor/golang.org/x/arch/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2015 The Go Authors. All rights reserved. +Copyright 2015 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/arch/x86/x86asm/gnu.go b/vendor/golang.org/x/arch/x86/x86asm/gnu.go index 75cff72b..8eba1fd0 100644 --- a/vendor/golang.org/x/arch/x86/x86asm/gnu.go +++ b/vendor/golang.org/x/arch/x86/x86asm/gnu.go @@ -10,7 +10,7 @@ import ( ) // GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. -// This general form is often called ``AT&T syntax'' as a reference to AT&T System V Unix. +// This general form is often called “AT&T syntax” as a reference to AT&T System V Unix. func GNUSyntax(inst Inst, pc uint64, symname SymLookup) string { // Rewrite instruction to mimic GNU peculiarities. // Note that inst has been passed by value and contains diff --git a/vendor/golang.org/x/arch/x86/x86asm/inst.go b/vendor/golang.org/x/arch/x86/x86asm/inst.go index 4632b506..e98f1a84 100644 --- a/vendor/golang.org/x/arch/x86/x86asm/inst.go +++ b/vendor/golang.org/x/arch/x86/x86asm/inst.go @@ -144,7 +144,7 @@ type Arg interface { // the interface value instead of requiring an allocation. // A Reg is a single register. -// The zero Reg value has no name but indicates ``no register.'' +// The zero Reg value has no name but indicates “no register.” type Reg uint8 const ( diff --git a/vendor/golang.org/x/crypto/LICENSE b/vendor/golang.org/x/crypto/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/crypto/LICENSE +++ b/vendor/golang.org/x/crypto/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s index 9ae8206c..f75162e0 100644 --- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s +++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s @@ -1,722 +1,4517 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blake2bAVX2_amd64_asm.go -out ../../blake2bAVX2_amd64.s -pkg blake2b. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b -DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b -DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179 -GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403 -DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302 -DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32 - -DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b -DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b -DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 -GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16 - -#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39 -#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93 -#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e -#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93 -#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39 - -#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \ - VPADDQ m0, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFD $-79, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPSHUFB c40, Y1, Y1; \ - VPADDQ m1, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFB c48, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPADDQ Y1, Y1, t; \ - VPSRLQ $63, Y1, Y1; \ - VPXOR t, Y1, Y1; \ - VPERMQ_0x39_Y1_Y1; \ - VPERMQ_0x4E_Y2_Y2; \ - VPERMQ_0x93_Y3_Y3; \ - VPADDQ m2, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFD $-79, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPSHUFB c40, Y1, Y1; \ - VPADDQ m3, Y0, Y0; \ - VPADDQ Y1, Y0, Y0; \ - VPXOR Y0, Y3, Y3; \ - VPSHUFB c48, Y3, Y3; \ - VPADDQ Y3, Y2, Y2; \ - VPXOR Y2, Y1, Y1; \ - VPADDQ Y1, Y1, t; \ - VPSRLQ $63, Y1, Y1; \ - VPXOR t, Y1, Y1; \ - VPERMQ_0x39_Y3_Y3; \ - VPERMQ_0x4E_Y2_Y2; \ - VPERMQ_0x93_Y1_Y1 - -#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E -#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26 -#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E -#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36 -#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E - -#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n -#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n -#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n -#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n -#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n - -#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01 -#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01 -#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01 -#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01 -#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01 - -#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01 -#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01 - -#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8 -#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01 - -// load msg: Y12 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \ - VMOVQ_SI_X12(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X12(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y12, Y12 - -// load msg: Y13 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \ - VMOVQ_SI_X13(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X13(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y13, Y13 - -// load msg: Y14 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \ - VMOVQ_SI_X14(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X14(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y14, Y14 - -// load msg: Y15 = (i0, i1, i2, i3) -// i0, i1, i2, i3 must not be 0 -#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \ - VMOVQ_SI_X15(i0*8); \ - VMOVQ_SI_X11(i2*8); \ - VPINSRQ_1_SI_X15(i1*8); \ - VPINSRQ_1_SI_X11(i3*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \ - VMOVQ_SI_X12_0; \ - VMOVQ_SI_X11(4*8); \ - VPINSRQ_1_SI_X12(2*8); \ - VPINSRQ_1_SI_X11(6*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \ - LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \ - LOAD_MSG_AVX2_Y15(9, 11, 13, 15) - -#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \ - LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \ - LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \ - VMOVQ_SI_X11(11*8); \ - VPSHUFD $0x4E, 0*8(SI), X14; \ - VPINSRQ_1_SI_X11(5*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - LOAD_MSG_AVX2_Y15(12, 2, 7, 3) - -#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \ - VMOVQ_SI_X11(5*8); \ - VMOVDQU 11*8(SI), X12; \ - VPINSRQ_1_SI_X11(15*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - VMOVQ_SI_X13(8*8); \ - VMOVQ_SI_X11(2*8); \ - VPINSRQ_1_SI_X13_0; \ - VPINSRQ_1_SI_X11(13*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \ - LOAD_MSG_AVX2_Y15(14, 6, 1, 4) - -#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \ - LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \ - LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \ - LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \ - VMOVQ_SI_X15(6*8); \ - VMOVQ_SI_X11_0; \ - VPINSRQ_1_SI_X15(10*8); \ - VPINSRQ_1_SI_X11(8*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \ - LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \ - VMOVQ_SI_X13_0; \ - VMOVQ_SI_X11(4*8); \ - VPINSRQ_1_SI_X13(7*8); \ - VPINSRQ_1_SI_X11(15*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \ - LOAD_MSG_AVX2_Y15(1, 12, 8, 13) - -#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X11_0; \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X11(8*8); \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \ - LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \ - LOAD_MSG_AVX2_Y15(13, 5, 14, 9) - -#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \ - LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \ - LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \ - VMOVQ_SI_X14_0; \ - VPSHUFD $0x4E, 8*8(SI), X11; \ - VPINSRQ_1_SI_X14(6*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - LOAD_MSG_AVX2_Y15(7, 3, 2, 11) - -#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \ - LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \ - LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \ - LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \ - VMOVQ_SI_X15_0; \ - VMOVQ_SI_X11(6*8); \ - VPINSRQ_1_SI_X15(4*8); \ - VPINSRQ_1_SI_X11(10*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \ - VMOVQ_SI_X12(6*8); \ - VMOVQ_SI_X11(11*8); \ - VPINSRQ_1_SI_X12(14*8); \ - VPINSRQ_1_SI_X11_0; \ - VINSERTI128 $1, X11, Y12, Y12; \ - LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \ - VMOVQ_SI_X11(1*8); \ - VMOVDQU 12*8(SI), X14; \ - VPINSRQ_1_SI_X11(10*8); \ - VINSERTI128 $1, X11, Y14, Y14; \ - VMOVQ_SI_X15(2*8); \ - VMOVDQU 4*8(SI), X11; \ - VPINSRQ_1_SI_X15(7*8); \ - VINSERTI128 $1, X11, Y15, Y15 - -#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \ - LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \ - VMOVQ_SI_X13(2*8); \ - VPSHUFD $0x4E, 5*8(SI), X11; \ - VPINSRQ_1_SI_X13(4*8); \ - VINSERTI128 $1, X11, Y13, Y13; \ - LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \ - VMOVQ_SI_X15(11*8); \ - VMOVQ_SI_X11(12*8); \ - VPINSRQ_1_SI_X15(14*8); \ - VPINSRQ_1_SI_X11_0; \ - VINSERTI128 $1, X11, Y15, Y15 - // func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, DX - ADDQ $31, DX - ANDQ $~31, DX - - MOVQ CX, 16(DX) - XORQ CX, CX - MOVQ CX, 24(DX) - - VMOVDQU ·AVX2_c40<>(SB), Y4 - VMOVDQU ·AVX2_c48<>(SB), Y5 - - VMOVDQU 0(AX), Y8 +// Requires: AVX, AVX2 +TEXT ·hashBlocksAVX2(SB), NOSPLIT, $320-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, DX + ADDQ $+31, DX + ANDQ $-32, DX + MOVQ CX, 16(DX) + XORQ CX, CX + MOVQ CX, 24(DX) + VMOVDQU ·AVX2_c40<>+0(SB), Y4 + VMOVDQU ·AVX2_c48<>+0(SB), Y5 + VMOVDQU (AX), Y8 VMOVDQU 32(AX), Y9 - VMOVDQU ·AVX2_iv0<>(SB), Y6 - VMOVDQU ·AVX2_iv1<>(SB), Y7 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 - MOVQ R9, 8(DX) + VMOVDQU ·AVX2_iv0<>+0(SB), Y6 + VMOVDQU ·AVX2_iv1<>+0(SB), Y7 + MOVQ (BX), R8 + MOVQ 8(BX), R9 + MOVQ R9, 8(DX) loop: - ADDQ $128, R8 - MOVQ R8, 0(DX) - CMPQ R8, $128 + ADDQ $0x80, R8 + MOVQ R8, (DX) + CMPQ R8, $0x80 JGE noinc INCQ R9 MOVQ R9, 8(DX) noinc: - VMOVDQA Y8, Y0 - VMOVDQA Y9, Y1 - VMOVDQA Y6, Y2 - VPXOR 0(DX), Y7, Y3 - - LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() - VMOVDQA Y12, 32(DX) - VMOVDQA Y13, 64(DX) - VMOVDQA Y14, 96(DX) - VMOVDQA Y15, 128(DX) - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() - VMOVDQA Y12, 160(DX) - VMOVDQA Y13, 192(DX) - VMOVDQA Y14, 224(DX) - VMOVDQA Y15, 256(DX) - - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() - ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) - - ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5) - ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5) - - VPXOR Y0, Y8, Y8 - VPXOR Y1, Y9, Y9 - VPXOR Y2, Y8, Y8 - VPXOR Y3, Y9, Y9 - - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop - - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - - VMOVDQU Y8, 0(AX) - VMOVDQU Y9, 32(AX) + VMOVDQA Y8, Y0 + VMOVDQA Y9, Y1 + VMOVDQA Y6, Y2 + VPXOR (DX), Y7, Y3 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x26 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x28 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x38 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x70 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VMOVDQA Y12, 32(DX) + VMOVDQA Y13, 64(DX) + VMOVDQA Y14, 96(DX) + VMOVDQA Y15, 128(DX) + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x48 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + VPSHUFD $0x4e, (SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x28 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VMOVDQA Y12, 160(DX) + VMOVDQA Y13, 192(DX) + VMOVDQA Y14, 224(DX) + VMOVDQA Y15, 256(DX) + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x28 + VMOVDQU 88(SI), X12 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x2e + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x58 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x70 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x1e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x2e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x78 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x60 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x1e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x08 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x36 + VPSHUFD $0x4e, 64(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x30 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x58 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x18 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x48 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x10 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x3e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x1e + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x18 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x40 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x08 + VMOVDQU 96(SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x50 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x10 + VMOVDQU 32(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x38 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x08 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y12, Y12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x10 + VPSHUFD $0x4e, 40(SI), X11 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x20 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y13, Y13 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x18 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x5e + BYTE $0x68 + BYTE $0x01 + VINSERTI128 $0x01, X11, Y14, Y14 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x5e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0xa1 + BYTE $0x22 + BYTE $0x1e + BYTE $0x01 + VINSERTI128 $0x01, X11, Y15, Y15 + VPADDQ Y12, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y13, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ Y14, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ Y15, Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPADDQ 32(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 64(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ 96(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 128(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPADDQ 160(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 192(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x93 + VPADDQ 224(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFD $-79, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPSHUFB Y4, Y1, Y1 + VPADDQ 256(DX), Y0, Y0 + VPADDQ Y1, Y0, Y0 + VPXOR Y0, Y3, Y3 + VPSHUFB Y5, Y3, Y3 + VPADDQ Y3, Y2, Y2 + VPXOR Y2, Y1, Y1 + VPADDQ Y1, Y1, Y10 + VPSRLQ $0x3f, Y1, Y1 + VPXOR Y10, Y1, Y1 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xdb + BYTE $0x39 + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xd2 + BYTE $0x4e + BYTE $0xc4 + BYTE $0xe3 + BYTE $0xfd + BYTE $0x00 + BYTE $0xc9 + BYTE $0x93 + VPXOR Y0, Y8, Y8 + VPXOR Y1, Y9, Y9 + VPXOR Y2, Y8, Y8 + VPXOR Y3, Y9, Y9 + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + MOVQ R8, (BX) + MOVQ R9, 8(BX) + VMOVDQU Y8, (AX) + VMOVDQU Y9, 32(AX) VZEROUPPER - RET -#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA -#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB -#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF -#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD -#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE - -#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7 -#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF -#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7 -#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF -#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7 -#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7 -#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF -#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF - -#define SHUFFLE_AVX() \ - VMOVDQA X6, X13; \ - VMOVDQA X2, X14; \ - VMOVDQA X4, X6; \ - VPUNPCKLQDQ_X13_X13_X15; \ - VMOVDQA X5, X4; \ - VMOVDQA X6, X5; \ - VPUNPCKHQDQ_X15_X7_X6; \ - VPUNPCKLQDQ_X7_X7_X15; \ - VPUNPCKHQDQ_X15_X13_X7; \ - VPUNPCKLQDQ_X3_X3_X15; \ - VPUNPCKHQDQ_X15_X2_X2; \ - VPUNPCKLQDQ_X14_X14_X15; \ - VPUNPCKHQDQ_X15_X3_X3; \ - -#define SHUFFLE_AVX_INV() \ - VMOVDQA X2, X13; \ - VMOVDQA X4, X14; \ - VPUNPCKLQDQ_X2_X2_X15; \ - VMOVDQA X5, X4; \ - VPUNPCKHQDQ_X15_X3_X2; \ - VMOVDQA X14, X5; \ - VPUNPCKLQDQ_X3_X3_X15; \ - VMOVDQA X6, X14; \ - VPUNPCKHQDQ_X15_X13_X3; \ - VPUNPCKLQDQ_X7_X7_X15; \ - VPUNPCKHQDQ_X15_X6_X6; \ - VPUNPCKLQDQ_X14_X14_X15; \ - VPUNPCKHQDQ_X15_X7_X7; \ - -#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ - VPADDQ m0, v0, v0; \ - VPADDQ v2, v0, v0; \ - VPADDQ m1, v1, v1; \ - VPADDQ v3, v1, v1; \ - VPXOR v0, v6, v6; \ - VPXOR v1, v7, v7; \ - VPSHUFD $-79, v6, v6; \ - VPSHUFD $-79, v7, v7; \ - VPADDQ v6, v4, v4; \ - VPADDQ v7, v5, v5; \ - VPXOR v4, v2, v2; \ - VPXOR v5, v3, v3; \ - VPSHUFB c40, v2, v2; \ - VPSHUFB c40, v3, v3; \ - VPADDQ m2, v0, v0; \ - VPADDQ v2, v0, v0; \ - VPADDQ m3, v1, v1; \ - VPADDQ v3, v1, v1; \ - VPXOR v0, v6, v6; \ - VPXOR v1, v7, v7; \ - VPSHUFB c48, v6, v6; \ - VPSHUFB c48, v7, v7; \ - VPADDQ v6, v4, v4; \ - VPADDQ v7, v5, v5; \ - VPXOR v4, v2, v2; \ - VPXOR v5, v3, v3; \ - VPADDQ v2, v2, t0; \ - VPSRLQ $63, v2, v2; \ - VPXOR t0, v2, v2; \ - VPADDQ v3, v3, t0; \ - VPSRLQ $63, v3, v3; \ - VPXOR t0, v3, v3 - -// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7) -// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0 -#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \ - VMOVQ_SI_X12(i0*8); \ - VMOVQ_SI_X13(i2*8); \ - VMOVQ_SI_X14(i4*8); \ - VMOVQ_SI_X15(i6*8); \ - VPINSRQ_1_SI_X12(i1*8); \ - VPINSRQ_1_SI_X13(i3*8); \ - VPINSRQ_1_SI_X14(i5*8); \ - VPINSRQ_1_SI_X15(i7*8) - -// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7) -#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \ - VMOVQ_SI_X12_0; \ - VMOVQ_SI_X13(4*8); \ - VMOVQ_SI_X14(1*8); \ - VMOVQ_SI_X15(5*8); \ - VPINSRQ_1_SI_X12(2*8); \ - VPINSRQ_1_SI_X13(6*8); \ - VPINSRQ_1_SI_X14(3*8); \ - VPINSRQ_1_SI_X15(7*8) - -// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3) -#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \ - VPSHUFD $0x4E, 0*8(SI), X12; \ - VMOVQ_SI_X13(11*8); \ - VMOVQ_SI_X14(12*8); \ - VMOVQ_SI_X15(7*8); \ - VPINSRQ_1_SI_X13(5*8); \ - VPINSRQ_1_SI_X14(2*8); \ - VPINSRQ_1_SI_X15(3*8) - -// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13) -#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \ - VMOVDQU 11*8(SI), X12; \ - VMOVQ_SI_X13(5*8); \ - VMOVQ_SI_X14(8*8); \ - VMOVQ_SI_X15(2*8); \ - VPINSRQ_1_SI_X13(15*8); \ - VPINSRQ_1_SI_X14_0; \ - VPINSRQ_1_SI_X15(13*8) - -// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8) -#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X13(4*8); \ - VMOVQ_SI_X14(6*8); \ - VMOVQ_SI_X15_0; \ - VPINSRQ_1_SI_X12(5*8); \ - VPINSRQ_1_SI_X13(15*8); \ - VPINSRQ_1_SI_X14(10*8); \ - VPINSRQ_1_SI_X15(8*8) +DATA ·AVX2_c40<>+0(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +DATA ·AVX2_c40<>+16(SB)/8, $0x0201000706050403 +DATA ·AVX2_c40<>+24(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX2_c40<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15) -#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \ - VMOVQ_SI_X12(9*8); \ - VMOVQ_SI_X13(2*8); \ - VMOVQ_SI_X14_0; \ - VMOVQ_SI_X15(4*8); \ - VPINSRQ_1_SI_X12(5*8); \ - VPINSRQ_1_SI_X13(10*8); \ - VPINSRQ_1_SI_X14(7*8); \ - VPINSRQ_1_SI_X15(15*8) +DATA ·AVX2_c48<>+0(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +DATA ·AVX2_c48<>+16(SB)/8, $0x0100070605040302 +DATA ·AVX2_c48<>+24(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX2_c48<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3) -#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \ - VMOVQ_SI_X12(2*8); \ - VMOVQ_SI_X13_0; \ - VMOVQ_SI_X14(12*8); \ - VMOVQ_SI_X15(11*8); \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X13(8*8); \ - VPINSRQ_1_SI_X14(10*8); \ - VPINSRQ_1_SI_X15(3*8) +DATA ·AVX2_iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX2_iv0<>+8(SB)/8, $0xbb67ae8584caa73b +DATA ·AVX2_iv0<>+16(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX2_iv0<>+24(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX2_iv0<>(SB), RODATA|NOPTR, $32 -// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11) -#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \ - MOVQ 0*8(SI), X12; \ - VPSHUFD $0x4E, 8*8(SI), X13; \ - MOVQ 7*8(SI), X14; \ - MOVQ 2*8(SI), X15; \ - VPINSRQ_1_SI_X12(6*8); \ - VPINSRQ_1_SI_X14(3*8); \ - VPINSRQ_1_SI_X15(11*8) - -// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8) -#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \ - MOVQ 6*8(SI), X12; \ - MOVQ 11*8(SI), X13; \ - MOVQ 15*8(SI), X14; \ - MOVQ 3*8(SI), X15; \ - VPINSRQ_1_SI_X12(14*8); \ - VPINSRQ_1_SI_X13_0; \ - VPINSRQ_1_SI_X14(9*8); \ - VPINSRQ_1_SI_X15(8*8) - -// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10) -#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \ - MOVQ 5*8(SI), X12; \ - MOVQ 8*8(SI), X13; \ - MOVQ 0*8(SI), X14; \ - MOVQ 6*8(SI), X15; \ - VPINSRQ_1_SI_X12(15*8); \ - VPINSRQ_1_SI_X13(2*8); \ - VPINSRQ_1_SI_X14(4*8); \ - VPINSRQ_1_SI_X15(10*8) - -// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5) -#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \ - VMOVDQU 12*8(SI), X12; \ - MOVQ 1*8(SI), X13; \ - MOVQ 2*8(SI), X14; \ - VPINSRQ_1_SI_X13(10*8); \ - VPINSRQ_1_SI_X14(7*8); \ - VMOVDQU 4*8(SI), X15 - -// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0) -#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \ - MOVQ 15*8(SI), X12; \ - MOVQ 3*8(SI), X13; \ - MOVQ 11*8(SI), X14; \ - MOVQ 12*8(SI), X15; \ - VPINSRQ_1_SI_X12(9*8); \ - VPINSRQ_1_SI_X13(13*8); \ - VPINSRQ_1_SI_X14(14*8); \ - VPINSRQ_1_SI_X15_0 +DATA ·AVX2_iv1<>+0(SB)/8, $0x510e527fade682d1 +DATA ·AVX2_iv1<>+8(SB)/8, $0x9b05688c2b3e6c1f +DATA ·AVX2_iv1<>+16(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX2_iv1<>+24(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX2_iv1<>(SB), RODATA|NOPTR, $32 // func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, R10 - ADDQ $15, R10 - ANDQ $~15, R10 - - VMOVDQU ·AVX_c40<>(SB), X0 - VMOVDQU ·AVX_c48<>(SB), X1 +// Requires: AVX, SSE2 +TEXT ·hashBlocksAVX(SB), NOSPLIT, $288-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, R10 + ADDQ $0x0f, R10 + ANDQ $-16, R10 + VMOVDQU ·AVX_c40<>+0(SB), X0 + VMOVDQU ·AVX_c48<>+0(SB), X1 VMOVDQA X0, X8 VMOVDQA X1, X9 - - VMOVDQU ·AVX_iv3<>(SB), X0 - VMOVDQA X0, 0(R10) - XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0) - - VMOVDQU 0(AX), X10 + VMOVDQU ·AVX_iv3<>+0(SB), X0 + VMOVDQA X0, (R10) + XORQ CX, (R10) + VMOVDQU (AX), X10 VMOVDQU 16(AX), X11 VMOVDQU 32(AX), X2 VMOVDQU 48(AX), X3 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 + MOVQ (BX), R8 + MOVQ 8(BX), R9 loop: - ADDQ $128, R8 - CMPQ R8, $128 + ADDQ $0x80, R8 + CMPQ R8, $0x80 JGE noinc INCQ R9 noinc: - VMOVQ_R8_X15 - VPINSRQ_1_R9_X15 - + BYTE $0xc4 + BYTE $0x41 + BYTE $0xf9 + BYTE $0x6e + BYTE $0xf8 + BYTE $0xc4 + BYTE $0x43 + BYTE $0x81 + BYTE $0x22 + BYTE $0xf9 + BYTE $0x01 VMOVDQA X10, X0 VMOVDQA X11, X1 - VMOVDQU ·AVX_iv0<>(SB), X4 - VMOVDQU ·AVX_iv1<>(SB), X5 - VMOVDQU ·AVX_iv2<>(SB), X6 - + VMOVDQU ·AVX_iv0<>+0(SB), X4 + VMOVDQU ·AVX_iv1<>+0(SB), X5 + VMOVDQU ·AVX_iv2<>+0(SB), X6 VPXOR X15, X6, X6 - VMOVDQA 0(R10), X7 - - LOAD_MSG_AVX_0_2_4_6_1_3_5_7() + VMOVDQA (R10), X7 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x26 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x28 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x38 + BYTE $0x01 VMOVDQA X12, 16(R10) VMOVDQA X13, 32(R10) VMOVDQA X14, 48(R10) VMOVDQA X15, 64(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15) + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x78 + BYTE $0x01 VMOVDQA X12, 80(R10) VMOVDQA X13, 96(R10) VMOVDQA X14, 112(R10) VMOVDQA X15, 128(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6) + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x78 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x68 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x30 + BYTE $0x01 VMOVDQA X12, 144(R10) VMOVDQA X13, 160(R10) VMOVDQA X14, 176(R10) VMOVDQA X15, 192(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_1_0_11_5_12_2_7_3() + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPSHUFD $0x4e, (SI), X12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x38 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 VMOVDQA X12, 208(R10) VMOVDQA X13, 224(R10) VMOVDQA X14, 240(R10) VMOVDQA X15, 256(R10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_11_12_5_15_8_0_2_13() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_2_5_4_15_6_10_0_8() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_9_5_2_10_0_7_4_15() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_2_6_0_8_12_10_11_3() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_0_6_9_8_7_3_2_11() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_5_15_8_2_0_4_6_10() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX_6_14_11_0_15_9_3_8() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_12_13_1_10_2_7_4_5() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5) - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX() - LOAD_MSG_AVX_15_9_3_13_11_14_12_0() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) - SHUFFLE_AVX_INV() - - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9) - SHUFFLE_AVX() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9) - SHUFFLE_AVX_INV() - - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9) - SHUFFLE_AVX() - HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9) - SHUFFLE_AVX_INV() - + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VMOVDQU 88(SI), X12 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x40 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x10 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x36 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x68 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x20 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x60 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x70 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x3e + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x40 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x48 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x36 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x20 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x78 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x30 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x08 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x40 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x58 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x60 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x68 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x2e + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x58 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x18 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x20 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x78 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x70 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x28 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x48 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x70 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x28 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x68 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ (SI), X12 + VPSHUFD $0x4e, 64(SI), X13 + MOVQ 56(SI), X14 + MOVQ 16(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x30 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x58 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x68 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x60 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x58 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x08 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x38 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x18 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x48 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ 40(SI), X12 + MOVQ 64(SI), X13 + MOVQ (SI), X14 + MOVQ 48(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x78 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x10 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x50 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + MOVQ 48(SI), X12 + MOVQ 88(SI), X13 + MOVQ 120(SI), X14 + MOVQ 24(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x2e + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x40 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VMOVDQU 96(SI), X12 + MOVQ 8(SI), X13 + MOVQ 16(SI), X14 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x50 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x38 + BYTE $0x01 + VMOVDQU 32(SI), X15 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x66 + BYTE $0x50 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x6e + BYTE $0x38 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x76 + BYTE $0x10 + BYTE $0xc5 + BYTE $0x7a + BYTE $0x7e + BYTE $0x7e + BYTE $0x30 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x40 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x08 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x20 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x7e + BYTE $0x28 + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + MOVQ 120(SI), X12 + MOVQ 24(SI), X13 + MOVQ 88(SI), X14 + MOVQ 96(SI), X15 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x99 + BYTE $0x22 + BYTE $0x66 + BYTE $0x48 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x91 + BYTE $0x22 + BYTE $0x6e + BYTE $0x68 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x89 + BYTE $0x22 + BYTE $0x76 + BYTE $0x70 + BYTE $0x01 + BYTE $0xc4 + BYTE $0x63 + BYTE $0x81 + BYTE $0x22 + BYTE $0x3e + BYTE $0x01 + VPADDQ X12, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X13, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ X14, X0, X0 + VPADDQ X2, X0, X0 + VPADDQ X15, X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VPADDQ 16(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 32(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 48(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 64(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPADDQ 80(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 96(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 112(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 128(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff + VPADDQ 144(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 160(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 176(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 192(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X6, X13 + VMOVDQA X2, X14 + VMOVDQA X4, X6 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x11 + BYTE $0x6c + BYTE $0xfd + VMOVDQA X5, X4 + VMOVDQA X6, X5 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xff + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x69 + BYTE $0x6d + BYTE $0xd7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xdf + VPADDQ 208(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 224(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFD $-79, X6, X6 + VPSHUFD $-79, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPSHUFB X8, X2, X2 + VPSHUFB X8, X3, X3 + VPADDQ 240(R10), X0, X0 + VPADDQ X2, X0, X0 + VPADDQ 256(R10), X1, X1 + VPADDQ X3, X1, X1 + VPXOR X0, X6, X6 + VPXOR X1, X7, X7 + VPSHUFB X9, X6, X6 + VPSHUFB X9, X7, X7 + VPADDQ X6, X4, X4 + VPADDQ X7, X5, X5 + VPXOR X4, X2, X2 + VPXOR X5, X3, X3 + VPADDQ X2, X2, X15 + VPSRLQ $0x3f, X2, X2 + VPXOR X15, X2, X2 + VPADDQ X3, X3, X15 + VPSRLQ $0x3f, X3, X3 + VPXOR X15, X3, X3 + VMOVDQA X2, X13 + VMOVDQA X4, X14 + BYTE $0xc5 + BYTE $0x69 + BYTE $0x6c + BYTE $0xfa + VMOVDQA X5, X4 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x61 + BYTE $0x6d + BYTE $0xd7 + VMOVDQA X14, X5 + BYTE $0xc5 + BYTE $0x61 + BYTE $0x6c + BYTE $0xfb + VMOVDQA X6, X14 + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x11 + BYTE $0x6d + BYTE $0xdf + BYTE $0xc5 + BYTE $0x41 + BYTE $0x6c + BYTE $0xff + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x49 + BYTE $0x6d + BYTE $0xf7 + BYTE $0xc4 + BYTE $0x41 + BYTE $0x09 + BYTE $0x6c + BYTE $0xfe + BYTE $0xc4 + BYTE $0xc1 + BYTE $0x41 + BYTE $0x6d + BYTE $0xff VMOVDQU 32(AX), X14 VMOVDQU 48(AX), X15 VPXOR X0, X10, X10 @@ -729,16 +4524,36 @@ noinc: VPXOR X7, X15, X3 VMOVDQU X2, 32(AX) VMOVDQU X3, 48(AX) + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + VMOVDQU X10, (AX) + VMOVDQU X11, 16(AX) + MOVQ R8, (BX) + MOVQ R9, 8(BX) + VZEROUPPER + RET - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop +DATA ·AVX_c40<>+0(SB)/8, $0x0201000706050403 +DATA ·AVX_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·AVX_c40<>(SB), RODATA|NOPTR, $16 - VMOVDQU X10, 0(AX) - VMOVDQU X11, 16(AX) +DATA ·AVX_c48<>+0(SB)/8, $0x0100070605040302 +DATA ·AVX_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·AVX_c48<>(SB), RODATA|NOPTR, $16 - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) - VZEROUPPER +DATA ·AVX_iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b +DATA ·AVX_iv3<>+8(SB)/8, $0x5be0cd19137e2179 +GLOBL ·AVX_iv3<>(SB), RODATA|NOPTR, $16 - RET +DATA ·AVX_iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·AVX_iv0<>+8(SB)/8, $0xbb67ae8584caa73b +GLOBL ·AVX_iv0<>(SB), RODATA|NOPTR, $16 + +DATA ·AVX_iv1<>+0(SB)/8, $0x3c6ef372fe94f82b +DATA ·AVX_iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·AVX_iv1<>(SB), RODATA|NOPTR, $16 + +DATA ·AVX_iv2<>+0(SB)/8, $0x510e527fade682d1 +DATA ·AVX_iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·AVX_iv2<>(SB), RODATA|NOPTR, $16 diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s index adfac00c..9a0ce212 100644 --- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s +++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s @@ -1,278 +1,1441 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blake2b_amd64_asm.go -out ../../blake2b_amd64.s -pkg blake2b. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 -DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b -GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b -DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 -GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1 -DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f -GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16 - -DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b -DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 -GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16 - -DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 -DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b -GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 - -DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 -DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a -GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 - -#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v6, t1; \ - PUNPCKLQDQ v6, t2; \ - PUNPCKHQDQ v7, v6; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ v7, t2; \ - MOVO t1, v7; \ - MOVO v2, t1; \ - PUNPCKHQDQ t2, v7; \ - PUNPCKLQDQ v3, t2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v3 - -#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ - MOVO v4, t1; \ - MOVO v5, v4; \ - MOVO t1, v5; \ - MOVO v2, t1; \ - PUNPCKLQDQ v2, t2; \ - PUNPCKHQDQ v3, v2; \ - PUNPCKHQDQ t2, v2; \ - PUNPCKLQDQ v3, t2; \ - MOVO t1, v3; \ - MOVO v6, t1; \ - PUNPCKHQDQ t2, v3; \ - PUNPCKLQDQ v7, t2; \ - PUNPCKHQDQ t2, v6; \ - PUNPCKLQDQ t1, t2; \ - PUNPCKHQDQ t2, v7 - -#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ - PADDQ m0, v0; \ - PADDQ m1, v1; \ - PADDQ v2, v0; \ - PADDQ v3, v1; \ - PXOR v0, v6; \ - PXOR v1, v7; \ - PSHUFD $0xB1, v6, v6; \ - PSHUFD $0xB1, v7, v7; \ - PADDQ v6, v4; \ - PADDQ v7, v5; \ - PXOR v4, v2; \ - PXOR v5, v3; \ - PSHUFB c40, v2; \ - PSHUFB c40, v3; \ - PADDQ m2, v0; \ - PADDQ m3, v1; \ - PADDQ v2, v0; \ - PADDQ v3, v1; \ - PXOR v0, v6; \ - PXOR v1, v7; \ - PSHUFB c48, v6; \ - PSHUFB c48, v7; \ - PADDQ v6, v4; \ - PADDQ v7, v5; \ - PXOR v4, v2; \ - PXOR v5, v3; \ - MOVOU v2, t0; \ - PADDQ v2, t0; \ - PSRLQ $63, v2; \ - PXOR t0, v2; \ - MOVOU v3, t0; \ - PADDQ v3, t0; \ - PSRLQ $63, v3; \ - PXOR t0, v3 - -#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \ - MOVQ i0*8(src), m0; \ - PINSRQ $1, i1*8(src), m0; \ - MOVQ i2*8(src), m1; \ - PINSRQ $1, i3*8(src), m1; \ - MOVQ i4*8(src), m2; \ - PINSRQ $1, i5*8(src), m2; \ - MOVQ i6*8(src), m3; \ - PINSRQ $1, i7*8(src), m3 - // func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) -TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment - MOVQ h+0(FP), AX - MOVQ c+8(FP), BX - MOVQ flag+16(FP), CX - MOVQ blocks_base+24(FP), SI - MOVQ blocks_len+32(FP), DI - - MOVQ SP, R10 - ADDQ $15, R10 - ANDQ $~15, R10 - - MOVOU ·iv3<>(SB), X0 - MOVO X0, 0(R10) - XORQ CX, 0(R10) // 0(R10) = ·iv3 ^ (CX || 0) - - MOVOU ·c40<>(SB), X13 - MOVOU ·c48<>(SB), X14 - - MOVOU 0(AX), X12 +// Requires: SSE2, SSE4.1, SSSE3 +TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVQ flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DI + MOVQ SP, R10 + ADDQ $0x0f, R10 + ANDQ $-16, R10 + MOVOU ·iv3<>+0(SB), X0 + MOVO X0, (R10) + XORQ CX, (R10) + MOVOU ·c40<>+0(SB), X13 + MOVOU ·c48<>+0(SB), X14 + MOVOU (AX), X12 MOVOU 16(AX), X15 - - MOVQ 0(BX), R8 - MOVQ 8(BX), R9 + MOVQ (BX), R8 + MOVQ 8(BX), R9 loop: - ADDQ $128, R8 - CMPQ R8, $128 + ADDQ $0x80, R8 + CMPQ R8, $0x80 JGE noinc INCQ R9 noinc: - MOVQ R8, X8 - PINSRQ $1, R9, X8 - - MOVO X12, X0 - MOVO X15, X1 - MOVOU 32(AX), X2 - MOVOU 48(AX), X3 - MOVOU ·iv0<>(SB), X4 - MOVOU ·iv1<>(SB), X5 - MOVOU ·iv2<>(SB), X6 - - PXOR X8, X6 - MOVO 0(R10), X7 - - LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7) - MOVO X8, 16(R10) - MOVO X9, 32(R10) - MOVO X10, 48(R10) - MOVO X11, 64(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15) - MOVO X8, 80(R10) - MOVO X9, 96(R10) - MOVO X10, 112(R10) - MOVO X11, 128(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6) - MOVO X8, 144(R10) - MOVO X9, 160(R10) - MOVO X10, 176(R10) - MOVO X11, 192(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3) - MOVO X8, 208(R10) - MOVO X9, 224(R10) - MOVO X10, 240(R10) - MOVO X11, 256(R10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) - - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) + MOVQ R8, X8 + PINSRQ $0x01, R9, X8 + MOVO X12, X0 + MOVO X15, X1 + MOVOU 32(AX), X2 + MOVOU 48(AX), X3 + MOVOU ·iv0<>+0(SB), X4 + MOVOU ·iv1<>+0(SB), X5 + MOVOU ·iv2<>+0(SB), X6 + PXOR X8, X6 + MOVO (R10), X7 + MOVQ (SI), X8 + PINSRQ $0x01, 16(SI), X8 + MOVQ 32(SI), X9 + PINSRQ $0x01, 48(SI), X9 + MOVQ 8(SI), X10 + PINSRQ $0x01, 24(SI), X10 + MOVQ 40(SI), X11 + PINSRQ $0x01, 56(SI), X11 + MOVO X8, 16(R10) + MOVO X9, 32(R10) + MOVO X10, 48(R10) + MOVO X11, 64(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 64(SI), X8 + PINSRQ $0x01, 80(SI), X8 + MOVQ 96(SI), X9 + PINSRQ $0x01, 112(SI), X9 + MOVQ 72(SI), X10 + PINSRQ $0x01, 88(SI), X10 + MOVQ 104(SI), X11 + PINSRQ $0x01, 120(SI), X11 + MOVO X8, 80(R10) + MOVO X9, 96(R10) + MOVO X10, 112(R10) + MOVO X11, 128(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 112(SI), X8 + PINSRQ $0x01, 32(SI), X8 + MOVQ 72(SI), X9 + PINSRQ $0x01, 104(SI), X9 + MOVQ 80(SI), X10 + PINSRQ $0x01, 64(SI), X10 + MOVQ 120(SI), X11 + PINSRQ $0x01, 48(SI), X11 + MOVO X8, 144(R10) + MOVO X9, 160(R10) + MOVO X10, 176(R10) + MOVO X11, 192(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 8(SI), X8 + PINSRQ $0x01, (SI), X8 + MOVQ 88(SI), X9 + PINSRQ $0x01, 40(SI), X9 + MOVQ 96(SI), X10 + PINSRQ $0x01, 16(SI), X10 + MOVQ 56(SI), X11 + PINSRQ $0x01, 24(SI), X11 + MOVO X8, 208(R10) + MOVO X9, 224(R10) + MOVO X10, 240(R10) + MOVO X11, 256(R10) + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 88(SI), X8 + PINSRQ $0x01, 96(SI), X8 + MOVQ 40(SI), X9 + PINSRQ $0x01, 120(SI), X9 + MOVQ 64(SI), X10 + PINSRQ $0x01, (SI), X10 + MOVQ 16(SI), X11 + PINSRQ $0x01, 104(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 80(SI), X8 + PINSRQ $0x01, 24(SI), X8 + MOVQ 56(SI), X9 + PINSRQ $0x01, 72(SI), X9 + MOVQ 112(SI), X10 + PINSRQ $0x01, 48(SI), X10 + MOVQ 8(SI), X11 + PINSRQ $0x01, 32(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 56(SI), X8 + PINSRQ $0x01, 24(SI), X8 + MOVQ 104(SI), X9 + PINSRQ $0x01, 88(SI), X9 + MOVQ 72(SI), X10 + PINSRQ $0x01, 8(SI), X10 + MOVQ 96(SI), X11 + PINSRQ $0x01, 112(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 16(SI), X8 + PINSRQ $0x01, 40(SI), X8 + MOVQ 32(SI), X9 + PINSRQ $0x01, 120(SI), X9 + MOVQ 48(SI), X10 + PINSRQ $0x01, 80(SI), X10 + MOVQ (SI), X11 + PINSRQ $0x01, 64(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 72(SI), X8 + PINSRQ $0x01, 40(SI), X8 + MOVQ 16(SI), X9 + PINSRQ $0x01, 80(SI), X9 + MOVQ (SI), X10 + PINSRQ $0x01, 56(SI), X10 + MOVQ 32(SI), X11 + PINSRQ $0x01, 120(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 112(SI), X8 + PINSRQ $0x01, 88(SI), X8 + MOVQ 48(SI), X9 + PINSRQ $0x01, 24(SI), X9 + MOVQ 8(SI), X10 + PINSRQ $0x01, 96(SI), X10 + MOVQ 64(SI), X11 + PINSRQ $0x01, 104(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 16(SI), X8 + PINSRQ $0x01, 48(SI), X8 + MOVQ (SI), X9 + PINSRQ $0x01, 64(SI), X9 + MOVQ 96(SI), X10 + PINSRQ $0x01, 80(SI), X10 + MOVQ 88(SI), X11 + PINSRQ $0x01, 24(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 32(SI), X8 + PINSRQ $0x01, 56(SI), X8 + MOVQ 120(SI), X9 + PINSRQ $0x01, 8(SI), X9 + MOVQ 104(SI), X10 + PINSRQ $0x01, 40(SI), X10 + MOVQ 112(SI), X11 + PINSRQ $0x01, 72(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 96(SI), X8 + PINSRQ $0x01, 8(SI), X8 + MOVQ 112(SI), X9 + PINSRQ $0x01, 32(SI), X9 + MOVQ 40(SI), X10 + PINSRQ $0x01, 120(SI), X10 + MOVQ 104(SI), X11 + PINSRQ $0x01, 80(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ (SI), X8 + PINSRQ $0x01, 48(SI), X8 + MOVQ 72(SI), X9 + PINSRQ $0x01, 64(SI), X9 + MOVQ 56(SI), X10 + PINSRQ $0x01, 24(SI), X10 + MOVQ 16(SI), X11 + PINSRQ $0x01, 88(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 104(SI), X8 + PINSRQ $0x01, 56(SI), X8 + MOVQ 96(SI), X9 + PINSRQ $0x01, 24(SI), X9 + MOVQ 88(SI), X10 + PINSRQ $0x01, 112(SI), X10 + MOVQ 8(SI), X11 + PINSRQ $0x01, 72(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 40(SI), X8 + PINSRQ $0x01, 120(SI), X8 + MOVQ 64(SI), X9 + PINSRQ $0x01, 16(SI), X9 + MOVQ (SI), X10 + PINSRQ $0x01, 32(SI), X10 + MOVQ 48(SI), X11 + PINSRQ $0x01, 80(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 48(SI), X8 + PINSRQ $0x01, 112(SI), X8 + MOVQ 88(SI), X9 + PINSRQ $0x01, (SI), X9 + MOVQ 120(SI), X10 + PINSRQ $0x01, 72(SI), X10 + MOVQ 24(SI), X11 + PINSRQ $0x01, 64(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 96(SI), X8 + PINSRQ $0x01, 104(SI), X8 + MOVQ 8(SI), X9 + PINSRQ $0x01, 80(SI), X9 + MOVQ 16(SI), X10 + PINSRQ $0x01, 56(SI), X10 + MOVQ 32(SI), X11 + PINSRQ $0x01, 40(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVQ 80(SI), X8 + PINSRQ $0x01, 64(SI), X8 + MOVQ 56(SI), X9 + PINSRQ $0x01, 8(SI), X9 + MOVQ 16(SI), X10 + PINSRQ $0x01, 32(SI), X10 + MOVQ 48(SI), X11 + PINSRQ $0x01, 40(SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + MOVQ 120(SI), X8 + PINSRQ $0x01, 72(SI), X8 + MOVQ 24(SI), X9 + PINSRQ $0x01, 104(SI), X9 + MOVQ 88(SI), X10 + PINSRQ $0x01, 112(SI), X10 + MOVQ 96(SI), X11 + PINSRQ $0x01, (SI), X11 + PADDQ X8, X0 + PADDQ X9, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ X10, X0 + PADDQ X11, X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + PADDQ 16(R10), X0 + PADDQ 32(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 48(R10), X0 + PADDQ 64(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + PADDQ 80(R10), X0 + PADDQ 96(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 112(R10), X0 + PADDQ 128(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + PADDQ 144(R10), X0 + PADDQ 160(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 176(R10), X0 + PADDQ 192(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X6, X8 + PUNPCKLQDQ X6, X9 + PUNPCKHQDQ X7, X6 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X7, X9 + MOVO X8, X7 + MOVO X2, X8 + PUNPCKHQDQ X9, X7 + PUNPCKLQDQ X3, X9 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X3 + PADDQ 208(R10), X0 + PADDQ 224(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFD $0xb1, X6, X6 + PSHUFD $0xb1, X7, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + PSHUFB X13, X2 + PSHUFB X13, X3 + PADDQ 240(R10), X0 + PADDQ 256(R10), X1 + PADDQ X2, X0 + PADDQ X3, X1 + PXOR X0, X6 + PXOR X1, X7 + PSHUFB X14, X6 + PSHUFB X14, X7 + PADDQ X6, X4 + PADDQ X7, X5 + PXOR X4, X2 + PXOR X5, X3 + MOVOU X2, X11 + PADDQ X2, X11 + PSRLQ $0x3f, X2 + PXOR X11, X2 + MOVOU X3, X11 + PADDQ X3, X11 + PSRLQ $0x3f, X3 + PXOR X11, X3 + MOVO X4, X8 + MOVO X5, X4 + MOVO X8, X5 + MOVO X2, X8 + PUNPCKLQDQ X2, X9 + PUNPCKHQDQ X3, X2 + PUNPCKHQDQ X9, X2 + PUNPCKLQDQ X3, X9 + MOVO X8, X3 + MOVO X6, X8 + PUNPCKHQDQ X9, X3 + PUNPCKLQDQ X7, X9 + PUNPCKHQDQ X9, X6 + PUNPCKLQDQ X8, X9 + PUNPCKHQDQ X9, X7 + MOVOU 32(AX), X10 + MOVOU 48(AX), X11 + PXOR X0, X12 + PXOR X1, X15 + PXOR X2, X10 + PXOR X3, X11 + PXOR X4, X12 + PXOR X5, X15 + PXOR X6, X10 + PXOR X7, X11 + MOVOU X10, 32(AX) + MOVOU X11, 48(AX) + LEAQ 128(SI), SI + SUBQ $0x80, DI + JNE loop + MOVOU X12, (AX) + MOVOU X15, 16(AX) + MOVQ R8, (BX) + MOVQ R9, 8(BX) + RET - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14) - SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) - HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14) - SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) +DATA ·iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b +DATA ·iv3<>+8(SB)/8, $0x5be0cd19137e2179 +GLOBL ·iv3<>(SB), RODATA|NOPTR, $16 - MOVOU 32(AX), X10 - MOVOU 48(AX), X11 - PXOR X0, X12 - PXOR X1, X15 - PXOR X2, X10 - PXOR X3, X11 - PXOR X4, X12 - PXOR X5, X15 - PXOR X6, X10 - PXOR X7, X11 - MOVOU X10, 32(AX) - MOVOU X11, 48(AX) +DATA ·c40<>+0(SB)/8, $0x0201000706050403 +DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b +GLOBL ·c40<>(SB), RODATA|NOPTR, $16 - LEAQ 128(SI), SI - SUBQ $128, DI - JNE loop +DATA ·c48<>+0(SB)/8, $0x0100070605040302 +DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a +GLOBL ·c48<>(SB), RODATA|NOPTR, $16 - MOVOU X12, 0(AX) - MOVOU X15, 16(AX) +DATA ·iv0<>+0(SB)/8, $0x6a09e667f3bcc908 +DATA ·iv0<>+8(SB)/8, $0xbb67ae8584caa73b +GLOBL ·iv0<>(SB), RODATA|NOPTR, $16 - MOVQ R8, 0(BX) - MOVQ R9, 8(BX) +DATA ·iv1<>+0(SB)/8, $0x3c6ef372fe94f82b +DATA ·iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1 +GLOBL ·iv1<>(SB), RODATA|NOPTR, $16 - RET +DATA ·iv2<>+0(SB)/8, $0x510e527fade682d1 +DATA ·iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f +GLOBL ·iv2<>(SB), RODATA|NOPTR, $16 diff --git a/vendor/golang.org/x/crypto/blake2s/blake2s.go b/vendor/golang.org/x/crypto/blake2s/blake2s.go index e3f46aab..c25d07d4 100644 --- a/vendor/golang.org/x/crypto/blake2s/blake2s.go +++ b/vendor/golang.org/x/crypto/blake2s/blake2s.go @@ -16,9 +16,10 @@ // // BLAKE2X is a construction to compute hash values larger than 32 bytes. It // can produce hash values between 0 and 65535 bytes. -package blake2s // import "golang.org/x/crypto/blake2s" +package blake2s import ( + "crypto" "encoding/binary" "errors" "hash" @@ -55,6 +56,13 @@ func Sum256(data []byte) [Size]byte { // and BinaryUnmarshaler for state (de)serialization as documented by hash.Hash. func New256(key []byte) (hash.Hash, error) { return newDigest(Size, key) } +func init() { + crypto.RegisterHash(crypto.BLAKE2s_256, func() hash.Hash { + h, _ := New256(nil) + return h + }) +} + // New128 returns a new hash.Hash computing the BLAKE2s-128 checksum given a // non-empty key. Note that a 128-bit digest is too small to be secure as a // cryptographic hash and should only be used as a MAC, thus the key argument diff --git a/vendor/golang.org/x/crypto/blake2s/blake2s_amd64.s b/vendor/golang.org/x/crypto/blake2s/blake2s_amd64.s index fe4b818a..57d510fc 100644 --- a/vendor/golang.org/x/crypto/blake2s/blake2s_amd64.s +++ b/vendor/golang.org/x/crypto/blake2s/blake2s_amd64.s @@ -1,432 +1,2173 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run blake2s_amd64_asm.go -out ../blake2s_amd64.s -pkg blake2s. DO NOT EDIT. //go:build amd64 && gc && !purego #include "textflag.h" -DATA iv0<>+0x00(SB)/4, $0x6a09e667 -DATA iv0<>+0x04(SB)/4, $0xbb67ae85 -DATA iv0<>+0x08(SB)/4, $0x3c6ef372 -DATA iv0<>+0x0c(SB)/4, $0xa54ff53a -GLOBL iv0<>(SB), (NOPTR+RODATA), $16 - -DATA iv1<>+0x00(SB)/4, $0x510e527f -DATA iv1<>+0x04(SB)/4, $0x9b05688c -DATA iv1<>+0x08(SB)/4, $0x1f83d9ab -DATA iv1<>+0x0c(SB)/4, $0x5be0cd19 -GLOBL iv1<>(SB), (NOPTR+RODATA), $16 - -DATA rol16<>+0x00(SB)/8, $0x0504070601000302 -DATA rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A -GLOBL rol16<>(SB), (NOPTR+RODATA), $16 - -DATA rol8<>+0x00(SB)/8, $0x0407060500030201 -DATA rol8<>+0x08(SB)/8, $0x0C0F0E0D080B0A09 -GLOBL rol8<>(SB), (NOPTR+RODATA), $16 - -DATA counter<>+0x00(SB)/8, $0x40 -DATA counter<>+0x08(SB)/8, $0x0 -GLOBL counter<>(SB), (NOPTR+RODATA), $16 - -#define ROTL_SSE2(n, t, v) \ - MOVO v, t; \ - PSLLL $n, t; \ - PSRLL $(32-n), v; \ - PXOR t, v - -#define ROTL_SSSE3(c, v) \ - PSHUFB c, v - -#define ROUND_SSE2(v0, v1, v2, v3, m0, m1, m2, m3, t) \ - PADDL m0, v0; \ - PADDL v1, v0; \ - PXOR v0, v3; \ - ROTL_SSE2(16, t, v3); \ - PADDL v3, v2; \ - PXOR v2, v1; \ - ROTL_SSE2(20, t, v1); \ - PADDL m1, v0; \ - PADDL v1, v0; \ - PXOR v0, v3; \ - ROTL_SSE2(24, t, v3); \ - PADDL v3, v2; \ - PXOR v2, v1; \ - ROTL_SSE2(25, t, v1); \ - PSHUFL $0x39, v1, v1; \ - PSHUFL $0x4E, v2, v2; \ - PSHUFL $0x93, v3, v3; \ - PADDL m2, v0; \ - PADDL v1, v0; \ - PXOR v0, v3; \ - ROTL_SSE2(16, t, v3); \ - PADDL v3, v2; \ - PXOR v2, v1; \ - ROTL_SSE2(20, t, v1); \ - PADDL m3, v0; \ - PADDL v1, v0; \ - PXOR v0, v3; \ - ROTL_SSE2(24, t, v3); \ - PADDL v3, v2; \ - PXOR v2, v1; \ - ROTL_SSE2(25, t, v1); \ - PSHUFL $0x39, v3, v3; \ - PSHUFL $0x4E, v2, v2; \ - PSHUFL $0x93, v1, v1 - -#define ROUND_SSSE3(v0, v1, v2, v3, m0, m1, m2, m3, t, c16, c8) \ - PADDL m0, v0; \ - PADDL v1, v0; \ - PXOR v0, v3; \ - ROTL_SSSE3(c16, v3); \ - PADDL v3, v2; \ - PXOR v2, v1; \ - ROTL_SSE2(20, t, v1); \ - PADDL m1, v0; \ - PADDL v1, v0; \ - PXOR v0, v3; \ - ROTL_SSSE3(c8, v3); \ - PADDL v3, v2; \ - PXOR v2, v1; \ - ROTL_SSE2(25, t, v1); \ - PSHUFL $0x39, v1, v1; \ - PSHUFL $0x4E, v2, v2; \ - PSHUFL $0x93, v3, v3; \ - PADDL m2, v0; \ - PADDL v1, v0; \ - PXOR v0, v3; \ - ROTL_SSSE3(c16, v3); \ - PADDL v3, v2; \ - PXOR v2, v1; \ - ROTL_SSE2(20, t, v1); \ - PADDL m3, v0; \ - PADDL v1, v0; \ - PXOR v0, v3; \ - ROTL_SSSE3(c8, v3); \ - PADDL v3, v2; \ - PXOR v2, v1; \ - ROTL_SSE2(25, t, v1); \ - PSHUFL $0x39, v3, v3; \ - PSHUFL $0x4E, v2, v2; \ - PSHUFL $0x93, v1, v1 - - -#define LOAD_MSG_SSE4(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15) \ - MOVL i0*4(src), m0; \ - PINSRD $1, i1*4(src), m0; \ - PINSRD $2, i2*4(src), m0; \ - PINSRD $3, i3*4(src), m0; \ - MOVL i4*4(src), m1; \ - PINSRD $1, i5*4(src), m1; \ - PINSRD $2, i6*4(src), m1; \ - PINSRD $3, i7*4(src), m1; \ - MOVL i8*4(src), m2; \ - PINSRD $1, i9*4(src), m2; \ - PINSRD $2, i10*4(src), m2; \ - PINSRD $3, i11*4(src), m2; \ - MOVL i12*4(src), m3; \ - PINSRD $1, i13*4(src), m3; \ - PINSRD $2, i14*4(src), m3; \ - PINSRD $3, i15*4(src), m3 +// func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) +// Requires: SSE2 +TEXT ·hashBlocksSSE2(SB), $672-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVL flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DX + MOVQ SP, BP + ADDQ $0x0f, BP + ANDQ $-16, BP + MOVQ (BX), R9 + MOVQ R9, (BP) + MOVQ CX, 8(BP) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU iv0<>+0(SB), X2 + MOVOU iv1<>+0(SB), X3 + MOVOU counter<>+0(SB), X12 + MOVOU rol16<>+0(SB), X13 + MOVOU rol8<>+0(SB), X14 + MOVO (BP), X15 -#define PRECOMPUTE_MSG(dst, off, src, R8, R9, R10, R11, R12, R13, R14, R15) \ - MOVQ 0*4(src), R8; \ - MOVQ 2*4(src), R9; \ - MOVQ 4*4(src), R10; \ - MOVQ 6*4(src), R11; \ - MOVQ 8*4(src), R12; \ - MOVQ 10*4(src), R13; \ - MOVQ 12*4(src), R14; \ - MOVQ 14*4(src), R15; \ - \ - MOVL R8, 0*4+off+0(dst); \ - MOVL R8, 9*4+off+64(dst); \ - MOVL R8, 5*4+off+128(dst); \ - MOVL R8, 14*4+off+192(dst); \ - MOVL R8, 4*4+off+256(dst); \ - MOVL R8, 2*4+off+320(dst); \ - MOVL R8, 8*4+off+384(dst); \ - MOVL R8, 12*4+off+448(dst); \ - MOVL R8, 3*4+off+512(dst); \ - MOVL R8, 15*4+off+576(dst); \ - SHRQ $32, R8; \ - MOVL R8, 4*4+off+0(dst); \ - MOVL R8, 8*4+off+64(dst); \ - MOVL R8, 14*4+off+128(dst); \ - MOVL R8, 5*4+off+192(dst); \ - MOVL R8, 12*4+off+256(dst); \ - MOVL R8, 11*4+off+320(dst); \ - MOVL R8, 1*4+off+384(dst); \ - MOVL R8, 6*4+off+448(dst); \ - MOVL R8, 10*4+off+512(dst); \ - MOVL R8, 3*4+off+576(dst); \ - \ - MOVL R9, 1*4+off+0(dst); \ - MOVL R9, 13*4+off+64(dst); \ - MOVL R9, 6*4+off+128(dst); \ - MOVL R9, 8*4+off+192(dst); \ - MOVL R9, 2*4+off+256(dst); \ - MOVL R9, 0*4+off+320(dst); \ - MOVL R9, 14*4+off+384(dst); \ - MOVL R9, 11*4+off+448(dst); \ - MOVL R9, 12*4+off+512(dst); \ - MOVL R9, 4*4+off+576(dst); \ - SHRQ $32, R9; \ - MOVL R9, 5*4+off+0(dst); \ - MOVL R9, 15*4+off+64(dst); \ - MOVL R9, 9*4+off+128(dst); \ - MOVL R9, 1*4+off+192(dst); \ - MOVL R9, 11*4+off+256(dst); \ - MOVL R9, 7*4+off+320(dst); \ - MOVL R9, 13*4+off+384(dst); \ - MOVL R9, 3*4+off+448(dst); \ - MOVL R9, 6*4+off+512(dst); \ - MOVL R9, 10*4+off+576(dst); \ - \ - MOVL R10, 2*4+off+0(dst); \ - MOVL R10, 1*4+off+64(dst); \ - MOVL R10, 15*4+off+128(dst); \ - MOVL R10, 10*4+off+192(dst); \ - MOVL R10, 6*4+off+256(dst); \ - MOVL R10, 8*4+off+320(dst); \ - MOVL R10, 3*4+off+384(dst); \ - MOVL R10, 13*4+off+448(dst); \ - MOVL R10, 14*4+off+512(dst); \ - MOVL R10, 5*4+off+576(dst); \ - SHRQ $32, R10; \ - MOVL R10, 6*4+off+0(dst); \ - MOVL R10, 11*4+off+64(dst); \ - MOVL R10, 2*4+off+128(dst); \ - MOVL R10, 9*4+off+192(dst); \ - MOVL R10, 1*4+off+256(dst); \ - MOVL R10, 13*4+off+320(dst); \ - MOVL R10, 4*4+off+384(dst); \ - MOVL R10, 8*4+off+448(dst); \ - MOVL R10, 15*4+off+512(dst); \ - MOVL R10, 7*4+off+576(dst); \ - \ - MOVL R11, 3*4+off+0(dst); \ - MOVL R11, 7*4+off+64(dst); \ - MOVL R11, 13*4+off+128(dst); \ - MOVL R11, 12*4+off+192(dst); \ - MOVL R11, 10*4+off+256(dst); \ - MOVL R11, 1*4+off+320(dst); \ - MOVL R11, 9*4+off+384(dst); \ - MOVL R11, 14*4+off+448(dst); \ - MOVL R11, 0*4+off+512(dst); \ - MOVL R11, 6*4+off+576(dst); \ - SHRQ $32, R11; \ - MOVL R11, 7*4+off+0(dst); \ - MOVL R11, 14*4+off+64(dst); \ - MOVL R11, 10*4+off+128(dst); \ - MOVL R11, 0*4+off+192(dst); \ - MOVL R11, 5*4+off+256(dst); \ - MOVL R11, 9*4+off+320(dst); \ - MOVL R11, 12*4+off+384(dst); \ - MOVL R11, 1*4+off+448(dst); \ - MOVL R11, 13*4+off+512(dst); \ - MOVL R11, 2*4+off+576(dst); \ - \ - MOVL R12, 8*4+off+0(dst); \ - MOVL R12, 5*4+off+64(dst); \ - MOVL R12, 4*4+off+128(dst); \ - MOVL R12, 15*4+off+192(dst); \ - MOVL R12, 14*4+off+256(dst); \ - MOVL R12, 3*4+off+320(dst); \ - MOVL R12, 11*4+off+384(dst); \ - MOVL R12, 10*4+off+448(dst); \ - MOVL R12, 7*4+off+512(dst); \ - MOVL R12, 1*4+off+576(dst); \ - SHRQ $32, R12; \ - MOVL R12, 12*4+off+0(dst); \ - MOVL R12, 2*4+off+64(dst); \ - MOVL R12, 11*4+off+128(dst); \ - MOVL R12, 4*4+off+192(dst); \ - MOVL R12, 0*4+off+256(dst); \ - MOVL R12, 15*4+off+320(dst); \ - MOVL R12, 10*4+off+384(dst); \ - MOVL R12, 7*4+off+448(dst); \ - MOVL R12, 5*4+off+512(dst); \ - MOVL R12, 9*4+off+576(dst); \ - \ - MOVL R13, 9*4+off+0(dst); \ - MOVL R13, 4*4+off+64(dst); \ - MOVL R13, 8*4+off+128(dst); \ - MOVL R13, 13*4+off+192(dst); \ - MOVL R13, 3*4+off+256(dst); \ - MOVL R13, 5*4+off+320(dst); \ - MOVL R13, 7*4+off+384(dst); \ - MOVL R13, 15*4+off+448(dst); \ - MOVL R13, 11*4+off+512(dst); \ - MOVL R13, 0*4+off+576(dst); \ - SHRQ $32, R13; \ - MOVL R13, 13*4+off+0(dst); \ - MOVL R13, 10*4+off+64(dst); \ - MOVL R13, 0*4+off+128(dst); \ - MOVL R13, 3*4+off+192(dst); \ - MOVL R13, 9*4+off+256(dst); \ - MOVL R13, 6*4+off+320(dst); \ - MOVL R13, 15*4+off+384(dst); \ - MOVL R13, 4*4+off+448(dst); \ - MOVL R13, 2*4+off+512(dst); \ - MOVL R13, 12*4+off+576(dst); \ - \ - MOVL R14, 10*4+off+0(dst); \ - MOVL R14, 12*4+off+64(dst); \ - MOVL R14, 1*4+off+128(dst); \ - MOVL R14, 6*4+off+192(dst); \ - MOVL R14, 13*4+off+256(dst); \ - MOVL R14, 4*4+off+320(dst); \ - MOVL R14, 0*4+off+384(dst); \ - MOVL R14, 2*4+off+448(dst); \ - MOVL R14, 8*4+off+512(dst); \ - MOVL R14, 14*4+off+576(dst); \ - SHRQ $32, R14; \ - MOVL R14, 14*4+off+0(dst); \ - MOVL R14, 3*4+off+64(dst); \ - MOVL R14, 7*4+off+128(dst); \ - MOVL R14, 2*4+off+192(dst); \ - MOVL R14, 15*4+off+256(dst); \ - MOVL R14, 12*4+off+320(dst); \ - MOVL R14, 6*4+off+384(dst); \ - MOVL R14, 0*4+off+448(dst); \ - MOVL R14, 9*4+off+512(dst); \ - MOVL R14, 11*4+off+576(dst); \ - \ - MOVL R15, 11*4+off+0(dst); \ - MOVL R15, 0*4+off+64(dst); \ - MOVL R15, 12*4+off+128(dst); \ - MOVL R15, 7*4+off+192(dst); \ - MOVL R15, 8*4+off+256(dst); \ - MOVL R15, 14*4+off+320(dst); \ - MOVL R15, 2*4+off+384(dst); \ - MOVL R15, 5*4+off+448(dst); \ - MOVL R15, 1*4+off+512(dst); \ - MOVL R15, 13*4+off+576(dst); \ - SHRQ $32, R15; \ - MOVL R15, 15*4+off+0(dst); \ - MOVL R15, 6*4+off+64(dst); \ - MOVL R15, 3*4+off+128(dst); \ - MOVL R15, 11*4+off+192(dst); \ - MOVL R15, 7*4+off+256(dst); \ - MOVL R15, 10*4+off+320(dst); \ - MOVL R15, 5*4+off+384(dst); \ - MOVL R15, 9*4+off+448(dst); \ - MOVL R15, 4*4+off+512(dst); \ - MOVL R15, 8*4+off+576(dst) +loop: + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X3, X7 + PADDQ X12, X15 + PXOR X15, X7 + MOVQ (SI), R8 + MOVQ 8(SI), R9 + MOVQ 16(SI), R10 + MOVQ 24(SI), R11 + MOVQ 32(SI), R12 + MOVQ 40(SI), R13 + MOVQ 48(SI), R14 + MOVQ 56(SI), R15 + MOVL R8, 16(BP) + MOVL R8, 116(BP) + MOVL R8, 164(BP) + MOVL R8, 264(BP) + MOVL R8, 288(BP) + MOVL R8, 344(BP) + MOVL R8, 432(BP) + MOVL R8, 512(BP) + MOVL R8, 540(BP) + MOVL R8, 652(BP) + SHRQ $0x20, R8 + MOVL R8, 32(BP) + MOVL R8, 112(BP) + MOVL R8, 200(BP) + MOVL R8, 228(BP) + MOVL R8, 320(BP) + MOVL R8, 380(BP) + MOVL R8, 404(BP) + MOVL R8, 488(BP) + MOVL R8, 568(BP) + MOVL R8, 604(BP) + MOVL R9, 20(BP) + MOVL R9, 132(BP) + MOVL R9, 168(BP) + MOVL R9, 240(BP) + MOVL R9, 280(BP) + MOVL R9, 336(BP) + MOVL R9, 456(BP) + MOVL R9, 508(BP) + MOVL R9, 576(BP) + MOVL R9, 608(BP) + SHRQ $0x20, R9 + MOVL R9, 36(BP) + MOVL R9, 140(BP) + MOVL R9, 180(BP) + MOVL R9, 212(BP) + MOVL R9, 316(BP) + MOVL R9, 364(BP) + MOVL R9, 452(BP) + MOVL R9, 476(BP) + MOVL R9, 552(BP) + MOVL R9, 632(BP) + MOVL R10, 24(BP) + MOVL R10, 84(BP) + MOVL R10, 204(BP) + MOVL R10, 248(BP) + MOVL R10, 296(BP) + MOVL R10, 368(BP) + MOVL R10, 412(BP) + MOVL R10, 516(BP) + MOVL R10, 584(BP) + MOVL R10, 612(BP) + SHRQ $0x20, R10 + MOVL R10, 40(BP) + MOVL R10, 124(BP) + MOVL R10, 152(BP) + MOVL R10, 244(BP) + MOVL R10, 276(BP) + MOVL R10, 388(BP) + MOVL R10, 416(BP) + MOVL R10, 496(BP) + MOVL R10, 588(BP) + MOVL R10, 620(BP) + MOVL R11, 28(BP) + MOVL R11, 108(BP) + MOVL R11, 196(BP) + MOVL R11, 256(BP) + MOVL R11, 312(BP) + MOVL R11, 340(BP) + MOVL R11, 436(BP) + MOVL R11, 520(BP) + MOVL R11, 528(BP) + MOVL R11, 616(BP) + SHRQ $0x20, R11 + MOVL R11, 44(BP) + MOVL R11, 136(BP) + MOVL R11, 184(BP) + MOVL R11, 208(BP) + MOVL R11, 292(BP) + MOVL R11, 372(BP) + MOVL R11, 448(BP) + MOVL R11, 468(BP) + MOVL R11, 580(BP) + MOVL R11, 600(BP) + MOVL R12, 48(BP) + MOVL R12, 100(BP) + MOVL R12, 160(BP) + MOVL R12, 268(BP) + MOVL R12, 328(BP) + MOVL R12, 348(BP) + MOVL R12, 444(BP) + MOVL R12, 504(BP) + MOVL R12, 556(BP) + MOVL R12, 596(BP) + SHRQ $0x20, R12 + MOVL R12, 64(BP) + MOVL R12, 88(BP) + MOVL R12, 188(BP) + MOVL R12, 224(BP) + MOVL R12, 272(BP) + MOVL R12, 396(BP) + MOVL R12, 440(BP) + MOVL R12, 492(BP) + MOVL R12, 548(BP) + MOVL R12, 628(BP) + MOVL R13, 52(BP) + MOVL R13, 96(BP) + MOVL R13, 176(BP) + MOVL R13, 260(BP) + MOVL R13, 284(BP) + MOVL R13, 356(BP) + MOVL R13, 428(BP) + MOVL R13, 524(BP) + MOVL R13, 572(BP) + MOVL R13, 592(BP) + SHRQ $0x20, R13 + MOVL R13, 68(BP) + MOVL R13, 120(BP) + MOVL R13, 144(BP) + MOVL R13, 220(BP) + MOVL R13, 308(BP) + MOVL R13, 360(BP) + MOVL R13, 460(BP) + MOVL R13, 480(BP) + MOVL R13, 536(BP) + MOVL R13, 640(BP) + MOVL R14, 56(BP) + MOVL R14, 128(BP) + MOVL R14, 148(BP) + MOVL R14, 232(BP) + MOVL R14, 324(BP) + MOVL R14, 352(BP) + MOVL R14, 400(BP) + MOVL R14, 472(BP) + MOVL R14, 560(BP) + MOVL R14, 648(BP) + SHRQ $0x20, R14 + MOVL R14, 72(BP) + MOVL R14, 92(BP) + MOVL R14, 172(BP) + MOVL R14, 216(BP) + MOVL R14, 332(BP) + MOVL R14, 384(BP) + MOVL R14, 424(BP) + MOVL R14, 464(BP) + MOVL R14, 564(BP) + MOVL R14, 636(BP) + MOVL R15, 60(BP) + MOVL R15, 80(BP) + MOVL R15, 192(BP) + MOVL R15, 236(BP) + MOVL R15, 304(BP) + MOVL R15, 392(BP) + MOVL R15, 408(BP) + MOVL R15, 484(BP) + MOVL R15, 532(BP) + MOVL R15, 644(BP) + SHRQ $0x20, R15 + MOVL R15, 76(BP) + MOVL R15, 104(BP) + MOVL R15, 156(BP) + MOVL R15, 252(BP) + MOVL R15, 300(BP) + MOVL R15, 376(BP) + MOVL R15, 420(BP) + MOVL R15, 500(BP) + MOVL R15, 544(BP) + MOVL R15, 624(BP) + PADDL 16(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 32(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 48(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 64(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 80(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 96(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 112(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 128(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 144(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 160(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 176(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 192(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 208(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 224(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 240(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 256(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 272(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 288(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 304(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 320(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 336(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 352(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 368(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 384(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 400(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 416(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 432(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 448(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 464(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 480(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 496(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 512(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 528(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 544(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 560(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 576(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 592(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 608(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 624(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x10, X8 + PSRLL $0x10, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 640(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + MOVO X7, X8 + PSLLL $0x18, X8 + PSRLL $0x08, X7 + PXOR X8, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PXOR X4, X0 + PXOR X5, X1 + PXOR X6, X0 + PXOR X7, X1 + LEAQ 64(SI), SI + SUBQ $0x40, DX + JNE loop + MOVO X15, (BP) + MOVQ (BP), R9 + MOVQ R9, (BX) + MOVOU X0, (AX) + MOVOU X1, 16(AX) + RET -#define BLAKE2s_SSE2() \ - PRECOMPUTE_MSG(BP, 16, SI, R8, R9, R10, R11, R12, R13, R14, R15); \ - ROUND_SSE2(X4, X5, X6, X7, 16(BP), 32(BP), 48(BP), 64(BP), X8); \ - ROUND_SSE2(X4, X5, X6, X7, 16+64(BP), 32+64(BP), 48+64(BP), 64+64(BP), X8); \ - ROUND_SSE2(X4, X5, X6, X7, 16+128(BP), 32+128(BP), 48+128(BP), 64+128(BP), X8); \ - ROUND_SSE2(X4, X5, X6, X7, 16+192(BP), 32+192(BP), 48+192(BP), 64+192(BP), X8); \ - ROUND_SSE2(X4, X5, X6, X7, 16+256(BP), 32+256(BP), 48+256(BP), 64+256(BP), X8); \ - ROUND_SSE2(X4, X5, X6, X7, 16+320(BP), 32+320(BP), 48+320(BP), 64+320(BP), X8); \ - ROUND_SSE2(X4, X5, X6, X7, 16+384(BP), 32+384(BP), 48+384(BP), 64+384(BP), X8); \ - ROUND_SSE2(X4, X5, X6, X7, 16+448(BP), 32+448(BP), 48+448(BP), 64+448(BP), X8); \ - ROUND_SSE2(X4, X5, X6, X7, 16+512(BP), 32+512(BP), 48+512(BP), 64+512(BP), X8); \ - ROUND_SSE2(X4, X5, X6, X7, 16+576(BP), 32+576(BP), 48+576(BP), 64+576(BP), X8) +DATA iv0<>+0(SB)/4, $0x6a09e667 +DATA iv0<>+4(SB)/4, $0xbb67ae85 +DATA iv0<>+8(SB)/4, $0x3c6ef372 +DATA iv0<>+12(SB)/4, $0xa54ff53a +GLOBL iv0<>(SB), RODATA|NOPTR, $16 -#define BLAKE2s_SSSE3() \ - PRECOMPUTE_MSG(BP, 16, SI, R8, R9, R10, R11, R12, R13, R14, R15); \ - ROUND_SSSE3(X4, X5, X6, X7, 16(BP), 32(BP), 48(BP), 64(BP), X8, X13, X14); \ - ROUND_SSSE3(X4, X5, X6, X7, 16+64(BP), 32+64(BP), 48+64(BP), 64+64(BP), X8, X13, X14); \ - ROUND_SSSE3(X4, X5, X6, X7, 16+128(BP), 32+128(BP), 48+128(BP), 64+128(BP), X8, X13, X14); \ - ROUND_SSSE3(X4, X5, X6, X7, 16+192(BP), 32+192(BP), 48+192(BP), 64+192(BP), X8, X13, X14); \ - ROUND_SSSE3(X4, X5, X6, X7, 16+256(BP), 32+256(BP), 48+256(BP), 64+256(BP), X8, X13, X14); \ - ROUND_SSSE3(X4, X5, X6, X7, 16+320(BP), 32+320(BP), 48+320(BP), 64+320(BP), X8, X13, X14); \ - ROUND_SSSE3(X4, X5, X6, X7, 16+384(BP), 32+384(BP), 48+384(BP), 64+384(BP), X8, X13, X14); \ - ROUND_SSSE3(X4, X5, X6, X7, 16+448(BP), 32+448(BP), 48+448(BP), 64+448(BP), X8, X13, X14); \ - ROUND_SSSE3(X4, X5, X6, X7, 16+512(BP), 32+512(BP), 48+512(BP), 64+512(BP), X8, X13, X14); \ - ROUND_SSSE3(X4, X5, X6, X7, 16+576(BP), 32+576(BP), 48+576(BP), 64+576(BP), X8, X13, X14) +DATA iv1<>+0(SB)/4, $0x510e527f +DATA iv1<>+4(SB)/4, $0x9b05688c +DATA iv1<>+8(SB)/4, $0x1f83d9ab +DATA iv1<>+12(SB)/4, $0x5be0cd19 +GLOBL iv1<>(SB), RODATA|NOPTR, $16 -#define BLAKE2s_SSE4() \ - LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15); \ - ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ - LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3); \ - ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ - LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4); \ - ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ - LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8); \ - ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ - LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13); \ - ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ - LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9); \ - ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ - LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11); \ - ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ - LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10); \ - ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ - LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5); \ - ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14); \ - LOAD_MSG_SSE4(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0); \ - ROUND_SSSE3(X4, X5, X6, X7, X8, X9, X10, X11, X8, X13, X14) +DATA counter<>+0(SB)/8, $0x0000000000000040 +DATA counter<>+8(SB)/8, $0x0000000000000000 +GLOBL counter<>(SB), RODATA|NOPTR, $16 -#define HASH_BLOCKS(h, c, flag, blocks_base, blocks_len, BLAKE2s_FUNC) \ - MOVQ h, AX; \ - MOVQ c, BX; \ - MOVL flag, CX; \ - MOVQ blocks_base, SI; \ - MOVQ blocks_len, DX; \ - \ - MOVQ SP, BP; \ - ADDQ $15, BP; \ - ANDQ $~15, BP; \ - \ - MOVQ 0(BX), R9; \ - MOVQ R9, 0(BP); \ - MOVQ CX, 8(BP); \ - \ - MOVOU 0(AX), X0; \ - MOVOU 16(AX), X1; \ - MOVOU iv0<>(SB), X2; \ - MOVOU iv1<>(SB), X3 \ - \ - MOVOU counter<>(SB), X12; \ - MOVOU rol16<>(SB), X13; \ - MOVOU rol8<>(SB), X14; \ - MOVO 0(BP), X15; \ - \ - loop: \ - MOVO X0, X4; \ - MOVO X1, X5; \ - MOVO X2, X6; \ - MOVO X3, X7; \ - \ - PADDQ X12, X15; \ - PXOR X15, X7; \ - \ - BLAKE2s_FUNC(); \ - \ - PXOR X4, X0; \ - PXOR X5, X1; \ - PXOR X6, X0; \ - PXOR X7, X1; \ - \ - LEAQ 64(SI), SI; \ - SUBQ $64, DX; \ - JNE loop; \ - \ - MOVO X15, 0(BP); \ - MOVQ 0(BP), R9; \ - MOVQ R9, 0(BX); \ - \ - MOVOU X0, 0(AX); \ - MOVOU X1, 16(AX) +DATA rol16<>+0(SB)/8, $0x0504070601000302 +DATA rol16<>+8(SB)/8, $0x0d0c0f0e09080b0a +GLOBL rol16<>(SB), RODATA|NOPTR, $16 -// func hashBlocksSSE2(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) -TEXT ·hashBlocksSSE2(SB), 0, $672-48 // frame = 656 + 16 byte alignment - HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), BLAKE2s_SSE2) - RET +DATA rol8<>+0(SB)/8, $0x0407060500030201 +DATA rol8<>+8(SB)/8, $0x0c0f0e0d080b0a09 +GLOBL rol8<>(SB), RODATA|NOPTR, $16 // func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) -TEXT ·hashBlocksSSSE3(SB), 0, $672-48 // frame = 656 + 16 byte alignment - HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), BLAKE2s_SSSE3) +// Requires: SSE2, SSSE3 +TEXT ·hashBlocksSSSE3(SB), $672-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVL flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DX + MOVQ SP, BP + ADDQ $0x0f, BP + ANDQ $-16, BP + MOVQ (BX), R9 + MOVQ R9, (BP) + MOVQ CX, 8(BP) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU iv0<>+0(SB), X2 + MOVOU iv1<>+0(SB), X3 + MOVOU counter<>+0(SB), X12 + MOVOU rol16<>+0(SB), X13 + MOVOU rol8<>+0(SB), X14 + MOVO (BP), X15 + +loop: + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X3, X7 + PADDQ X12, X15 + PXOR X15, X7 + MOVQ (SI), R8 + MOVQ 8(SI), R9 + MOVQ 16(SI), R10 + MOVQ 24(SI), R11 + MOVQ 32(SI), R12 + MOVQ 40(SI), R13 + MOVQ 48(SI), R14 + MOVQ 56(SI), R15 + MOVL R8, 16(BP) + MOVL R8, 116(BP) + MOVL R8, 164(BP) + MOVL R8, 264(BP) + MOVL R8, 288(BP) + MOVL R8, 344(BP) + MOVL R8, 432(BP) + MOVL R8, 512(BP) + MOVL R8, 540(BP) + MOVL R8, 652(BP) + SHRQ $0x20, R8 + MOVL R8, 32(BP) + MOVL R8, 112(BP) + MOVL R8, 200(BP) + MOVL R8, 228(BP) + MOVL R8, 320(BP) + MOVL R8, 380(BP) + MOVL R8, 404(BP) + MOVL R8, 488(BP) + MOVL R8, 568(BP) + MOVL R8, 604(BP) + MOVL R9, 20(BP) + MOVL R9, 132(BP) + MOVL R9, 168(BP) + MOVL R9, 240(BP) + MOVL R9, 280(BP) + MOVL R9, 336(BP) + MOVL R9, 456(BP) + MOVL R9, 508(BP) + MOVL R9, 576(BP) + MOVL R9, 608(BP) + SHRQ $0x20, R9 + MOVL R9, 36(BP) + MOVL R9, 140(BP) + MOVL R9, 180(BP) + MOVL R9, 212(BP) + MOVL R9, 316(BP) + MOVL R9, 364(BP) + MOVL R9, 452(BP) + MOVL R9, 476(BP) + MOVL R9, 552(BP) + MOVL R9, 632(BP) + MOVL R10, 24(BP) + MOVL R10, 84(BP) + MOVL R10, 204(BP) + MOVL R10, 248(BP) + MOVL R10, 296(BP) + MOVL R10, 368(BP) + MOVL R10, 412(BP) + MOVL R10, 516(BP) + MOVL R10, 584(BP) + MOVL R10, 612(BP) + SHRQ $0x20, R10 + MOVL R10, 40(BP) + MOVL R10, 124(BP) + MOVL R10, 152(BP) + MOVL R10, 244(BP) + MOVL R10, 276(BP) + MOVL R10, 388(BP) + MOVL R10, 416(BP) + MOVL R10, 496(BP) + MOVL R10, 588(BP) + MOVL R10, 620(BP) + MOVL R11, 28(BP) + MOVL R11, 108(BP) + MOVL R11, 196(BP) + MOVL R11, 256(BP) + MOVL R11, 312(BP) + MOVL R11, 340(BP) + MOVL R11, 436(BP) + MOVL R11, 520(BP) + MOVL R11, 528(BP) + MOVL R11, 616(BP) + SHRQ $0x20, R11 + MOVL R11, 44(BP) + MOVL R11, 136(BP) + MOVL R11, 184(BP) + MOVL R11, 208(BP) + MOVL R11, 292(BP) + MOVL R11, 372(BP) + MOVL R11, 448(BP) + MOVL R11, 468(BP) + MOVL R11, 580(BP) + MOVL R11, 600(BP) + MOVL R12, 48(BP) + MOVL R12, 100(BP) + MOVL R12, 160(BP) + MOVL R12, 268(BP) + MOVL R12, 328(BP) + MOVL R12, 348(BP) + MOVL R12, 444(BP) + MOVL R12, 504(BP) + MOVL R12, 556(BP) + MOVL R12, 596(BP) + SHRQ $0x20, R12 + MOVL R12, 64(BP) + MOVL R12, 88(BP) + MOVL R12, 188(BP) + MOVL R12, 224(BP) + MOVL R12, 272(BP) + MOVL R12, 396(BP) + MOVL R12, 440(BP) + MOVL R12, 492(BP) + MOVL R12, 548(BP) + MOVL R12, 628(BP) + MOVL R13, 52(BP) + MOVL R13, 96(BP) + MOVL R13, 176(BP) + MOVL R13, 260(BP) + MOVL R13, 284(BP) + MOVL R13, 356(BP) + MOVL R13, 428(BP) + MOVL R13, 524(BP) + MOVL R13, 572(BP) + MOVL R13, 592(BP) + SHRQ $0x20, R13 + MOVL R13, 68(BP) + MOVL R13, 120(BP) + MOVL R13, 144(BP) + MOVL R13, 220(BP) + MOVL R13, 308(BP) + MOVL R13, 360(BP) + MOVL R13, 460(BP) + MOVL R13, 480(BP) + MOVL R13, 536(BP) + MOVL R13, 640(BP) + MOVL R14, 56(BP) + MOVL R14, 128(BP) + MOVL R14, 148(BP) + MOVL R14, 232(BP) + MOVL R14, 324(BP) + MOVL R14, 352(BP) + MOVL R14, 400(BP) + MOVL R14, 472(BP) + MOVL R14, 560(BP) + MOVL R14, 648(BP) + SHRQ $0x20, R14 + MOVL R14, 72(BP) + MOVL R14, 92(BP) + MOVL R14, 172(BP) + MOVL R14, 216(BP) + MOVL R14, 332(BP) + MOVL R14, 384(BP) + MOVL R14, 424(BP) + MOVL R14, 464(BP) + MOVL R14, 564(BP) + MOVL R14, 636(BP) + MOVL R15, 60(BP) + MOVL R15, 80(BP) + MOVL R15, 192(BP) + MOVL R15, 236(BP) + MOVL R15, 304(BP) + MOVL R15, 392(BP) + MOVL R15, 408(BP) + MOVL R15, 484(BP) + MOVL R15, 532(BP) + MOVL R15, 644(BP) + SHRQ $0x20, R15 + MOVL R15, 76(BP) + MOVL R15, 104(BP) + MOVL R15, 156(BP) + MOVL R15, 252(BP) + MOVL R15, 300(BP) + MOVL R15, 376(BP) + MOVL R15, 420(BP) + MOVL R15, 500(BP) + MOVL R15, 544(BP) + MOVL R15, 624(BP) + PADDL 16(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 32(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 48(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 64(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 80(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 96(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 112(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 128(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 144(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 160(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 176(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 192(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 208(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 224(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 240(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 256(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 272(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 288(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 304(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 320(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 336(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 352(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 368(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 384(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 400(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 416(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 432(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 448(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 464(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 480(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 496(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 512(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 528(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 544(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 560(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 576(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PADDL 592(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 608(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL 624(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL 640(BP), X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PXOR X4, X0 + PXOR X5, X1 + PXOR X6, X0 + PXOR X7, X1 + LEAQ 64(SI), SI + SUBQ $0x40, DX + JNE loop + MOVO X15, (BP) + MOVQ (BP), R9 + MOVQ R9, (BX) + MOVOU X0, (AX) + MOVOU X1, 16(AX) RET // func hashBlocksSSE4(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte) -TEXT ·hashBlocksSSE4(SB), 0, $32-48 // frame = 16 + 16 byte alignment - HASH_BLOCKS(h+0(FP), c+8(FP), flag+16(FP), blocks_base+24(FP), blocks_len+32(FP), BLAKE2s_SSE4) +// Requires: SSE2, SSE4.1, SSSE3 +TEXT ·hashBlocksSSE4(SB), $32-48 + MOVQ h+0(FP), AX + MOVQ c+8(FP), BX + MOVL flag+16(FP), CX + MOVQ blocks_base+24(FP), SI + MOVQ blocks_len+32(FP), DX + MOVQ SP, BP + ADDQ $0x0f, BP + ANDQ $-16, BP + MOVQ (BX), R9 + MOVQ R9, (BP) + MOVQ CX, 8(BP) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU iv0<>+0(SB), X2 + MOVOU iv1<>+0(SB), X3 + MOVOU counter<>+0(SB), X12 + MOVOU rol16<>+0(SB), X13 + MOVOU rol8<>+0(SB), X14 + MOVO (BP), X15 + +loop: + MOVO X0, X4 + MOVO X1, X5 + MOVO X2, X6 + MOVO X3, X7 + PADDQ X12, X15 + PXOR X15, X7 + MOVL (SI), X8 + PINSRD $0x01, 8(SI), X8 + PINSRD $0x02, 16(SI), X8 + PINSRD $0x03, 24(SI), X8 + MOVL 4(SI), X9 + PINSRD $0x01, 12(SI), X9 + PINSRD $0x02, 20(SI), X9 + PINSRD $0x03, 28(SI), X9 + MOVL 32(SI), X10 + PINSRD $0x01, 40(SI), X10 + PINSRD $0x02, 48(SI), X10 + PINSRD $0x03, 56(SI), X10 + MOVL 36(SI), X11 + PINSRD $0x01, 44(SI), X11 + PINSRD $0x02, 52(SI), X11 + PINSRD $0x03, 60(SI), X11 + PADDL X8, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X9, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL X10, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X11, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + MOVL 56(SI), X8 + PINSRD $0x01, 16(SI), X8 + PINSRD $0x02, 36(SI), X8 + PINSRD $0x03, 52(SI), X8 + MOVL 40(SI), X9 + PINSRD $0x01, 32(SI), X9 + PINSRD $0x02, 60(SI), X9 + PINSRD $0x03, 24(SI), X9 + MOVL 4(SI), X10 + PINSRD $0x01, (SI), X10 + PINSRD $0x02, 44(SI), X10 + PINSRD $0x03, 20(SI), X10 + MOVL 48(SI), X11 + PINSRD $0x01, 8(SI), X11 + PINSRD $0x02, 28(SI), X11 + PINSRD $0x03, 12(SI), X11 + PADDL X8, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X9, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL X10, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X11, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + MOVL 44(SI), X8 + PINSRD $0x01, 48(SI), X8 + PINSRD $0x02, 20(SI), X8 + PINSRD $0x03, 60(SI), X8 + MOVL 32(SI), X9 + PINSRD $0x01, (SI), X9 + PINSRD $0x02, 8(SI), X9 + PINSRD $0x03, 52(SI), X9 + MOVL 40(SI), X10 + PINSRD $0x01, 12(SI), X10 + PINSRD $0x02, 28(SI), X10 + PINSRD $0x03, 36(SI), X10 + MOVL 56(SI), X11 + PINSRD $0x01, 24(SI), X11 + PINSRD $0x02, 4(SI), X11 + PINSRD $0x03, 16(SI), X11 + PADDL X8, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X9, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL X10, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X11, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + MOVL 28(SI), X8 + PINSRD $0x01, 12(SI), X8 + PINSRD $0x02, 52(SI), X8 + PINSRD $0x03, 44(SI), X8 + MOVL 36(SI), X9 + PINSRD $0x01, 4(SI), X9 + PINSRD $0x02, 48(SI), X9 + PINSRD $0x03, 56(SI), X9 + MOVL 8(SI), X10 + PINSRD $0x01, 20(SI), X10 + PINSRD $0x02, 16(SI), X10 + PINSRD $0x03, 60(SI), X10 + MOVL 24(SI), X11 + PINSRD $0x01, 40(SI), X11 + PINSRD $0x02, (SI), X11 + PINSRD $0x03, 32(SI), X11 + PADDL X8, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X9, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL X10, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X11, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + MOVL 36(SI), X8 + PINSRD $0x01, 20(SI), X8 + PINSRD $0x02, 8(SI), X8 + PINSRD $0x03, 40(SI), X8 + MOVL (SI), X9 + PINSRD $0x01, 28(SI), X9 + PINSRD $0x02, 16(SI), X9 + PINSRD $0x03, 60(SI), X9 + MOVL 56(SI), X10 + PINSRD $0x01, 44(SI), X10 + PINSRD $0x02, 24(SI), X10 + PINSRD $0x03, 12(SI), X10 + MOVL 4(SI), X11 + PINSRD $0x01, 48(SI), X11 + PINSRD $0x02, 32(SI), X11 + PINSRD $0x03, 52(SI), X11 + PADDL X8, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X9, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL X10, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X11, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + MOVL 8(SI), X8 + PINSRD $0x01, 24(SI), X8 + PINSRD $0x02, (SI), X8 + PINSRD $0x03, 32(SI), X8 + MOVL 48(SI), X9 + PINSRD $0x01, 40(SI), X9 + PINSRD $0x02, 44(SI), X9 + PINSRD $0x03, 12(SI), X9 + MOVL 16(SI), X10 + PINSRD $0x01, 28(SI), X10 + PINSRD $0x02, 60(SI), X10 + PINSRD $0x03, 4(SI), X10 + MOVL 52(SI), X11 + PINSRD $0x01, 20(SI), X11 + PINSRD $0x02, 56(SI), X11 + PINSRD $0x03, 36(SI), X11 + PADDL X8, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X9, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL X10, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X11, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + MOVL 48(SI), X8 + PINSRD $0x01, 4(SI), X8 + PINSRD $0x02, 56(SI), X8 + PINSRD $0x03, 16(SI), X8 + MOVL 20(SI), X9 + PINSRD $0x01, 60(SI), X9 + PINSRD $0x02, 52(SI), X9 + PINSRD $0x03, 40(SI), X9 + MOVL (SI), X10 + PINSRD $0x01, 24(SI), X10 + PINSRD $0x02, 36(SI), X10 + PINSRD $0x03, 32(SI), X10 + MOVL 28(SI), X11 + PINSRD $0x01, 12(SI), X11 + PINSRD $0x02, 8(SI), X11 + PINSRD $0x03, 44(SI), X11 + PADDL X8, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X9, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL X10, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X11, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + MOVL 52(SI), X8 + PINSRD $0x01, 28(SI), X8 + PINSRD $0x02, 48(SI), X8 + PINSRD $0x03, 12(SI), X8 + MOVL 44(SI), X9 + PINSRD $0x01, 56(SI), X9 + PINSRD $0x02, 4(SI), X9 + PINSRD $0x03, 36(SI), X9 + MOVL 20(SI), X10 + PINSRD $0x01, 60(SI), X10 + PINSRD $0x02, 32(SI), X10 + PINSRD $0x03, 8(SI), X10 + MOVL (SI), X11 + PINSRD $0x01, 16(SI), X11 + PINSRD $0x02, 24(SI), X11 + PINSRD $0x03, 40(SI), X11 + PADDL X8, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X9, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL X10, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X11, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + MOVL 24(SI), X8 + PINSRD $0x01, 56(SI), X8 + PINSRD $0x02, 44(SI), X8 + PINSRD $0x03, (SI), X8 + MOVL 60(SI), X9 + PINSRD $0x01, 36(SI), X9 + PINSRD $0x02, 12(SI), X9 + PINSRD $0x03, 32(SI), X9 + MOVL 48(SI), X10 + PINSRD $0x01, 52(SI), X10 + PINSRD $0x02, 4(SI), X10 + PINSRD $0x03, 40(SI), X10 + MOVL 8(SI), X11 + PINSRD $0x01, 28(SI), X11 + PINSRD $0x02, 16(SI), X11 + PINSRD $0x03, 20(SI), X11 + PADDL X8, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X9, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL X10, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X11, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + MOVL 40(SI), X8 + PINSRD $0x01, 32(SI), X8 + PINSRD $0x02, 28(SI), X8 + PINSRD $0x03, 4(SI), X8 + MOVL 8(SI), X9 + PINSRD $0x01, 16(SI), X9 + PINSRD $0x02, 24(SI), X9 + PINSRD $0x03, 20(SI), X9 + MOVL 60(SI), X10 + PINSRD $0x01, 36(SI), X10 + PINSRD $0x02, 12(SI), X10 + PINSRD $0x03, 52(SI), X10 + MOVL 44(SI), X11 + PINSRD $0x01, 56(SI), X11 + PINSRD $0x02, 48(SI), X11 + PINSRD $0x03, (SI), X11 + PADDL X8, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X9, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X5, X5 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X7, X7 + PADDL X10, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X13, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x14, X8 + PSRLL $0x0c, X5 + PXOR X8, X5 + PADDL X11, X4 + PADDL X5, X4 + PXOR X4, X7 + PSHUFB X14, X7 + PADDL X7, X6 + PXOR X6, X5 + MOVO X5, X8 + PSLLL $0x19, X8 + PSRLL $0x07, X5 + PXOR X8, X5 + PSHUFL $0x39, X7, X7 + PSHUFL $0x4e, X6, X6 + PSHUFL $0x93, X5, X5 + PXOR X4, X0 + PXOR X5, X1 + PXOR X6, X0 + PXOR X7, X1 + LEAQ 64(SI), SI + SUBQ $0x40, DX + JNE loop + MOVO X15, (BP) + MOVQ (BP), R9 + MOVQ R9, (BX) + MOVOU X0, (AX) + MOVOU X1, 16(AX) RET diff --git a/vendor/golang.org/x/crypto/blake2s/register.go b/vendor/golang.org/x/crypto/blake2s/register.go deleted file mode 100644 index 3156148a..00000000 --- a/vendor/golang.org/x/crypto/blake2s/register.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.9 - -package blake2s - -import ( - "crypto" - "hash" -) - -func init() { - newHash256 := func() hash.Hash { - h, _ := New256(nil) - return h - } - - crypto.RegisterHash(crypto.BLAKE2s_256, newHash256) -} diff --git a/vendor/golang.org/x/crypto/blowfish/cipher.go b/vendor/golang.org/x/crypto/blowfish/cipher.go index 213bf204..08989568 100644 --- a/vendor/golang.org/x/crypto/blowfish/cipher.go +++ b/vendor/golang.org/x/crypto/blowfish/cipher.go @@ -11,7 +11,7 @@ // Deprecated: any new system should use AES (from crypto/aes, if necessary in // an AEAD mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from // golang.org/x/crypto/chacha20poly1305). -package blowfish // import "golang.org/x/crypto/blowfish" +package blowfish // The code is a port of Bruce Schneier's C implementation. // See https://www.schneier.com/blowfish.html. diff --git a/vendor/golang.org/x/crypto/cast5/cast5.go b/vendor/golang.org/x/crypto/cast5/cast5.go index 425e8eec..016e9021 100644 --- a/vendor/golang.org/x/crypto/cast5/cast5.go +++ b/vendor/golang.org/x/crypto/cast5/cast5.go @@ -11,7 +11,7 @@ // Deprecated: any new system should use AES (from crypto/aes, if necessary in // an AEAD mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from // golang.org/x/crypto/chacha20poly1305). -package cast5 // import "golang.org/x/crypto/cast5" +package cast5 import ( "errors" diff --git a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s index 66aebae2..c672ccf6 100644 --- a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s +++ b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s @@ -33,6 +33,9 @@ #define CONSTBASE R16 #define BLOCKS R17 +// for VPERMXOR +#define MASK R18 + DATA consts<>+0x00(SB)/8, $0x3320646e61707865 DATA consts<>+0x08(SB)/8, $0x6b20657479622d32 DATA consts<>+0x10(SB)/8, $0x0000000000000001 @@ -53,7 +56,11 @@ DATA consts<>+0x80(SB)/8, $0x6b2065746b206574 DATA consts<>+0x88(SB)/8, $0x6b2065746b206574 DATA consts<>+0x90(SB)/8, $0x0000000100000000 DATA consts<>+0x98(SB)/8, $0x0000000300000002 -GLOBL consts<>(SB), RODATA, $0xa0 +DATA consts<>+0xa0(SB)/8, $0x5566774411223300 +DATA consts<>+0xa8(SB)/8, $0xddeeffcc99aabb88 +DATA consts<>+0xb0(SB)/8, $0x6677445522330011 +DATA consts<>+0xb8(SB)/8, $0xeeffccddaabb8899 +GLOBL consts<>(SB), RODATA, $0xc0 //func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32) TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40 @@ -70,6 +77,9 @@ TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40 MOVD $48, R10 MOVD $64, R11 SRD $6, LEN, BLOCKS + // for VPERMXOR + MOVD $consts<>+0xa0(SB), MASK + MOVD $16, R20 // V16 LXVW4X (CONSTBASE)(R0), VS48 ADD $80,CONSTBASE @@ -87,6 +97,10 @@ TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40 // V28 LXVW4X (CONSTBASE)(R11), VS60 + // Load mask constants for VPERMXOR + LXVW4X (MASK)(R0), V20 + LXVW4X (MASK)(R20), V21 + // splat slot from V19 -> V26 VSPLTW $0, V19, V26 @@ -97,7 +111,7 @@ TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40 MOVD $10, R14 MOVD R14, CTR - + PCALIGN $16 loop_outer_vsx: // V0, V1, V2, V3 LXVW4X (R0)(CONSTBASE), VS32 @@ -128,22 +142,17 @@ loop_outer_vsx: VSPLTISW $12, V28 VSPLTISW $8, V29 VSPLTISW $7, V30 - + PCALIGN $16 loop_vsx: VADDUWM V0, V4, V0 VADDUWM V1, V5, V1 VADDUWM V2, V6, V2 VADDUWM V3, V7, V3 - VXOR V12, V0, V12 - VXOR V13, V1, V13 - VXOR V14, V2, V14 - VXOR V15, V3, V15 - - VRLW V12, V27, V12 - VRLW V13, V27, V13 - VRLW V14, V27, V14 - VRLW V15, V27, V15 + VPERMXOR V12, V0, V21, V12 + VPERMXOR V13, V1, V21, V13 + VPERMXOR V14, V2, V21, V14 + VPERMXOR V15, V3, V21, V15 VADDUWM V8, V12, V8 VADDUWM V9, V13, V9 @@ -165,15 +174,10 @@ loop_vsx: VADDUWM V2, V6, V2 VADDUWM V3, V7, V3 - VXOR V12, V0, V12 - VXOR V13, V1, V13 - VXOR V14, V2, V14 - VXOR V15, V3, V15 - - VRLW V12, V29, V12 - VRLW V13, V29, V13 - VRLW V14, V29, V14 - VRLW V15, V29, V15 + VPERMXOR V12, V0, V20, V12 + VPERMXOR V13, V1, V20, V13 + VPERMXOR V14, V2, V20, V14 + VPERMXOR V15, V3, V20, V15 VADDUWM V8, V12, V8 VADDUWM V9, V13, V9 @@ -195,15 +199,10 @@ loop_vsx: VADDUWM V2, V7, V2 VADDUWM V3, V4, V3 - VXOR V15, V0, V15 - VXOR V12, V1, V12 - VXOR V13, V2, V13 - VXOR V14, V3, V14 - - VRLW V15, V27, V15 - VRLW V12, V27, V12 - VRLW V13, V27, V13 - VRLW V14, V27, V14 + VPERMXOR V15, V0, V21, V15 + VPERMXOR V12, V1, V21, V12 + VPERMXOR V13, V2, V21, V13 + VPERMXOR V14, V3, V21, V14 VADDUWM V10, V15, V10 VADDUWM V11, V12, V11 @@ -225,15 +224,10 @@ loop_vsx: VADDUWM V2, V7, V2 VADDUWM V3, V4, V3 - VXOR V15, V0, V15 - VXOR V12, V1, V12 - VXOR V13, V2, V13 - VXOR V14, V3, V14 - - VRLW V15, V29, V15 - VRLW V12, V29, V12 - VRLW V13, V29, V13 - VRLW V14, V29, V14 + VPERMXOR V15, V0, V20, V15 + VPERMXOR V12, V1, V20, V12 + VPERMXOR V13, V2, V20, V13 + VPERMXOR V14, V3, V20, V14 VADDUWM V10, V15, V10 VADDUWM V11, V12, V11 @@ -249,48 +243,48 @@ loop_vsx: VRLW V6, V30, V6 VRLW V7, V30, V7 VRLW V4, V30, V4 - BC 16, LT, loop_vsx + BDNZ loop_vsx VADDUWM V12, V26, V12 - WORD $0x13600F8C // VMRGEW V0, V1, V27 - WORD $0x13821F8C // VMRGEW V2, V3, V28 + VMRGEW V0, V1, V27 + VMRGEW V2, V3, V28 - WORD $0x10000E8C // VMRGOW V0, V1, V0 - WORD $0x10421E8C // VMRGOW V2, V3, V2 + VMRGOW V0, V1, V0 + VMRGOW V2, V3, V2 - WORD $0x13A42F8C // VMRGEW V4, V5, V29 - WORD $0x13C63F8C // VMRGEW V6, V7, V30 + VMRGEW V4, V5, V29 + VMRGEW V6, V7, V30 XXPERMDI VS32, VS34, $0, VS33 XXPERMDI VS32, VS34, $3, VS35 XXPERMDI VS59, VS60, $0, VS32 XXPERMDI VS59, VS60, $3, VS34 - WORD $0x10842E8C // VMRGOW V4, V5, V4 - WORD $0x10C63E8C // VMRGOW V6, V7, V6 + VMRGOW V4, V5, V4 + VMRGOW V6, V7, V6 - WORD $0x13684F8C // VMRGEW V8, V9, V27 - WORD $0x138A5F8C // VMRGEW V10, V11, V28 + VMRGEW V8, V9, V27 + VMRGEW V10, V11, V28 XXPERMDI VS36, VS38, $0, VS37 XXPERMDI VS36, VS38, $3, VS39 XXPERMDI VS61, VS62, $0, VS36 XXPERMDI VS61, VS62, $3, VS38 - WORD $0x11084E8C // VMRGOW V8, V9, V8 - WORD $0x114A5E8C // VMRGOW V10, V11, V10 + VMRGOW V8, V9, V8 + VMRGOW V10, V11, V10 - WORD $0x13AC6F8C // VMRGEW V12, V13, V29 - WORD $0x13CE7F8C // VMRGEW V14, V15, V30 + VMRGEW V12, V13, V29 + VMRGEW V14, V15, V30 XXPERMDI VS40, VS42, $0, VS41 XXPERMDI VS40, VS42, $3, VS43 XXPERMDI VS59, VS60, $0, VS40 XXPERMDI VS59, VS60, $3, VS42 - WORD $0x118C6E8C // VMRGOW V12, V13, V12 - WORD $0x11CE7E8C // VMRGOW V14, V15, V14 + VMRGOW V12, V13, V12 + VMRGOW V14, V15, V14 VSPLTISW $4, V27 VADDUWM V26, V27, V26 @@ -431,7 +425,7 @@ tail_vsx: ADD $-1, R11, R12 ADD $-1, INP ADD $-1, OUT - + PCALIGN $16 looptail_vsx: // Copying the result to OUT // in bytes. @@ -439,7 +433,7 @@ looptail_vsx: MOVBZU 1(INP), TMP XOR KEY, TMP, KEY MOVBU KEY, 1(OUT) - BC 16, LT, looptail_vsx + BDNZ looptail_vsx // Clear the stack values STXVW4X VS48, (R11)(R0) diff --git a/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305.go b/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305.go index 93da7322..8cf5d811 100644 --- a/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305.go +++ b/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305.go @@ -5,7 +5,7 @@ // Package chacha20poly1305 implements the ChaCha20-Poly1305 AEAD and its // extended nonce variant XChaCha20-Poly1305, as specified in RFC 8439 and // draft-irtf-cfrg-xchacha-01. -package chacha20poly1305 // import "golang.org/x/crypto/chacha20poly1305" +package chacha20poly1305 import ( "crypto/cipher" diff --git a/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s b/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s index 731d2ac6..fd5ee845 100644 --- a/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s +++ b/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s @@ -1,2715 +1,9762 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This file was originally from https://golang.org/cl/24717 by Vlad Krasnov of CloudFlare. +// Code generated by command: go run chacha20poly1305_amd64_asm.go -out ../chacha20poly1305_amd64.s -pkg chacha20poly1305. DO NOT EDIT. //go:build gc && !purego #include "textflag.h" -// General register allocation -#define oup DI -#define inp SI -#define inl BX -#define adp CX // free to reuse, after we hash the additional data -#define keyp R8 // free to reuse, when we copy the key to stack -#define itr2 R9 // general iterator -#define itr1 CX // general iterator -#define acc0 R10 -#define acc1 R11 -#define acc2 R12 -#define t0 R13 -#define t1 R14 -#define t2 R15 -#define t3 R8 -// Register and stack allocation for the SSE code -#define rStore (0*16)(BP) -#define sStore (1*16)(BP) -#define state1Store (2*16)(BP) -#define state2Store (3*16)(BP) -#define tmpStore (4*16)(BP) -#define ctr0Store (5*16)(BP) -#define ctr1Store (6*16)(BP) -#define ctr2Store (7*16)(BP) -#define ctr3Store (8*16)(BP) -#define A0 X0 -#define A1 X1 -#define A2 X2 -#define B0 X3 -#define B1 X4 -#define B2 X5 -#define C0 X6 -#define C1 X7 -#define C2 X8 -#define D0 X9 -#define D1 X10 -#define D2 X11 -#define T0 X12 -#define T1 X13 -#define T2 X14 -#define T3 X15 -#define A3 T0 -#define B3 T1 -#define C3 T2 -#define D3 T3 -// Register and stack allocation for the AVX2 code -#define rsStoreAVX2 (0*32)(BP) -#define state1StoreAVX2 (1*32)(BP) -#define state2StoreAVX2 (2*32)(BP) -#define ctr0StoreAVX2 (3*32)(BP) -#define ctr1StoreAVX2 (4*32)(BP) -#define ctr2StoreAVX2 (5*32)(BP) -#define ctr3StoreAVX2 (6*32)(BP) -#define tmpStoreAVX2 (7*32)(BP) // 256 bytes on stack -#define AA0 Y0 -#define AA1 Y5 -#define AA2 Y6 -#define AA3 Y7 -#define BB0 Y14 -#define BB1 Y9 -#define BB2 Y10 -#define BB3 Y11 -#define CC0 Y12 -#define CC1 Y13 -#define CC2 Y8 -#define CC3 Y15 -#define DD0 Y4 -#define DD1 Y1 -#define DD2 Y2 -#define DD3 Y3 -#define TT0 DD3 -#define TT1 AA3 -#define TT2 BB3 -#define TT3 CC3 -// ChaCha20 constants -DATA ·chacha20Constants<>+0x00(SB)/4, $0x61707865 -DATA ·chacha20Constants<>+0x04(SB)/4, $0x3320646e -DATA ·chacha20Constants<>+0x08(SB)/4, $0x79622d32 -DATA ·chacha20Constants<>+0x0c(SB)/4, $0x6b206574 -DATA ·chacha20Constants<>+0x10(SB)/4, $0x61707865 -DATA ·chacha20Constants<>+0x14(SB)/4, $0x3320646e -DATA ·chacha20Constants<>+0x18(SB)/4, $0x79622d32 -DATA ·chacha20Constants<>+0x1c(SB)/4, $0x6b206574 -// <<< 16 with PSHUFB -DATA ·rol16<>+0x00(SB)/8, $0x0504070601000302 -DATA ·rol16<>+0x08(SB)/8, $0x0D0C0F0E09080B0A -DATA ·rol16<>+0x10(SB)/8, $0x0504070601000302 -DATA ·rol16<>+0x18(SB)/8, $0x0D0C0F0E09080B0A -// <<< 8 with PSHUFB -DATA ·rol8<>+0x00(SB)/8, $0x0605040702010003 -DATA ·rol8<>+0x08(SB)/8, $0x0E0D0C0F0A09080B -DATA ·rol8<>+0x10(SB)/8, $0x0605040702010003 -DATA ·rol8<>+0x18(SB)/8, $0x0E0D0C0F0A09080B - -DATA ·avx2InitMask<>+0x00(SB)/8, $0x0 -DATA ·avx2InitMask<>+0x08(SB)/8, $0x0 -DATA ·avx2InitMask<>+0x10(SB)/8, $0x1 -DATA ·avx2InitMask<>+0x18(SB)/8, $0x0 - -DATA ·avx2IncMask<>+0x00(SB)/8, $0x2 -DATA ·avx2IncMask<>+0x08(SB)/8, $0x0 -DATA ·avx2IncMask<>+0x10(SB)/8, $0x2 -DATA ·avx2IncMask<>+0x18(SB)/8, $0x0 -// Poly1305 key clamp -DATA ·polyClampMask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF -DATA ·polyClampMask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC -DATA ·polyClampMask<>+0x10(SB)/8, $0xFFFFFFFFFFFFFFFF -DATA ·polyClampMask<>+0x18(SB)/8, $0xFFFFFFFFFFFFFFFF - -DATA ·sseIncMask<>+0x00(SB)/8, $0x1 -DATA ·sseIncMask<>+0x08(SB)/8, $0x0 -// To load/store the last < 16 bytes in a buffer -DATA ·andMask<>+0x00(SB)/8, $0x00000000000000ff -DATA ·andMask<>+0x08(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x10(SB)/8, $0x000000000000ffff -DATA ·andMask<>+0x18(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x20(SB)/8, $0x0000000000ffffff -DATA ·andMask<>+0x28(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x30(SB)/8, $0x00000000ffffffff -DATA ·andMask<>+0x38(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x40(SB)/8, $0x000000ffffffffff -DATA ·andMask<>+0x48(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x50(SB)/8, $0x0000ffffffffffff -DATA ·andMask<>+0x58(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x60(SB)/8, $0x00ffffffffffffff -DATA ·andMask<>+0x68(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x70(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0x78(SB)/8, $0x0000000000000000 -DATA ·andMask<>+0x80(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0x88(SB)/8, $0x00000000000000ff -DATA ·andMask<>+0x90(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0x98(SB)/8, $0x000000000000ffff -DATA ·andMask<>+0xa0(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0xa8(SB)/8, $0x0000000000ffffff -DATA ·andMask<>+0xb0(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0xb8(SB)/8, $0x00000000ffffffff -DATA ·andMask<>+0xc0(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0xc8(SB)/8, $0x000000ffffffffff -DATA ·andMask<>+0xd0(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0xd8(SB)/8, $0x0000ffffffffffff -DATA ·andMask<>+0xe0(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+0xe8(SB)/8, $0x00ffffffffffffff - -GLOBL ·chacha20Constants<>(SB), (NOPTR+RODATA), $32 -GLOBL ·rol16<>(SB), (NOPTR+RODATA), $32 -GLOBL ·rol8<>(SB), (NOPTR+RODATA), $32 -GLOBL ·sseIncMask<>(SB), (NOPTR+RODATA), $16 -GLOBL ·avx2IncMask<>(SB), (NOPTR+RODATA), $32 -GLOBL ·avx2InitMask<>(SB), (NOPTR+RODATA), $32 -GLOBL ·polyClampMask<>(SB), (NOPTR+RODATA), $32 -GLOBL ·andMask<>(SB), (NOPTR+RODATA), $240 -// No PALIGNR in Go ASM yet (but VPALIGNR is present). -#define shiftB0Left BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xdb; BYTE $0x04 // PALIGNR $4, X3, X3 -#define shiftB1Left BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xe4; BYTE $0x04 // PALIGNR $4, X4, X4 -#define shiftB2Left BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xed; BYTE $0x04 // PALIGNR $4, X5, X5 -#define shiftB3Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xed; BYTE $0x04 // PALIGNR $4, X13, X13 -#define shiftC0Left BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xf6; BYTE $0x08 // PALIGNR $8, X6, X6 -#define shiftC1Left BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xff; BYTE $0x08 // PALIGNR $8, X7, X7 -#define shiftC2Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xc0; BYTE $0x08 // PALIGNR $8, X8, X8 -#define shiftC3Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xf6; BYTE $0x08 // PALIGNR $8, X14, X14 -#define shiftD0Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xc9; BYTE $0x0c // PALIGNR $12, X9, X9 -#define shiftD1Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xd2; BYTE $0x0c // PALIGNR $12, X10, X10 -#define shiftD2Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xdb; BYTE $0x0c // PALIGNR $12, X11, X11 -#define shiftD3Left BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xff; BYTE $0x0c // PALIGNR $12, X15, X15 -#define shiftB0Right BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xdb; BYTE $0x0c // PALIGNR $12, X3, X3 -#define shiftB1Right BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xe4; BYTE $0x0c // PALIGNR $12, X4, X4 -#define shiftB2Right BYTE $0x66; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xed; BYTE $0x0c // PALIGNR $12, X5, X5 -#define shiftB3Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xed; BYTE $0x0c // PALIGNR $12, X13, X13 -#define shiftC0Right shiftC0Left -#define shiftC1Right shiftC1Left -#define shiftC2Right shiftC2Left -#define shiftC3Right shiftC3Left -#define shiftD0Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xc9; BYTE $0x04 // PALIGNR $4, X9, X9 -#define shiftD1Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xd2; BYTE $0x04 // PALIGNR $4, X10, X10 -#define shiftD2Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xdb; BYTE $0x04 // PALIGNR $4, X11, X11 -#define shiftD3Right BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0x3a; BYTE $0x0f; BYTE $0xff; BYTE $0x04 // PALIGNR $4, X15, X15 - -// Some macros - -// ROL rotates the uint32s in register R left by N bits, using temporary T. -#define ROL(N, R, T) \ - MOVO R, T; PSLLL $(N), T; PSRLL $(32-(N)), R; PXOR T, R - -// ROL16 rotates the uint32s in register R left by 16, using temporary T if needed. -#ifdef GOAMD64_v2 -#define ROL16(R, T) PSHUFB ·rol16<>(SB), R -#else -#define ROL16(R, T) ROL(16, R, T) -#endif - -// ROL8 rotates the uint32s in register R left by 8, using temporary T if needed. -#ifdef GOAMD64_v2 -#define ROL8(R, T) PSHUFB ·rol8<>(SB), R -#else -#define ROL8(R, T) ROL(8, R, T) -#endif - -#define chachaQR(A, B, C, D, T) \ - PADDD B, A; PXOR A, D; ROL16(D, T) \ - PADDD D, C; PXOR C, B; MOVO B, T; PSLLL $12, T; PSRLL $20, B; PXOR T, B \ - PADDD B, A; PXOR A, D; ROL8(D, T) \ - PADDD D, C; PXOR C, B; MOVO B, T; PSLLL $7, T; PSRLL $25, B; PXOR T, B - -#define chachaQR_AVX2(A, B, C, D, T) \ - VPADDD B, A, A; VPXOR A, D, D; VPSHUFB ·rol16<>(SB), D, D \ - VPADDD D, C, C; VPXOR C, B, B; VPSLLD $12, B, T; VPSRLD $20, B, B; VPXOR T, B, B \ - VPADDD B, A, A; VPXOR A, D, D; VPSHUFB ·rol8<>(SB), D, D \ - VPADDD D, C, C; VPXOR C, B, B; VPSLLD $7, B, T; VPSRLD $25, B, B; VPXOR T, B, B - -#define polyAdd(S) ADDQ S, acc0; ADCQ 8+S, acc1; ADCQ $1, acc2 -#define polyMulStage1 MOVQ (0*8)(BP), AX; MOVQ AX, t2; MULQ acc0; MOVQ AX, t0; MOVQ DX, t1; MOVQ (0*8)(BP), AX; MULQ acc1; IMULQ acc2, t2; ADDQ AX, t1; ADCQ DX, t2 -#define polyMulStage2 MOVQ (1*8)(BP), AX; MOVQ AX, t3; MULQ acc0; ADDQ AX, t1; ADCQ $0, DX; MOVQ DX, acc0; MOVQ (1*8)(BP), AX; MULQ acc1; ADDQ AX, t2; ADCQ $0, DX -#define polyMulStage3 IMULQ acc2, t3; ADDQ acc0, t2; ADCQ DX, t3 -#define polyMulReduceStage MOVQ t0, acc0; MOVQ t1, acc1; MOVQ t2, acc2; ANDQ $3, acc2; MOVQ t2, t0; ANDQ $-4, t0; MOVQ t3, t1; SHRQ $2, t3, t2; SHRQ $2, t3; ADDQ t0, acc0; ADCQ t1, acc1; ADCQ $0, acc2; ADDQ t2, acc0; ADCQ t3, acc1; ADCQ $0, acc2 - -#define polyMulStage1_AVX2 MOVQ (0*8)(BP), DX; MOVQ DX, t2; MULXQ acc0, t0, t1; IMULQ acc2, t2; MULXQ acc1, AX, DX; ADDQ AX, t1; ADCQ DX, t2 -#define polyMulStage2_AVX2 MOVQ (1*8)(BP), DX; MULXQ acc0, acc0, AX; ADDQ acc0, t1; MULXQ acc1, acc1, t3; ADCQ acc1, t2; ADCQ $0, t3 -#define polyMulStage3_AVX2 IMULQ acc2, DX; ADDQ AX, t2; ADCQ DX, t3 - -#define polyMul polyMulStage1; polyMulStage2; polyMulStage3; polyMulReduceStage -#define polyMulAVX2 polyMulStage1_AVX2; polyMulStage2_AVX2; polyMulStage3_AVX2; polyMulReduceStage -// ---------------------------------------------------------------------------- + +// func polyHashADInternal<>() TEXT polyHashADInternal<>(SB), NOSPLIT, $0 - // adp points to beginning of additional data - // itr2 holds ad length - XORQ acc0, acc0 - XORQ acc1, acc1 - XORQ acc2, acc2 - CMPQ itr2, $13 - JNE hashADLoop - -openFastTLSAD: - // Special treatment for the TLS case of 13 bytes - MOVQ (adp), acc0 - MOVQ 5(adp), acc1 - SHRQ $24, acc1 - MOVQ $1, acc2 - polyMul + // Hack: Must declare #define macros inside of a function due to Avo constraints + // ROL rotates the uint32s in register R left by N bits, using temporary T. + #define ROL(N, R, T) \ + MOVO R, T; \ + PSLLL $(N), T; \ + PSRLL $(32-(N)), R; \ + PXOR T, R + + // ROL8 rotates the uint32s in register R left by 8, using temporary T if needed. + #ifdef GOAMD64_v2 + #define ROL8(R, T) PSHUFB ·rol8<>(SB), R + #else + #define ROL8(R, T) ROL(8, R, T) + #endif + + // ROL16 rotates the uint32s in register R left by 16, using temporary T if needed. + #ifdef GOAMD64_v2 + #define ROL16(R, T) PSHUFB ·rol16<>(SB), R + #else + #define ROL16(R, T) ROL(16, R, T) + #endif + XORQ R10, R10 + XORQ R11, R11 + XORQ R12, R12 + CMPQ R9, $0x0d + JNE hashADLoop + MOVQ (CX), R10 + MOVQ 5(CX), R11 + SHRQ $0x18, R11 + MOVQ $0x00000001, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 RET hashADLoop: // Hash in 16 byte chunks - CMPQ itr2, $16 - JB hashADTail - polyAdd(0(adp)) - LEAQ (1*16)(adp), adp - SUBQ $16, itr2 - polyMul - JMP hashADLoop + CMPQ R9, $0x10 + JB hashADTail + ADDQ (CX), R10 + ADCQ 8(CX), R11 + ADCQ $0x01, R12 + LEAQ 16(CX), CX + SUBQ $0x10, R9 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + JMP hashADLoop hashADTail: - CMPQ itr2, $0 + CMPQ R9, $0x00 JE hashADDone // Hash last < 16 byte tail - XORQ t0, t0 - XORQ t1, t1 - XORQ t2, t2 - ADDQ itr2, adp + XORQ R13, R13 + XORQ R14, R14 + XORQ R15, R15 + ADDQ R9, CX hashADTailLoop: - SHLQ $8, t0, t1 - SHLQ $8, t0 - MOVB -1(adp), t2 - XORQ t2, t0 - DECQ adp - DECQ itr2 - JNE hashADTailLoop - -hashADTailFinish: - ADDQ t0, acc0; ADCQ t1, acc1; ADCQ $1, acc2 - polyMul - - // Finished AD + SHLQ $0x08, R13, R14 + SHLQ $0x08, R13 + MOVB -1(CX), R15 + XORQ R15, R13 + DECQ CX + DECQ R9 + JNE hashADTailLoop + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + hashADDone: RET -// ---------------------------------------------------------------------------- -// func chacha20Poly1305Open(dst, key, src, ad []byte) bool -TEXT ·chacha20Poly1305Open(SB), 0, $288-97 +// func chacha20Poly1305Open(dst []byte, key []uint32, src []byte, ad []byte) bool +// Requires: AVX, AVX2, BMI2, CMOV, SSE2 +TEXT ·chacha20Poly1305Open(SB), $288-97 // For aligned stack access MOVQ SP, BP - ADDQ $32, BP + ADDQ $0x20, BP ANDQ $-32, BP - MOVQ dst+0(FP), oup - MOVQ key+24(FP), keyp - MOVQ src+48(FP), inp - MOVQ src_len+56(FP), inl - MOVQ ad+72(FP), adp + MOVQ dst_base+0(FP), DI + MOVQ key_base+24(FP), R8 + MOVQ src_base+48(FP), SI + MOVQ src_len+56(FP), BX + MOVQ ad_base+72(FP), CX // Check for AVX2 support - CMPB ·useAVX2(SB), $1 + CMPB ·useAVX2+0(SB), $0x01 JE chacha20Poly1305Open_AVX2 // Special optimization, for very short buffers - CMPQ inl, $128 - JBE openSSE128 // About 16% faster + CMPQ BX, $0x80 + JBE openSSE128 // For long buffers, prepare the poly key first - MOVOU ·chacha20Constants<>(SB), A0 - MOVOU (1*16)(keyp), B0 - MOVOU (2*16)(keyp), C0 - MOVOU (3*16)(keyp), D0 - MOVO D0, T1 + MOVOU ·chacha20Constants<>+0(SB), X0 + MOVOU 16(R8), X3 + MOVOU 32(R8), X6 + MOVOU 48(R8), X9 + MOVO X9, X13 // Store state on stack for future use - MOVO B0, state1Store - MOVO C0, state2Store - MOVO D0, ctr3Store - MOVQ $10, itr2 + MOVO X3, 32(BP) + MOVO X6, 48(BP) + MOVO X9, 128(BP) + MOVQ $0x0000000a, R9 openSSEPreparePolyKey: - chachaQR(A0, B0, C0, D0, T0) - shiftB0Left; shiftC0Left; shiftD0Left - chachaQR(A0, B0, C0, D0, T0) - shiftB0Right; shiftC0Right; shiftD0Right - DECQ itr2 - JNE openSSEPreparePolyKey + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + DECQ R9 + JNE openSSEPreparePolyKey // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded - PADDL ·chacha20Constants<>(SB), A0; PADDL state1Store, B0 + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL 32(BP), X3 // Clamp and store the key - PAND ·polyClampMask<>(SB), A0 - MOVO A0, rStore; MOVO B0, sStore + PAND ·polyClampMask<>+0(SB), X0 + MOVO X0, (BP) + MOVO X3, 16(BP) // Hash AAD - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) openSSEMainLoop: - CMPQ inl, $256 + CMPQ BX, $0x00000100 JB openSSEMainLoopDone // Load state, increment counter blocks - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0 - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO A2, A3; MOVO B2, B3; MOVO C2, C3; MOVO D2, D3; PADDL ·sseIncMask<>(SB), D3 + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X2, X12 + MOVO X5, X13 + MOVO X8, X14 + MOVO X11, X15 + PADDL ·sseIncMask<>+0(SB), X15 // Store counters - MOVO D0, ctr0Store; MOVO D1, ctr1Store; MOVO D2, ctr2Store; MOVO D3, ctr3Store + MOVO X9, 80(BP) + MOVO X10, 96(BP) + MOVO X11, 112(BP) + MOVO X15, 128(BP) - // There are 10 ChaCha20 iterations of 2QR each, so for 6 iterations we hash 2 blocks, and for the remaining 4 only 1 block - for a total of 16 - MOVQ $4, itr1 - MOVQ inp, itr2 + // There are 10 ChaCha20 iterations of 2QR each, so for 6 iterations we hash + // 2 blocks, and for the remaining 4 only 1 block - for a total of 16 + MOVQ $0x00000004, CX + MOVQ SI, R9 openSSEInternalLoop: - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - polyAdd(0(itr2)) - shiftB0Left; shiftB1Left; shiftB2Left; shiftB3Left - shiftC0Left; shiftC1Left; shiftC2Left; shiftC3Left - shiftD0Left; shiftD1Left; shiftD2Left; shiftD3Left - polyMulStage1 - polyMulStage2 - LEAQ (2*8)(itr2), itr2 - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - polyMulStage3 - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - polyMulReduceStage - shiftB0Right; shiftB1Right; shiftB2Right; shiftB3Right - shiftC0Right; shiftC1Right; shiftC2Right; shiftC3Right - shiftD0Right; shiftD1Right; shiftD2Right; shiftD3Right - DECQ itr1 - JGE openSSEInternalLoop - - polyAdd(0(itr2)) - polyMul - LEAQ (2*8)(itr2), itr2 - - CMPQ itr1, $-6 - JG openSSEInternalLoop + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x0c + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + LEAQ 16(R9), R9 + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x04 + DECQ CX + JGE openSSEInternalLoop + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 + CMPQ CX, $-6 + JG openSSEInternalLoop // Add in the state - PADDD ·chacha20Constants<>(SB), A0; PADDD ·chacha20Constants<>(SB), A1; PADDD ·chacha20Constants<>(SB), A2; PADDD ·chacha20Constants<>(SB), A3 - PADDD state1Store, B0; PADDD state1Store, B1; PADDD state1Store, B2; PADDD state1Store, B3 - PADDD state2Store, C0; PADDD state2Store, C1; PADDD state2Store, C2; PADDD state2Store, C3 - PADDD ctr0Store, D0; PADDD ctr1Store, D1; PADDD ctr2Store, D2; PADDD ctr3Store, D3 + PADDD ·chacha20Constants<>+0(SB), X0 + PADDD ·chacha20Constants<>+0(SB), X1 + PADDD ·chacha20Constants<>+0(SB), X2 + PADDD ·chacha20Constants<>+0(SB), X12 + PADDD 32(BP), X3 + PADDD 32(BP), X4 + PADDD 32(BP), X5 + PADDD 32(BP), X13 + PADDD 48(BP), X6 + PADDD 48(BP), X7 + PADDD 48(BP), X8 + PADDD 48(BP), X14 + PADDD 80(BP), X9 + PADDD 96(BP), X10 + PADDD 112(BP), X11 + PADDD 128(BP), X15 // Load - xor - store - MOVO D3, tmpStore - MOVOU (0*16)(inp), D3; PXOR D3, A0; MOVOU A0, (0*16)(oup) - MOVOU (1*16)(inp), D3; PXOR D3, B0; MOVOU B0, (1*16)(oup) - MOVOU (2*16)(inp), D3; PXOR D3, C0; MOVOU C0, (2*16)(oup) - MOVOU (3*16)(inp), D3; PXOR D3, D0; MOVOU D0, (3*16)(oup) - MOVOU (4*16)(inp), D0; PXOR D0, A1; MOVOU A1, (4*16)(oup) - MOVOU (5*16)(inp), D0; PXOR D0, B1; MOVOU B1, (5*16)(oup) - MOVOU (6*16)(inp), D0; PXOR D0, C1; MOVOU C1, (6*16)(oup) - MOVOU (7*16)(inp), D0; PXOR D0, D1; MOVOU D1, (7*16)(oup) - MOVOU (8*16)(inp), D0; PXOR D0, A2; MOVOU A2, (8*16)(oup) - MOVOU (9*16)(inp), D0; PXOR D0, B2; MOVOU B2, (9*16)(oup) - MOVOU (10*16)(inp), D0; PXOR D0, C2; MOVOU C2, (10*16)(oup) - MOVOU (11*16)(inp), D0; PXOR D0, D2; MOVOU D2, (11*16)(oup) - MOVOU (12*16)(inp), D0; PXOR D0, A3; MOVOU A3, (12*16)(oup) - MOVOU (13*16)(inp), D0; PXOR D0, B3; MOVOU B3, (13*16)(oup) - MOVOU (14*16)(inp), D0; PXOR D0, C3; MOVOU C3, (14*16)(oup) - MOVOU (15*16)(inp), D0; PXOR tmpStore, D0; MOVOU D0, (15*16)(oup) - LEAQ 256(inp), inp - LEAQ 256(oup), oup - SUBQ $256, inl + MOVO X15, 64(BP) + MOVOU (SI), X15 + PXOR X15, X0 + MOVOU X0, (DI) + MOVOU 16(SI), X15 + PXOR X15, X3 + MOVOU X3, 16(DI) + MOVOU 32(SI), X15 + PXOR X15, X6 + MOVOU X6, 32(DI) + MOVOU 48(SI), X15 + PXOR X15, X9 + MOVOU X9, 48(DI) + MOVOU 64(SI), X9 + PXOR X9, X1 + MOVOU X1, 64(DI) + MOVOU 80(SI), X9 + PXOR X9, X4 + MOVOU X4, 80(DI) + MOVOU 96(SI), X9 + PXOR X9, X7 + MOVOU X7, 96(DI) + MOVOU 112(SI), X9 + PXOR X9, X10 + MOVOU X10, 112(DI) + MOVOU 128(SI), X9 + PXOR X9, X2 + MOVOU X2, 128(DI) + MOVOU 144(SI), X9 + PXOR X9, X5 + MOVOU X5, 144(DI) + MOVOU 160(SI), X9 + PXOR X9, X8 + MOVOU X8, 160(DI) + MOVOU 176(SI), X9 + PXOR X9, X11 + MOVOU X11, 176(DI) + MOVOU 192(SI), X9 + PXOR X9, X12 + MOVOU X12, 192(DI) + MOVOU 208(SI), X9 + PXOR X9, X13 + MOVOU X13, 208(DI) + MOVOU 224(SI), X9 + PXOR X9, X14 + MOVOU X14, 224(DI) + MOVOU 240(SI), X9 + PXOR 64(BP), X9 + MOVOU X9, 240(DI) + LEAQ 256(SI), SI + LEAQ 256(DI), DI + SUBQ $0x00000100, BX JMP openSSEMainLoop openSSEMainLoopDone: // Handle the various tail sizes efficiently - TESTQ inl, inl + TESTQ BX, BX JE openSSEFinalize - CMPQ inl, $64 + CMPQ BX, $0x40 JBE openSSETail64 - CMPQ inl, $128 + CMPQ BX, $0x80 JBE openSSETail128 - CMPQ inl, $192 + CMPQ BX, $0xc0 JBE openSSETail192 JMP openSSETail256 openSSEFinalize: // Hash in the PT, AAD lengths - ADDQ ad_len+80(FP), acc0; ADCQ src_len+56(FP), acc1; ADCQ $1, acc2 - polyMul + ADDQ ad_len+80(FP), R10 + ADCQ src_len+56(FP), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Final reduce - MOVQ acc0, t0 - MOVQ acc1, t1 - MOVQ acc2, t2 - SUBQ $-5, acc0 - SBBQ $-1, acc1 - SBBQ $3, acc2 - CMOVQCS t0, acc0 - CMOVQCS t1, acc1 - CMOVQCS t2, acc2 + MOVQ R10, R13 + MOVQ R11, R14 + MOVQ R12, R15 + SUBQ $-5, R10 + SBBQ $-1, R11 + SBBQ $0x03, R12 + CMOVQCS R13, R10 + CMOVQCS R14, R11 + CMOVQCS R15, R12 // Add in the "s" part of the key - ADDQ 0+sStore, acc0 - ADCQ 8+sStore, acc1 + ADDQ 16(BP), R10 + ADCQ 24(BP), R11 // Finally, constant time compare to the tag at the end of the message XORQ AX, AX - MOVQ $1, DX - XORQ (0*8)(inp), acc0 - XORQ (1*8)(inp), acc1 - ORQ acc1, acc0 + MOVQ $0x00000001, DX + XORQ (SI), R10 + XORQ 8(SI), R11 + ORQ R11, R10 CMOVQEQ DX, AX // Return true iff tags are equal MOVB AX, ret+96(FP) RET -// ---------------------------------------------------------------------------- -// Special optimization for buffers smaller than 129 bytes openSSE128: - // For up to 128 bytes of ciphertext and 64 bytes for the poly key, we require to process three blocks - MOVOU ·chacha20Constants<>(SB), A0; MOVOU (1*16)(keyp), B0; MOVOU (2*16)(keyp), C0; MOVOU (3*16)(keyp), D0 - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO B0, T1; MOVO C0, T2; MOVO D1, T3 - MOVQ $10, itr2 + MOVOU ·chacha20Constants<>+0(SB), X0 + MOVOU 16(R8), X3 + MOVOU 32(R8), X6 + MOVOU 48(R8), X9 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X3, X13 + MOVO X6, X14 + MOVO X10, X15 + MOVQ $0x0000000a, R9 openSSE128InnerCipherLoop: - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Left; shiftB1Left; shiftB2Left - shiftC0Left; shiftC1Left; shiftC2Left - shiftD0Left; shiftD1Left; shiftD2Left - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Right; shiftB1Right; shiftB2Right - shiftC0Right; shiftC1Right; shiftC2Right - shiftD0Right; shiftD1Right; shiftD2Right - DECQ itr2 - JNE openSSE128InnerCipherLoop + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + DECQ R9 + JNE openSSE128InnerCipherLoop // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1; PADDL ·chacha20Constants<>(SB), A2 - PADDL T1, B0; PADDL T1, B1; PADDL T1, B2 - PADDL T2, C1; PADDL T2, C2 - PADDL T3, D1; PADDL ·sseIncMask<>(SB), T3; PADDL T3, D2 + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL ·chacha20Constants<>+0(SB), X2 + PADDL X13, X3 + PADDL X13, X4 + PADDL X13, X5 + PADDL X14, X7 + PADDL X14, X8 + PADDL X15, X10 + PADDL ·sseIncMask<>+0(SB), X15 + PADDL X15, X11 // Clamp and store the key - PAND ·polyClampMask<>(SB), A0 - MOVOU A0, rStore; MOVOU B0, sStore + PAND ·polyClampMask<>+0(SB), X0 + MOVOU X0, (BP) + MOVOU X3, 16(BP) // Hash - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) openSSE128Open: - CMPQ inl, $16 + CMPQ BX, $0x10 JB openSSETail16 - SUBQ $16, inl + SUBQ $0x10, BX // Load for hashing - polyAdd(0(inp)) + ADDQ (SI), R10 + ADCQ 8(SI), R11 + ADCQ $0x01, R12 // Load for decryption - MOVOU (inp), T0; PXOR T0, A1; MOVOU A1, (oup) - LEAQ (1*16)(inp), inp - LEAQ (1*16)(oup), oup - polyMul + MOVOU (SI), X12 + PXOR X12, X1 + MOVOU X1, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Shift the stream "left" - MOVO B1, A1 - MOVO C1, B1 - MOVO D1, C1 - MOVO A2, D1 - MOVO B2, A2 - MOVO C2, B2 - MOVO D2, C2 + MOVO X4, X1 + MOVO X7, X4 + MOVO X10, X7 + MOVO X2, X10 + MOVO X5, X2 + MOVO X8, X5 + MOVO X11, X8 JMP openSSE128Open openSSETail16: - TESTQ inl, inl + TESTQ BX, BX JE openSSEFinalize // We can safely load the CT from the end, because it is padded with the MAC - MOVQ inl, itr2 - SHLQ $4, itr2 - LEAQ ·andMask<>(SB), t0 - MOVOU (inp), T0 - ADDQ inl, inp - PAND -16(t0)(itr2*1), T0 - MOVO T0, 0+tmpStore - MOVQ T0, t0 - MOVQ 8+tmpStore, t1 - PXOR A1, T0 + MOVQ BX, R9 + SHLQ $0x04, R9 + LEAQ ·andMask<>+0(SB), R13 + MOVOU (SI), X12 + ADDQ BX, SI + PAND -16(R13)(R9*1), X12 + MOVO X12, 64(BP) + MOVQ X12, R13 + MOVQ 72(BP), R14 + PXOR X1, X12 // We can only store one byte at a time, since plaintext can be shorter than 16 bytes openSSETail16Store: - MOVQ T0, t3 - MOVB t3, (oup) - PSRLDQ $1, T0 - INCQ oup - DECQ inl + MOVQ X12, R8 + MOVB R8, (DI) + PSRLDQ $0x01, X12 + INCQ DI + DECQ BX JNE openSSETail16Store - ADDQ t0, acc0; ADCQ t1, acc1; ADCQ $1, acc2 - polyMul + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 JMP openSSEFinalize -// ---------------------------------------------------------------------------- -// Special optimization for the last 64 bytes of ciphertext openSSETail64: - // Need to decrypt up to 64 bytes - prepare single block - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0; MOVO D0, ctr0Store - XORQ itr2, itr2 - MOVQ inl, itr1 - CMPQ itr1, $16 - JB openSSETail64LoopB + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X9, 80(BP) + XORQ R9, R9 + MOVQ BX, CX + CMPQ CX, $0x10 + JB openSSETail64LoopB openSSETail64LoopA: - // Perform ChaCha rounds, while hashing the remaining input - polyAdd(0(inp)(itr2*1)) - polyMul - SUBQ $16, itr1 + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + SUBQ $0x10, CX openSSETail64LoopB: - ADDQ $16, itr2 - chachaQR(A0, B0, C0, D0, T0) - shiftB0Left; shiftC0Left; shiftD0Left - chachaQR(A0, B0, C0, D0, T0) - shiftB0Right; shiftC0Right; shiftD0Right - - CMPQ itr1, $16 - JAE openSSETail64LoopA - - CMPQ itr2, $160 - JNE openSSETail64LoopB - - PADDL ·chacha20Constants<>(SB), A0; PADDL state1Store, B0; PADDL state2Store, C0; PADDL ctr0Store, D0 + ADDQ $0x10, R9 + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + CMPQ CX, $0x10 + JAE openSSETail64LoopA + CMPQ R9, $0xa0 + JNE openSSETail64LoopB + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL 32(BP), X3 + PADDL 48(BP), X6 + PADDL 80(BP), X9 openSSETail64DecLoop: - CMPQ inl, $16 + CMPQ BX, $0x10 JB openSSETail64DecLoopDone - SUBQ $16, inl - MOVOU (inp), T0 - PXOR T0, A0 - MOVOU A0, (oup) - LEAQ 16(inp), inp - LEAQ 16(oup), oup - MOVO B0, A0 - MOVO C0, B0 - MOVO D0, C0 + SUBQ $0x10, BX + MOVOU (SI), X12 + PXOR X12, X0 + MOVOU X0, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI + MOVO X3, X0 + MOVO X6, X3 + MOVO X9, X6 JMP openSSETail64DecLoop openSSETail64DecLoopDone: - MOVO A0, A1 + MOVO X0, X1 JMP openSSETail16 -// ---------------------------------------------------------------------------- -// Special optimization for the last 128 bytes of ciphertext openSSETail128: - // Need to decrypt up to 128 bytes - prepare two blocks - MOVO ·chacha20Constants<>(SB), A1; MOVO state1Store, B1; MOVO state2Store, C1; MOVO ctr3Store, D1; PADDL ·sseIncMask<>(SB), D1; MOVO D1, ctr0Store - MOVO A1, A0; MOVO B1, B0; MOVO C1, C0; MOVO D1, D0; PADDL ·sseIncMask<>(SB), D0; MOVO D0, ctr1Store - XORQ itr2, itr2 - MOVQ inl, itr1 - ANDQ $-16, itr1 + MOVO ·chacha20Constants<>+0(SB), X1 + MOVO 32(BP), X4 + MOVO 48(BP), X7 + MOVO 128(BP), X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X10, 80(BP) + MOVO X1, X0 + MOVO X4, X3 + MOVO X7, X6 + MOVO X10, X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X9, 96(BP) + XORQ R9, R9 + MOVQ BX, CX + ANDQ $-16, CX openSSETail128LoopA: - // Perform ChaCha rounds, while hashing the remaining input - polyAdd(0(inp)(itr2*1)) - polyMul + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 openSSETail128LoopB: - ADDQ $16, itr2 - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0) - shiftB0Left; shiftC0Left; shiftD0Left - shiftB1Left; shiftC1Left; shiftD1Left - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0) - shiftB0Right; shiftC0Right; shiftD0Right - shiftB1Right; shiftC1Right; shiftD1Right - - CMPQ itr2, itr1 - JB openSSETail128LoopA - - CMPQ itr2, $160 - JNE openSSETail128LoopB - - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1 - PADDL state1Store, B0; PADDL state1Store, B1 - PADDL state2Store, C0; PADDL state2Store, C1 - PADDL ctr1Store, D0; PADDL ctr0Store, D1 - - MOVOU (0*16)(inp), T0; MOVOU (1*16)(inp), T1; MOVOU (2*16)(inp), T2; MOVOU (3*16)(inp), T3 - PXOR T0, A1; PXOR T1, B1; PXOR T2, C1; PXOR T3, D1 - MOVOU A1, (0*16)(oup); MOVOU B1, (1*16)(oup); MOVOU C1, (2*16)(oup); MOVOU D1, (3*16)(oup) - - SUBQ $64, inl - LEAQ 64(inp), inp - LEAQ 64(oup), oup - JMP openSSETail64DecLoop - -// ---------------------------------------------------------------------------- -// Special optimization for the last 192 bytes of ciphertext + ADDQ $0x10, R9 + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + CMPQ R9, CX + JB openSSETail128LoopA + CMPQ R9, $0xa0 + JNE openSSETail128LoopB + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL 32(BP), X3 + PADDL 32(BP), X4 + PADDL 48(BP), X6 + PADDL 48(BP), X7 + PADDL 96(BP), X9 + PADDL 80(BP), X10 + MOVOU (SI), X12 + MOVOU 16(SI), X13 + MOVOU 32(SI), X14 + MOVOU 48(SI), X15 + PXOR X12, X1 + PXOR X13, X4 + PXOR X14, X7 + PXOR X15, X10 + MOVOU X1, (DI) + MOVOU X4, 16(DI) + MOVOU X7, 32(DI) + MOVOU X10, 48(DI) + SUBQ $0x40, BX + LEAQ 64(SI), SI + LEAQ 64(DI), DI + JMP openSSETail64DecLoop + openSSETail192: - // Need to decrypt up to 192 bytes - prepare three blocks - MOVO ·chacha20Constants<>(SB), A2; MOVO state1Store, B2; MOVO state2Store, C2; MOVO ctr3Store, D2; PADDL ·sseIncMask<>(SB), D2; MOVO D2, ctr0Store - MOVO A2, A1; MOVO B2, B1; MOVO C2, C1; MOVO D2, D1; PADDL ·sseIncMask<>(SB), D1; MOVO D1, ctr1Store - MOVO A1, A0; MOVO B1, B0; MOVO C1, C0; MOVO D1, D0; PADDL ·sseIncMask<>(SB), D0; MOVO D0, ctr2Store - - MOVQ inl, itr1 - MOVQ $160, itr2 - CMPQ itr1, $160 - CMOVQGT itr2, itr1 - ANDQ $-16, itr1 - XORQ itr2, itr2 + MOVO ·chacha20Constants<>+0(SB), X2 + MOVO 32(BP), X5 + MOVO 48(BP), X8 + MOVO 128(BP), X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X11, 80(BP) + MOVO X2, X1 + MOVO X5, X4 + MOVO X8, X7 + MOVO X11, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X10, 96(BP) + MOVO X1, X0 + MOVO X4, X3 + MOVO X7, X6 + MOVO X10, X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X9, 112(BP) + MOVQ BX, CX + MOVQ $0x000000a0, R9 + CMPQ CX, $0xa0 + CMOVQGT R9, CX + ANDQ $-16, CX + XORQ R9, R9 openSSLTail192LoopA: - // Perform ChaCha rounds, while hashing the remaining input - polyAdd(0(inp)(itr2*1)) - polyMul + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 openSSLTail192LoopB: - ADDQ $16, itr2 - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Left; shiftC0Left; shiftD0Left - shiftB1Left; shiftC1Left; shiftD1Left - shiftB2Left; shiftC2Left; shiftD2Left - - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Right; shiftC0Right; shiftD0Right - shiftB1Right; shiftC1Right; shiftD1Right - shiftB2Right; shiftC2Right; shiftD2Right - - CMPQ itr2, itr1 - JB openSSLTail192LoopA - - CMPQ itr2, $160 - JNE openSSLTail192LoopB - - CMPQ inl, $176 - JB openSSLTail192Store - - polyAdd(160(inp)) - polyMul - - CMPQ inl, $192 - JB openSSLTail192Store - - polyAdd(176(inp)) - polyMul + ADDQ $0x10, R9 + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + CMPQ R9, CX + JB openSSLTail192LoopA + CMPQ R9, $0xa0 + JNE openSSLTail192LoopB + CMPQ BX, $0xb0 + JB openSSLTail192Store + ADDQ 160(SI), R10 + ADCQ 168(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + CMPQ BX, $0xc0 + JB openSSLTail192Store + ADDQ 176(SI), R10 + ADCQ 184(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 openSSLTail192Store: - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1; PADDL ·chacha20Constants<>(SB), A2 - PADDL state1Store, B0; PADDL state1Store, B1; PADDL state1Store, B2 - PADDL state2Store, C0; PADDL state2Store, C1; PADDL state2Store, C2 - PADDL ctr2Store, D0; PADDL ctr1Store, D1; PADDL ctr0Store, D2 - - MOVOU (0*16)(inp), T0; MOVOU (1*16)(inp), T1; MOVOU (2*16)(inp), T2; MOVOU (3*16)(inp), T3 - PXOR T0, A2; PXOR T1, B2; PXOR T2, C2; PXOR T3, D2 - MOVOU A2, (0*16)(oup); MOVOU B2, (1*16)(oup); MOVOU C2, (2*16)(oup); MOVOU D2, (3*16)(oup) - - MOVOU (4*16)(inp), T0; MOVOU (5*16)(inp), T1; MOVOU (6*16)(inp), T2; MOVOU (7*16)(inp), T3 - PXOR T0, A1; PXOR T1, B1; PXOR T2, C1; PXOR T3, D1 - MOVOU A1, (4*16)(oup); MOVOU B1, (5*16)(oup); MOVOU C1, (6*16)(oup); MOVOU D1, (7*16)(oup) - - SUBQ $128, inl - LEAQ 128(inp), inp - LEAQ 128(oup), oup - JMP openSSETail64DecLoop - -// ---------------------------------------------------------------------------- -// Special optimization for the last 256 bytes of ciphertext + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL ·chacha20Constants<>+0(SB), X2 + PADDL 32(BP), X3 + PADDL 32(BP), X4 + PADDL 32(BP), X5 + PADDL 48(BP), X6 + PADDL 48(BP), X7 + PADDL 48(BP), X8 + PADDL 112(BP), X9 + PADDL 96(BP), X10 + PADDL 80(BP), X11 + MOVOU (SI), X12 + MOVOU 16(SI), X13 + MOVOU 32(SI), X14 + MOVOU 48(SI), X15 + PXOR X12, X2 + PXOR X13, X5 + PXOR X14, X8 + PXOR X15, X11 + MOVOU X2, (DI) + MOVOU X5, 16(DI) + MOVOU X8, 32(DI) + MOVOU X11, 48(DI) + MOVOU 64(SI), X12 + MOVOU 80(SI), X13 + MOVOU 96(SI), X14 + MOVOU 112(SI), X15 + PXOR X12, X1 + PXOR X13, X4 + PXOR X14, X7 + PXOR X15, X10 + MOVOU X1, 64(DI) + MOVOU X4, 80(DI) + MOVOU X7, 96(DI) + MOVOU X10, 112(DI) + SUBQ $0x80, BX + LEAQ 128(SI), SI + LEAQ 128(DI), DI + JMP openSSETail64DecLoop + openSSETail256: - // Need to decrypt up to 256 bytes - prepare four blocks - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0 - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO A2, A3; MOVO B2, B3; MOVO C2, C3; MOVO D2, D3; PADDL ·sseIncMask<>(SB), D3 + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X2, X12 + MOVO X5, X13 + MOVO X8, X14 + MOVO X11, X15 + PADDL ·sseIncMask<>+0(SB), X15 // Store counters - MOVO D0, ctr0Store; MOVO D1, ctr1Store; MOVO D2, ctr2Store; MOVO D3, ctr3Store - XORQ itr2, itr2 + MOVO X9, 80(BP) + MOVO X10, 96(BP) + MOVO X11, 112(BP) + MOVO X15, 128(BP) + XORQ R9, R9 openSSETail256Loop: - // This loop inteleaves 8 ChaCha quarter rounds with 1 poly multiplication - polyAdd(0(inp)(itr2*1)) - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - shiftB0Left; shiftB1Left; shiftB2Left; shiftB3Left - shiftC0Left; shiftC1Left; shiftC2Left; shiftC3Left - shiftD0Left; shiftD1Left; shiftD2Left; shiftD3Left - polyMulStage1 - polyMulStage2 - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - polyMulStage3 - polyMulReduceStage - shiftB0Right; shiftB1Right; shiftB2Right; shiftB3Right - shiftC0Right; shiftC1Right; shiftC2Right; shiftC3Right - shiftD0Right; shiftD1Right; shiftD2Right; shiftD3Right - ADDQ $2*8, itr2 - CMPQ itr2, $160 - JB openSSETail256Loop - MOVQ inl, itr1 - ANDQ $-16, itr1 + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x0c + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x04 + ADDQ $0x10, R9 + CMPQ R9, $0xa0 + JB openSSETail256Loop + MOVQ BX, CX + ANDQ $-16, CX openSSETail256HashLoop: - polyAdd(0(inp)(itr2*1)) - polyMul - ADDQ $2*8, itr2 - CMPQ itr2, itr1 - JB openSSETail256HashLoop + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ $0x10, R9 + CMPQ R9, CX + JB openSSETail256HashLoop // Add in the state - PADDD ·chacha20Constants<>(SB), A0; PADDD ·chacha20Constants<>(SB), A1; PADDD ·chacha20Constants<>(SB), A2; PADDD ·chacha20Constants<>(SB), A3 - PADDD state1Store, B0; PADDD state1Store, B1; PADDD state1Store, B2; PADDD state1Store, B3 - PADDD state2Store, C0; PADDD state2Store, C1; PADDD state2Store, C2; PADDD state2Store, C3 - PADDD ctr0Store, D0; PADDD ctr1Store, D1; PADDD ctr2Store, D2; PADDD ctr3Store, D3 - MOVO D3, tmpStore + PADDD ·chacha20Constants<>+0(SB), X0 + PADDD ·chacha20Constants<>+0(SB), X1 + PADDD ·chacha20Constants<>+0(SB), X2 + PADDD ·chacha20Constants<>+0(SB), X12 + PADDD 32(BP), X3 + PADDD 32(BP), X4 + PADDD 32(BP), X5 + PADDD 32(BP), X13 + PADDD 48(BP), X6 + PADDD 48(BP), X7 + PADDD 48(BP), X8 + PADDD 48(BP), X14 + PADDD 80(BP), X9 + PADDD 96(BP), X10 + PADDD 112(BP), X11 + PADDD 128(BP), X15 + MOVO X15, 64(BP) // Load - xor - store - MOVOU (0*16)(inp), D3; PXOR D3, A0 - MOVOU (1*16)(inp), D3; PXOR D3, B0 - MOVOU (2*16)(inp), D3; PXOR D3, C0 - MOVOU (3*16)(inp), D3; PXOR D3, D0 - MOVOU A0, (0*16)(oup) - MOVOU B0, (1*16)(oup) - MOVOU C0, (2*16)(oup) - MOVOU D0, (3*16)(oup) - MOVOU (4*16)(inp), A0; MOVOU (5*16)(inp), B0; MOVOU (6*16)(inp), C0; MOVOU (7*16)(inp), D0 - PXOR A0, A1; PXOR B0, B1; PXOR C0, C1; PXOR D0, D1 - MOVOU A1, (4*16)(oup); MOVOU B1, (5*16)(oup); MOVOU C1, (6*16)(oup); MOVOU D1, (7*16)(oup) - MOVOU (8*16)(inp), A0; MOVOU (9*16)(inp), B0; MOVOU (10*16)(inp), C0; MOVOU (11*16)(inp), D0 - PXOR A0, A2; PXOR B0, B2; PXOR C0, C2; PXOR D0, D2 - MOVOU A2, (8*16)(oup); MOVOU B2, (9*16)(oup); MOVOU C2, (10*16)(oup); MOVOU D2, (11*16)(oup) - LEAQ 192(inp), inp - LEAQ 192(oup), oup - SUBQ $192, inl - MOVO A3, A0 - MOVO B3, B0 - MOVO C3, C0 - MOVO tmpStore, D0 - - JMP openSSETail64DecLoop - -// ---------------------------------------------------------------------------- -// ------------------------- AVX2 Code ---------------------------------------- + MOVOU (SI), X15 + PXOR X15, X0 + MOVOU 16(SI), X15 + PXOR X15, X3 + MOVOU 32(SI), X15 + PXOR X15, X6 + MOVOU 48(SI), X15 + PXOR X15, X9 + MOVOU X0, (DI) + MOVOU X3, 16(DI) + MOVOU X6, 32(DI) + MOVOU X9, 48(DI) + MOVOU 64(SI), X0 + MOVOU 80(SI), X3 + MOVOU 96(SI), X6 + MOVOU 112(SI), X9 + PXOR X0, X1 + PXOR X3, X4 + PXOR X6, X7 + PXOR X9, X10 + MOVOU X1, 64(DI) + MOVOU X4, 80(DI) + MOVOU X7, 96(DI) + MOVOU X10, 112(DI) + MOVOU 128(SI), X0 + MOVOU 144(SI), X3 + MOVOU 160(SI), X6 + MOVOU 176(SI), X9 + PXOR X0, X2 + PXOR X3, X5 + PXOR X6, X8 + PXOR X9, X11 + MOVOU X2, 128(DI) + MOVOU X5, 144(DI) + MOVOU X8, 160(DI) + MOVOU X11, 176(DI) + LEAQ 192(SI), SI + LEAQ 192(DI), DI + SUBQ $0xc0, BX + MOVO X12, X0 + MOVO X13, X3 + MOVO X14, X6 + MOVO 64(BP), X9 + JMP openSSETail64DecLoop + chacha20Poly1305Open_AVX2: VZEROUPPER - VMOVDQU ·chacha20Constants<>(SB), AA0 - BYTE $0xc4; BYTE $0x42; BYTE $0x7d; BYTE $0x5a; BYTE $0x70; BYTE $0x10 // broadcasti128 16(r8), ymm14 - BYTE $0xc4; BYTE $0x42; BYTE $0x7d; BYTE $0x5a; BYTE $0x60; BYTE $0x20 // broadcasti128 32(r8), ymm12 - BYTE $0xc4; BYTE $0xc2; BYTE $0x7d; BYTE $0x5a; BYTE $0x60; BYTE $0x30 // broadcasti128 48(r8), ymm4 - VPADDD ·avx2InitMask<>(SB), DD0, DD0 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + BYTE $0xc4 + BYTE $0x42 + BYTE $0x7d + BYTE $0x5a + BYTE $0x70 + BYTE $0x10 + BYTE $0xc4 + BYTE $0x42 + BYTE $0x7d + BYTE $0x5a + BYTE $0x60 + BYTE $0x20 + BYTE $0xc4 + BYTE $0xc2 + BYTE $0x7d + BYTE $0x5a + BYTE $0x60 + BYTE $0x30 + VPADDD ·avx2InitMask<>+0(SB), Y4, Y4 // Special optimization, for very short buffers - CMPQ inl, $192 + CMPQ BX, $0xc0 JBE openAVX2192 - CMPQ inl, $320 + CMPQ BX, $0x00000140 JBE openAVX2320 // For the general key prepare the key first - as a byproduct we have 64 bytes of cipher stream - VMOVDQA BB0, state1StoreAVX2 - VMOVDQA CC0, state2StoreAVX2 - VMOVDQA DD0, ctr3StoreAVX2 - MOVQ $10, itr2 + VMOVDQA Y14, 32(BP) + VMOVDQA Y12, 64(BP) + VMOVDQA Y4, 192(BP) + MOVQ $0x0000000a, R9 openAVX2PreparePolyKey: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $12, DD0, DD0, DD0 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $4, DD0, DD0, DD0 - DECQ itr2 - JNE openAVX2PreparePolyKey - - VPADDD ·chacha20Constants<>(SB), AA0, AA0 - VPADDD state1StoreAVX2, BB0, BB0 - VPADDD state2StoreAVX2, CC0, CC0 - VPADDD ctr3StoreAVX2, DD0, DD0 - - VPERM2I128 $0x02, AA0, BB0, TT0 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x04, Y4, Y4, Y4 + DECQ R9 + JNE openAVX2PreparePolyKey + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD 32(BP), Y14, Y14 + VPADDD 64(BP), Y12, Y12 + VPADDD 192(BP), Y4, Y4 + VPERM2I128 $0x02, Y0, Y14, Y3 // Clamp and store poly key - VPAND ·polyClampMask<>(SB), TT0, TT0 - VMOVDQA TT0, rsStoreAVX2 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) // Stream for the first 64 bytes - VPERM2I128 $0x13, AA0, BB0, AA0 - VPERM2I128 $0x13, CC0, DD0, BB0 + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 // Hash AD + first 64 bytes - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) - XORQ itr1, itr1 + XORQ CX, CX openAVX2InitialHash64: - polyAdd(0(inp)(itr1*1)) - polyMulAVX2 - ADDQ $16, itr1 - CMPQ itr1, $64 - JNE openAVX2InitialHash64 + ADDQ (SI)(CX*1), R10 + ADCQ 8(SI)(CX*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ $0x10, CX + CMPQ CX, $0x40 + JNE openAVX2InitialHash64 // Decrypt the first 64 bytes - VPXOR (0*32)(inp), AA0, AA0 - VPXOR (1*32)(inp), BB0, BB0 - VMOVDQU AA0, (0*32)(oup) - VMOVDQU BB0, (1*32)(oup) - LEAQ (2*32)(inp), inp - LEAQ (2*32)(oup), oup - SUBQ $64, inl + VPXOR (SI), Y0, Y0 + VPXOR 32(SI), Y14, Y14 + VMOVDQU Y0, (DI) + VMOVDQU Y14, 32(DI) + LEAQ 64(SI), SI + LEAQ 64(DI), DI + SUBQ $0x40, BX openAVX2MainLoop: - CMPQ inl, $512 + CMPQ BX, $0x00000200 JB openAVX2MainLoopDone // Load state, increment counter blocks, store the incremented counters - VMOVDQU ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3 - VMOVDQA ctr3StoreAVX2, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2; VPADDD ·avx2IncMask<>(SB), DD2, DD3 - VMOVDQA DD0, ctr0StoreAVX2; VMOVDQA DD1, ctr1StoreAVX2; VMOVDQA DD2, ctr2StoreAVX2; VMOVDQA DD3, ctr3StoreAVX2 - XORQ itr1, itr1 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + XORQ CX, CX openAVX2InternalLoop: - // Lets just say this spaghetti loop interleaves 2 quarter rounds with 3 poly multiplications - // Effectively per 512 bytes of stream we hash 480 bytes of ciphertext - polyAdd(0*8(inp)(itr1*1)) - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - polyMulStage1_AVX2 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - polyMulStage2_AVX2 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyMulStage3_AVX2 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulReduceStage - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - polyAdd(2*8(inp)(itr1*1)) - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - polyMulStage1_AVX2 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulStage2_AVX2 - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $4, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2; VPALIGNR $12, DD3, DD3, DD3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - polyMulStage3_AVX2 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - polyMulReduceStage - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyAdd(4*8(inp)(itr1*1)) - LEAQ (6*8)(itr1), itr1 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulStage1_AVX2 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - polyMulStage2_AVX2 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - polyMulStage3_AVX2 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulReduceStage - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $12, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2; VPALIGNR $4, DD3, DD3, DD3 - CMPQ itr1, $480 + ADDQ (SI)(CX*1), R10 + ADCQ 8(SI)(CX*1), R11 + ADCQ $0x01, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + ADDQ 16(SI)(CX*1), R10 + ADCQ 24(SI)(CX*1), R11 + ADCQ $0x01, R12 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x0c, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + ADDQ 32(SI)(CX*1), R10 + ADCQ 40(SI)(CX*1), R11 + ADCQ $0x01, R12 + LEAQ 48(CX), CX + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x04, Y3, Y3, Y3 + CMPQ CX, $0x000001e0 JNE openAVX2InternalLoop - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2; VPADDD ·chacha20Constants<>(SB), AA3, AA3 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2; VPADDD state1StoreAVX2, BB3, BB3 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2; VPADDD state2StoreAVX2, CC3, CC3 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2; VPADDD ctr3StoreAVX2, DD3, DD3 - VMOVDQA CC3, tmpStoreAVX2 + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VMOVDQA Y15, 224(BP) // We only hashed 480 of the 512 bytes available - hash the remaining 32 here - polyAdd(480(inp)) - polyMulAVX2 - VPERM2I128 $0x02, AA0, BB0, CC3; VPERM2I128 $0x13, AA0, BB0, BB0; VPERM2I128 $0x02, CC0, DD0, AA0; VPERM2I128 $0x13, CC0, DD0, CC0 - VPXOR (0*32)(inp), CC3, CC3; VPXOR (1*32)(inp), AA0, AA0; VPXOR (2*32)(inp), BB0, BB0; VPXOR (3*32)(inp), CC0, CC0 - VMOVDQU CC3, (0*32)(oup); VMOVDQU AA0, (1*32)(oup); VMOVDQU BB0, (2*32)(oup); VMOVDQU CC0, (3*32)(oup) - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 - VPXOR (4*32)(inp), AA0, AA0; VPXOR (5*32)(inp), BB0, BB0; VPXOR (6*32)(inp), CC0, CC0; VPXOR (7*32)(inp), DD0, DD0 - VMOVDQU AA0, (4*32)(oup); VMOVDQU BB0, (5*32)(oup); VMOVDQU CC0, (6*32)(oup); VMOVDQU DD0, (7*32)(oup) + ADDQ 480(SI), R10 + ADCQ 488(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPERM2I128 $0x02, Y0, Y14, Y15 + VPERM2I128 $0x13, Y0, Y14, Y14 + VPERM2I128 $0x02, Y12, Y4, Y0 + VPERM2I128 $0x13, Y12, Y4, Y12 + VPXOR (SI), Y15, Y15 + VPXOR 32(SI), Y0, Y0 + VPXOR 64(SI), Y14, Y14 + VPXOR 96(SI), Y12, Y12 + VMOVDQU Y15, (DI) + VMOVDQU Y0, 32(DI) + VMOVDQU Y14, 64(DI) + VMOVDQU Y12, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 128(SI), Y0, Y0 + VPXOR 160(SI), Y14, Y14 + VPXOR 192(SI), Y12, Y12 + VPXOR 224(SI), Y4, Y4 + VMOVDQU Y0, 128(DI) + VMOVDQU Y14, 160(DI) + VMOVDQU Y12, 192(DI) + VMOVDQU Y4, 224(DI) // and here - polyAdd(496(inp)) - polyMulAVX2 - VPERM2I128 $0x02, AA2, BB2, AA0; VPERM2I128 $0x02, CC2, DD2, BB0; VPERM2I128 $0x13, AA2, BB2, CC0; VPERM2I128 $0x13, CC2, DD2, DD0 - VPXOR (8*32)(inp), AA0, AA0; VPXOR (9*32)(inp), BB0, BB0; VPXOR (10*32)(inp), CC0, CC0; VPXOR (11*32)(inp), DD0, DD0 - VMOVDQU AA0, (8*32)(oup); VMOVDQU BB0, (9*32)(oup); VMOVDQU CC0, (10*32)(oup); VMOVDQU DD0, (11*32)(oup) - VPERM2I128 $0x02, AA3, BB3, AA0; VPERM2I128 $0x02, tmpStoreAVX2, DD3, BB0; VPERM2I128 $0x13, AA3, BB3, CC0; VPERM2I128 $0x13, tmpStoreAVX2, DD3, DD0 - VPXOR (12*32)(inp), AA0, AA0; VPXOR (13*32)(inp), BB0, BB0; VPXOR (14*32)(inp), CC0, CC0; VPXOR (15*32)(inp), DD0, DD0 - VMOVDQU AA0, (12*32)(oup); VMOVDQU BB0, (13*32)(oup); VMOVDQU CC0, (14*32)(oup); VMOVDQU DD0, (15*32)(oup) - LEAQ (32*16)(inp), inp - LEAQ (32*16)(oup), oup - SUBQ $(32*16), inl + ADDQ 496(SI), R10 + ADCQ 504(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 256(SI), Y0, Y0 + VPXOR 288(SI), Y14, Y14 + VPXOR 320(SI), Y12, Y12 + VPXOR 352(SI), Y4, Y4 + VMOVDQU Y0, 256(DI) + VMOVDQU Y14, 288(DI) + VMOVDQU Y12, 320(DI) + VMOVDQU Y4, 352(DI) + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, 224(BP), Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, 224(BP), Y3, Y4 + VPXOR 384(SI), Y0, Y0 + VPXOR 416(SI), Y14, Y14 + VPXOR 448(SI), Y12, Y12 + VPXOR 480(SI), Y4, Y4 + VMOVDQU Y0, 384(DI) + VMOVDQU Y14, 416(DI) + VMOVDQU Y12, 448(DI) + VMOVDQU Y4, 480(DI) + LEAQ 512(SI), SI + LEAQ 512(DI), DI + SUBQ $0x00000200, BX JMP openAVX2MainLoop openAVX2MainLoopDone: // Handle the various tail sizes efficiently - TESTQ inl, inl + TESTQ BX, BX JE openSSEFinalize - CMPQ inl, $128 + CMPQ BX, $0x80 JBE openAVX2Tail128 - CMPQ inl, $256 + CMPQ BX, $0x00000100 JBE openAVX2Tail256 - CMPQ inl, $384 + CMPQ BX, $0x00000180 JBE openAVX2Tail384 JMP openAVX2Tail512 -// ---------------------------------------------------------------------------- -// Special optimization for buffers smaller than 193 bytes openAVX2192: - // For up to 192 bytes of ciphertext and 64 bytes for the poly key, we process four blocks - VMOVDQA AA0, AA1 - VMOVDQA BB0, BB1 - VMOVDQA CC0, CC1 - VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA AA0, AA2 - VMOVDQA BB0, BB2 - VMOVDQA CC0, CC2 - VMOVDQA DD0, DD2 - VMOVDQA DD1, TT3 - MOVQ $10, itr2 + VMOVDQA Y0, Y5 + VMOVDQA Y14, Y9 + VMOVDQA Y12, Y13 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y0, Y6 + VMOVDQA Y14, Y10 + VMOVDQA Y12, Y8 + VMOVDQA Y4, Y2 + VMOVDQA Y1, Y15 + MOVQ $0x0000000a, R9 openAVX2192InnerCipherLoop: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1 - DECQ itr2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + DECQ R9 JNE openAVX2192InnerCipherLoop - VPADDD AA2, AA0, AA0; VPADDD AA2, AA1, AA1 - VPADDD BB2, BB0, BB0; VPADDD BB2, BB1, BB1 - VPADDD CC2, CC0, CC0; VPADDD CC2, CC1, CC1 - VPADDD DD2, DD0, DD0; VPADDD TT3, DD1, DD1 - VPERM2I128 $0x02, AA0, BB0, TT0 + VPADDD Y6, Y0, Y0 + VPADDD Y6, Y5, Y5 + VPADDD Y10, Y14, Y14 + VPADDD Y10, Y9, Y9 + VPADDD Y8, Y12, Y12 + VPADDD Y8, Y13, Y13 + VPADDD Y2, Y4, Y4 + VPADDD Y15, Y1, Y1 + VPERM2I128 $0x02, Y0, Y14, Y3 // Clamp and store poly key - VPAND ·polyClampMask<>(SB), TT0, TT0 - VMOVDQA TT0, rsStoreAVX2 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) // Stream for up to 192 bytes - VPERM2I128 $0x13, AA0, BB0, AA0 - VPERM2I128 $0x13, CC0, DD0, BB0 - VPERM2I128 $0x02, AA1, BB1, CC0 - VPERM2I128 $0x02, CC1, DD1, DD0 - VPERM2I128 $0x13, AA1, BB1, AA1 - VPERM2I128 $0x13, CC1, DD1, BB1 + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 + VPERM2I128 $0x02, Y5, Y9, Y12 + VPERM2I128 $0x02, Y13, Y1, Y4 + VPERM2I128 $0x13, Y5, Y9, Y5 + VPERM2I128 $0x13, Y13, Y1, Y9 openAVX2ShortOpen: // Hash - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) openAVX2ShortOpenLoop: - CMPQ inl, $32 + CMPQ BX, $0x20 JB openAVX2ShortTail32 - SUBQ $32, inl + SUBQ $0x20, BX // Load for hashing - polyAdd(0*8(inp)) - polyMulAVX2 - polyAdd(2*8(inp)) - polyMulAVX2 + ADDQ (SI), R10 + ADCQ 8(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ 16(SI), R10 + ADCQ 24(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Load for decryption - VPXOR (inp), AA0, AA0 - VMOVDQU AA0, (oup) - LEAQ (1*32)(inp), inp - LEAQ (1*32)(oup), oup + VPXOR (SI), Y0, Y0 + VMOVDQU Y0, (DI) + LEAQ 32(SI), SI + LEAQ 32(DI), DI // Shift stream left - VMOVDQA BB0, AA0 - VMOVDQA CC0, BB0 - VMOVDQA DD0, CC0 - VMOVDQA AA1, DD0 - VMOVDQA BB1, AA1 - VMOVDQA CC1, BB1 - VMOVDQA DD1, CC1 - VMOVDQA AA2, DD1 - VMOVDQA BB2, AA2 + VMOVDQA Y14, Y0 + VMOVDQA Y12, Y14 + VMOVDQA Y4, Y12 + VMOVDQA Y5, Y4 + VMOVDQA Y9, Y5 + VMOVDQA Y13, Y9 + VMOVDQA Y1, Y13 + VMOVDQA Y6, Y1 + VMOVDQA Y10, Y6 JMP openAVX2ShortOpenLoop openAVX2ShortTail32: - CMPQ inl, $16 - VMOVDQA A0, A1 + CMPQ BX, $0x10 + VMOVDQA X0, X1 JB openAVX2ShortDone - - SUBQ $16, inl + SUBQ $0x10, BX // Load for hashing - polyAdd(0*8(inp)) - polyMulAVX2 + ADDQ (SI), R10 + ADCQ 8(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Load for decryption - VPXOR (inp), A0, T0 - VMOVDQU T0, (oup) - LEAQ (1*16)(inp), inp - LEAQ (1*16)(oup), oup - VPERM2I128 $0x11, AA0, AA0, AA0 - VMOVDQA A0, A1 + VPXOR (SI), X0, X12 + VMOVDQU X12, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI + VPERM2I128 $0x11, Y0, Y0, Y0 + VMOVDQA X0, X1 openAVX2ShortDone: VZEROUPPER JMP openSSETail16 -// ---------------------------------------------------------------------------- -// Special optimization for buffers smaller than 321 bytes openAVX2320: - // For up to 320 bytes of ciphertext and 64 bytes for the poly key, we process six blocks - VMOVDQA AA0, AA1; VMOVDQA BB0, BB1; VMOVDQA CC0, CC1; VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA AA0, AA2; VMOVDQA BB0, BB2; VMOVDQA CC0, CC2; VPADDD ·avx2IncMask<>(SB), DD1, DD2 - VMOVDQA BB0, TT1; VMOVDQA CC0, TT2; VMOVDQA DD0, TT3 - MOVQ $10, itr2 + VMOVDQA Y0, Y5 + VMOVDQA Y14, Y9 + VMOVDQA Y12, Y13 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y0, Y6 + VMOVDQA Y14, Y10 + VMOVDQA Y12, Y8 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y14, Y7 + VMOVDQA Y12, Y11 + VMOVDQA Y4, Y15 + MOVQ $0x0000000a, R9 openAVX2320InnerCipherLoop: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2 - DECQ itr2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + DECQ R9 JNE openAVX2320InnerCipherLoop - - VMOVDQA ·chacha20Constants<>(SB), TT0 - VPADDD TT0, AA0, AA0; VPADDD TT0, AA1, AA1; VPADDD TT0, AA2, AA2 - VPADDD TT1, BB0, BB0; VPADDD TT1, BB1, BB1; VPADDD TT1, BB2, BB2 - VPADDD TT2, CC0, CC0; VPADDD TT2, CC1, CC1; VPADDD TT2, CC2, CC2 - VMOVDQA ·avx2IncMask<>(SB), TT0 - VPADDD TT3, DD0, DD0; VPADDD TT0, TT3, TT3 - VPADDD TT3, DD1, DD1; VPADDD TT0, TT3, TT3 - VPADDD TT3, DD2, DD2 + VMOVDQA ·chacha20Constants<>+0(SB), Y3 + VPADDD Y3, Y0, Y0 + VPADDD Y3, Y5, Y5 + VPADDD Y3, Y6, Y6 + VPADDD Y7, Y14, Y14 + VPADDD Y7, Y9, Y9 + VPADDD Y7, Y10, Y10 + VPADDD Y11, Y12, Y12 + VPADDD Y11, Y13, Y13 + VPADDD Y11, Y8, Y8 + VMOVDQA ·avx2IncMask<>+0(SB), Y3 + VPADDD Y15, Y4, Y4 + VPADDD Y3, Y15, Y15 + VPADDD Y15, Y1, Y1 + VPADDD Y3, Y15, Y15 + VPADDD Y15, Y2, Y2 // Clamp and store poly key - VPERM2I128 $0x02, AA0, BB0, TT0 - VPAND ·polyClampMask<>(SB), TT0, TT0 - VMOVDQA TT0, rsStoreAVX2 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) // Stream for up to 320 bytes - VPERM2I128 $0x13, AA0, BB0, AA0 - VPERM2I128 $0x13, CC0, DD0, BB0 - VPERM2I128 $0x02, AA1, BB1, CC0 - VPERM2I128 $0x02, CC1, DD1, DD0 - VPERM2I128 $0x13, AA1, BB1, AA1 - VPERM2I128 $0x13, CC1, DD1, BB1 - VPERM2I128 $0x02, AA2, BB2, CC1 - VPERM2I128 $0x02, CC2, DD2, DD1 - VPERM2I128 $0x13, AA2, BB2, AA2 - VPERM2I128 $0x13, CC2, DD2, BB2 + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 + VPERM2I128 $0x02, Y5, Y9, Y12 + VPERM2I128 $0x02, Y13, Y1, Y4 + VPERM2I128 $0x13, Y5, Y9, Y5 + VPERM2I128 $0x13, Y13, Y1, Y9 + VPERM2I128 $0x02, Y6, Y10, Y13 + VPERM2I128 $0x02, Y8, Y2, Y1 + VPERM2I128 $0x13, Y6, Y10, Y6 + VPERM2I128 $0x13, Y8, Y2, Y10 JMP openAVX2ShortOpen -// ---------------------------------------------------------------------------- -// Special optimization for the last 128 bytes of ciphertext openAVX2Tail128: // Need to decrypt up to 128 bytes - prepare two blocks - VMOVDQA ·chacha20Constants<>(SB), AA1 - VMOVDQA state1StoreAVX2, BB1 - VMOVDQA state2StoreAVX2, CC1 - VMOVDQA ctr3StoreAVX2, DD1 - VPADDD ·avx2IncMask<>(SB), DD1, DD1 - VMOVDQA DD1, DD0 - - XORQ itr2, itr2 - MOVQ inl, itr1 - ANDQ $-16, itr1 - TESTQ itr1, itr1 - JE openAVX2Tail128LoopB + VMOVDQA ·chacha20Constants<>+0(SB), Y5 + VMOVDQA 32(BP), Y9 + VMOVDQA 64(BP), Y13 + VMOVDQA 192(BP), Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y1 + VMOVDQA Y1, Y4 + XORQ R9, R9 + MOVQ BX, CX + ANDQ $-16, CX + TESTQ CX, CX + JE openAVX2Tail128LoopB openAVX2Tail128LoopA: - // Perform ChaCha rounds, while hashing the remaining input - polyAdd(0(inp)(itr2*1)) - polyMulAVX2 + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 openAVX2Tail128LoopB: - ADDQ $16, itr2 - chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $4, BB1, BB1, BB1 - VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $12, DD1, DD1, DD1 - chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $12, BB1, BB1, BB1 - VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $4, DD1, DD1, DD1 - CMPQ itr2, itr1 - JB openAVX2Tail128LoopA - CMPQ itr2, $160 - JNE openAVX2Tail128LoopB - - VPADDD ·chacha20Constants<>(SB), AA1, AA1 - VPADDD state1StoreAVX2, BB1, BB1 - VPADDD state2StoreAVX2, CC1, CC1 - VPADDD DD0, DD1, DD1 - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 + ADDQ $0x10, R9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y1, Y1, Y1 + CMPQ R9, CX + JB openAVX2Tail128LoopA + CMPQ R9, $0xa0 + JNE openAVX2Tail128LoopB + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD 32(BP), Y9, Y9 + VPADDD 64(BP), Y13, Y13 + VPADDD Y4, Y1, Y1 + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 openAVX2TailLoop: - CMPQ inl, $32 + CMPQ BX, $0x20 JB openAVX2Tail - SUBQ $32, inl + SUBQ $0x20, BX // Load for decryption - VPXOR (inp), AA0, AA0 - VMOVDQU AA0, (oup) - LEAQ (1*32)(inp), inp - LEAQ (1*32)(oup), oup - VMOVDQA BB0, AA0 - VMOVDQA CC0, BB0 - VMOVDQA DD0, CC0 + VPXOR (SI), Y0, Y0 + VMOVDQU Y0, (DI) + LEAQ 32(SI), SI + LEAQ 32(DI), DI + VMOVDQA Y14, Y0 + VMOVDQA Y12, Y14 + VMOVDQA Y4, Y12 JMP openAVX2TailLoop openAVX2Tail: - CMPQ inl, $16 - VMOVDQA A0, A1 + CMPQ BX, $0x10 + VMOVDQA X0, X1 JB openAVX2TailDone - SUBQ $16, inl + SUBQ $0x10, BX // Load for decryption - VPXOR (inp), A0, T0 - VMOVDQU T0, (oup) - LEAQ (1*16)(inp), inp - LEAQ (1*16)(oup), oup - VPERM2I128 $0x11, AA0, AA0, AA0 - VMOVDQA A0, A1 + VPXOR (SI), X0, X12 + VMOVDQU X12, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI + VPERM2I128 $0x11, Y0, Y0, Y0 + VMOVDQA X0, X1 openAVX2TailDone: VZEROUPPER JMP openSSETail16 -// ---------------------------------------------------------------------------- -// Special optimization for the last 256 bytes of ciphertext openAVX2Tail256: - // Need to decrypt up to 256 bytes - prepare four blocks - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA DD0, TT1 - VMOVDQA DD1, TT2 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y4, Y7 + VMOVDQA Y1, Y11 // Compute the number of iterations that will hash data - MOVQ inl, tmpStoreAVX2 - MOVQ inl, itr1 - SUBQ $128, itr1 - SHRQ $4, itr1 - MOVQ $10, itr2 - CMPQ itr1, $10 - CMOVQGT itr2, itr1 - MOVQ inp, inl - XORQ itr2, itr2 + MOVQ BX, 224(BP) + MOVQ BX, CX + SUBQ $0x80, CX + SHRQ $0x04, CX + MOVQ $0x0000000a, R9 + CMPQ CX, $0x0a + CMOVQGT R9, CX + MOVQ SI, BX + XORQ R9, R9 openAVX2Tail256LoopA: - polyAdd(0(inl)) - polyMulAVX2 - LEAQ 16(inl), inl + ADDQ (BX), R10 + ADCQ 8(BX), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(BX), BX - // Perform ChaCha rounds, while hashing the remaining input openAVX2Tail256LoopB: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1 - INCQ itr2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1 - CMPQ itr2, itr1 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + INCQ R9 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + CMPQ R9, CX JB openAVX2Tail256LoopA + CMPQ R9, $0x0a + JNE openAVX2Tail256LoopB + MOVQ BX, R9 + SUBQ SI, BX + MOVQ BX, CX + MOVQ 224(BP), BX - CMPQ itr2, $10 - JNE openAVX2Tail256LoopB - - MOVQ inl, itr2 - SUBQ inp, inl - MOVQ inl, itr1 - MOVQ tmpStoreAVX2, inl - - // Hash the remainder of data (if any) openAVX2Tail256Hash: - ADDQ $16, itr1 - CMPQ itr1, inl - JGT openAVX2Tail256HashEnd - polyAdd (0(itr2)) - polyMulAVX2 - LEAQ 16(itr2), itr2 - JMP openAVX2Tail256Hash - -// Store 128 bytes safely, then go to store loop + ADDQ $0x10, CX + CMPQ CX, BX + JGT openAVX2Tail256HashEnd + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 + JMP openAVX2Tail256Hash + openAVX2Tail256HashEnd: - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1 - VPADDD TT1, DD0, DD0; VPADDD TT2, DD1, DD1 - VPERM2I128 $0x02, AA0, BB0, AA2; VPERM2I128 $0x02, CC0, DD0, BB2; VPERM2I128 $0x13, AA0, BB0, CC2; VPERM2I128 $0x13, CC0, DD0, DD2 - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 - - VPXOR (0*32)(inp), AA2, AA2; VPXOR (1*32)(inp), BB2, BB2; VPXOR (2*32)(inp), CC2, CC2; VPXOR (3*32)(inp), DD2, DD2 - VMOVDQU AA2, (0*32)(oup); VMOVDQU BB2, (1*32)(oup); VMOVDQU CC2, (2*32)(oup); VMOVDQU DD2, (3*32)(oup) - LEAQ (4*32)(inp), inp - LEAQ (4*32)(oup), oup - SUBQ $4*32, inl - - JMP openAVX2TailLoop - -// ---------------------------------------------------------------------------- -// Special optimization for the last 384 bytes of ciphertext + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD Y7, Y4, Y4 + VPADDD Y11, Y1, Y1 + VPERM2I128 $0x02, Y0, Y14, Y6 + VPERM2I128 $0x02, Y12, Y4, Y10 + VPERM2I128 $0x13, Y0, Y14, Y8 + VPERM2I128 $0x13, Y12, Y4, Y2 + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR (SI), Y6, Y6 + VPXOR 32(SI), Y10, Y10 + VPXOR 64(SI), Y8, Y8 + VPXOR 96(SI), Y2, Y2 + VMOVDQU Y6, (DI) + VMOVDQU Y10, 32(DI) + VMOVDQU Y8, 64(DI) + VMOVDQU Y2, 96(DI) + LEAQ 128(SI), SI + LEAQ 128(DI), DI + SUBQ $0x80, BX + JMP openAVX2TailLoop + openAVX2Tail384: // Need to decrypt up to 384 bytes - prepare six blocks - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VPADDD ·avx2IncMask<>(SB), DD1, DD2 - VMOVDQA DD0, ctr0StoreAVX2 - VMOVDQA DD1, ctr1StoreAVX2 - VMOVDQA DD2, ctr2StoreAVX2 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) // Compute the number of iterations that will hash two blocks of data - MOVQ inl, tmpStoreAVX2 - MOVQ inl, itr1 - SUBQ $256, itr1 - SHRQ $4, itr1 - ADDQ $6, itr1 - MOVQ $10, itr2 - CMPQ itr1, $10 - CMOVQGT itr2, itr1 - MOVQ inp, inl - XORQ itr2, itr2 - - // Perform ChaCha rounds, while hashing the remaining input + MOVQ BX, 224(BP) + MOVQ BX, CX + SUBQ $0x00000100, CX + SHRQ $0x04, CX + ADDQ $0x06, CX + MOVQ $0x0000000a, R9 + CMPQ CX, $0x0a + CMOVQGT R9, CX + MOVQ SI, BX + XORQ R9, R9 + openAVX2Tail384LoopB: - polyAdd(0(inl)) - polyMulAVX2 - LEAQ 16(inl), inl + ADDQ (BX), R10 + ADCQ 8(BX), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(BX), BX openAVX2Tail384LoopA: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2 - polyAdd(0(inl)) - polyMulAVX2 - LEAQ 16(inl), inl - INCQ itr2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2 - - CMPQ itr2, itr1 - JB openAVX2Tail384LoopB - - CMPQ itr2, $10 - JNE openAVX2Tail384LoopA - - MOVQ inl, itr2 - SUBQ inp, inl - MOVQ inl, itr1 - MOVQ tmpStoreAVX2, inl + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + ADDQ (BX), R10 + ADCQ 8(BX), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(BX), BX + INCQ R9 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + CMPQ R9, CX + JB openAVX2Tail384LoopB + CMPQ R9, $0x0a + JNE openAVX2Tail384LoopA + MOVQ BX, R9 + SUBQ SI, BX + MOVQ BX, CX + MOVQ 224(BP), BX openAVX2Tail384Hash: - ADDQ $16, itr1 - CMPQ itr1, inl - JGT openAVX2Tail384HashEnd - polyAdd(0(itr2)) - polyMulAVX2 - LEAQ 16(itr2), itr2 - JMP openAVX2Tail384Hash - -// Store 256 bytes safely, then go to store loop + ADDQ $0x10, CX + CMPQ CX, BX + JGT openAVX2Tail384HashEnd + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 + JMP openAVX2Tail384Hash + openAVX2Tail384HashEnd: - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2 - VPERM2I128 $0x02, AA0, BB0, TT0; VPERM2I128 $0x02, CC0, DD0, TT1; VPERM2I128 $0x13, AA0, BB0, TT2; VPERM2I128 $0x13, CC0, DD0, TT3 - VPXOR (0*32)(inp), TT0, TT0; VPXOR (1*32)(inp), TT1, TT1; VPXOR (2*32)(inp), TT2, TT2; VPXOR (3*32)(inp), TT3, TT3 - VMOVDQU TT0, (0*32)(oup); VMOVDQU TT1, (1*32)(oup); VMOVDQU TT2, (2*32)(oup); VMOVDQU TT3, (3*32)(oup) - VPERM2I128 $0x02, AA1, BB1, TT0; VPERM2I128 $0x02, CC1, DD1, TT1; VPERM2I128 $0x13, AA1, BB1, TT2; VPERM2I128 $0x13, CC1, DD1, TT3 - VPXOR (4*32)(inp), TT0, TT0; VPXOR (5*32)(inp), TT1, TT1; VPXOR (6*32)(inp), TT2, TT2; VPXOR (7*32)(inp), TT3, TT3 - VMOVDQU TT0, (4*32)(oup); VMOVDQU TT1, (5*32)(oup); VMOVDQU TT2, (6*32)(oup); VMOVDQU TT3, (7*32)(oup) - VPERM2I128 $0x02, AA2, BB2, AA0; VPERM2I128 $0x02, CC2, DD2, BB0; VPERM2I128 $0x13, AA2, BB2, CC0; VPERM2I128 $0x13, CC2, DD2, DD0 - LEAQ (8*32)(inp), inp - LEAQ (8*32)(oup), oup - SUBQ $8*32, inl + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPERM2I128 $0x02, Y12, Y4, Y7 + VPERM2I128 $0x13, Y0, Y14, Y11 + VPERM2I128 $0x13, Y12, Y4, Y15 + VPXOR (SI), Y3, Y3 + VPXOR 32(SI), Y7, Y7 + VPXOR 64(SI), Y11, Y11 + VPXOR 96(SI), Y15, Y15 + VMOVDQU Y3, (DI) + VMOVDQU Y7, 32(DI) + VMOVDQU Y11, 64(DI) + VMOVDQU Y15, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y3 + VPERM2I128 $0x02, Y13, Y1, Y7 + VPERM2I128 $0x13, Y5, Y9, Y11 + VPERM2I128 $0x13, Y13, Y1, Y15 + VPXOR 128(SI), Y3, Y3 + VPXOR 160(SI), Y7, Y7 + VPXOR 192(SI), Y11, Y11 + VPXOR 224(SI), Y15, Y15 + VMOVDQU Y3, 128(DI) + VMOVDQU Y7, 160(DI) + VMOVDQU Y11, 192(DI) + VMOVDQU Y15, 224(DI) + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + LEAQ 256(SI), SI + LEAQ 256(DI), DI + SUBQ $0x00000100, BX JMP openAVX2TailLoop -// ---------------------------------------------------------------------------- -// Special optimization for the last 512 bytes of ciphertext openAVX2Tail512: - VMOVDQU ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3 - VMOVDQA ctr3StoreAVX2, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2; VPADDD ·avx2IncMask<>(SB), DD2, DD3 - VMOVDQA DD0, ctr0StoreAVX2; VMOVDQA DD1, ctr1StoreAVX2; VMOVDQA DD2, ctr2StoreAVX2; VMOVDQA DD3, ctr3StoreAVX2 - XORQ itr1, itr1 - MOVQ inp, itr2 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + XORQ CX, CX + MOVQ SI, R9 openAVX2Tail512LoopB: - polyAdd(0(itr2)) - polyMulAVX2 - LEAQ (2*8)(itr2), itr2 + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 openAVX2Tail512LoopA: - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyAdd(0*8(itr2)) - polyMulAVX2 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $4, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2; VPALIGNR $12, DD3, DD3, DD3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyAdd(2*8(itr2)) - polyMulAVX2 - LEAQ (4*8)(itr2), itr2 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $12, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2; VPALIGNR $4, DD3, DD3, DD3 - INCQ itr1 - CMPQ itr1, $4 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x0c, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + ADDQ 16(R9), R10 + ADCQ 24(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(R9), R9 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x04, Y3, Y3, Y3 + INCQ CX + CMPQ CX, $0x04 JLT openAVX2Tail512LoopB - - CMPQ itr1, $10 - JNE openAVX2Tail512LoopA - - MOVQ inl, itr1 - SUBQ $384, itr1 - ANDQ $-16, itr1 + CMPQ CX, $0x0a + JNE openAVX2Tail512LoopA + MOVQ BX, CX + SUBQ $0x00000180, CX + ANDQ $-16, CX openAVX2Tail512HashLoop: - TESTQ itr1, itr1 + TESTQ CX, CX JE openAVX2Tail512HashEnd - polyAdd(0(itr2)) - polyMulAVX2 - LEAQ 16(itr2), itr2 - SUBQ $16, itr1 + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 + SUBQ $0x10, CX JMP openAVX2Tail512HashLoop openAVX2Tail512HashEnd: - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2; VPADDD ·chacha20Constants<>(SB), AA3, AA3 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2; VPADDD state1StoreAVX2, BB3, BB3 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2; VPADDD state2StoreAVX2, CC3, CC3 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2; VPADDD ctr3StoreAVX2, DD3, DD3 - VMOVDQA CC3, tmpStoreAVX2 - VPERM2I128 $0x02, AA0, BB0, CC3; VPERM2I128 $0x13, AA0, BB0, BB0; VPERM2I128 $0x02, CC0, DD0, AA0; VPERM2I128 $0x13, CC0, DD0, CC0 - VPXOR (0*32)(inp), CC3, CC3; VPXOR (1*32)(inp), AA0, AA0; VPXOR (2*32)(inp), BB0, BB0; VPXOR (3*32)(inp), CC0, CC0 - VMOVDQU CC3, (0*32)(oup); VMOVDQU AA0, (1*32)(oup); VMOVDQU BB0, (2*32)(oup); VMOVDQU CC0, (3*32)(oup) - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 - VPXOR (4*32)(inp), AA0, AA0; VPXOR (5*32)(inp), BB0, BB0; VPXOR (6*32)(inp), CC0, CC0; VPXOR (7*32)(inp), DD0, DD0 - VMOVDQU AA0, (4*32)(oup); VMOVDQU BB0, (5*32)(oup); VMOVDQU CC0, (6*32)(oup); VMOVDQU DD0, (7*32)(oup) - VPERM2I128 $0x02, AA2, BB2, AA0; VPERM2I128 $0x02, CC2, DD2, BB0; VPERM2I128 $0x13, AA2, BB2, CC0; VPERM2I128 $0x13, CC2, DD2, DD0 - VPXOR (8*32)(inp), AA0, AA0; VPXOR (9*32)(inp), BB0, BB0; VPXOR (10*32)(inp), CC0, CC0; VPXOR (11*32)(inp), DD0, DD0 - VMOVDQU AA0, (8*32)(oup); VMOVDQU BB0, (9*32)(oup); VMOVDQU CC0, (10*32)(oup); VMOVDQU DD0, (11*32)(oup) - VPERM2I128 $0x02, AA3, BB3, AA0; VPERM2I128 $0x02, tmpStoreAVX2, DD3, BB0; VPERM2I128 $0x13, AA3, BB3, CC0; VPERM2I128 $0x13, tmpStoreAVX2, DD3, DD0 - - LEAQ (12*32)(inp), inp - LEAQ (12*32)(oup), oup - SUBQ $12*32, inl - - JMP openAVX2TailLoop - -// ---------------------------------------------------------------------------- -// ---------------------------------------------------------------------------- -// func chacha20Poly1305Seal(dst, key, src, ad []byte) -TEXT ·chacha20Poly1305Seal(SB), 0, $288-96 - // For aligned stack access + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VMOVDQA Y15, 224(BP) + VPERM2I128 $0x02, Y0, Y14, Y15 + VPERM2I128 $0x13, Y0, Y14, Y14 + VPERM2I128 $0x02, Y12, Y4, Y0 + VPERM2I128 $0x13, Y12, Y4, Y12 + VPXOR (SI), Y15, Y15 + VPXOR 32(SI), Y0, Y0 + VPXOR 64(SI), Y14, Y14 + VPXOR 96(SI), Y12, Y12 + VMOVDQU Y15, (DI) + VMOVDQU Y0, 32(DI) + VMOVDQU Y14, 64(DI) + VMOVDQU Y12, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 128(SI), Y0, Y0 + VPXOR 160(SI), Y14, Y14 + VPXOR 192(SI), Y12, Y12 + VPXOR 224(SI), Y4, Y4 + VMOVDQU Y0, 128(DI) + VMOVDQU Y14, 160(DI) + VMOVDQU Y12, 192(DI) + VMOVDQU Y4, 224(DI) + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 256(SI), Y0, Y0 + VPXOR 288(SI), Y14, Y14 + VPXOR 320(SI), Y12, Y12 + VPXOR 352(SI), Y4, Y4 + VMOVDQU Y0, 256(DI) + VMOVDQU Y14, 288(DI) + VMOVDQU Y12, 320(DI) + VMOVDQU Y4, 352(DI) + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, 224(BP), Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, 224(BP), Y3, Y4 + LEAQ 384(SI), SI + LEAQ 384(DI), DI + SUBQ $0x00000180, BX + JMP openAVX2TailLoop + +DATA ·chacha20Constants<>+0(SB)/4, $0x61707865 +DATA ·chacha20Constants<>+4(SB)/4, $0x3320646e +DATA ·chacha20Constants<>+8(SB)/4, $0x79622d32 +DATA ·chacha20Constants<>+12(SB)/4, $0x6b206574 +DATA ·chacha20Constants<>+16(SB)/4, $0x61707865 +DATA ·chacha20Constants<>+20(SB)/4, $0x3320646e +DATA ·chacha20Constants<>+24(SB)/4, $0x79622d32 +DATA ·chacha20Constants<>+28(SB)/4, $0x6b206574 +GLOBL ·chacha20Constants<>(SB), RODATA|NOPTR, $32 + +DATA ·polyClampMask<>+0(SB)/8, $0x0ffffffc0fffffff +DATA ·polyClampMask<>+8(SB)/8, $0x0ffffffc0ffffffc +DATA ·polyClampMask<>+16(SB)/8, $0xffffffffffffffff +DATA ·polyClampMask<>+24(SB)/8, $0xffffffffffffffff +GLOBL ·polyClampMask<>(SB), RODATA|NOPTR, $32 + +DATA ·sseIncMask<>+0(SB)/8, $0x0000000000000001 +DATA ·sseIncMask<>+8(SB)/8, $0x0000000000000000 +GLOBL ·sseIncMask<>(SB), RODATA|NOPTR, $16 + +DATA ·andMask<>+0(SB)/8, $0x00000000000000ff +DATA ·andMask<>+8(SB)/8, $0x0000000000000000 +DATA ·andMask<>+16(SB)/8, $0x000000000000ffff +DATA ·andMask<>+24(SB)/8, $0x0000000000000000 +DATA ·andMask<>+32(SB)/8, $0x0000000000ffffff +DATA ·andMask<>+40(SB)/8, $0x0000000000000000 +DATA ·andMask<>+48(SB)/8, $0x00000000ffffffff +DATA ·andMask<>+56(SB)/8, $0x0000000000000000 +DATA ·andMask<>+64(SB)/8, $0x000000ffffffffff +DATA ·andMask<>+72(SB)/8, $0x0000000000000000 +DATA ·andMask<>+80(SB)/8, $0x0000ffffffffffff +DATA ·andMask<>+88(SB)/8, $0x0000000000000000 +DATA ·andMask<>+96(SB)/8, $0x00ffffffffffffff +DATA ·andMask<>+104(SB)/8, $0x0000000000000000 +DATA ·andMask<>+112(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+120(SB)/8, $0x0000000000000000 +DATA ·andMask<>+128(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+136(SB)/8, $0x00000000000000ff +DATA ·andMask<>+144(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+152(SB)/8, $0x000000000000ffff +DATA ·andMask<>+160(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+168(SB)/8, $0x0000000000ffffff +DATA ·andMask<>+176(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+184(SB)/8, $0x00000000ffffffff +DATA ·andMask<>+192(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+200(SB)/8, $0x000000ffffffffff +DATA ·andMask<>+208(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+216(SB)/8, $0x0000ffffffffffff +DATA ·andMask<>+224(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+232(SB)/8, $0x00ffffffffffffff +GLOBL ·andMask<>(SB), RODATA|NOPTR, $240 + +DATA ·avx2InitMask<>+0(SB)/8, $0x0000000000000000 +DATA ·avx2InitMask<>+8(SB)/8, $0x0000000000000000 +DATA ·avx2InitMask<>+16(SB)/8, $0x0000000000000001 +DATA ·avx2InitMask<>+24(SB)/8, $0x0000000000000000 +GLOBL ·avx2InitMask<>(SB), RODATA|NOPTR, $32 + +DATA ·rol16<>+0(SB)/8, $0x0504070601000302 +DATA ·rol16<>+8(SB)/8, $0x0d0c0f0e09080b0a +DATA ·rol16<>+16(SB)/8, $0x0504070601000302 +DATA ·rol16<>+24(SB)/8, $0x0d0c0f0e09080b0a +GLOBL ·rol16<>(SB), RODATA|NOPTR, $32 + +DATA ·rol8<>+0(SB)/8, $0x0605040702010003 +DATA ·rol8<>+8(SB)/8, $0x0e0d0c0f0a09080b +DATA ·rol8<>+16(SB)/8, $0x0605040702010003 +DATA ·rol8<>+24(SB)/8, $0x0e0d0c0f0a09080b +GLOBL ·rol8<>(SB), RODATA|NOPTR, $32 + +DATA ·avx2IncMask<>+0(SB)/8, $0x0000000000000002 +DATA ·avx2IncMask<>+8(SB)/8, $0x0000000000000000 +DATA ·avx2IncMask<>+16(SB)/8, $0x0000000000000002 +DATA ·avx2IncMask<>+24(SB)/8, $0x0000000000000000 +GLOBL ·avx2IncMask<>(SB), RODATA|NOPTR, $32 + +// func chacha20Poly1305Seal(dst []byte, key []uint32, src []byte, ad []byte) +// Requires: AVX, AVX2, BMI2, CMOV, SSE2 +TEXT ·chacha20Poly1305Seal(SB), $288-96 MOVQ SP, BP - ADDQ $32, BP + ADDQ $0x20, BP ANDQ $-32, BP - MOVQ dst+0(FP), oup - MOVQ key+24(FP), keyp - MOVQ src+48(FP), inp - MOVQ src_len+56(FP), inl - MOVQ ad+72(FP), adp - - CMPB ·useAVX2(SB), $1 + MOVQ dst_base+0(FP), DI + MOVQ key_base+24(FP), R8 + MOVQ src_base+48(FP), SI + MOVQ src_len+56(FP), BX + MOVQ ad_base+72(FP), CX + CMPB ·useAVX2+0(SB), $0x01 JE chacha20Poly1305Seal_AVX2 // Special optimization, for very short buffers - CMPQ inl, $128 - JBE sealSSE128 // About 15% faster + CMPQ BX, $0x80 + JBE sealSSE128 // In the seal case - prepare the poly key + 3 blocks of stream in the first iteration - MOVOU ·chacha20Constants<>(SB), A0 - MOVOU (1*16)(keyp), B0 - MOVOU (2*16)(keyp), C0 - MOVOU (3*16)(keyp), D0 + MOVOU ·chacha20Constants<>+0(SB), X0 + MOVOU 16(R8), X3 + MOVOU 32(R8), X6 + MOVOU 48(R8), X9 // Store state on stack for future use - MOVO B0, state1Store - MOVO C0, state2Store + MOVO X3, 32(BP) + MOVO X6, 48(BP) // Load state, increment counter blocks - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO A2, A3; MOVO B2, B3; MOVO C2, C3; MOVO D2, D3; PADDL ·sseIncMask<>(SB), D3 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X2, X12 + MOVO X5, X13 + MOVO X8, X14 + MOVO X11, X15 + PADDL ·sseIncMask<>+0(SB), X15 // Store counters - MOVO D0, ctr0Store; MOVO D1, ctr1Store; MOVO D2, ctr2Store; MOVO D3, ctr3Store - MOVQ $10, itr2 + MOVO X9, 80(BP) + MOVO X10, 96(BP) + MOVO X11, 112(BP) + MOVO X15, 128(BP) + MOVQ $0x0000000a, R9 sealSSEIntroLoop: - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - shiftB0Left; shiftB1Left; shiftB2Left; shiftB3Left - shiftC0Left; shiftC1Left; shiftC2Left; shiftC3Left - shiftD0Left; shiftD1Left; shiftD2Left; shiftD3Left - - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - shiftB0Right; shiftB1Right; shiftB2Right; shiftB3Right - shiftC0Right; shiftC1Right; shiftC2Right; shiftC3Right - shiftD0Right; shiftD1Right; shiftD2Right; shiftD3Right - DECQ itr2 - JNE sealSSEIntroLoop + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x0c + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x04 + DECQ R9 + JNE sealSSEIntroLoop // Add in the state - PADDD ·chacha20Constants<>(SB), A0; PADDD ·chacha20Constants<>(SB), A1; PADDD ·chacha20Constants<>(SB), A2; PADDD ·chacha20Constants<>(SB), A3 - PADDD state1Store, B0; PADDD state1Store, B1; PADDD state1Store, B2; PADDD state1Store, B3 - PADDD state2Store, C1; PADDD state2Store, C2; PADDD state2Store, C3 - PADDD ctr1Store, D1; PADDD ctr2Store, D2; PADDD ctr3Store, D3 + PADDD ·chacha20Constants<>+0(SB), X0 + PADDD ·chacha20Constants<>+0(SB), X1 + PADDD ·chacha20Constants<>+0(SB), X2 + PADDD ·chacha20Constants<>+0(SB), X12 + PADDD 32(BP), X3 + PADDD 32(BP), X4 + PADDD 32(BP), X5 + PADDD 32(BP), X13 + PADDD 48(BP), X7 + PADDD 48(BP), X8 + PADDD 48(BP), X14 + PADDD 96(BP), X10 + PADDD 112(BP), X11 + PADDD 128(BP), X15 // Clamp and store the key - PAND ·polyClampMask<>(SB), A0 - MOVO A0, rStore - MOVO B0, sStore + PAND ·polyClampMask<>+0(SB), X0 + MOVO X0, (BP) + MOVO X3, 16(BP) // Hash AAD - MOVQ ad_len+80(FP), itr2 - CALL polyHashADInternal<>(SB) - - MOVOU (0*16)(inp), A0; MOVOU (1*16)(inp), B0; MOVOU (2*16)(inp), C0; MOVOU (3*16)(inp), D0 - PXOR A0, A1; PXOR B0, B1; PXOR C0, C1; PXOR D0, D1 - MOVOU A1, (0*16)(oup); MOVOU B1, (1*16)(oup); MOVOU C1, (2*16)(oup); MOVOU D1, (3*16)(oup) - MOVOU (4*16)(inp), A0; MOVOU (5*16)(inp), B0; MOVOU (6*16)(inp), C0; MOVOU (7*16)(inp), D0 - PXOR A0, A2; PXOR B0, B2; PXOR C0, C2; PXOR D0, D2 - MOVOU A2, (4*16)(oup); MOVOU B2, (5*16)(oup); MOVOU C2, (6*16)(oup); MOVOU D2, (7*16)(oup) - - MOVQ $128, itr1 - SUBQ $128, inl - LEAQ 128(inp), inp - - MOVO A3, A1; MOVO B3, B1; MOVO C3, C1; MOVO D3, D1 - - CMPQ inl, $64 - JBE sealSSE128SealHash - - MOVOU (0*16)(inp), A0; MOVOU (1*16)(inp), B0; MOVOU (2*16)(inp), C0; MOVOU (3*16)(inp), D0 - PXOR A0, A3; PXOR B0, B3; PXOR C0, C3; PXOR D0, D3 - MOVOU A3, (8*16)(oup); MOVOU B3, (9*16)(oup); MOVOU C3, (10*16)(oup); MOVOU D3, (11*16)(oup) - - ADDQ $64, itr1 - SUBQ $64, inl - LEAQ 64(inp), inp - - MOVQ $2, itr1 - MOVQ $8, itr2 - - CMPQ inl, $64 - JBE sealSSETail64 - CMPQ inl, $128 - JBE sealSSETail128 - CMPQ inl, $192 - JBE sealSSETail192 + MOVQ ad_len+80(FP), R9 + CALL polyHashADInternal<>(SB) + MOVOU (SI), X0 + MOVOU 16(SI), X3 + MOVOU 32(SI), X6 + MOVOU 48(SI), X9 + PXOR X0, X1 + PXOR X3, X4 + PXOR X6, X7 + PXOR X9, X10 + MOVOU X1, (DI) + MOVOU X4, 16(DI) + MOVOU X7, 32(DI) + MOVOU X10, 48(DI) + MOVOU 64(SI), X0 + MOVOU 80(SI), X3 + MOVOU 96(SI), X6 + MOVOU 112(SI), X9 + PXOR X0, X2 + PXOR X3, X5 + PXOR X6, X8 + PXOR X9, X11 + MOVOU X2, 64(DI) + MOVOU X5, 80(DI) + MOVOU X8, 96(DI) + MOVOU X11, 112(DI) + MOVQ $0x00000080, CX + SUBQ $0x80, BX + LEAQ 128(SI), SI + MOVO X12, X1 + MOVO X13, X4 + MOVO X14, X7 + MOVO X15, X10 + CMPQ BX, $0x40 + JBE sealSSE128SealHash + MOVOU (SI), X0 + MOVOU 16(SI), X3 + MOVOU 32(SI), X6 + MOVOU 48(SI), X9 + PXOR X0, X12 + PXOR X3, X13 + PXOR X6, X14 + PXOR X9, X15 + MOVOU X12, 128(DI) + MOVOU X13, 144(DI) + MOVOU X14, 160(DI) + MOVOU X15, 176(DI) + ADDQ $0x40, CX + SUBQ $0x40, BX + LEAQ 64(SI), SI + MOVQ $0x00000002, CX + MOVQ $0x00000008, R9 + CMPQ BX, $0x40 + JBE sealSSETail64 + CMPQ BX, $0x80 + JBE sealSSETail128 + CMPQ BX, $0xc0 + JBE sealSSETail192 sealSSEMainLoop: // Load state, increment counter blocks - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0 - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO A2, A3; MOVO B2, B3; MOVO C2, C3; MOVO D2, D3; PADDL ·sseIncMask<>(SB), D3 + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X2, X12 + MOVO X5, X13 + MOVO X8, X14 + MOVO X11, X15 + PADDL ·sseIncMask<>+0(SB), X15 // Store counters - MOVO D0, ctr0Store; MOVO D1, ctr1Store; MOVO D2, ctr2Store; MOVO D3, ctr3Store + MOVO X9, 80(BP) + MOVO X10, 96(BP) + MOVO X11, 112(BP) + MOVO X15, 128(BP) sealSSEInnerLoop: - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - polyAdd(0(oup)) - shiftB0Left; shiftB1Left; shiftB2Left; shiftB3Left - shiftC0Left; shiftC1Left; shiftC2Left; shiftC3Left - shiftD0Left; shiftD1Left; shiftD2Left; shiftD3Left - polyMulStage1 - polyMulStage2 - LEAQ (2*8)(oup), oup - MOVO C3, tmpStore - chachaQR(A0, B0, C0, D0, C3); chachaQR(A1, B1, C1, D1, C3); chachaQR(A2, B2, C2, D2, C3) - MOVO tmpStore, C3 - MOVO C1, tmpStore - polyMulStage3 - chachaQR(A3, B3, C3, D3, C1) - MOVO tmpStore, C1 - polyMulReduceStage - shiftB0Right; shiftB1Right; shiftB2Right; shiftB3Right - shiftC0Right; shiftC1Right; shiftC2Right; shiftC3Right - shiftD0Right; shiftD1Right; shiftD2Right; shiftD3Right - DECQ itr2 - JGE sealSSEInnerLoop - polyAdd(0(oup)) - polyMul - LEAQ (2*8)(oup), oup - DECQ itr1 - JG sealSSEInnerLoop + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x0c + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + LEAQ 16(DI), DI + MOVO X14, 64(BP) + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X3 + PXOR X14, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X14) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X3 + PXOR X14, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X4 + PXOR X14, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X14) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X4 + PXOR X14, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x0c, X14 + PSRLL $0x14, X5 + PXOR X14, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X14) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X14 + PSLLL $0x07, X14 + PSRLL $0x19, X5 + PXOR X14, X5 + MOVO 64(BP), X14 + MOVO X7, 64(BP) + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + PADDD X13, X12 + PXOR X12, X15 + ROL16(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x0c, X7 + PSRLL $0x14, X13 + PXOR X7, X13 + PADDD X13, X12 + PXOR X12, X15 + ROL8(X15, X7) + PADDD X15, X14 + PXOR X14, X13 + MOVO X13, X7 + PSLLL $0x07, X7 + PSRLL $0x19, X13 + PXOR X7, X13 + MOVO 64(BP), X7 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x04 + DECQ R9 + JGE sealSSEInnerLoop + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI + DECQ CX + JG sealSSEInnerLoop // Add in the state - PADDD ·chacha20Constants<>(SB), A0; PADDD ·chacha20Constants<>(SB), A1; PADDD ·chacha20Constants<>(SB), A2; PADDD ·chacha20Constants<>(SB), A3 - PADDD state1Store, B0; PADDD state1Store, B1; PADDD state1Store, B2; PADDD state1Store, B3 - PADDD state2Store, C0; PADDD state2Store, C1; PADDD state2Store, C2; PADDD state2Store, C3 - PADDD ctr0Store, D0; PADDD ctr1Store, D1; PADDD ctr2Store, D2; PADDD ctr3Store, D3 - MOVO D3, tmpStore + PADDD ·chacha20Constants<>+0(SB), X0 + PADDD ·chacha20Constants<>+0(SB), X1 + PADDD ·chacha20Constants<>+0(SB), X2 + PADDD ·chacha20Constants<>+0(SB), X12 + PADDD 32(BP), X3 + PADDD 32(BP), X4 + PADDD 32(BP), X5 + PADDD 32(BP), X13 + PADDD 48(BP), X6 + PADDD 48(BP), X7 + PADDD 48(BP), X8 + PADDD 48(BP), X14 + PADDD 80(BP), X9 + PADDD 96(BP), X10 + PADDD 112(BP), X11 + PADDD 128(BP), X15 + MOVO X15, 64(BP) // Load - xor - store - MOVOU (0*16)(inp), D3; PXOR D3, A0 - MOVOU (1*16)(inp), D3; PXOR D3, B0 - MOVOU (2*16)(inp), D3; PXOR D3, C0 - MOVOU (3*16)(inp), D3; PXOR D3, D0 - MOVOU A0, (0*16)(oup) - MOVOU B0, (1*16)(oup) - MOVOU C0, (2*16)(oup) - MOVOU D0, (3*16)(oup) - MOVO tmpStore, D3 - - MOVOU (4*16)(inp), A0; MOVOU (5*16)(inp), B0; MOVOU (6*16)(inp), C0; MOVOU (7*16)(inp), D0 - PXOR A0, A1; PXOR B0, B1; PXOR C0, C1; PXOR D0, D1 - MOVOU A1, (4*16)(oup); MOVOU B1, (5*16)(oup); MOVOU C1, (6*16)(oup); MOVOU D1, (7*16)(oup) - MOVOU (8*16)(inp), A0; MOVOU (9*16)(inp), B0; MOVOU (10*16)(inp), C0; MOVOU (11*16)(inp), D0 - PXOR A0, A2; PXOR B0, B2; PXOR C0, C2; PXOR D0, D2 - MOVOU A2, (8*16)(oup); MOVOU B2, (9*16)(oup); MOVOU C2, (10*16)(oup); MOVOU D2, (11*16)(oup) - ADDQ $192, inp - MOVQ $192, itr1 - SUBQ $192, inl - MOVO A3, A1 - MOVO B3, B1 - MOVO C3, C1 - MOVO D3, D1 - CMPQ inl, $64 + MOVOU (SI), X15 + PXOR X15, X0 + MOVOU 16(SI), X15 + PXOR X15, X3 + MOVOU 32(SI), X15 + PXOR X15, X6 + MOVOU 48(SI), X15 + PXOR X15, X9 + MOVOU X0, (DI) + MOVOU X3, 16(DI) + MOVOU X6, 32(DI) + MOVOU X9, 48(DI) + MOVO 64(BP), X15 + MOVOU 64(SI), X0 + MOVOU 80(SI), X3 + MOVOU 96(SI), X6 + MOVOU 112(SI), X9 + PXOR X0, X1 + PXOR X3, X4 + PXOR X6, X7 + PXOR X9, X10 + MOVOU X1, 64(DI) + MOVOU X4, 80(DI) + MOVOU X7, 96(DI) + MOVOU X10, 112(DI) + MOVOU 128(SI), X0 + MOVOU 144(SI), X3 + MOVOU 160(SI), X6 + MOVOU 176(SI), X9 + PXOR X0, X2 + PXOR X3, X5 + PXOR X6, X8 + PXOR X9, X11 + MOVOU X2, 128(DI) + MOVOU X5, 144(DI) + MOVOU X8, 160(DI) + MOVOU X11, 176(DI) + ADDQ $0xc0, SI + MOVQ $0x000000c0, CX + SUBQ $0xc0, BX + MOVO X12, X1 + MOVO X13, X4 + MOVO X14, X7 + MOVO X15, X10 + CMPQ BX, $0x40 JBE sealSSE128SealHash - MOVOU (0*16)(inp), A0; MOVOU (1*16)(inp), B0; MOVOU (2*16)(inp), C0; MOVOU (3*16)(inp), D0 - PXOR A0, A3; PXOR B0, B3; PXOR C0, C3; PXOR D0, D3 - MOVOU A3, (12*16)(oup); MOVOU B3, (13*16)(oup); MOVOU C3, (14*16)(oup); MOVOU D3, (15*16)(oup) - LEAQ 64(inp), inp - SUBQ $64, inl - MOVQ $6, itr1 - MOVQ $4, itr2 - CMPQ inl, $192 + MOVOU (SI), X0 + MOVOU 16(SI), X3 + MOVOU 32(SI), X6 + MOVOU 48(SI), X9 + PXOR X0, X12 + PXOR X3, X13 + PXOR X6, X14 + PXOR X9, X15 + MOVOU X12, 192(DI) + MOVOU X13, 208(DI) + MOVOU X14, 224(DI) + MOVOU X15, 240(DI) + LEAQ 64(SI), SI + SUBQ $0x40, BX + MOVQ $0x00000006, CX + MOVQ $0x00000004, R9 + CMPQ BX, $0xc0 JG sealSSEMainLoop - - MOVQ inl, itr1 - TESTQ inl, inl + MOVQ BX, CX + TESTQ BX, BX JE sealSSE128SealHash - MOVQ $6, itr1 - CMPQ inl, $64 + MOVQ $0x00000006, CX + CMPQ BX, $0x40 JBE sealSSETail64 - CMPQ inl, $128 + CMPQ BX, $0x80 JBE sealSSETail128 JMP sealSSETail192 -// ---------------------------------------------------------------------------- -// Special optimization for the last 64 bytes of plaintext sealSSETail64: - // Need to encrypt up to 64 bytes - prepare single block, hash 192 or 256 bytes - MOVO ·chacha20Constants<>(SB), A1 - MOVO state1Store, B1 - MOVO state2Store, C1 - MOVO ctr3Store, D1 - PADDL ·sseIncMask<>(SB), D1 - MOVO D1, ctr0Store + MOVO ·chacha20Constants<>+0(SB), X1 + MOVO 32(BP), X4 + MOVO 48(BP), X7 + MOVO 128(BP), X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X10, 80(BP) sealSSETail64LoopA: - // Perform ChaCha rounds, while hashing the previously encrypted ciphertext - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealSSETail64LoopB: - chachaQR(A1, B1, C1, D1, T1) - shiftB1Left; shiftC1Left; shiftD1Left - chachaQR(A1, B1, C1, D1, T1) - shiftB1Right; shiftC1Right; shiftD1Right - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup - - DECQ itr1 - JG sealSSETail64LoopA - - DECQ itr2 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X13) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X13 + PSLLL $0x0c, X13 + PSRLL $0x14, X4 + PXOR X13, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X13) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X13 + PSLLL $0x07, X13 + PSRLL $0x19, X4 + PXOR X13, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X13) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X13 + PSLLL $0x0c, X13 + PSRLL $0x14, X4 + PXOR X13, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X13) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X13 + PSLLL $0x07, X13 + PSRLL $0x19, X4 + PXOR X13, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI + DECQ CX + JG sealSSETail64LoopA + DECQ R9 JGE sealSSETail64LoopB - PADDL ·chacha20Constants<>(SB), A1 - PADDL state1Store, B1 - PADDL state2Store, C1 - PADDL ctr0Store, D1 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL 32(BP), X4 + PADDL 48(BP), X7 + PADDL 80(BP), X10 + JMP sealSSE128Seal - JMP sealSSE128Seal - -// ---------------------------------------------------------------------------- -// Special optimization for the last 128 bytes of plaintext sealSSETail128: - // Need to encrypt up to 128 bytes - prepare two blocks, hash 192 or 256 bytes - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0; MOVO D0, ctr0Store - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1; MOVO D1, ctr1Store + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X9, 80(BP) + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X10, 96(BP) sealSSETail128LoopA: - // Perform ChaCha rounds, while hashing the previously encrypted ciphertext - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealSSETail128LoopB: - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0) - shiftB0Left; shiftC0Left; shiftD0Left - shiftB1Left; shiftC1Left; shiftD1Left - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0) - shiftB0Right; shiftC0Right; shiftD0Right - shiftB1Right; shiftC1Right; shiftD1Right - - DECQ itr1 - JG sealSSETail128LoopA - - DECQ itr2 - JGE sealSSETail128LoopB - - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1 - PADDL state1Store, B0; PADDL state1Store, B1 - PADDL state2Store, C0; PADDL state2Store, C1 - PADDL ctr0Store, D0; PADDL ctr1Store, D1 - - MOVOU (0*16)(inp), T0; MOVOU (1*16)(inp), T1; MOVOU (2*16)(inp), T2; MOVOU (3*16)(inp), T3 - PXOR T0, A0; PXOR T1, B0; PXOR T2, C0; PXOR T3, D0 - MOVOU A0, (0*16)(oup); MOVOU B0, (1*16)(oup); MOVOU C0, (2*16)(oup); MOVOU D0, (3*16)(oup) - - MOVQ $64, itr1 - LEAQ 64(inp), inp - SUBQ $64, inl - - JMP sealSSE128SealHash - -// ---------------------------------------------------------------------------- -// Special optimization for the last 192 bytes of plaintext + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + DECQ CX + JG sealSSETail128LoopA + DECQ R9 + JGE sealSSETail128LoopB + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL 32(BP), X3 + PADDL 32(BP), X4 + PADDL 48(BP), X6 + PADDL 48(BP), X7 + PADDL 80(BP), X9 + PADDL 96(BP), X10 + MOVOU (SI), X12 + MOVOU 16(SI), X13 + MOVOU 32(SI), X14 + MOVOU 48(SI), X15 + PXOR X12, X0 + PXOR X13, X3 + PXOR X14, X6 + PXOR X15, X9 + MOVOU X0, (DI) + MOVOU X3, 16(DI) + MOVOU X6, 32(DI) + MOVOU X9, 48(DI) + MOVQ $0x00000040, CX + LEAQ 64(SI), SI + SUBQ $0x40, BX + JMP sealSSE128SealHash + sealSSETail192: - // Need to encrypt up to 192 bytes - prepare three blocks, hash 192 or 256 bytes - MOVO ·chacha20Constants<>(SB), A0; MOVO state1Store, B0; MOVO state2Store, C0; MOVO ctr3Store, D0; PADDL ·sseIncMask<>(SB), D0; MOVO D0, ctr0Store - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1; MOVO D1, ctr1Store - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2; MOVO D2, ctr2Store + MOVO ·chacha20Constants<>+0(SB), X0 + MOVO 32(BP), X3 + MOVO 48(BP), X6 + MOVO 128(BP), X9 + PADDL ·sseIncMask<>+0(SB), X9 + MOVO X9, 80(BP) + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X10, 96(BP) + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X11, 112(BP) sealSSETail192LoopA: - // Perform ChaCha rounds, while hashing the previously encrypted ciphertext - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealSSETail192LoopB: - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Left; shiftC0Left; shiftD0Left - shiftB1Left; shiftC1Left; shiftD1Left - shiftB2Left; shiftC2Left; shiftD2Left - - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup - - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Right; shiftC0Right; shiftD0Right - shiftB1Right; shiftC1Right; shiftD1Right - shiftB2Right; shiftC2Right; shiftD2Right - - DECQ itr1 - JG sealSSETail192LoopA - - DECQ itr2 - JGE sealSSETail192LoopB - - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1; PADDL ·chacha20Constants<>(SB), A2 - PADDL state1Store, B0; PADDL state1Store, B1; PADDL state1Store, B2 - PADDL state2Store, C0; PADDL state2Store, C1; PADDL state2Store, C2 - PADDL ctr0Store, D0; PADDL ctr1Store, D1; PADDL ctr2Store, D2 - - MOVOU (0*16)(inp), T0; MOVOU (1*16)(inp), T1; MOVOU (2*16)(inp), T2; MOVOU (3*16)(inp), T3 - PXOR T0, A0; PXOR T1, B0; PXOR T2, C0; PXOR T3, D0 - MOVOU A0, (0*16)(oup); MOVOU B0, (1*16)(oup); MOVOU C0, (2*16)(oup); MOVOU D0, (3*16)(oup) - MOVOU (4*16)(inp), T0; MOVOU (5*16)(inp), T1; MOVOU (6*16)(inp), T2; MOVOU (7*16)(inp), T3 - PXOR T0, A1; PXOR T1, B1; PXOR T2, C1; PXOR T3, D1 - MOVOU A1, (4*16)(oup); MOVOU B1, (5*16)(oup); MOVOU C1, (6*16)(oup); MOVOU D1, (7*16)(oup) - - MOVO A2, A1 - MOVO B2, B1 - MOVO C2, C1 - MOVO D2, D1 - MOVQ $128, itr1 - LEAQ 128(inp), inp - SUBQ $128, inl - - JMP sealSSE128SealHash - -// ---------------------------------------------------------------------------- -// Special seal optimization for buffers smaller than 129 bytes + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + DECQ CX + JG sealSSETail192LoopA + DECQ R9 + JGE sealSSETail192LoopB + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL ·chacha20Constants<>+0(SB), X2 + PADDL 32(BP), X3 + PADDL 32(BP), X4 + PADDL 32(BP), X5 + PADDL 48(BP), X6 + PADDL 48(BP), X7 + PADDL 48(BP), X8 + PADDL 80(BP), X9 + PADDL 96(BP), X10 + PADDL 112(BP), X11 + MOVOU (SI), X12 + MOVOU 16(SI), X13 + MOVOU 32(SI), X14 + MOVOU 48(SI), X15 + PXOR X12, X0 + PXOR X13, X3 + PXOR X14, X6 + PXOR X15, X9 + MOVOU X0, (DI) + MOVOU X3, 16(DI) + MOVOU X6, 32(DI) + MOVOU X9, 48(DI) + MOVOU 64(SI), X12 + MOVOU 80(SI), X13 + MOVOU 96(SI), X14 + MOVOU 112(SI), X15 + PXOR X12, X1 + PXOR X13, X4 + PXOR X14, X7 + PXOR X15, X10 + MOVOU X1, 64(DI) + MOVOU X4, 80(DI) + MOVOU X7, 96(DI) + MOVOU X10, 112(DI) + MOVO X2, X1 + MOVO X5, X4 + MOVO X8, X7 + MOVO X11, X10 + MOVQ $0x00000080, CX + LEAQ 128(SI), SI + SUBQ $0x80, BX + JMP sealSSE128SealHash + sealSSE128: - // For up to 128 bytes of ciphertext and 64 bytes for the poly key, we require to process three blocks - MOVOU ·chacha20Constants<>(SB), A0; MOVOU (1*16)(keyp), B0; MOVOU (2*16)(keyp), C0; MOVOU (3*16)(keyp), D0 - MOVO A0, A1; MOVO B0, B1; MOVO C0, C1; MOVO D0, D1; PADDL ·sseIncMask<>(SB), D1 - MOVO A1, A2; MOVO B1, B2; MOVO C1, C2; MOVO D1, D2; PADDL ·sseIncMask<>(SB), D2 - MOVO B0, T1; MOVO C0, T2; MOVO D1, T3 - MOVQ $10, itr2 + MOVOU ·chacha20Constants<>+0(SB), X0 + MOVOU 16(R8), X3 + MOVOU 32(R8), X6 + MOVOU 48(R8), X9 + MOVO X0, X1 + MOVO X3, X4 + MOVO X6, X7 + MOVO X9, X10 + PADDL ·sseIncMask<>+0(SB), X10 + MOVO X1, X2 + MOVO X4, X5 + MOVO X7, X8 + MOVO X10, X11 + PADDL ·sseIncMask<>+0(SB), X11 + MOVO X3, X13 + MOVO X6, X14 + MOVO X10, X15 + MOVQ $0x0000000a, R9 sealSSE128InnerCipherLoop: - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Left; shiftB1Left; shiftB2Left - shiftC0Left; shiftC1Left; shiftC2Left - shiftD0Left; shiftD1Left; shiftD2Left - chachaQR(A0, B0, C0, D0, T0); chachaQR(A1, B1, C1, D1, T0); chachaQR(A2, B2, C2, D2, T0) - shiftB0Right; shiftB1Right; shiftB2Right - shiftC0Right; shiftC1Right; shiftC2Right - shiftD0Right; shiftD1Right; shiftD2Right - DECQ itr2 - JNE sealSSE128InnerCipherLoop + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x04 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x0c + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + PADDD X3, X0 + PXOR X0, X9 + ROL16(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X3 + PXOR X12, X3 + PADDD X3, X0 + PXOR X0, X9 + ROL8(X9, X12) + PADDD X9, X6 + PXOR X6, X3 + MOVO X3, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X3 + PXOR X12, X3 + PADDD X4, X1 + PXOR X1, X10 + ROL16(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X4 + PXOR X12, X4 + PADDD X4, X1 + PXOR X1, X10 + ROL8(X10, X12) + PADDD X10, X7 + PXOR X7, X4 + MOVO X4, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X4 + PXOR X12, X4 + PADDD X5, X2 + PXOR X2, X11 + ROL16(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x0c, X12 + PSRLL $0x14, X5 + PXOR X12, X5 + PADDD X5, X2 + PXOR X2, X11 + ROL8(X11, X12) + PADDD X11, X8 + PXOR X8, X5 + MOVO X5, X12 + PSLLL $0x07, X12 + PSRLL $0x19, X5 + PXOR X12, X5 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xe4 + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xed + BYTE $0x0c + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xf6 + BYTE $0x08 + BYTE $0x66 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xff + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc0 + BYTE $0x08 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xc9 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xd2 + BYTE $0x04 + BYTE $0x66 + BYTE $0x45 + BYTE $0x0f + BYTE $0x3a + BYTE $0x0f + BYTE $0xdb + BYTE $0x04 + DECQ R9 + JNE sealSSE128InnerCipherLoop // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded - PADDL ·chacha20Constants<>(SB), A0; PADDL ·chacha20Constants<>(SB), A1; PADDL ·chacha20Constants<>(SB), A2 - PADDL T1, B0; PADDL T1, B1; PADDL T1, B2 - PADDL T2, C1; PADDL T2, C2 - PADDL T3, D1; PADDL ·sseIncMask<>(SB), T3; PADDL T3, D2 - PAND ·polyClampMask<>(SB), A0 - MOVOU A0, rStore - MOVOU B0, sStore + PADDL ·chacha20Constants<>+0(SB), X0 + PADDL ·chacha20Constants<>+0(SB), X1 + PADDL ·chacha20Constants<>+0(SB), X2 + PADDL X13, X3 + PADDL X13, X4 + PADDL X13, X5 + PADDL X14, X7 + PADDL X14, X8 + PADDL X15, X10 + PADDL ·sseIncMask<>+0(SB), X15 + PADDL X15, X11 + PAND ·polyClampMask<>+0(SB), X0 + MOVOU X0, (BP) + MOVOU X3, 16(BP) // Hash - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) - XORQ itr1, itr1 + XORQ CX, CX sealSSE128SealHash: - // itr1 holds the number of bytes encrypted but not yet hashed - CMPQ itr1, $16 - JB sealSSE128Seal - polyAdd(0(oup)) - polyMul - - SUBQ $16, itr1 - ADDQ $16, oup - - JMP sealSSE128SealHash + CMPQ CX, $0x10 + JB sealSSE128Seal + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + SUBQ $0x10, CX + ADDQ $0x10, DI + JMP sealSSE128SealHash sealSSE128Seal: - CMPQ inl, $16 + CMPQ BX, $0x10 JB sealSSETail - SUBQ $16, inl + SUBQ $0x10, BX // Load for decryption - MOVOU (inp), T0 - PXOR T0, A1 - MOVOU A1, (oup) - LEAQ (1*16)(inp), inp - LEAQ (1*16)(oup), oup + MOVOU (SI), X12 + PXOR X12, X1 + MOVOU X1, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI // Extract for hashing - MOVQ A1, t0 - PSRLDQ $8, A1 - MOVQ A1, t1 - ADDQ t0, acc0; ADCQ t1, acc1; ADCQ $1, acc2 - polyMul + MOVQ X1, R13 + PSRLDQ $0x08, X1 + MOVQ X1, R14 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Shift the stream "left" - MOVO B1, A1 - MOVO C1, B1 - MOVO D1, C1 - MOVO A2, D1 - MOVO B2, A2 - MOVO C2, B2 - MOVO D2, C2 + MOVO X4, X1 + MOVO X7, X4 + MOVO X10, X7 + MOVO X2, X10 + MOVO X5, X2 + MOVO X8, X5 + MOVO X11, X8 JMP sealSSE128Seal sealSSETail: - TESTQ inl, inl + TESTQ BX, BX JE sealSSEFinalize // We can only load the PT one byte at a time to avoid read after end of buffer - MOVQ inl, itr2 - SHLQ $4, itr2 - LEAQ ·andMask<>(SB), t0 - MOVQ inl, itr1 - LEAQ -1(inp)(inl*1), inp - XORQ t2, t2 - XORQ t3, t3 + MOVQ BX, R9 + SHLQ $0x04, R9 + LEAQ ·andMask<>+0(SB), R13 + MOVQ BX, CX + LEAQ -1(SI)(BX*1), SI + XORQ R15, R15 + XORQ R8, R8 XORQ AX, AX sealSSETailLoadLoop: - SHLQ $8, t2, t3 - SHLQ $8, t2 - MOVB (inp), AX - XORQ AX, t2 - LEAQ -1(inp), inp - DECQ itr1 + SHLQ $0x08, R15, R8 + SHLQ $0x08, R15 + MOVB (SI), AX + XORQ AX, R15 + LEAQ -1(SI), SI + DECQ CX JNE sealSSETailLoadLoop - MOVQ t2, 0+tmpStore - MOVQ t3, 8+tmpStore - PXOR 0+tmpStore, A1 - MOVOU A1, (oup) - MOVOU -16(t0)(itr2*1), T0 - PAND T0, A1 - MOVQ A1, t0 - PSRLDQ $8, A1 - MOVQ A1, t1 - ADDQ t0, acc0; ADCQ t1, acc1; ADCQ $1, acc2 - polyMul - - ADDQ inl, oup + MOVQ R15, 64(BP) + MOVQ R8, 72(BP) + PXOR 64(BP), X1 + MOVOU X1, (DI) + MOVOU -16(R13)(R9*1), X12 + PAND X12, X1 + MOVQ X1, R13 + PSRLDQ $0x08, X1 + MOVQ X1, R14 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ BX, DI sealSSEFinalize: // Hash in the buffer lengths - ADDQ ad_len+80(FP), acc0 - ADCQ src_len+56(FP), acc1 - ADCQ $1, acc2 - polyMul + ADDQ ad_len+80(FP), R10 + ADCQ src_len+56(FP), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 // Final reduce - MOVQ acc0, t0 - MOVQ acc1, t1 - MOVQ acc2, t2 - SUBQ $-5, acc0 - SBBQ $-1, acc1 - SBBQ $3, acc2 - CMOVQCS t0, acc0 - CMOVQCS t1, acc1 - CMOVQCS t2, acc2 + MOVQ R10, R13 + MOVQ R11, R14 + MOVQ R12, R15 + SUBQ $-5, R10 + SBBQ $-1, R11 + SBBQ $0x03, R12 + CMOVQCS R13, R10 + CMOVQCS R14, R11 + CMOVQCS R15, R12 // Add in the "s" part of the key - ADDQ 0+sStore, acc0 - ADCQ 8+sStore, acc1 + ADDQ 16(BP), R10 + ADCQ 24(BP), R11 // Finally store the tag at the end of the message - MOVQ acc0, (0*8)(oup) - MOVQ acc1, (1*8)(oup) + MOVQ R10, (DI) + MOVQ R11, 8(DI) RET -// ---------------------------------------------------------------------------- -// ------------------------- AVX2 Code ---------------------------------------- chacha20Poly1305Seal_AVX2: VZEROUPPER - VMOVDQU ·chacha20Constants<>(SB), AA0 - BYTE $0xc4; BYTE $0x42; BYTE $0x7d; BYTE $0x5a; BYTE $0x70; BYTE $0x10 // broadcasti128 16(r8), ymm14 - BYTE $0xc4; BYTE $0x42; BYTE $0x7d; BYTE $0x5a; BYTE $0x60; BYTE $0x20 // broadcasti128 32(r8), ymm12 - BYTE $0xc4; BYTE $0xc2; BYTE $0x7d; BYTE $0x5a; BYTE $0x60; BYTE $0x30 // broadcasti128 48(r8), ymm4 - VPADDD ·avx2InitMask<>(SB), DD0, DD0 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + BYTE $0xc4 + BYTE $0x42 + BYTE $0x7d + BYTE $0x5a + BYTE $0x70 + BYTE $0x10 + BYTE $0xc4 + BYTE $0x42 + BYTE $0x7d + BYTE $0x5a + BYTE $0x60 + BYTE $0x20 + BYTE $0xc4 + BYTE $0xc2 + BYTE $0x7d + BYTE $0x5a + BYTE $0x60 + BYTE $0x30 + VPADDD ·avx2InitMask<>+0(SB), Y4, Y4 // Special optimizations, for very short buffers - CMPQ inl, $192 - JBE seal192AVX2 // 33% faster - CMPQ inl, $320 - JBE seal320AVX2 // 17% faster + CMPQ BX, $0x000000c0 + JBE seal192AVX2 + CMPQ BX, $0x00000140 + JBE seal320AVX2 // For the general key prepare the key first - as a byproduct we have 64 bytes of cipher stream - VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3; VMOVDQA BB0, state1StoreAVX2 - VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3; VMOVDQA CC0, state2StoreAVX2 - VPADDD ·avx2IncMask<>(SB), DD0, DD1; VMOVDQA DD0, ctr0StoreAVX2 - VPADDD ·avx2IncMask<>(SB), DD1, DD2; VMOVDQA DD1, ctr1StoreAVX2 - VPADDD ·avx2IncMask<>(SB), DD2, DD3; VMOVDQA DD2, ctr2StoreAVX2 - VMOVDQA DD3, ctr3StoreAVX2 - MOVQ $10, itr2 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA Y14, 32(BP) + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA Y12, 64(BP) + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y4, 96(BP) + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y1, 128(BP) + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + MOVQ $0x0000000a, R9 sealAVX2IntroLoop: - VMOVDQA CC3, tmpStoreAVX2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, CC3); chachaQR_AVX2(AA1, BB1, CC1, DD1, CC3); chachaQR_AVX2(AA2, BB2, CC2, DD2, CC3) - VMOVDQA tmpStoreAVX2, CC3 - VMOVDQA CC1, tmpStoreAVX2 - chachaQR_AVX2(AA3, BB3, CC3, DD3, CC1) - VMOVDQA tmpStoreAVX2, CC1 - - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $12, DD0, DD0, DD0 - VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $12, DD1, DD1, DD1 - VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $12, DD2, DD2, DD2 - VPALIGNR $4, BB3, BB3, BB3; VPALIGNR $8, CC3, CC3, CC3; VPALIGNR $12, DD3, DD3, DD3 - - VMOVDQA CC3, tmpStoreAVX2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, CC3); chachaQR_AVX2(AA1, BB1, CC1, DD1, CC3); chachaQR_AVX2(AA2, BB2, CC2, DD2, CC3) - VMOVDQA tmpStoreAVX2, CC3 - VMOVDQA CC1, tmpStoreAVX2 - chachaQR_AVX2(AA3, BB3, CC3, DD3, CC1) - VMOVDQA tmpStoreAVX2, CC1 - - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $4, DD0, DD0, DD0 - VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $4, DD1, DD1, DD1 - VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $4, DD2, DD2, DD2 - VPALIGNR $12, BB3, BB3, BB3; VPALIGNR $8, CC3, CC3, CC3; VPALIGNR $4, DD3, DD3, DD3 - DECQ itr2 - JNE sealAVX2IntroLoop - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2; VPADDD ·chacha20Constants<>(SB), AA3, AA3 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2; VPADDD state1StoreAVX2, BB3, BB3 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2; VPADDD state2StoreAVX2, CC3, CC3 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2; VPADDD ctr3StoreAVX2, DD3, DD3 - - VPERM2I128 $0x13, CC0, DD0, CC0 // Stream bytes 96 - 127 - VPERM2I128 $0x02, AA0, BB0, DD0 // The Poly1305 key - VPERM2I128 $0x13, AA0, BB0, AA0 // Stream bytes 64 - 95 + VMOVDQA Y15, 224(BP) + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VMOVDQA 224(BP), Y15 + VMOVDQA Y13, 224(BP) + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x0c, Y11, Y13 + VPSRLD $0x14, Y11, Y11 + VPXOR Y13, Y11, Y11 + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x07, Y11, Y13 + VPSRLD $0x19, Y11, Y11 + VPXOR Y13, Y11, Y11 + VMOVDQA 224(BP), Y13 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y3, Y3, Y3 + VMOVDQA Y15, 224(BP) + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VMOVDQA 224(BP), Y15 + VMOVDQA Y13, 224(BP) + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x0c, Y11, Y13 + VPSRLD $0x14, Y11, Y11 + VPXOR Y13, Y11, Y11 + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x07, Y11, Y13 + VPSRLD $0x19, Y11, Y11 + VPXOR Y13, Y11, Y11 + VMOVDQA 224(BP), Y13 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y3, Y3, Y3 + DECQ R9 + JNE sealAVX2IntroLoop + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VPERM2I128 $0x13, Y12, Y4, Y12 + VPERM2I128 $0x02, Y0, Y14, Y4 + VPERM2I128 $0x13, Y0, Y14, Y0 // Clamp and store poly key - VPAND ·polyClampMask<>(SB), DD0, DD0 - VMOVDQA DD0, rsStoreAVX2 + VPAND ·polyClampMask<>+0(SB), Y4, Y4 + VMOVDQA Y4, (BP) // Hash AD - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) // Can store at least 320 bytes - VPXOR (0*32)(inp), AA0, AA0 - VPXOR (1*32)(inp), CC0, CC0 - VMOVDQU AA0, (0*32)(oup) - VMOVDQU CC0, (1*32)(oup) - - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 - VPXOR (2*32)(inp), AA0, AA0; VPXOR (3*32)(inp), BB0, BB0; VPXOR (4*32)(inp), CC0, CC0; VPXOR (5*32)(inp), DD0, DD0 - VMOVDQU AA0, (2*32)(oup); VMOVDQU BB0, (3*32)(oup); VMOVDQU CC0, (4*32)(oup); VMOVDQU DD0, (5*32)(oup) - VPERM2I128 $0x02, AA2, BB2, AA0; VPERM2I128 $0x02, CC2, DD2, BB0; VPERM2I128 $0x13, AA2, BB2, CC0; VPERM2I128 $0x13, CC2, DD2, DD0 - VPXOR (6*32)(inp), AA0, AA0; VPXOR (7*32)(inp), BB0, BB0; VPXOR (8*32)(inp), CC0, CC0; VPXOR (9*32)(inp), DD0, DD0 - VMOVDQU AA0, (6*32)(oup); VMOVDQU BB0, (7*32)(oup); VMOVDQU CC0, (8*32)(oup); VMOVDQU DD0, (9*32)(oup) - - MOVQ $320, itr1 - SUBQ $320, inl - LEAQ 320(inp), inp - - VPERM2I128 $0x02, AA3, BB3, AA0; VPERM2I128 $0x02, CC3, DD3, BB0; VPERM2I128 $0x13, AA3, BB3, CC0; VPERM2I128 $0x13, CC3, DD3, DD0 - CMPQ inl, $128 + VPXOR (SI), Y0, Y0 + VPXOR 32(SI), Y12, Y12 + VMOVDQU Y0, (DI) + VMOVDQU Y12, 32(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 64(SI), Y0, Y0 + VPXOR 96(SI), Y14, Y14 + VPXOR 128(SI), Y12, Y12 + VPXOR 160(SI), Y4, Y4 + VMOVDQU Y0, 64(DI) + VMOVDQU Y14, 96(DI) + VMOVDQU Y12, 128(DI) + VMOVDQU Y4, 160(DI) + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 192(SI), Y0, Y0 + VPXOR 224(SI), Y14, Y14 + VPXOR 256(SI), Y12, Y12 + VPXOR 288(SI), Y4, Y4 + VMOVDQU Y0, 192(DI) + VMOVDQU Y14, 224(DI) + VMOVDQU Y12, 256(DI) + VMOVDQU Y4, 288(DI) + MOVQ $0x00000140, CX + SUBQ $0x00000140, BX + LEAQ 320(SI), SI + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, Y15, Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, Y15, Y3, Y4 + CMPQ BX, $0x80 JBE sealAVX2SealHash - - VPXOR (0*32)(inp), AA0, AA0; VPXOR (1*32)(inp), BB0, BB0; VPXOR (2*32)(inp), CC0, CC0; VPXOR (3*32)(inp), DD0, DD0 - VMOVDQU AA0, (10*32)(oup); VMOVDQU BB0, (11*32)(oup); VMOVDQU CC0, (12*32)(oup); VMOVDQU DD0, (13*32)(oup) - SUBQ $128, inl - LEAQ 128(inp), inp - - MOVQ $8, itr1 - MOVQ $2, itr2 - - CMPQ inl, $128 - JBE sealAVX2Tail128 - CMPQ inl, $256 - JBE sealAVX2Tail256 - CMPQ inl, $384 - JBE sealAVX2Tail384 - CMPQ inl, $512 - JBE sealAVX2Tail512 + VPXOR (SI), Y0, Y0 + VPXOR 32(SI), Y14, Y14 + VPXOR 64(SI), Y12, Y12 + VPXOR 96(SI), Y4, Y4 + VMOVDQU Y0, 320(DI) + VMOVDQU Y14, 352(DI) + VMOVDQU Y12, 384(DI) + VMOVDQU Y4, 416(DI) + SUBQ $0x80, BX + LEAQ 128(SI), SI + MOVQ $0x00000008, CX + MOVQ $0x00000002, R9 + CMPQ BX, $0x80 + JBE sealAVX2Tail128 + CMPQ BX, $0x00000100 + JBE sealAVX2Tail256 + CMPQ BX, $0x00000180 + JBE sealAVX2Tail384 + CMPQ BX, $0x00000200 + JBE sealAVX2Tail512 // We have 448 bytes to hash, but main loop hashes 512 bytes at a time - perform some rounds, before the main loop - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2; VPADDD ·avx2IncMask<>(SB), DD2, DD3 - VMOVDQA DD0, ctr0StoreAVX2; VMOVDQA DD1, ctr1StoreAVX2; VMOVDQA DD2, ctr2StoreAVX2; VMOVDQA DD3, ctr3StoreAVX2 - - VMOVDQA CC3, tmpStoreAVX2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, CC3); chachaQR_AVX2(AA1, BB1, CC1, DD1, CC3); chachaQR_AVX2(AA2, BB2, CC2, DD2, CC3) - VMOVDQA tmpStoreAVX2, CC3 - VMOVDQA CC1, tmpStoreAVX2 - chachaQR_AVX2(AA3, BB3, CC3, DD3, CC1) - VMOVDQA tmpStoreAVX2, CC1 - - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $12, DD0, DD0, DD0 - VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $12, DD1, DD1, DD1 - VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $12, DD2, DD2, DD2 - VPALIGNR $4, BB3, BB3, BB3; VPALIGNR $8, CC3, CC3, CC3; VPALIGNR $12, DD3, DD3, DD3 - - VMOVDQA CC3, tmpStoreAVX2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, CC3); chachaQR_AVX2(AA1, BB1, CC1, DD1, CC3); chachaQR_AVX2(AA2, BB2, CC2, DD2, CC3) - VMOVDQA tmpStoreAVX2, CC3 - VMOVDQA CC1, tmpStoreAVX2 - chachaQR_AVX2(AA3, BB3, CC3, DD3, CC1) - VMOVDQA tmpStoreAVX2, CC1 - - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $4, DD0, DD0, DD0 - VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $4, DD1, DD1, DD1 - VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $4, DD2, DD2, DD2 - VPALIGNR $12, BB3, BB3, BB3; VPALIGNR $8, CC3, CC3, CC3; VPALIGNR $4, DD3, DD3, DD3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - - SUBQ $16, oup // Adjust the pointer - MOVQ $9, itr1 - JMP sealAVX2InternalLoopStart + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + VMOVDQA Y15, 224(BP) + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VMOVDQA 224(BP), Y15 + VMOVDQA Y13, 224(BP) + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x0c, Y11, Y13 + VPSRLD $0x14, Y11, Y11 + VPXOR Y13, Y11, Y11 + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x07, Y11, Y13 + VPSRLD $0x19, Y11, Y11 + VPXOR Y13, Y11, Y11 + VMOVDQA 224(BP), Y13 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y3, Y3, Y3 + VMOVDQA Y15, 224(BP) + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VMOVDQA 224(BP), Y15 + VMOVDQA Y13, 224(BP) + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x0c, Y11, Y13 + VPSRLD $0x14, Y11, Y11 + VPXOR Y13, Y11, Y11 + VPADDD Y11, Y7, Y7 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y3, Y15, Y15 + VPXOR Y15, Y11, Y11 + VPSLLD $0x07, Y11, Y13 + VPSRLD $0x19, Y11, Y11 + VPXOR Y13, Y11, Y11 + VMOVDQA 224(BP), Y13 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + SUBQ $0x10, DI + MOVQ $0x00000009, CX + JMP sealAVX2InternalLoopStart sealAVX2MainLoop: - // Load state, increment counter blocks, store the incremented counters - VMOVDQU ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3 - VMOVDQA ctr3StoreAVX2, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2; VPADDD ·avx2IncMask<>(SB), DD2, DD3 - VMOVDQA DD0, ctr0StoreAVX2; VMOVDQA DD1, ctr1StoreAVX2; VMOVDQA DD2, ctr2StoreAVX2; VMOVDQA DD3, ctr3StoreAVX2 - MOVQ $10, itr1 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + MOVQ $0x0000000a, CX sealAVX2InternalLoop: - polyAdd(0*8(oup)) - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - polyMulStage1_AVX2 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - polyMulStage2_AVX2 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyMulStage3_AVX2 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulReduceStage + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 sealAVX2InternalLoopStart: - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - polyAdd(2*8(oup)) - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - polyMulStage1_AVX2 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulStage2_AVX2 - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $4, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2; VPALIGNR $12, DD3, DD3, DD3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - polyMulStage3_AVX2 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - polyMulReduceStage - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyAdd(4*8(oup)) - LEAQ (6*8)(oup), oup - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulStage1_AVX2 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - polyMulStage2_AVX2 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - polyMulStage3_AVX2 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyMulReduceStage - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $12, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2; VPALIGNR $4, DD3, DD3, DD3 - DECQ itr1 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x0c, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + ADDQ 32(DI), R10 + ADCQ 40(DI), R11 + ADCQ $0x01, R12 + LEAQ 48(DI), DI + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x04, Y3, Y3, Y3 + DECQ CX JNE sealAVX2InternalLoop - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2; VPADDD ·chacha20Constants<>(SB), AA3, AA3 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2; VPADDD state1StoreAVX2, BB3, BB3 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2; VPADDD state2StoreAVX2, CC3, CC3 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2; VPADDD ctr3StoreAVX2, DD3, DD3 - VMOVDQA CC3, tmpStoreAVX2 + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VMOVDQA Y15, 224(BP) // We only hashed 480 of the 512 bytes available - hash the remaining 32 here - polyAdd(0*8(oup)) - polyMulAVX2 - LEAQ (4*8)(oup), oup - VPERM2I128 $0x02, AA0, BB0, CC3; VPERM2I128 $0x13, AA0, BB0, BB0; VPERM2I128 $0x02, CC0, DD0, AA0; VPERM2I128 $0x13, CC0, DD0, CC0 - VPXOR (0*32)(inp), CC3, CC3; VPXOR (1*32)(inp), AA0, AA0; VPXOR (2*32)(inp), BB0, BB0; VPXOR (3*32)(inp), CC0, CC0 - VMOVDQU CC3, (0*32)(oup); VMOVDQU AA0, (1*32)(oup); VMOVDQU BB0, (2*32)(oup); VMOVDQU CC0, (3*32)(oup) - VPERM2I128 $0x02, AA1, BB1, AA0; VPERM2I128 $0x02, CC1, DD1, BB0; VPERM2I128 $0x13, AA1, BB1, CC0; VPERM2I128 $0x13, CC1, DD1, DD0 - VPXOR (4*32)(inp), AA0, AA0; VPXOR (5*32)(inp), BB0, BB0; VPXOR (6*32)(inp), CC0, CC0; VPXOR (7*32)(inp), DD0, DD0 - VMOVDQU AA0, (4*32)(oup); VMOVDQU BB0, (5*32)(oup); VMOVDQU CC0, (6*32)(oup); VMOVDQU DD0, (7*32)(oup) + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + VPERM2I128 $0x02, Y0, Y14, Y15 + VPERM2I128 $0x13, Y0, Y14, Y14 + VPERM2I128 $0x02, Y12, Y4, Y0 + VPERM2I128 $0x13, Y12, Y4, Y12 + VPXOR (SI), Y15, Y15 + VPXOR 32(SI), Y0, Y0 + VPXOR 64(SI), Y14, Y14 + VPXOR 96(SI), Y12, Y12 + VMOVDQU Y15, (DI) + VMOVDQU Y0, 32(DI) + VMOVDQU Y14, 64(DI) + VMOVDQU Y12, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 128(SI), Y0, Y0 + VPXOR 160(SI), Y14, Y14 + VPXOR 192(SI), Y12, Y12 + VPXOR 224(SI), Y4, Y4 + VMOVDQU Y0, 128(DI) + VMOVDQU Y14, 160(DI) + VMOVDQU Y12, 192(DI) + VMOVDQU Y4, 224(DI) // and here - polyAdd(-2*8(oup)) - polyMulAVX2 - VPERM2I128 $0x02, AA2, BB2, AA0; VPERM2I128 $0x02, CC2, DD2, BB0; VPERM2I128 $0x13, AA2, BB2, CC0; VPERM2I128 $0x13, CC2, DD2, DD0 - VPXOR (8*32)(inp), AA0, AA0; VPXOR (9*32)(inp), BB0, BB0; VPXOR (10*32)(inp), CC0, CC0; VPXOR (11*32)(inp), DD0, DD0 - VMOVDQU AA0, (8*32)(oup); VMOVDQU BB0, (9*32)(oup); VMOVDQU CC0, (10*32)(oup); VMOVDQU DD0, (11*32)(oup) - VPERM2I128 $0x02, AA3, BB3, AA0; VPERM2I128 $0x02, tmpStoreAVX2, DD3, BB0; VPERM2I128 $0x13, AA3, BB3, CC0; VPERM2I128 $0x13, tmpStoreAVX2, DD3, DD0 - VPXOR (12*32)(inp), AA0, AA0; VPXOR (13*32)(inp), BB0, BB0; VPXOR (14*32)(inp), CC0, CC0; VPXOR (15*32)(inp), DD0, DD0 - VMOVDQU AA0, (12*32)(oup); VMOVDQU BB0, (13*32)(oup); VMOVDQU CC0, (14*32)(oup); VMOVDQU DD0, (15*32)(oup) - LEAQ (32*16)(inp), inp - SUBQ $(32*16), inl - CMPQ inl, $512 + ADDQ -16(DI), R10 + ADCQ -8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 256(SI), Y0, Y0 + VPXOR 288(SI), Y14, Y14 + VPXOR 320(SI), Y12, Y12 + VPXOR 352(SI), Y4, Y4 + VMOVDQU Y0, 256(DI) + VMOVDQU Y14, 288(DI) + VMOVDQU Y12, 320(DI) + VMOVDQU Y4, 352(DI) + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, 224(BP), Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, 224(BP), Y3, Y4 + VPXOR 384(SI), Y0, Y0 + VPXOR 416(SI), Y14, Y14 + VPXOR 448(SI), Y12, Y12 + VPXOR 480(SI), Y4, Y4 + VMOVDQU Y0, 384(DI) + VMOVDQU Y14, 416(DI) + VMOVDQU Y12, 448(DI) + VMOVDQU Y4, 480(DI) + LEAQ 512(SI), SI + SUBQ $0x00000200, BX + CMPQ BX, $0x00000200 JG sealAVX2MainLoop // Tail can only hash 480 bytes - polyAdd(0*8(oup)) - polyMulAVX2 - polyAdd(2*8(oup)) - polyMulAVX2 - LEAQ 32(oup), oup - - MOVQ $10, itr1 - MOVQ $0, itr2 - CMPQ inl, $128 - JBE sealAVX2Tail128 - CMPQ inl, $256 - JBE sealAVX2Tail256 - CMPQ inl, $384 - JBE sealAVX2Tail384 - JMP sealAVX2Tail512 - -// ---------------------------------------------------------------------------- -// Special optimization for buffers smaller than 193 bytes + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + MOVQ $0x0000000a, CX + MOVQ $0x00000000, R9 + CMPQ BX, $0x80 + JBE sealAVX2Tail128 + CMPQ BX, $0x00000100 + JBE sealAVX2Tail256 + CMPQ BX, $0x00000180 + JBE sealAVX2Tail384 + JMP sealAVX2Tail512 + seal192AVX2: - // For up to 192 bytes of ciphertext and 64 bytes for the poly key, we process four blocks - VMOVDQA AA0, AA1 - VMOVDQA BB0, BB1 - VMOVDQA CC0, CC1 - VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA AA0, AA2 - VMOVDQA BB0, BB2 - VMOVDQA CC0, CC2 - VMOVDQA DD0, DD2 - VMOVDQA DD1, TT3 - MOVQ $10, itr2 + VMOVDQA Y0, Y5 + VMOVDQA Y14, Y9 + VMOVDQA Y12, Y13 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y0, Y6 + VMOVDQA Y14, Y10 + VMOVDQA Y12, Y8 + VMOVDQA Y4, Y2 + VMOVDQA Y1, Y15 + MOVQ $0x0000000a, R9 sealAVX2192InnerCipherLoop: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1 - DECQ itr2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + DECQ R9 JNE sealAVX2192InnerCipherLoop - VPADDD AA2, AA0, AA0; VPADDD AA2, AA1, AA1 - VPADDD BB2, BB0, BB0; VPADDD BB2, BB1, BB1 - VPADDD CC2, CC0, CC0; VPADDD CC2, CC1, CC1 - VPADDD DD2, DD0, DD0; VPADDD TT3, DD1, DD1 - VPERM2I128 $0x02, AA0, BB0, TT0 + VPADDD Y6, Y0, Y0 + VPADDD Y6, Y5, Y5 + VPADDD Y10, Y14, Y14 + VPADDD Y10, Y9, Y9 + VPADDD Y8, Y12, Y12 + VPADDD Y8, Y13, Y13 + VPADDD Y2, Y4, Y4 + VPADDD Y15, Y1, Y1 + VPERM2I128 $0x02, Y0, Y14, Y3 // Clamp and store poly key - VPAND ·polyClampMask<>(SB), TT0, TT0 - VMOVDQA TT0, rsStoreAVX2 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) // Stream for up to 192 bytes - VPERM2I128 $0x13, AA0, BB0, AA0 - VPERM2I128 $0x13, CC0, DD0, BB0 - VPERM2I128 $0x02, AA1, BB1, CC0 - VPERM2I128 $0x02, CC1, DD1, DD0 - VPERM2I128 $0x13, AA1, BB1, AA1 - VPERM2I128 $0x13, CC1, DD1, BB1 + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 + VPERM2I128 $0x02, Y5, Y9, Y12 + VPERM2I128 $0x02, Y13, Y1, Y4 + VPERM2I128 $0x13, Y5, Y9, Y5 + VPERM2I128 $0x13, Y13, Y1, Y9 sealAVX2ShortSeal: // Hash aad - MOVQ ad_len+80(FP), itr2 + MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) - XORQ itr1, itr1 + XORQ CX, CX sealAVX2SealHash: // itr1 holds the number of bytes encrypted but not yet hashed - CMPQ itr1, $16 - JB sealAVX2ShortSealLoop - polyAdd(0(oup)) - polyMul - SUBQ $16, itr1 - ADDQ $16, oup - JMP sealAVX2SealHash + CMPQ CX, $0x10 + JB sealAVX2ShortSealLoop + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + SUBQ $0x10, CX + ADDQ $0x10, DI + JMP sealAVX2SealHash sealAVX2ShortSealLoop: - CMPQ inl, $32 + CMPQ BX, $0x20 JB sealAVX2ShortTail32 - SUBQ $32, inl + SUBQ $0x20, BX // Load for encryption - VPXOR (inp), AA0, AA0 - VMOVDQU AA0, (oup) - LEAQ (1*32)(inp), inp + VPXOR (SI), Y0, Y0 + VMOVDQU Y0, (DI) + LEAQ 32(SI), SI // Now can hash - polyAdd(0*8(oup)) - polyMulAVX2 - polyAdd(2*8(oup)) - polyMulAVX2 - LEAQ (1*32)(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI // Shift stream left - VMOVDQA BB0, AA0 - VMOVDQA CC0, BB0 - VMOVDQA DD0, CC0 - VMOVDQA AA1, DD0 - VMOVDQA BB1, AA1 - VMOVDQA CC1, BB1 - VMOVDQA DD1, CC1 - VMOVDQA AA2, DD1 - VMOVDQA BB2, AA2 + VMOVDQA Y14, Y0 + VMOVDQA Y12, Y14 + VMOVDQA Y4, Y12 + VMOVDQA Y5, Y4 + VMOVDQA Y9, Y5 + VMOVDQA Y13, Y9 + VMOVDQA Y1, Y13 + VMOVDQA Y6, Y1 + VMOVDQA Y10, Y6 JMP sealAVX2ShortSealLoop sealAVX2ShortTail32: - CMPQ inl, $16 - VMOVDQA A0, A1 + CMPQ BX, $0x10 + VMOVDQA X0, X1 JB sealAVX2ShortDone - - SUBQ $16, inl + SUBQ $0x10, BX // Load for encryption - VPXOR (inp), A0, T0 - VMOVDQU T0, (oup) - LEAQ (1*16)(inp), inp + VPXOR (SI), X0, X12 + VMOVDQU X12, (DI) + LEAQ 16(SI), SI // Hash - polyAdd(0*8(oup)) - polyMulAVX2 - LEAQ (1*16)(oup), oup - VPERM2I128 $0x11, AA0, AA0, AA0 - VMOVDQA A0, A1 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI + VPERM2I128 $0x11, Y0, Y0, Y0 + VMOVDQA X0, X1 sealAVX2ShortDone: VZEROUPPER JMP sealSSETail -// ---------------------------------------------------------------------------- -// Special optimization for buffers smaller than 321 bytes seal320AVX2: - // For up to 320 bytes of ciphertext and 64 bytes for the poly key, we process six blocks - VMOVDQA AA0, AA1; VMOVDQA BB0, BB1; VMOVDQA CC0, CC1; VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA AA0, AA2; VMOVDQA BB0, BB2; VMOVDQA CC0, CC2; VPADDD ·avx2IncMask<>(SB), DD1, DD2 - VMOVDQA BB0, TT1; VMOVDQA CC0, TT2; VMOVDQA DD0, TT3 - MOVQ $10, itr2 + VMOVDQA Y0, Y5 + VMOVDQA Y14, Y9 + VMOVDQA Y12, Y13 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y0, Y6 + VMOVDQA Y14, Y10 + VMOVDQA Y12, Y8 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y14, Y7 + VMOVDQA Y12, Y11 + VMOVDQA Y4, Y15 + MOVQ $0x0000000a, R9 sealAVX2320InnerCipherLoop: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2 - DECQ itr2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + DECQ R9 JNE sealAVX2320InnerCipherLoop - - VMOVDQA ·chacha20Constants<>(SB), TT0 - VPADDD TT0, AA0, AA0; VPADDD TT0, AA1, AA1; VPADDD TT0, AA2, AA2 - VPADDD TT1, BB0, BB0; VPADDD TT1, BB1, BB1; VPADDD TT1, BB2, BB2 - VPADDD TT2, CC0, CC0; VPADDD TT2, CC1, CC1; VPADDD TT2, CC2, CC2 - VMOVDQA ·avx2IncMask<>(SB), TT0 - VPADDD TT3, DD0, DD0; VPADDD TT0, TT3, TT3 - VPADDD TT3, DD1, DD1; VPADDD TT0, TT3, TT3 - VPADDD TT3, DD2, DD2 + VMOVDQA ·chacha20Constants<>+0(SB), Y3 + VPADDD Y3, Y0, Y0 + VPADDD Y3, Y5, Y5 + VPADDD Y3, Y6, Y6 + VPADDD Y7, Y14, Y14 + VPADDD Y7, Y9, Y9 + VPADDD Y7, Y10, Y10 + VPADDD Y11, Y12, Y12 + VPADDD Y11, Y13, Y13 + VPADDD Y11, Y8, Y8 + VMOVDQA ·avx2IncMask<>+0(SB), Y3 + VPADDD Y15, Y4, Y4 + VPADDD Y3, Y15, Y15 + VPADDD Y15, Y1, Y1 + VPADDD Y3, Y15, Y15 + VPADDD Y15, Y2, Y2 // Clamp and store poly key - VPERM2I128 $0x02, AA0, BB0, TT0 - VPAND ·polyClampMask<>(SB), TT0, TT0 - VMOVDQA TT0, rsStoreAVX2 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) // Stream for up to 320 bytes - VPERM2I128 $0x13, AA0, BB0, AA0 - VPERM2I128 $0x13, CC0, DD0, BB0 - VPERM2I128 $0x02, AA1, BB1, CC0 - VPERM2I128 $0x02, CC1, DD1, DD0 - VPERM2I128 $0x13, AA1, BB1, AA1 - VPERM2I128 $0x13, CC1, DD1, BB1 - VPERM2I128 $0x02, AA2, BB2, CC1 - VPERM2I128 $0x02, CC2, DD2, DD1 - VPERM2I128 $0x13, AA2, BB2, AA2 - VPERM2I128 $0x13, CC2, DD2, BB2 + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 + VPERM2I128 $0x02, Y5, Y9, Y12 + VPERM2I128 $0x02, Y13, Y1, Y4 + VPERM2I128 $0x13, Y5, Y9, Y5 + VPERM2I128 $0x13, Y13, Y1, Y9 + VPERM2I128 $0x02, Y6, Y10, Y13 + VPERM2I128 $0x02, Y8, Y2, Y1 + VPERM2I128 $0x13, Y6, Y10, Y6 + VPERM2I128 $0x13, Y8, Y2, Y10 JMP sealAVX2ShortSeal -// ---------------------------------------------------------------------------- -// Special optimization for the last 128 bytes of ciphertext sealAVX2Tail128: - // Need to decrypt up to 128 bytes - prepare two blocks - // If we got here after the main loop - there are 512 encrypted bytes waiting to be hashed - // If we got here before the main loop - there are 448 encrpyred bytes waiting to be hashed - VMOVDQA ·chacha20Constants<>(SB), AA0 - VMOVDQA state1StoreAVX2, BB0 - VMOVDQA state2StoreAVX2, CC0 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0 - VMOVDQA DD0, DD1 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA 32(BP), Y14 + VMOVDQA 64(BP), Y12 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VMOVDQA Y4, Y1 sealAVX2Tail128LoopA: - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealAVX2Tail128LoopB: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0) - polyAdd(0(oup)) - polyMul - VPALIGNR $4, BB0, BB0, BB0 - VPALIGNR $8, CC0, CC0, CC0 - VPALIGNR $12, DD0, DD0, DD0 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0) - polyAdd(16(oup)) - polyMul - LEAQ 32(oup), oup - VPALIGNR $12, BB0, BB0, BB0 - VPALIGNR $8, CC0, CC0, CC0 - VPALIGNR $4, DD0, DD0, DD0 - DECQ itr1 - JG sealAVX2Tail128LoopA - DECQ itr2 - JGE sealAVX2Tail128LoopB - - VPADDD ·chacha20Constants<>(SB), AA0, AA1 - VPADDD state1StoreAVX2, BB0, BB1 - VPADDD state2StoreAVX2, CC0, CC1 - VPADDD DD1, DD0, DD1 - - VPERM2I128 $0x02, AA1, BB1, AA0 - VPERM2I128 $0x02, CC1, DD1, BB0 - VPERM2I128 $0x13, AA1, BB1, CC0 - VPERM2I128 $0x13, CC1, DD1, DD0 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x04, Y4, Y4, Y4 + DECQ CX + JG sealAVX2Tail128LoopA + DECQ R9 + JGE sealAVX2Tail128LoopB + VPADDD ·chacha20Constants<>+0(SB), Y0, Y5 + VPADDD 32(BP), Y14, Y9 + VPADDD 64(BP), Y12, Y13 + VPADDD Y1, Y4, Y1 + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 JMP sealAVX2ShortSealLoop -// ---------------------------------------------------------------------------- -// Special optimization for the last 256 bytes of ciphertext sealAVX2Tail256: - // Need to decrypt up to 256 bytes - prepare two blocks - // If we got here after the main loop - there are 512 encrypted bytes waiting to be hashed - // If we got here before the main loop - there are 448 encrpyred bytes waiting to be hashed - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA ·chacha20Constants<>(SB), AA1 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA state1StoreAVX2, BB1 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA state2StoreAVX2, CC1 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD1 - VMOVDQA DD0, TT1 - VMOVDQA DD1, TT2 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA ·chacha20Constants<>+0(SB), Y5 + VMOVDQA 32(BP), Y14 + VMOVDQA 32(BP), Y9 + VMOVDQA 64(BP), Y12 + VMOVDQA 64(BP), Y13 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y4, Y7 + VMOVDQA Y1, Y11 sealAVX2Tail256LoopA: - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealAVX2Tail256LoopB: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - polyAdd(0(oup)) - polyMul - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0) - polyAdd(16(oup)) - polyMul - LEAQ 32(oup), oup - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1 - DECQ itr1 - JG sealAVX2Tail256LoopA - DECQ itr2 - JGE sealAVX2Tail256LoopB - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1 - VPADDD TT1, DD0, DD0; VPADDD TT2, DD1, DD1 - VPERM2I128 $0x02, AA0, BB0, TT0 - VPERM2I128 $0x02, CC0, DD0, TT1 - VPERM2I128 $0x13, AA0, BB0, TT2 - VPERM2I128 $0x13, CC0, DD0, TT3 - VPXOR (0*32)(inp), TT0, TT0; VPXOR (1*32)(inp), TT1, TT1; VPXOR (2*32)(inp), TT2, TT2; VPXOR (3*32)(inp), TT3, TT3 - VMOVDQU TT0, (0*32)(oup); VMOVDQU TT1, (1*32)(oup); VMOVDQU TT2, (2*32)(oup); VMOVDQU TT3, (3*32)(oup) - MOVQ $128, itr1 - LEAQ 128(inp), inp - SUBQ $128, inl - VPERM2I128 $0x02, AA1, BB1, AA0 - VPERM2I128 $0x02, CC1, DD1, BB0 - VPERM2I128 $0x13, AA1, BB1, CC0 - VPERM2I128 $0x13, CC1, DD1, DD0 - - JMP sealAVX2SealHash - -// ---------------------------------------------------------------------------- -// Special optimization for the last 384 bytes of ciphertext + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + DECQ CX + JG sealAVX2Tail256LoopA + DECQ R9 + JGE sealAVX2Tail256LoopB + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD Y7, Y4, Y4 + VPADDD Y11, Y1, Y1 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPERM2I128 $0x02, Y12, Y4, Y7 + VPERM2I128 $0x13, Y0, Y14, Y11 + VPERM2I128 $0x13, Y12, Y4, Y15 + VPXOR (SI), Y3, Y3 + VPXOR 32(SI), Y7, Y7 + VPXOR 64(SI), Y11, Y11 + VPXOR 96(SI), Y15, Y15 + VMOVDQU Y3, (DI) + VMOVDQU Y7, 32(DI) + VMOVDQU Y11, 64(DI) + VMOVDQU Y15, 96(DI) + MOVQ $0x00000080, CX + LEAQ 128(SI), SI + SUBQ $0x80, BX + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + JMP sealAVX2SealHash + sealAVX2Tail384: - // Need to decrypt up to 384 bytes - prepare two blocks - // If we got here after the main loop - there are 512 encrypted bytes waiting to be hashed - // If we got here before the main loop - there are 448 encrpyred bytes waiting to be hashed - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2 - VMOVDQA DD0, TT1; VMOVDQA DD1, TT2; VMOVDQA DD2, TT3 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y4, Y7 + VMOVDQA Y1, Y11 + VMOVDQA Y2, Y15 sealAVX2Tail384LoopA: - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealAVX2Tail384LoopB: - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - polyAdd(0(oup)) - polyMul - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2 - chachaQR_AVX2(AA0, BB0, CC0, DD0, TT0); chachaQR_AVX2(AA1, BB1, CC1, DD1, TT0); chachaQR_AVX2(AA2, BB2, CC2, DD2, TT0) - polyAdd(16(oup)) - polyMul - LEAQ 32(oup), oup - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2 - DECQ itr1 - JG sealAVX2Tail384LoopA - DECQ itr2 - JGE sealAVX2Tail384LoopB - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2 - VPADDD TT1, DD0, DD0; VPADDD TT2, DD1, DD1; VPADDD TT3, DD2, DD2 - VPERM2I128 $0x02, AA0, BB0, TT0 - VPERM2I128 $0x02, CC0, DD0, TT1 - VPERM2I128 $0x13, AA0, BB0, TT2 - VPERM2I128 $0x13, CC0, DD0, TT3 - VPXOR (0*32)(inp), TT0, TT0; VPXOR (1*32)(inp), TT1, TT1; VPXOR (2*32)(inp), TT2, TT2; VPXOR (3*32)(inp), TT3, TT3 - VMOVDQU TT0, (0*32)(oup); VMOVDQU TT1, (1*32)(oup); VMOVDQU TT2, (2*32)(oup); VMOVDQU TT3, (3*32)(oup) - VPERM2I128 $0x02, AA1, BB1, TT0 - VPERM2I128 $0x02, CC1, DD1, TT1 - VPERM2I128 $0x13, AA1, BB1, TT2 - VPERM2I128 $0x13, CC1, DD1, TT3 - VPXOR (4*32)(inp), TT0, TT0; VPXOR (5*32)(inp), TT1, TT1; VPXOR (6*32)(inp), TT2, TT2; VPXOR (7*32)(inp), TT3, TT3 - VMOVDQU TT0, (4*32)(oup); VMOVDQU TT1, (5*32)(oup); VMOVDQU TT2, (6*32)(oup); VMOVDQU TT3, (7*32)(oup) - MOVQ $256, itr1 - LEAQ 256(inp), inp - SUBQ $256, inl - VPERM2I128 $0x02, AA2, BB2, AA0 - VPERM2I128 $0x02, CC2, DD2, BB0 - VPERM2I128 $0x13, AA2, BB2, CC0 - VPERM2I128 $0x13, CC2, DD2, DD0 - - JMP sealAVX2SealHash - -// ---------------------------------------------------------------------------- -// Special optimization for the last 512 bytes of ciphertext + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + DECQ CX + JG sealAVX2Tail384LoopA + DECQ R9 + JGE sealAVX2Tail384LoopB + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD Y7, Y4, Y4 + VPADDD Y11, Y1, Y1 + VPADDD Y15, Y2, Y2 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPERM2I128 $0x02, Y12, Y4, Y7 + VPERM2I128 $0x13, Y0, Y14, Y11 + VPERM2I128 $0x13, Y12, Y4, Y15 + VPXOR (SI), Y3, Y3 + VPXOR 32(SI), Y7, Y7 + VPXOR 64(SI), Y11, Y11 + VPXOR 96(SI), Y15, Y15 + VMOVDQU Y3, (DI) + VMOVDQU Y7, 32(DI) + VMOVDQU Y11, 64(DI) + VMOVDQU Y15, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y3 + VPERM2I128 $0x02, Y13, Y1, Y7 + VPERM2I128 $0x13, Y5, Y9, Y11 + VPERM2I128 $0x13, Y13, Y1, Y15 + VPXOR 128(SI), Y3, Y3 + VPXOR 160(SI), Y7, Y7 + VPXOR 192(SI), Y11, Y11 + VPXOR 224(SI), Y15, Y15 + VMOVDQU Y3, 128(DI) + VMOVDQU Y7, 160(DI) + VMOVDQU Y11, 192(DI) + VMOVDQU Y15, 224(DI) + MOVQ $0x00000100, CX + LEAQ 256(SI), SI + SUBQ $0x00000100, BX + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + JMP sealAVX2SealHash + sealAVX2Tail512: - // Need to decrypt up to 512 bytes - prepare two blocks - // If we got here after the main loop - there are 512 encrypted bytes waiting to be hashed - // If we got here before the main loop - there are 448 encrpyred bytes waiting to be hashed - VMOVDQA ·chacha20Constants<>(SB), AA0; VMOVDQA AA0, AA1; VMOVDQA AA0, AA2; VMOVDQA AA0, AA3 - VMOVDQA state1StoreAVX2, BB0; VMOVDQA BB0, BB1; VMOVDQA BB0, BB2; VMOVDQA BB0, BB3 - VMOVDQA state2StoreAVX2, CC0; VMOVDQA CC0, CC1; VMOVDQA CC0, CC2; VMOVDQA CC0, CC3 - VMOVDQA ctr3StoreAVX2, DD0 - VPADDD ·avx2IncMask<>(SB), DD0, DD0; VPADDD ·avx2IncMask<>(SB), DD0, DD1; VPADDD ·avx2IncMask<>(SB), DD1, DD2; VPADDD ·avx2IncMask<>(SB), DD2, DD3 - VMOVDQA DD0, ctr0StoreAVX2; VMOVDQA DD1, ctr1StoreAVX2; VMOVDQA DD2, ctr2StoreAVX2; VMOVDQA DD3, ctr3StoreAVX2 + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) sealAVX2Tail512LoopA: - polyAdd(0(oup)) - polyMul - LEAQ 16(oup), oup + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(DI), DI sealAVX2Tail512LoopB: - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - polyAdd(0*8(oup)) - polyMulAVX2 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPALIGNR $4, BB0, BB0, BB0; VPALIGNR $4, BB1, BB1, BB1; VPALIGNR $4, BB2, BB2, BB2; VPALIGNR $4, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $12, DD0, DD0, DD0; VPALIGNR $12, DD1, DD1, DD1; VPALIGNR $12, DD2, DD2, DD2; VPALIGNR $12, DD3, DD3, DD3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol16<>(SB), DD0, DD0; VPSHUFB ·rol16<>(SB), DD1, DD1; VPSHUFB ·rol16<>(SB), DD2, DD2; VPSHUFB ·rol16<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - polyAdd(2*8(oup)) - polyMulAVX2 - LEAQ (4*8)(oup), oup - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $12, BB0, CC3; VPSRLD $20, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $12, BB1, CC3; VPSRLD $20, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $12, BB2, CC3; VPSRLD $20, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $12, BB3, CC3; VPSRLD $20, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPADDD BB0, AA0, AA0; VPADDD BB1, AA1, AA1; VPADDD BB2, AA2, AA2; VPADDD BB3, AA3, AA3 - VPXOR AA0, DD0, DD0; VPXOR AA1, DD1, DD1; VPXOR AA2, DD2, DD2; VPXOR AA3, DD3, DD3 - VPSHUFB ·rol8<>(SB), DD0, DD0; VPSHUFB ·rol8<>(SB), DD1, DD1; VPSHUFB ·rol8<>(SB), DD2, DD2; VPSHUFB ·rol8<>(SB), DD3, DD3 - VPADDD DD0, CC0, CC0; VPADDD DD1, CC1, CC1; VPADDD DD2, CC2, CC2; VPADDD DD3, CC3, CC3 - VPXOR CC0, BB0, BB0; VPXOR CC1, BB1, BB1; VPXOR CC2, BB2, BB2; VPXOR CC3, BB3, BB3 - VMOVDQA CC3, tmpStoreAVX2 - VPSLLD $7, BB0, CC3; VPSRLD $25, BB0, BB0; VPXOR CC3, BB0, BB0 - VPSLLD $7, BB1, CC3; VPSRLD $25, BB1, BB1; VPXOR CC3, BB1, BB1 - VPSLLD $7, BB2, CC3; VPSRLD $25, BB2, BB2; VPXOR CC3, BB2, BB2 - VPSLLD $7, BB3, CC3; VPSRLD $25, BB3, BB3; VPXOR CC3, BB3, BB3 - VMOVDQA tmpStoreAVX2, CC3 - VPALIGNR $12, BB0, BB0, BB0; VPALIGNR $12, BB1, BB1, BB1; VPALIGNR $12, BB2, BB2, BB2; VPALIGNR $12, BB3, BB3, BB3 - VPALIGNR $8, CC0, CC0, CC0; VPALIGNR $8, CC1, CC1, CC1; VPALIGNR $8, CC2, CC2, CC2; VPALIGNR $8, CC3, CC3, CC3 - VPALIGNR $4, DD0, DD0, DD0; VPALIGNR $4, DD1, DD1, DD1; VPALIGNR $4, DD2, DD2, DD2; VPALIGNR $4, DD3, DD3, DD3 - - DECQ itr1 - JG sealAVX2Tail512LoopA - DECQ itr2 - JGE sealAVX2Tail512LoopB - - VPADDD ·chacha20Constants<>(SB), AA0, AA0; VPADDD ·chacha20Constants<>(SB), AA1, AA1; VPADDD ·chacha20Constants<>(SB), AA2, AA2; VPADDD ·chacha20Constants<>(SB), AA3, AA3 - VPADDD state1StoreAVX2, BB0, BB0; VPADDD state1StoreAVX2, BB1, BB1; VPADDD state1StoreAVX2, BB2, BB2; VPADDD state1StoreAVX2, BB3, BB3 - VPADDD state2StoreAVX2, CC0, CC0; VPADDD state2StoreAVX2, CC1, CC1; VPADDD state2StoreAVX2, CC2, CC2; VPADDD state2StoreAVX2, CC3, CC3 - VPADDD ctr0StoreAVX2, DD0, DD0; VPADDD ctr1StoreAVX2, DD1, DD1; VPADDD ctr2StoreAVX2, DD2, DD2; VPADDD ctr3StoreAVX2, DD3, DD3 - VMOVDQA CC3, tmpStoreAVX2 - VPERM2I128 $0x02, AA0, BB0, CC3 - VPXOR (0*32)(inp), CC3, CC3 - VMOVDQU CC3, (0*32)(oup) - VPERM2I128 $0x02, CC0, DD0, CC3 - VPXOR (1*32)(inp), CC3, CC3 - VMOVDQU CC3, (1*32)(oup) - VPERM2I128 $0x13, AA0, BB0, CC3 - VPXOR (2*32)(inp), CC3, CC3 - VMOVDQU CC3, (2*32)(oup) - VPERM2I128 $0x13, CC0, DD0, CC3 - VPXOR (3*32)(inp), CC3, CC3 - VMOVDQU CC3, (3*32)(oup) - - VPERM2I128 $0x02, AA1, BB1, AA0 - VPERM2I128 $0x02, CC1, DD1, BB0 - VPERM2I128 $0x13, AA1, BB1, CC0 - VPERM2I128 $0x13, CC1, DD1, DD0 - VPXOR (4*32)(inp), AA0, AA0; VPXOR (5*32)(inp), BB0, BB0; VPXOR (6*32)(inp), CC0, CC0; VPXOR (7*32)(inp), DD0, DD0 - VMOVDQU AA0, (4*32)(oup); VMOVDQU BB0, (5*32)(oup); VMOVDQU CC0, (6*32)(oup); VMOVDQU DD0, (7*32)(oup) - - VPERM2I128 $0x02, AA2, BB2, AA0 - VPERM2I128 $0x02, CC2, DD2, BB0 - VPERM2I128 $0x13, AA2, BB2, CC0 - VPERM2I128 $0x13, CC2, DD2, DD0 - VPXOR (8*32)(inp), AA0, AA0; VPXOR (9*32)(inp), BB0, BB0; VPXOR (10*32)(inp), CC0, CC0; VPXOR (11*32)(inp), DD0, DD0 - VMOVDQU AA0, (8*32)(oup); VMOVDQU BB0, (9*32)(oup); VMOVDQU CC0, (10*32)(oup); VMOVDQU DD0, (11*32)(oup) - - MOVQ $384, itr1 - LEAQ 384(inp), inp - SUBQ $384, inl - VPERM2I128 $0x02, AA3, BB3, AA0 - VPERM2I128 $0x02, tmpStoreAVX2, DD3, BB0 - VPERM2I128 $0x13, AA3, BB3, CC0 - VPERM2I128 $0x13, tmpStoreAVX2, DD3, DD0 - - JMP sealAVX2SealHash + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + ADDQ (DI), R10 + ADCQ 8(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x0c, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + ADDQ 16(DI), R10 + ADCQ 24(DI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(DI), DI + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x04, Y3, Y3, Y3 + DECQ CX + JG sealAVX2Tail512LoopA + DECQ R9 + JGE sealAVX2Tail512LoopB + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VMOVDQA Y15, 224(BP) + VPERM2I128 $0x02, Y0, Y14, Y15 + VPXOR (SI), Y15, Y15 + VMOVDQU Y15, (DI) + VPERM2I128 $0x02, Y12, Y4, Y15 + VPXOR 32(SI), Y15, Y15 + VMOVDQU Y15, 32(DI) + VPERM2I128 $0x13, Y0, Y14, Y15 + VPXOR 64(SI), Y15, Y15 + VMOVDQU Y15, 64(DI) + VPERM2I128 $0x13, Y12, Y4, Y15 + VPXOR 96(SI), Y15, Y15 + VMOVDQU Y15, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 128(SI), Y0, Y0 + VPXOR 160(SI), Y14, Y14 + VPXOR 192(SI), Y12, Y12 + VPXOR 224(SI), Y4, Y4 + VMOVDQU Y0, 128(DI) + VMOVDQU Y14, 160(DI) + VMOVDQU Y12, 192(DI) + VMOVDQU Y4, 224(DI) + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 256(SI), Y0, Y0 + VPXOR 288(SI), Y14, Y14 + VPXOR 320(SI), Y12, Y12 + VPXOR 352(SI), Y4, Y4 + VMOVDQU Y0, 256(DI) + VMOVDQU Y14, 288(DI) + VMOVDQU Y12, 320(DI) + VMOVDQU Y4, 352(DI) + MOVQ $0x00000180, CX + LEAQ 384(SI), SI + SUBQ $0x00000180, BX + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, 224(BP), Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, 224(BP), Y3, Y4 + JMP sealAVX2SealHash diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519.go b/vendor/golang.org/x/crypto/curve25519/curve25519.go index 00f963ea..21ca3b2e 100644 --- a/vendor/golang.org/x/crypto/curve25519/curve25519.go +++ b/vendor/golang.org/x/crypto/curve25519/curve25519.go @@ -6,9 +6,11 @@ // performs scalar multiplication on the elliptic curve known as Curve25519. // See RFC 7748. // -// Starting in Go 1.20, this package is a wrapper for the X25519 implementation +// This package is a wrapper for the X25519 implementation // in the crypto/ecdh package. -package curve25519 // import "golang.org/x/crypto/curve25519" +package curve25519 + +import "crypto/ecdh" // ScalarMult sets dst to the product scalar * point. // @@ -16,7 +18,13 @@ package curve25519 // import "golang.org/x/crypto/curve25519" // zeroes, irrespective of the scalar. Instead, use the X25519 function, which // will return an error. func ScalarMult(dst, scalar, point *[32]byte) { - scalarMult(dst, scalar, point) + if _, err := x25519(dst, scalar[:], point[:]); err != nil { + // The only error condition for x25519 when the inputs are 32 bytes long + // is if the output would have been the all-zero value. + for i := range dst { + dst[i] = 0 + } + } } // ScalarBaseMult sets dst to the product scalar * base where base is the @@ -25,7 +33,12 @@ func ScalarMult(dst, scalar, point *[32]byte) { // It is recommended to use the X25519 function with Basepoint instead, as // copying into fixed size arrays can lead to unexpected bugs. func ScalarBaseMult(dst, scalar *[32]byte) { - scalarBaseMult(dst, scalar) + curve := ecdh.X25519() + priv, err := curve.NewPrivateKey(scalar[:]) + if err != nil { + panic("curve25519: internal error: scalarBaseMult was not 32 bytes") + } + copy(dst[:], priv.PublicKey().Bytes()) } const ( @@ -57,3 +70,21 @@ func X25519(scalar, point []byte) ([]byte, error) { var dst [32]byte return x25519(&dst, scalar, point) } + +func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) { + curve := ecdh.X25519() + pub, err := curve.NewPublicKey(point) + if err != nil { + return nil, err + } + priv, err := curve.NewPrivateKey(scalar) + if err != nil { + return nil, err + } + out, err := priv.ECDH(pub) + if err != nil { + return nil, err + } + copy(dst[:], out) + return dst[:], nil +} diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519_compat.go b/vendor/golang.org/x/crypto/curve25519/curve25519_compat.go deleted file mode 100644 index ba647e8d..00000000 --- a/vendor/golang.org/x/crypto/curve25519/curve25519_compat.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !go1.20 - -package curve25519 - -import ( - "crypto/subtle" - "errors" - "strconv" - - "golang.org/x/crypto/curve25519/internal/field" -) - -func scalarMult(dst, scalar, point *[32]byte) { - var e [32]byte - - copy(e[:], scalar[:]) - e[0] &= 248 - e[31] &= 127 - e[31] |= 64 - - var x1, x2, z2, x3, z3, tmp0, tmp1 field.Element - x1.SetBytes(point[:]) - x2.One() - x3.Set(&x1) - z3.One() - - swap := 0 - for pos := 254; pos >= 0; pos-- { - b := e[pos/8] >> uint(pos&7) - b &= 1 - swap ^= int(b) - x2.Swap(&x3, swap) - z2.Swap(&z3, swap) - swap = int(b) - - tmp0.Subtract(&x3, &z3) - tmp1.Subtract(&x2, &z2) - x2.Add(&x2, &z2) - z2.Add(&x3, &z3) - z3.Multiply(&tmp0, &x2) - z2.Multiply(&z2, &tmp1) - tmp0.Square(&tmp1) - tmp1.Square(&x2) - x3.Add(&z3, &z2) - z2.Subtract(&z3, &z2) - x2.Multiply(&tmp1, &tmp0) - tmp1.Subtract(&tmp1, &tmp0) - z2.Square(&z2) - - z3.Mult32(&tmp1, 121666) - x3.Square(&x3) - tmp0.Add(&tmp0, &z3) - z3.Multiply(&x1, &z2) - z2.Multiply(&tmp1, &tmp0) - } - - x2.Swap(&x3, swap) - z2.Swap(&z3, swap) - - z2.Invert(&z2) - x2.Multiply(&x2, &z2) - copy(dst[:], x2.Bytes()) -} - -func scalarBaseMult(dst, scalar *[32]byte) { - checkBasepoint() - scalarMult(dst, scalar, &basePoint) -} - -func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) { - var in [32]byte - if l := len(scalar); l != 32 { - return nil, errors.New("bad scalar length: " + strconv.Itoa(l) + ", expected 32") - } - if l := len(point); l != 32 { - return nil, errors.New("bad point length: " + strconv.Itoa(l) + ", expected 32") - } - copy(in[:], scalar) - if &point[0] == &Basepoint[0] { - scalarBaseMult(dst, &in) - } else { - var base, zero [32]byte - copy(base[:], point) - scalarMult(dst, &in, &base) - if subtle.ConstantTimeCompare(dst[:], zero[:]) == 1 { - return nil, errors.New("bad input point: low order point") - } - } - return dst[:], nil -} - -func checkBasepoint() { - if subtle.ConstantTimeCompare(Basepoint, []byte{ - 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - }) != 1 { - panic("curve25519: global Basepoint value was modified") - } -} diff --git a/vendor/golang.org/x/crypto/curve25519/curve25519_go120.go b/vendor/golang.org/x/crypto/curve25519/curve25519_go120.go deleted file mode 100644 index 627df497..00000000 --- a/vendor/golang.org/x/crypto/curve25519/curve25519_go120.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.20 - -package curve25519 - -import "crypto/ecdh" - -func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) { - curve := ecdh.X25519() - pub, err := curve.NewPublicKey(point) - if err != nil { - return nil, err - } - priv, err := curve.NewPrivateKey(scalar) - if err != nil { - return nil, err - } - out, err := priv.ECDH(pub) - if err != nil { - return nil, err - } - copy(dst[:], out) - return dst[:], nil -} - -func scalarMult(dst, scalar, point *[32]byte) { - if _, err := x25519(dst, scalar[:], point[:]); err != nil { - // The only error condition for x25519 when the inputs are 32 bytes long - // is if the output would have been the all-zero value. - for i := range dst { - dst[i] = 0 - } - } -} - -func scalarBaseMult(dst, scalar *[32]byte) { - curve := ecdh.X25519() - priv, err := curve.NewPrivateKey(scalar[:]) - if err != nil { - panic("curve25519: internal error: scalarBaseMult was not 32 bytes") - } - copy(dst[:], priv.PublicKey().Bytes()) -} diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/README b/vendor/golang.org/x/crypto/curve25519/internal/field/README deleted file mode 100644 index e25bca7d..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/README +++ /dev/null @@ -1,7 +0,0 @@ -This package is kept in sync with crypto/ed25519/internal/edwards25519/field in -the standard library. - -If there are any changes in the standard library that need to be synced to this -package, run sync.sh. It will not overwrite any local changes made since the -previous sync, so it's ok to land changes in this package first, and then sync -to the standard library later. diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/fe.go b/vendor/golang.org/x/crypto/curve25519/internal/field/fe.go deleted file mode 100644 index ca841ad9..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/fe.go +++ /dev/null @@ -1,416 +0,0 @@ -// Copyright (c) 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package field implements fast arithmetic modulo 2^255-19. -package field - -import ( - "crypto/subtle" - "encoding/binary" - "math/bits" -) - -// Element represents an element of the field GF(2^255-19). Note that this -// is not a cryptographically secure group, and should only be used to interact -// with edwards25519.Point coordinates. -// -// This type works similarly to math/big.Int, and all arguments and receivers -// are allowed to alias. -// -// The zero value is a valid zero element. -type Element struct { - // An element t represents the integer - // t.l0 + t.l1*2^51 + t.l2*2^102 + t.l3*2^153 + t.l4*2^204 - // - // Between operations, all limbs are expected to be lower than 2^52. - l0 uint64 - l1 uint64 - l2 uint64 - l3 uint64 - l4 uint64 -} - -const maskLow51Bits uint64 = (1 << 51) - 1 - -var feZero = &Element{0, 0, 0, 0, 0} - -// Zero sets v = 0, and returns v. -func (v *Element) Zero() *Element { - *v = *feZero - return v -} - -var feOne = &Element{1, 0, 0, 0, 0} - -// One sets v = 1, and returns v. -func (v *Element) One() *Element { - *v = *feOne - return v -} - -// reduce reduces v modulo 2^255 - 19 and returns it. -func (v *Element) reduce() *Element { - v.carryPropagate() - - // After the light reduction we now have a field element representation - // v < 2^255 + 2^13 * 19, but need v < 2^255 - 19. - - // If v >= 2^255 - 19, then v + 19 >= 2^255, which would overflow 2^255 - 1, - // generating a carry. That is, c will be 0 if v < 2^255 - 19, and 1 otherwise. - c := (v.l0 + 19) >> 51 - c = (v.l1 + c) >> 51 - c = (v.l2 + c) >> 51 - c = (v.l3 + c) >> 51 - c = (v.l4 + c) >> 51 - - // If v < 2^255 - 19 and c = 0, this will be a no-op. Otherwise, it's - // effectively applying the reduction identity to the carry. - v.l0 += 19 * c - - v.l1 += v.l0 >> 51 - v.l0 = v.l0 & maskLow51Bits - v.l2 += v.l1 >> 51 - v.l1 = v.l1 & maskLow51Bits - v.l3 += v.l2 >> 51 - v.l2 = v.l2 & maskLow51Bits - v.l4 += v.l3 >> 51 - v.l3 = v.l3 & maskLow51Bits - // no additional carry - v.l4 = v.l4 & maskLow51Bits - - return v -} - -// Add sets v = a + b, and returns v. -func (v *Element) Add(a, b *Element) *Element { - v.l0 = a.l0 + b.l0 - v.l1 = a.l1 + b.l1 - v.l2 = a.l2 + b.l2 - v.l3 = a.l3 + b.l3 - v.l4 = a.l4 + b.l4 - // Using the generic implementation here is actually faster than the - // assembly. Probably because the body of this function is so simple that - // the compiler can figure out better optimizations by inlining the carry - // propagation. TODO - return v.carryPropagateGeneric() -} - -// Subtract sets v = a - b, and returns v. -func (v *Element) Subtract(a, b *Element) *Element { - // We first add 2 * p, to guarantee the subtraction won't underflow, and - // then subtract b (which can be up to 2^255 + 2^13 * 19). - v.l0 = (a.l0 + 0xFFFFFFFFFFFDA) - b.l0 - v.l1 = (a.l1 + 0xFFFFFFFFFFFFE) - b.l1 - v.l2 = (a.l2 + 0xFFFFFFFFFFFFE) - b.l2 - v.l3 = (a.l3 + 0xFFFFFFFFFFFFE) - b.l3 - v.l4 = (a.l4 + 0xFFFFFFFFFFFFE) - b.l4 - return v.carryPropagate() -} - -// Negate sets v = -a, and returns v. -func (v *Element) Negate(a *Element) *Element { - return v.Subtract(feZero, a) -} - -// Invert sets v = 1/z mod p, and returns v. -// -// If z == 0, Invert returns v = 0. -func (v *Element) Invert(z *Element) *Element { - // Inversion is implemented as exponentiation with exponent p − 2. It uses the - // same sequence of 255 squarings and 11 multiplications as [Curve25519]. - var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t Element - - z2.Square(z) // 2 - t.Square(&z2) // 4 - t.Square(&t) // 8 - z9.Multiply(&t, z) // 9 - z11.Multiply(&z9, &z2) // 11 - t.Square(&z11) // 22 - z2_5_0.Multiply(&t, &z9) // 31 = 2^5 - 2^0 - - t.Square(&z2_5_0) // 2^6 - 2^1 - for i := 0; i < 4; i++ { - t.Square(&t) // 2^10 - 2^5 - } - z2_10_0.Multiply(&t, &z2_5_0) // 2^10 - 2^0 - - t.Square(&z2_10_0) // 2^11 - 2^1 - for i := 0; i < 9; i++ { - t.Square(&t) // 2^20 - 2^10 - } - z2_20_0.Multiply(&t, &z2_10_0) // 2^20 - 2^0 - - t.Square(&z2_20_0) // 2^21 - 2^1 - for i := 0; i < 19; i++ { - t.Square(&t) // 2^40 - 2^20 - } - t.Multiply(&t, &z2_20_0) // 2^40 - 2^0 - - t.Square(&t) // 2^41 - 2^1 - for i := 0; i < 9; i++ { - t.Square(&t) // 2^50 - 2^10 - } - z2_50_0.Multiply(&t, &z2_10_0) // 2^50 - 2^0 - - t.Square(&z2_50_0) // 2^51 - 2^1 - for i := 0; i < 49; i++ { - t.Square(&t) // 2^100 - 2^50 - } - z2_100_0.Multiply(&t, &z2_50_0) // 2^100 - 2^0 - - t.Square(&z2_100_0) // 2^101 - 2^1 - for i := 0; i < 99; i++ { - t.Square(&t) // 2^200 - 2^100 - } - t.Multiply(&t, &z2_100_0) // 2^200 - 2^0 - - t.Square(&t) // 2^201 - 2^1 - for i := 0; i < 49; i++ { - t.Square(&t) // 2^250 - 2^50 - } - t.Multiply(&t, &z2_50_0) // 2^250 - 2^0 - - t.Square(&t) // 2^251 - 2^1 - t.Square(&t) // 2^252 - 2^2 - t.Square(&t) // 2^253 - 2^3 - t.Square(&t) // 2^254 - 2^4 - t.Square(&t) // 2^255 - 2^5 - - return v.Multiply(&t, &z11) // 2^255 - 21 -} - -// Set sets v = a, and returns v. -func (v *Element) Set(a *Element) *Element { - *v = *a - return v -} - -// SetBytes sets v to x, which must be a 32-byte little-endian encoding. -// -// Consistent with RFC 7748, the most significant bit (the high bit of the -// last byte) is ignored, and non-canonical values (2^255-19 through 2^255-1) -// are accepted. Note that this is laxer than specified by RFC 8032. -func (v *Element) SetBytes(x []byte) *Element { - if len(x) != 32 { - panic("edwards25519: invalid field element input size") - } - - // Bits 0:51 (bytes 0:8, bits 0:64, shift 0, mask 51). - v.l0 = binary.LittleEndian.Uint64(x[0:8]) - v.l0 &= maskLow51Bits - // Bits 51:102 (bytes 6:14, bits 48:112, shift 3, mask 51). - v.l1 = binary.LittleEndian.Uint64(x[6:14]) >> 3 - v.l1 &= maskLow51Bits - // Bits 102:153 (bytes 12:20, bits 96:160, shift 6, mask 51). - v.l2 = binary.LittleEndian.Uint64(x[12:20]) >> 6 - v.l2 &= maskLow51Bits - // Bits 153:204 (bytes 19:27, bits 152:216, shift 1, mask 51). - v.l3 = binary.LittleEndian.Uint64(x[19:27]) >> 1 - v.l3 &= maskLow51Bits - // Bits 204:251 (bytes 24:32, bits 192:256, shift 12, mask 51). - // Note: not bytes 25:33, shift 4, to avoid overread. - v.l4 = binary.LittleEndian.Uint64(x[24:32]) >> 12 - v.l4 &= maskLow51Bits - - return v -} - -// Bytes returns the canonical 32-byte little-endian encoding of v. -func (v *Element) Bytes() []byte { - // This function is outlined to make the allocations inline in the caller - // rather than happen on the heap. - var out [32]byte - return v.bytes(&out) -} - -func (v *Element) bytes(out *[32]byte) []byte { - t := *v - t.reduce() - - var buf [8]byte - for i, l := range [5]uint64{t.l0, t.l1, t.l2, t.l3, t.l4} { - bitsOffset := i * 51 - binary.LittleEndian.PutUint64(buf[:], l<= len(out) { - break - } - out[off] |= bb - } - } - - return out[:] -} - -// Equal returns 1 if v and u are equal, and 0 otherwise. -func (v *Element) Equal(u *Element) int { - sa, sv := u.Bytes(), v.Bytes() - return subtle.ConstantTimeCompare(sa, sv) -} - -// mask64Bits returns 0xffffffff if cond is 1, and 0 otherwise. -func mask64Bits(cond int) uint64 { return ^(uint64(cond) - 1) } - -// Select sets v to a if cond == 1, and to b if cond == 0. -func (v *Element) Select(a, b *Element, cond int) *Element { - m := mask64Bits(cond) - v.l0 = (m & a.l0) | (^m & b.l0) - v.l1 = (m & a.l1) | (^m & b.l1) - v.l2 = (m & a.l2) | (^m & b.l2) - v.l3 = (m & a.l3) | (^m & b.l3) - v.l4 = (m & a.l4) | (^m & b.l4) - return v -} - -// Swap swaps v and u if cond == 1 or leaves them unchanged if cond == 0, and returns v. -func (v *Element) Swap(u *Element, cond int) { - m := mask64Bits(cond) - t := m & (v.l0 ^ u.l0) - v.l0 ^= t - u.l0 ^= t - t = m & (v.l1 ^ u.l1) - v.l1 ^= t - u.l1 ^= t - t = m & (v.l2 ^ u.l2) - v.l2 ^= t - u.l2 ^= t - t = m & (v.l3 ^ u.l3) - v.l3 ^= t - u.l3 ^= t - t = m & (v.l4 ^ u.l4) - v.l4 ^= t - u.l4 ^= t -} - -// IsNegative returns 1 if v is negative, and 0 otherwise. -func (v *Element) IsNegative() int { - return int(v.Bytes()[0] & 1) -} - -// Absolute sets v to |u|, and returns v. -func (v *Element) Absolute(u *Element) *Element { - return v.Select(new(Element).Negate(u), u, u.IsNegative()) -} - -// Multiply sets v = x * y, and returns v. -func (v *Element) Multiply(x, y *Element) *Element { - feMul(v, x, y) - return v -} - -// Square sets v = x * x, and returns v. -func (v *Element) Square(x *Element) *Element { - feSquare(v, x) - return v -} - -// Mult32 sets v = x * y, and returns v. -func (v *Element) Mult32(x *Element, y uint32) *Element { - x0lo, x0hi := mul51(x.l0, y) - x1lo, x1hi := mul51(x.l1, y) - x2lo, x2hi := mul51(x.l2, y) - x3lo, x3hi := mul51(x.l3, y) - x4lo, x4hi := mul51(x.l4, y) - v.l0 = x0lo + 19*x4hi // carried over per the reduction identity - v.l1 = x1lo + x0hi - v.l2 = x2lo + x1hi - v.l3 = x3lo + x2hi - v.l4 = x4lo + x3hi - // The hi portions are going to be only 32 bits, plus any previous excess, - // so we can skip the carry propagation. - return v -} - -// mul51 returns lo + hi * 2⁵¹ = a * b. -func mul51(a uint64, b uint32) (lo uint64, hi uint64) { - mh, ml := bits.Mul64(a, uint64(b)) - lo = ml & maskLow51Bits - hi = (mh << 13) | (ml >> 51) - return -} - -// Pow22523 set v = x^((p-5)/8), and returns v. (p-5)/8 is 2^252-3. -func (v *Element) Pow22523(x *Element) *Element { - var t0, t1, t2 Element - - t0.Square(x) // x^2 - t1.Square(&t0) // x^4 - t1.Square(&t1) // x^8 - t1.Multiply(x, &t1) // x^9 - t0.Multiply(&t0, &t1) // x^11 - t0.Square(&t0) // x^22 - t0.Multiply(&t1, &t0) // x^31 - t1.Square(&t0) // x^62 - for i := 1; i < 5; i++ { // x^992 - t1.Square(&t1) - } - t0.Multiply(&t1, &t0) // x^1023 -> 1023 = 2^10 - 1 - t1.Square(&t0) // 2^11 - 2 - for i := 1; i < 10; i++ { // 2^20 - 2^10 - t1.Square(&t1) - } - t1.Multiply(&t1, &t0) // 2^20 - 1 - t2.Square(&t1) // 2^21 - 2 - for i := 1; i < 20; i++ { // 2^40 - 2^20 - t2.Square(&t2) - } - t1.Multiply(&t2, &t1) // 2^40 - 1 - t1.Square(&t1) // 2^41 - 2 - for i := 1; i < 10; i++ { // 2^50 - 2^10 - t1.Square(&t1) - } - t0.Multiply(&t1, &t0) // 2^50 - 1 - t1.Square(&t0) // 2^51 - 2 - for i := 1; i < 50; i++ { // 2^100 - 2^50 - t1.Square(&t1) - } - t1.Multiply(&t1, &t0) // 2^100 - 1 - t2.Square(&t1) // 2^101 - 2 - for i := 1; i < 100; i++ { // 2^200 - 2^100 - t2.Square(&t2) - } - t1.Multiply(&t2, &t1) // 2^200 - 1 - t1.Square(&t1) // 2^201 - 2 - for i := 1; i < 50; i++ { // 2^250 - 2^50 - t1.Square(&t1) - } - t0.Multiply(&t1, &t0) // 2^250 - 1 - t0.Square(&t0) // 2^251 - 2 - t0.Square(&t0) // 2^252 - 4 - return v.Multiply(&t0, x) // 2^252 - 3 -> x^(2^252-3) -} - -// sqrtM1 is 2^((p-1)/4), which squared is equal to -1 by Euler's Criterion. -var sqrtM1 = &Element{1718705420411056, 234908883556509, - 2233514472574048, 2117202627021982, 765476049583133} - -// SqrtRatio sets r to the non-negative square root of the ratio of u and v. -// -// If u/v is square, SqrtRatio returns r and 1. If u/v is not square, SqrtRatio -// sets r according to Section 4.3 of draft-irtf-cfrg-ristretto255-decaf448-00, -// and returns r and 0. -func (r *Element) SqrtRatio(u, v *Element) (rr *Element, wasSquare int) { - var a, b Element - - // r = (u * v3) * (u * v7)^((p-5)/8) - v2 := a.Square(v) - uv3 := b.Multiply(u, b.Multiply(v2, v)) - uv7 := a.Multiply(uv3, a.Square(v2)) - r.Multiply(uv3, r.Pow22523(uv7)) - - check := a.Multiply(v, a.Square(r)) // check = v * r^2 - - uNeg := b.Negate(u) - correctSignSqrt := check.Equal(u) - flippedSignSqrt := check.Equal(uNeg) - flippedSignSqrtI := check.Equal(uNeg.Multiply(uNeg, sqrtM1)) - - rPrime := b.Multiply(r, sqrtM1) // r_prime = SQRT_M1 * r - // r = CT_SELECT(r_prime IF flipped_sign_sqrt | flipped_sign_sqrt_i ELSE r) - r.Select(rPrime, r, flippedSignSqrt|flippedSignSqrtI) - - r.Absolute(r) // Choose the nonnegative square root. - return r, correctSignSqrt | flippedSignSqrt -} diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64.go b/vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64.go deleted file mode 100644 index 70c54169..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64.go +++ /dev/null @@ -1,15 +0,0 @@ -// Code generated by command: go run fe_amd64_asm.go -out ../fe_amd64.s -stubs ../fe_amd64.go -pkg field. DO NOT EDIT. - -//go:build amd64 && gc && !purego - -package field - -// feMul sets out = a * b. It works like feMulGeneric. -// -//go:noescape -func feMul(out *Element, a *Element, b *Element) - -// feSquare sets out = a * a. It works like feSquareGeneric. -// -//go:noescape -func feSquare(out *Element, a *Element) diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64.s b/vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64.s deleted file mode 100644 index 60817acc..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64.s +++ /dev/null @@ -1,378 +0,0 @@ -// Code generated by command: go run fe_amd64_asm.go -out ../fe_amd64.s -stubs ../fe_amd64.go -pkg field. DO NOT EDIT. - -//go:build amd64 && gc && !purego - -#include "textflag.h" - -// func feMul(out *Element, a *Element, b *Element) -TEXT ·feMul(SB), NOSPLIT, $0-24 - MOVQ a+8(FP), CX - MOVQ b+16(FP), BX - - // r0 = a0×b0 - MOVQ (CX), AX - MULQ (BX) - MOVQ AX, DI - MOVQ DX, SI - - // r0 += 19×a1×b4 - MOVQ 8(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 32(BX) - ADDQ AX, DI - ADCQ DX, SI - - // r0 += 19×a2×b3 - MOVQ 16(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 24(BX) - ADDQ AX, DI - ADCQ DX, SI - - // r0 += 19×a3×b2 - MOVQ 24(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 16(BX) - ADDQ AX, DI - ADCQ DX, SI - - // r0 += 19×a4×b1 - MOVQ 32(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 8(BX) - ADDQ AX, DI - ADCQ DX, SI - - // r1 = a0×b1 - MOVQ (CX), AX - MULQ 8(BX) - MOVQ AX, R9 - MOVQ DX, R8 - - // r1 += a1×b0 - MOVQ 8(CX), AX - MULQ (BX) - ADDQ AX, R9 - ADCQ DX, R8 - - // r1 += 19×a2×b4 - MOVQ 16(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 32(BX) - ADDQ AX, R9 - ADCQ DX, R8 - - // r1 += 19×a3×b3 - MOVQ 24(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 24(BX) - ADDQ AX, R9 - ADCQ DX, R8 - - // r1 += 19×a4×b2 - MOVQ 32(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 16(BX) - ADDQ AX, R9 - ADCQ DX, R8 - - // r2 = a0×b2 - MOVQ (CX), AX - MULQ 16(BX) - MOVQ AX, R11 - MOVQ DX, R10 - - // r2 += a1×b1 - MOVQ 8(CX), AX - MULQ 8(BX) - ADDQ AX, R11 - ADCQ DX, R10 - - // r2 += a2×b0 - MOVQ 16(CX), AX - MULQ (BX) - ADDQ AX, R11 - ADCQ DX, R10 - - // r2 += 19×a3×b4 - MOVQ 24(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 32(BX) - ADDQ AX, R11 - ADCQ DX, R10 - - // r2 += 19×a4×b3 - MOVQ 32(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 24(BX) - ADDQ AX, R11 - ADCQ DX, R10 - - // r3 = a0×b3 - MOVQ (CX), AX - MULQ 24(BX) - MOVQ AX, R13 - MOVQ DX, R12 - - // r3 += a1×b2 - MOVQ 8(CX), AX - MULQ 16(BX) - ADDQ AX, R13 - ADCQ DX, R12 - - // r3 += a2×b1 - MOVQ 16(CX), AX - MULQ 8(BX) - ADDQ AX, R13 - ADCQ DX, R12 - - // r3 += a3×b0 - MOVQ 24(CX), AX - MULQ (BX) - ADDQ AX, R13 - ADCQ DX, R12 - - // r3 += 19×a4×b4 - MOVQ 32(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 32(BX) - ADDQ AX, R13 - ADCQ DX, R12 - - // r4 = a0×b4 - MOVQ (CX), AX - MULQ 32(BX) - MOVQ AX, R15 - MOVQ DX, R14 - - // r4 += a1×b3 - MOVQ 8(CX), AX - MULQ 24(BX) - ADDQ AX, R15 - ADCQ DX, R14 - - // r4 += a2×b2 - MOVQ 16(CX), AX - MULQ 16(BX) - ADDQ AX, R15 - ADCQ DX, R14 - - // r4 += a3×b1 - MOVQ 24(CX), AX - MULQ 8(BX) - ADDQ AX, R15 - ADCQ DX, R14 - - // r4 += a4×b0 - MOVQ 32(CX), AX - MULQ (BX) - ADDQ AX, R15 - ADCQ DX, R14 - - // First reduction chain - MOVQ $0x0007ffffffffffff, AX - SHLQ $0x0d, DI, SI - SHLQ $0x0d, R9, R8 - SHLQ $0x0d, R11, R10 - SHLQ $0x0d, R13, R12 - SHLQ $0x0d, R15, R14 - ANDQ AX, DI - IMUL3Q $0x13, R14, R14 - ADDQ R14, DI - ANDQ AX, R9 - ADDQ SI, R9 - ANDQ AX, R11 - ADDQ R8, R11 - ANDQ AX, R13 - ADDQ R10, R13 - ANDQ AX, R15 - ADDQ R12, R15 - - // Second reduction chain (carryPropagate) - MOVQ DI, SI - SHRQ $0x33, SI - MOVQ R9, R8 - SHRQ $0x33, R8 - MOVQ R11, R10 - SHRQ $0x33, R10 - MOVQ R13, R12 - SHRQ $0x33, R12 - MOVQ R15, R14 - SHRQ $0x33, R14 - ANDQ AX, DI - IMUL3Q $0x13, R14, R14 - ADDQ R14, DI - ANDQ AX, R9 - ADDQ SI, R9 - ANDQ AX, R11 - ADDQ R8, R11 - ANDQ AX, R13 - ADDQ R10, R13 - ANDQ AX, R15 - ADDQ R12, R15 - - // Store output - MOVQ out+0(FP), AX - MOVQ DI, (AX) - MOVQ R9, 8(AX) - MOVQ R11, 16(AX) - MOVQ R13, 24(AX) - MOVQ R15, 32(AX) - RET - -// func feSquare(out *Element, a *Element) -TEXT ·feSquare(SB), NOSPLIT, $0-16 - MOVQ a+8(FP), CX - - // r0 = l0×l0 - MOVQ (CX), AX - MULQ (CX) - MOVQ AX, SI - MOVQ DX, BX - - // r0 += 38×l1×l4 - MOVQ 8(CX), AX - IMUL3Q $0x26, AX, AX - MULQ 32(CX) - ADDQ AX, SI - ADCQ DX, BX - - // r0 += 38×l2×l3 - MOVQ 16(CX), AX - IMUL3Q $0x26, AX, AX - MULQ 24(CX) - ADDQ AX, SI - ADCQ DX, BX - - // r1 = 2×l0×l1 - MOVQ (CX), AX - SHLQ $0x01, AX - MULQ 8(CX) - MOVQ AX, R8 - MOVQ DX, DI - - // r1 += 38×l2×l4 - MOVQ 16(CX), AX - IMUL3Q $0x26, AX, AX - MULQ 32(CX) - ADDQ AX, R8 - ADCQ DX, DI - - // r1 += 19×l3×l3 - MOVQ 24(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 24(CX) - ADDQ AX, R8 - ADCQ DX, DI - - // r2 = 2×l0×l2 - MOVQ (CX), AX - SHLQ $0x01, AX - MULQ 16(CX) - MOVQ AX, R10 - MOVQ DX, R9 - - // r2 += l1×l1 - MOVQ 8(CX), AX - MULQ 8(CX) - ADDQ AX, R10 - ADCQ DX, R9 - - // r2 += 38×l3×l4 - MOVQ 24(CX), AX - IMUL3Q $0x26, AX, AX - MULQ 32(CX) - ADDQ AX, R10 - ADCQ DX, R9 - - // r3 = 2×l0×l3 - MOVQ (CX), AX - SHLQ $0x01, AX - MULQ 24(CX) - MOVQ AX, R12 - MOVQ DX, R11 - - // r3 += 2×l1×l2 - MOVQ 8(CX), AX - IMUL3Q $0x02, AX, AX - MULQ 16(CX) - ADDQ AX, R12 - ADCQ DX, R11 - - // r3 += 19×l4×l4 - MOVQ 32(CX), AX - IMUL3Q $0x13, AX, AX - MULQ 32(CX) - ADDQ AX, R12 - ADCQ DX, R11 - - // r4 = 2×l0×l4 - MOVQ (CX), AX - SHLQ $0x01, AX - MULQ 32(CX) - MOVQ AX, R14 - MOVQ DX, R13 - - // r4 += 2×l1×l3 - MOVQ 8(CX), AX - IMUL3Q $0x02, AX, AX - MULQ 24(CX) - ADDQ AX, R14 - ADCQ DX, R13 - - // r4 += l2×l2 - MOVQ 16(CX), AX - MULQ 16(CX) - ADDQ AX, R14 - ADCQ DX, R13 - - // First reduction chain - MOVQ $0x0007ffffffffffff, AX - SHLQ $0x0d, SI, BX - SHLQ $0x0d, R8, DI - SHLQ $0x0d, R10, R9 - SHLQ $0x0d, R12, R11 - SHLQ $0x0d, R14, R13 - ANDQ AX, SI - IMUL3Q $0x13, R13, R13 - ADDQ R13, SI - ANDQ AX, R8 - ADDQ BX, R8 - ANDQ AX, R10 - ADDQ DI, R10 - ANDQ AX, R12 - ADDQ R9, R12 - ANDQ AX, R14 - ADDQ R11, R14 - - // Second reduction chain (carryPropagate) - MOVQ SI, BX - SHRQ $0x33, BX - MOVQ R8, DI - SHRQ $0x33, DI - MOVQ R10, R9 - SHRQ $0x33, R9 - MOVQ R12, R11 - SHRQ $0x33, R11 - MOVQ R14, R13 - SHRQ $0x33, R13 - ANDQ AX, SI - IMUL3Q $0x13, R13, R13 - ADDQ R13, SI - ANDQ AX, R8 - ADDQ BX, R8 - ANDQ AX, R10 - ADDQ DI, R10 - ANDQ AX, R12 - ADDQ R9, R12 - ANDQ AX, R14 - ADDQ R11, R14 - - // Store output - MOVQ out+0(FP), AX - MOVQ SI, (AX) - MOVQ R8, 8(AX) - MOVQ R10, 16(AX) - MOVQ R12, 24(AX) - MOVQ R14, 32(AX) - RET diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64_noasm.go b/vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64_noasm.go deleted file mode 100644 index 9da280d1..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_amd64_noasm.go +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !amd64 || !gc || purego - -package field - -func feMul(v, x, y *Element) { feMulGeneric(v, x, y) } - -func feSquare(v, x *Element) { feSquareGeneric(v, x) } diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64.go b/vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64.go deleted file mode 100644 index 075fe9b9..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64.go +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build arm64 && gc && !purego - -package field - -//go:noescape -func carryPropagate(v *Element) - -func (v *Element) carryPropagate() *Element { - carryPropagate(v) - return v -} diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64.s b/vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64.s deleted file mode 100644 index 3126a434..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64.s +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build arm64 && gc && !purego - -#include "textflag.h" - -// carryPropagate works exactly like carryPropagateGeneric and uses the -// same AND, ADD, and LSR+MADD instructions emitted by the compiler, but -// avoids loading R0-R4 twice and uses LDP and STP. -// -// See https://golang.org/issues/43145 for the main compiler issue. -// -// func carryPropagate(v *Element) -TEXT ·carryPropagate(SB),NOFRAME|NOSPLIT,$0-8 - MOVD v+0(FP), R20 - - LDP 0(R20), (R0, R1) - LDP 16(R20), (R2, R3) - MOVD 32(R20), R4 - - AND $0x7ffffffffffff, R0, R10 - AND $0x7ffffffffffff, R1, R11 - AND $0x7ffffffffffff, R2, R12 - AND $0x7ffffffffffff, R3, R13 - AND $0x7ffffffffffff, R4, R14 - - ADD R0>>51, R11, R11 - ADD R1>>51, R12, R12 - ADD R2>>51, R13, R13 - ADD R3>>51, R14, R14 - // R4>>51 * 19 + R10 -> R10 - LSR $51, R4, R21 - MOVD $19, R22 - MADD R22, R10, R21, R10 - - STP (R10, R11), 0(R20) - STP (R12, R13), 16(R20) - MOVD R14, 32(R20) - - RET diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64_noasm.go b/vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64_noasm.go deleted file mode 100644 index fc029ac1..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_arm64_noasm.go +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !arm64 || !gc || purego - -package field - -func (v *Element) carryPropagate() *Element { - return v.carryPropagateGeneric() -} diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_generic.go b/vendor/golang.org/x/crypto/curve25519/internal/field/fe_generic.go deleted file mode 100644 index 2671217d..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/fe_generic.go +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright (c) 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package field - -import "math/bits" - -// uint128 holds a 128-bit number as two 64-bit limbs, for use with the -// bits.Mul64 and bits.Add64 intrinsics. -type uint128 struct { - lo, hi uint64 -} - -// mul64 returns a * b. -func mul64(a, b uint64) uint128 { - hi, lo := bits.Mul64(a, b) - return uint128{lo, hi} -} - -// addMul64 returns v + a * b. -func addMul64(v uint128, a, b uint64) uint128 { - hi, lo := bits.Mul64(a, b) - lo, c := bits.Add64(lo, v.lo, 0) - hi, _ = bits.Add64(hi, v.hi, c) - return uint128{lo, hi} -} - -// shiftRightBy51 returns a >> 51. a is assumed to be at most 115 bits. -func shiftRightBy51(a uint128) uint64 { - return (a.hi << (64 - 51)) | (a.lo >> 51) -} - -func feMulGeneric(v, a, b *Element) { - a0 := a.l0 - a1 := a.l1 - a2 := a.l2 - a3 := a.l3 - a4 := a.l4 - - b0 := b.l0 - b1 := b.l1 - b2 := b.l2 - b3 := b.l3 - b4 := b.l4 - - // Limb multiplication works like pen-and-paper columnar multiplication, but - // with 51-bit limbs instead of digits. - // - // a4 a3 a2 a1 a0 x - // b4 b3 b2 b1 b0 = - // ------------------------ - // a4b0 a3b0 a2b0 a1b0 a0b0 + - // a4b1 a3b1 a2b1 a1b1 a0b1 + - // a4b2 a3b2 a2b2 a1b2 a0b2 + - // a4b3 a3b3 a2b3 a1b3 a0b3 + - // a4b4 a3b4 a2b4 a1b4 a0b4 = - // ---------------------------------------------- - // r8 r7 r6 r5 r4 r3 r2 r1 r0 - // - // We can then use the reduction identity (a * 2²⁵⁵ + b = a * 19 + b) to - // reduce the limbs that would overflow 255 bits. r5 * 2²⁵⁵ becomes 19 * r5, - // r6 * 2³⁰⁶ becomes 19 * r6 * 2⁵¹, etc. - // - // Reduction can be carried out simultaneously to multiplication. For - // example, we do not compute r5: whenever the result of a multiplication - // belongs to r5, like a1b4, we multiply it by 19 and add the result to r0. - // - // a4b0 a3b0 a2b0 a1b0 a0b0 + - // a3b1 a2b1 a1b1 a0b1 19×a4b1 + - // a2b2 a1b2 a0b2 19×a4b2 19×a3b2 + - // a1b3 a0b3 19×a4b3 19×a3b3 19×a2b3 + - // a0b4 19×a4b4 19×a3b4 19×a2b4 19×a1b4 = - // -------------------------------------- - // r4 r3 r2 r1 r0 - // - // Finally we add up the columns into wide, overlapping limbs. - - a1_19 := a1 * 19 - a2_19 := a2 * 19 - a3_19 := a3 * 19 - a4_19 := a4 * 19 - - // r0 = a0×b0 + 19×(a1×b4 + a2×b3 + a3×b2 + a4×b1) - r0 := mul64(a0, b0) - r0 = addMul64(r0, a1_19, b4) - r0 = addMul64(r0, a2_19, b3) - r0 = addMul64(r0, a3_19, b2) - r0 = addMul64(r0, a4_19, b1) - - // r1 = a0×b1 + a1×b0 + 19×(a2×b4 + a3×b3 + a4×b2) - r1 := mul64(a0, b1) - r1 = addMul64(r1, a1, b0) - r1 = addMul64(r1, a2_19, b4) - r1 = addMul64(r1, a3_19, b3) - r1 = addMul64(r1, a4_19, b2) - - // r2 = a0×b2 + a1×b1 + a2×b0 + 19×(a3×b4 + a4×b3) - r2 := mul64(a0, b2) - r2 = addMul64(r2, a1, b1) - r2 = addMul64(r2, a2, b0) - r2 = addMul64(r2, a3_19, b4) - r2 = addMul64(r2, a4_19, b3) - - // r3 = a0×b3 + a1×b2 + a2×b1 + a3×b0 + 19×a4×b4 - r3 := mul64(a0, b3) - r3 = addMul64(r3, a1, b2) - r3 = addMul64(r3, a2, b1) - r3 = addMul64(r3, a3, b0) - r3 = addMul64(r3, a4_19, b4) - - // r4 = a0×b4 + a1×b3 + a2×b2 + a3×b1 + a4×b0 - r4 := mul64(a0, b4) - r4 = addMul64(r4, a1, b3) - r4 = addMul64(r4, a2, b2) - r4 = addMul64(r4, a3, b1) - r4 = addMul64(r4, a4, b0) - - // After the multiplication, we need to reduce (carry) the five coefficients - // to obtain a result with limbs that are at most slightly larger than 2⁵¹, - // to respect the Element invariant. - // - // Overall, the reduction works the same as carryPropagate, except with - // wider inputs: we take the carry for each coefficient by shifting it right - // by 51, and add it to the limb above it. The top carry is multiplied by 19 - // according to the reduction identity and added to the lowest limb. - // - // The largest coefficient (r0) will be at most 111 bits, which guarantees - // that all carries are at most 111 - 51 = 60 bits, which fits in a uint64. - // - // r0 = a0×b0 + 19×(a1×b4 + a2×b3 + a3×b2 + a4×b1) - // r0 < 2⁵²×2⁵² + 19×(2⁵²×2⁵² + 2⁵²×2⁵² + 2⁵²×2⁵² + 2⁵²×2⁵²) - // r0 < (1 + 19 × 4) × 2⁵² × 2⁵² - // r0 < 2⁷ × 2⁵² × 2⁵² - // r0 < 2¹¹¹ - // - // Moreover, the top coefficient (r4) is at most 107 bits, so c4 is at most - // 56 bits, and c4 * 19 is at most 61 bits, which again fits in a uint64 and - // allows us to easily apply the reduction identity. - // - // r4 = a0×b4 + a1×b3 + a2×b2 + a3×b1 + a4×b0 - // r4 < 5 × 2⁵² × 2⁵² - // r4 < 2¹⁰⁷ - // - - c0 := shiftRightBy51(r0) - c1 := shiftRightBy51(r1) - c2 := shiftRightBy51(r2) - c3 := shiftRightBy51(r3) - c4 := shiftRightBy51(r4) - - rr0 := r0.lo&maskLow51Bits + c4*19 - rr1 := r1.lo&maskLow51Bits + c0 - rr2 := r2.lo&maskLow51Bits + c1 - rr3 := r3.lo&maskLow51Bits + c2 - rr4 := r4.lo&maskLow51Bits + c3 - - // Now all coefficients fit into 64-bit registers but are still too large to - // be passed around as a Element. We therefore do one last carry chain, - // where the carries will be small enough to fit in the wiggle room above 2⁵¹. - *v = Element{rr0, rr1, rr2, rr3, rr4} - v.carryPropagate() -} - -func feSquareGeneric(v, a *Element) { - l0 := a.l0 - l1 := a.l1 - l2 := a.l2 - l3 := a.l3 - l4 := a.l4 - - // Squaring works precisely like multiplication above, but thanks to its - // symmetry we get to group a few terms together. - // - // l4 l3 l2 l1 l0 x - // l4 l3 l2 l1 l0 = - // ------------------------ - // l4l0 l3l0 l2l0 l1l0 l0l0 + - // l4l1 l3l1 l2l1 l1l1 l0l1 + - // l4l2 l3l2 l2l2 l1l2 l0l2 + - // l4l3 l3l3 l2l3 l1l3 l0l3 + - // l4l4 l3l4 l2l4 l1l4 l0l4 = - // ---------------------------------------------- - // r8 r7 r6 r5 r4 r3 r2 r1 r0 - // - // l4l0 l3l0 l2l0 l1l0 l0l0 + - // l3l1 l2l1 l1l1 l0l1 19×l4l1 + - // l2l2 l1l2 l0l2 19×l4l2 19×l3l2 + - // l1l3 l0l3 19×l4l3 19×l3l3 19×l2l3 + - // l0l4 19×l4l4 19×l3l4 19×l2l4 19×l1l4 = - // -------------------------------------- - // r4 r3 r2 r1 r0 - // - // With precomputed 2×, 19×, and 2×19× terms, we can compute each limb with - // only three Mul64 and four Add64, instead of five and eight. - - l0_2 := l0 * 2 - l1_2 := l1 * 2 - - l1_38 := l1 * 38 - l2_38 := l2 * 38 - l3_38 := l3 * 38 - - l3_19 := l3 * 19 - l4_19 := l4 * 19 - - // r0 = l0×l0 + 19×(l1×l4 + l2×l3 + l3×l2 + l4×l1) = l0×l0 + 19×2×(l1×l4 + l2×l3) - r0 := mul64(l0, l0) - r0 = addMul64(r0, l1_38, l4) - r0 = addMul64(r0, l2_38, l3) - - // r1 = l0×l1 + l1×l0 + 19×(l2×l4 + l3×l3 + l4×l2) = 2×l0×l1 + 19×2×l2×l4 + 19×l3×l3 - r1 := mul64(l0_2, l1) - r1 = addMul64(r1, l2_38, l4) - r1 = addMul64(r1, l3_19, l3) - - // r2 = l0×l2 + l1×l1 + l2×l0 + 19×(l3×l4 + l4×l3) = 2×l0×l2 + l1×l1 + 19×2×l3×l4 - r2 := mul64(l0_2, l2) - r2 = addMul64(r2, l1, l1) - r2 = addMul64(r2, l3_38, l4) - - // r3 = l0×l3 + l1×l2 + l2×l1 + l3×l0 + 19×l4×l4 = 2×l0×l3 + 2×l1×l2 + 19×l4×l4 - r3 := mul64(l0_2, l3) - r3 = addMul64(r3, l1_2, l2) - r3 = addMul64(r3, l4_19, l4) - - // r4 = l0×l4 + l1×l3 + l2×l2 + l3×l1 + l4×l0 = 2×l0×l4 + 2×l1×l3 + l2×l2 - r4 := mul64(l0_2, l4) - r4 = addMul64(r4, l1_2, l3) - r4 = addMul64(r4, l2, l2) - - c0 := shiftRightBy51(r0) - c1 := shiftRightBy51(r1) - c2 := shiftRightBy51(r2) - c3 := shiftRightBy51(r3) - c4 := shiftRightBy51(r4) - - rr0 := r0.lo&maskLow51Bits + c4*19 - rr1 := r1.lo&maskLow51Bits + c0 - rr2 := r2.lo&maskLow51Bits + c1 - rr3 := r3.lo&maskLow51Bits + c2 - rr4 := r4.lo&maskLow51Bits + c3 - - *v = Element{rr0, rr1, rr2, rr3, rr4} - v.carryPropagate() -} - -// carryPropagateGeneric brings the limbs below 52 bits by applying the reduction -// identity (a * 2²⁵⁵ + b = a * 19 + b) to the l4 carry. TODO inline -func (v *Element) carryPropagateGeneric() *Element { - c0 := v.l0 >> 51 - c1 := v.l1 >> 51 - c2 := v.l2 >> 51 - c3 := v.l3 >> 51 - c4 := v.l4 >> 51 - - v.l0 = v.l0&maskLow51Bits + c4*19 - v.l1 = v.l1&maskLow51Bits + c0 - v.l2 = v.l2&maskLow51Bits + c1 - v.l3 = v.l3&maskLow51Bits + c2 - v.l4 = v.l4&maskLow51Bits + c3 - - return v -} diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/sync.checkpoint b/vendor/golang.org/x/crypto/curve25519/internal/field/sync.checkpoint deleted file mode 100644 index e3685f95..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/sync.checkpoint +++ /dev/null @@ -1 +0,0 @@ -b0c49ae9f59d233526f8934262c5bbbe14d4358d diff --git a/vendor/golang.org/x/crypto/curve25519/internal/field/sync.sh b/vendor/golang.org/x/crypto/curve25519/internal/field/sync.sh deleted file mode 100644 index 1ba22a8b..00000000 --- a/vendor/golang.org/x/crypto/curve25519/internal/field/sync.sh +++ /dev/null @@ -1,19 +0,0 @@ -#! /bin/bash -set -euo pipefail - -cd "$(git rev-parse --show-toplevel)" - -STD_PATH=src/crypto/ed25519/internal/edwards25519/field -LOCAL_PATH=curve25519/internal/field -LAST_SYNC_REF=$(cat $LOCAL_PATH/sync.checkpoint) - -git fetch https://go.googlesource.com/go master - -if git diff --quiet $LAST_SYNC_REF:$STD_PATH FETCH_HEAD:$STD_PATH; then - echo "No changes." -else - NEW_REF=$(git rev-parse FETCH_HEAD | tee $LOCAL_PATH/sync.checkpoint) - echo "Applying changes from $LAST_SYNC_REF to $NEW_REF..." - git diff $LAST_SYNC_REF:$STD_PATH FETCH_HEAD:$STD_PATH | \ - git apply -3 --directory=$LOCAL_PATH -fi diff --git a/vendor/golang.org/x/crypto/hkdf/hkdf.go b/vendor/golang.org/x/crypto/hkdf/hkdf.go index f4ded5fe..3bee6629 100644 --- a/vendor/golang.org/x/crypto/hkdf/hkdf.go +++ b/vendor/golang.org/x/crypto/hkdf/hkdf.go @@ -8,7 +8,7 @@ // HKDF is a cryptographic key derivation function (KDF) with the goal of // expanding limited input keying material into one or more cryptographically // strong secret keys. -package hkdf // import "golang.org/x/crypto/hkdf" +package hkdf import ( "crypto/hmac" diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s index e0d3c647..13375738 100644 --- a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s +++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s @@ -1,108 +1,93 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run sum_amd64_asm.go -out ../sum_amd64.s -pkg poly1305. DO NOT EDIT. //go:build gc && !purego -#include "textflag.h" - -#define POLY1305_ADD(msg, h0, h1, h2) \ - ADDQ 0(msg), h0; \ - ADCQ 8(msg), h1; \ - ADCQ $1, h2; \ - LEAQ 16(msg), msg - -#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \ - MOVQ r0, AX; \ - MULQ h0; \ - MOVQ AX, t0; \ - MOVQ DX, t1; \ - MOVQ r0, AX; \ - MULQ h1; \ - ADDQ AX, t1; \ - ADCQ $0, DX; \ - MOVQ r0, t2; \ - IMULQ h2, t2; \ - ADDQ DX, t2; \ - \ - MOVQ r1, AX; \ - MULQ h0; \ - ADDQ AX, t1; \ - ADCQ $0, DX; \ - MOVQ DX, h0; \ - MOVQ r1, t3; \ - IMULQ h2, t3; \ - MOVQ r1, AX; \ - MULQ h1; \ - ADDQ AX, t2; \ - ADCQ DX, t3; \ - ADDQ h0, t2; \ - ADCQ $0, t3; \ - \ - MOVQ t0, h0; \ - MOVQ t1, h1; \ - MOVQ t2, h2; \ - ANDQ $3, h2; \ - MOVQ t2, t0; \ - ANDQ $0xFFFFFFFFFFFFFFFC, t0; \ - ADDQ t0, h0; \ - ADCQ t3, h1; \ - ADCQ $0, h2; \ - SHRQ $2, t3, t2; \ - SHRQ $2, t3; \ - ADDQ t2, h0; \ - ADCQ t3, h1; \ - ADCQ $0, h2 - -// func update(state *[7]uint64, msg []byte) +// func update(state *macState, msg []byte) TEXT ·update(SB), $0-32 MOVQ state+0(FP), DI MOVQ msg_base+8(FP), SI MOVQ msg_len+16(FP), R15 - - MOVQ 0(DI), R8 // h0 - MOVQ 8(DI), R9 // h1 - MOVQ 16(DI), R10 // h2 - MOVQ 24(DI), R11 // r0 - MOVQ 32(DI), R12 // r1 - - CMPQ R15, $16 + MOVQ (DI), R8 + MOVQ 8(DI), R9 + MOVQ 16(DI), R10 + MOVQ 24(DI), R11 + MOVQ 32(DI), R12 + CMPQ R15, $0x10 JB bytes_between_0_and_15 loop: - POLY1305_ADD(SI, R8, R9, R10) + ADDQ (SI), R8 + ADCQ 8(SI), R9 + ADCQ $0x01, R10 + LEAQ 16(SI), SI multiply: - POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14) - SUBQ $16, R15 - CMPQ R15, $16 - JAE loop + MOVQ R11, AX + MULQ R8 + MOVQ AX, BX + MOVQ DX, CX + MOVQ R11, AX + MULQ R9 + ADDQ AX, CX + ADCQ $0x00, DX + MOVQ R11, R13 + IMULQ R10, R13 + ADDQ DX, R13 + MOVQ R12, AX + MULQ R8 + ADDQ AX, CX + ADCQ $0x00, DX + MOVQ DX, R8 + MOVQ R12, R14 + IMULQ R10, R14 + MOVQ R12, AX + MULQ R9 + ADDQ AX, R13 + ADCQ DX, R14 + ADDQ R8, R13 + ADCQ $0x00, R14 + MOVQ BX, R8 + MOVQ CX, R9 + MOVQ R13, R10 + ANDQ $0x03, R10 + MOVQ R13, BX + ANDQ $-4, BX + ADDQ BX, R8 + ADCQ R14, R9 + ADCQ $0x00, R10 + SHRQ $0x02, R14, R13 + SHRQ $0x02, R14 + ADDQ R13, R8 + ADCQ R14, R9 + ADCQ $0x00, R10 + SUBQ $0x10, R15 + CMPQ R15, $0x10 + JAE loop bytes_between_0_and_15: TESTQ R15, R15 JZ done - MOVQ $1, BX + MOVQ $0x00000001, BX XORQ CX, CX XORQ R13, R13 ADDQ R15, SI flush_buffer: - SHLQ $8, BX, CX - SHLQ $8, BX + SHLQ $0x08, BX, CX + SHLQ $0x08, BX MOVB -1(SI), R13 XORQ R13, BX DECQ SI DECQ R15 JNZ flush_buffer - ADDQ BX, R8 ADCQ CX, R9 - ADCQ $0, R10 - MOVQ $16, R15 + ADCQ $0x00, R10 + MOVQ $0x00000010, R15 JMP multiply done: - MOVQ R8, 0(DI) + MOVQ R8, (DI) MOVQ R9, 8(DI) MOVQ R10, 16(DI) RET diff --git a/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go b/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go index 904b57e0..28cd99c7 100644 --- a/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go +++ b/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go @@ -16,7 +16,7 @@ Hash Functions SHA-1, SHA-224, SHA-256, SHA-384 and SHA-512 for HMAC. To choose, you can pass the `New` functions from the different SHA packages to pbkdf2.Key. */ -package pbkdf2 // import "golang.org/x/crypto/pbkdf2" +package pbkdf2 import ( "crypto/hmac" diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go index 3fd05b27..3685b344 100644 --- a/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go +++ b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // Package salsa provides low-level access to functions in the Salsa family. -package salsa // import "golang.org/x/crypto/salsa20/salsa" +package salsa import "math/bits" diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s index fcce0234..3883e0ec 100644 --- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s +++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s @@ -1,880 +1,880 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run salsa20_amd64_asm.go -out ../salsa20_amd64.s -pkg salsa. DO NOT EDIT. //go:build amd64 && !purego && gc -// This code was translated into a form compatible with 6a from the public -// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html +// func salsa2020XORKeyStream(out *byte, in *byte, n uint64, nonce *byte, key *byte) +// Requires: SSE2 +TEXT ·salsa2020XORKeyStream(SB), $456-40 + // This needs up to 64 bytes at 360(R12); hence the non-obvious frame size. + MOVQ out+0(FP), DI + MOVQ in+8(FP), SI + MOVQ n+16(FP), DX + MOVQ nonce+24(FP), CX + MOVQ key+32(FP), R8 + MOVQ SP, R12 + ADDQ $0x1f, R12 + ANDQ $-32, R12 + MOVQ DX, R9 + MOVQ CX, DX + MOVQ R8, R10 + CMPQ R9, $0x00 + JBE DONE + MOVL 20(R10), CX + MOVL (R10), R8 + MOVL (DX), AX + MOVL 16(R10), R11 + MOVL CX, (R12) + MOVL R8, 4(R12) + MOVL AX, 8(R12) + MOVL R11, 12(R12) + MOVL 8(DX), CX + MOVL 24(R10), R8 + MOVL 4(R10), AX + MOVL 4(DX), R11 + MOVL CX, 16(R12) + MOVL R8, 20(R12) + MOVL AX, 24(R12) + MOVL R11, 28(R12) + MOVL 12(DX), CX + MOVL 12(R10), DX + MOVL 28(R10), R8 + MOVL 8(R10), AX + MOVL DX, 32(R12) + MOVL CX, 36(R12) + MOVL R8, 40(R12) + MOVL AX, 44(R12) + MOVQ $0x61707865, DX + MOVQ $0x3320646e, CX + MOVQ $0x79622d32, R8 + MOVQ $0x6b206574, AX + MOVL DX, 48(R12) + MOVL CX, 52(R12) + MOVL R8, 56(R12) + MOVL AX, 60(R12) + CMPQ R9, $0x00000100 + JB BYTESBETWEEN1AND255 + MOVOA 48(R12), X0 + PSHUFL $0x55, X0, X1 + PSHUFL $0xaa, X0, X2 + PSHUFL $0xff, X0, X3 + PSHUFL $0x00, X0, X0 + MOVOA X1, 64(R12) + MOVOA X2, 80(R12) + MOVOA X3, 96(R12) + MOVOA X0, 112(R12) + MOVOA (R12), X0 + PSHUFL $0xaa, X0, X1 + PSHUFL $0xff, X0, X2 + PSHUFL $0x00, X0, X3 + PSHUFL $0x55, X0, X0 + MOVOA X1, 128(R12) + MOVOA X2, 144(R12) + MOVOA X3, 160(R12) + MOVOA X0, 176(R12) + MOVOA 16(R12), X0 + PSHUFL $0xff, X0, X1 + PSHUFL $0x55, X0, X2 + PSHUFL $0xaa, X0, X0 + MOVOA X1, 192(R12) + MOVOA X2, 208(R12) + MOVOA X0, 224(R12) + MOVOA 32(R12), X0 + PSHUFL $0x00, X0, X1 + PSHUFL $0xaa, X0, X2 + PSHUFL $0xff, X0, X0 + MOVOA X1, 240(R12) + MOVOA X2, 256(R12) + MOVOA X0, 272(R12) -// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte) -// This needs up to 64 bytes at 360(R12); hence the non-obvious frame size. -TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment - MOVQ out+0(FP),DI - MOVQ in+8(FP),SI - MOVQ n+16(FP),DX - MOVQ nonce+24(FP),CX - MOVQ key+32(FP),R8 +BYTESATLEAST256: + MOVL 16(R12), DX + MOVL 36(R12), CX + MOVL DX, 288(R12) + MOVL CX, 304(R12) + SHLQ $0x20, CX + ADDQ CX, DX + ADDQ $0x01, DX + MOVQ DX, CX + SHRQ $0x20, CX + MOVL DX, 292(R12) + MOVL CX, 308(R12) + ADDQ $0x01, DX + MOVQ DX, CX + SHRQ $0x20, CX + MOVL DX, 296(R12) + MOVL CX, 312(R12) + ADDQ $0x01, DX + MOVQ DX, CX + SHRQ $0x20, CX + MOVL DX, 300(R12) + MOVL CX, 316(R12) + ADDQ $0x01, DX + MOVQ DX, CX + SHRQ $0x20, CX + MOVL DX, 16(R12) + MOVL CX, 36(R12) + MOVQ R9, 352(R12) + MOVQ $0x00000014, DX + MOVOA 64(R12), X0 + MOVOA 80(R12), X1 + MOVOA 96(R12), X2 + MOVOA 256(R12), X3 + MOVOA 272(R12), X4 + MOVOA 128(R12), X5 + MOVOA 144(R12), X6 + MOVOA 176(R12), X7 + MOVOA 192(R12), X8 + MOVOA 208(R12), X9 + MOVOA 224(R12), X10 + MOVOA 304(R12), X11 + MOVOA 112(R12), X12 + MOVOA 160(R12), X13 + MOVOA 240(R12), X14 + MOVOA 288(R12), X15 - MOVQ SP,R12 - ADDQ $31, R12 - ANDQ $~31, R12 +MAINLOOP1: + MOVOA X1, 320(R12) + MOVOA X2, 336(R12) + MOVOA X13, X1 + PADDL X12, X1 + MOVOA X1, X2 + PSLLL $0x07, X1 + PXOR X1, X14 + PSRLL $0x19, X2 + PXOR X2, X14 + MOVOA X7, X1 + PADDL X0, X1 + MOVOA X1, X2 + PSLLL $0x07, X1 + PXOR X1, X11 + PSRLL $0x19, X2 + PXOR X2, X11 + MOVOA X12, X1 + PADDL X14, X1 + MOVOA X1, X2 + PSLLL $0x09, X1 + PXOR X1, X15 + PSRLL $0x17, X2 + PXOR X2, X15 + MOVOA X0, X1 + PADDL X11, X1 + MOVOA X1, X2 + PSLLL $0x09, X1 + PXOR X1, X9 + PSRLL $0x17, X2 + PXOR X2, X9 + MOVOA X14, X1 + PADDL X15, X1 + MOVOA X1, X2 + PSLLL $0x0d, X1 + PXOR X1, X13 + PSRLL $0x13, X2 + PXOR X2, X13 + MOVOA X11, X1 + PADDL X9, X1 + MOVOA X1, X2 + PSLLL $0x0d, X1 + PXOR X1, X7 + PSRLL $0x13, X2 + PXOR X2, X7 + MOVOA X15, X1 + PADDL X13, X1 + MOVOA X1, X2 + PSLLL $0x12, X1 + PXOR X1, X12 + PSRLL $0x0e, X2 + PXOR X2, X12 + MOVOA 320(R12), X1 + MOVOA X12, 320(R12) + MOVOA X9, X2 + PADDL X7, X2 + MOVOA X2, X12 + PSLLL $0x12, X2 + PXOR X2, X0 + PSRLL $0x0e, X12 + PXOR X12, X0 + MOVOA X5, X2 + PADDL X1, X2 + MOVOA X2, X12 + PSLLL $0x07, X2 + PXOR X2, X3 + PSRLL $0x19, X12 + PXOR X12, X3 + MOVOA 336(R12), X2 + MOVOA X0, 336(R12) + MOVOA X6, X0 + PADDL X2, X0 + MOVOA X0, X12 + PSLLL $0x07, X0 + PXOR X0, X4 + PSRLL $0x19, X12 + PXOR X12, X4 + MOVOA X1, X0 + PADDL X3, X0 + MOVOA X0, X12 + PSLLL $0x09, X0 + PXOR X0, X10 + PSRLL $0x17, X12 + PXOR X12, X10 + MOVOA X2, X0 + PADDL X4, X0 + MOVOA X0, X12 + PSLLL $0x09, X0 + PXOR X0, X8 + PSRLL $0x17, X12 + PXOR X12, X8 + MOVOA X3, X0 + PADDL X10, X0 + MOVOA X0, X12 + PSLLL $0x0d, X0 + PXOR X0, X5 + PSRLL $0x13, X12 + PXOR X12, X5 + MOVOA X4, X0 + PADDL X8, X0 + MOVOA X0, X12 + PSLLL $0x0d, X0 + PXOR X0, X6 + PSRLL $0x13, X12 + PXOR X12, X6 + MOVOA X10, X0 + PADDL X5, X0 + MOVOA X0, X12 + PSLLL $0x12, X0 + PXOR X0, X1 + PSRLL $0x0e, X12 + PXOR X12, X1 + MOVOA 320(R12), X0 + MOVOA X1, 320(R12) + MOVOA X4, X1 + PADDL X0, X1 + MOVOA X1, X12 + PSLLL $0x07, X1 + PXOR X1, X7 + PSRLL $0x19, X12 + PXOR X12, X7 + MOVOA X8, X1 + PADDL X6, X1 + MOVOA X1, X12 + PSLLL $0x12, X1 + PXOR X1, X2 + PSRLL $0x0e, X12 + PXOR X12, X2 + MOVOA 336(R12), X12 + MOVOA X2, 336(R12) + MOVOA X14, X1 + PADDL X12, X1 + MOVOA X1, X2 + PSLLL $0x07, X1 + PXOR X1, X5 + PSRLL $0x19, X2 + PXOR X2, X5 + MOVOA X0, X1 + PADDL X7, X1 + MOVOA X1, X2 + PSLLL $0x09, X1 + PXOR X1, X10 + PSRLL $0x17, X2 + PXOR X2, X10 + MOVOA X12, X1 + PADDL X5, X1 + MOVOA X1, X2 + PSLLL $0x09, X1 + PXOR X1, X8 + PSRLL $0x17, X2 + PXOR X2, X8 + MOVOA X7, X1 + PADDL X10, X1 + MOVOA X1, X2 + PSLLL $0x0d, X1 + PXOR X1, X4 + PSRLL $0x13, X2 + PXOR X2, X4 + MOVOA X5, X1 + PADDL X8, X1 + MOVOA X1, X2 + PSLLL $0x0d, X1 + PXOR X1, X14 + PSRLL $0x13, X2 + PXOR X2, X14 + MOVOA X10, X1 + PADDL X4, X1 + MOVOA X1, X2 + PSLLL $0x12, X1 + PXOR X1, X0 + PSRLL $0x0e, X2 + PXOR X2, X0 + MOVOA 320(R12), X1 + MOVOA X0, 320(R12) + MOVOA X8, X0 + PADDL X14, X0 + MOVOA X0, X2 + PSLLL $0x12, X0 + PXOR X0, X12 + PSRLL $0x0e, X2 + PXOR X2, X12 + MOVOA X11, X0 + PADDL X1, X0 + MOVOA X0, X2 + PSLLL $0x07, X0 + PXOR X0, X6 + PSRLL $0x19, X2 + PXOR X2, X6 + MOVOA 336(R12), X2 + MOVOA X12, 336(R12) + MOVOA X3, X0 + PADDL X2, X0 + MOVOA X0, X12 + PSLLL $0x07, X0 + PXOR X0, X13 + PSRLL $0x19, X12 + PXOR X12, X13 + MOVOA X1, X0 + PADDL X6, X0 + MOVOA X0, X12 + PSLLL $0x09, X0 + PXOR X0, X15 + PSRLL $0x17, X12 + PXOR X12, X15 + MOVOA X2, X0 + PADDL X13, X0 + MOVOA X0, X12 + PSLLL $0x09, X0 + PXOR X0, X9 + PSRLL $0x17, X12 + PXOR X12, X9 + MOVOA X6, X0 + PADDL X15, X0 + MOVOA X0, X12 + PSLLL $0x0d, X0 + PXOR X0, X11 + PSRLL $0x13, X12 + PXOR X12, X11 + MOVOA X13, X0 + PADDL X9, X0 + MOVOA X0, X12 + PSLLL $0x0d, X0 + PXOR X0, X3 + PSRLL $0x13, X12 + PXOR X12, X3 + MOVOA X15, X0 + PADDL X11, X0 + MOVOA X0, X12 + PSLLL $0x12, X0 + PXOR X0, X1 + PSRLL $0x0e, X12 + PXOR X12, X1 + MOVOA X9, X0 + PADDL X3, X0 + MOVOA X0, X12 + PSLLL $0x12, X0 + PXOR X0, X2 + PSRLL $0x0e, X12 + PXOR X12, X2 + MOVOA 320(R12), X12 + MOVOA 336(R12), X0 + SUBQ $0x02, DX + JA MAINLOOP1 + PADDL 112(R12), X12 + PADDL 176(R12), X7 + PADDL 224(R12), X10 + PADDL 272(R12), X4 + MOVD X12, DX + MOVD X7, CX + MOVD X10, R8 + MOVD X4, R9 + PSHUFL $0x39, X12, X12 + PSHUFL $0x39, X7, X7 + PSHUFL $0x39, X10, X10 + PSHUFL $0x39, X4, X4 + XORL (SI), DX + XORL 4(SI), CX + XORL 8(SI), R8 + XORL 12(SI), R9 + MOVL DX, (DI) + MOVL CX, 4(DI) + MOVL R8, 8(DI) + MOVL R9, 12(DI) + MOVD X12, DX + MOVD X7, CX + MOVD X10, R8 + MOVD X4, R9 + PSHUFL $0x39, X12, X12 + PSHUFL $0x39, X7, X7 + PSHUFL $0x39, X10, X10 + PSHUFL $0x39, X4, X4 + XORL 64(SI), DX + XORL 68(SI), CX + XORL 72(SI), R8 + XORL 76(SI), R9 + MOVL DX, 64(DI) + MOVL CX, 68(DI) + MOVL R8, 72(DI) + MOVL R9, 76(DI) + MOVD X12, DX + MOVD X7, CX + MOVD X10, R8 + MOVD X4, R9 + PSHUFL $0x39, X12, X12 + PSHUFL $0x39, X7, X7 + PSHUFL $0x39, X10, X10 + PSHUFL $0x39, X4, X4 + XORL 128(SI), DX + XORL 132(SI), CX + XORL 136(SI), R8 + XORL 140(SI), R9 + MOVL DX, 128(DI) + MOVL CX, 132(DI) + MOVL R8, 136(DI) + MOVL R9, 140(DI) + MOVD X12, DX + MOVD X7, CX + MOVD X10, R8 + MOVD X4, R9 + XORL 192(SI), DX + XORL 196(SI), CX + XORL 200(SI), R8 + XORL 204(SI), R9 + MOVL DX, 192(DI) + MOVL CX, 196(DI) + MOVL R8, 200(DI) + MOVL R9, 204(DI) + PADDL 240(R12), X14 + PADDL 64(R12), X0 + PADDL 128(R12), X5 + PADDL 192(R12), X8 + MOVD X14, DX + MOVD X0, CX + MOVD X5, R8 + MOVD X8, R9 + PSHUFL $0x39, X14, X14 + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X5, X5 + PSHUFL $0x39, X8, X8 + XORL 16(SI), DX + XORL 20(SI), CX + XORL 24(SI), R8 + XORL 28(SI), R9 + MOVL DX, 16(DI) + MOVL CX, 20(DI) + MOVL R8, 24(DI) + MOVL R9, 28(DI) + MOVD X14, DX + MOVD X0, CX + MOVD X5, R8 + MOVD X8, R9 + PSHUFL $0x39, X14, X14 + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X5, X5 + PSHUFL $0x39, X8, X8 + XORL 80(SI), DX + XORL 84(SI), CX + XORL 88(SI), R8 + XORL 92(SI), R9 + MOVL DX, 80(DI) + MOVL CX, 84(DI) + MOVL R8, 88(DI) + MOVL R9, 92(DI) + MOVD X14, DX + MOVD X0, CX + MOVD X5, R8 + MOVD X8, R9 + PSHUFL $0x39, X14, X14 + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X5, X5 + PSHUFL $0x39, X8, X8 + XORL 144(SI), DX + XORL 148(SI), CX + XORL 152(SI), R8 + XORL 156(SI), R9 + MOVL DX, 144(DI) + MOVL CX, 148(DI) + MOVL R8, 152(DI) + MOVL R9, 156(DI) + MOVD X14, DX + MOVD X0, CX + MOVD X5, R8 + MOVD X8, R9 + XORL 208(SI), DX + XORL 212(SI), CX + XORL 216(SI), R8 + XORL 220(SI), R9 + MOVL DX, 208(DI) + MOVL CX, 212(DI) + MOVL R8, 216(DI) + MOVL R9, 220(DI) + PADDL 288(R12), X15 + PADDL 304(R12), X11 + PADDL 80(R12), X1 + PADDL 144(R12), X6 + MOVD X15, DX + MOVD X11, CX + MOVD X1, R8 + MOVD X6, R9 + PSHUFL $0x39, X15, X15 + PSHUFL $0x39, X11, X11 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X6, X6 + XORL 32(SI), DX + XORL 36(SI), CX + XORL 40(SI), R8 + XORL 44(SI), R9 + MOVL DX, 32(DI) + MOVL CX, 36(DI) + MOVL R8, 40(DI) + MOVL R9, 44(DI) + MOVD X15, DX + MOVD X11, CX + MOVD X1, R8 + MOVD X6, R9 + PSHUFL $0x39, X15, X15 + PSHUFL $0x39, X11, X11 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X6, X6 + XORL 96(SI), DX + XORL 100(SI), CX + XORL 104(SI), R8 + XORL 108(SI), R9 + MOVL DX, 96(DI) + MOVL CX, 100(DI) + MOVL R8, 104(DI) + MOVL R9, 108(DI) + MOVD X15, DX + MOVD X11, CX + MOVD X1, R8 + MOVD X6, R9 + PSHUFL $0x39, X15, X15 + PSHUFL $0x39, X11, X11 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X6, X6 + XORL 160(SI), DX + XORL 164(SI), CX + XORL 168(SI), R8 + XORL 172(SI), R9 + MOVL DX, 160(DI) + MOVL CX, 164(DI) + MOVL R8, 168(DI) + MOVL R9, 172(DI) + MOVD X15, DX + MOVD X11, CX + MOVD X1, R8 + MOVD X6, R9 + XORL 224(SI), DX + XORL 228(SI), CX + XORL 232(SI), R8 + XORL 236(SI), R9 + MOVL DX, 224(DI) + MOVL CX, 228(DI) + MOVL R8, 232(DI) + MOVL R9, 236(DI) + PADDL 160(R12), X13 + PADDL 208(R12), X9 + PADDL 256(R12), X3 + PADDL 96(R12), X2 + MOVD X13, DX + MOVD X9, CX + MOVD X3, R8 + MOVD X2, R9 + PSHUFL $0x39, X13, X13 + PSHUFL $0x39, X9, X9 + PSHUFL $0x39, X3, X3 + PSHUFL $0x39, X2, X2 + XORL 48(SI), DX + XORL 52(SI), CX + XORL 56(SI), R8 + XORL 60(SI), R9 + MOVL DX, 48(DI) + MOVL CX, 52(DI) + MOVL R8, 56(DI) + MOVL R9, 60(DI) + MOVD X13, DX + MOVD X9, CX + MOVD X3, R8 + MOVD X2, R9 + PSHUFL $0x39, X13, X13 + PSHUFL $0x39, X9, X9 + PSHUFL $0x39, X3, X3 + PSHUFL $0x39, X2, X2 + XORL 112(SI), DX + XORL 116(SI), CX + XORL 120(SI), R8 + XORL 124(SI), R9 + MOVL DX, 112(DI) + MOVL CX, 116(DI) + MOVL R8, 120(DI) + MOVL R9, 124(DI) + MOVD X13, DX + MOVD X9, CX + MOVD X3, R8 + MOVD X2, R9 + PSHUFL $0x39, X13, X13 + PSHUFL $0x39, X9, X9 + PSHUFL $0x39, X3, X3 + PSHUFL $0x39, X2, X2 + XORL 176(SI), DX + XORL 180(SI), CX + XORL 184(SI), R8 + XORL 188(SI), R9 + MOVL DX, 176(DI) + MOVL CX, 180(DI) + MOVL R8, 184(DI) + MOVL R9, 188(DI) + MOVD X13, DX + MOVD X9, CX + MOVD X3, R8 + MOVD X2, R9 + XORL 240(SI), DX + XORL 244(SI), CX + XORL 248(SI), R8 + XORL 252(SI), R9 + MOVL DX, 240(DI) + MOVL CX, 244(DI) + MOVL R8, 248(DI) + MOVL R9, 252(DI) + MOVQ 352(R12), R9 + SUBQ $0x00000100, R9 + ADDQ $0x00000100, SI + ADDQ $0x00000100, DI + CMPQ R9, $0x00000100 + JAE BYTESATLEAST256 + CMPQ R9, $0x00 + JBE DONE - MOVQ DX,R9 - MOVQ CX,DX - MOVQ R8,R10 - CMPQ R9,$0 - JBE DONE - START: - MOVL 20(R10),CX - MOVL 0(R10),R8 - MOVL 0(DX),AX - MOVL 16(R10),R11 - MOVL CX,0(R12) - MOVL R8, 4 (R12) - MOVL AX, 8 (R12) - MOVL R11, 12 (R12) - MOVL 8(DX),CX - MOVL 24(R10),R8 - MOVL 4(R10),AX - MOVL 4(DX),R11 - MOVL CX,16(R12) - MOVL R8, 20 (R12) - MOVL AX, 24 (R12) - MOVL R11, 28 (R12) - MOVL 12(DX),CX - MOVL 12(R10),DX - MOVL 28(R10),R8 - MOVL 8(R10),AX - MOVL DX,32(R12) - MOVL CX, 36 (R12) - MOVL R8, 40 (R12) - MOVL AX, 44 (R12) - MOVQ $1634760805,DX - MOVQ $857760878,CX - MOVQ $2036477234,R8 - MOVQ $1797285236,AX - MOVL DX,48(R12) - MOVL CX, 52 (R12) - MOVL R8, 56 (R12) - MOVL AX, 60 (R12) - CMPQ R9,$256 - JB BYTESBETWEEN1AND255 - MOVOA 48(R12),X0 - PSHUFL $0X55,X0,X1 - PSHUFL $0XAA,X0,X2 - PSHUFL $0XFF,X0,X3 - PSHUFL $0X00,X0,X0 - MOVOA X1,64(R12) - MOVOA X2,80(R12) - MOVOA X3,96(R12) - MOVOA X0,112(R12) - MOVOA 0(R12),X0 - PSHUFL $0XAA,X0,X1 - PSHUFL $0XFF,X0,X2 - PSHUFL $0X00,X0,X3 - PSHUFL $0X55,X0,X0 - MOVOA X1,128(R12) - MOVOA X2,144(R12) - MOVOA X3,160(R12) - MOVOA X0,176(R12) - MOVOA 16(R12),X0 - PSHUFL $0XFF,X0,X1 - PSHUFL $0X55,X0,X2 - PSHUFL $0XAA,X0,X0 - MOVOA X1,192(R12) - MOVOA X2,208(R12) - MOVOA X0,224(R12) - MOVOA 32(R12),X0 - PSHUFL $0X00,X0,X1 - PSHUFL $0XAA,X0,X2 - PSHUFL $0XFF,X0,X0 - MOVOA X1,240(R12) - MOVOA X2,256(R12) - MOVOA X0,272(R12) - BYTESATLEAST256: - MOVL 16(R12),DX - MOVL 36 (R12),CX - MOVL DX,288(R12) - MOVL CX,304(R12) - SHLQ $32,CX - ADDQ CX,DX - ADDQ $1,DX - MOVQ DX,CX - SHRQ $32,CX - MOVL DX, 292 (R12) - MOVL CX, 308 (R12) - ADDQ $1,DX - MOVQ DX,CX - SHRQ $32,CX - MOVL DX, 296 (R12) - MOVL CX, 312 (R12) - ADDQ $1,DX - MOVQ DX,CX - SHRQ $32,CX - MOVL DX, 300 (R12) - MOVL CX, 316 (R12) - ADDQ $1,DX - MOVQ DX,CX - SHRQ $32,CX - MOVL DX,16(R12) - MOVL CX, 36 (R12) - MOVQ R9,352(R12) - MOVQ $20,DX - MOVOA 64(R12),X0 - MOVOA 80(R12),X1 - MOVOA 96(R12),X2 - MOVOA 256(R12),X3 - MOVOA 272(R12),X4 - MOVOA 128(R12),X5 - MOVOA 144(R12),X6 - MOVOA 176(R12),X7 - MOVOA 192(R12),X8 - MOVOA 208(R12),X9 - MOVOA 224(R12),X10 - MOVOA 304(R12),X11 - MOVOA 112(R12),X12 - MOVOA 160(R12),X13 - MOVOA 240(R12),X14 - MOVOA 288(R12),X15 - MAINLOOP1: - MOVOA X1,320(R12) - MOVOA X2,336(R12) - MOVOA X13,X1 - PADDL X12,X1 - MOVOA X1,X2 - PSLLL $7,X1 - PXOR X1,X14 - PSRLL $25,X2 - PXOR X2,X14 - MOVOA X7,X1 - PADDL X0,X1 - MOVOA X1,X2 - PSLLL $7,X1 - PXOR X1,X11 - PSRLL $25,X2 - PXOR X2,X11 - MOVOA X12,X1 - PADDL X14,X1 - MOVOA X1,X2 - PSLLL $9,X1 - PXOR X1,X15 - PSRLL $23,X2 - PXOR X2,X15 - MOVOA X0,X1 - PADDL X11,X1 - MOVOA X1,X2 - PSLLL $9,X1 - PXOR X1,X9 - PSRLL $23,X2 - PXOR X2,X9 - MOVOA X14,X1 - PADDL X15,X1 - MOVOA X1,X2 - PSLLL $13,X1 - PXOR X1,X13 - PSRLL $19,X2 - PXOR X2,X13 - MOVOA X11,X1 - PADDL X9,X1 - MOVOA X1,X2 - PSLLL $13,X1 - PXOR X1,X7 - PSRLL $19,X2 - PXOR X2,X7 - MOVOA X15,X1 - PADDL X13,X1 - MOVOA X1,X2 - PSLLL $18,X1 - PXOR X1,X12 - PSRLL $14,X2 - PXOR X2,X12 - MOVOA 320(R12),X1 - MOVOA X12,320(R12) - MOVOA X9,X2 - PADDL X7,X2 - MOVOA X2,X12 - PSLLL $18,X2 - PXOR X2,X0 - PSRLL $14,X12 - PXOR X12,X0 - MOVOA X5,X2 - PADDL X1,X2 - MOVOA X2,X12 - PSLLL $7,X2 - PXOR X2,X3 - PSRLL $25,X12 - PXOR X12,X3 - MOVOA 336(R12),X2 - MOVOA X0,336(R12) - MOVOA X6,X0 - PADDL X2,X0 - MOVOA X0,X12 - PSLLL $7,X0 - PXOR X0,X4 - PSRLL $25,X12 - PXOR X12,X4 - MOVOA X1,X0 - PADDL X3,X0 - MOVOA X0,X12 - PSLLL $9,X0 - PXOR X0,X10 - PSRLL $23,X12 - PXOR X12,X10 - MOVOA X2,X0 - PADDL X4,X0 - MOVOA X0,X12 - PSLLL $9,X0 - PXOR X0,X8 - PSRLL $23,X12 - PXOR X12,X8 - MOVOA X3,X0 - PADDL X10,X0 - MOVOA X0,X12 - PSLLL $13,X0 - PXOR X0,X5 - PSRLL $19,X12 - PXOR X12,X5 - MOVOA X4,X0 - PADDL X8,X0 - MOVOA X0,X12 - PSLLL $13,X0 - PXOR X0,X6 - PSRLL $19,X12 - PXOR X12,X6 - MOVOA X10,X0 - PADDL X5,X0 - MOVOA X0,X12 - PSLLL $18,X0 - PXOR X0,X1 - PSRLL $14,X12 - PXOR X12,X1 - MOVOA 320(R12),X0 - MOVOA X1,320(R12) - MOVOA X4,X1 - PADDL X0,X1 - MOVOA X1,X12 - PSLLL $7,X1 - PXOR X1,X7 - PSRLL $25,X12 - PXOR X12,X7 - MOVOA X8,X1 - PADDL X6,X1 - MOVOA X1,X12 - PSLLL $18,X1 - PXOR X1,X2 - PSRLL $14,X12 - PXOR X12,X2 - MOVOA 336(R12),X12 - MOVOA X2,336(R12) - MOVOA X14,X1 - PADDL X12,X1 - MOVOA X1,X2 - PSLLL $7,X1 - PXOR X1,X5 - PSRLL $25,X2 - PXOR X2,X5 - MOVOA X0,X1 - PADDL X7,X1 - MOVOA X1,X2 - PSLLL $9,X1 - PXOR X1,X10 - PSRLL $23,X2 - PXOR X2,X10 - MOVOA X12,X1 - PADDL X5,X1 - MOVOA X1,X2 - PSLLL $9,X1 - PXOR X1,X8 - PSRLL $23,X2 - PXOR X2,X8 - MOVOA X7,X1 - PADDL X10,X1 - MOVOA X1,X2 - PSLLL $13,X1 - PXOR X1,X4 - PSRLL $19,X2 - PXOR X2,X4 - MOVOA X5,X1 - PADDL X8,X1 - MOVOA X1,X2 - PSLLL $13,X1 - PXOR X1,X14 - PSRLL $19,X2 - PXOR X2,X14 - MOVOA X10,X1 - PADDL X4,X1 - MOVOA X1,X2 - PSLLL $18,X1 - PXOR X1,X0 - PSRLL $14,X2 - PXOR X2,X0 - MOVOA 320(R12),X1 - MOVOA X0,320(R12) - MOVOA X8,X0 - PADDL X14,X0 - MOVOA X0,X2 - PSLLL $18,X0 - PXOR X0,X12 - PSRLL $14,X2 - PXOR X2,X12 - MOVOA X11,X0 - PADDL X1,X0 - MOVOA X0,X2 - PSLLL $7,X0 - PXOR X0,X6 - PSRLL $25,X2 - PXOR X2,X6 - MOVOA 336(R12),X2 - MOVOA X12,336(R12) - MOVOA X3,X0 - PADDL X2,X0 - MOVOA X0,X12 - PSLLL $7,X0 - PXOR X0,X13 - PSRLL $25,X12 - PXOR X12,X13 - MOVOA X1,X0 - PADDL X6,X0 - MOVOA X0,X12 - PSLLL $9,X0 - PXOR X0,X15 - PSRLL $23,X12 - PXOR X12,X15 - MOVOA X2,X0 - PADDL X13,X0 - MOVOA X0,X12 - PSLLL $9,X0 - PXOR X0,X9 - PSRLL $23,X12 - PXOR X12,X9 - MOVOA X6,X0 - PADDL X15,X0 - MOVOA X0,X12 - PSLLL $13,X0 - PXOR X0,X11 - PSRLL $19,X12 - PXOR X12,X11 - MOVOA X13,X0 - PADDL X9,X0 - MOVOA X0,X12 - PSLLL $13,X0 - PXOR X0,X3 - PSRLL $19,X12 - PXOR X12,X3 - MOVOA X15,X0 - PADDL X11,X0 - MOVOA X0,X12 - PSLLL $18,X0 - PXOR X0,X1 - PSRLL $14,X12 - PXOR X12,X1 - MOVOA X9,X0 - PADDL X3,X0 - MOVOA X0,X12 - PSLLL $18,X0 - PXOR X0,X2 - PSRLL $14,X12 - PXOR X12,X2 - MOVOA 320(R12),X12 - MOVOA 336(R12),X0 - SUBQ $2,DX - JA MAINLOOP1 - PADDL 112(R12),X12 - PADDL 176(R12),X7 - PADDL 224(R12),X10 - PADDL 272(R12),X4 - MOVD X12,DX - MOVD X7,CX - MOVD X10,R8 - MOVD X4,R9 - PSHUFL $0X39,X12,X12 - PSHUFL $0X39,X7,X7 - PSHUFL $0X39,X10,X10 - PSHUFL $0X39,X4,X4 - XORL 0(SI),DX - XORL 4(SI),CX - XORL 8(SI),R8 - XORL 12(SI),R9 - MOVL DX,0(DI) - MOVL CX,4(DI) - MOVL R8,8(DI) - MOVL R9,12(DI) - MOVD X12,DX - MOVD X7,CX - MOVD X10,R8 - MOVD X4,R9 - PSHUFL $0X39,X12,X12 - PSHUFL $0X39,X7,X7 - PSHUFL $0X39,X10,X10 - PSHUFL $0X39,X4,X4 - XORL 64(SI),DX - XORL 68(SI),CX - XORL 72(SI),R8 - XORL 76(SI),R9 - MOVL DX,64(DI) - MOVL CX,68(DI) - MOVL R8,72(DI) - MOVL R9,76(DI) - MOVD X12,DX - MOVD X7,CX - MOVD X10,R8 - MOVD X4,R9 - PSHUFL $0X39,X12,X12 - PSHUFL $0X39,X7,X7 - PSHUFL $0X39,X10,X10 - PSHUFL $0X39,X4,X4 - XORL 128(SI),DX - XORL 132(SI),CX - XORL 136(SI),R8 - XORL 140(SI),R9 - MOVL DX,128(DI) - MOVL CX,132(DI) - MOVL R8,136(DI) - MOVL R9,140(DI) - MOVD X12,DX - MOVD X7,CX - MOVD X10,R8 - MOVD X4,R9 - XORL 192(SI),DX - XORL 196(SI),CX - XORL 200(SI),R8 - XORL 204(SI),R9 - MOVL DX,192(DI) - MOVL CX,196(DI) - MOVL R8,200(DI) - MOVL R9,204(DI) - PADDL 240(R12),X14 - PADDL 64(R12),X0 - PADDL 128(R12),X5 - PADDL 192(R12),X8 - MOVD X14,DX - MOVD X0,CX - MOVD X5,R8 - MOVD X8,R9 - PSHUFL $0X39,X14,X14 - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X5,X5 - PSHUFL $0X39,X8,X8 - XORL 16(SI),DX - XORL 20(SI),CX - XORL 24(SI),R8 - XORL 28(SI),R9 - MOVL DX,16(DI) - MOVL CX,20(DI) - MOVL R8,24(DI) - MOVL R9,28(DI) - MOVD X14,DX - MOVD X0,CX - MOVD X5,R8 - MOVD X8,R9 - PSHUFL $0X39,X14,X14 - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X5,X5 - PSHUFL $0X39,X8,X8 - XORL 80(SI),DX - XORL 84(SI),CX - XORL 88(SI),R8 - XORL 92(SI),R9 - MOVL DX,80(DI) - MOVL CX,84(DI) - MOVL R8,88(DI) - MOVL R9,92(DI) - MOVD X14,DX - MOVD X0,CX - MOVD X5,R8 - MOVD X8,R9 - PSHUFL $0X39,X14,X14 - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X5,X5 - PSHUFL $0X39,X8,X8 - XORL 144(SI),DX - XORL 148(SI),CX - XORL 152(SI),R8 - XORL 156(SI),R9 - MOVL DX,144(DI) - MOVL CX,148(DI) - MOVL R8,152(DI) - MOVL R9,156(DI) - MOVD X14,DX - MOVD X0,CX - MOVD X5,R8 - MOVD X8,R9 - XORL 208(SI),DX - XORL 212(SI),CX - XORL 216(SI),R8 - XORL 220(SI),R9 - MOVL DX,208(DI) - MOVL CX,212(DI) - MOVL R8,216(DI) - MOVL R9,220(DI) - PADDL 288(R12),X15 - PADDL 304(R12),X11 - PADDL 80(R12),X1 - PADDL 144(R12),X6 - MOVD X15,DX - MOVD X11,CX - MOVD X1,R8 - MOVD X6,R9 - PSHUFL $0X39,X15,X15 - PSHUFL $0X39,X11,X11 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X6,X6 - XORL 32(SI),DX - XORL 36(SI),CX - XORL 40(SI),R8 - XORL 44(SI),R9 - MOVL DX,32(DI) - MOVL CX,36(DI) - MOVL R8,40(DI) - MOVL R9,44(DI) - MOVD X15,DX - MOVD X11,CX - MOVD X1,R8 - MOVD X6,R9 - PSHUFL $0X39,X15,X15 - PSHUFL $0X39,X11,X11 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X6,X6 - XORL 96(SI),DX - XORL 100(SI),CX - XORL 104(SI),R8 - XORL 108(SI),R9 - MOVL DX,96(DI) - MOVL CX,100(DI) - MOVL R8,104(DI) - MOVL R9,108(DI) - MOVD X15,DX - MOVD X11,CX - MOVD X1,R8 - MOVD X6,R9 - PSHUFL $0X39,X15,X15 - PSHUFL $0X39,X11,X11 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X6,X6 - XORL 160(SI),DX - XORL 164(SI),CX - XORL 168(SI),R8 - XORL 172(SI),R9 - MOVL DX,160(DI) - MOVL CX,164(DI) - MOVL R8,168(DI) - MOVL R9,172(DI) - MOVD X15,DX - MOVD X11,CX - MOVD X1,R8 - MOVD X6,R9 - XORL 224(SI),DX - XORL 228(SI),CX - XORL 232(SI),R8 - XORL 236(SI),R9 - MOVL DX,224(DI) - MOVL CX,228(DI) - MOVL R8,232(DI) - MOVL R9,236(DI) - PADDL 160(R12),X13 - PADDL 208(R12),X9 - PADDL 256(R12),X3 - PADDL 96(R12),X2 - MOVD X13,DX - MOVD X9,CX - MOVD X3,R8 - MOVD X2,R9 - PSHUFL $0X39,X13,X13 - PSHUFL $0X39,X9,X9 - PSHUFL $0X39,X3,X3 - PSHUFL $0X39,X2,X2 - XORL 48(SI),DX - XORL 52(SI),CX - XORL 56(SI),R8 - XORL 60(SI),R9 - MOVL DX,48(DI) - MOVL CX,52(DI) - MOVL R8,56(DI) - MOVL R9,60(DI) - MOVD X13,DX - MOVD X9,CX - MOVD X3,R8 - MOVD X2,R9 - PSHUFL $0X39,X13,X13 - PSHUFL $0X39,X9,X9 - PSHUFL $0X39,X3,X3 - PSHUFL $0X39,X2,X2 - XORL 112(SI),DX - XORL 116(SI),CX - XORL 120(SI),R8 - XORL 124(SI),R9 - MOVL DX,112(DI) - MOVL CX,116(DI) - MOVL R8,120(DI) - MOVL R9,124(DI) - MOVD X13,DX - MOVD X9,CX - MOVD X3,R8 - MOVD X2,R9 - PSHUFL $0X39,X13,X13 - PSHUFL $0X39,X9,X9 - PSHUFL $0X39,X3,X3 - PSHUFL $0X39,X2,X2 - XORL 176(SI),DX - XORL 180(SI),CX - XORL 184(SI),R8 - XORL 188(SI),R9 - MOVL DX,176(DI) - MOVL CX,180(DI) - MOVL R8,184(DI) - MOVL R9,188(DI) - MOVD X13,DX - MOVD X9,CX - MOVD X3,R8 - MOVD X2,R9 - XORL 240(SI),DX - XORL 244(SI),CX - XORL 248(SI),R8 - XORL 252(SI),R9 - MOVL DX,240(DI) - MOVL CX,244(DI) - MOVL R8,248(DI) - MOVL R9,252(DI) - MOVQ 352(R12),R9 - SUBQ $256,R9 - ADDQ $256,SI - ADDQ $256,DI - CMPQ R9,$256 - JAE BYTESATLEAST256 - CMPQ R9,$0 - JBE DONE - BYTESBETWEEN1AND255: - CMPQ R9,$64 - JAE NOCOPY - MOVQ DI,DX - LEAQ 360(R12),DI - MOVQ R9,CX +BYTESBETWEEN1AND255: + CMPQ R9, $0x40 + JAE NOCOPY + MOVQ DI, DX + LEAQ 360(R12), DI + MOVQ R9, CX REP; MOVSB - LEAQ 360(R12),DI - LEAQ 360(R12),SI - NOCOPY: - MOVQ R9,352(R12) - MOVOA 48(R12),X0 - MOVOA 0(R12),X1 - MOVOA 16(R12),X2 - MOVOA 32(R12),X3 - MOVOA X1,X4 - MOVQ $20,CX - MAINLOOP2: - PADDL X0,X4 - MOVOA X0,X5 - MOVOA X4,X6 - PSLLL $7,X4 - PSRLL $25,X6 - PXOR X4,X3 - PXOR X6,X3 - PADDL X3,X5 - MOVOA X3,X4 - MOVOA X5,X6 - PSLLL $9,X5 - PSRLL $23,X6 - PXOR X5,X2 - PSHUFL $0X93,X3,X3 - PXOR X6,X2 - PADDL X2,X4 - MOVOA X2,X5 - MOVOA X4,X6 - PSLLL $13,X4 - PSRLL $19,X6 - PXOR X4,X1 - PSHUFL $0X4E,X2,X2 - PXOR X6,X1 - PADDL X1,X5 - MOVOA X3,X4 - MOVOA X5,X6 - PSLLL $18,X5 - PSRLL $14,X6 - PXOR X5,X0 - PSHUFL $0X39,X1,X1 - PXOR X6,X0 - PADDL X0,X4 - MOVOA X0,X5 - MOVOA X4,X6 - PSLLL $7,X4 - PSRLL $25,X6 - PXOR X4,X1 - PXOR X6,X1 - PADDL X1,X5 - MOVOA X1,X4 - MOVOA X5,X6 - PSLLL $9,X5 - PSRLL $23,X6 - PXOR X5,X2 - PSHUFL $0X93,X1,X1 - PXOR X6,X2 - PADDL X2,X4 - MOVOA X2,X5 - MOVOA X4,X6 - PSLLL $13,X4 - PSRLL $19,X6 - PXOR X4,X3 - PSHUFL $0X4E,X2,X2 - PXOR X6,X3 - PADDL X3,X5 - MOVOA X1,X4 - MOVOA X5,X6 - PSLLL $18,X5 - PSRLL $14,X6 - PXOR X5,X0 - PSHUFL $0X39,X3,X3 - PXOR X6,X0 - PADDL X0,X4 - MOVOA X0,X5 - MOVOA X4,X6 - PSLLL $7,X4 - PSRLL $25,X6 - PXOR X4,X3 - PXOR X6,X3 - PADDL X3,X5 - MOVOA X3,X4 - MOVOA X5,X6 - PSLLL $9,X5 - PSRLL $23,X6 - PXOR X5,X2 - PSHUFL $0X93,X3,X3 - PXOR X6,X2 - PADDL X2,X4 - MOVOA X2,X5 - MOVOA X4,X6 - PSLLL $13,X4 - PSRLL $19,X6 - PXOR X4,X1 - PSHUFL $0X4E,X2,X2 - PXOR X6,X1 - PADDL X1,X5 - MOVOA X3,X4 - MOVOA X5,X6 - PSLLL $18,X5 - PSRLL $14,X6 - PXOR X5,X0 - PSHUFL $0X39,X1,X1 - PXOR X6,X0 - PADDL X0,X4 - MOVOA X0,X5 - MOVOA X4,X6 - PSLLL $7,X4 - PSRLL $25,X6 - PXOR X4,X1 - PXOR X6,X1 - PADDL X1,X5 - MOVOA X1,X4 - MOVOA X5,X6 - PSLLL $9,X5 - PSRLL $23,X6 - PXOR X5,X2 - PSHUFL $0X93,X1,X1 - PXOR X6,X2 - PADDL X2,X4 - MOVOA X2,X5 - MOVOA X4,X6 - PSLLL $13,X4 - PSRLL $19,X6 - PXOR X4,X3 - PSHUFL $0X4E,X2,X2 - PXOR X6,X3 - SUBQ $4,CX - PADDL X3,X5 - MOVOA X1,X4 - MOVOA X5,X6 - PSLLL $18,X5 - PXOR X7,X7 - PSRLL $14,X6 - PXOR X5,X0 - PSHUFL $0X39,X3,X3 - PXOR X6,X0 - JA MAINLOOP2 - PADDL 48(R12),X0 - PADDL 0(R12),X1 - PADDL 16(R12),X2 - PADDL 32(R12),X3 - MOVD X0,CX - MOVD X1,R8 - MOVD X2,R9 - MOVD X3,AX - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X2,X2 - PSHUFL $0X39,X3,X3 - XORL 0(SI),CX - XORL 48(SI),R8 - XORL 32(SI),R9 - XORL 16(SI),AX - MOVL CX,0(DI) - MOVL R8,48(DI) - MOVL R9,32(DI) - MOVL AX,16(DI) - MOVD X0,CX - MOVD X1,R8 - MOVD X2,R9 - MOVD X3,AX - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X2,X2 - PSHUFL $0X39,X3,X3 - XORL 20(SI),CX - XORL 4(SI),R8 - XORL 52(SI),R9 - XORL 36(SI),AX - MOVL CX,20(DI) - MOVL R8,4(DI) - MOVL R9,52(DI) - MOVL AX,36(DI) - MOVD X0,CX - MOVD X1,R8 - MOVD X2,R9 - MOVD X3,AX - PSHUFL $0X39,X0,X0 - PSHUFL $0X39,X1,X1 - PSHUFL $0X39,X2,X2 - PSHUFL $0X39,X3,X3 - XORL 40(SI),CX - XORL 24(SI),R8 - XORL 8(SI),R9 - XORL 56(SI),AX - MOVL CX,40(DI) - MOVL R8,24(DI) - MOVL R9,8(DI) - MOVL AX,56(DI) - MOVD X0,CX - MOVD X1,R8 - MOVD X2,R9 - MOVD X3,AX - XORL 60(SI),CX - XORL 44(SI),R8 - XORL 28(SI),R9 - XORL 12(SI),AX - MOVL CX,60(DI) - MOVL R8,44(DI) - MOVL R9,28(DI) - MOVL AX,12(DI) - MOVQ 352(R12),R9 - MOVL 16(R12),CX - MOVL 36 (R12),R8 - ADDQ $1,CX - SHLQ $32,R8 - ADDQ R8,CX - MOVQ CX,R8 - SHRQ $32,R8 - MOVL CX,16(R12) - MOVL R8, 36 (R12) - CMPQ R9,$64 - JA BYTESATLEAST65 - JAE BYTESATLEAST64 - MOVQ DI,SI - MOVQ DX,DI - MOVQ R9,CX + LEAQ 360(R12), DI + LEAQ 360(R12), SI + +NOCOPY: + MOVQ R9, 352(R12) + MOVOA 48(R12), X0 + MOVOA (R12), X1 + MOVOA 16(R12), X2 + MOVOA 32(R12), X3 + MOVOA X1, X4 + MOVQ $0x00000014, CX + +MAINLOOP2: + PADDL X0, X4 + MOVOA X0, X5 + MOVOA X4, X6 + PSLLL $0x07, X4 + PSRLL $0x19, X6 + PXOR X4, X3 + PXOR X6, X3 + PADDL X3, X5 + MOVOA X3, X4 + MOVOA X5, X6 + PSLLL $0x09, X5 + PSRLL $0x17, X6 + PXOR X5, X2 + PSHUFL $0x93, X3, X3 + PXOR X6, X2 + PADDL X2, X4 + MOVOA X2, X5 + MOVOA X4, X6 + PSLLL $0x0d, X4 + PSRLL $0x13, X6 + PXOR X4, X1 + PSHUFL $0x4e, X2, X2 + PXOR X6, X1 + PADDL X1, X5 + MOVOA X3, X4 + MOVOA X5, X6 + PSLLL $0x12, X5 + PSRLL $0x0e, X6 + PXOR X5, X0 + PSHUFL $0x39, X1, X1 + PXOR X6, X0 + PADDL X0, X4 + MOVOA X0, X5 + MOVOA X4, X6 + PSLLL $0x07, X4 + PSRLL $0x19, X6 + PXOR X4, X1 + PXOR X6, X1 + PADDL X1, X5 + MOVOA X1, X4 + MOVOA X5, X6 + PSLLL $0x09, X5 + PSRLL $0x17, X6 + PXOR X5, X2 + PSHUFL $0x93, X1, X1 + PXOR X6, X2 + PADDL X2, X4 + MOVOA X2, X5 + MOVOA X4, X6 + PSLLL $0x0d, X4 + PSRLL $0x13, X6 + PXOR X4, X3 + PSHUFL $0x4e, X2, X2 + PXOR X6, X3 + PADDL X3, X5 + MOVOA X1, X4 + MOVOA X5, X6 + PSLLL $0x12, X5 + PSRLL $0x0e, X6 + PXOR X5, X0 + PSHUFL $0x39, X3, X3 + PXOR X6, X0 + PADDL X0, X4 + MOVOA X0, X5 + MOVOA X4, X6 + PSLLL $0x07, X4 + PSRLL $0x19, X6 + PXOR X4, X3 + PXOR X6, X3 + PADDL X3, X5 + MOVOA X3, X4 + MOVOA X5, X6 + PSLLL $0x09, X5 + PSRLL $0x17, X6 + PXOR X5, X2 + PSHUFL $0x93, X3, X3 + PXOR X6, X2 + PADDL X2, X4 + MOVOA X2, X5 + MOVOA X4, X6 + PSLLL $0x0d, X4 + PSRLL $0x13, X6 + PXOR X4, X1 + PSHUFL $0x4e, X2, X2 + PXOR X6, X1 + PADDL X1, X5 + MOVOA X3, X4 + MOVOA X5, X6 + PSLLL $0x12, X5 + PSRLL $0x0e, X6 + PXOR X5, X0 + PSHUFL $0x39, X1, X1 + PXOR X6, X0 + PADDL X0, X4 + MOVOA X0, X5 + MOVOA X4, X6 + PSLLL $0x07, X4 + PSRLL $0x19, X6 + PXOR X4, X1 + PXOR X6, X1 + PADDL X1, X5 + MOVOA X1, X4 + MOVOA X5, X6 + PSLLL $0x09, X5 + PSRLL $0x17, X6 + PXOR X5, X2 + PSHUFL $0x93, X1, X1 + PXOR X6, X2 + PADDL X2, X4 + MOVOA X2, X5 + MOVOA X4, X6 + PSLLL $0x0d, X4 + PSRLL $0x13, X6 + PXOR X4, X3 + PSHUFL $0x4e, X2, X2 + PXOR X6, X3 + SUBQ $0x04, CX + PADDL X3, X5 + MOVOA X1, X4 + MOVOA X5, X6 + PSLLL $0x12, X5 + PXOR X7, X7 + PSRLL $0x0e, X6 + PXOR X5, X0 + PSHUFL $0x39, X3, X3 + PXOR X6, X0 + JA MAINLOOP2 + PADDL 48(R12), X0 + PADDL (R12), X1 + PADDL 16(R12), X2 + PADDL 32(R12), X3 + MOVD X0, CX + MOVD X1, R8 + MOVD X2, R9 + MOVD X3, AX + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X2, X2 + PSHUFL $0x39, X3, X3 + XORL (SI), CX + XORL 48(SI), R8 + XORL 32(SI), R9 + XORL 16(SI), AX + MOVL CX, (DI) + MOVL R8, 48(DI) + MOVL R9, 32(DI) + MOVL AX, 16(DI) + MOVD X0, CX + MOVD X1, R8 + MOVD X2, R9 + MOVD X3, AX + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X2, X2 + PSHUFL $0x39, X3, X3 + XORL 20(SI), CX + XORL 4(SI), R8 + XORL 52(SI), R9 + XORL 36(SI), AX + MOVL CX, 20(DI) + MOVL R8, 4(DI) + MOVL R9, 52(DI) + MOVL AX, 36(DI) + MOVD X0, CX + MOVD X1, R8 + MOVD X2, R9 + MOVD X3, AX + PSHUFL $0x39, X0, X0 + PSHUFL $0x39, X1, X1 + PSHUFL $0x39, X2, X2 + PSHUFL $0x39, X3, X3 + XORL 40(SI), CX + XORL 24(SI), R8 + XORL 8(SI), R9 + XORL 56(SI), AX + MOVL CX, 40(DI) + MOVL R8, 24(DI) + MOVL R9, 8(DI) + MOVL AX, 56(DI) + MOVD X0, CX + MOVD X1, R8 + MOVD X2, R9 + MOVD X3, AX + XORL 60(SI), CX + XORL 44(SI), R8 + XORL 28(SI), R9 + XORL 12(SI), AX + MOVL CX, 60(DI) + MOVL R8, 44(DI) + MOVL R9, 28(DI) + MOVL AX, 12(DI) + MOVQ 352(R12), R9 + MOVL 16(R12), CX + MOVL 36(R12), R8 + ADDQ $0x01, CX + SHLQ $0x20, R8 + ADDQ R8, CX + MOVQ CX, R8 + SHRQ $0x20, R8 + MOVL CX, 16(R12) + MOVL R8, 36(R12) + CMPQ R9, $0x40 + JA BYTESATLEAST65 + JAE BYTESATLEAST64 + MOVQ DI, SI + MOVQ DX, DI + MOVQ R9, CX REP; MOVSB - BYTESATLEAST64: - DONE: + +BYTESATLEAST64: +DONE: RET - BYTESATLEAST65: - SUBQ $64,R9 - ADDQ $64,DI - ADDQ $64,SI - JMP BYTESBETWEEN1AND255 + +BYTESATLEAST65: + SUBQ $0x40, R9 + ADDQ $0x40, DI + ADDQ $0x40, SI + JMP BYTESBETWEEN1AND255 diff --git a/vendor/golang.org/x/crypto/salsa20/salsa20.go b/vendor/golang.org/x/crypto/salsa20/salsa20.go index 8f4f896c..e75c9342 100644 --- a/vendor/golang.org/x/crypto/salsa20/salsa20.go +++ b/vendor/golang.org/x/crypto/salsa20/salsa20.go @@ -19,7 +19,7 @@ This package also implements XSalsa20: a version of Salsa20 with a 24-byte nonce as specified in https://cr.yp.to/snuffle/xsalsa-20081128.pdf. Simply passing a 24-byte slice as the nonce triggers XSalsa20. */ -package salsa20 // import "golang.org/x/crypto/salsa20" +package salsa20 // TODO(agl): implement XORKeyStream12 and XORKeyStream8 - the reduced round variants of Salsa20. diff --git a/vendor/golang.org/x/crypto/sha3/doc.go b/vendor/golang.org/x/crypto/sha3/doc.go index decd8cf9..7e023090 100644 --- a/vendor/golang.org/x/crypto/sha3/doc.go +++ b/vendor/golang.org/x/crypto/sha3/doc.go @@ -59,4 +59,4 @@ // They produce output of the same length, with the same security strengths // against all attacks. This means, in particular, that SHA3-256 only has // 128-bit collision resistance, because its output length is 32 bytes. -package sha3 // import "golang.org/x/crypto/sha3" +package sha3 diff --git a/vendor/golang.org/x/crypto/sha3/hashes.go b/vendor/golang.org/x/crypto/sha3/hashes.go index 0d8043fd..c544b29e 100644 --- a/vendor/golang.org/x/crypto/sha3/hashes.go +++ b/vendor/golang.org/x/crypto/sha3/hashes.go @@ -9,6 +9,7 @@ package sha3 // bytes. import ( + "crypto" "hash" ) @@ -16,39 +17,50 @@ import ( // Its generic security strength is 224 bits against preimage attacks, // and 112 bits against collision attacks. func New224() hash.Hash { - if h := new224Asm(); h != nil { - return h - } - return &state{rate: 144, outputLen: 28, dsbyte: 0x06} + return new224() } // New256 creates a new SHA3-256 hash. // Its generic security strength is 256 bits against preimage attacks, // and 128 bits against collision attacks. func New256() hash.Hash { - if h := new256Asm(); h != nil { - return h - } - return &state{rate: 136, outputLen: 32, dsbyte: 0x06} + return new256() } // New384 creates a new SHA3-384 hash. // Its generic security strength is 384 bits against preimage attacks, // and 192 bits against collision attacks. func New384() hash.Hash { - if h := new384Asm(); h != nil { - return h - } - return &state{rate: 104, outputLen: 48, dsbyte: 0x06} + return new384() } // New512 creates a new SHA3-512 hash. // Its generic security strength is 512 bits against preimage attacks, // and 256 bits against collision attacks. func New512() hash.Hash { - if h := new512Asm(); h != nil { - return h - } + return new512() +} + +func init() { + crypto.RegisterHash(crypto.SHA3_224, New224) + crypto.RegisterHash(crypto.SHA3_256, New256) + crypto.RegisterHash(crypto.SHA3_384, New384) + crypto.RegisterHash(crypto.SHA3_512, New512) +} + +func new224Generic() *state { + return &state{rate: 144, outputLen: 28, dsbyte: 0x06} +} + +func new256Generic() *state { + return &state{rate: 136, outputLen: 32, dsbyte: 0x06} +} + +func new384Generic() *state { + return &state{rate: 104, outputLen: 48, dsbyte: 0x06} +} + +func new512Generic() *state { return &state{rate: 72, outputLen: 64, dsbyte: 0x06} } diff --git a/vendor/golang.org/x/crypto/sha3/hashes_generic.go b/vendor/golang.org/x/crypto/sha3/hashes_generic.go deleted file mode 100644 index fe8c8479..00000000 --- a/vendor/golang.org/x/crypto/sha3/hashes_generic.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !gc || purego || !s390x - -package sha3 - -import ( - "hash" -) - -// new224Asm returns an assembly implementation of SHA3-224 if available, -// otherwise it returns nil. -func new224Asm() hash.Hash { return nil } - -// new256Asm returns an assembly implementation of SHA3-256 if available, -// otherwise it returns nil. -func new256Asm() hash.Hash { return nil } - -// new384Asm returns an assembly implementation of SHA3-384 if available, -// otherwise it returns nil. -func new384Asm() hash.Hash { return nil } - -// new512Asm returns an assembly implementation of SHA3-512 if available, -// otherwise it returns nil. -func new512Asm() hash.Hash { return nil } diff --git a/vendor/golang.org/x/crypto/sha3/hashes_noasm.go b/vendor/golang.org/x/crypto/sha3/hashes_noasm.go new file mode 100644 index 00000000..9d85fb62 --- /dev/null +++ b/vendor/golang.org/x/crypto/sha3/hashes_noasm.go @@ -0,0 +1,23 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !gc || purego || !s390x + +package sha3 + +func new224() *state { + return new224Generic() +} + +func new256() *state { + return new256Generic() +} + +func new384() *state { + return new384Generic() +} + +func new512() *state { + return new512Generic() +} diff --git a/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s b/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s index 1f539388..99e2f16e 100644 --- a/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s +++ b/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s @@ -1,390 +1,5419 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run keccakf_amd64_asm.go -out ../keccakf_amd64.s -pkg sha3. DO NOT EDIT. //go:build amd64 && !purego && gc -// This code was translated into a form compatible with 6a from the public -// domain sources at https://github.com/gvanas/KeccakCodePackage - -// Offsets in state -#define _ba (0*8) -#define _be (1*8) -#define _bi (2*8) -#define _bo (3*8) -#define _bu (4*8) -#define _ga (5*8) -#define _ge (6*8) -#define _gi (7*8) -#define _go (8*8) -#define _gu (9*8) -#define _ka (10*8) -#define _ke (11*8) -#define _ki (12*8) -#define _ko (13*8) -#define _ku (14*8) -#define _ma (15*8) -#define _me (16*8) -#define _mi (17*8) -#define _mo (18*8) -#define _mu (19*8) -#define _sa (20*8) -#define _se (21*8) -#define _si (22*8) -#define _so (23*8) -#define _su (24*8) - -// Temporary registers -#define rT1 AX - -// Round vars -#define rpState DI -#define rpStack SP - -#define rDa BX -#define rDe CX -#define rDi DX -#define rDo R8 -#define rDu R9 - -#define rBa R10 -#define rBe R11 -#define rBi R12 -#define rBo R13 -#define rBu R14 - -#define rCa SI -#define rCe BP -#define rCi rBi -#define rCo rBo -#define rCu R15 - -#define MOVQ_RBI_RCE MOVQ rBi, rCe -#define XORQ_RT1_RCA XORQ rT1, rCa -#define XORQ_RT1_RCE XORQ rT1, rCe -#define XORQ_RBA_RCU XORQ rBa, rCu -#define XORQ_RBE_RCU XORQ rBe, rCu -#define XORQ_RDU_RCU XORQ rDu, rCu -#define XORQ_RDA_RCA XORQ rDa, rCa -#define XORQ_RDE_RCE XORQ rDe, rCe - -#define mKeccakRound(iState, oState, rc, B_RBI_RCE, G_RT1_RCA, G_RT1_RCE, G_RBA_RCU, K_RT1_RCA, K_RT1_RCE, K_RBA_RCU, M_RT1_RCA, M_RT1_RCE, M_RBE_RCU, S_RDU_RCU, S_RDA_RCA, S_RDE_RCE) \ - /* Prepare round */ \ - MOVQ rCe, rDa; \ - ROLQ $1, rDa; \ - \ - MOVQ _bi(iState), rCi; \ - XORQ _gi(iState), rDi; \ - XORQ rCu, rDa; \ - XORQ _ki(iState), rCi; \ - XORQ _mi(iState), rDi; \ - XORQ rDi, rCi; \ - \ - MOVQ rCi, rDe; \ - ROLQ $1, rDe; \ - \ - MOVQ _bo(iState), rCo; \ - XORQ _go(iState), rDo; \ - XORQ rCa, rDe; \ - XORQ _ko(iState), rCo; \ - XORQ _mo(iState), rDo; \ - XORQ rDo, rCo; \ - \ - MOVQ rCo, rDi; \ - ROLQ $1, rDi; \ - \ - MOVQ rCu, rDo; \ - XORQ rCe, rDi; \ - ROLQ $1, rDo; \ - \ - MOVQ rCa, rDu; \ - XORQ rCi, rDo; \ - ROLQ $1, rDu; \ - \ - /* Result b */ \ - MOVQ _ba(iState), rBa; \ - MOVQ _ge(iState), rBe; \ - XORQ rCo, rDu; \ - MOVQ _ki(iState), rBi; \ - MOVQ _mo(iState), rBo; \ - MOVQ _su(iState), rBu; \ - XORQ rDe, rBe; \ - ROLQ $44, rBe; \ - XORQ rDi, rBi; \ - XORQ rDa, rBa; \ - ROLQ $43, rBi; \ - \ - MOVQ rBe, rCa; \ - MOVQ rc, rT1; \ - ORQ rBi, rCa; \ - XORQ rBa, rT1; \ - XORQ rT1, rCa; \ - MOVQ rCa, _ba(oState); \ - \ - XORQ rDu, rBu; \ - ROLQ $14, rBu; \ - MOVQ rBa, rCu; \ - ANDQ rBe, rCu; \ - XORQ rBu, rCu; \ - MOVQ rCu, _bu(oState); \ - \ - XORQ rDo, rBo; \ - ROLQ $21, rBo; \ - MOVQ rBo, rT1; \ - ANDQ rBu, rT1; \ - XORQ rBi, rT1; \ - MOVQ rT1, _bi(oState); \ - \ - NOTQ rBi; \ - ORQ rBa, rBu; \ - ORQ rBo, rBi; \ - XORQ rBo, rBu; \ - XORQ rBe, rBi; \ - MOVQ rBu, _bo(oState); \ - MOVQ rBi, _be(oState); \ - B_RBI_RCE; \ - \ - /* Result g */ \ - MOVQ _gu(iState), rBe; \ - XORQ rDu, rBe; \ - MOVQ _ka(iState), rBi; \ - ROLQ $20, rBe; \ - XORQ rDa, rBi; \ - ROLQ $3, rBi; \ - MOVQ _bo(iState), rBa; \ - MOVQ rBe, rT1; \ - ORQ rBi, rT1; \ - XORQ rDo, rBa; \ - MOVQ _me(iState), rBo; \ - MOVQ _si(iState), rBu; \ - ROLQ $28, rBa; \ - XORQ rBa, rT1; \ - MOVQ rT1, _ga(oState); \ - G_RT1_RCA; \ - \ - XORQ rDe, rBo; \ - ROLQ $45, rBo; \ - MOVQ rBi, rT1; \ - ANDQ rBo, rT1; \ - XORQ rBe, rT1; \ - MOVQ rT1, _ge(oState); \ - G_RT1_RCE; \ - \ - XORQ rDi, rBu; \ - ROLQ $61, rBu; \ - MOVQ rBu, rT1; \ - ORQ rBa, rT1; \ - XORQ rBo, rT1; \ - MOVQ rT1, _go(oState); \ - \ - ANDQ rBe, rBa; \ - XORQ rBu, rBa; \ - MOVQ rBa, _gu(oState); \ - NOTQ rBu; \ - G_RBA_RCU; \ - \ - ORQ rBu, rBo; \ - XORQ rBi, rBo; \ - MOVQ rBo, _gi(oState); \ - \ - /* Result k */ \ - MOVQ _be(iState), rBa; \ - MOVQ _gi(iState), rBe; \ - MOVQ _ko(iState), rBi; \ - MOVQ _mu(iState), rBo; \ - MOVQ _sa(iState), rBu; \ - XORQ rDi, rBe; \ - ROLQ $6, rBe; \ - XORQ rDo, rBi; \ - ROLQ $25, rBi; \ - MOVQ rBe, rT1; \ - ORQ rBi, rT1; \ - XORQ rDe, rBa; \ - ROLQ $1, rBa; \ - XORQ rBa, rT1; \ - MOVQ rT1, _ka(oState); \ - K_RT1_RCA; \ - \ - XORQ rDu, rBo; \ - ROLQ $8, rBo; \ - MOVQ rBi, rT1; \ - ANDQ rBo, rT1; \ - XORQ rBe, rT1; \ - MOVQ rT1, _ke(oState); \ - K_RT1_RCE; \ - \ - XORQ rDa, rBu; \ - ROLQ $18, rBu; \ - NOTQ rBo; \ - MOVQ rBo, rT1; \ - ANDQ rBu, rT1; \ - XORQ rBi, rT1; \ - MOVQ rT1, _ki(oState); \ - \ - MOVQ rBu, rT1; \ - ORQ rBa, rT1; \ - XORQ rBo, rT1; \ - MOVQ rT1, _ko(oState); \ - \ - ANDQ rBe, rBa; \ - XORQ rBu, rBa; \ - MOVQ rBa, _ku(oState); \ - K_RBA_RCU; \ - \ - /* Result m */ \ - MOVQ _ga(iState), rBe; \ - XORQ rDa, rBe; \ - MOVQ _ke(iState), rBi; \ - ROLQ $36, rBe; \ - XORQ rDe, rBi; \ - MOVQ _bu(iState), rBa; \ - ROLQ $10, rBi; \ - MOVQ rBe, rT1; \ - MOVQ _mi(iState), rBo; \ - ANDQ rBi, rT1; \ - XORQ rDu, rBa; \ - MOVQ _so(iState), rBu; \ - ROLQ $27, rBa; \ - XORQ rBa, rT1; \ - MOVQ rT1, _ma(oState); \ - M_RT1_RCA; \ - \ - XORQ rDi, rBo; \ - ROLQ $15, rBo; \ - MOVQ rBi, rT1; \ - ORQ rBo, rT1; \ - XORQ rBe, rT1; \ - MOVQ rT1, _me(oState); \ - M_RT1_RCE; \ - \ - XORQ rDo, rBu; \ - ROLQ $56, rBu; \ - NOTQ rBo; \ - MOVQ rBo, rT1; \ - ORQ rBu, rT1; \ - XORQ rBi, rT1; \ - MOVQ rT1, _mi(oState); \ - \ - ORQ rBa, rBe; \ - XORQ rBu, rBe; \ - MOVQ rBe, _mu(oState); \ - \ - ANDQ rBa, rBu; \ - XORQ rBo, rBu; \ - MOVQ rBu, _mo(oState); \ - M_RBE_RCU; \ - \ - /* Result s */ \ - MOVQ _bi(iState), rBa; \ - MOVQ _go(iState), rBe; \ - MOVQ _ku(iState), rBi; \ - XORQ rDi, rBa; \ - MOVQ _ma(iState), rBo; \ - ROLQ $62, rBa; \ - XORQ rDo, rBe; \ - MOVQ _se(iState), rBu; \ - ROLQ $55, rBe; \ - \ - XORQ rDu, rBi; \ - MOVQ rBa, rDu; \ - XORQ rDe, rBu; \ - ROLQ $2, rBu; \ - ANDQ rBe, rDu; \ - XORQ rBu, rDu; \ - MOVQ rDu, _su(oState); \ - \ - ROLQ $39, rBi; \ - S_RDU_RCU; \ - NOTQ rBe; \ - XORQ rDa, rBo; \ - MOVQ rBe, rDa; \ - ANDQ rBi, rDa; \ - XORQ rBa, rDa; \ - MOVQ rDa, _sa(oState); \ - S_RDA_RCA; \ - \ - ROLQ $41, rBo; \ - MOVQ rBi, rDe; \ - ORQ rBo, rDe; \ - XORQ rBe, rDe; \ - MOVQ rDe, _se(oState); \ - S_RDE_RCE; \ - \ - MOVQ rBo, rDi; \ - MOVQ rBu, rDo; \ - ANDQ rBu, rDi; \ - ORQ rBa, rDo; \ - XORQ rBi, rDi; \ - XORQ rBo, rDo; \ - MOVQ rDi, _si(oState); \ - MOVQ rDo, _so(oState) \ - // func keccakF1600(a *[25]uint64) -TEXT ·keccakF1600(SB), 0, $200-8 - MOVQ a+0(FP), rpState +TEXT ·keccakF1600(SB), $200-8 + MOVQ a+0(FP), DI // Convert the user state into an internal state - NOTQ _be(rpState) - NOTQ _bi(rpState) - NOTQ _go(rpState) - NOTQ _ki(rpState) - NOTQ _mi(rpState) - NOTQ _sa(rpState) + NOTQ 8(DI) + NOTQ 16(DI) + NOTQ 64(DI) + NOTQ 96(DI) + NOTQ 136(DI) + NOTQ 160(DI) // Execute the KeccakF permutation - MOVQ _ba(rpState), rCa - MOVQ _be(rpState), rCe - MOVQ _bu(rpState), rCu - - XORQ _ga(rpState), rCa - XORQ _ge(rpState), rCe - XORQ _gu(rpState), rCu - - XORQ _ka(rpState), rCa - XORQ _ke(rpState), rCe - XORQ _ku(rpState), rCu - - XORQ _ma(rpState), rCa - XORQ _me(rpState), rCe - XORQ _mu(rpState), rCu - - XORQ _sa(rpState), rCa - XORQ _se(rpState), rCe - MOVQ _si(rpState), rDi - MOVQ _so(rpState), rDo - XORQ _su(rpState), rCu - - mKeccakRound(rpState, rpStack, $0x0000000000000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x0000000000008082, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x800000000000808a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000080008000, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x000000000000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000000008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x000000000000008a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x0000000000000088, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x0000000080008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x000000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x000000008000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x800000000000008b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x8000000000008089, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000000008003, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x8000000000008002, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000000000080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x000000000000800a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x800000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000000008080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000080008008, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP) + MOVQ (DI), SI + MOVQ 8(DI), BP + MOVQ 32(DI), R15 + XORQ 40(DI), SI + XORQ 48(DI), BP + XORQ 72(DI), R15 + XORQ 80(DI), SI + XORQ 88(DI), BP + XORQ 112(DI), R15 + XORQ 120(DI), SI + XORQ 128(DI), BP + XORQ 152(DI), R15 + XORQ 160(DI), SI + XORQ 168(DI), BP + MOVQ 176(DI), DX + MOVQ 184(DI), R8 + XORQ 192(DI), R15 - // Revert the internal state to the user state - NOTQ _be(rpState) - NOTQ _bi(rpState) - NOTQ _go(rpState) - NOTQ _ki(rpState) - NOTQ _mi(rpState) - NOTQ _sa(rpState) + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000000000001, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000000008082, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x800000000000808a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008000, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000000000808b, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000080000001, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008081, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008009, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000000000008a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000000000088, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000080008009, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000008000000a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000008000808b, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x800000000000008b, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008089, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008003, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008002, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000000080, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000000000800a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x800000008000000a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008081, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008080, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000080000001, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008008, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + NOP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + NOP + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + NOP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + NOP + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + NOP + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + NOP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + NOP + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + NOP + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + NOP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + NOP + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + NOP + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + NOP + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + NOP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Revert the internal state to the user state + NOTQ 8(DI) + NOTQ 16(DI) + NOTQ 64(DI) + NOTQ 96(DI) + NOTQ 136(DI) + NOTQ 160(DI) RET diff --git a/vendor/golang.org/x/crypto/sha3/register.go b/vendor/golang.org/x/crypto/sha3/register.go deleted file mode 100644 index addfd504..00000000 --- a/vendor/golang.org/x/crypto/sha3/register.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.4 - -package sha3 - -import ( - "crypto" -) - -func init() { - crypto.RegisterHash(crypto.SHA3_224, New224) - crypto.RegisterHash(crypto.SHA3_256, New256) - crypto.RegisterHash(crypto.SHA3_384, New384) - crypto.RegisterHash(crypto.SHA3_512, New512) -} diff --git a/vendor/golang.org/x/crypto/sha3/sha3.go b/vendor/golang.org/x/crypto/sha3/sha3.go index 4884d172..afedde5a 100644 --- a/vendor/golang.org/x/crypto/sha3/sha3.go +++ b/vendor/golang.org/x/crypto/sha3/sha3.go @@ -23,7 +23,6 @@ const ( type state struct { // Generic sponge components. a [25]uint64 // main state of the hash - buf []byte // points into storage rate int // the number of bytes of state to use // dsbyte contains the "domain separation" bits and the first bit of @@ -40,7 +39,8 @@ type state struct { // Extendable-Output Functions (May 2014)" dsbyte byte - storage storageBuf + i, n int // storage[i:n] is the buffer, i is only used while squeezing + storage [maxRate]byte // Specific to SHA-3 and SHAKE. outputLen int // the default output size in bytes @@ -54,24 +54,18 @@ func (d *state) BlockSize() int { return d.rate } func (d *state) Size() int { return d.outputLen } // Reset clears the internal state by zeroing the sponge state and -// the byte buffer, and setting Sponge.state to absorbing. +// the buffer indexes, and setting Sponge.state to absorbing. func (d *state) Reset() { // Zero the permutation's state. for i := range d.a { d.a[i] = 0 } d.state = spongeAbsorbing - d.buf = d.storage.asBytes()[:0] + d.i, d.n = 0, 0 } func (d *state) clone() *state { ret := *d - if ret.state == spongeAbsorbing { - ret.buf = ret.storage.asBytes()[:len(ret.buf)] - } else { - ret.buf = ret.storage.asBytes()[d.rate-cap(d.buf) : d.rate] - } - return &ret } @@ -82,43 +76,40 @@ func (d *state) permute() { case spongeAbsorbing: // If we're absorbing, we need to xor the input into the state // before applying the permutation. - xorIn(d, d.buf) - d.buf = d.storage.asBytes()[:0] + xorIn(d, d.storage[:d.rate]) + d.n = 0 keccakF1600(&d.a) case spongeSqueezing: // If we're squeezing, we need to apply the permutation before // copying more output. keccakF1600(&d.a) - d.buf = d.storage.asBytes()[:d.rate] - copyOut(d, d.buf) + d.i = 0 + copyOut(d, d.storage[:d.rate]) } } // pads appends the domain separation bits in dsbyte, applies // the multi-bitrate 10..1 padding rule, and permutes the state. -func (d *state) padAndPermute(dsbyte byte) { - if d.buf == nil { - d.buf = d.storage.asBytes()[:0] - } +func (d *state) padAndPermute() { // Pad with this instance's domain-separator bits. We know that there's // at least one byte of space in d.buf because, if it were full, // permute would have been called to empty it. dsbyte also contains the // first one bit for the padding. See the comment in the state struct. - d.buf = append(d.buf, dsbyte) - zerosStart := len(d.buf) - d.buf = d.storage.asBytes()[:d.rate] - for i := zerosStart; i < d.rate; i++ { - d.buf[i] = 0 + d.storage[d.n] = d.dsbyte + d.n++ + for d.n < d.rate { + d.storage[d.n] = 0 + d.n++ } // This adds the final one bit for the padding. Because of the way that // bits are numbered from the LSB upwards, the final bit is the MSB of // the last byte. - d.buf[d.rate-1] ^= 0x80 + d.storage[d.rate-1] ^= 0x80 // Apply the permutation d.permute() d.state = spongeSqueezing - d.buf = d.storage.asBytes()[:d.rate] - copyOut(d, d.buf) + d.n = d.rate + copyOut(d, d.storage[:d.rate]) } // Write absorbs more data into the hash's state. It panics if any @@ -127,28 +118,25 @@ func (d *state) Write(p []byte) (written int, err error) { if d.state != spongeAbsorbing { panic("sha3: Write after Read") } - if d.buf == nil { - d.buf = d.storage.asBytes()[:0] - } written = len(p) for len(p) > 0 { - if len(d.buf) == 0 && len(p) >= d.rate { + if d.n == 0 && len(p) >= d.rate { // The fast path; absorb a full "rate" bytes of input and apply the permutation. xorIn(d, p[:d.rate]) p = p[d.rate:] keccakF1600(&d.a) } else { // The slow path; buffer the input until we can fill the sponge, and then xor it in. - todo := d.rate - len(d.buf) + todo := d.rate - d.n if todo > len(p) { todo = len(p) } - d.buf = append(d.buf, p[:todo]...) + d.n += copy(d.storage[d.n:], p[:todo]) p = p[todo:] // If the sponge is full, apply the permutation. - if len(d.buf) == d.rate { + if d.n == d.rate { d.permute() } } @@ -161,19 +149,19 @@ func (d *state) Write(p []byte) (written int, err error) { func (d *state) Read(out []byte) (n int, err error) { // If we're still absorbing, pad and apply the permutation. if d.state == spongeAbsorbing { - d.padAndPermute(d.dsbyte) + d.padAndPermute() } n = len(out) // Now, do the squeezing. for len(out) > 0 { - n := copy(out, d.buf) - d.buf = d.buf[n:] + n := copy(out, d.storage[d.i:d.n]) + d.i += n out = out[n:] // Apply the permutation if we've squeezed the sponge dry. - if len(d.buf) == 0 { + if d.i == d.rate { d.permute() } } diff --git a/vendor/golang.org/x/crypto/sha3/sha3_s390x.go b/vendor/golang.org/x/crypto/sha3/sha3_s390x.go index d861bca5..00d8034a 100644 --- a/vendor/golang.org/x/crypto/sha3/sha3_s390x.go +++ b/vendor/golang.org/x/crypto/sha3/sha3_s390x.go @@ -143,6 +143,12 @@ func (s *asmState) Write(b []byte) (int, error) { // Read squeezes an arbitrary number of bytes from the sponge. func (s *asmState) Read(out []byte) (n int, err error) { + // The 'compute last message digest' instruction only stores the digest + // at the first operand (dst) for SHAKE functions. + if s.function != shake_128 && s.function != shake_256 { + panic("sha3: can only call Read for SHAKE functions") + } + n = len(out) // need to pad if we were absorbing @@ -202,8 +208,17 @@ func (s *asmState) Sum(b []byte) []byte { // Hash the buffer. Note that we don't clear it because we // aren't updating the state. - klmd(s.function, &a, nil, s.buf) - return append(b, a[:s.outputLen]...) + switch s.function { + case sha3_224, sha3_256, sha3_384, sha3_512: + klmd(s.function, &a, nil, s.buf) + return append(b, a[:s.outputLen]...) + case shake_128, shake_256: + d := make([]byte, s.outputLen, 64) + klmd(s.function, &a, d, s.buf) + return append(b, d[:s.outputLen]...) + default: + panic("sha3: unknown function") + } } // Reset resets the Hash to its initial state. @@ -233,56 +248,56 @@ func (s *asmState) Clone() ShakeHash { return s.clone() } -// new224Asm returns an assembly implementation of SHA3-224 if available, -// otherwise it returns nil. -func new224Asm() hash.Hash { +// new224 returns an assembly implementation of SHA3-224 if available, +// otherwise it returns a generic implementation. +func new224() hash.Hash { if cpu.S390X.HasSHA3 { return newAsmState(sha3_224) } - return nil + return new224Generic() } -// new256Asm returns an assembly implementation of SHA3-256 if available, -// otherwise it returns nil. -func new256Asm() hash.Hash { +// new256 returns an assembly implementation of SHA3-256 if available, +// otherwise it returns a generic implementation. +func new256() hash.Hash { if cpu.S390X.HasSHA3 { return newAsmState(sha3_256) } - return nil + return new256Generic() } -// new384Asm returns an assembly implementation of SHA3-384 if available, -// otherwise it returns nil. -func new384Asm() hash.Hash { +// new384 returns an assembly implementation of SHA3-384 if available, +// otherwise it returns a generic implementation. +func new384() hash.Hash { if cpu.S390X.HasSHA3 { return newAsmState(sha3_384) } - return nil + return new384Generic() } -// new512Asm returns an assembly implementation of SHA3-512 if available, -// otherwise it returns nil. -func new512Asm() hash.Hash { +// new512 returns an assembly implementation of SHA3-512 if available, +// otherwise it returns a generic implementation. +func new512() hash.Hash { if cpu.S390X.HasSHA3 { return newAsmState(sha3_512) } - return nil + return new512Generic() } -// newShake128Asm returns an assembly implementation of SHAKE-128 if available, -// otherwise it returns nil. -func newShake128Asm() ShakeHash { +// newShake128 returns an assembly implementation of SHAKE-128 if available, +// otherwise it returns a generic implementation. +func newShake128() ShakeHash { if cpu.S390X.HasSHA3 { return newAsmState(shake_128) } - return nil + return newShake128Generic() } -// newShake256Asm returns an assembly implementation of SHAKE-256 if available, -// otherwise it returns nil. -func newShake256Asm() ShakeHash { +// newShake256 returns an assembly implementation of SHAKE-256 if available, +// otherwise it returns a generic implementation. +func newShake256() ShakeHash { if cpu.S390X.HasSHA3 { return newAsmState(shake_256) } - return nil + return newShake256Generic() } diff --git a/vendor/golang.org/x/crypto/sha3/shake.go b/vendor/golang.org/x/crypto/sha3/shake.go index bb699840..a01ef435 100644 --- a/vendor/golang.org/x/crypto/sha3/shake.go +++ b/vendor/golang.org/x/crypto/sha3/shake.go @@ -85,9 +85,9 @@ func newCShake(N, S []byte, rate, outputLen int, dsbyte byte) ShakeHash { // leftEncode returns max 9 bytes c.initBlock = make([]byte, 0, 9*2+len(N)+len(S)) - c.initBlock = append(c.initBlock, leftEncode(uint64(len(N)*8))...) + c.initBlock = append(c.initBlock, leftEncode(uint64(len(N))*8)...) c.initBlock = append(c.initBlock, N...) - c.initBlock = append(c.initBlock, leftEncode(uint64(len(S)*8))...) + c.initBlock = append(c.initBlock, leftEncode(uint64(len(S))*8)...) c.initBlock = append(c.initBlock, S...) c.Write(bytepad(c.initBlock, c.rate)) return &c @@ -115,19 +115,21 @@ func (c *state) Clone() ShakeHash { // Its generic security strength is 128 bits against all attacks if at // least 32 bytes of its output are used. func NewShake128() ShakeHash { - if h := newShake128Asm(); h != nil { - return h - } - return &state{rate: rate128, outputLen: 32, dsbyte: dsbyteShake} + return newShake128() } // NewShake256 creates a new SHAKE256 variable-output-length ShakeHash. // Its generic security strength is 256 bits against all attacks if // at least 64 bytes of its output are used. func NewShake256() ShakeHash { - if h := newShake256Asm(); h != nil { - return h - } + return newShake256() +} + +func newShake128Generic() *state { + return &state{rate: rate128, outputLen: 32, dsbyte: dsbyteShake} +} + +func newShake256Generic() *state { return &state{rate: rate256, outputLen: 64, dsbyte: dsbyteShake} } diff --git a/vendor/golang.org/x/crypto/sha3/shake_generic.go b/vendor/golang.org/x/crypto/sha3/shake_generic.go deleted file mode 100644 index 8d31cf5b..00000000 --- a/vendor/golang.org/x/crypto/sha3/shake_generic.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !gc || purego || !s390x - -package sha3 - -// newShake128Asm returns an assembly implementation of SHAKE-128 if available, -// otherwise it returns nil. -func newShake128Asm() ShakeHash { - return nil -} - -// newShake256Asm returns an assembly implementation of SHAKE-256 if available, -// otherwise it returns nil. -func newShake256Asm() ShakeHash { - return nil -} diff --git a/vendor/golang.org/x/crypto/sha3/shake_noasm.go b/vendor/golang.org/x/crypto/sha3/shake_noasm.go new file mode 100644 index 00000000..4276ba4a --- /dev/null +++ b/vendor/golang.org/x/crypto/sha3/shake_noasm.go @@ -0,0 +1,15 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !gc || purego || !s390x + +package sha3 + +func newShake128() *state { + return newShake128Generic() +} + +func newShake256() *state { + return newShake256Generic() +} diff --git a/vendor/golang.org/x/crypto/sha3/xor.go b/vendor/golang.org/x/crypto/sha3/xor.go index 7337cca8..6ada5c95 100644 --- a/vendor/golang.org/x/crypto/sha3/xor.go +++ b/vendor/golang.org/x/crypto/sha3/xor.go @@ -2,22 +2,39 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build (!amd64 && !386 && !ppc64le) || purego - package sha3 -// A storageBuf is an aligned array of maxRate bytes. -type storageBuf [maxRate]byte - -func (b *storageBuf) asBytes() *[maxRate]byte { - return (*[maxRate]byte)(b) -} +import ( + "crypto/subtle" + "encoding/binary" + "unsafe" -var ( - xorIn = xorInGeneric - copyOut = copyOutGeneric - xorInUnaligned = xorInGeneric - copyOutUnaligned = copyOutGeneric + "golang.org/x/sys/cpu" ) -const xorImplementationUnaligned = "generic" +// xorIn xors the bytes in buf into the state. +func xorIn(d *state, buf []byte) { + if cpu.IsBigEndian { + for i := 0; len(buf) >= 8; i++ { + a := binary.LittleEndian.Uint64(buf) + d.a[i] ^= a + buf = buf[8:] + } + } else { + ab := (*[25 * 64 / 8]byte)(unsafe.Pointer(&d.a)) + subtle.XORBytes(ab[:], ab[:], buf) + } +} + +// copyOut copies uint64s to a byte buffer. +func copyOut(d *state, b []byte) { + if cpu.IsBigEndian { + for i := 0; len(b) >= 8; i++ { + binary.LittleEndian.PutUint64(b, d.a[i]) + b = b[8:] + } + } else { + ab := (*[25 * 64 / 8]byte)(unsafe.Pointer(&d.a)) + copy(b, ab[:]) + } +} diff --git a/vendor/golang.org/x/crypto/sha3/xor_generic.go b/vendor/golang.org/x/crypto/sha3/xor_generic.go deleted file mode 100644 index 8d947711..00000000 --- a/vendor/golang.org/x/crypto/sha3/xor_generic.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package sha3 - -import "encoding/binary" - -// xorInGeneric xors the bytes in buf into the state; it -// makes no non-portable assumptions about memory layout -// or alignment. -func xorInGeneric(d *state, buf []byte) { - n := len(buf) / 8 - - for i := 0; i < n; i++ { - a := binary.LittleEndian.Uint64(buf) - d.a[i] ^= a - buf = buf[8:] - } -} - -// copyOutGeneric copies uint64s to a byte buffer. -func copyOutGeneric(d *state, b []byte) { - for i := 0; len(b) >= 8; i++ { - binary.LittleEndian.PutUint64(b, d.a[i]) - b = b[8:] - } -} diff --git a/vendor/golang.org/x/crypto/sha3/xor_unaligned.go b/vendor/golang.org/x/crypto/sha3/xor_unaligned.go deleted file mode 100644 index 870e2d16..00000000 --- a/vendor/golang.org/x/crypto/sha3/xor_unaligned.go +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build (amd64 || 386 || ppc64le) && !purego - -package sha3 - -import "unsafe" - -// A storageBuf is an aligned array of maxRate bytes. -type storageBuf [maxRate / 8]uint64 - -func (b *storageBuf) asBytes() *[maxRate]byte { - return (*[maxRate]byte)(unsafe.Pointer(b)) -} - -// xorInUnaligned uses unaligned reads and writes to update d.a to contain d.a -// XOR buf. -func xorInUnaligned(d *state, buf []byte) { - n := len(buf) - bw := (*[maxRate / 8]uint64)(unsafe.Pointer(&buf[0]))[: n/8 : n/8] - if n >= 72 { - d.a[0] ^= bw[0] - d.a[1] ^= bw[1] - d.a[2] ^= bw[2] - d.a[3] ^= bw[3] - d.a[4] ^= bw[4] - d.a[5] ^= bw[5] - d.a[6] ^= bw[6] - d.a[7] ^= bw[7] - d.a[8] ^= bw[8] - } - if n >= 104 { - d.a[9] ^= bw[9] - d.a[10] ^= bw[10] - d.a[11] ^= bw[11] - d.a[12] ^= bw[12] - } - if n >= 136 { - d.a[13] ^= bw[13] - d.a[14] ^= bw[14] - d.a[15] ^= bw[15] - d.a[16] ^= bw[16] - } - if n >= 144 { - d.a[17] ^= bw[17] - } - if n >= 168 { - d.a[18] ^= bw[18] - d.a[19] ^= bw[19] - d.a[20] ^= bw[20] - } -} - -func copyOutUnaligned(d *state, buf []byte) { - ab := (*[maxRate]uint8)(unsafe.Pointer(&d.a[0])) - copy(buf, ab[:]) -} - -var ( - xorIn = xorInUnaligned - copyOut = copyOutUnaligned -) - -const xorImplementationUnaligned = "unaligned" diff --git a/vendor/golang.org/x/crypto/twofish/twofish.go b/vendor/golang.org/x/crypto/twofish/twofish.go index e4eeae17..6d0a3028 100644 --- a/vendor/golang.org/x/crypto/twofish/twofish.go +++ b/vendor/golang.org/x/crypto/twofish/twofish.go @@ -9,7 +9,7 @@ // implementation. Instead, use AES (from crypto/aes, if necessary in an AEAD // mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from // golang.org/x/crypto/chacha20poly1305). -package twofish // import "golang.org/x/crypto/twofish" +package twofish // Twofish is defined in https://www.schneier.com/paper-twofish-paper.pdf [TWOFISH] diff --git a/vendor/golang.org/x/crypto/xtea/cipher.go b/vendor/golang.org/x/crypto/xtea/cipher.go index a4c2fd02..7b4f8aaa 100644 --- a/vendor/golang.org/x/crypto/xtea/cipher.go +++ b/vendor/golang.org/x/crypto/xtea/cipher.go @@ -12,7 +12,7 @@ // Deprecated: any new system should use AES (from crypto/aes, if necessary in // an AEAD mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from // golang.org/x/crypto/chacha20poly1305). -package xtea // import "golang.org/x/crypto/xtea" +package xtea // For details, see http://www.cix.co.uk/~klockstone/xtea.pdf diff --git a/vendor/golang.org/x/exp/LICENSE b/vendor/golang.org/x/exp/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/exp/LICENSE +++ b/vendor/golang.org/x/exp/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/image/AUTHORS b/vendor/golang.org/x/image/AUTHORS deleted file mode 100644 index 15167cd7..00000000 --- a/vendor/golang.org/x/image/AUTHORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code refers to The Go Authors for copyright purposes. -# The master list of authors is in the main Go distribution, -# visible at http://tip.golang.org/AUTHORS. diff --git a/vendor/golang.org/x/image/CONTRIBUTORS b/vendor/golang.org/x/image/CONTRIBUTORS deleted file mode 100644 index 1c4577e9..00000000 --- a/vendor/golang.org/x/image/CONTRIBUTORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code was written by the Go contributors. -# The master list of contributors is in the main Go distribution, -# visible at http://tip.golang.org/CONTRIBUTORS. diff --git a/vendor/golang.org/x/image/LICENSE b/vendor/golang.org/x/image/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/image/LICENSE +++ b/vendor/golang.org/x/image/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/image/draw/draw.go b/vendor/golang.org/x/image/draw/draw.go index cd5aaba6..42d5d7e0 100644 --- a/vendor/golang.org/x/image/draw/draw.go +++ b/vendor/golang.org/x/image/draw/draw.go @@ -47,6 +47,12 @@ func (floydSteinberg) Draw(dst Image, r image.Rectangle, src image.Image, sp ima // Image is an image.Image with a Set method to change a single pixel. type Image = draw.Image +// RGBA64Image extends both the Image and image.RGBA64Image interfaces with a +// SetRGBA64 method to change a single pixel. SetRGBA64 is equivalent to +// calling Set, but it can avoid allocations from converting concrete color +// types to the color.Color interface type. +type RGBA64Image = draw.RGBA64Image + // Op is a Porter-Duff compositing operator. type Op = draw.Op diff --git a/vendor/golang.org/x/image/draw/draw_go117.go b/vendor/golang.org/x/image/draw/draw_go117.go deleted file mode 100644 index fa836486..00000000 --- a/vendor/golang.org/x/image/draw/draw_go117.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.17 -// +build go1.17 - -package draw - -import ( - "image/draw" -) - -// The package documentation, in draw.go, gives the intent of this package: -// -// This package is a superset of and a drop-in replacement for the -// image/draw package in the standard library. -// -// "Drop-in replacement" means that we use type aliases in this file. -// -// TODO: move the type aliases to draw.go once Go 1.16 is no longer supported. - -// RGBA64Image extends both the Image and image.RGBA64Image interfaces with a -// SetRGBA64 method to change a single pixel. SetRGBA64 is equivalent to -// calling Set, but it can avoid allocations from converting concrete color -// types to the color.Color interface type. -type RGBA64Image = draw.RGBA64Image diff --git a/vendor/golang.org/x/image/draw/impl.go b/vendor/golang.org/x/image/draw/impl.go index 75498adb..fcd19943 100644 --- a/vendor/golang.org/x/image/draw/impl.go +++ b/vendor/golang.org/x/image/draw/impl.go @@ -59,9 +59,16 @@ func (z nnInterpolator) Scale(dst Image, dr image.Rectangle, src image.Image, sr z.scale_RGBA_NRGBA_Over(dst, dr, adr, src, sr, &o) case *image.RGBA: z.scale_RGBA_RGBA_Over(dst, dr, adr, src, sr, &o) + case image.RGBA64Image: + z.scale_RGBA_RGBA64Image_Over(dst, dr, adr, src, sr, &o) default: z.scale_RGBA_Image_Over(dst, dr, adr, src, sr, &o) } + case RGBA64Image: + switch src := src.(type) { + case image.RGBA64Image: + z.scale_RGBA64Image_RGBA64Image_Over(dst, dr, adr, src, sr, &o) + } default: switch src := src.(type) { default: @@ -91,9 +98,16 @@ func (z nnInterpolator) Scale(dst Image, dr image.Rectangle, src image.Image, sr case image.YCbCrSubsampleRatio440: z.scale_RGBA_YCbCr440_Src(dst, dr, adr, src, sr, &o) } + case image.RGBA64Image: + z.scale_RGBA_RGBA64Image_Src(dst, dr, adr, src, sr, &o) default: z.scale_RGBA_Image_Src(dst, dr, adr, src, sr, &o) } + case RGBA64Image: + switch src := src.(type) { + case image.RGBA64Image: + z.scale_RGBA64Image_RGBA64Image_Src(dst, dr, adr, src, sr, &o) + } default: switch src := src.(type) { default: @@ -170,9 +184,16 @@ func (z nnInterpolator) Transform(dst Image, s2d f64.Aff3, src image.Image, sr i z.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o) case *image.RGBA: z.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o) + case image.RGBA64Image: + z.transform_RGBA_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o) default: z.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o) } + case RGBA64Image: + switch src := src.(type) { + case image.RGBA64Image: + z.transform_RGBA64Image_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o) + } default: switch src := src.(type) { default: @@ -202,9 +223,16 @@ func (z nnInterpolator) Transform(dst Image, s2d f64.Aff3, src image.Image, sr i case image.YCbCrSubsampleRatio440: z.transform_RGBA_YCbCr440_Src(dst, dr, adr, &d2s, src, sr, bias, &o) } + case image.RGBA64Image: + z.transform_RGBA_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o) default: z.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o) } + case RGBA64Image: + switch src := src.(type) { + case image.RGBA64Image: + z.transform_RGBA64Image_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o) + } default: switch src := src.(type) { default: @@ -502,6 +530,45 @@ func (nnInterpolator) scale_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Rec } } +func (nnInterpolator) scale_RGBA_RGBA64Image_Over(dst *image.RGBA, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { + dw2 := uint64(dr.Dx()) * 2 + dh2 := uint64(dr.Dy()) * 2 + sw := uint64(sr.Dx()) + sh := uint64(sr.Dy()) + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + sy := (2*uint64(dy) + 1) * sh / dh2 + d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + sx := (2*uint64(dx) + 1) * sw / dw2 + p := src.RGBA64At(sr.Min.X+int(sx), sr.Min.Y+int(sy)) + pa1 := (0xffff - uint32(p.A)) * 0x101 + dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + uint32(p.R)) >> 8) + dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + uint32(p.G)) >> 8) + dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + uint32(p.B)) >> 8) + dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + uint32(p.A)) >> 8) + } + } +} + +func (nnInterpolator) scale_RGBA_RGBA64Image_Src(dst *image.RGBA, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { + dw2 := uint64(dr.Dx()) * 2 + dh2 := uint64(dr.Dy()) * 2 + sw := uint64(sr.Dx()) + sh := uint64(sr.Dy()) + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + sy := (2*uint64(dy) + 1) * sh / dh2 + d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + sx := (2*uint64(dx) + 1) * sw / dw2 + p := src.RGBA64At(sr.Min.X+int(sx), sr.Min.Y+int(sy)) + dst.Pix[d+0] = uint8(p.R >> 8) + dst.Pix[d+1] = uint8(p.G >> 8) + dst.Pix[d+2] = uint8(p.B >> 8) + dst.Pix[d+3] = uint8(p.A >> 8) + } + } +} + func (nnInterpolator) scale_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) { dw2 := uint64(dr.Dx()) * 2 dh2 := uint64(dr.Dy()) * 2 @@ -541,6 +608,86 @@ func (nnInterpolator) scale_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectan } } +func (nnInterpolator) scale_RGBA64Image_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { + dw2 := uint64(dr.Dx()) * 2 + dh2 := uint64(dr.Dy()) * 2 + sw := uint64(sr.Dx()) + sh := uint64(sr.Dy()) + srcMask, smp := opts.SrcMask, opts.SrcMaskP + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := color.RGBA64{} + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + sy := (2*uint64(dy) + 1) * sh / dh2 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + sx := (2*uint64(dx) + 1) * sw / dw2 + p := src.RGBA64At(sr.Min.X+int(sx), sr.Min.Y+int(sy)) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx), smp.Y+sr.Min.Y+int(sy)).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + } + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) + if dstMask != nil { + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + } + pa1 := 0xffff - uint32(p.A) + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R)) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G)) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B)) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A)) + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } + } +} + +func (nnInterpolator) scale_RGBA64Image_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { + dw2 := uint64(dr.Dx()) * 2 + dh2 := uint64(dr.Dy()) * 2 + sw := uint64(sr.Dx()) + sh := uint64(sr.Dy()) + srcMask, smp := opts.SrcMask, opts.SrcMaskP + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := color.RGBA64{} + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + sy := (2*uint64(dy) + 1) * sh / dh2 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + sx := (2*uint64(dx) + 1) * sw / dw2 + p := src.RGBA64At(sr.Min.X+int(sx), sr.Min.Y+int(sy)) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx), smp.Y+sr.Min.Y+int(sy)).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + } + if dstMask != nil { + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + pa1 := 0xffff - ma + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R)) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G)) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B)) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A)) + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } else { + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), p) + } + } + } +} + func (nnInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) { dw2 := uint64(dr.Dx()) * 2 dh2 := uint64(dr.Dy()) * 2 @@ -631,8 +778,8 @@ func (nnInterpolator) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rec d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -653,8 +800,8 @@ func (nnInterpolator) transform_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.R d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -678,8 +825,8 @@ func (nnInterpolator) transform_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Re d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -702,8 +849,8 @@ func (nnInterpolator) transform_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Re d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -727,8 +874,8 @@ func (nnInterpolator) transform_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rec d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -751,8 +898,8 @@ func (nnInterpolator) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -795,8 +942,8 @@ func (nnInterpolator) transform_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -839,8 +986,8 @@ func (nnInterpolator) transform_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -883,8 +1030,8 @@ func (nnInterpolator) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -921,14 +1068,55 @@ func (nnInterpolator) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image } } +func (nnInterpolator) transform_RGBA_RGBA64Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y + if !(image.Point{sx0, sy0}).In(sr) { + continue + } + p := src.RGBA64At(sx0, sy0) + pa1 := (0xffff - uint32(p.A)) * 0x101 + dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + uint32(p.R)) >> 8) + dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + uint32(p.G)) >> 8) + dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + uint32(p.B)) >> 8) + dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + uint32(p.A)) >> 8) + } + } +} + +func (nnInterpolator) transform_RGBA_RGBA64Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y + if !(image.Point{sx0, sy0}).In(sr) { + continue + } + p := src.RGBA64At(sx0, sy0) + dst.Pix[d+0] = uint8(p.R >> 8) + dst.Pix[d+1] = uint8(p.G >> 8) + dst.Pix[d+2] = uint8(p.B >> 8) + dst.Pix[d+3] = uint8(p.A >> 8) + } + } +} + func (nnInterpolator) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) { for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { dyf := float64(dr.Min.Y+int(dy)) + 0.5 d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -948,8 +1136,8 @@ func (nnInterpolator) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Re d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -962,6 +1150,88 @@ func (nnInterpolator) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Re } } +func (nnInterpolator) transform_RGBA64Image_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { + srcMask, smp := opts.SrcMask, opts.SrcMaskP + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := color.RGBA64{} + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y + if !(image.Point{sx0, sy0}).In(sr) { + continue + } + p := src.RGBA64At(sx0, sy0) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + } + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) + if dstMask != nil { + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + } + pa1 := 0xffff - uint32(p.A) + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R)) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G)) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B)) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A)) + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } + } +} + +func (nnInterpolator) transform_RGBA64Image_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { + srcMask, smp := opts.SrcMask, opts.SrcMaskP + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := color.RGBA64{} + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y + if !(image.Point{sx0, sy0}).In(sr) { + continue + } + p := src.RGBA64At(sx0, sy0) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + } + if dstMask != nil { + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + pa1 := 0xffff - ma + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R)) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G)) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B)) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A)) + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } else { + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), p) + } + } + } +} + func (nnInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) { srcMask, smp := opts.SrcMask, opts.SrcMaskP dstMask, dmp := opts.DstMask, opts.DstMaskP @@ -971,8 +1241,8 @@ func (nnInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Rectan dyf := float64(dr.Min.Y+int(dy)) + 0.5 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -1011,8 +1281,8 @@ func (nnInterpolator) transform_Image_Image_Src(dst Image, dr, adr image.Rectang dyf := float64(dr.Min.Y+int(dy)) + 0.5 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -1097,9 +1367,16 @@ func (z ablInterpolator) Scale(dst Image, dr image.Rectangle, src image.Image, s z.scale_RGBA_NRGBA_Over(dst, dr, adr, src, sr, &o) case *image.RGBA: z.scale_RGBA_RGBA_Over(dst, dr, adr, src, sr, &o) + case image.RGBA64Image: + z.scale_RGBA_RGBA64Image_Over(dst, dr, adr, src, sr, &o) default: z.scale_RGBA_Image_Over(dst, dr, adr, src, sr, &o) } + case RGBA64Image: + switch src := src.(type) { + case image.RGBA64Image: + z.scale_RGBA64Image_RGBA64Image_Over(dst, dr, adr, src, sr, &o) + } default: switch src := src.(type) { default: @@ -1129,9 +1406,16 @@ func (z ablInterpolator) Scale(dst Image, dr image.Rectangle, src image.Image, s case image.YCbCrSubsampleRatio440: z.scale_RGBA_YCbCr440_Src(dst, dr, adr, src, sr, &o) } + case image.RGBA64Image: + z.scale_RGBA_RGBA64Image_Src(dst, dr, adr, src, sr, &o) default: z.scale_RGBA_Image_Src(dst, dr, adr, src, sr, &o) } + case RGBA64Image: + switch src := src.(type) { + case image.RGBA64Image: + z.scale_RGBA64Image_RGBA64Image_Src(dst, dr, adr, src, sr, &o) + } default: switch src := src.(type) { default: @@ -1208,9 +1492,16 @@ func (z ablInterpolator) Transform(dst Image, s2d f64.Aff3, src image.Image, sr z.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o) case *image.RGBA: z.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o) + case image.RGBA64Image: + z.transform_RGBA_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o) default: z.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o) } + case RGBA64Image: + switch src := src.(type) { + case image.RGBA64Image: + z.transform_RGBA64Image_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o) + } default: switch src := src.(type) { default: @@ -1240,9 +1531,16 @@ func (z ablInterpolator) Transform(dst Image, s2d f64.Aff3, src image.Image, sr case image.YCbCrSubsampleRatio440: z.transform_RGBA_YCbCr440_Src(dst, dr, adr, &d2s, src, sr, bias, &o) } + case image.RGBA64Image: + z.transform_RGBA_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o) default: z.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o) } + case RGBA64Image: + switch src := src.(type) { + case image.RGBA64Image: + z.transform_RGBA64Image_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o) + } default: switch src := src.(type) { default: @@ -1261,7 +1559,7 @@ func (ablInterpolator) scale_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectan swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -1279,7 +1577,7 @@ func (ablInterpolator) scale_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectan d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -1298,15 +1596,15 @@ func (ablInterpolator) scale_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectan s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx1) - src.Rect.Min.X) s10ru := uint32(src.Pix[s10i]) * 0x101 s10r := float64(s10ru) - s10r = xFrac1*s00r + xFrac0*s10r + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx0) - src.Rect.Min.X) s01ru := uint32(src.Pix[s01i]) * 0x101 s01r := float64(s01ru) s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx1) - src.Rect.Min.X) s11ru := uint32(src.Pix[s11i]) * 0x101 s11r := float64(s11ru) - s11r = xFrac1*s01r + xFrac0*s11r - s11r = yFrac1*s10r + yFrac0*s11r + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) pr := uint32(s11r) out := uint8(pr >> 8) dst.Pix[d+0] = out @@ -1325,7 +1623,7 @@ func (ablInterpolator) scale_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rect swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -1343,7 +1641,7 @@ func (ablInterpolator) scale_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rect d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -1374,10 +1672,10 @@ func (ablInterpolator) scale_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rect s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4 s01au := uint32(src.Pix[s01i+3]) * 0x101 s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff @@ -1396,14 +1694,14 @@ func (ablInterpolator) scale_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rect s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -1425,7 +1723,7 @@ func (ablInterpolator) scale_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Recta swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -1443,7 +1741,7 @@ func (ablInterpolator) scale_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Recta d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -1474,10 +1772,10 @@ func (ablInterpolator) scale_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Recta s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4 s01au := uint32(src.Pix[s01i+3]) * 0x101 s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff @@ -1496,14 +1794,14 @@ func (ablInterpolator) scale_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Recta s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -1524,7 +1822,7 @@ func (ablInterpolator) scale_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Recta swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -1542,7 +1840,7 @@ func (ablInterpolator) scale_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Recta d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -1573,10 +1871,10 @@ func (ablInterpolator) scale_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Recta s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4 s01ru := uint32(src.Pix[s01i+0]) * 0x101 s01gu := uint32(src.Pix[s01i+1]) * 0x101 @@ -1595,14 +1893,14 @@ func (ablInterpolator) scale_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Recta s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -1624,7 +1922,7 @@ func (ablInterpolator) scale_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectan swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -1642,7 +1940,7 @@ func (ablInterpolator) scale_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectan d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -1673,10 +1971,10 @@ func (ablInterpolator) scale_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectan s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4 s01ru := uint32(src.Pix[s01i+0]) * 0x101 s01gu := uint32(src.Pix[s01i+1]) * 0x101 @@ -1695,14 +1993,14 @@ func (ablInterpolator) scale_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectan s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -1723,7 +2021,7 @@ func (ablInterpolator) scale_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Re swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -1741,7 +2039,7 @@ func (ablInterpolator) scale_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Re d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -1812,9 +2110,9 @@ func (ablInterpolator) scale_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Re s10r := float64(s10ru) s10g := float64(s10gu) s10b := float64(s10bu) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X) s01j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X) @@ -1873,12 +2171,12 @@ func (ablInterpolator) scale_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Re s11r := float64(s11ru) s11g := float64(s11gu) s11b := float64(s11bu) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -1898,7 +2196,7 @@ func (ablInterpolator) scale_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Re swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -1916,7 +2214,7 @@ func (ablInterpolator) scale_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Re d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -1987,9 +2285,9 @@ func (ablInterpolator) scale_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Re s10r := float64(s10ru) s10g := float64(s10gu) s10b := float64(s10bu) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X) s01j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2) @@ -2048,12 +2346,12 @@ func (ablInterpolator) scale_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Re s11r := float64(s11ru) s11g := float64(s11gu) s11b := float64(s11bu) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -2073,7 +2371,7 @@ func (ablInterpolator) scale_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Re swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -2091,7 +2389,7 @@ func (ablInterpolator) scale_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Re d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -2162,9 +2460,9 @@ func (ablInterpolator) scale_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Re s10r := float64(s10ru) s10g := float64(s10gu) s10b := float64(s10bu) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X) s01j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2) @@ -2223,12 +2521,12 @@ func (ablInterpolator) scale_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Re s11r := float64(s11ru) s11g := float64(s11gu) s11b := float64(s11bu) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -2248,7 +2546,7 @@ func (ablInterpolator) scale_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Re swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -2266,7 +2564,7 @@ func (ablInterpolator) scale_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Re d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -2337,9 +2635,9 @@ func (ablInterpolator) scale_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Re s10r := float64(s10ru) s10g := float64(s10gu) s10b := float64(s10bu) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X) s01j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X) @@ -2398,12 +2696,12 @@ func (ablInterpolator) scale_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Re s11r := float64(s11ru) s11g := float64(s11gu) s11b := float64(s11bu) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -2415,6 +2713,167 @@ func (ablInterpolator) scale_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Re } } +func (ablInterpolator) scale_RGBA_RGBA64Image_Over(dst *image.RGBA, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { + sw := int32(sr.Dx()) + sh := int32(sr.Dy()) + yscale := float64(sh) / float64(dr.Dy()) + xscale := float64(sw) / float64(dr.Dx()) + swMinus1, shMinus1 := sw-1, sh-1 + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + sy := float64((float64(dy)+0.5)*yscale) - 0.5 + // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if + // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for + // sx, below. + sy0 := int32(sy) + yFrac0 := sy - float64(sy0) + yFrac1 := 1 - yFrac0 + sy1 := sy0 + 1 + if sy < 0 { + sy0, sy1 = 0, 0 + yFrac0, yFrac1 = 0, 1 + } else if sy1 > shMinus1 { + sy0, sy1 = shMinus1, shMinus1 + yFrac0, yFrac1 = 1, 0 + } + d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 + + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + sx := float64((float64(dx)+0.5)*xscale) - 0.5 + sx0 := int32(sx) + xFrac0 := sx - float64(sx0) + xFrac1 := 1 - xFrac0 + sx1 := sx0 + 1 + if sx < 0 { + sx0, sx1 = 0, 0 + xFrac0, xFrac1 = 0, 1 + } else if sx1 > swMinus1 { + sx0, sx1 = swMinus1, swMinus1 + xFrac0, xFrac1 = 1, 0 + } + + s00u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)) + s00r := float64(s00u.R) + s00g := float64(s00u.G) + s00b := float64(s00u.B) + s00a := float64(s00u.A) + s10u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)) + s10r := float64(s10u.R) + s10g := float64(s10u.G) + s10b := float64(s10u.B) + s10a := float64(s10u.A) + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) + s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)) + s01r := float64(s01u.R) + s01g := float64(s01u.G) + s01b := float64(s01u.B) + s01a := float64(s01u.A) + s11u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)) + s11r := float64(s11u.R) + s11g := float64(s11u.G) + s11b := float64(s11u.B) + s11a := float64(s11u.A) + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) + p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} + pa1 := (0xffff - uint32(p.A)) * 0x101 + dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + uint32(p.R)) >> 8) + dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + uint32(p.G)) >> 8) + dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + uint32(p.B)) >> 8) + dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + uint32(p.A)) >> 8) + } + } +} + +func (ablInterpolator) scale_RGBA_RGBA64Image_Src(dst *image.RGBA, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { + sw := int32(sr.Dx()) + sh := int32(sr.Dy()) + yscale := float64(sh) / float64(dr.Dy()) + xscale := float64(sw) / float64(dr.Dx()) + swMinus1, shMinus1 := sw-1, sh-1 + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + sy := float64((float64(dy)+0.5)*yscale) - 0.5 + // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if + // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for + // sx, below. + sy0 := int32(sy) + yFrac0 := sy - float64(sy0) + yFrac1 := 1 - yFrac0 + sy1 := sy0 + 1 + if sy < 0 { + sy0, sy1 = 0, 0 + yFrac0, yFrac1 = 0, 1 + } else if sy1 > shMinus1 { + sy0, sy1 = shMinus1, shMinus1 + yFrac0, yFrac1 = 1, 0 + } + d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 + + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + sx := float64((float64(dx)+0.5)*xscale) - 0.5 + sx0 := int32(sx) + xFrac0 := sx - float64(sx0) + xFrac1 := 1 - xFrac0 + sx1 := sx0 + 1 + if sx < 0 { + sx0, sx1 = 0, 0 + xFrac0, xFrac1 = 0, 1 + } else if sx1 > swMinus1 { + sx0, sx1 = swMinus1, swMinus1 + xFrac0, xFrac1 = 1, 0 + } + + s00u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)) + s00r := float64(s00u.R) + s00g := float64(s00u.G) + s00b := float64(s00u.B) + s00a := float64(s00u.A) + s10u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)) + s10r := float64(s10u.R) + s10g := float64(s10u.G) + s10b := float64(s10u.B) + s10a := float64(s10u.A) + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) + s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)) + s01r := float64(s01u.R) + s01g := float64(s01u.G) + s01b := float64(s01u.B) + s01a := float64(s01u.A) + s11u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)) + s11r := float64(s11u.R) + s11g := float64(s11u.G) + s11b := float64(s11u.B) + s11a := float64(s11u.A) + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) + p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} + dst.Pix[d+0] = uint8(p.R >> 8) + dst.Pix[d+1] = uint8(p.G >> 8) + dst.Pix[d+2] = uint8(p.B >> 8) + dst.Pix[d+3] = uint8(p.A >> 8) + } + } +} + func (ablInterpolator) scale_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) { sw := int32(sr.Dx()) sh := int32(sr.Dy()) @@ -2423,7 +2882,7 @@ func (ablInterpolator) scale_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rect swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -2441,7 +2900,7 @@ func (ablInterpolator) scale_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rect d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -2464,10 +2923,10 @@ func (ablInterpolator) scale_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rect s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA() s01r := float64(s01ru) s01g := float64(s01gu) @@ -2478,14 +2937,14 @@ func (ablInterpolator) scale_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rect s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -2507,7 +2966,7 @@ func (ablInterpolator) scale_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Recta swMinus1, shMinus1 := sw-1, sh-1 for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -2525,7 +2984,7 @@ func (ablInterpolator) scale_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Recta d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -2548,10 +3007,10 @@ func (ablInterpolator) scale_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Recta s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA() s01r := float64(s01ru) s01g := float64(s01gu) @@ -2562,14 +3021,14 @@ func (ablInterpolator) scale_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Recta s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -2582,7 +3041,7 @@ func (ablInterpolator) scale_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Recta } } -func (ablInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) { +func (ablInterpolator) scale_RGBA64Image_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { sw := int32(sr.Dx()) sh := int32(sr.Dy()) yscale := float64(sh) / float64(dr.Dy()) @@ -2590,11 +3049,10 @@ func (ablInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle swMinus1, shMinus1 := sw-1, sh-1 srcMask, smp := opts.SrcMask, opts.SrcMaskP dstMask, dmp := opts.DstMask, opts.DstMaskP - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) + dstColorRGBA64 := color.RGBA64{} for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -2611,7 +3069,7 @@ func (ablInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle } for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -2624,39 +3082,282 @@ func (ablInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle xFrac0, xFrac1 = 1, 0 } - s00ru, s00gu, s00bu, s00au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)).RGBA() + s00u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)) if srcMask != nil { _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy0)).RGBA() - s00ru = s00ru * ma / 0xffff - s00gu = s00gu * ma / 0xffff - s00bu = s00bu * ma / 0xffff - s00au = s00au * ma / 0xffff - } - s00r := float64(s00ru) - s00g := float64(s00gu) - s00b := float64(s00bu) - s00a := float64(s00au) - s10ru, s10gu, s10bu, s10au := src.At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)).RGBA() + s00u.R = uint16(uint32(s00u.R) * ma / 0xffff) + s00u.G = uint16(uint32(s00u.G) * ma / 0xffff) + s00u.B = uint16(uint32(s00u.B) * ma / 0xffff) + s00u.A = uint16(uint32(s00u.A) * ma / 0xffff) + } + s00r := float64(s00u.R) + s00g := float64(s00u.G) + s00b := float64(s00u.B) + s00a := float64(s00u.A) + s10u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)) if srcMask != nil { _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy0)).RGBA() - s10ru = s10ru * ma / 0xffff - s10gu = s10gu * ma / 0xffff - s10bu = s10bu * ma / 0xffff - s10au = s10au * ma / 0xffff - } - s10r := float64(s10ru) - s10g := float64(s10gu) - s10b := float64(s10bu) - s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a - s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA() + s10u.R = uint16(uint32(s10u.R) * ma / 0xffff) + s10u.G = uint16(uint32(s10u.G) * ma / 0xffff) + s10u.B = uint16(uint32(s10u.B) * ma / 0xffff) + s10u.A = uint16(uint32(s10u.A) * ma / 0xffff) + } + s10r := float64(s10u.R) + s10g := float64(s10u.G) + s10b := float64(s10u.B) + s10a := float64(s10u.A) + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) + s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)) if srcMask != nil { _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA() - s01ru = s01ru * ma / 0xffff - s01gu = s01gu * ma / 0xffff + s01u.R = uint16(uint32(s01u.R) * ma / 0xffff) + s01u.G = uint16(uint32(s01u.G) * ma / 0xffff) + s01u.B = uint16(uint32(s01u.B) * ma / 0xffff) + s01u.A = uint16(uint32(s01u.A) * ma / 0xffff) + } + s01r := float64(s01u.R) + s01g := float64(s01u.G) + s01b := float64(s01u.B) + s01a := float64(s01u.A) + s11u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy1)).RGBA() + s11u.R = uint16(uint32(s11u.R) * ma / 0xffff) + s11u.G = uint16(uint32(s11u.G) * ma / 0xffff) + s11u.B = uint16(uint32(s11u.B) * ma / 0xffff) + s11u.A = uint16(uint32(s11u.A) * ma / 0xffff) + } + s11r := float64(s11u.R) + s11g := float64(s11u.G) + s11b := float64(s11u.B) + s11a := float64(s11u.A) + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) + p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) + if dstMask != nil { + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + } + pa1 := 0xffff - uint32(p.A) + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R)) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G)) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B)) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A)) + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } + } +} + +func (ablInterpolator) scale_RGBA64Image_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { + sw := int32(sr.Dx()) + sh := int32(sr.Dy()) + yscale := float64(sh) / float64(dr.Dy()) + xscale := float64(sw) / float64(dr.Dx()) + swMinus1, shMinus1 := sw-1, sh-1 + srcMask, smp := opts.SrcMask, opts.SrcMaskP + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := color.RGBA64{} + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + sy := float64((float64(dy)+0.5)*yscale) - 0.5 + // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if + // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for + // sx, below. + sy0 := int32(sy) + yFrac0 := sy - float64(sy0) + yFrac1 := 1 - yFrac0 + sy1 := sy0 + 1 + if sy < 0 { + sy0, sy1 = 0, 0 + yFrac0, yFrac1 = 0, 1 + } else if sy1 > shMinus1 { + sy0, sy1 = shMinus1, shMinus1 + yFrac0, yFrac1 = 1, 0 + } + + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + sx := float64((float64(dx)+0.5)*xscale) - 0.5 + sx0 := int32(sx) + xFrac0 := sx - float64(sx0) + xFrac1 := 1 - xFrac0 + sx1 := sx0 + 1 + if sx < 0 { + sx0, sx1 = 0, 0 + xFrac0, xFrac1 = 0, 1 + } else if sx1 > swMinus1 { + sx0, sx1 = swMinus1, swMinus1 + xFrac0, xFrac1 = 1, 0 + } + + s00u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy0)).RGBA() + s00u.R = uint16(uint32(s00u.R) * ma / 0xffff) + s00u.G = uint16(uint32(s00u.G) * ma / 0xffff) + s00u.B = uint16(uint32(s00u.B) * ma / 0xffff) + s00u.A = uint16(uint32(s00u.A) * ma / 0xffff) + } + s00r := float64(s00u.R) + s00g := float64(s00u.G) + s00b := float64(s00u.B) + s00a := float64(s00u.A) + s10u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy0)).RGBA() + s10u.R = uint16(uint32(s10u.R) * ma / 0xffff) + s10u.G = uint16(uint32(s10u.G) * ma / 0xffff) + s10u.B = uint16(uint32(s10u.B) * ma / 0xffff) + s10u.A = uint16(uint32(s10u.A) * ma / 0xffff) + } + s10r := float64(s10u.R) + s10g := float64(s10u.G) + s10b := float64(s10u.B) + s10a := float64(s10u.A) + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) + s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA() + s01u.R = uint16(uint32(s01u.R) * ma / 0xffff) + s01u.G = uint16(uint32(s01u.G) * ma / 0xffff) + s01u.B = uint16(uint32(s01u.B) * ma / 0xffff) + s01u.A = uint16(uint32(s01u.A) * ma / 0xffff) + } + s01r := float64(s01u.R) + s01g := float64(s01u.G) + s01b := float64(s01u.B) + s01a := float64(s01u.A) + s11u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy1)).RGBA() + s11u.R = uint16(uint32(s11u.R) * ma / 0xffff) + s11u.G = uint16(uint32(s11u.G) * ma / 0xffff) + s11u.B = uint16(uint32(s11u.B) * ma / 0xffff) + s11u.A = uint16(uint32(s11u.A) * ma / 0xffff) + } + s11r := float64(s11u.R) + s11g := float64(s11u.G) + s11b := float64(s11u.B) + s11a := float64(s11u.A) + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) + p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} + if dstMask != nil { + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + pa1 := 0xffff - ma + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R)) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G)) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B)) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A)) + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } else { + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), p) + } + } + } +} + +func (ablInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) { + sw := int32(sr.Dx()) + sh := int32(sr.Dy()) + yscale := float64(sh) / float64(dr.Dy()) + xscale := float64(sw) / float64(dr.Dx()) + swMinus1, shMinus1 := sw-1, sh-1 + srcMask, smp := opts.SrcMask, opts.SrcMaskP + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := &color.RGBA64{} + dstColor := color.Color(dstColorRGBA64) + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + sy := float64((float64(dy)+0.5)*yscale) - 0.5 + // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if + // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for + // sx, below. + sy0 := int32(sy) + yFrac0 := sy - float64(sy0) + yFrac1 := 1 - yFrac0 + sy1 := sy0 + 1 + if sy < 0 { + sy0, sy1 = 0, 0 + yFrac0, yFrac1 = 0, 1 + } else if sy1 > shMinus1 { + sy0, sy1 = shMinus1, shMinus1 + yFrac0, yFrac1 = 1, 0 + } + + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + sx := float64((float64(dx)+0.5)*xscale) - 0.5 + sx0 := int32(sx) + xFrac0 := sx - float64(sx0) + xFrac1 := 1 - xFrac0 + sx1 := sx0 + 1 + if sx < 0 { + sx0, sx1 = 0, 0 + xFrac0, xFrac1 = 0, 1 + } else if sx1 > swMinus1 { + sx0, sx1 = swMinus1, swMinus1 + xFrac0, xFrac1 = 1, 0 + } + + s00ru, s00gu, s00bu, s00au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)).RGBA() + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy0)).RGBA() + s00ru = s00ru * ma / 0xffff + s00gu = s00gu * ma / 0xffff + s00bu = s00bu * ma / 0xffff + s00au = s00au * ma / 0xffff + } + s00r := float64(s00ru) + s00g := float64(s00gu) + s00b := float64(s00bu) + s00a := float64(s00au) + s10ru, s10gu, s10bu, s10au := src.At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)).RGBA() + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy0)).RGBA() + s10ru = s10ru * ma / 0xffff + s10gu = s10gu * ma / 0xffff + s10bu = s10bu * ma / 0xffff + s10au = s10au * ma / 0xffff + } + s10r := float64(s10ru) + s10g := float64(s10gu) + s10b := float64(s10bu) + s10a := float64(s10au) + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) + s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA() + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA() + s01ru = s01ru * ma / 0xffff + s01gu = s01gu * ma / 0xffff s01bu = s01bu * ma / 0xffff s01au = s01au * ma / 0xffff } @@ -2676,14 +3377,14 @@ func (ablInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -2718,7 +3419,7 @@ func (ablInterpolator) scale_Image_Image_Src(dst Image, dr, adr image.Rectangle, dstColor := color.Color(dstColorRGBA64) for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { - sy := (float64(dy)+0.5)*yscale - 0.5 + sy := float64((float64(dy)+0.5)*yscale) - 0.5 // If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if // we say int32(sy) instead of int32(math.Floor(sy)). Similarly for // sx, below. @@ -2735,7 +3436,7 @@ func (ablInterpolator) scale_Image_Image_Src(dst Image, dr, adr image.Rectangle, } for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { - sx := (float64(dx)+0.5)*xscale - 0.5 + sx := float64((float64(dx)+0.5)*xscale) - 0.5 sx0 := int32(sx) xFrac0 := sx - float64(sx0) xFrac1 := 1 - xFrac0 @@ -2772,10 +3473,10 @@ func (ablInterpolator) scale_Image_Image_Src(dst Image, dr, adr image.Rectangle, s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA() if srcMask != nil { _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA() @@ -2800,14 +3501,14 @@ func (ablInterpolator) scale_Image_Image_Src(dst Image, dr, adr image.Rectangle, s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -2842,8 +3543,8 @@ func (ablInterpolator) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Re d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -2882,15 +3583,15 @@ func (ablInterpolator) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Re s10i := (sy0-src.Rect.Min.Y)*src.Stride + (sx1 - src.Rect.Min.X) s10ru := uint32(src.Pix[s10i]) * 0x101 s10r := float64(s10ru) - s10r = xFrac1*s00r + xFrac0*s10r + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0 - src.Rect.Min.X) s01ru := uint32(src.Pix[s01i]) * 0x101 s01r := float64(s01ru) s11i := (sy1-src.Rect.Min.Y)*src.Stride + (sx1 - src.Rect.Min.X) s11ru := uint32(src.Pix[s11i]) * 0x101 s11r := float64(s11ru) - s11r = xFrac1*s01r + xFrac0*s11r - s11r = yFrac1*s10r + yFrac0*s11r + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) pr := uint32(s11r) out := uint8(pr >> 8) dst.Pix[d+0] = out @@ -2907,8 +3608,8 @@ func (ablInterpolator) transform_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image. d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -2959,10 +3660,10 @@ func (ablInterpolator) transform_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image. s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4 s01au := uint32(src.Pix[s01i+3]) * 0x101 s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff @@ -2981,14 +3682,14 @@ func (ablInterpolator) transform_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image. s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -3008,8 +3709,8 @@ func (ablInterpolator) transform_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.R d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -3060,10 +3761,10 @@ func (ablInterpolator) transform_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.R s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4 s01au := uint32(src.Pix[s01i+3]) * 0x101 s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff @@ -3082,14 +3783,14 @@ func (ablInterpolator) transform_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.R s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -3108,8 +3809,8 @@ func (ablInterpolator) transform_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.R d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -3160,10 +3861,10 @@ func (ablInterpolator) transform_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.R s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4 s01ru := uint32(src.Pix[s01i+0]) * 0x101 s01gu := uint32(src.Pix[s01i+1]) * 0x101 @@ -3182,14 +3883,14 @@ func (ablInterpolator) transform_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.R s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -3209,8 +3910,8 @@ func (ablInterpolator) transform_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Re d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -3261,10 +3962,10 @@ func (ablInterpolator) transform_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Re s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4 s01ru := uint32(src.Pix[s01i+0]) * 0x101 s01gu := uint32(src.Pix[s01i+1]) * 0x101 @@ -3283,14 +3984,14 @@ func (ablInterpolator) transform_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Re s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -3309,8 +4010,8 @@ func (ablInterpolator) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr imag d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -3401,9 +4102,9 @@ func (ablInterpolator) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr imag s10r := float64(s10ru) s10g := float64(s10gu) s10b := float64(s10bu) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X) s01j := (sy1-src.Rect.Min.Y)*src.CStride + (sx0 - src.Rect.Min.X) @@ -3462,12 +4163,12 @@ func (ablInterpolator) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr imag s11r := float64(s11ru) s11g := float64(s11gu) s11b := float64(s11bu) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -3485,8 +4186,8 @@ func (ablInterpolator) transform_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr imag d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -3577,9 +4278,9 @@ func (ablInterpolator) transform_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr imag s10r := float64(s10ru) s10g := float64(s10gu) s10b := float64(s10bu) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X) s01j := (sy1-src.Rect.Min.Y)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2) @@ -3638,12 +4339,12 @@ func (ablInterpolator) transform_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr imag s11r := float64(s11ru) s11g := float64(s11gu) s11b := float64(s11bu) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -3661,8 +4362,8 @@ func (ablInterpolator) transform_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr imag d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -3753,9 +4454,9 @@ func (ablInterpolator) transform_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr imag s10r := float64(s10ru) s10g := float64(s10gu) s10b := float64(s10bu) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X) s01j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2) @@ -3814,12 +4515,12 @@ func (ablInterpolator) transform_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr imag s11r := float64(s11ru) s11g := float64(s11gu) s11b := float64(s11bu) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -3837,8 +4538,8 @@ func (ablInterpolator) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr imag d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -3929,9 +4630,9 @@ func (ablInterpolator) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr imag s10r := float64(s10ru) s10g := float64(s10gu) s10b := float64(s10bu) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X) s01j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + (sx0 - src.Rect.Min.X) @@ -3990,12 +4691,12 @@ func (ablInterpolator) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr imag s11r := float64(s11ru) s11g := float64(s11gu) s11b := float64(s11bu) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -4007,14 +4708,177 @@ func (ablInterpolator) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr imag } } +func (ablInterpolator) transform_RGBA_RGBA64Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] + if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { + continue + } + + sx -= 0.5 + sx0 := int(sx) + xFrac0 := sx - float64(sx0) + xFrac1 := 1 - xFrac0 + sx0 += bias.X + sx1 := sx0 + 1 + if sx0 < sr.Min.X { + sx0, sx1 = sr.Min.X, sr.Min.X + xFrac0, xFrac1 = 0, 1 + } else if sx1 >= sr.Max.X { + sx0, sx1 = sr.Max.X-1, sr.Max.X-1 + xFrac0, xFrac1 = 1, 0 + } + + sy -= 0.5 + sy0 := int(sy) + yFrac0 := sy - float64(sy0) + yFrac1 := 1 - yFrac0 + sy0 += bias.Y + sy1 := sy0 + 1 + if sy0 < sr.Min.Y { + sy0, sy1 = sr.Min.Y, sr.Min.Y + yFrac0, yFrac1 = 0, 1 + } else if sy1 >= sr.Max.Y { + sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1 + yFrac0, yFrac1 = 1, 0 + } + + s00u := src.RGBA64At(sx0, sy0) + s00r := float64(s00u.R) + s00g := float64(s00u.G) + s00b := float64(s00u.B) + s00a := float64(s00u.A) + s10u := src.RGBA64At(sx1, sy0) + s10r := float64(s10u.R) + s10g := float64(s10u.G) + s10b := float64(s10u.B) + s10a := float64(s10u.A) + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) + s01u := src.RGBA64At(sx0, sy1) + s01r := float64(s01u.R) + s01g := float64(s01u.G) + s01b := float64(s01u.B) + s01a := float64(s01u.A) + s11u := src.RGBA64At(sx1, sy1) + s11r := float64(s11u.R) + s11g := float64(s11u.G) + s11b := float64(s11u.B) + s11a := float64(s11u.A) + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) + p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} + pa1 := (0xffff - uint32(p.A)) * 0x101 + dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + uint32(p.R)) >> 8) + dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + uint32(p.G)) >> 8) + dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + uint32(p.B)) >> 8) + dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + uint32(p.A)) >> 8) + } + } +} + +func (ablInterpolator) transform_RGBA_RGBA64Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] + if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { + continue + } + + sx -= 0.5 + sx0 := int(sx) + xFrac0 := sx - float64(sx0) + xFrac1 := 1 - xFrac0 + sx0 += bias.X + sx1 := sx0 + 1 + if sx0 < sr.Min.X { + sx0, sx1 = sr.Min.X, sr.Min.X + xFrac0, xFrac1 = 0, 1 + } else if sx1 >= sr.Max.X { + sx0, sx1 = sr.Max.X-1, sr.Max.X-1 + xFrac0, xFrac1 = 1, 0 + } + + sy -= 0.5 + sy0 := int(sy) + yFrac0 := sy - float64(sy0) + yFrac1 := 1 - yFrac0 + sy0 += bias.Y + sy1 := sy0 + 1 + if sy0 < sr.Min.Y { + sy0, sy1 = sr.Min.Y, sr.Min.Y + yFrac0, yFrac1 = 0, 1 + } else if sy1 >= sr.Max.Y { + sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1 + yFrac0, yFrac1 = 1, 0 + } + + s00u := src.RGBA64At(sx0, sy0) + s00r := float64(s00u.R) + s00g := float64(s00u.G) + s00b := float64(s00u.B) + s00a := float64(s00u.A) + s10u := src.RGBA64At(sx1, sy0) + s10r := float64(s10u.R) + s10g := float64(s10u.G) + s10b := float64(s10u.B) + s10a := float64(s10u.A) + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) + s01u := src.RGBA64At(sx0, sy1) + s01r := float64(s01u.R) + s01g := float64(s01u.G) + s01b := float64(s01u.B) + s01a := float64(s01u.A) + s11u := src.RGBA64At(sx1, sy1) + s11r := float64(s11u.R) + s11g := float64(s11u.G) + s11b := float64(s11u.B) + s11a := float64(s11u.A) + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) + p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} + dst.Pix[d+0] = uint8(p.R >> 8) + dst.Pix[d+1] = uint8(p.G >> 8) + dst.Pix[d+2] = uint8(p.B >> 8) + dst.Pix[d+3] = uint8(p.A >> 8) + } + } +} + func (ablInterpolator) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) { for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { dyf := float64(dr.Min.Y+int(dy)) + 0.5 d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -4057,10 +4921,10 @@ func (ablInterpolator) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image. s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA() s01r := float64(s01ru) s01g := float64(s01gu) @@ -4071,14 +4935,14 @@ func (ablInterpolator) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image. s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -4098,8 +4962,8 @@ func (ablInterpolator) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.R d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -4142,10 +5006,10 @@ func (ablInterpolator) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.R s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA() s01r := float64(s01ru) s01g := float64(s01gu) @@ -4156,14 +5020,14 @@ func (ablInterpolator) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.R s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -4176,17 +5040,17 @@ func (ablInterpolator) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.R } } -func (ablInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) { +func (ablInterpolator) transform_RGBA64Image_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { srcMask, smp := opts.SrcMask, opts.SrcMaskP dstMask, dmp := opts.DstMask, opts.DstMaskP - dstColorRGBA64 := &color.RGBA64{} - dstColor := color.Color(dstColorRGBA64) + dstColorRGBA64 := color.RGBA64{} + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { dyf := float64(dr.Min.Y+int(dy)) + 0.5 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -4219,66 +5083,312 @@ func (ablInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Recta yFrac0, yFrac1 = 1, 0 } - s00ru, s00gu, s00bu, s00au := src.At(sx0, sy0).RGBA() + s00u := src.RGBA64At(sx0, sy0) if srcMask != nil { _, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA() - s00ru = s00ru * ma / 0xffff - s00gu = s00gu * ma / 0xffff - s00bu = s00bu * ma / 0xffff - s00au = s00au * ma / 0xffff - } - s00r := float64(s00ru) - s00g := float64(s00gu) - s00b := float64(s00bu) - s00a := float64(s00au) - s10ru, s10gu, s10bu, s10au := src.At(sx1, sy0).RGBA() + s00u.R = uint16(uint32(s00u.R) * ma / 0xffff) + s00u.G = uint16(uint32(s00u.G) * ma / 0xffff) + s00u.B = uint16(uint32(s00u.B) * ma / 0xffff) + s00u.A = uint16(uint32(s00u.A) * ma / 0xffff) + } + s00r := float64(s00u.R) + s00g := float64(s00u.G) + s00b := float64(s00u.B) + s00a := float64(s00u.A) + s10u := src.RGBA64At(sx1, sy0) if srcMask != nil { _, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy0).RGBA() - s10ru = s10ru * ma / 0xffff - s10gu = s10gu * ma / 0xffff - s10bu = s10bu * ma / 0xffff - s10au = s10au * ma / 0xffff - } - s10r := float64(s10ru) - s10g := float64(s10gu) - s10b := float64(s10bu) - s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a - s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA() + s10u.R = uint16(uint32(s10u.R) * ma / 0xffff) + s10u.G = uint16(uint32(s10u.G) * ma / 0xffff) + s10u.B = uint16(uint32(s10u.B) * ma / 0xffff) + s10u.A = uint16(uint32(s10u.A) * ma / 0xffff) + } + s10r := float64(s10u.R) + s10g := float64(s10u.G) + s10b := float64(s10u.B) + s10a := float64(s10u.A) + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) + s01u := src.RGBA64At(sx0, sy1) if srcMask != nil { _, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA() - s01ru = s01ru * ma / 0xffff - s01gu = s01gu * ma / 0xffff - s01bu = s01bu * ma / 0xffff - s01au = s01au * ma / 0xffff - } - s01r := float64(s01ru) - s01g := float64(s01gu) - s01b := float64(s01bu) - s01a := float64(s01au) - s11ru, s11gu, s11bu, s11au := src.At(sx1, sy1).RGBA() + s01u.R = uint16(uint32(s01u.R) * ma / 0xffff) + s01u.G = uint16(uint32(s01u.G) * ma / 0xffff) + s01u.B = uint16(uint32(s01u.B) * ma / 0xffff) + s01u.A = uint16(uint32(s01u.A) * ma / 0xffff) + } + s01r := float64(s01u.R) + s01g := float64(s01u.G) + s01b := float64(s01u.B) + s01a := float64(s01u.A) + s11u := src.RGBA64At(sx1, sy1) if srcMask != nil { _, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy1).RGBA() - s11ru = s11ru * ma / 0xffff - s11gu = s11gu * ma / 0xffff - s11bu = s11bu * ma / 0xffff - s11au = s11au * ma / 0xffff - } - s11r := float64(s11ru) - s11g := float64(s11gu) - s11b := float64(s11bu) + s11u.R = uint16(uint32(s11u.R) * ma / 0xffff) + s11u.G = uint16(uint32(s11u.G) * ma / 0xffff) + s11u.B = uint16(uint32(s11u.B) * ma / 0xffff) + s11u.A = uint16(uint32(s11u.A) * ma / 0xffff) + } + s11r := float64(s11u.R) + s11g := float64(s11u.G) + s11b := float64(s11u.B) + s11a := float64(s11u.A) + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) + p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) + if dstMask != nil { + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + } + pa1 := 0xffff - uint32(p.A) + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R)) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G)) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B)) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A)) + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } + } +} + +func (ablInterpolator) transform_RGBA64Image_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { + srcMask, smp := opts.SrcMask, opts.SrcMaskP + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := color.RGBA64{} + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] + if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { + continue + } + + sx -= 0.5 + sx0 := int(sx) + xFrac0 := sx - float64(sx0) + xFrac1 := 1 - xFrac0 + sx0 += bias.X + sx1 := sx0 + 1 + if sx0 < sr.Min.X { + sx0, sx1 = sr.Min.X, sr.Min.X + xFrac0, xFrac1 = 0, 1 + } else if sx1 >= sr.Max.X { + sx0, sx1 = sr.Max.X-1, sr.Max.X-1 + xFrac0, xFrac1 = 1, 0 + } + + sy -= 0.5 + sy0 := int(sy) + yFrac0 := sy - float64(sy0) + yFrac1 := 1 - yFrac0 + sy0 += bias.Y + sy1 := sy0 + 1 + if sy0 < sr.Min.Y { + sy0, sy1 = sr.Min.Y, sr.Min.Y + yFrac0, yFrac1 = 0, 1 + } else if sy1 >= sr.Max.Y { + sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1 + yFrac0, yFrac1 = 1, 0 + } + + s00u := src.RGBA64At(sx0, sy0) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA() + s00u.R = uint16(uint32(s00u.R) * ma / 0xffff) + s00u.G = uint16(uint32(s00u.G) * ma / 0xffff) + s00u.B = uint16(uint32(s00u.B) * ma / 0xffff) + s00u.A = uint16(uint32(s00u.A) * ma / 0xffff) + } + s00r := float64(s00u.R) + s00g := float64(s00u.G) + s00b := float64(s00u.B) + s00a := float64(s00u.A) + s10u := src.RGBA64At(sx1, sy0) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy0).RGBA() + s10u.R = uint16(uint32(s10u.R) * ma / 0xffff) + s10u.G = uint16(uint32(s10u.G) * ma / 0xffff) + s10u.B = uint16(uint32(s10u.B) * ma / 0xffff) + s10u.A = uint16(uint32(s10u.A) * ma / 0xffff) + } + s10r := float64(s10u.R) + s10g := float64(s10u.G) + s10b := float64(s10u.B) + s10a := float64(s10u.A) + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) + s01u := src.RGBA64At(sx0, sy1) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA() + s01u.R = uint16(uint32(s01u.R) * ma / 0xffff) + s01u.G = uint16(uint32(s01u.G) * ma / 0xffff) + s01u.B = uint16(uint32(s01u.B) * ma / 0xffff) + s01u.A = uint16(uint32(s01u.A) * ma / 0xffff) + } + s01r := float64(s01u.R) + s01g := float64(s01u.G) + s01b := float64(s01u.B) + s01a := float64(s01u.A) + s11u := src.RGBA64At(sx1, sy1) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy1).RGBA() + s11u.R = uint16(uint32(s11u.R) * ma / 0xffff) + s11u.G = uint16(uint32(s11u.G) * ma / 0xffff) + s11u.B = uint16(uint32(s11u.B) * ma / 0xffff) + s11u.A = uint16(uint32(s11u.A) * ma / 0xffff) + } + s11r := float64(s11u.R) + s11g := float64(s11u.G) + s11b := float64(s11u.B) + s11a := float64(s11u.A) + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) + p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} + if dstMask != nil { + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA() + p.R = uint16(uint32(p.R) * ma / 0xffff) + p.G = uint16(uint32(p.G) * ma / 0xffff) + p.B = uint16(uint32(p.B) * ma / 0xffff) + p.A = uint16(uint32(p.A) * ma / 0xffff) + pa1 := 0xffff - ma + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R)) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G)) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B)) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A)) + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } else { + dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), p) + } + } + } +} + +func (ablInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) { + srcMask, smp := opts.SrcMask, opts.SrcMaskP + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := &color.RGBA64{} + dstColor := color.Color(dstColorRGBA64) + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] + if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { + continue + } + + sx -= 0.5 + sx0 := int(sx) + xFrac0 := sx - float64(sx0) + xFrac1 := 1 - xFrac0 + sx0 += bias.X + sx1 := sx0 + 1 + if sx0 < sr.Min.X { + sx0, sx1 = sr.Min.X, sr.Min.X + xFrac0, xFrac1 = 0, 1 + } else if sx1 >= sr.Max.X { + sx0, sx1 = sr.Max.X-1, sr.Max.X-1 + xFrac0, xFrac1 = 1, 0 + } + + sy -= 0.5 + sy0 := int(sy) + yFrac0 := sy - float64(sy0) + yFrac1 := 1 - yFrac0 + sy0 += bias.Y + sy1 := sy0 + 1 + if sy0 < sr.Min.Y { + sy0, sy1 = sr.Min.Y, sr.Min.Y + yFrac0, yFrac1 = 0, 1 + } else if sy1 >= sr.Max.Y { + sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1 + yFrac0, yFrac1 = 1, 0 + } + + s00ru, s00gu, s00bu, s00au := src.At(sx0, sy0).RGBA() + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA() + s00ru = s00ru * ma / 0xffff + s00gu = s00gu * ma / 0xffff + s00bu = s00bu * ma / 0xffff + s00au = s00au * ma / 0xffff + } + s00r := float64(s00ru) + s00g := float64(s00gu) + s00b := float64(s00bu) + s00a := float64(s00au) + s10ru, s10gu, s10bu, s10au := src.At(sx1, sy0).RGBA() + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy0).RGBA() + s10ru = s10ru * ma / 0xffff + s10gu = s10gu * ma / 0xffff + s10bu = s10bu * ma / 0xffff + s10au = s10au * ma / 0xffff + } + s10r := float64(s10ru) + s10g := float64(s10gu) + s10b := float64(s10bu) + s10a := float64(s10au) + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) + s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA() + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA() + s01ru = s01ru * ma / 0xffff + s01gu = s01gu * ma / 0xffff + s01bu = s01bu * ma / 0xffff + s01au = s01au * ma / 0xffff + } + s01r := float64(s01ru) + s01g := float64(s01gu) + s01b := float64(s01bu) + s01a := float64(s01au) + s11ru, s11gu, s11bu, s11au := src.At(sx1, sy1).RGBA() + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy1).RGBA() + s11ru = s11ru * ma / 0xffff + s11gu = s11gu * ma / 0xffff + s11bu = s11bu * ma / 0xffff + s11au = s11au * ma / 0xffff + } + s11r := float64(s11ru) + s11g := float64(s11gu) + s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -4310,8 +5420,8 @@ func (ablInterpolator) transform_Image_Image_Src(dst Image, dr, adr image.Rectan dyf := float64(dr.Min.Y+int(dy)) + 0.5 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -4368,10 +5478,10 @@ func (ablInterpolator) transform_Image_Image_Src(dst Image, dr, adr image.Rectan s10g := float64(s10gu) s10b := float64(s10bu) s10a := float64(s10au) - s10r = xFrac1*s00r + xFrac0*s10r - s10g = xFrac1*s00g + xFrac0*s10g - s10b = xFrac1*s00b + xFrac0*s10b - s10a = xFrac1*s00a + xFrac0*s10a + s10r = float64(xFrac1*s00r) + float64(xFrac0*s10r) + s10g = float64(xFrac1*s00g) + float64(xFrac0*s10g) + s10b = float64(xFrac1*s00b) + float64(xFrac0*s10b) + s10a = float64(xFrac1*s00a) + float64(xFrac0*s10a) s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA() if srcMask != nil { _, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA() @@ -4396,14 +5506,14 @@ func (ablInterpolator) transform_Image_Image_Src(dst Image, dr, adr image.Rectan s11g := float64(s11gu) s11b := float64(s11bu) s11a := float64(s11au) - s11r = xFrac1*s01r + xFrac0*s11r - s11g = xFrac1*s01g + xFrac0*s11g - s11b = xFrac1*s01b + xFrac0*s11b - s11a = xFrac1*s01a + xFrac0*s11a - s11r = yFrac1*s10r + yFrac0*s11r - s11g = yFrac1*s10g + yFrac0*s11g - s11b = yFrac1*s10b + yFrac0*s11b - s11a = yFrac1*s10a + yFrac0*s11a + s11r = float64(xFrac1*s01r) + float64(xFrac0*s11r) + s11g = float64(xFrac1*s01g) + float64(xFrac0*s11g) + s11b = float64(xFrac1*s01b) + float64(xFrac0*s11b) + s11a = float64(xFrac1*s01a) + float64(xFrac0*s11a) + s11r = float64(yFrac1*s10r) + float64(yFrac0*s11r) + s11g = float64(yFrac1*s10g) + float64(yFrac0*s11g) + s11b = float64(yFrac1*s10b) + float64(yFrac0*s11b) + s11a = float64(yFrac1*s10a) + float64(yFrac0*s11a) pr := uint32(s11r) pg := uint32(s11g) pb := uint32(s11b) @@ -4500,6 +5610,8 @@ func (z *kernelScaler) Scale(dst Image, dr image.Rectangle, src image.Image, sr case image.YCbCrSubsampleRatio440: z.scaleX_YCbCr440(tmp, src, sr, &o) } + case image.RGBA64Image: + z.scaleX_RGBA64Image(tmp, src, sr, &o) default: z.scaleX_Image(tmp, src, sr, &o) } @@ -4518,6 +5630,8 @@ func (z *kernelScaler) Scale(dst Image, dr image.Rectangle, src image.Image, sr switch dst := dst.(type) { case *image.RGBA: z.scaleY_RGBA_Over(dst, dr, adr, tmp, &o) + case RGBA64Image: + z.scaleY_RGBA64Image_Over(dst, dr, adr, tmp, &o) default: z.scaleY_Image_Over(dst, dr, adr, tmp, &o) } @@ -4525,6 +5639,8 @@ func (z *kernelScaler) Scale(dst Image, dr image.Rectangle, src image.Image, sr switch dst := dst.(type) { case *image.RGBA: z.scaleY_RGBA_Src(dst, dr, adr, tmp, &o) + case RGBA64Image: + z.scaleY_RGBA64Image_Src(dst, dr, adr, tmp, &o) default: z.scaleY_Image_Src(dst, dr, adr, tmp, &o) } @@ -4600,9 +5716,16 @@ func (q *Kernel) Transform(dst Image, s2d f64.Aff3, src image.Image, sr image.Re q.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o) case *image.RGBA: q.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o) + case image.RGBA64Image: + q.transform_RGBA_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o) default: q.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o) } + case RGBA64Image: + switch src := src.(type) { + case image.RGBA64Image: + q.transform_RGBA64Image_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o) + } default: switch src := src.(type) { default: @@ -4632,9 +5755,16 @@ func (q *Kernel) Transform(dst Image, s2d f64.Aff3, src image.Image, sr image.Re case image.YCbCrSubsampleRatio440: q.transform_RGBA_YCbCr440_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o) } + case image.RGBA64Image: + q.transform_RGBA_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o) default: q.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o) } + case RGBA64Image: + switch src := src.(type) { + case image.RGBA64Image: + q.transform_RGBA64Image_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o) + } default: switch src := src.(type) { default: @@ -4653,7 +5783,7 @@ func (z *kernelScaler) scaleX_Gray(tmp [][4]float64, src *image.Gray, sr image.R for _, c := range z.horizontal.contribs[s.i:s.j] { pi := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(c.coord) - src.Rect.Min.X) pru := uint32(src.Pix[pi]) * 0x101 - pr += float64(pru) * c.weight + pr += float64(float64(pru) * c.weight) } pr *= s.invTotalWeightFFFF tmp[t] = [4]float64{ @@ -4678,10 +5808,10 @@ func (z *kernelScaler) scaleX_NRGBA(tmp [][4]float64, src *image.NRGBA, sr image pru := uint32(src.Pix[pi+0]) * pau / 0xff pgu := uint32(src.Pix[pi+1]) * pau / 0xff pbu := uint32(src.Pix[pi+2]) * pau / 0xff - pr += float64(pru) * c.weight - pg += float64(pgu) * c.weight - pb += float64(pbu) * c.weight - pa += float64(pau) * c.weight + pr += float64(float64(pru) * c.weight) + pg += float64(float64(pgu) * c.weight) + pb += float64(float64(pbu) * c.weight) + pa += float64(float64(pau) * c.weight) } tmp[t] = [4]float64{ pr * s.invTotalWeightFFFF, @@ -4705,10 +5835,10 @@ func (z *kernelScaler) scaleX_RGBA(tmp [][4]float64, src *image.RGBA, sr image.R pgu := uint32(src.Pix[pi+1]) * 0x101 pbu := uint32(src.Pix[pi+2]) * 0x101 pau := uint32(src.Pix[pi+3]) * 0x101 - pr += float64(pru) * c.weight - pg += float64(pgu) * c.weight - pb += float64(pbu) * c.weight - pa += float64(pau) * c.weight + pr += float64(float64(pru) * c.weight) + pg += float64(float64(pgu) * c.weight) + pb += float64(float64(pbu) * c.weight) + pa += float64(float64(pau) * c.weight) } tmp[t] = [4]float64{ pr * s.invTotalWeightFFFF, @@ -4753,9 +5883,9 @@ func (z *kernelScaler) scaleX_YCbCr444(tmp [][4]float64, src *image.YCbCr, sr im pbu = 0xffff } - pr += float64(pru) * c.weight - pg += float64(pgu) * c.weight - pb += float64(pbu) * c.weight + pr += float64(float64(pru) * c.weight) + pg += float64(float64(pgu) * c.weight) + pb += float64(float64(pbu) * c.weight) } tmp[t] = [4]float64{ pr * s.invTotalWeightFFFF, @@ -4800,9 +5930,9 @@ func (z *kernelScaler) scaleX_YCbCr422(tmp [][4]float64, src *image.YCbCr, sr im pbu = 0xffff } - pr += float64(pru) * c.weight - pg += float64(pgu) * c.weight - pb += float64(pbu) * c.weight + pr += float64(float64(pru) * c.weight) + pg += float64(float64(pgu) * c.weight) + pb += float64(float64(pbu) * c.weight) } tmp[t] = [4]float64{ pr * s.invTotalWeightFFFF, @@ -4847,9 +5977,9 @@ func (z *kernelScaler) scaleX_YCbCr420(tmp [][4]float64, src *image.YCbCr, sr im pbu = 0xffff } - pr += float64(pru) * c.weight - pg += float64(pgu) * c.weight - pb += float64(pbu) * c.weight + pr += float64(float64(pru) * c.weight) + pg += float64(float64(pgu) * c.weight) + pb += float64(float64(pbu) * c.weight) } tmp[t] = [4]float64{ pr * s.invTotalWeightFFFF, @@ -4894,9 +6024,9 @@ func (z *kernelScaler) scaleX_YCbCr440(tmp [][4]float64, src *image.YCbCr, sr im pbu = 0xffff } - pr += float64(pru) * c.weight - pg += float64(pgu) * c.weight - pb += float64(pbu) * c.weight + pr += float64(float64(pru) * c.weight) + pg += float64(float64(pgu) * c.weight) + pb += float64(float64(pbu) * c.weight) } tmp[t] = [4]float64{ pr * s.invTotalWeightFFFF, @@ -4909,6 +6039,37 @@ func (z *kernelScaler) scaleX_YCbCr440(tmp [][4]float64, src *image.YCbCr, sr im } } +func (z *kernelScaler) scaleX_RGBA64Image(tmp [][4]float64, src image.RGBA64Image, sr image.Rectangle, opts *Options) { + t := 0 + srcMask, smp := opts.SrcMask, opts.SrcMaskP + for y := int32(0); y < z.sh; y++ { + for _, s := range z.horizontal.sources { + var pr, pg, pb, pa float64 + for _, c := range z.horizontal.contribs[s.i:s.j] { + pu := src.RGBA64At(sr.Min.X+int(c.coord), sr.Min.Y+int(y)) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(c.coord), smp.Y+sr.Min.Y+int(y)).RGBA() + pu.R = uint16(uint32(pu.R) * ma / 0xffff) + pu.G = uint16(uint32(pu.G) * ma / 0xffff) + pu.B = uint16(uint32(pu.B) * ma / 0xffff) + pu.A = uint16(uint32(pu.A) * ma / 0xffff) + } + pr += float64(float64(pu.R) * c.weight) + pg += float64(float64(pu.G) * c.weight) + pb += float64(float64(pu.B) * c.weight) + pa += float64(float64(pu.A) * c.weight) + } + tmp[t] = [4]float64{ + pr * s.invTotalWeightFFFF, + pg * s.invTotalWeightFFFF, + pb * s.invTotalWeightFFFF, + pa * s.invTotalWeightFFFF, + } + t++ + } + } +} + func (z *kernelScaler) scaleX_Image(tmp [][4]float64, src image.Image, sr image.Rectangle, opts *Options) { t := 0 srcMask, smp := opts.SrcMask, opts.SrcMaskP @@ -4924,10 +6085,10 @@ func (z *kernelScaler) scaleX_Image(tmp [][4]float64, src image.Image, sr image. pbu = pbu * ma / 0xffff pau = pau * ma / 0xffff } - pr += float64(pru) * c.weight - pg += float64(pgu) * c.weight - pb += float64(pbu) * c.weight - pa += float64(pau) * c.weight + pr += float64(float64(pru) * c.weight) + pg += float64(float64(pgu) * c.weight) + pb += float64(float64(pbu) * c.weight) + pa += float64(float64(pau) * c.weight) } tmp[t] = [4]float64{ pr * s.invTotalWeightFFFF, @@ -4947,10 +6108,10 @@ func (z *kernelScaler) scaleY_RGBA_Over(dst *image.RGBA, dr, adr image.Rectangle var pr, pg, pb, pa float64 for _, c := range z.vertical.contribs[s.i:s.j] { p := &tmp[c.coord*z.dw+dx] - pr += p[0] * c.weight - pg += p[1] * c.weight - pb += p[2] * c.weight - pa += p[3] * c.weight + pr += float64(p[0] * c.weight) + pg += float64(p[1] * c.weight) + pb += float64(p[2] * c.weight) + pa += float64(p[3] * c.weight) } if pr > pa { @@ -4984,10 +6145,10 @@ func (z *kernelScaler) scaleY_RGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, var pr, pg, pb, pa float64 for _, c := range z.vertical.contribs[s.i:s.j] { p := &tmp[c.coord*z.dw+dx] - pr += p[0] * c.weight - pg += p[1] * c.weight - pb += p[2] * c.weight - pa += p[3] * c.weight + pr += float64(p[0] * c.weight) + pg += float64(p[1] * c.weight) + pb += float64(p[2] * c.weight) + pa += float64(p[3] * c.weight) } if pr > pa { @@ -5009,6 +6170,102 @@ func (z *kernelScaler) scaleY_RGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, } } +func (z *kernelScaler) scaleY_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) { + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := color.RGBA64{} + + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + for dy, s := range z.vertical.sources[adr.Min.Y:adr.Max.Y] { + var pr, pg, pb, pa float64 + for _, c := range z.vertical.contribs[s.i:s.j] { + p := &tmp[c.coord*z.dw+dx] + pr += float64(p[0] * c.weight) + pg += float64(p[1] * c.weight) + pb += float64(p[2] * c.weight) + pa += float64(p[3] * c.weight) + } + + if pr > pa { + pr = pa + } + if pg > pa { + pg = pa + } + if pb > pa { + pb = pa + } + + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy)) + pr0 := uint32(ftou(pr * s.invTotalWeight)) + pg0 := uint32(ftou(pg * s.invTotalWeight)) + pb0 := uint32(ftou(pb * s.invTotalWeight)) + pa0 := uint32(ftou(pa * s.invTotalWeight)) + if dstMask != nil { + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(adr.Min.Y+dy)).RGBA() + pr0 = pr0 * ma / 0xffff + pg0 = pg0 * ma / 0xffff + pb0 = pb0 * ma / 0xffff + pa0 = pa0 * ma / 0xffff + } + pa1 := 0xffff - pa0 + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + pr0) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + pg0) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + pb0) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + pa0) + dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy), dstColorRGBA64) + } + } +} + +func (z *kernelScaler) scaleY_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) { + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := color.RGBA64{} + + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + for dy, s := range z.vertical.sources[adr.Min.Y:adr.Max.Y] { + var pr, pg, pb, pa float64 + for _, c := range z.vertical.contribs[s.i:s.j] { + p := &tmp[c.coord*z.dw+dx] + pr += float64(p[0] * c.weight) + pg += float64(p[1] * c.weight) + pb += float64(p[2] * c.weight) + pa += float64(p[3] * c.weight) + } + + if pr > pa { + pr = pa + } + if pg > pa { + pg = pa + } + if pb > pa { + pb = pa + } + + if dstMask != nil { + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy)) + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(adr.Min.Y+dy)).RGBA() + pr := uint32(ftou(pr*s.invTotalWeight)) * ma / 0xffff + pg := uint32(ftou(pg*s.invTotalWeight)) * ma / 0xffff + pb := uint32(ftou(pb*s.invTotalWeight)) * ma / 0xffff + pa := uint32(ftou(pa*s.invTotalWeight)) * ma / 0xffff + pa1 := 0xffff - ma + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + pr) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + pg) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + pb) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + pa) + dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy), dstColorRGBA64) + } else { + dstColorRGBA64.R = ftou(pr * s.invTotalWeight) + dstColorRGBA64.G = ftou(pg * s.invTotalWeight) + dstColorRGBA64.B = ftou(pb * s.invTotalWeight) + dstColorRGBA64.A = ftou(pa * s.invTotalWeight) + dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy), dstColorRGBA64) + } + } + } +} + func (z *kernelScaler) scaleY_Image_Over(dst Image, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) { dstMask, dmp := opts.DstMask, opts.DstMaskP dstColorRGBA64 := &color.RGBA64{} @@ -5018,10 +6275,10 @@ func (z *kernelScaler) scaleY_Image_Over(dst Image, dr, adr image.Rectangle, tmp var pr, pg, pb, pa float64 for _, c := range z.vertical.contribs[s.i:s.j] { p := &tmp[c.coord*z.dw+dx] - pr += p[0] * c.weight - pg += p[1] * c.weight - pb += p[2] * c.weight - pa += p[3] * c.weight + pr += float64(p[0] * c.weight) + pg += float64(p[1] * c.weight) + pb += float64(p[2] * c.weight) + pa += float64(p[3] * c.weight) } if pr > pa { @@ -5065,10 +6322,10 @@ func (z *kernelScaler) scaleY_Image_Src(dst Image, dr, adr image.Rectangle, tmp var pr, pg, pb, pa float64 for _, c := range z.vertical.contribs[s.i:s.j] { p := &tmp[c.coord*z.dw+dx] - pr += p[0] * c.weight - pg += p[1] * c.weight - pb += p[2] * c.weight - pa += p[3] * c.weight + pr += float64(p[0] * c.weight) + pg += float64(p[1] * c.weight) + pb += float64(p[2] * c.weight) + pa += float64(p[3] * c.weight) } if pr > pa { @@ -5127,8 +6384,8 @@ func (q *Kernel) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangl d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -5190,7 +6447,7 @@ func (q *Kernel) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangl if w := xWeights[kx-ix] * yWeight; w != 0 { pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X) pru := uint32(src.Pix[pi]) * 0x101 - pr += float64(pru) * w + pr += float64(float64(pru) * w) } } } @@ -5226,8 +6483,8 @@ func (q *Kernel) transform_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rectan d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -5292,10 +6549,10 @@ func (q *Kernel) transform_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rectan pru := uint32(src.Pix[pi+0]) * pau / 0xff pgu := uint32(src.Pix[pi+1]) * pau / 0xff pbu := uint32(src.Pix[pi+2]) * pau / 0xff - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) + pa += float64(float64(pau) * w) } } } @@ -5346,8 +6603,8 @@ func (q *Kernel) transform_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Rectang d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -5412,10 +6669,10 @@ func (q *Kernel) transform_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Rectang pru := uint32(src.Pix[pi+0]) * pau / 0xff pgu := uint32(src.Pix[pi+1]) * pau / 0xff pbu := uint32(src.Pix[pi+2]) * pau / 0xff - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) + pa += float64(float64(pau) * w) } } } @@ -5461,8 +6718,8 @@ func (q *Kernel) transform_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Rectang d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -5527,10 +6784,10 @@ func (q *Kernel) transform_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Rectang pgu := uint32(src.Pix[pi+1]) * 0x101 pbu := uint32(src.Pix[pi+2]) * 0x101 pau := uint32(src.Pix[pi+3]) * 0x101 - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) + pa += float64(float64(pau) * w) } } } @@ -5581,8 +6838,8 @@ func (q *Kernel) transform_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectangl d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -5647,10 +6904,10 @@ func (q *Kernel) transform_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectangl pgu := uint32(src.Pix[pi+1]) * 0x101 pbu := uint32(src.Pix[pi+2]) * 0x101 pau := uint32(src.Pix[pi+3]) * 0x101 - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) + pa += float64(float64(pau) * w) } } } @@ -5696,8 +6953,8 @@ func (q *Kernel) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rect d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -5783,9 +7040,9 @@ func (q *Kernel) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rect pbu = 0xffff } - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) } } } @@ -5820,8 +7077,8 @@ func (q *Kernel) transform_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Rect d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -5907,9 +7164,9 @@ func (q *Kernel) transform_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Rect pbu = 0xffff } - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) } } } @@ -5944,8 +7201,8 @@ func (q *Kernel) transform_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Rect d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -6031,9 +7288,9 @@ func (q *Kernel) transform_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Rect pbu = 0xffff } - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) } } } @@ -6068,8 +7325,8 @@ func (q *Kernel) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Rect d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -6155,9 +7412,9 @@ func (q *Kernel) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Rect pbu = 0xffff } - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) } } } @@ -6170,7 +7427,7 @@ func (q *Kernel) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Rect } } -func (q *Kernel) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) { +func (q *Kernel) transform_RGBA_RGBA64Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) { // When shrinking, broaden the effective kernel support so that we still // visit every source pixel. xHalfWidth, xKernelArgScale := q.Support, 1.0 @@ -6192,8 +7449,8 @@ func (q *Kernel) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectan d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -6253,11 +7510,11 @@ func (q *Kernel) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectan if yWeight := yWeights[ky-iy]; yWeight != 0 { for kx := ix; kx < jx; kx++ { if w := xWeights[kx-ix] * yWeight; w != 0 { - pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + pu := src.RGBA64At(kx, ky) + pr += float64(float64(pu.R) * w) + pg += float64(float64(pu.G) * w) + pb += float64(float64(pu.B) * w) + pa += float64(float64(pu.A) * w) } } } @@ -6286,7 +7543,7 @@ func (q *Kernel) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectan } } -func (q *Kernel) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) { +func (q *Kernel) transform_RGBA_RGBA64Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) { // When shrinking, broaden the effective kernel support so that we still // visit every source pixel. xHalfWidth, xKernelArgScale := q.Support, 1.0 @@ -6308,8 +7565,8 @@ func (q *Kernel) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectang d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -6369,11 +7626,11 @@ func (q *Kernel) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectang if yWeight := yWeights[ky-iy]; yWeight != 0 { for kx := ix; kx < jx; kx++ { if w := xWeights[kx-ix] * yWeight; w != 0 { - pru, pgu, pbu, pau := src.At(kx, ky).RGBA() - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + pu := src.RGBA64At(kx, ky) + pr += float64(float64(pu.R) * w) + pg += float64(float64(pu.G) * w) + pb += float64(float64(pu.B) * w) + pa += float64(float64(pu.A) * w) } } } @@ -6397,6 +7654,505 @@ func (q *Kernel) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectang } } +func (q *Kernel) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] + if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { + continue + } + + // TODO: adjust the bias so that we can use int(f) instead + // of math.Floor(f) and math.Ceil(f). + sx += float64(bias.X) + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy += float64(bias.Y) + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pru, pgu, pbu, pau := src.At(kx, ky).RGBA() + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) + pa += float64(float64(pau) * w) + } + } + } + } + + if pr > pa { + pr = pa + } + if pg > pa { + pg = pa + } + if pb > pa { + pb = pa + } + + pr0 := uint32(fffftou(pr)) + pg0 := uint32(fffftou(pg)) + pb0 := uint32(fffftou(pb)) + pa0 := uint32(fffftou(pa)) + pa1 := (0xffff - uint32(pa0)) * 0x101 + dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr0) >> 8) + dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg0) >> 8) + dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb0) >> 8) + dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa0) >> 8) + } + } +} + +func (q *Kernel) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] + if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { + continue + } + + // TODO: adjust the bias so that we can use int(f) instead + // of math.Floor(f) and math.Ceil(f). + sx += float64(bias.X) + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy += float64(bias.Y) + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pru, pgu, pbu, pau := src.At(kx, ky).RGBA() + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) + pa += float64(float64(pau) * w) + } + } + } + } + + if pr > pa { + pr = pa + } + if pg > pa { + pg = pa + } + if pb > pa { + pb = pa + } + + dst.Pix[d+0] = uint8(fffftou(pr) >> 8) + dst.Pix[d+1] = uint8(fffftou(pg) >> 8) + dst.Pix[d+2] = uint8(fffftou(pb) >> 8) + dst.Pix[d+3] = uint8(fffftou(pa) >> 8) + } + } +} + +func (q *Kernel) transform_RGBA64Image_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + srcMask, smp := opts.SrcMask, opts.SrcMaskP + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := color.RGBA64{} + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] + if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { + continue + } + + // TODO: adjust the bias so that we can use int(f) instead + // of math.Floor(f) and math.Ceil(f). + sx += float64(bias.X) + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy += float64(bias.Y) + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pu := src.RGBA64At(kx, ky) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+kx, smp.Y+ky).RGBA() + pu.R = uint16(uint32(pu.R) * ma / 0xffff) + pu.G = uint16(uint32(pu.G) * ma / 0xffff) + pu.B = uint16(uint32(pu.B) * ma / 0xffff) + pu.A = uint16(uint32(pu.A) * ma / 0xffff) + } + pr += float64(float64(pu.R) * w) + pg += float64(float64(pu.G) * w) + pb += float64(float64(pu.B) * w) + pa += float64(float64(pu.A) * w) + } + } + } + } + + if pr > pa { + pr = pa + } + if pg > pa { + pg = pa + } + if pb > pa { + pb = pa + } + + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) + pr0 := uint32(fffftou(pr)) + pg0 := uint32(fffftou(pg)) + pb0 := uint32(fffftou(pb)) + pa0 := uint32(fffftou(pa)) + if dstMask != nil { + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA() + pr0 = pr0 * ma / 0xffff + pg0 = pg0 * ma / 0xffff + pb0 = pb0 * ma / 0xffff + pa0 = pa0 * ma / 0xffff + } + pa1 := 0xffff - pa0 + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + pr0) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + pg0) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + pb0) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + pa0) + dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } + } +} + +func (q *Kernel) transform_RGBA64Image_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) { + // When shrinking, broaden the effective kernel support so that we still + // visit every source pixel. + xHalfWidth, xKernelArgScale := q.Support, 1.0 + if xscale > 1 { + xHalfWidth *= xscale + xKernelArgScale = 1 / xscale + } + yHalfWidth, yKernelArgScale := q.Support, 1.0 + if yscale > 1 { + yHalfWidth *= yscale + yKernelArgScale = 1 / yscale + } + + xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth))) + yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth))) + + srcMask, smp := opts.SrcMask, opts.SrcMaskP + dstMask, dmp := opts.DstMask, opts.DstMaskP + dstColorRGBA64 := color.RGBA64{} + + for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ { + dyf := float64(dr.Min.Y+int(dy)) + 0.5 + for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { + dxf := float64(dr.Min.X+int(dx)) + 0.5 + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] + if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { + continue + } + + // TODO: adjust the bias so that we can use int(f) instead + // of math.Floor(f) and math.Ceil(f). + sx += float64(bias.X) + sx -= 0.5 + ix := int(math.Floor(sx - xHalfWidth)) + if ix < sr.Min.X { + ix = sr.Min.X + } + jx := int(math.Ceil(sx + xHalfWidth)) + if jx > sr.Max.X { + jx = sr.Max.X + } + + totalXWeight := 0.0 + for kx := ix; kx < jx; kx++ { + xWeight := 0.0 + if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support { + xWeight = q.At(t) + } + xWeights[kx-ix] = xWeight + totalXWeight += xWeight + } + for x := range xWeights[:jx-ix] { + xWeights[x] /= totalXWeight + } + + sy += float64(bias.Y) + sy -= 0.5 + iy := int(math.Floor(sy - yHalfWidth)) + if iy < sr.Min.Y { + iy = sr.Min.Y + } + jy := int(math.Ceil(sy + yHalfWidth)) + if jy > sr.Max.Y { + jy = sr.Max.Y + } + + totalYWeight := 0.0 + for ky := iy; ky < jy; ky++ { + yWeight := 0.0 + if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support { + yWeight = q.At(t) + } + yWeights[ky-iy] = yWeight + totalYWeight += yWeight + } + for y := range yWeights[:jy-iy] { + yWeights[y] /= totalYWeight + } + + var pr, pg, pb, pa float64 + for ky := iy; ky < jy; ky++ { + if yWeight := yWeights[ky-iy]; yWeight != 0 { + for kx := ix; kx < jx; kx++ { + if w := xWeights[kx-ix] * yWeight; w != 0 { + pu := src.RGBA64At(kx, ky) + if srcMask != nil { + _, _, _, ma := srcMask.At(smp.X+kx, smp.Y+ky).RGBA() + pu.R = uint16(uint32(pu.R) * ma / 0xffff) + pu.G = uint16(uint32(pu.G) * ma / 0xffff) + pu.B = uint16(uint32(pu.B) * ma / 0xffff) + pu.A = uint16(uint32(pu.A) * ma / 0xffff) + } + pr += float64(float64(pu.R) * w) + pg += float64(float64(pu.G) * w) + pb += float64(float64(pu.B) * w) + pa += float64(float64(pu.A) * w) + } + } + } + } + + if pr > pa { + pr = pa + } + if pg > pa { + pg = pa + } + if pb > pa { + pb = pa + } + + if dstMask != nil { + q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) + _, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA() + pr := uint32(fffftou(pr)) * ma / 0xffff + pg := uint32(fffftou(pg)) * ma / 0xffff + pb := uint32(fffftou(pb)) * ma / 0xffff + pa := uint32(fffftou(pa)) * ma / 0xffff + pa1 := 0xffff - ma + dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + pr) + dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + pg) + dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + pb) + dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + pa) + dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } else { + dstColorRGBA64.R = fffftou(pr) + dstColorRGBA64.G = fffftou(pg) + dstColorRGBA64.B = fffftou(pb) + dstColorRGBA64.A = fffftou(pa) + dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) + } + } + } +} + func (q *Kernel) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) { // When shrinking, broaden the effective kernel support so that we still // visit every source pixel. @@ -6422,8 +8178,8 @@ func (q *Kernel) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, dyf := float64(dr.Min.Y+int(dy)) + 0.5 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -6491,10 +8247,10 @@ func (q *Kernel) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, pbu = pbu * ma / 0xffff pau = pau * ma / 0xffff } - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) + pa += float64(float64(pau) * w) } } } @@ -6557,8 +8313,8 @@ func (q *Kernel) transform_Image_Image_Src(dst Image, dr, adr image.Rectangle, d dyf := float64(dr.Min.Y+int(dy)) + 0.5 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] - sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5] + sx := float64(d2s[0]*dxf) + float64(d2s[1]*dyf) + d2s[2] + sy := float64(d2s[3]*dxf) + float64(d2s[4]*dyf) + d2s[5] if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { continue } @@ -6626,10 +8382,10 @@ func (q *Kernel) transform_Image_Image_Src(dst Image, dr, adr image.Rectangle, d pbu = pbu * ma / 0xffff pau = pau * ma / 0xffff } - pr += float64(pru) * w - pg += float64(pgu) * w - pb += float64(pbu) * w - pa += float64(pau) * w + pr += float64(float64(pru) * w) + pg += float64(float64(pgu) * w) + pb += float64(float64(pbu) * w) + pa += float64(float64(pau) * w) } } } diff --git a/vendor/golang.org/x/image/draw/scale.go b/vendor/golang.org/x/image/draw/scale.go index 00121a12..aef200b2 100644 --- a/vendor/golang.org/x/image/draw/scale.go +++ b/vendor/golang.org/x/image/draw/scale.go @@ -46,8 +46,8 @@ type Scaler interface { // // For example, if m is the matrix // -// m00 m01 m02 -// m10 m11 m12 +// m00 m01 m02 +// m10 m11 m12 // // then the src-space point (sx, sy) maps to the dst-space point // (m00*sx + m01*sy + m02, m10*sx + m11*sy + m12). @@ -98,9 +98,9 @@ type Options struct { // have a 1:1 correspondence. // // Of the interpolators provided by this package: -// - NearestNeighbor is fast but usually looks worst. -// - CatmullRom is slow but usually looks best. -// - ApproxBiLinear has reasonable speed and quality. +// - NearestNeighbor is fast but usually looks worst. +// - CatmullRom is slow but usually looks best. +// - ApproxBiLinear has reasonable speed and quality. // // The time taken depends on the size of dr. For kernel interpolators, the // speed also depends on the size of sr, and so are often slower than @@ -182,9 +182,9 @@ var ( // Computer Graphics", Computer Graphics, Vol. 22, No. 4, pp. 221-228. CatmullRom = &Kernel{2, func(t float64) float64 { if t < 1 { - return (1.5*t-2.5)*t*t + 1 + return float64((float64(1.5*t)-2.5)*t*t) + 1 } - return ((-0.5*t+2.5)*t-4)*t + 2 + return float64((float64(float64(float64(-0.5*t)+2.5)*t)-4)*t) + 2 }} // TODO: a Kaiser-Bessel kernel? @@ -247,7 +247,7 @@ func newDistrib(q *Kernel, dw, sw int32) distrib { // source column or row. n, sources := int32(0), make([]source, dw) for x := range sources { - center := (float64(x)+0.5)*scale - 0.5 + center := float64((float64(x)+0.5)*scale) - 0.5 i := int32(math.Floor(center - halfWidth)) if i < 0 { i = 0 @@ -302,7 +302,7 @@ func abs(f float64) float64 { // ftou converts the range [0.0, 1.0] to [0, 0xffff]. func ftou(f float64) uint16 { - i := int32(0xffff*f + 0.5) + i := int32(float64(0xffff*f) + 0.5) if i > 0xffff { return 0xffff } @@ -332,12 +332,12 @@ func fffftou(f float64) uint16 { func invert(m *f64.Aff3) f64.Aff3 { m00 := +m[3*1+1] m01 := -m[3*0+1] - m02 := +m[3*1+2]*m[3*0+1] - m[3*1+1]*m[3*0+2] + m02 := +float64(m[3*1+2]*m[3*0+1]) - float64(m[3*1+1]*m[3*0+2]) m10 := -m[3*1+0] m11 := +m[3*0+0] - m12 := +m[3*1+0]*m[3*0+2] - m[3*1+2]*m[3*0+0] + m12 := +float64(m[3*1+0]*m[3*0+2]) - float64(m[3*1+2]*m[3*0+0]) - det := m00*m11 - m10*m01 + det := float64(m00*m11) - float64(m10*m01) return f64.Aff3{ m00 / det, @@ -351,12 +351,12 @@ func invert(m *f64.Aff3) f64.Aff3 { func matMul(p, q *f64.Aff3) f64.Aff3 { return f64.Aff3{ - p[3*0+0]*q[3*0+0] + p[3*0+1]*q[3*1+0], - p[3*0+0]*q[3*0+1] + p[3*0+1]*q[3*1+1], - p[3*0+0]*q[3*0+2] + p[3*0+1]*q[3*1+2] + p[3*0+2], - p[3*1+0]*q[3*0+0] + p[3*1+1]*q[3*1+0], - p[3*1+0]*q[3*0+1] + p[3*1+1]*q[3*1+1], - p[3*1+0]*q[3*0+2] + p[3*1+1]*q[3*1+2] + p[3*1+2], + float64(p[3*0+0]*q[3*0+0]) + float64(p[3*0+1]*q[3*1+0]), + float64(p[3*0+0]*q[3*0+1]) + float64(p[3*0+1]*q[3*1+1]), + float64(p[3*0+0]*q[3*0+2]) + float64(p[3*0+1]*q[3*1+2]) + p[3*0+2], + float64(p[3*1+0]*q[3*0+0]) + float64(p[3*1+1]*q[3*1+0]), + float64(p[3*1+0]*q[3*0+1]) + float64(p[3*1+1]*q[3*1+1]), + float64(p[3*1+0]*q[3*0+2]) + float64(p[3*1+1]*q[3*1+2]) + p[3*1+2], } } @@ -371,8 +371,8 @@ func transformRect(s2d *f64.Aff3, sr *image.Rectangle) (dr image.Rectangle) { for i, p := range ps { sxf := float64(p.X) syf := float64(p.Y) - dx := int(math.Floor(s2d[0]*sxf + s2d[1]*syf + s2d[2])) - dy := int(math.Floor(s2d[3]*sxf + s2d[4]*syf + s2d[5])) + dx := int(math.Floor(float64(s2d[0]*sxf) + float64(s2d[1]*syf) + s2d[2])) + dy := int(math.Floor(float64(s2d[3]*sxf) + float64(s2d[4]*syf) + s2d[5])) // The +1 adjustments below are because an image.Rectangle is inclusive // on the low end but exclusive on the high end. @@ -428,8 +428,8 @@ func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *i d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -450,8 +450,8 @@ func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *i dyf := float64(dr.Min.Y+int(dy)) + 0.5 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -479,8 +479,8 @@ func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *i d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy)) for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } @@ -505,8 +505,8 @@ func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *i dyf := float64(dr.Min.Y+int(dy)) + 0.5 for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { dxf := float64(dr.Min.X+int(dx)) + 0.5 - sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X - sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y + sx0 := int(float64(d2s[0]*dxf)+float64(d2s[1]*dyf)+d2s[2]) + bias.X + sy0 := int(float64(d2s[3]*dxf)+float64(d2s[4]*dyf)+d2s[5]) + bias.Y if !(image.Point{sx0, sy0}).In(sr) { continue } diff --git a/vendor/golang.org/x/image/font/basicfont/basicfont.go b/vendor/golang.org/x/image/font/basicfont/basicfont.go index 15503818..173c0610 100644 --- a/vendor/golang.org/x/image/font/basicfont/basicfont.go +++ b/vendor/golang.org/x/image/font/basicfont/basicfont.go @@ -89,41 +89,50 @@ func (f *Face) Metrics() font.Metrics { func (f *Face) Glyph(dot fixed.Point26_6, r rune) ( dr image.Rectangle, mask image.Image, maskp image.Point, advance fixed.Int26_6, ok bool) { -loop: - for _, rr := range [2]rune{r, '\ufffd'} { - for _, rng := range f.Ranges { - if rr < rng.Low || rng.High <= rr { - continue - } - maskp.Y = (int(rr-rng.Low) + rng.Offset) * (f.Ascent + f.Descent) - ok = true - break loop + if found, rng := f.find(r); rng != nil { + maskp.Y = (int(found-rng.Low) + rng.Offset) * (f.Ascent + f.Descent) + x := int(dot.X+32)>>6 + f.Left + y := int(dot.Y+32) >> 6 + dr = image.Rectangle{ + Min: image.Point{ + X: x, + Y: y - f.Ascent, + }, + Max: image.Point{ + X: x + f.Width, + Y: y + f.Descent, + }, } - } - if !ok { - return image.Rectangle{}, nil, image.Point{}, 0, false - } - x := int(dot.X+32)>>6 + f.Left - y := int(dot.Y+32) >> 6 - dr = image.Rectangle{ - Min: image.Point{ - X: x, - Y: y - f.Ascent, - }, - Max: image.Point{ - X: x + f.Width, - Y: y + f.Descent, - }, + return dr, f.Mask, maskp, fixed.I(f.Advance), r == found } - - return dr, f.Mask, maskp, fixed.I(f.Advance), true + return image.Rectangle{}, nil, image.Point{}, 0, false } func (f *Face) GlyphBounds(r rune) (bounds fixed.Rectangle26_6, advance fixed.Int26_6, ok bool) { - return fixed.R(0, -f.Ascent, f.Width, +f.Descent), fixed.I(f.Advance), true + if found, rng := f.find(r); rng != nil { + return fixed.R(0, -f.Ascent, f.Width, +f.Descent), fixed.I(f.Advance), r == found + } + return fixed.Rectangle26_6{}, 0, false } func (f *Face) GlyphAdvance(r rune) (advance fixed.Int26_6, ok bool) { - return fixed.I(f.Advance), true + if found, rng := f.find(r); rng != nil { + return fixed.I(f.Advance), r == found + } + return 0, false +} + +func (f *Face) find(r rune) (rune, *Range) { + for { + for i, rng := range f.Ranges { + if (rng.Low <= r) && (r < rng.High) { + return r, &f.Ranges[i] + } + } + if r == '\ufffd' { + return 0, nil + } + r = '\ufffd' + } } diff --git a/vendor/golang.org/x/image/font/font.go b/vendor/golang.org/x/image/font/font.go index d1a75350..6b9b9bc8 100644 --- a/vendor/golang.org/x/image/font/font.go +++ b/vendor/golang.org/x/image/font/font.go @@ -38,7 +38,10 @@ type Face interface { // glyph at the sub-pixel destination location dot, and that glyph's // advance width. // - // It returns !ok if the face does not contain a glyph for r. + // It returns !ok if the face does not contain a glyph for r. This includes + // returning !ok for a fallback glyph (such as substituting a U+FFFD glyph + // or OpenType's .notdef glyph), in which case the other return values may + // still be non-zero. // // The contents of the mask image returned by one Glyph call may change // after the next Glyph call. Callers that want to cache the mask must make @@ -49,7 +52,10 @@ type Face interface { // GlyphBounds returns the bounding box of r's glyph, drawn at a dot equal // to the origin, and that glyph's advance width. // - // It returns !ok if the face does not contain a glyph for r. + // It returns !ok if the face does not contain a glyph for r. This includes + // returning !ok for a fallback glyph (such as substituting a U+FFFD glyph + // or OpenType's .notdef glyph), in which case the other return values may + // still be non-zero. // // The glyph's ascent and descent are equal to -bounds.Min.Y and // +bounds.Max.Y. The glyph's left-side and right-side bearings are equal @@ -60,7 +66,10 @@ type Face interface { // GlyphAdvance returns the advance width of r's glyph. // - // It returns !ok if the face does not contain a glyph for r. + // It returns !ok if the face does not contain a glyph for r. This includes + // returning !ok for a fallback glyph (such as substituting a U+FFFD glyph + // or OpenType's .notdef glyph), in which case the other return values may + // still be non-zero. GlyphAdvance(r rune) (advance fixed.Int26_6, ok bool) // Kern returns the horizontal adjustment for the kerning pair (r0, r1). A @@ -150,14 +159,10 @@ func (d *Drawer) DrawBytes(s []byte) { if prevC >= 0 { d.Dot.X += d.Face.Kern(prevC, c) } - dr, mask, maskp, advance, ok := d.Face.Glyph(d.Dot, c) - if !ok { - // TODO: is falling back on the U+FFFD glyph the responsibility of - // the Drawer or the Face? - // TODO: set prevC = '\ufffd'? - continue + dr, mask, maskp, advance, _ := d.Face.Glyph(d.Dot, c) + if !dr.Empty() { + draw.DrawMask(d.Dst, dr, d.Src, image.Point{}, mask, maskp, draw.Over) } - draw.DrawMask(d.Dst, dr, d.Src, image.Point{}, mask, maskp, draw.Over) d.Dot.X += advance prevC = c } @@ -170,14 +175,10 @@ func (d *Drawer) DrawString(s string) { if prevC >= 0 { d.Dot.X += d.Face.Kern(prevC, c) } - dr, mask, maskp, advance, ok := d.Face.Glyph(d.Dot, c) - if !ok { - // TODO: is falling back on the U+FFFD glyph the responsibility of - // the Drawer or the Face? - // TODO: set prevC = '\ufffd'? - continue + dr, mask, maskp, advance, _ := d.Face.Glyph(d.Dot, c) + if !dr.Empty() { + draw.DrawMask(d.Dst, dr, d.Src, image.Point{}, mask, maskp, draw.Over) } - draw.DrawMask(d.Dst, dr, d.Src, image.Point{}, mask, maskp, draw.Over) d.Dot.X += advance prevC = c } @@ -227,16 +228,12 @@ func BoundBytes(f Face, s []byte) (bounds fixed.Rectangle26_6, advance fixed.Int if prevC >= 0 { advance += f.Kern(prevC, c) } - b, a, ok := f.GlyphBounds(c) - if !ok { - // TODO: is falling back on the U+FFFD glyph the responsibility of - // the Drawer or the Face? - // TODO: set prevC = '\ufffd'? - continue + b, a, _ := f.GlyphBounds(c) + if !b.Empty() { + b.Min.X += advance + b.Max.X += advance + bounds = bounds.Union(b) } - b.Min.X += advance - b.Max.X += advance - bounds = bounds.Union(b) advance += a prevC = c } @@ -251,16 +248,12 @@ func BoundString(f Face, s string) (bounds fixed.Rectangle26_6, advance fixed.In if prevC >= 0 { advance += f.Kern(prevC, c) } - b, a, ok := f.GlyphBounds(c) - if !ok { - // TODO: is falling back on the U+FFFD glyph the responsibility of - // the Drawer or the Face? - // TODO: set prevC = '\ufffd'? - continue + b, a, _ := f.GlyphBounds(c) + if !b.Empty() { + b.Min.X += advance + b.Max.X += advance + bounds = bounds.Union(b) } - b.Min.X += advance - b.Max.X += advance - bounds = bounds.Union(b) advance += a prevC = c } @@ -278,13 +271,7 @@ func MeasureBytes(f Face, s []byte) (advance fixed.Int26_6) { if prevC >= 0 { advance += f.Kern(prevC, c) } - a, ok := f.GlyphAdvance(c) - if !ok { - // TODO: is falling back on the U+FFFD glyph the responsibility of - // the Drawer or the Face? - // TODO: set prevC = '\ufffd'? - continue - } + a, _ := f.GlyphAdvance(c) advance += a prevC = c } @@ -298,13 +285,7 @@ func MeasureString(f Face, s string) (advance fixed.Int26_6) { if prevC >= 0 { advance += f.Kern(prevC, c) } - a, ok := f.GlyphAdvance(c) - if !ok { - // TODO: is falling back on the U+FFFD glyph the responsibility of - // the Drawer or the Face? - // TODO: set prevC = '\ufffd'? - continue - } + a, _ := f.GlyphAdvance(c) advance += a prevC = c } diff --git a/vendor/golang.org/x/mod/LICENSE b/vendor/golang.org/x/mod/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/mod/LICENSE +++ b/vendor/golang.org/x/mod/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/mod/modfile/read.go b/vendor/golang.org/x/mod/modfile/read.go index 5b5bb5e1..de1b9821 100644 --- a/vendor/golang.org/x/mod/modfile/read.go +++ b/vendor/golang.org/x/mod/modfile/read.go @@ -225,9 +225,10 @@ func (x *FileSyntax) Cleanup() { if ww == 0 { continue } - if ww == 1 { - // Collapse block into single line. - line := &Line{ + if ww == 1 && len(stmt.RParen.Comments.Before) == 0 { + // Collapse block into single line but keep the Line reference used by the + // parsed File structure. + *stmt.Line[0] = Line{ Comments: Comments{ Before: commentsAdd(stmt.Before, stmt.Line[0].Before), Suffix: commentsAdd(stmt.Line[0].Suffix, stmt.Suffix), @@ -235,7 +236,7 @@ func (x *FileSyntax) Cleanup() { }, Token: stringsAdd(stmt.Token, stmt.Line[0].Token), } - x.Stmt[w] = line + x.Stmt[w] = stmt.Line[0] w++ continue } diff --git a/vendor/golang.org/x/mod/modfile/rule.go b/vendor/golang.org/x/mod/modfile/rule.go index 930b6c59..3e4a1d0a 100644 --- a/vendor/golang.org/x/mod/modfile/rule.go +++ b/vendor/golang.org/x/mod/modfile/rule.go @@ -38,10 +38,12 @@ type File struct { Module *Module Go *Go Toolchain *Toolchain + Godebug []*Godebug Require []*Require Exclude []*Exclude Replace []*Replace Retract []*Retract + Tool []*Tool Syntax *FileSyntax } @@ -65,6 +67,13 @@ type Toolchain struct { Syntax *Line } +// A Godebug is a single godebug key=value statement. +type Godebug struct { + Key string + Value string + Syntax *Line +} + // An Exclude is a single exclude statement. type Exclude struct { Mod module.Version @@ -85,6 +94,12 @@ type Retract struct { Syntax *Line } +// A Tool is a single tool statement. +type Tool struct { + Path string + Syntax *Line +} + // A VersionInterval represents a range of versions with upper and lower bounds. // Intervals are closed: both bounds are included. When Low is equal to High, // the interval may refer to a single version ('v1.2.3') or an interval @@ -289,7 +304,7 @@ func parseToFile(file string, data []byte, fix VersionFixer, strict bool) (parse }) } continue - case "module", "require", "exclude", "replace", "retract": + case "module", "godebug", "require", "exclude", "replace", "retract", "tool": for _, l := range x.Line { f.add(&errs, x, l, x.Token[0], l.Token, fix, strict) } @@ -308,6 +323,9 @@ var laxGoVersionRE = lazyregexp.New(`^v?(([1-9][0-9]*)\.(0|[1-9][0-9]*))([^0-9]. // Toolchains must be named beginning with `go1`, // like "go1.20.3" or "go1.20.3-gccgo". As a special case, "default" is also permitted. +// Note that this regexp is a much looser condition than go/version.IsValid, +// for forward compatibility. +// (This code has to be work to identify new toolchains even if we tweak the syntax in the future.) var ToolchainRE = lazyregexp.New(`^default$|^go1($|\.)`) func (f *File) add(errs *ErrorList, block *LineBlock, line *Line, verb string, args []string, fix VersionFixer, strict bool) { @@ -367,7 +385,7 @@ func (f *File) add(errs *ErrorList, block *LineBlock, line *Line, verb string, a } } if !fixed { - errorf("invalid go version '%s': must match format 1.23", args[0]) + errorf("invalid go version '%s': must match format 1.23.0", args[0]) return } } @@ -383,8 +401,8 @@ func (f *File) add(errs *ErrorList, block *LineBlock, line *Line, verb string, a if len(args) != 1 { errorf("toolchain directive expects exactly one argument") return - } else if strict && !ToolchainRE.MatchString(args[0]) { - errorf("invalid toolchain version '%s': must match format go1.23 or local", args[0]) + } else if !ToolchainRE.MatchString(args[0]) { + errorf("invalid toolchain version '%s': must match format go1.23.0 or default", args[0]) return } f.Toolchain = &Toolchain{Syntax: line} @@ -411,6 +429,22 @@ func (f *File) add(errs *ErrorList, block *LineBlock, line *Line, verb string, a } f.Module.Mod = module.Version{Path: s} + case "godebug": + if len(args) != 1 || strings.ContainsAny(args[0], "\"`',") { + errorf("usage: godebug key=value") + return + } + key, value, ok := strings.Cut(args[0], "=") + if !ok { + errorf("usage: godebug key=value") + return + } + f.Godebug = append(f.Godebug, &Godebug{ + Key: key, + Value: value, + Syntax: line, + }) + case "require", "exclude": if len(args) != 2 { errorf("usage: %s module/path v1.2.3", verb) @@ -482,6 +516,21 @@ func (f *File) add(errs *ErrorList, block *LineBlock, line *Line, verb string, a Syntax: line, } f.Retract = append(f.Retract, retract) + + case "tool": + if len(args) != 1 { + errorf("tool directive expects exactly one argument") + return + } + s, err := parseString(&args[0]) + if err != nil { + errorf("invalid quoted string: %v", err) + return + } + f.Tool = append(f.Tool, &Tool{ + Path: s, + Syntax: line, + }) } } @@ -542,7 +591,7 @@ func parseReplace(filename string, line *Line, verb string, args []string, fix V if strings.Contains(ns, "@") { return nil, errorf("replacement module must match format 'path version', not 'path@version'") } - return nil, errorf("replacement module without version must be directory path (rooted or starting with ./ or ../)") + return nil, errorf("replacement module without version must be directory path (rooted or starting with . or ..)") } if filepath.Separator == '/' && strings.Contains(ns, `\`) { return nil, errorf("replacement directory appears to be Windows path (on a non-windows system)") @@ -555,7 +604,6 @@ func parseReplace(filename string, line *Line, verb string, args []string, fix V } if IsDirectoryPath(ns) { return nil, errorf("replacement module directory path %q cannot have version", ns) - } } return &Replace{ @@ -631,7 +679,7 @@ func (f *WorkFile) add(errs *ErrorList, line *Line, verb string, args []string, errorf("go directive expects exactly one argument") return } else if !GoVersionRE.MatchString(args[0]) { - errorf("invalid go version '%s': must match format 1.23", args[0]) + errorf("invalid go version '%s': must match format 1.23.0", args[0]) return } @@ -647,13 +695,29 @@ func (f *WorkFile) add(errs *ErrorList, line *Line, verb string, args []string, errorf("toolchain directive expects exactly one argument") return } else if !ToolchainRE.MatchString(args[0]) { - errorf("invalid toolchain version '%s': must match format go1.23 or local", args[0]) + errorf("invalid toolchain version '%s': must match format go1.23.0 or default", args[0]) return } f.Toolchain = &Toolchain{Syntax: line} f.Toolchain.Name = args[0] + case "godebug": + if len(args) != 1 || strings.ContainsAny(args[0], "\"`',") { + errorf("usage: godebug key=value") + return + } + key, value, ok := strings.Cut(args[0], "=") + if !ok { + errorf("usage: godebug key=value") + return + } + f.Godebug = append(f.Godebug, &Godebug{ + Key: key, + Value: value, + Syntax: line, + }) + case "use": if len(args) != 1 { errorf("usage: %s local/dir", verb) @@ -679,14 +743,15 @@ func (f *WorkFile) add(errs *ErrorList, line *Line, verb string, args []string, } } -// IsDirectoryPath reports whether the given path should be interpreted -// as a directory path. Just like on the go command line, relative paths +// IsDirectoryPath reports whether the given path should be interpreted as a directory path. +// Just like on the go command line, relative paths starting with a '.' or '..' path component // and rooted paths are directory paths; the rest are module paths. func IsDirectoryPath(ns string) bool { // Because go.mod files can move from one system to another, // we check all known path syntaxes, both Unix and Windows. - return strings.HasPrefix(ns, "./") || strings.HasPrefix(ns, "../") || strings.HasPrefix(ns, "/") || - strings.HasPrefix(ns, `.\`) || strings.HasPrefix(ns, `..\`) || strings.HasPrefix(ns, `\`) || + return ns == "." || strings.HasPrefix(ns, "./") || strings.HasPrefix(ns, `.\`) || + ns == ".." || strings.HasPrefix(ns, "../") || strings.HasPrefix(ns, `..\`) || + strings.HasPrefix(ns, "/") || strings.HasPrefix(ns, `\`) || len(ns) >= 2 && ('A' <= ns[0] && ns[0] <= 'Z' || 'a' <= ns[0] && ns[0] <= 'z') && ns[1] == ':' } @@ -928,6 +993,15 @@ func (f *File) Format() ([]byte, error) { // Cleanup cleans out all the cleared entries. func (f *File) Cleanup() { w := 0 + for _, g := range f.Godebug { + if g.Key != "" { + f.Godebug[w] = g + w++ + } + } + f.Godebug = f.Godebug[:w] + + w = 0 for _, r := range f.Require { if r.Mod.Path != "" { f.Require[w] = r @@ -974,6 +1048,8 @@ func (f *File) AddGoStmt(version string) error { var hint Expr if f.Module != nil && f.Module.Syntax != nil { hint = f.Module.Syntax + } else if f.Syntax == nil { + f.Syntax = new(FileSyntax) } f.Go = &Go{ Version: version, @@ -1024,6 +1100,45 @@ func (f *File) AddToolchainStmt(name string) error { return nil } +// AddGodebug sets the first godebug line for key to value, +// preserving any existing comments for that line and removing all +// other godebug lines for key. +// +// If no line currently exists for key, AddGodebug adds a new line +// at the end of the last godebug block. +func (f *File) AddGodebug(key, value string) error { + need := true + for _, g := range f.Godebug { + if g.Key == key { + if need { + g.Value = value + f.Syntax.updateLine(g.Syntax, "godebug", key+"="+value) + need = false + } else { + g.Syntax.markRemoved() + *g = Godebug{} + } + } + } + + if need { + f.addNewGodebug(key, value) + } + return nil +} + +// addNewGodebug adds a new godebug key=value line at the end +// of the last godebug block, regardless of any existing godebug lines for key. +func (f *File) addNewGodebug(key, value string) { + line := f.Syntax.addLine(nil, "godebug", key+"="+value) + g := &Godebug{ + Key: key, + Value: value, + Syntax: line, + } + f.Godebug = append(f.Godebug, g) +} + // AddRequire sets the first require line for path to version vers, // preserving any existing comments for that line and removing all // other lines for path. @@ -1331,6 +1446,16 @@ func (f *File) SetRequireSeparateIndirect(req []*Require) { f.SortBlocks() } +func (f *File) DropGodebug(key string) error { + for _, g := range f.Godebug { + if g.Key == key { + g.Syntax.markRemoved() + *g = Godebug{} + } + } + return nil +} + func (f *File) DropRequire(path string) error { for _, r := range f.Require { if r.Mod.Path == path { @@ -1464,6 +1589,36 @@ func (f *File) DropRetract(vi VersionInterval) error { return nil } +// AddTool adds a new tool directive with the given path. +// It does nothing if the tool line already exists. +func (f *File) AddTool(path string) error { + for _, t := range f.Tool { + if t.Path == path { + return nil + } + } + + f.Tool = append(f.Tool, &Tool{ + Path: path, + Syntax: f.Syntax.addLine(nil, "tool", path), + }) + + f.SortBlocks() + return nil +} + +// RemoveTool removes a tool directive with the given path. +// It does nothing if no such tool directive exists. +func (f *File) DropTool(path string) error { + for _, t := range f.Tool { + if t.Path == path { + t.Syntax.markRemoved() + *t = Tool{} + } + } + return nil +} + func (f *File) SortBlocks() { f.removeDups() // otherwise sorting is unsafe @@ -1490,9 +1645,9 @@ func (f *File) SortBlocks() { } } -// removeDups removes duplicate exclude and replace directives. +// removeDups removes duplicate exclude, replace and tool directives. // -// Earlier exclude directives take priority. +// Earlier exclude and tool directives take priority. // // Later replace directives take priority. // @@ -1502,10 +1657,10 @@ func (f *File) SortBlocks() { // retract directives are not de-duplicated since comments are // meaningful, and versions may be retracted multiple times. func (f *File) removeDups() { - removeDups(f.Syntax, &f.Exclude, &f.Replace) + removeDups(f.Syntax, &f.Exclude, &f.Replace, &f.Tool) } -func removeDups(syntax *FileSyntax, exclude *[]*Exclude, replace *[]*Replace) { +func removeDups(syntax *FileSyntax, exclude *[]*Exclude, replace *[]*Replace, tool *[]*Tool) { kill := make(map[*Line]bool) // Remove duplicate excludes. @@ -1546,6 +1701,24 @@ func removeDups(syntax *FileSyntax, exclude *[]*Exclude, replace *[]*Replace) { } *replace = repl + if tool != nil { + haveTool := make(map[string]bool) + for _, t := range *tool { + if haveTool[t.Path] { + kill[t.Syntax] = true + continue + } + haveTool[t.Path] = true + } + var newTool []*Tool + for _, t := range *tool { + if !kill[t.Syntax] { + newTool = append(newTool, t) + } + } + *tool = newTool + } + // Duplicate require and retract directives are not removed. // Drop killed statements from the syntax tree. diff --git a/vendor/golang.org/x/mod/modfile/work.go b/vendor/golang.org/x/mod/modfile/work.go index d7b99376..5387d0c2 100644 --- a/vendor/golang.org/x/mod/modfile/work.go +++ b/vendor/golang.org/x/mod/modfile/work.go @@ -14,6 +14,7 @@ import ( type WorkFile struct { Go *Go Toolchain *Toolchain + Godebug []*Godebug Use []*Use Replace []*Replace @@ -68,7 +69,7 @@ func ParseWork(file string, data []byte, fix VersionFixer) (*WorkFile, error) { Err: fmt.Errorf("unknown block type: %s", strings.Join(x.Token, " ")), }) continue - case "use", "replace": + case "godebug", "use", "replace": for _, l := range x.Line { f.add(&errs, l, x.Token[0], l.Token, fix) } @@ -184,6 +185,55 @@ func (f *WorkFile) DropToolchainStmt() { } } +// AddGodebug sets the first godebug line for key to value, +// preserving any existing comments for that line and removing all +// other godebug lines for key. +// +// If no line currently exists for key, AddGodebug adds a new line +// at the end of the last godebug block. +func (f *WorkFile) AddGodebug(key, value string) error { + need := true + for _, g := range f.Godebug { + if g.Key == key { + if need { + g.Value = value + f.Syntax.updateLine(g.Syntax, "godebug", key+"="+value) + need = false + } else { + g.Syntax.markRemoved() + *g = Godebug{} + } + } + } + + if need { + f.addNewGodebug(key, value) + } + return nil +} + +// addNewGodebug adds a new godebug key=value line at the end +// of the last godebug block, regardless of any existing godebug lines for key. +func (f *WorkFile) addNewGodebug(key, value string) { + line := f.Syntax.addLine(nil, "godebug", key+"="+value) + g := &Godebug{ + Key: key, + Value: value, + Syntax: line, + } + f.Godebug = append(f.Godebug, g) +} + +func (f *WorkFile) DropGodebug(key string) error { + for _, g := range f.Godebug { + if g.Key == key { + g.Syntax.markRemoved() + *g = Godebug{} + } + } + return nil +} + func (f *WorkFile) AddUse(diskPath, modulePath string) error { need := true for _, d := range f.Use { @@ -281,5 +331,5 @@ func (f *WorkFile) SortBlocks() { // retract directives are not de-duplicated since comments are // meaningful, and versions may be retracted multiple times. func (f *WorkFile) removeDups() { - removeDups(f.Syntax, nil, &f.Replace) + removeDups(f.Syntax, nil, &f.Replace, nil) } diff --git a/vendor/golang.org/x/net/LICENSE b/vendor/golang.org/x/net/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/net/LICENSE +++ b/vendor/golang.org/x/net/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/net/html/doc.go b/vendor/golang.org/x/net/html/doc.go index 2466ae3d..3a7e5ab1 100644 --- a/vendor/golang.org/x/net/html/doc.go +++ b/vendor/golang.org/x/net/html/doc.go @@ -104,7 +104,7 @@ tokenization, and tokenization and tree construction stages of the WHATWG HTML parsing specification respectively. While the tokenizer parses and normalizes individual HTML tokens, only the parser constructs the DOM tree from the tokenized HTML, as described in the tree construction stage of the -specification, dynamically modifying or extending the docuemnt's DOM tree. +specification, dynamically modifying or extending the document's DOM tree. If your use case requires semantically well-formed HTML documents, as defined by the WHATWG specification, the parser should be used rather than the tokenizer. diff --git a/vendor/golang.org/x/net/http/httpguts/httplex.go b/vendor/golang.org/x/net/http/httpguts/httplex.go index 6e071e85..9b4de940 100644 --- a/vendor/golang.org/x/net/http/httpguts/httplex.go +++ b/vendor/golang.org/x/net/http/httpguts/httplex.go @@ -12,7 +12,7 @@ import ( "golang.org/x/net/idna" ) -var isTokenTable = [127]bool{ +var isTokenTable = [256]bool{ '!': true, '#': true, '$': true, @@ -93,12 +93,7 @@ var isTokenTable = [127]bool{ } func IsTokenRune(r rune) bool { - i := int(r) - return i < len(isTokenTable) && isTokenTable[i] -} - -func isNotToken(r rune) bool { - return !IsTokenRune(r) + return r < utf8.RuneSelf && isTokenTable[byte(r)] } // HeaderValuesContainsToken reports whether any string in values @@ -202,8 +197,8 @@ func ValidHeaderFieldName(v string) bool { if len(v) == 0 { return false } - for _, r := range v { - if !IsTokenRune(r) { + for i := 0; i < len(v); i++ { + if !isTokenTable[v[i]] { return false } } diff --git a/vendor/golang.org/x/net/http2/config.go b/vendor/golang.org/x/net/http2/config.go new file mode 100644 index 00000000..de58dfb8 --- /dev/null +++ b/vendor/golang.org/x/net/http2/config.go @@ -0,0 +1,122 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http2 + +import ( + "math" + "net/http" + "time" +) + +// http2Config is a package-internal version of net/http.HTTP2Config. +// +// http.HTTP2Config was added in Go 1.24. +// When running with a version of net/http that includes HTTP2Config, +// we merge the configuration with the fields in Transport or Server +// to produce an http2Config. +// +// Zero valued fields in http2Config are interpreted as in the +// net/http.HTTPConfig documentation. +// +// Precedence order for reconciling configurations is: +// +// - Use the net/http.{Server,Transport}.HTTP2Config value, when non-zero. +// - Otherwise use the http2.{Server.Transport} value. +// - If the resulting value is zero or out of range, use a default. +type http2Config struct { + MaxConcurrentStreams uint32 + MaxDecoderHeaderTableSize uint32 + MaxEncoderHeaderTableSize uint32 + MaxReadFrameSize uint32 + MaxUploadBufferPerConnection int32 + MaxUploadBufferPerStream int32 + SendPingTimeout time.Duration + PingTimeout time.Duration + WriteByteTimeout time.Duration + PermitProhibitedCipherSuites bool + CountError func(errType string) +} + +// configFromServer merges configuration settings from +// net/http.Server.HTTP2Config and http2.Server. +func configFromServer(h1 *http.Server, h2 *Server) http2Config { + conf := http2Config{ + MaxConcurrentStreams: h2.MaxConcurrentStreams, + MaxEncoderHeaderTableSize: h2.MaxEncoderHeaderTableSize, + MaxDecoderHeaderTableSize: h2.MaxDecoderHeaderTableSize, + MaxReadFrameSize: h2.MaxReadFrameSize, + MaxUploadBufferPerConnection: h2.MaxUploadBufferPerConnection, + MaxUploadBufferPerStream: h2.MaxUploadBufferPerStream, + SendPingTimeout: h2.ReadIdleTimeout, + PingTimeout: h2.PingTimeout, + WriteByteTimeout: h2.WriteByteTimeout, + PermitProhibitedCipherSuites: h2.PermitProhibitedCipherSuites, + CountError: h2.CountError, + } + fillNetHTTPServerConfig(&conf, h1) + setConfigDefaults(&conf, true) + return conf +} + +// configFromServer merges configuration settings from h2 and h2.t1.HTTP2 +// (the net/http Transport). +func configFromTransport(h2 *Transport) http2Config { + conf := http2Config{ + MaxEncoderHeaderTableSize: h2.MaxEncoderHeaderTableSize, + MaxDecoderHeaderTableSize: h2.MaxDecoderHeaderTableSize, + MaxReadFrameSize: h2.MaxReadFrameSize, + SendPingTimeout: h2.ReadIdleTimeout, + PingTimeout: h2.PingTimeout, + WriteByteTimeout: h2.WriteByteTimeout, + } + + // Unlike most config fields, where out-of-range values revert to the default, + // Transport.MaxReadFrameSize clips. + if conf.MaxReadFrameSize < minMaxFrameSize { + conf.MaxReadFrameSize = minMaxFrameSize + } else if conf.MaxReadFrameSize > maxFrameSize { + conf.MaxReadFrameSize = maxFrameSize + } + + if h2.t1 != nil { + fillNetHTTPTransportConfig(&conf, h2.t1) + } + setConfigDefaults(&conf, false) + return conf +} + +func setDefault[T ~int | ~int32 | ~uint32 | ~int64](v *T, minval, maxval, defval T) { + if *v < minval || *v > maxval { + *v = defval + } +} + +func setConfigDefaults(conf *http2Config, server bool) { + setDefault(&conf.MaxConcurrentStreams, 1, math.MaxUint32, defaultMaxStreams) + setDefault(&conf.MaxEncoderHeaderTableSize, 1, math.MaxUint32, initialHeaderTableSize) + setDefault(&conf.MaxDecoderHeaderTableSize, 1, math.MaxUint32, initialHeaderTableSize) + if server { + setDefault(&conf.MaxUploadBufferPerConnection, initialWindowSize, math.MaxInt32, 1<<20) + } else { + setDefault(&conf.MaxUploadBufferPerConnection, initialWindowSize, math.MaxInt32, transportDefaultConnFlow) + } + if server { + setDefault(&conf.MaxUploadBufferPerStream, 1, math.MaxInt32, 1<<20) + } else { + setDefault(&conf.MaxUploadBufferPerStream, 1, math.MaxInt32, transportDefaultStreamFlow) + } + setDefault(&conf.MaxReadFrameSize, minMaxFrameSize, maxFrameSize, defaultMaxReadFrameSize) + setDefault(&conf.PingTimeout, 1, math.MaxInt64, 15*time.Second) +} + +// adjustHTTP1MaxHeaderSize converts a limit in bytes on the size of an HTTP/1 header +// to an HTTP/2 MAX_HEADER_LIST_SIZE value. +func adjustHTTP1MaxHeaderSize(n int64) int64 { + // http2's count is in a slightly different unit and includes 32 bytes per pair. + // So, take the net/http.Server value and pad it up a bit, assuming 10 headers. + const perFieldOverhead = 32 // per http2 spec + const typicalHeaders = 10 // conservative + return n + typicalHeaders*perFieldOverhead +} diff --git a/vendor/golang.org/x/net/http2/config_go124.go b/vendor/golang.org/x/net/http2/config_go124.go new file mode 100644 index 00000000..e3784123 --- /dev/null +++ b/vendor/golang.org/x/net/http2/config_go124.go @@ -0,0 +1,61 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.24 + +package http2 + +import "net/http" + +// fillNetHTTPServerConfig sets fields in conf from srv.HTTP2. +func fillNetHTTPServerConfig(conf *http2Config, srv *http.Server) { + fillNetHTTPConfig(conf, srv.HTTP2) +} + +// fillNetHTTPServerConfig sets fields in conf from tr.HTTP2. +func fillNetHTTPTransportConfig(conf *http2Config, tr *http.Transport) { + fillNetHTTPConfig(conf, tr.HTTP2) +} + +func fillNetHTTPConfig(conf *http2Config, h2 *http.HTTP2Config) { + if h2 == nil { + return + } + if h2.MaxConcurrentStreams != 0 { + conf.MaxConcurrentStreams = uint32(h2.MaxConcurrentStreams) + } + if h2.MaxEncoderHeaderTableSize != 0 { + conf.MaxEncoderHeaderTableSize = uint32(h2.MaxEncoderHeaderTableSize) + } + if h2.MaxDecoderHeaderTableSize != 0 { + conf.MaxDecoderHeaderTableSize = uint32(h2.MaxDecoderHeaderTableSize) + } + if h2.MaxConcurrentStreams != 0 { + conf.MaxConcurrentStreams = uint32(h2.MaxConcurrentStreams) + } + if h2.MaxReadFrameSize != 0 { + conf.MaxReadFrameSize = uint32(h2.MaxReadFrameSize) + } + if h2.MaxReceiveBufferPerConnection != 0 { + conf.MaxUploadBufferPerConnection = int32(h2.MaxReceiveBufferPerConnection) + } + if h2.MaxReceiveBufferPerStream != 0 { + conf.MaxUploadBufferPerStream = int32(h2.MaxReceiveBufferPerStream) + } + if h2.SendPingTimeout != 0 { + conf.SendPingTimeout = h2.SendPingTimeout + } + if h2.PingTimeout != 0 { + conf.PingTimeout = h2.PingTimeout + } + if h2.WriteByteTimeout != 0 { + conf.WriteByteTimeout = h2.WriteByteTimeout + } + if h2.PermitProhibitedCipherSuites { + conf.PermitProhibitedCipherSuites = true + } + if h2.CountError != nil { + conf.CountError = h2.CountError + } +} diff --git a/vendor/golang.org/x/net/http2/config_pre_go124.go b/vendor/golang.org/x/net/http2/config_pre_go124.go new file mode 100644 index 00000000..060fd6c6 --- /dev/null +++ b/vendor/golang.org/x/net/http2/config_pre_go124.go @@ -0,0 +1,16 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.24 + +package http2 + +import "net/http" + +// Pre-Go 1.24 fallback. +// The Server.HTTP2 and Transport.HTTP2 config fields were added in Go 1.24. + +func fillNetHTTPServerConfig(conf *http2Config, srv *http.Server) {} + +func fillNetHTTPTransportConfig(conf *http2Config, tr *http.Transport) {} diff --git a/vendor/golang.org/x/net/http2/frame.go b/vendor/golang.org/x/net/http2/frame.go index e2b298d8..105c3b27 100644 --- a/vendor/golang.org/x/net/http2/frame.go +++ b/vendor/golang.org/x/net/http2/frame.go @@ -490,6 +490,9 @@ func terminalReadFrameError(err error) bool { // returned error is ErrFrameTooLarge. Other errors may be of type // ConnectionError, StreamError, or anything else from the underlying // reader. +// +// If ReadFrame returns an error and a non-nil Frame, the Frame's StreamID +// indicates the stream responsible for the error. func (fr *Framer) ReadFrame() (Frame, error) { fr.errDetail = nil if fr.lastFrame != nil { @@ -1521,7 +1524,7 @@ func (fr *Framer) maxHeaderStringLen() int { // readMetaFrame returns 0 or more CONTINUATION frames from fr and // merge them into the provided hf and returns a MetaHeadersFrame // with the decoded hpack values. -func (fr *Framer) readMetaFrame(hf *HeadersFrame) (*MetaHeadersFrame, error) { +func (fr *Framer) readMetaFrame(hf *HeadersFrame) (Frame, error) { if fr.AllowIllegalReads { return nil, errors.New("illegal use of AllowIllegalReads with ReadMetaHeaders") } @@ -1564,6 +1567,7 @@ func (fr *Framer) readMetaFrame(hf *HeadersFrame) (*MetaHeadersFrame, error) { if size > remainSize { hdec.SetEmitEnabled(false) mh.Truncated = true + remainSize = 0 return } remainSize -= size @@ -1576,8 +1580,38 @@ func (fr *Framer) readMetaFrame(hf *HeadersFrame) (*MetaHeadersFrame, error) { var hc headersOrContinuation = hf for { frag := hc.HeaderBlockFragment() + + // Avoid parsing large amounts of headers that we will then discard. + // If the sender exceeds the max header list size by too much, + // skip parsing the fragment and close the connection. + // + // "Too much" is either any CONTINUATION frame after we've already + // exceeded the max header list size (in which case remainSize is 0), + // or a frame whose encoded size is more than twice the remaining + // header list bytes we're willing to accept. + if int64(len(frag)) > int64(2*remainSize) { + if VerboseLogs { + log.Printf("http2: header list too large") + } + // It would be nice to send a RST_STREAM before sending the GOAWAY, + // but the structure of the server's frame writer makes this difficult. + return mh, ConnectionError(ErrCodeProtocol) + } + + // Also close the connection after any CONTINUATION frame following an + // invalid header, since we stop tracking the size of the headers after + // an invalid one. + if invalid != nil { + if VerboseLogs { + log.Printf("http2: invalid header: %v", invalid) + } + // It would be nice to send a RST_STREAM before sending the GOAWAY, + // but the structure of the server's frame writer makes this difficult. + return mh, ConnectionError(ErrCodeProtocol) + } + if _, err := hdec.Write(frag); err != nil { - return nil, ConnectionError(ErrCodeCompression) + return mh, ConnectionError(ErrCodeCompression) } if hc.HeadersEnded() { @@ -1594,7 +1628,7 @@ func (fr *Framer) readMetaFrame(hf *HeadersFrame) (*MetaHeadersFrame, error) { mh.HeadersFrame.invalidate() if err := hdec.Close(); err != nil { - return nil, ConnectionError(ErrCodeCompression) + return mh, ConnectionError(ErrCodeCompression) } if invalid != nil { fr.errDetail = invalid diff --git a/vendor/golang.org/x/net/http2/http2.go b/vendor/golang.org/x/net/http2/http2.go index 6f2df281..7688c356 100644 --- a/vendor/golang.org/x/net/http2/http2.go +++ b/vendor/golang.org/x/net/http2/http2.go @@ -17,15 +17,18 @@ package http2 // import "golang.org/x/net/http2" import ( "bufio" + "context" "crypto/tls" + "errors" "fmt" - "io" + "net" "net/http" "os" "sort" "strconv" "strings" "sync" + "time" "golang.org/x/net/http/httpguts" ) @@ -210,12 +213,6 @@ type stringWriter interface { WriteString(s string) (n int, err error) } -// A gate lets two goroutines coordinate their activities. -type gate chan struct{} - -func (g gate) Done() { g <- struct{}{} } -func (g gate) Wait() { <-g } - // A closeWaiter is like a sync.WaitGroup but only goes 1 to 0 (open to closed). type closeWaiter chan struct{} @@ -241,13 +238,19 @@ func (cw closeWaiter) Wait() { // Its buffered writer is lazily allocated as needed, to minimize // idle memory usage with many connections. type bufferedWriter struct { - _ incomparable - w io.Writer // immutable - bw *bufio.Writer // non-nil when data is buffered + _ incomparable + group synctestGroupInterface // immutable + conn net.Conn // immutable + bw *bufio.Writer // non-nil when data is buffered + byteTimeout time.Duration // immutable, WriteByteTimeout } -func newBufferedWriter(w io.Writer) *bufferedWriter { - return &bufferedWriter{w: w} +func newBufferedWriter(group synctestGroupInterface, conn net.Conn, timeout time.Duration) *bufferedWriter { + return &bufferedWriter{ + group: group, + conn: conn, + byteTimeout: timeout, + } } // bufWriterPoolBufferSize is the size of bufio.Writer's @@ -274,7 +277,7 @@ func (w *bufferedWriter) Available() int { func (w *bufferedWriter) Write(p []byte) (n int, err error) { if w.bw == nil { bw := bufWriterPool.Get().(*bufio.Writer) - bw.Reset(w.w) + bw.Reset((*bufferedWriterTimeoutWriter)(w)) w.bw = bw } return w.bw.Write(p) @@ -292,6 +295,38 @@ func (w *bufferedWriter) Flush() error { return err } +type bufferedWriterTimeoutWriter bufferedWriter + +func (w *bufferedWriterTimeoutWriter) Write(p []byte) (n int, err error) { + return writeWithByteTimeout(w.group, w.conn, w.byteTimeout, p) +} + +// writeWithByteTimeout writes to conn. +// If more than timeout passes without any bytes being written to the connection, +// the write fails. +func writeWithByteTimeout(group synctestGroupInterface, conn net.Conn, timeout time.Duration, p []byte) (n int, err error) { + if timeout <= 0 { + return conn.Write(p) + } + for { + var now time.Time + if group == nil { + now = time.Now() + } else { + now = group.Now() + } + conn.SetWriteDeadline(now.Add(timeout)) + nn, err := conn.Write(p[n:]) + n += nn + if n == len(p) || nn == 0 || !errors.Is(err, os.ErrDeadlineExceeded) { + // Either we finished the write, made no progress, or hit the deadline. + // Whichever it is, we're done now. + conn.SetWriteDeadline(time.Time{}) + return n, err + } + } +} + func mustUint31(v int32) uint32 { if v < 0 || v > 2147483647 { panic("out of range") @@ -383,3 +418,14 @@ func validPseudoPath(v string) bool { // makes that struct also non-comparable, and generally doesn't add // any size (as long as it's first). type incomparable [0]func() + +// synctestGroupInterface is the methods of synctestGroup used by Server and Transport. +// It's defined as an interface here to let us keep synctestGroup entirely test-only +// and not a part of non-test builds. +type synctestGroupInterface interface { + Join() + Now() time.Time + NewTimer(d time.Duration) timer + AfterFunc(d time.Duration, f func()) timer + ContextWithTimeout(ctx context.Context, d time.Duration) (context.Context, context.CancelFunc) +} diff --git a/vendor/golang.org/x/net/http2/pipe.go b/vendor/golang.org/x/net/http2/pipe.go index 684d984f..3b9f06b9 100644 --- a/vendor/golang.org/x/net/http2/pipe.go +++ b/vendor/golang.org/x/net/http2/pipe.go @@ -77,7 +77,10 @@ func (p *pipe) Read(d []byte) (n int, err error) { } } -var errClosedPipeWrite = errors.New("write on closed buffer") +var ( + errClosedPipeWrite = errors.New("write on closed buffer") + errUninitializedPipeWrite = errors.New("write on uninitialized buffer") +) // Write copies bytes from p into the buffer and wakes a reader. // It is an error to write more data than the buffer can hold. @@ -91,6 +94,12 @@ func (p *pipe) Write(d []byte) (n int, err error) { if p.err != nil || p.breakErr != nil { return 0, errClosedPipeWrite } + // pipe.setBuffer is never invoked, leaving the buffer uninitialized. + // We shouldn't try to write to an uninitialized pipe, + // but returning an error is better than panicking. + if p.b == nil { + return 0, errUninitializedPipeWrite + } return p.b.Write(d) } diff --git a/vendor/golang.org/x/net/http2/server.go b/vendor/golang.org/x/net/http2/server.go index ae94c640..617b4a47 100644 --- a/vendor/golang.org/x/net/http2/server.go +++ b/vendor/golang.org/x/net/http2/server.go @@ -29,6 +29,7 @@ import ( "bufio" "bytes" "context" + "crypto/rand" "crypto/tls" "errors" "fmt" @@ -52,10 +53,14 @@ import ( ) const ( - prefaceTimeout = 10 * time.Second - firstSettingsTimeout = 2 * time.Second // should be in-flight with preface anyway - handlerChunkWriteSize = 4 << 10 - defaultMaxStreams = 250 // TODO: make this 100 as the GFE seems to? + prefaceTimeout = 10 * time.Second + firstSettingsTimeout = 2 * time.Second // should be in-flight with preface anyway + handlerChunkWriteSize = 4 << 10 + defaultMaxStreams = 250 // TODO: make this 100 as the GFE seems to? + + // maxQueuedControlFrames is the maximum number of control frames like + // SETTINGS, PING and RST_STREAM that will be queued for writing before + // the connection is closed to prevent memory exhaustion attacks. maxQueuedControlFrames = 10000 ) @@ -124,8 +129,25 @@ type Server struct { // IdleTimeout specifies how long until idle clients should be // closed with a GOAWAY frame. PING frames are not considered // activity for the purposes of IdleTimeout. + // If zero or negative, there is no timeout. IdleTimeout time.Duration + // ReadIdleTimeout is the timeout after which a health check using a ping + // frame will be carried out if no frame is received on the connection. + // If zero, no health check is performed. + ReadIdleTimeout time.Duration + + // PingTimeout is the timeout after which the connection will be closed + // if a response to a ping is not received. + // If zero, a default of 15 seconds is used. + PingTimeout time.Duration + + // WriteByteTimeout is the timeout after which a connection will be + // closed if no data can be written to it. The timeout begins when data is + // available to write, and is extended whenever any bytes are written. + // If zero or negative, there is no timeout. + WriteByteTimeout time.Duration + // MaxUploadBufferPerConnection is the size of the initial flow // control window for each connections. The HTTP/2 spec does not // allow this to be smaller than 65535 or larger than 2^32-1. @@ -153,57 +175,39 @@ type Server struct { // so that we don't embed a Mutex in this struct, which will make the // struct non-copyable, which might break some callers. state *serverInternalState -} -func (s *Server) initialConnRecvWindowSize() int32 { - if s.MaxUploadBufferPerConnection >= initialWindowSize { - return s.MaxUploadBufferPerConnection - } - return 1 << 20 -} - -func (s *Server) initialStreamRecvWindowSize() int32 { - if s.MaxUploadBufferPerStream > 0 { - return s.MaxUploadBufferPerStream - } - return 1 << 20 + // Synchronization group used for testing. + // Outside of tests, this is nil. + group synctestGroupInterface } -func (s *Server) maxReadFrameSize() uint32 { - if v := s.MaxReadFrameSize; v >= minMaxFrameSize && v <= maxFrameSize { - return v +func (s *Server) markNewGoroutine() { + if s.group != nil { + s.group.Join() } - return defaultMaxReadFrameSize } -func (s *Server) maxConcurrentStreams() uint32 { - if v := s.MaxConcurrentStreams; v > 0 { - return v +func (s *Server) now() time.Time { + if s.group != nil { + return s.group.Now() } - return defaultMaxStreams + return time.Now() } -func (s *Server) maxDecoderHeaderTableSize() uint32 { - if v := s.MaxDecoderHeaderTableSize; v > 0 { - return v +// newTimer creates a new time.Timer, or a synthetic timer in tests. +func (s *Server) newTimer(d time.Duration) timer { + if s.group != nil { + return s.group.NewTimer(d) } - return initialHeaderTableSize + return timeTimer{time.NewTimer(d)} } -func (s *Server) maxEncoderHeaderTableSize() uint32 { - if v := s.MaxEncoderHeaderTableSize; v > 0 { - return v +// afterFunc creates a new time.AfterFunc timer, or a synthetic timer in tests. +func (s *Server) afterFunc(d time.Duration, f func()) timer { + if s.group != nil { + return s.group.AfterFunc(d, f) } - return initialHeaderTableSize -} - -// maxQueuedControlFrames is the maximum number of control frames like -// SETTINGS, PING and RST_STREAM that will be queued for writing before -// the connection is closed to prevent memory exhaustion attacks. -func (s *Server) maxQueuedControlFrames() int { - // TODO: if anybody asks, add a Server field, and remember to define the - // behavior of negative values. - return maxQueuedControlFrames + return timeTimer{time.AfterFunc(d, f)} } type serverInternalState struct { @@ -399,16 +403,22 @@ func (o *ServeConnOpts) handler() http.Handler { // // The opts parameter is optional. If nil, default values are used. func (s *Server) ServeConn(c net.Conn, opts *ServeConnOpts) { + s.serveConn(c, opts, nil) +} + +func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverConn)) { baseCtx, cancel := serverConnBaseContext(c, opts) defer cancel() + http1srv := opts.baseConfig() + conf := configFromServer(http1srv, s) sc := &serverConn{ srv: s, - hs: opts.baseConfig(), + hs: http1srv, conn: c, baseCtx: baseCtx, remoteAddrStr: c.RemoteAddr().String(), - bw: newBufferedWriter(c), + bw: newBufferedWriter(s.group, c, conf.WriteByteTimeout), handler: opts.handler(), streams: make(map[uint32]*stream), readFrameCh: make(chan readFrameResult), @@ -418,13 +428,19 @@ func (s *Server) ServeConn(c net.Conn, opts *ServeConnOpts) { bodyReadCh: make(chan bodyReadMsg), // buffering doesn't matter either way doneServing: make(chan struct{}), clientMaxStreams: math.MaxUint32, // Section 6.5.2: "Initially, there is no limit to this value" - advMaxStreams: s.maxConcurrentStreams(), + advMaxStreams: conf.MaxConcurrentStreams, initialStreamSendWindowSize: initialWindowSize, + initialStreamRecvWindowSize: conf.MaxUploadBufferPerStream, maxFrameSize: initialMaxFrameSize, + pingTimeout: conf.PingTimeout, + countErrorFunc: conf.CountError, serveG: newGoroutineLock(), pushEnabled: true, sawClientPreface: opts.SawClientPreface, } + if newf != nil { + newf(sc) + } s.state.registerConn(sc) defer s.state.unregisterConn(sc) @@ -434,7 +450,7 @@ func (s *Server) ServeConn(c net.Conn, opts *ServeConnOpts) { // passes the connection off to us with the deadline already set. // Write deadlines are set per stream in serverConn.newStream. // Disarm the net.Conn write deadline here. - if sc.hs.WriteTimeout != 0 { + if sc.hs.WriteTimeout > 0 { sc.conn.SetWriteDeadline(time.Time{}) } @@ -450,15 +466,15 @@ func (s *Server) ServeConn(c net.Conn, opts *ServeConnOpts) { sc.flow.add(initialWindowSize) sc.inflow.init(initialWindowSize) sc.hpackEncoder = hpack.NewEncoder(&sc.headerWriteBuf) - sc.hpackEncoder.SetMaxDynamicTableSizeLimit(s.maxEncoderHeaderTableSize()) + sc.hpackEncoder.SetMaxDynamicTableSizeLimit(conf.MaxEncoderHeaderTableSize) fr := NewFramer(sc.bw, c) - if s.CountError != nil { - fr.countError = s.CountError + if conf.CountError != nil { + fr.countError = conf.CountError } - fr.ReadMetaHeaders = hpack.NewDecoder(s.maxDecoderHeaderTableSize(), nil) + fr.ReadMetaHeaders = hpack.NewDecoder(conf.MaxDecoderHeaderTableSize, nil) fr.MaxHeaderListSize = sc.maxHeaderListSize() - fr.SetMaxReadFrameSize(s.maxReadFrameSize()) + fr.SetMaxReadFrameSize(conf.MaxReadFrameSize) sc.framer = fr if tc, ok := c.(connectionStater); ok { @@ -491,7 +507,7 @@ func (s *Server) ServeConn(c net.Conn, opts *ServeConnOpts) { // So for now, do nothing here again. } - if !s.PermitProhibitedCipherSuites && isBadCipher(sc.tlsState.CipherSuite) { + if !conf.PermitProhibitedCipherSuites && isBadCipher(sc.tlsState.CipherSuite) { // "Endpoints MAY choose to generate a connection error // (Section 5.4.1) of type INADEQUATE_SECURITY if one of // the prohibited cipher suites are negotiated." @@ -528,7 +544,7 @@ func (s *Server) ServeConn(c net.Conn, opts *ServeConnOpts) { opts.UpgradeRequest = nil } - sc.serve() + sc.serve(conf) } func serverConnBaseContext(c net.Conn, opts *ServeConnOpts) (ctx context.Context, cancel func()) { @@ -568,6 +584,7 @@ type serverConn struct { tlsState *tls.ConnectionState // shared by all handlers, like net/http remoteAddrStr string writeSched WriteScheduler + countErrorFunc func(errType string) // Everything following is owned by the serve loop; use serveG.check(): serveG goroutineLock // used to verify funcs are on serve() @@ -587,6 +604,7 @@ type serverConn struct { streams map[uint32]*stream unstartedHandlers []unstartedHandler initialStreamSendWindowSize int32 + initialStreamRecvWindowSize int32 maxFrameSize int32 peerMaxHeaderListSize uint32 // zero means unknown (default) canonHeader map[string]string // http2-lower-case -> Go-Canonical-Case @@ -597,9 +615,14 @@ type serverConn struct { inGoAway bool // we've started to or sent GOAWAY inFrameScheduleLoop bool // whether we're in the scheduleFrameWrite loop needToSendGoAway bool // we need to schedule a GOAWAY frame write + pingSent bool + sentPingData [8]byte goAwayCode ErrCode - shutdownTimer *time.Timer // nil until used - idleTimer *time.Timer // nil if unused + shutdownTimer timer // nil until used + idleTimer timer // nil if unused + readIdleTimeout time.Duration + pingTimeout time.Duration + readIdleTimer timer // nil if unused // Owned by the writeFrameAsync goroutine: headerWriteBuf bytes.Buffer @@ -614,11 +637,7 @@ func (sc *serverConn) maxHeaderListSize() uint32 { if n <= 0 { n = http.DefaultMaxHeaderBytes } - // http2's count is in a slightly different unit and includes 32 bytes per pair. - // So, take the net/http.Server value and pad it up a bit, assuming 10 headers. - const perFieldOverhead = 32 // per http2 spec - const typicalHeaders = 10 // conservative - return uint32(n + typicalHeaders*perFieldOverhead) + return uint32(adjustHTTP1MaxHeaderSize(int64(n))) } func (sc *serverConn) curOpenStreams() uint32 { @@ -648,12 +667,12 @@ type stream struct { flow outflow // limits writing from Handler to client inflow inflow // what the client is allowed to POST/etc to us state streamState - resetQueued bool // RST_STREAM queued for write; set by sc.resetStream - gotTrailerHeader bool // HEADER frame for trailers was seen - wroteHeaders bool // whether we wrote headers (not status 100) - readDeadline *time.Timer // nil if unused - writeDeadline *time.Timer // nil if unused - closeErr error // set before cw is closed + resetQueued bool // RST_STREAM queued for write; set by sc.resetStream + gotTrailerHeader bool // HEADER frame for trailers was seen + wroteHeaders bool // whether we wrote headers (not status 100) + readDeadline timer // nil if unused + writeDeadline timer // nil if unused + closeErr error // set before cw is closed trailer http.Header // accumulated trailers reqTrailer http.Header // handler's Request.Trailer @@ -731,11 +750,7 @@ func isClosedConnError(err error) bool { return false } - // TODO: remove this string search and be more like the Windows - // case below. That might involve modifying the standard library - // to return better error types. - str := err.Error() - if strings.Contains(str, "use of closed network connection") { + if errors.Is(err, net.ErrClosed) { return true } @@ -814,8 +829,9 @@ type readFrameResult struct { // consumer is done with the frame. // It's run on its own goroutine. func (sc *serverConn) readFrames() { - gate := make(gate) - gateDone := gate.Done + sc.srv.markNewGoroutine() + gate := make(chan struct{}) + gateDone := func() { gate <- struct{}{} } for { f, err := sc.framer.ReadFrame() select { @@ -846,6 +862,7 @@ type frameWriteResult struct { // At most one goroutine can be running writeFrameAsync at a time per // serverConn. func (sc *serverConn) writeFrameAsync(wr FrameWriteRequest, wd *writeData) { + sc.srv.markNewGoroutine() var err error if wd == nil { err = wr.write.writeFrame(sc) @@ -884,7 +901,7 @@ func (sc *serverConn) notePanic() { } } -func (sc *serverConn) serve() { +func (sc *serverConn) serve(conf http2Config) { sc.serveG.check() defer sc.notePanic() defer sc.conn.Close() @@ -898,18 +915,18 @@ func (sc *serverConn) serve() { sc.writeFrame(FrameWriteRequest{ write: writeSettings{ - {SettingMaxFrameSize, sc.srv.maxReadFrameSize()}, + {SettingMaxFrameSize, conf.MaxReadFrameSize}, {SettingMaxConcurrentStreams, sc.advMaxStreams}, {SettingMaxHeaderListSize, sc.maxHeaderListSize()}, - {SettingHeaderTableSize, sc.srv.maxDecoderHeaderTableSize()}, - {SettingInitialWindowSize, uint32(sc.srv.initialStreamRecvWindowSize())}, + {SettingHeaderTableSize, conf.MaxDecoderHeaderTableSize}, + {SettingInitialWindowSize, uint32(sc.initialStreamRecvWindowSize)}, }, }) sc.unackedSettings++ // Each connection starts with initialWindowSize inflow tokens. // If a higher value is configured, we add more tokens. - if diff := sc.srv.initialConnRecvWindowSize() - initialWindowSize; diff > 0 { + if diff := conf.MaxUploadBufferPerConnection - initialWindowSize; diff > 0 { sc.sendWindowUpdate(nil, int(diff)) } @@ -924,16 +941,23 @@ func (sc *serverConn) serve() { sc.setConnState(http.StateActive) sc.setConnState(http.StateIdle) - if sc.srv.IdleTimeout != 0 { - sc.idleTimer = time.AfterFunc(sc.srv.IdleTimeout, sc.onIdleTimer) + if sc.srv.IdleTimeout > 0 { + sc.idleTimer = sc.srv.afterFunc(sc.srv.IdleTimeout, sc.onIdleTimer) defer sc.idleTimer.Stop() } + if conf.SendPingTimeout > 0 { + sc.readIdleTimeout = conf.SendPingTimeout + sc.readIdleTimer = sc.srv.afterFunc(conf.SendPingTimeout, sc.onReadIdleTimer) + defer sc.readIdleTimer.Stop() + } + go sc.readFrames() // closed by defer sc.conn.Close above - settingsTimer := time.AfterFunc(firstSettingsTimeout, sc.onSettingsTimer) + settingsTimer := sc.srv.afterFunc(firstSettingsTimeout, sc.onSettingsTimer) defer settingsTimer.Stop() + lastFrameTime := sc.srv.now() loopNum := 0 for { loopNum++ @@ -947,6 +971,7 @@ func (sc *serverConn) serve() { case res := <-sc.wroteFrameCh: sc.wroteFrame(res) case res := <-sc.readFrameCh: + lastFrameTime = sc.srv.now() // Process any written frames before reading new frames from the client since a // written frame could have triggered a new stream to be started. if sc.writingFrameAsync { @@ -978,6 +1003,8 @@ func (sc *serverConn) serve() { case idleTimerMsg: sc.vlogf("connection is idle") sc.goAway(ErrCodeNo) + case readIdleTimerMsg: + sc.handlePingTimer(lastFrameTime) case shutdownTimerMsg: sc.vlogf("GOAWAY close timer fired; closing conn from %v", sc.conn.RemoteAddr()) return @@ -1000,7 +1027,7 @@ func (sc *serverConn) serve() { // If the peer is causing us to generate a lot of control frames, // but not reading them from us, assume they are trying to make us // run out of memory. - if sc.queuedControlFrames > sc.srv.maxQueuedControlFrames() { + if sc.queuedControlFrames > maxQueuedControlFrames { sc.vlogf("http2: too many control frames in send queue, closing connection") return } @@ -1016,12 +1043,39 @@ func (sc *serverConn) serve() { } } +func (sc *serverConn) handlePingTimer(lastFrameReadTime time.Time) { + if sc.pingSent { + sc.vlogf("timeout waiting for PING response") + sc.conn.Close() + return + } + + pingAt := lastFrameReadTime.Add(sc.readIdleTimeout) + now := sc.srv.now() + if pingAt.After(now) { + // We received frames since arming the ping timer. + // Reset it for the next possible timeout. + sc.readIdleTimer.Reset(pingAt.Sub(now)) + return + } + + sc.pingSent = true + // Ignore crypto/rand.Read errors: It generally can't fail, and worse case if it does + // is we send a PING frame containing 0s. + _, _ = rand.Read(sc.sentPingData[:]) + sc.writeFrame(FrameWriteRequest{ + write: &writePing{data: sc.sentPingData}, + }) + sc.readIdleTimer.Reset(sc.pingTimeout) +} + type serverMessage int // Message values sent to serveMsgCh. var ( settingsTimerMsg = new(serverMessage) idleTimerMsg = new(serverMessage) + readIdleTimerMsg = new(serverMessage) shutdownTimerMsg = new(serverMessage) gracefulShutdownMsg = new(serverMessage) handlerDoneMsg = new(serverMessage) @@ -1029,6 +1083,7 @@ var ( func (sc *serverConn) onSettingsTimer() { sc.sendServeMsg(settingsTimerMsg) } func (sc *serverConn) onIdleTimer() { sc.sendServeMsg(idleTimerMsg) } +func (sc *serverConn) onReadIdleTimer() { sc.sendServeMsg(readIdleTimerMsg) } func (sc *serverConn) onShutdownTimer() { sc.sendServeMsg(shutdownTimerMsg) } func (sc *serverConn) sendServeMsg(msg interface{}) { @@ -1060,10 +1115,10 @@ func (sc *serverConn) readPreface() error { errc <- nil } }() - timer := time.NewTimer(prefaceTimeout) // TODO: configurable on *Server? + timer := sc.srv.newTimer(prefaceTimeout) // TODO: configurable on *Server? defer timer.Stop() select { - case <-timer.C: + case <-timer.C(): return errPrefaceTimeout case err := <-errc: if err == nil { @@ -1281,6 +1336,10 @@ func (sc *serverConn) wroteFrame(res frameWriteResult) { sc.writingFrame = false sc.writingFrameAsync = false + if res.err != nil { + sc.conn.Close() + } + wr := res.wr if writeEndsStream(wr.write) { @@ -1428,7 +1487,7 @@ func (sc *serverConn) goAway(code ErrCode) { func (sc *serverConn) shutDownIn(d time.Duration) { sc.serveG.check() - sc.shutdownTimer = time.AfterFunc(d, sc.onShutdownTimer) + sc.shutdownTimer = sc.srv.afterFunc(d, sc.onShutdownTimer) } func (sc *serverConn) resetStream(se StreamError) { @@ -1481,6 +1540,11 @@ func (sc *serverConn) processFrameFromReader(res readFrameResult) bool { sc.goAway(ErrCodeFlowControl) return true case ConnectionError: + if res.f != nil { + if id := res.f.Header().StreamID; id > sc.maxClientStreamID { + sc.maxClientStreamID = id + } + } sc.logf("http2: server connection error from %v: %v", sc.conn.RemoteAddr(), ev) sc.goAway(ErrCode(ev)) return true // goAway will handle shutdown @@ -1550,6 +1614,11 @@ func (sc *serverConn) processFrame(f Frame) error { func (sc *serverConn) processPing(f *PingFrame) error { sc.serveG.check() if f.IsAck() { + if sc.pingSent && sc.sentPingData == f.Data { + // This is a response to a PING we sent. + sc.pingSent = false + sc.readIdleTimer.Reset(sc.readIdleTimeout) + } // 6.7 PING: " An endpoint MUST NOT respond to PING frames // containing this flag." return nil @@ -1637,7 +1706,7 @@ func (sc *serverConn) closeStream(st *stream, err error) { delete(sc.streams, st.id) if len(sc.streams) == 0 { sc.setConnState(http.StateIdle) - if sc.srv.IdleTimeout != 0 { + if sc.srv.IdleTimeout > 0 && sc.idleTimer != nil { sc.idleTimer.Reset(sc.srv.IdleTimeout) } if h1ServerKeepAlivesDisabled(sc.hs) { @@ -1659,6 +1728,7 @@ func (sc *serverConn) closeStream(st *stream, err error) { } } st.closeErr = err + st.cancelCtx() st.cw.Close() // signals Handler's CloseNotifier, unblocks writes, etc sc.writeSched.CloseStream(st.id) } @@ -2017,9 +2087,9 @@ func (sc *serverConn) processHeaders(f *MetaHeadersFrame) error { // similar to how the http1 server works. Here it's // technically more like the http1 Server's ReadHeaderTimeout // (in Go 1.8), though. That's a more sane option anyway. - if sc.hs.ReadTimeout != 0 { + if sc.hs.ReadTimeout > 0 { sc.conn.SetReadDeadline(time.Time{}) - st.readDeadline = time.AfterFunc(sc.hs.ReadTimeout, st.onReadTimeout) + st.readDeadline = sc.srv.afterFunc(sc.hs.ReadTimeout, st.onReadTimeout) } return sc.scheduleHandler(id, rw, req, handler) @@ -2038,7 +2108,7 @@ func (sc *serverConn) upgradeRequest(req *http.Request) { // Disable any read deadline set by the net/http package // prior to the upgrade. - if sc.hs.ReadTimeout != 0 { + if sc.hs.ReadTimeout > 0 { sc.conn.SetReadDeadline(time.Time{}) } @@ -2115,9 +2185,9 @@ func (sc *serverConn) newStream(id, pusherID uint32, state streamState) *stream st.cw.Init() st.flow.conn = &sc.flow // link to conn-level counter st.flow.add(sc.initialStreamSendWindowSize) - st.inflow.init(sc.srv.initialStreamRecvWindowSize()) - if sc.hs.WriteTimeout != 0 { - st.writeDeadline = time.AfterFunc(sc.hs.WriteTimeout, st.onWriteTimeout) + st.inflow.init(sc.initialStreamRecvWindowSize) + if sc.hs.WriteTimeout > 0 { + st.writeDeadline = sc.srv.afterFunc(sc.hs.WriteTimeout, st.onWriteTimeout) } sc.streams[id] = st @@ -2341,6 +2411,7 @@ func (sc *serverConn) handlerDone() { // Run on its own goroutine. func (sc *serverConn) runHandler(rw *responseWriter, req *http.Request, handler func(http.ResponseWriter, *http.Request)) { + sc.srv.markNewGoroutine() defer sc.sendServeMsg(handlerDoneMsg) didPanic := true defer func() { @@ -2637,7 +2708,7 @@ func (rws *responseWriterState) writeChunk(p []byte) (n int, err error) { var date string if _, ok := rws.snapHeader["Date"]; !ok { // TODO(bradfitz): be faster here, like net/http? measure. - date = time.Now().UTC().Format(http.TimeFormat) + date = rws.conn.srv.now().UTC().Format(http.TimeFormat) } for _, v := range rws.snapHeader["Trailer"] { @@ -2759,7 +2830,7 @@ func (rws *responseWriterState) promoteUndeclaredTrailers() { func (w *responseWriter) SetReadDeadline(deadline time.Time) error { st := w.rws.stream - if !deadline.IsZero() && deadline.Before(time.Now()) { + if !deadline.IsZero() && deadline.Before(w.rws.conn.srv.now()) { // If we're setting a deadline in the past, reset the stream immediately // so writes after SetWriteDeadline returns will fail. st.onReadTimeout() @@ -2775,9 +2846,9 @@ func (w *responseWriter) SetReadDeadline(deadline time.Time) error { if deadline.IsZero() { st.readDeadline = nil } else if st.readDeadline == nil { - st.readDeadline = time.AfterFunc(deadline.Sub(time.Now()), st.onReadTimeout) + st.readDeadline = sc.srv.afterFunc(deadline.Sub(sc.srv.now()), st.onReadTimeout) } else { - st.readDeadline.Reset(deadline.Sub(time.Now())) + st.readDeadline.Reset(deadline.Sub(sc.srv.now())) } }) return nil @@ -2785,7 +2856,7 @@ func (w *responseWriter) SetReadDeadline(deadline time.Time) error { func (w *responseWriter) SetWriteDeadline(deadline time.Time) error { st := w.rws.stream - if !deadline.IsZero() && deadline.Before(time.Now()) { + if !deadline.IsZero() && deadline.Before(w.rws.conn.srv.now()) { // If we're setting a deadline in the past, reset the stream immediately // so writes after SetWriteDeadline returns will fail. st.onWriteTimeout() @@ -2801,9 +2872,9 @@ func (w *responseWriter) SetWriteDeadline(deadline time.Time) error { if deadline.IsZero() { st.writeDeadline = nil } else if st.writeDeadline == nil { - st.writeDeadline = time.AfterFunc(deadline.Sub(time.Now()), st.onWriteTimeout) + st.writeDeadline = sc.srv.afterFunc(deadline.Sub(sc.srv.now()), st.onWriteTimeout) } else { - st.writeDeadline.Reset(deadline.Sub(time.Now())) + st.writeDeadline.Reset(deadline.Sub(sc.srv.now())) } }) return nil @@ -3255,7 +3326,7 @@ func (sc *serverConn) countError(name string, err error) error { if sc == nil || sc.srv == nil { return err } - f := sc.srv.CountError + f := sc.countErrorFunc if f == nil { return err } diff --git a/vendor/golang.org/x/net/http2/timer.go b/vendor/golang.org/x/net/http2/timer.go new file mode 100644 index 00000000..0b1c17b8 --- /dev/null +++ b/vendor/golang.org/x/net/http2/timer.go @@ -0,0 +1,20 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +package http2 + +import "time" + +// A timer is a time.Timer, as an interface which can be replaced in tests. +type timer = interface { + C() <-chan time.Time + Reset(d time.Duration) bool + Stop() bool +} + +// timeTimer adapts a time.Timer to the timer interface. +type timeTimer struct { + *time.Timer +} + +func (t timeTimer) C() <-chan time.Time { return t.Timer.C } diff --git a/vendor/golang.org/x/net/http2/transport.go b/vendor/golang.org/x/net/http2/transport.go index df578b86..0c5f64aa 100644 --- a/vendor/golang.org/x/net/http2/transport.go +++ b/vendor/golang.org/x/net/http2/transport.go @@ -25,7 +25,6 @@ import ( "net/http" "net/http/httptrace" "net/textproto" - "os" "sort" "strconv" "strings" @@ -147,6 +146,12 @@ type Transport struct { // waiting for their turn. StrictMaxConcurrentStreams bool + // IdleConnTimeout is the maximum amount of time an idle + // (keep-alive) connection will remain idle before closing + // itself. + // Zero means no limit. + IdleConnTimeout time.Duration + // ReadIdleTimeout is the timeout after which a health check using ping // frame will be carried out if no frame is received on the connection. // Note that a ping response will is considered a received frame, so if @@ -178,41 +183,67 @@ type Transport struct { connPoolOnce sync.Once connPoolOrDef ClientConnPool // non-nil version of ConnPool + + *transportTestHooks } -func (t *Transport) maxHeaderListSize() uint32 { - if t.MaxHeaderListSize == 0 { - return 10 << 20 - } - if t.MaxHeaderListSize == 0xffffffff { - return 0 - } - return t.MaxHeaderListSize +// Hook points used for testing. +// Outside of tests, t.transportTestHooks is nil and these all have minimal implementations. +// Inside tests, see the testSyncHooks function docs. + +type transportTestHooks struct { + newclientconn func(*ClientConn) + group synctestGroupInterface } -func (t *Transport) maxFrameReadSize() uint32 { - if t.MaxReadFrameSize == 0 { - return 0 // use the default provided by the peer +func (t *Transport) markNewGoroutine() { + if t != nil && t.transportTestHooks != nil { + t.transportTestHooks.group.Join() } - if t.MaxReadFrameSize < minMaxFrameSize { - return minMaxFrameSize +} + +// newTimer creates a new time.Timer, or a synthetic timer in tests. +func (t *Transport) newTimer(d time.Duration) timer { + if t.transportTestHooks != nil { + return t.transportTestHooks.group.NewTimer(d) } - if t.MaxReadFrameSize > maxFrameSize { - return maxFrameSize + return timeTimer{time.NewTimer(d)} +} + +// afterFunc creates a new time.AfterFunc timer, or a synthetic timer in tests. +func (t *Transport) afterFunc(d time.Duration, f func()) timer { + if t.transportTestHooks != nil { + return t.transportTestHooks.group.AfterFunc(d, f) } - return t.MaxReadFrameSize + return timeTimer{time.AfterFunc(d, f)} } -func (t *Transport) disableCompression() bool { - return t.DisableCompression || (t.t1 != nil && t.t1.DisableCompression) +func (t *Transport) contextWithTimeout(ctx context.Context, d time.Duration) (context.Context, context.CancelFunc) { + if t.transportTestHooks != nil { + return t.transportTestHooks.group.ContextWithTimeout(ctx, d) + } + return context.WithTimeout(ctx, d) } -func (t *Transport) pingTimeout() time.Duration { - if t.PingTimeout == 0 { - return 15 * time.Second +func (t *Transport) maxHeaderListSize() uint32 { + n := int64(t.MaxHeaderListSize) + if t.t1 != nil && t.t1.MaxResponseHeaderBytes != 0 { + n = t.t1.MaxResponseHeaderBytes + if n > 0 { + n = adjustHTTP1MaxHeaderSize(n) + } + } + if n <= 0 { + return 10 << 20 + } + if n >= 0xffffffff { + return 0 } - return t.PingTimeout + return uint32(n) +} +func (t *Transport) disableCompression() bool { + return t.DisableCompression || (t.t1 != nil && t.t1.DisableCompression) } // ConfigureTransport configures a net/http HTTP/1 Transport to use HTTP/2. @@ -302,7 +333,7 @@ type ClientConn struct { readerErr error // set before readerDone is closed idleTimeout time.Duration // or 0 for never - idleTimer *time.Timer + idleTimer timer mu sync.Mutex // guards following cond *sync.Cond // hold mu; broadcast on flow/closed changes @@ -324,11 +355,14 @@ type ClientConn struct { lastActive time.Time lastIdle time.Time // time last idle // Settings from peer: (also guarded by wmu) - maxFrameSize uint32 - maxConcurrentStreams uint32 - peerMaxHeaderListSize uint64 - peerMaxHeaderTableSize uint32 - initialWindowSize uint32 + maxFrameSize uint32 + maxConcurrentStreams uint32 + peerMaxHeaderListSize uint64 + peerMaxHeaderTableSize uint32 + initialWindowSize uint32 + initialStreamRecvWindowSize int32 + readIdleTimeout time.Duration + pingTimeout time.Duration // reqHeaderMu is a 1-element semaphore channel controlling access to sending new requests. // Write to reqHeaderMu to lock it, read from it to unlock. @@ -446,12 +480,14 @@ func (cs *clientStream) closeReqBodyLocked() { cs.reqBodyClosed = make(chan struct{}) reqBodyClosed := cs.reqBodyClosed go func() { + cs.cc.t.markNewGoroutine() cs.reqBody.Close() close(reqBodyClosed) }() } type stickyErrWriter struct { + group synctestGroupInterface conn net.Conn timeout time.Duration err *error @@ -461,22 +497,9 @@ func (sew stickyErrWriter) Write(p []byte) (n int, err error) { if *sew.err != nil { return 0, *sew.err } - for { - if sew.timeout != 0 { - sew.conn.SetWriteDeadline(time.Now().Add(sew.timeout)) - } - nn, err := sew.conn.Write(p[n:]) - n += nn - if n < len(p) && nn > 0 && errors.Is(err, os.ErrDeadlineExceeded) { - // Keep extending the deadline so long as we're making progress. - continue - } - if sew.timeout != 0 { - sew.conn.SetWriteDeadline(time.Time{}) - } - *sew.err = err - return n, err - } + n, err = writeWithByteTimeout(sew.group, sew.conn, sew.timeout, p) + *sew.err = err + return n, err } // noCachedConnError is the concrete type of ErrNoCachedConn, which @@ -537,15 +560,6 @@ func authorityAddr(scheme string, authority string) (addr string) { return net.JoinHostPort(host, port) } -var retryBackoffHook func(time.Duration) *time.Timer - -func backoffNewTimer(d time.Duration) *time.Timer { - if retryBackoffHook != nil { - return retryBackoffHook(d) - } - return time.NewTimer(d) -} - // RoundTripOpt is like RoundTrip, but takes options. func (t *Transport) RoundTripOpt(req *http.Request, opt RoundTripOpt) (*http.Response, error) { if !(req.URL.Scheme == "https" || (req.URL.Scheme == "http" && t.AllowHTTP)) { @@ -573,13 +587,13 @@ func (t *Transport) RoundTripOpt(req *http.Request, opt RoundTripOpt) (*http.Res backoff := float64(uint(1) << (uint(retry) - 1)) backoff += backoff * (0.1 * mathrand.Float64()) d := time.Second * time.Duration(backoff) - timer := backoffNewTimer(d) + tm := t.newTimer(d) select { - case <-timer.C: + case <-tm.C(): t.vlogf("RoundTrip retrying after failure: %v", roundTripErr) continue case <-req.Context().Done(): - timer.Stop() + tm.Stop() err = req.Context().Err() } } @@ -658,6 +672,9 @@ func canRetryError(err error) bool { } func (t *Transport) dialClientConn(ctx context.Context, addr string, singleUse bool) (*ClientConn, error) { + if t.transportTestHooks != nil { + return t.newClientConn(nil, singleUse) + } host, _, err := net.SplitHostPort(addr) if err != nil { return nil, err @@ -717,43 +734,36 @@ func (t *Transport) expectContinueTimeout() time.Duration { return t.t1.ExpectContinueTimeout } -func (t *Transport) maxDecoderHeaderTableSize() uint32 { - if v := t.MaxDecoderHeaderTableSize; v > 0 { - return v - } - return initialHeaderTableSize -} - -func (t *Transport) maxEncoderHeaderTableSize() uint32 { - if v := t.MaxEncoderHeaderTableSize; v > 0 { - return v - } - return initialHeaderTableSize -} - func (t *Transport) NewClientConn(c net.Conn) (*ClientConn, error) { return t.newClientConn(c, t.disableKeepAlives()) } func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, error) { + conf := configFromTransport(t) cc := &ClientConn{ - t: t, - tconn: c, - readerDone: make(chan struct{}), - nextStreamID: 1, - maxFrameSize: 16 << 10, // spec default - initialWindowSize: 65535, // spec default - maxConcurrentStreams: initialMaxConcurrentStreams, // "infinite", per spec. Use a smaller value until we have received server settings. - peerMaxHeaderListSize: 0xffffffffffffffff, // "infinite", per spec. Use 2^64-1 instead. - streams: make(map[uint32]*clientStream), - singleUse: singleUse, - wantSettingsAck: true, - pings: make(map[[8]byte]chan struct{}), - reqHeaderMu: make(chan struct{}, 1), - } - if d := t.idleConnTimeout(); d != 0 { - cc.idleTimeout = d - cc.idleTimer = time.AfterFunc(d, cc.onIdleTimeout) + t: t, + tconn: c, + readerDone: make(chan struct{}), + nextStreamID: 1, + maxFrameSize: 16 << 10, // spec default + initialWindowSize: 65535, // spec default + initialStreamRecvWindowSize: conf.MaxUploadBufferPerStream, + maxConcurrentStreams: initialMaxConcurrentStreams, // "infinite", per spec. Use a smaller value until we have received server settings. + peerMaxHeaderListSize: 0xffffffffffffffff, // "infinite", per spec. Use 2^64-1 instead. + streams: make(map[uint32]*clientStream), + singleUse: singleUse, + wantSettingsAck: true, + readIdleTimeout: conf.SendPingTimeout, + pingTimeout: conf.PingTimeout, + pings: make(map[[8]byte]chan struct{}), + reqHeaderMu: make(chan struct{}, 1), + } + var group synctestGroupInterface + if t.transportTestHooks != nil { + t.markNewGoroutine() + t.transportTestHooks.newclientconn(cc) + c = cc.tconn + group = t.group } if VerboseLogs { t.vlogf("http2: Transport creating client conn %p to %v", cc, c.RemoteAddr()) @@ -765,30 +775,25 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro // TODO: adjust this writer size to account for frame size + // MTU + crypto/tls record padding. cc.bw = bufio.NewWriter(stickyErrWriter{ + group: group, conn: c, - timeout: t.WriteByteTimeout, + timeout: conf.WriteByteTimeout, err: &cc.werr, }) cc.br = bufio.NewReader(c) cc.fr = NewFramer(cc.bw, cc.br) - if t.maxFrameReadSize() != 0 { - cc.fr.SetMaxReadFrameSize(t.maxFrameReadSize()) - } + cc.fr.SetMaxReadFrameSize(conf.MaxReadFrameSize) if t.CountError != nil { cc.fr.countError = t.CountError } - maxHeaderTableSize := t.maxDecoderHeaderTableSize() + maxHeaderTableSize := conf.MaxDecoderHeaderTableSize cc.fr.ReadMetaHeaders = hpack.NewDecoder(maxHeaderTableSize, nil) cc.fr.MaxHeaderListSize = t.maxHeaderListSize() cc.henc = hpack.NewEncoder(&cc.hbuf) - cc.henc.SetMaxDynamicTableSizeLimit(t.maxEncoderHeaderTableSize()) + cc.henc.SetMaxDynamicTableSizeLimit(conf.MaxEncoderHeaderTableSize) cc.peerMaxHeaderTableSize = initialHeaderTableSize - if t.AllowHTTP { - cc.nextStreamID = 3 - } - if cs, ok := c.(connectionStater); ok { state := cs.ConnectionState() cc.tlsState = &state @@ -796,11 +801,9 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro initialSettings := []Setting{ {ID: SettingEnablePush, Val: 0}, - {ID: SettingInitialWindowSize, Val: transportDefaultStreamFlow}, - } - if max := t.maxFrameReadSize(); max != 0 { - initialSettings = append(initialSettings, Setting{ID: SettingMaxFrameSize, Val: max}) + {ID: SettingInitialWindowSize, Val: uint32(cc.initialStreamRecvWindowSize)}, } + initialSettings = append(initialSettings, Setting{ID: SettingMaxFrameSize, Val: conf.MaxReadFrameSize}) if max := t.maxHeaderListSize(); max != 0 { initialSettings = append(initialSettings, Setting{ID: SettingMaxHeaderListSize, Val: max}) } @@ -810,23 +813,29 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro cc.bw.Write(clientPreface) cc.fr.WriteSettings(initialSettings...) - cc.fr.WriteWindowUpdate(0, transportDefaultConnFlow) - cc.inflow.init(transportDefaultConnFlow + initialWindowSize) + cc.fr.WriteWindowUpdate(0, uint32(conf.MaxUploadBufferPerConnection)) + cc.inflow.init(conf.MaxUploadBufferPerConnection + initialWindowSize) cc.bw.Flush() if cc.werr != nil { cc.Close() return nil, cc.werr } + // Start the idle timer after the connection is fully initialized. + if d := t.idleConnTimeout(); d != 0 { + cc.idleTimeout = d + cc.idleTimer = t.afterFunc(d, cc.onIdleTimeout) + } + go cc.readLoop() return cc, nil } func (cc *ClientConn) healthCheck() { - pingTimeout := cc.t.pingTimeout() + pingTimeout := cc.pingTimeout // We don't need to periodically ping in the health check, because the readLoop of ClientConn will // trigger the healthCheck again if there is no frame received. - ctx, cancel := context.WithTimeout(context.Background(), pingTimeout) + ctx, cancel := cc.t.contextWithTimeout(context.Background(), pingTimeout) defer cancel() cc.vlogf("http2: Transport sending health check") err := cc.Ping(ctx) @@ -861,7 +870,20 @@ func (cc *ClientConn) setGoAway(f *GoAwayFrame) { } last := f.LastStreamID for streamID, cs := range cc.streams { - if streamID > last { + if streamID <= last { + // The server's GOAWAY indicates that it received this stream. + // It will either finish processing it, or close the connection + // without doing so. Either way, leave the stream alone for now. + continue + } + if streamID == 1 && cc.goAway.ErrCode != ErrCodeNo { + // Don't retry the first stream on a connection if we get a non-NO error. + // If the server is sending an error on a new connection, + // retrying the request on a new one probably isn't going to work. + cs.abortStreamLocked(fmt.Errorf("http2: Transport received GOAWAY from server ErrCode:%v", cc.goAway.ErrCode)) + } else { + // Aborting the stream with errClentConnGotGoAway indicates that + // the request should be retried on a new connection. cs.abortStreamLocked(errClientConnGotGoAway) } } @@ -1057,6 +1079,7 @@ func (cc *ClientConn) Shutdown(ctx context.Context) error { done := make(chan struct{}) cancelled := false // guarded by cc.mu go func() { + cc.t.markNewGoroutine() cc.mu.Lock() defer cc.mu.Unlock() for { @@ -1215,6 +1238,10 @@ func (cc *ClientConn) decrStreamReservationsLocked() { } func (cc *ClientConn) RoundTrip(req *http.Request) (*http.Response, error) { + return cc.roundTrip(req, nil) +} + +func (cc *ClientConn) roundTrip(req *http.Request, streamf func(*clientStream)) (*http.Response, error) { ctx := req.Context() cs := &clientStream{ cc: cc, @@ -1229,7 +1256,28 @@ func (cc *ClientConn) RoundTrip(req *http.Request) (*http.Response, error) { respHeaderRecv: make(chan struct{}), donec: make(chan struct{}), } - go cs.doRequest(req) + + // TODO(bradfitz): this is a copy of the logic in net/http. Unify somewhere? + if !cc.t.disableCompression() && + req.Header.Get("Accept-Encoding") == "" && + req.Header.Get("Range") == "" && + !cs.isHead { + // Request gzip only, not deflate. Deflate is ambiguous and + // not as universally supported anyway. + // See: https://zlib.net/zlib_faq.html#faq39 + // + // Note that we don't request this for HEAD requests, + // due to a bug in nginx: + // http://trac.nginx.org/nginx/ticket/358 + // https://golang.org/issue/5522 + // + // We don't request gzip if the request is for a range, since + // auto-decoding a portion of a gzipped document will just fail + // anyway. See https://golang.org/issue/8923 + cs.requestedGzip = true + } + + go cs.doRequest(req, streamf) waitDone := func() error { select { @@ -1322,8 +1370,9 @@ func (cc *ClientConn) RoundTrip(req *http.Request) (*http.Response, error) { // doRequest runs for the duration of the request lifetime. // // It sends the request and performs post-request cleanup (closing Request.Body, etc.). -func (cs *clientStream) doRequest(req *http.Request) { - err := cs.writeRequest(req) +func (cs *clientStream) doRequest(req *http.Request, streamf func(*clientStream)) { + cs.cc.t.markNewGoroutine() + err := cs.writeRequest(req, streamf) cs.cleanupWriteRequest(err) } @@ -1334,7 +1383,7 @@ func (cs *clientStream) doRequest(req *http.Request) { // // It returns non-nil if the request ends otherwise. // If the returned error is StreamError, the error Code may be used in resetting the stream. -func (cs *clientStream) writeRequest(req *http.Request) (err error) { +func (cs *clientStream) writeRequest(req *http.Request, streamf func(*clientStream)) (err error) { cc := cs.cc ctx := cs.ctx @@ -1372,24 +1421,8 @@ func (cs *clientStream) writeRequest(req *http.Request) (err error) { } cc.mu.Unlock() - // TODO(bradfitz): this is a copy of the logic in net/http. Unify somewhere? - if !cc.t.disableCompression() && - req.Header.Get("Accept-Encoding") == "" && - req.Header.Get("Range") == "" && - !cs.isHead { - // Request gzip only, not deflate. Deflate is ambiguous and - // not as universally supported anyway. - // See: https://zlib.net/zlib_faq.html#faq39 - // - // Note that we don't request this for HEAD requests, - // due to a bug in nginx: - // http://trac.nginx.org/nginx/ticket/358 - // https://golang.org/issue/5522 - // - // We don't request gzip if the request is for a range, since - // auto-decoding a portion of a gzipped document will just fail - // anyway. See https://golang.org/issue/8923 - cs.requestedGzip = true + if streamf != nil { + streamf(cs) } continueTimeout := cc.t.expectContinueTimeout() @@ -1452,9 +1485,9 @@ func (cs *clientStream) writeRequest(req *http.Request) (err error) { var respHeaderTimer <-chan time.Time var respHeaderRecv chan struct{} if d := cc.responseHeaderTimeout(); d != 0 { - timer := time.NewTimer(d) + timer := cc.t.newTimer(d) defer timer.Stop() - respHeaderTimer = timer.C + respHeaderTimer = timer.C() respHeaderRecv = cs.respHeaderRecv } // Wait until the peer half-closes its end of the stream, @@ -1875,6 +1908,22 @@ func (cs *clientStream) awaitFlowControl(maxBytes int) (taken int32, err error) } } +func validateHeaders(hdrs http.Header) string { + for k, vv := range hdrs { + if !httpguts.ValidHeaderFieldName(k) { + return fmt.Sprintf("name %q", k) + } + for _, v := range vv { + if !httpguts.ValidHeaderFieldValue(v) { + // Don't include the value in the error, + // because it may be sensitive. + return fmt.Sprintf("value for header %q", k) + } + } + } + return "" +} + var errNilRequestURL = errors.New("http2: Request.URI is nil") // requires cc.wmu be held. @@ -1912,19 +1961,14 @@ func (cc *ClientConn) encodeHeaders(req *http.Request, addGzipHeader bool, trail } } - // Check for any invalid headers and return an error before we + // Check for any invalid headers+trailers and return an error before we // potentially pollute our hpack state. (We want to be able to // continue to reuse the hpack encoder for future requests) - for k, vv := range req.Header { - if !httpguts.ValidHeaderFieldName(k) { - return nil, fmt.Errorf("invalid HTTP header name %q", k) - } - for _, v := range vv { - if !httpguts.ValidHeaderFieldValue(v) { - // Don't include the value in the error, because it may be sensitive. - return nil, fmt.Errorf("invalid HTTP header value for header %q", k) - } - } + if err := validateHeaders(req.Header); err != "" { + return nil, fmt.Errorf("invalid HTTP header %s", err) + } + if err := validateHeaders(req.Trailer); err != "" { + return nil, fmt.Errorf("invalid HTTP trailer %s", err) } enumerateHeaders := func(f func(name, value string)) { @@ -2120,7 +2164,7 @@ type resAndError struct { func (cc *ClientConn) addStreamLocked(cs *clientStream) { cs.flow.add(int32(cc.initialWindowSize)) cs.flow.setConnFlow(&cc.flow) - cs.inflow.init(transportDefaultStreamFlow) + cs.inflow.init(cc.initialStreamRecvWindowSize) cs.ID = cc.nextStreamID cc.nextStreamID += 2 cc.streams[cs.ID] = cs @@ -2165,6 +2209,7 @@ type clientConnReadLoop struct { // readLoop runs in its own goroutine and reads and dispatches frames. func (cc *ClientConn) readLoop() { + cc.t.markNewGoroutine() rl := &clientConnReadLoop{cc: cc} defer rl.cleanup() cc.readerErr = rl.run() @@ -2265,11 +2310,10 @@ func (cc *ClientConn) countReadFrameError(err error) { func (rl *clientConnReadLoop) run() error { cc := rl.cc gotSettings := false - readIdleTimeout := cc.t.ReadIdleTimeout - var t *time.Timer + readIdleTimeout := cc.readIdleTimeout + var t timer if readIdleTimeout != 0 { - t = time.AfterFunc(readIdleTimeout, cc.healthCheck) - defer t.Stop() + t = cc.t.afterFunc(readIdleTimeout, cc.healthCheck) } for { f, err := cc.fr.ReadFrame() @@ -2684,7 +2728,7 @@ func (rl *clientConnReadLoop) processData(f *DataFrame) error { }) return nil } - if !cs.firstByte { + if !cs.pastHeaders { cc.logf("protocol error: received DATA before a HEADERS frame") rl.endStreamError(cs, StreamError{ StreamID: f.StreamID, @@ -2911,6 +2955,15 @@ func (rl *clientConnReadLoop) processWindowUpdate(f *WindowUpdateFrame) error { fl = &cs.flow } if !fl.add(int32(f.Increment)) { + // For stream, the sender sends RST_STREAM with an error code of FLOW_CONTROL_ERROR + if cs != nil { + rl.endStreamError(cs, StreamError{ + StreamID: f.StreamID, + Code: ErrCodeFlowControl, + }) + return nil + } + return ConnectionError(ErrCodeFlowControl) } cc.cond.Broadcast() @@ -2955,24 +3008,26 @@ func (cc *ClientConn) Ping(ctx context.Context) error { } cc.mu.Unlock() } - errc := make(chan error, 1) + var pingError error + errc := make(chan struct{}) go func() { + cc.t.markNewGoroutine() cc.wmu.Lock() defer cc.wmu.Unlock() - if err := cc.fr.WritePing(false, p); err != nil { - errc <- err + if pingError = cc.fr.WritePing(false, p); pingError != nil { + close(errc) return } - if err := cc.bw.Flush(); err != nil { - errc <- err + if pingError = cc.bw.Flush(); pingError != nil { + close(errc) return } }() select { case <-c: return nil - case err := <-errc: - return err + case <-errc: + return pingError case <-ctx.Done(): return ctx.Err() case <-cc.readerDone: @@ -3141,9 +3196,17 @@ func (rt noDialH2RoundTripper) RoundTrip(req *http.Request) (*http.Response, err } func (t *Transport) idleConnTimeout() time.Duration { + // to keep things backwards compatible, we use non-zero values of + // IdleConnTimeout, followed by using the IdleConnTimeout on the underlying + // http1 transport, followed by 0 + if t.IdleConnTimeout != 0 { + return t.IdleConnTimeout + } + if t.t1 != nil { return t.t1.IdleConnTimeout } + return 0 } diff --git a/vendor/golang.org/x/net/http2/write.go b/vendor/golang.org/x/net/http2/write.go index 33f61398..6ff6bee7 100644 --- a/vendor/golang.org/x/net/http2/write.go +++ b/vendor/golang.org/x/net/http2/write.go @@ -131,6 +131,16 @@ func (se StreamError) writeFrame(ctx writeContext) error { func (se StreamError) staysWithinBuffer(max int) bool { return frameHeaderLen+4 <= max } +type writePing struct { + data [8]byte +} + +func (w writePing) writeFrame(ctx writeContext) error { + return ctx.Framer().WritePing(false, w.data) +} + +func (w writePing) staysWithinBuffer(max int) bool { return frameHeaderLen+len(w.data) <= max } + type writePingAck struct{ pf *PingFrame } func (w writePingAck) writeFrame(ctx writeContext) error { diff --git a/vendor/golang.org/x/net/http2/writesched_priority.go b/vendor/golang.org/x/net/http2/writesched_priority.go index 0a242c66..f6783339 100644 --- a/vendor/golang.org/x/net/http2/writesched_priority.go +++ b/vendor/golang.org/x/net/http2/writesched_priority.go @@ -443,8 +443,8 @@ func (ws *priorityWriteScheduler) addClosedOrIdleNode(list *[]*priorityNode, max } func (ws *priorityWriteScheduler) removeNode(n *priorityNode) { - for k := n.kids; k != nil; k = k.next { - k.setParent(n.parent) + for n.kids != nil { + n.kids.setParent(n.parent) } n.setParent(nil) delete(ws.nodes, n.id) diff --git a/vendor/golang.org/x/net/proxy/per_host.go b/vendor/golang.org/x/net/proxy/per_host.go index 573fe79e..d7d4b8b6 100644 --- a/vendor/golang.org/x/net/proxy/per_host.go +++ b/vendor/golang.org/x/net/proxy/per_host.go @@ -137,9 +137,7 @@ func (p *PerHost) AddNetwork(net *net.IPNet) { // AddZone specifies a DNS suffix that will use the bypass proxy. A zone of // "example.com" matches "example.com" and all of its subdomains. func (p *PerHost) AddZone(zone string) { - if strings.HasSuffix(zone, ".") { - zone = zone[:len(zone)-1] - } + zone = strings.TrimSuffix(zone, ".") if !strings.HasPrefix(zone, ".") { zone = "." + zone } @@ -148,8 +146,6 @@ func (p *PerHost) AddZone(zone string) { // AddHost specifies a host name that will use the bypass proxy. func (p *PerHost) AddHost(host string) { - if strings.HasSuffix(host, ".") { - host = host[:len(host)-1] - } + host = strings.TrimSuffix(host, ".") p.bypassHosts = append(p.bypassHosts, host) } diff --git a/vendor/github.com/ghodss/yaml/LICENSE b/vendor/golang.org/x/sync/LICENSE similarity index 53% rename from vendor/github.com/ghodss/yaml/LICENSE rename to vendor/golang.org/x/sync/LICENSE index 7805d36d..2a7cf70d 100644 --- a/vendor/github.com/ghodss/yaml/LICENSE +++ b/vendor/golang.org/x/sync/LICENSE @@ -1,27 +1,4 @@ -The MIT License (MIT) - -Copyright (c) 2014 Sam Ghods - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - - -Copyright (c) 2012 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -33,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/sync/PATENTS b/vendor/golang.org/x/sync/PATENTS new file mode 100644 index 00000000..73309904 --- /dev/null +++ b/vendor/golang.org/x/sync/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. + +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/sync/errgroup/errgroup.go b/vendor/golang.org/x/sync/errgroup/errgroup.go new file mode 100644 index 00000000..948a3ee6 --- /dev/null +++ b/vendor/golang.org/x/sync/errgroup/errgroup.go @@ -0,0 +1,135 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package errgroup provides synchronization, error propagation, and Context +// cancelation for groups of goroutines working on subtasks of a common task. +// +// [errgroup.Group] is related to [sync.WaitGroup] but adds handling of tasks +// returning errors. +package errgroup + +import ( + "context" + "fmt" + "sync" +) + +type token struct{} + +// A Group is a collection of goroutines working on subtasks that are part of +// the same overall task. +// +// A zero Group is valid, has no limit on the number of active goroutines, +// and does not cancel on error. +type Group struct { + cancel func(error) + + wg sync.WaitGroup + + sem chan token + + errOnce sync.Once + err error +} + +func (g *Group) done() { + if g.sem != nil { + <-g.sem + } + g.wg.Done() +} + +// WithContext returns a new Group and an associated Context derived from ctx. +// +// The derived Context is canceled the first time a function passed to Go +// returns a non-nil error or the first time Wait returns, whichever occurs +// first. +func WithContext(ctx context.Context) (*Group, context.Context) { + ctx, cancel := withCancelCause(ctx) + return &Group{cancel: cancel}, ctx +} + +// Wait blocks until all function calls from the Go method have returned, then +// returns the first non-nil error (if any) from them. +func (g *Group) Wait() error { + g.wg.Wait() + if g.cancel != nil { + g.cancel(g.err) + } + return g.err +} + +// Go calls the given function in a new goroutine. +// It blocks until the new goroutine can be added without the number of +// active goroutines in the group exceeding the configured limit. +// +// The first call to return a non-nil error cancels the group's context, if the +// group was created by calling WithContext. The error will be returned by Wait. +func (g *Group) Go(f func() error) { + if g.sem != nil { + g.sem <- token{} + } + + g.wg.Add(1) + go func() { + defer g.done() + + if err := f(); err != nil { + g.errOnce.Do(func() { + g.err = err + if g.cancel != nil { + g.cancel(g.err) + } + }) + } + }() +} + +// TryGo calls the given function in a new goroutine only if the number of +// active goroutines in the group is currently below the configured limit. +// +// The return value reports whether the goroutine was started. +func (g *Group) TryGo(f func() error) bool { + if g.sem != nil { + select { + case g.sem <- token{}: + // Note: this allows barging iff channels in general allow barging. + default: + return false + } + } + + g.wg.Add(1) + go func() { + defer g.done() + + if err := f(); err != nil { + g.errOnce.Do(func() { + g.err = err + if g.cancel != nil { + g.cancel(g.err) + } + }) + } + }() + return true +} + +// SetLimit limits the number of active goroutines in this group to at most n. +// A negative value indicates no limit. +// +// Any subsequent call to the Go method will block until it can add an active +// goroutine without exceeding the configured limit. +// +// The limit must not be modified while any goroutines in the group are active. +func (g *Group) SetLimit(n int) { + if n < 0 { + g.sem = nil + return + } + if len(g.sem) != 0 { + panic(fmt.Errorf("errgroup: modify limit while %v goroutines in the group are still active", len(g.sem))) + } + g.sem = make(chan token, n) +} diff --git a/vendor/golang.org/x/sync/errgroup/go120.go b/vendor/golang.org/x/sync/errgroup/go120.go new file mode 100644 index 00000000..f93c740b --- /dev/null +++ b/vendor/golang.org/x/sync/errgroup/go120.go @@ -0,0 +1,13 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.20 + +package errgroup + +import "context" + +func withCancelCause(parent context.Context) (context.Context, func(error)) { + return context.WithCancelCause(parent) +} diff --git a/vendor/golang.org/x/sync/errgroup/pre_go120.go b/vendor/golang.org/x/sync/errgroup/pre_go120.go new file mode 100644 index 00000000..88ce3343 --- /dev/null +++ b/vendor/golang.org/x/sync/errgroup/pre_go120.go @@ -0,0 +1,14 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.20 + +package errgroup + +import "context" + +func withCancelCause(parent context.Context) (context.Context, func(error)) { + ctx, cancel := context.WithCancel(parent) + return ctx, func(error) { cancel() } +} diff --git a/vendor/golang.org/x/sync/semaphore/semaphore.go b/vendor/golang.org/x/sync/semaphore/semaphore.go new file mode 100644 index 00000000..b618162a --- /dev/null +++ b/vendor/golang.org/x/sync/semaphore/semaphore.go @@ -0,0 +1,160 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package semaphore provides a weighted semaphore implementation. +package semaphore // import "golang.org/x/sync/semaphore" + +import ( + "container/list" + "context" + "sync" +) + +type waiter struct { + n int64 + ready chan<- struct{} // Closed when semaphore acquired. +} + +// NewWeighted creates a new weighted semaphore with the given +// maximum combined weight for concurrent access. +func NewWeighted(n int64) *Weighted { + w := &Weighted{size: n} + return w +} + +// Weighted provides a way to bound concurrent access to a resource. +// The callers can request access with a given weight. +type Weighted struct { + size int64 + cur int64 + mu sync.Mutex + waiters list.List +} + +// Acquire acquires the semaphore with a weight of n, blocking until resources +// are available or ctx is done. On success, returns nil. On failure, returns +// ctx.Err() and leaves the semaphore unchanged. +func (s *Weighted) Acquire(ctx context.Context, n int64) error { + done := ctx.Done() + + s.mu.Lock() + select { + case <-done: + // ctx becoming done has "happened before" acquiring the semaphore, + // whether it became done before the call began or while we were + // waiting for the mutex. We prefer to fail even if we could acquire + // the mutex without blocking. + s.mu.Unlock() + return ctx.Err() + default: + } + if s.size-s.cur >= n && s.waiters.Len() == 0 { + // Since we hold s.mu and haven't synchronized since checking done, if + // ctx becomes done before we return here, it becoming done must have + // "happened concurrently" with this call - it cannot "happen before" + // we return in this branch. So, we're ok to always acquire here. + s.cur += n + s.mu.Unlock() + return nil + } + + if n > s.size { + // Don't make other Acquire calls block on one that's doomed to fail. + s.mu.Unlock() + <-done + return ctx.Err() + } + + ready := make(chan struct{}) + w := waiter{n: n, ready: ready} + elem := s.waiters.PushBack(w) + s.mu.Unlock() + + select { + case <-done: + s.mu.Lock() + select { + case <-ready: + // Acquired the semaphore after we were canceled. + // Pretend we didn't and put the tokens back. + s.cur -= n + s.notifyWaiters() + default: + isFront := s.waiters.Front() == elem + s.waiters.Remove(elem) + // If we're at the front and there're extra tokens left, notify other waiters. + if isFront && s.size > s.cur { + s.notifyWaiters() + } + } + s.mu.Unlock() + return ctx.Err() + + case <-ready: + // Acquired the semaphore. Check that ctx isn't already done. + // We check the done channel instead of calling ctx.Err because we + // already have the channel, and ctx.Err is O(n) with the nesting + // depth of ctx. + select { + case <-done: + s.Release(n) + return ctx.Err() + default: + } + return nil + } +} + +// TryAcquire acquires the semaphore with a weight of n without blocking. +// On success, returns true. On failure, returns false and leaves the semaphore unchanged. +func (s *Weighted) TryAcquire(n int64) bool { + s.mu.Lock() + success := s.size-s.cur >= n && s.waiters.Len() == 0 + if success { + s.cur += n + } + s.mu.Unlock() + return success +} + +// Release releases the semaphore with a weight of n. +func (s *Weighted) Release(n int64) { + s.mu.Lock() + s.cur -= n + if s.cur < 0 { + s.mu.Unlock() + panic("semaphore: released more than held") + } + s.notifyWaiters() + s.mu.Unlock() +} + +func (s *Weighted) notifyWaiters() { + for { + next := s.waiters.Front() + if next == nil { + break // No more waiters blocked. + } + + w := next.Value.(waiter) + if s.size-s.cur < w.n { + // Not enough tokens for the next waiter. We could keep going (to try to + // find a waiter with a smaller request), but under load that could cause + // starvation for large requests; instead, we leave all remaining waiters + // blocked. + // + // Consider a semaphore used as a read-write lock, with N tokens, N + // readers, and one writer. Each reader can Acquire(1) to obtain a read + // lock. The writer can Acquire(N) to obtain a write lock, excluding all + // of the readers. If we allow the readers to jump ahead in the queue, + // the writer will starve — there is always one token available for every + // reader. + break + } + + s.cur += w.n + s.waiters.Remove(next) + close(w.ready) + } +} diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/sys/LICENSE +++ b/vendor/golang.org/x/sys/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go index 8fa707aa..02609d5b 100644 --- a/vendor/golang.org/x/sys/cpu/cpu.go +++ b/vendor/golang.org/x/sys/cpu/cpu.go @@ -105,6 +105,8 @@ var ARM64 struct { HasSVE bool // Scalable Vector Extensions HasSVE2 bool // Scalable Vector Extensions 2 HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32 + HasDIT bool // Data Independent Timing support + HasI8MM bool // Advanced SIMD Int8 matrix multiplication instructions _ CacheLinePad } @@ -199,6 +201,25 @@ var S390X struct { _ CacheLinePad } +// RISCV64 contains the supported CPU features and performance characteristics for riscv64 +// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate +// the presence of RISC-V extensions. +// +// It is safe to assume that all the RV64G extensions are supported and so they are omitted from +// this structure. As riscv64 Go programs require at least RV64G, the code that populates +// this structure cannot run successfully if some of the RV64G extensions are missing. +// The struct is padded to avoid false sharing. +var RISCV64 struct { + _ CacheLinePad + HasFastMisaligned bool // Fast misaligned accesses + HasC bool // Compressed instruction-set extension + HasV bool // Vector extension compatible with RVV 1.0 + HasZba bool // Address generation instructions extension + HasZbb bool // Basic bit-manipulation extension + HasZbs bool // Single-bit instructions extension + _ CacheLinePad +} + func init() { archInit() initOptions() diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_arm64.go index 0e27a21e..af2aa99f 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.go @@ -38,6 +38,8 @@ func initOptions() { {Name: "dcpop", Feature: &ARM64.HasDCPOP}, {Name: "asimddp", Feature: &ARM64.HasASIMDDP}, {Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM}, + {Name: "dit", Feature: &ARM64.HasDIT}, + {Name: "i8mm", Feature: &ARM64.HasI8MM}, } } @@ -145,6 +147,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { ARM64.HasLRCPC = true } + switch extractBits(isar1, 52, 55) { + case 1: + ARM64.HasI8MM = true + } + // ID_AA64PFR0_EL1 switch extractBits(pfr0, 16, 19) { case 0: @@ -168,6 +175,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { parseARM64SVERegister(getzfr0()) } + + switch extractBits(pfr0, 48, 51) { + case 1: + ARM64.HasDIT = true + } } func parseARM64SVERegister(zfr0 uint64) { diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go index 3d386d0f..08f35ea1 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go @@ -35,8 +35,10 @@ const ( hwcap_SHA512 = 1 << 21 hwcap_SVE = 1 << 22 hwcap_ASIMDFHM = 1 << 23 + hwcap_DIT = 1 << 24 hwcap2_SVE2 = 1 << 1 + hwcap2_I8MM = 1 << 13 ) // linuxKernelCanEmulateCPUID reports whether we're running @@ -106,9 +108,12 @@ func doinit() { ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512) ARM64.HasSVE = isSet(hwCap, hwcap_SVE) ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) + ARM64.HasDIT = isSet(hwCap, hwcap_DIT) + // HWCAP2 feature bits ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2) + ARM64.HasI8MM = isSet(hwCap2, hwcap2_I8MM) } func isSet(hwc uint, value uint) bool { diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go index cd63e733..7d902b68 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x +//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64 package cpu diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go new file mode 100644 index 00000000..cb4a0c57 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go @@ -0,0 +1,137 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +import ( + "syscall" + "unsafe" +) + +// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe +// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available. +// +// A note on detection of the Vector extension using HWCAP. +// +// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5. +// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe +// syscall is not available then neither is the Vector extension (which needs kernel support). +// The riscv_hwprobe syscall should then be all we need to detect the Vector extension. +// However, some RISC-V board manufacturers ship boards with an older kernel on top of which +// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe +// patches. These kernels advertise support for the Vector extension using HWCAP. Falling +// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not +// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option. +// +// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by +// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board +// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified +// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use +// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector +// extension are binary incompatible. HWCAP can then not be used in isolation to populate the +// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0. +// +// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector +// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype +// register. This check would allow us to safely detect version 1.0 of the Vector extension +// with HWCAP, if riscv_hwprobe were not available. However, the check cannot +// be added until the assembler supports the Vector instructions. +// +// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the +// extensions it advertises support for are explicitly versioned. It's also worth noting that +// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba. +// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority +// of RISC-V extensions. +// +// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information. + +// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must +// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall +// here. + +const ( + // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. + riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4 + riscv_HWPROBE_IMA_C = 0x2 + riscv_HWPROBE_IMA_V = 0x4 + riscv_HWPROBE_EXT_ZBA = 0x8 + riscv_HWPROBE_EXT_ZBB = 0x10 + riscv_HWPROBE_EXT_ZBS = 0x20 + riscv_HWPROBE_KEY_CPUPERF_0 = 0x5 + riscv_HWPROBE_MISALIGNED_FAST = 0x3 + riscv_HWPROBE_MISALIGNED_MASK = 0x7 +) + +const ( + // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go. + sys_RISCV_HWPROBE = 258 +) + +// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. +type riscvHWProbePairs struct { + key int64 + value uint64 +} + +const ( + // CPU features + hwcap_RISCV_ISA_C = 1 << ('C' - 'A') +) + +func doinit() { + // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key + // field should be initialised with one of the key constants defined above, e.g., + // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value. + // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0. + + pairs := []riscvHWProbePairs{ + {riscv_HWPROBE_KEY_IMA_EXT_0, 0}, + {riscv_HWPROBE_KEY_CPUPERF_0, 0}, + } + + // This call only indicates that extensions are supported if they are implemented on all cores. + if riscvHWProbe(pairs, 0) { + if pairs[0].key != -1 { + v := uint(pairs[0].value) + RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C) + RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V) + RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA) + RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB) + RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS) + } + if pairs[1].key != -1 { + v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK + RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST + } + } + + // Let's double check with HWCAP if the C extension does not appear to be supported. + // This may happen if we're running on a kernel older than 6.4. + + if !RISCV64.HasC { + RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C) + } +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} + +// riscvHWProbe is a simplified version of the generated wrapper function found in +// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the +// cpuCount and cpus parameters which we do not need. We always want to pass 0 for +// these parameters here so the kernel only reports the extensions that are present +// on all cores. +func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool { + var _zero uintptr + var p0 unsafe.Pointer + if len(pairs) > 0 { + p0 = unsafe.Pointer(&pairs[0]) + } else { + p0 = unsafe.Pointer(&_zero) + } + + _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0) + return e1 == 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go index 7f0c79c0..aca3199c 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go @@ -8,4 +8,13 @@ package cpu const cacheLineSize = 64 -func initOptions() {} +func initOptions() { + options = []option{ + {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned}, + {Name: "c", Feature: &RISCV64.HasC}, + {Name: "v", Feature: &RISCV64.HasV}, + {Name: "zba", Feature: &RISCV64.HasZba}, + {Name: "zbb", Feature: &RISCV64.HasZbb}, + {Name: "zbs", Feature: &RISCV64.HasZbs}, + } +} diff --git a/vendor/golang.org/x/sys/execabs/execabs.go b/vendor/golang.org/x/sys/execabs/execabs.go deleted file mode 100644 index 3bf40fdf..00000000 --- a/vendor/golang.org/x/sys/execabs/execabs.go +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package execabs is a drop-in replacement for os/exec -// that requires PATH lookups to find absolute paths. -// That is, execabs.Command("cmd") runs the same PATH lookup -// as exec.Command("cmd"), but if the result is a path -// which is relative, the Run and Start methods will report -// an error instead of running the executable. -// -// See https://blog.golang.org/path-security for more information -// about when it may be necessary or appropriate to use this package. -package execabs - -import ( - "context" - "fmt" - "os/exec" - "path/filepath" - "reflect" - "unsafe" -) - -// ErrNotFound is the error resulting if a path search failed to find an executable file. -// It is an alias for exec.ErrNotFound. -var ErrNotFound = exec.ErrNotFound - -// Cmd represents an external command being prepared or run. -// It is an alias for exec.Cmd. -type Cmd = exec.Cmd - -// Error is returned by LookPath when it fails to classify a file as an executable. -// It is an alias for exec.Error. -type Error = exec.Error - -// An ExitError reports an unsuccessful exit by a command. -// It is an alias for exec.ExitError. -type ExitError = exec.ExitError - -func relError(file, path string) error { - return fmt.Errorf("%s resolves to executable in current directory (.%c%s)", file, filepath.Separator, path) -} - -// LookPath searches for an executable named file in the directories -// named by the PATH environment variable. If file contains a slash, -// it is tried directly and the PATH is not consulted. The result will be -// an absolute path. -// -// LookPath differs from exec.LookPath in its handling of PATH lookups, -// which are used for file names without slashes. If exec.LookPath's -// PATH lookup would have returned an executable from the current directory, -// LookPath instead returns an error. -func LookPath(file string) (string, error) { - path, err := exec.LookPath(file) - if err != nil && !isGo119ErrDot(err) { - return "", err - } - if filepath.Base(file) == file && !filepath.IsAbs(path) { - return "", relError(file, path) - } - return path, nil -} - -func fixCmd(name string, cmd *exec.Cmd) { - if filepath.Base(name) == name && !filepath.IsAbs(cmd.Path) && !isGo119ErrFieldSet(cmd) { - // exec.Command was called with a bare binary name and - // exec.LookPath returned a path which is not absolute. - // Set cmd.lookPathErr and clear cmd.Path so that it - // cannot be run. - lookPathErr := (*error)(unsafe.Pointer(reflect.ValueOf(cmd).Elem().FieldByName("lookPathErr").Addr().Pointer())) - if *lookPathErr == nil { - *lookPathErr = relError(name, cmd.Path) - } - cmd.Path = "" - } -} - -// CommandContext is like Command but includes a context. -// -// The provided context is used to kill the process (by calling os.Process.Kill) -// if the context becomes done before the command completes on its own. -func CommandContext(ctx context.Context, name string, arg ...string) *exec.Cmd { - cmd := exec.CommandContext(ctx, name, arg...) - fixCmd(name, cmd) - return cmd - -} - -// Command returns the Cmd struct to execute the named program with the given arguments. -// See exec.Command for most details. -// -// Command differs from exec.Command in its handling of PATH lookups, -// which are used when the program name contains no slashes. -// If exec.Command would have returned an exec.Cmd configured to run an -// executable from the current directory, Command instead -// returns an exec.Cmd that will return an error from Start or Run. -func Command(name string, arg ...string) *exec.Cmd { - cmd := exec.Command(name, arg...) - fixCmd(name, cmd) - return cmd -} diff --git a/vendor/golang.org/x/sys/execabs/execabs_go118.go b/vendor/golang.org/x/sys/execabs/execabs_go118.go deleted file mode 100644 index 5627d70e..00000000 --- a/vendor/golang.org/x/sys/execabs/execabs_go118.go +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !go1.19 - -package execabs - -import "os/exec" - -func isGo119ErrDot(err error) bool { - return false -} - -func isGo119ErrFieldSet(cmd *exec.Cmd) bool { - return false -} diff --git a/vendor/golang.org/x/sys/execabs/execabs_go119.go b/vendor/golang.org/x/sys/execabs/execabs_go119.go deleted file mode 100644 index d60ab1b4..00000000 --- a/vendor/golang.org/x/sys/execabs/execabs_go119.go +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.19 - -package execabs - -import ( - "errors" - "os/exec" -) - -func isGo119ErrDot(err error) bool { - return errors.Is(err, exec.ErrDot) -} - -func isGo119ErrFieldSet(cmd *exec.Cmd) bool { - return cmd.Err != nil -} diff --git a/vendor/golang.org/x/sys/unix/README.md b/vendor/golang.org/x/sys/unix/README.md index 7d3c060e..6e08a76a 100644 --- a/vendor/golang.org/x/sys/unix/README.md +++ b/vendor/golang.org/x/sys/unix/README.md @@ -156,7 +156,7 @@ from the generated architecture-specific files listed below, and merge these into a common file for each OS. The merge is performed in the following steps: -1. Construct the set of common code that is idential in all architecture-specific files. +1. Construct the set of common code that is identical in all architecture-specific files. 2. Write this common code to the merged file. 3. Remove the common code from all architecture-specific files. diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh index fdcaa974..ac54ecab 100644 --- a/vendor/golang.org/x/sys/unix/mkerrors.sh +++ b/vendor/golang.org/x/sys/unix/mkerrors.sh @@ -58,6 +58,7 @@ includes_Darwin=' #define _DARWIN_USE_64_BIT_INODE #define __APPLE_USE_RFC_3542 #include +#include #include #include #include @@ -263,6 +264,7 @@ struct ltchars { #include #include #include +#include #include #include #include @@ -549,6 +551,8 @@ ccflags="$@" $2 !~ "NLA_TYPE_MASK" && $2 !~ /^RTC_VL_(ACCURACY|BACKUP|DATA)/ && $2 ~ /^(NETLINK|NLM|NLMSG|NLA|IFA|IFAN|RT|RTC|RTCF|RTN|RTPROT|RTNH|ARPHRD|ETH_P|NETNSA)_/ || + $2 ~ /^SOCK_|SK_DIAG_|SKNLGRP_$/ || + $2 ~ /^(CONNECT|SAE)_/ || $2 ~ /^FIORDCHK$/ || $2 ~ /^SIOC/ || $2 ~ /^TIOC/ || @@ -652,7 +656,7 @@ errors=$( signals=$( echo '#include ' | $CC -x c - -E -dM $ccflags | awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print $2 }' | - grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' | + grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' | sort ) @@ -662,7 +666,7 @@ echo '#include ' | $CC -x c - -E -dM $ccflags | sort >_error.grep echo '#include ' | $CC -x c - -E -dM $ccflags | awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print "^\t" $2 "[ \t]*=" }' | - grep -v 'SIGSTKSIZE\|SIGSTKSZ\|SIGRT\|SIGMAX64' | + grep -E -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT|SIGMAX64)' | sort >_signal.grep echo '// mkerrors.sh' "$@" diff --git a/vendor/golang.org/x/sys/unix/mremap.go b/vendor/golang.org/x/sys/unix/mremap.go index fd45fe52..3a5e776f 100644 --- a/vendor/golang.org/x/sys/unix/mremap.go +++ b/vendor/golang.org/x/sys/unix/mremap.go @@ -50,3 +50,8 @@ func (m *mremapMmapper) Mremap(oldData []byte, newLength int, flags int) (data [ func Mremap(oldData []byte, newLength int, flags int) (data []byte, err error) { return mapper.Mremap(oldData, newLength, flags) } + +func MremapPtr(oldAddr unsafe.Pointer, oldSize uintptr, newAddr unsafe.Pointer, newSize uintptr, flags int) (ret unsafe.Pointer, err error) { + xaddr, err := mapper.mremap(uintptr(oldAddr), oldSize, newSize, flags, uintptr(newAddr)) + return unsafe.Pointer(xaddr), err +} diff --git a/vendor/golang.org/x/sys/unix/syscall_aix.go b/vendor/golang.org/x/sys/unix/syscall_aix.go index 67ce6cef..6f15ba1e 100644 --- a/vendor/golang.org/x/sys/unix/syscall_aix.go +++ b/vendor/golang.org/x/sys/unix/syscall_aix.go @@ -360,7 +360,7 @@ func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, var status _C_int var r Pid_t err = ERESTART - // AIX wait4 may return with ERESTART errno, while the processus is still + // AIX wait4 may return with ERESTART errno, while the process is still // active. for err == ERESTART { r, err = wait4(Pid_t(pid), &status, options, rusage) diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin.go b/vendor/golang.org/x/sys/unix/syscall_darwin.go index 59542a89..099867de 100644 --- a/vendor/golang.org/x/sys/unix/syscall_darwin.go +++ b/vendor/golang.org/x/sys/unix/syscall_darwin.go @@ -402,6 +402,18 @@ func IoctlSetIfreqMTU(fd int, ifreq *IfreqMTU) error { return ioctlPtr(fd, SIOCSIFMTU, unsafe.Pointer(ifreq)) } +//sys renamexNp(from string, to string, flag uint32) (err error) + +func RenamexNp(from string, to string, flag uint32) (err error) { + return renamexNp(from, to, flag) +} + +//sys renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) + +func RenameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) { + return renameatxNp(fromfd, from, tofd, to, flag) +} + //sys sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) = SYS_SYSCTL func Uname(uname *Utsname) error { @@ -542,6 +554,55 @@ func SysctlKinfoProcSlice(name string, args ...int) ([]KinfoProc, error) { } } +//sys pthread_chdir_np(path string) (err error) + +func PthreadChdir(path string) (err error) { + return pthread_chdir_np(path) +} + +//sys pthread_fchdir_np(fd int) (err error) + +func PthreadFchdir(fd int) (err error) { + return pthread_fchdir_np(fd) +} + +// Connectx calls connectx(2) to initiate a connection on a socket. +// +// srcIf, srcAddr, and dstAddr are filled into a [SaEndpoints] struct and passed as the endpoints argument. +// +// - srcIf is the optional source interface index. 0 means unspecified. +// - srcAddr is the optional source address. nil means unspecified. +// - dstAddr is the destination address. +// +// On success, Connectx returns the number of bytes enqueued for transmission. +func Connectx(fd int, srcIf uint32, srcAddr, dstAddr Sockaddr, associd SaeAssocID, flags uint32, iov []Iovec, connid *SaeConnID) (n uintptr, err error) { + endpoints := SaEndpoints{ + Srcif: srcIf, + } + + if srcAddr != nil { + addrp, addrlen, err := srcAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Srcaddr = (*RawSockaddr)(addrp) + endpoints.Srcaddrlen = uint32(addrlen) + } + + if dstAddr != nil { + addrp, addrlen, err := dstAddr.sockaddr() + if err != nil { + return 0, err + } + endpoints.Dstaddr = (*RawSockaddr)(addrp) + endpoints.Dstaddrlen = uint32(addrlen) + } + + err = connectx(fd, &endpoints, associd, flags, iov, &n, connid) + return +} + +//sys connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) //sys sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) //sys shmat(id int, addr uintptr, flag int) (ret uintptr, err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_hurd.go b/vendor/golang.org/x/sys/unix/syscall_hurd.go index ba46651f..a6a2d2fc 100644 --- a/vendor/golang.org/x/sys/unix/syscall_hurd.go +++ b/vendor/golang.org/x/sys/unix/syscall_hurd.go @@ -11,6 +11,7 @@ package unix int ioctl(int, unsigned long int, uintptr_t); */ import "C" +import "unsafe" func ioctl(fd int, req uint, arg uintptr) (err error) { r0, er := C.ioctl(C.int(fd), C.ulong(req), C.uintptr_t(arg)) diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go index 5682e262..f08abd43 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux.go @@ -1295,6 +1295,48 @@ func GetsockoptTCPInfo(fd, level, opt int) (*TCPInfo, error) { return &value, err } +// GetsockoptTCPCCVegasInfo returns algorithm specific congestion control information for a socket using the "vegas" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCVegasInfo(fd, level, opt int) (*TCPVegasInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPVegasInfo)(unsafe.Pointer(&value[0])) + return out, err +} + +// GetsockoptTCPCCDCTCPInfo returns algorithm specific congestion control information for a socket using the "dctp" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCDCTCPInfo(fd, level, opt int) (*TCPDCTCPInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPDCTCPInfo)(unsafe.Pointer(&value[0])) + return out, err +} + +// GetsockoptTCPCCBBRInfo returns algorithm specific congestion control information for a socket using the "bbr" +// algorithm. +// +// The socket's congestion control algorighm can be retrieved via [GetsockoptString] with the [TCP_CONGESTION] option: +// +// algo, err := unix.GetsockoptString(fd, unix.IPPROTO_TCP, unix.TCP_CONGESTION) +func GetsockoptTCPCCBBRInfo(fd, level, opt int) (*TCPBBRInfo, error) { + var value [SizeofTCPCCInfo / 4]uint32 // ensure proper alignment + vallen := _Socklen(SizeofTCPCCInfo) + err := getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen) + out := (*TCPBBRInfo)(unsafe.Pointer(&value[0])) + return out, err +} + // GetsockoptString returns the string value of the socket option opt for the // socket associated with fd at the given socket level. func GetsockoptString(fd, level, opt int) (string, error) { @@ -1959,7 +2001,26 @@ func Getpgrp() (pid int) { //sysnb Getpid() (pid int) //sysnb Getppid() (ppid int) //sys Getpriority(which int, who int) (prio int, err error) -//sys Getrandom(buf []byte, flags int) (n int, err error) + +func Getrandom(buf []byte, flags int) (n int, err error) { + vdsoRet, supported := vgetrandom(buf, uint32(flags)) + if supported { + if vdsoRet < 0 { + return 0, errnoErr(syscall.Errno(-vdsoRet)) + } + return vdsoRet, nil + } + var p *byte + if len(buf) > 0 { + p = &buf[0] + } + r, _, e := Syscall(SYS_GETRANDOM, uintptr(unsafe.Pointer(p)), uintptr(len(buf)), uintptr(flags)) + if e != 0 { + return 0, errnoErr(e) + } + return int(r), nil +} + //sysnb Getrusage(who int, rusage *Rusage) (err error) //sysnb Getsid(pid int) (sid int, err error) //sysnb Gettid() (tid int) @@ -2592,3 +2653,4 @@ func SchedGetAttr(pid int, flags uint) (*SchedAttr, error) { } //sys Cachestat(fd uint, crange *CachestatRange, cstat *Cachestat_t, flags uint) (err error) +//sys Mseal(b []byte, flags uint) (err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go index cf2ee6c7..745e5c7e 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_arm64.go @@ -182,3 +182,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error } return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go index 3d0e9845..dd2262a4 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_loong64.go @@ -214,3 +214,5 @@ func KexecFileLoad(kernelFd int, initrdFd int, cmdline string, flags int) error } return kexecFileLoad(kernelFd, initrdFd, cmdlineLen, cmdline, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go index 6f5a2889..8cf3670b 100644 --- a/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/syscall_linux_riscv64.go @@ -187,3 +187,5 @@ func RISCVHWProbe(pairs []RISCVHWProbePairs, set *CPUSet, flags uint) (err error } return riscvHWProbe(pairs, setSize, set, flags) } + +const SYS_FSTATAT = SYS_NEWFSTATAT diff --git a/vendor/golang.org/x/sys/unix/syscall_openbsd.go b/vendor/golang.org/x/sys/unix/syscall_openbsd.go index b25343c7..b86ded54 100644 --- a/vendor/golang.org/x/sys/unix/syscall_openbsd.go +++ b/vendor/golang.org/x/sys/unix/syscall_openbsd.go @@ -293,6 +293,7 @@ func Uname(uname *Utsname) error { //sys Mkfifoat(dirfd int, path string, mode uint32) (err error) //sys Mknod(path string, mode uint32, dev int) (err error) //sys Mknodat(dirfd int, path string, mode uint32, dev int) (err error) +//sys Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) //sys Nanosleep(time *Timespec, leftover *Timespec) (err error) //sys Open(path string, mode int, perm uint32) (fd int, err error) //sys Openat(dirfd int, path string, mode int, perm uint32) (fd int, err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_unix.go b/vendor/golang.org/x/sys/unix/syscall_unix.go index 77081de8..4e92e5aa 100644 --- a/vendor/golang.org/x/sys/unix/syscall_unix.go +++ b/vendor/golang.org/x/sys/unix/syscall_unix.go @@ -154,6 +154,15 @@ func Munmap(b []byte) (err error) { return mapper.Munmap(b) } +func MmapPtr(fd int, offset int64, addr unsafe.Pointer, length uintptr, prot int, flags int) (ret unsafe.Pointer, err error) { + xaddr, err := mapper.mmap(uintptr(addr), length, prot, flags, fd, offset) + return unsafe.Pointer(xaddr), err +} + +func MunmapPtr(addr unsafe.Pointer, length uintptr) (err error) { + return mapper.munmap(uintptr(addr), length) +} + func Read(fd int, p []byte) (n int, err error) { n, err = read(fd, p) if raceenabled { diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_linux.go b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go new file mode 100644 index 00000000..07ac8e09 --- /dev/null +++ b/vendor/golang.org/x/sys/unix/vgetrandom_linux.go @@ -0,0 +1,13 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build linux && go1.24 + +package unix + +import _ "unsafe" + +//go:linkname vgetrandom runtime.vgetrandom +//go:noescape +func vgetrandom(p []byte, flags uint32) (ret int, supported bool) diff --git a/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go new file mode 100644 index 00000000..297e97bc --- /dev/null +++ b/vendor/golang.org/x/sys/unix/vgetrandom_unsupported.go @@ -0,0 +1,11 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !linux || !go1.24 + +package unix + +func vgetrandom(p []byte, flags uint32) (ret int, supported bool) { + return -1, false +} diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go index e40fa852..d73c4652 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_amd64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1169,6 +1172,11 @@ const ( PT_WRITE_D = 0x5 PT_WRITE_I = 0x4 PT_WRITE_U = 0x6 + RENAME_EXCL = 0x4 + RENAME_NOFOLLOW_ANY = 0x10 + RENAME_RESERVED1 = 0x8 + RENAME_SECLUDE = 0x1 + RENAME_SWAP = 0x2 RLIMIT_AS = 0x5 RLIMIT_CORE = 0x4 RLIMIT_CPU = 0x0 @@ -1260,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go index bb02aa6c..4a55a400 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_darwin_arm64.go @@ -237,6 +237,9 @@ const ( CLOCK_UPTIME_RAW_APPROX = 0x9 CLONE_NOFOLLOW = 0x1 CLONE_NOOWNERCOPY = 0x2 + CONNECT_DATA_AUTHENTICATED = 0x4 + CONNECT_DATA_IDEMPOTENT = 0x2 + CONNECT_RESUME_ON_READ_WRITE = 0x1 CR0 = 0x0 CR1 = 0x1000 CR2 = 0x2000 @@ -1169,6 +1172,11 @@ const ( PT_WRITE_D = 0x5 PT_WRITE_I = 0x4 PT_WRITE_U = 0x6 + RENAME_EXCL = 0x4 + RENAME_NOFOLLOW_ANY = 0x10 + RENAME_RESERVED1 = 0x8 + RENAME_SECLUDE = 0x1 + RENAME_SWAP = 0x2 RLIMIT_AS = 0x5 RLIMIT_CORE = 0x4 RLIMIT_CPU = 0x0 @@ -1260,6 +1268,10 @@ const ( RTV_SSTHRESH = 0x20 RUSAGE_CHILDREN = -0x1 RUSAGE_SELF = 0x0 + SAE_ASSOCID_ALL = 0xffffffff + SAE_ASSOCID_ANY = 0x0 + SAE_CONNID_ALL = 0xffffffff + SAE_CONNID_ANY = 0x0 SCM_CREDS = 0x3 SCM_RIGHTS = 0x1 SCM_TIMESTAMP = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux.go b/vendor/golang.org/x/sys/unix/zerrors_linux.go index 93a38a97..de3b4624 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux.go @@ -457,6 +457,7 @@ const ( B600 = 0x8 B75 = 0x2 B9600 = 0xd + BCACHEFS_SUPER_MAGIC = 0xca451a4e BDEVFS_MAGIC = 0x62646576 BINDERFS_SUPER_MAGIC = 0x6c6f6f70 BINFMTFS_MAGIC = 0x42494e4d @@ -494,6 +495,7 @@ const ( BPF_F_TEST_REG_INVARIANTS = 0x80 BPF_F_TEST_RND_HI32 = 0x4 BPF_F_TEST_RUN_ON_CPU = 0x1 + BPF_F_TEST_SKB_CHECKSUM_COMPLETE = 0x4 BPF_F_TEST_STATE_FREQ = 0x8 BPF_F_TEST_XDP_LIVE_FRAMES = 0x2 BPF_F_XDP_DEV_BOUND_ONLY = 0x40 @@ -502,6 +504,7 @@ const ( BPF_IMM = 0x0 BPF_IND = 0x40 BPF_JA = 0x0 + BPF_JCOND = 0xe0 BPF_JEQ = 0x10 BPF_JGE = 0x30 BPF_JGT = 0x20 @@ -657,6 +660,9 @@ const ( CAN_NPROTO = 0x8 CAN_RAW = 0x1 CAN_RAW_FILTER_MAX = 0x200 + CAN_RAW_XL_VCID_RX_FILTER = 0x4 + CAN_RAW_XL_VCID_TX_PASS = 0x2 + CAN_RAW_XL_VCID_TX_SET = 0x1 CAN_RTR_FLAG = 0x40000000 CAN_SFF_ID_BITS = 0xb CAN_SFF_MASK = 0x7ff @@ -924,6 +930,7 @@ const ( EPOLL_CTL_ADD = 0x1 EPOLL_CTL_DEL = 0x2 EPOLL_CTL_MOD = 0x3 + EPOLL_IOC_TYPE = 0x8a EROFS_SUPER_MAGIC_V1 = 0xe0f5e1e2 ESP_V4_FLOW = 0xa ESP_V6_FLOW = 0xc @@ -937,9 +944,6 @@ const ( ETHTOOL_FEC_OFF = 0x4 ETHTOOL_FEC_RS = 0x8 ETHTOOL_FLAG_ALL = 0x7 - ETHTOOL_FLAG_COMPACT_BITSETS = 0x1 - ETHTOOL_FLAG_OMIT_REPLY = 0x2 - ETHTOOL_FLAG_STATS = 0x4 ETHTOOL_FLASHDEV = 0x33 ETHTOOL_FLASH_MAX_FILENAME = 0x80 ETHTOOL_FWVERS_LEN = 0x20 @@ -1339,6 +1343,7 @@ const ( F_OFD_SETLK = 0x25 F_OFD_SETLKW = 0x26 F_OK = 0x0 + F_SEAL_EXEC = 0x20 F_SEAL_FUTURE_WRITE = 0x10 F_SEAL_GROW = 0x4 F_SEAL_SEAL = 0x1 @@ -1627,6 +1632,7 @@ const ( IP_FREEBIND = 0xf IP_HDRINCL = 0x3 IP_IPSEC_POLICY = 0x10 + IP_LOCAL_PORT_RANGE = 0x33 IP_MAXPACKET = 0xffff IP_MAX_MEMBERSHIPS = 0x14 IP_MF = 0x2000 @@ -1653,6 +1659,7 @@ const ( IP_PMTUDISC_OMIT = 0x5 IP_PMTUDISC_PROBE = 0x3 IP_PMTUDISC_WANT = 0x1 + IP_PROTOCOL = 0x34 IP_RECVERR = 0xb IP_RECVERR_RFC4884 = 0x1a IP_RECVFRAGSIZE = 0x19 @@ -1698,6 +1705,7 @@ const ( KEXEC_ARCH_S390 = 0x160000 KEXEC_ARCH_SH = 0x2a0000 KEXEC_ARCH_X86_64 = 0x3e0000 + KEXEC_CRASH_HOTPLUG_SUPPORT = 0x8 KEXEC_FILE_DEBUG = 0x8 KEXEC_FILE_NO_INITRAMFS = 0x4 KEXEC_FILE_ON_CRASH = 0x2 @@ -1773,6 +1781,7 @@ const ( KEY_SPEC_USER_KEYRING = -0x4 KEY_SPEC_USER_SESSION_KEYRING = -0x5 LANDLOCK_ACCESS_FS_EXECUTE = 0x1 + LANDLOCK_ACCESS_FS_IOCTL_DEV = 0x8000 LANDLOCK_ACCESS_FS_MAKE_BLOCK = 0x800 LANDLOCK_ACCESS_FS_MAKE_CHAR = 0x40 LANDLOCK_ACCESS_FS_MAKE_DIR = 0x80 @@ -1854,6 +1863,19 @@ const ( MAP_FILE = 0x0 MAP_FIXED = 0x10 MAP_FIXED_NOREPLACE = 0x100000 + MAP_HUGE_16GB = 0x88000000 + MAP_HUGE_16KB = 0x38000000 + MAP_HUGE_16MB = 0x60000000 + MAP_HUGE_1GB = 0x78000000 + MAP_HUGE_1MB = 0x50000000 + MAP_HUGE_256MB = 0x70000000 + MAP_HUGE_2GB = 0x7c000000 + MAP_HUGE_2MB = 0x54000000 + MAP_HUGE_32MB = 0x64000000 + MAP_HUGE_512KB = 0x4c000000 + MAP_HUGE_512MB = 0x74000000 + MAP_HUGE_64KB = 0x40000000 + MAP_HUGE_8MB = 0x5c000000 MAP_HUGE_MASK = 0x3f MAP_HUGE_SHIFT = 0x1a MAP_PRIVATE = 0x2 @@ -1901,6 +1923,7 @@ const ( MNT_EXPIRE = 0x4 MNT_FORCE = 0x1 MNT_ID_REQ_SIZE_VER0 = 0x18 + MNT_ID_REQ_SIZE_VER1 = 0x20 MODULE_INIT_COMPRESSED_FILE = 0x4 MODULE_INIT_IGNORE_MODVERSIONS = 0x1 MODULE_INIT_IGNORE_VERMAGIC = 0x2 @@ -2166,10 +2189,10 @@ const ( NFT_REG_SIZE = 0x10 NFT_REJECT_ICMPX_MAX = 0x3 NFT_RT_MAX = 0x4 - NFT_SECMARK_CTX_MAXLEN = 0x100 + NFT_SECMARK_CTX_MAXLEN = 0x1000 NFT_SET_MAXNAMELEN = 0x100 NFT_SOCKET_MAX = 0x3 - NFT_TABLE_F_MASK = 0x3 + NFT_TABLE_F_MASK = 0x7 NFT_TABLE_MAXNAMELEN = 0x100 NFT_TRACETYPE_MAX = 0x3 NFT_TUNNEL_F_MASK = 0x7 @@ -2335,9 +2358,11 @@ const ( PERF_MEM_LVLNUM_IO = 0xa PERF_MEM_LVLNUM_L1 = 0x1 PERF_MEM_LVLNUM_L2 = 0x2 + PERF_MEM_LVLNUM_L2_MHB = 0x5 PERF_MEM_LVLNUM_L3 = 0x3 PERF_MEM_LVLNUM_L4 = 0x4 PERF_MEM_LVLNUM_LFB = 0xc + PERF_MEM_LVLNUM_MSC = 0x6 PERF_MEM_LVLNUM_NA = 0xf PERF_MEM_LVLNUM_PMEM = 0xe PERF_MEM_LVLNUM_RAM = 0xd @@ -2403,12 +2428,14 @@ const ( PERF_RECORD_MISC_USER = 0x2 PERF_SAMPLE_BRANCH_PLM_ALL = 0x7 PERF_SAMPLE_WEIGHT_TYPE = 0x1004000 + PID_FS_MAGIC = 0x50494446 PIPEFS_MAGIC = 0x50495045 PPPIOCGNPMODE = 0xc008744c PPPIOCNEWUNIT = 0xc004743e PRIO_PGRP = 0x1 PRIO_PROCESS = 0x0 PRIO_USER = 0x2 + PROCFS_IOCTL_MAGIC = 'f' PROC_SUPER_MAGIC = 0x9fa0 PROT_EXEC = 0x4 PROT_GROWSDOWN = 0x1000000 @@ -2490,6 +2517,23 @@ const ( PR_PAC_GET_ENABLED_KEYS = 0x3d PR_PAC_RESET_KEYS = 0x36 PR_PAC_SET_ENABLED_KEYS = 0x3c + PR_PPC_DEXCR_CTRL_CLEAR = 0x4 + PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC = 0x10 + PR_PPC_DEXCR_CTRL_EDITABLE = 0x1 + PR_PPC_DEXCR_CTRL_MASK = 0x1f + PR_PPC_DEXCR_CTRL_SET = 0x2 + PR_PPC_DEXCR_CTRL_SET_ONEXEC = 0x8 + PR_PPC_DEXCR_IBRTPD = 0x1 + PR_PPC_DEXCR_NPHIE = 0x3 + PR_PPC_DEXCR_SBHE = 0x0 + PR_PPC_DEXCR_SRAPD = 0x2 + PR_PPC_GET_DEXCR = 0x48 + PR_PPC_SET_DEXCR = 0x49 + PR_RISCV_CTX_SW_FENCEI_OFF = 0x1 + PR_RISCV_CTX_SW_FENCEI_ON = 0x0 + PR_RISCV_SCOPE_PER_PROCESS = 0x0 + PR_RISCV_SCOPE_PER_THREAD = 0x1 + PR_RISCV_SET_ICACHE_FLUSH_CTX = 0x47 PR_RISCV_V_GET_CONTROL = 0x46 PR_RISCV_V_SET_CONTROL = 0x45 PR_RISCV_V_VSTATE_CTRL_CUR_MASK = 0x3 @@ -2894,10 +2938,12 @@ const ( RUSAGE_SELF = 0x0 RUSAGE_THREAD = 0x1 RWF_APPEND = 0x10 + RWF_ATOMIC = 0x40 RWF_DSYNC = 0x2 RWF_HIPRI = 0x1 + RWF_NOAPPEND = 0x20 RWF_NOWAIT = 0x8 - RWF_SUPPORTED = 0x1f + RWF_SUPPORTED = 0x7f RWF_SYNC = 0x4 RWF_WRITE_LIFE_NOT_SET = 0x0 SCHED_BATCH = 0x3 @@ -2918,7 +2964,9 @@ const ( SCHED_RESET_ON_FORK = 0x40000000 SCHED_RR = 0x2 SCM_CREDENTIALS = 0x2 + SCM_PIDFD = 0x4 SCM_RIGHTS = 0x1 + SCM_SECURITY = 0x3 SCM_TIMESTAMP = 0x1d SC_LOG_FLUSH = 0x100000 SECCOMP_ADDFD_FLAG_SEND = 0x2 @@ -3051,6 +3099,8 @@ const ( SIOCSMIIREG = 0x8949 SIOCSRARP = 0x8962 SIOCWANDEV = 0x894a + SK_DIAG_BPF_STORAGE_MAX = 0x3 + SK_DIAG_BPF_STORAGE_REQ_MAX = 0x1 SMACK_MAGIC = 0x43415d53 SMART_AUTOSAVE = 0xd2 SMART_AUTO_OFFLINE = 0xdb @@ -3071,6 +3121,8 @@ const ( SOCKFS_MAGIC = 0x534f434b SOCK_BUF_LOCK_MASK = 0x3 SOCK_DCCP = 0x6 + SOCK_DESTROY = 0x15 + SOCK_DIAG_BY_FAMILY = 0x14 SOCK_IOC_TYPE = 0x89 SOCK_PACKET = 0xa SOCK_RAW = 0x3 @@ -3164,6 +3216,7 @@ const ( STATX_ATTR_MOUNT_ROOT = 0x2000 STATX_ATTR_NODUMP = 0x40 STATX_ATTR_VERITY = 0x100000 + STATX_ATTR_WRITE_ATOMIC = 0x400000 STATX_BASIC_STATS = 0x7ff STATX_BLOCKS = 0x400 STATX_BTIME = 0x800 @@ -3177,8 +3230,10 @@ const ( STATX_MTIME = 0x40 STATX_NLINK = 0x4 STATX_SIZE = 0x200 + STATX_SUBVOL = 0x8000 STATX_TYPE = 0x1 STATX_UID = 0x8 + STATX_WRITE_ATOMIC = 0x10000 STATX__RESERVED = 0x80000000 SYNC_FILE_RANGE_WAIT_AFTER = 0x4 SYNC_FILE_RANGE_WAIT_BEFORE = 0x1 @@ -3260,6 +3315,7 @@ const ( TCP_MAX_WINSHIFT = 0xe TCP_MD5SIG = 0xe TCP_MD5SIG_EXT = 0x20 + TCP_MD5SIG_FLAG_IFINDEX = 0x2 TCP_MD5SIG_FLAG_PREFIX = 0x1 TCP_MD5SIG_MAXKEYLEN = 0x50 TCP_MSS = 0x200 @@ -3576,6 +3632,7 @@ const ( XDP_UMEM_PGOFF_COMPLETION_RING = 0x180000000 XDP_UMEM_PGOFF_FILL_RING = 0x100000000 XDP_UMEM_REG = 0x4 + XDP_UMEM_TX_METADATA_LEN = 0x4 XDP_UMEM_TX_SW_CSUM = 0x2 XDP_UMEM_UNALIGNED_CHUNK_FLAG = 0x1 XDP_USE_NEED_WAKEUP = 0x8 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go index 42ff8c3c..8aa6d77c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -118,6 +120,7 @@ const ( IXOFF = 0x1000 IXON = 0x400 MAP_32BIT = 0x40 + MAP_ABOVE4G = 0x80 MAP_ANON = 0x20 MAP_ANONYMOUS = 0x20 MAP_DENYWRITE = 0x800 @@ -150,9 +153,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go index dca43600..da428f42 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -118,6 +120,7 @@ const ( IXOFF = 0x1000 IXON = 0x400 MAP_32BIT = 0x40 + MAP_ABOVE4G = 0x80 MAP_ANON = 0x20 MAP_ANONYMOUS = 0x20 MAP_DENYWRITE = 0x800 @@ -150,9 +153,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go index 5cca668a..bf45bfec 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go index d8cae6d1..71c67162 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 ESR_MAGIC = 0x45535201 EXTPROC = 0x10000 @@ -87,6 +89,7 @@ const ( FICLONE = 0x40049409 FICLONERANGE = 0x4020940d FLUSHO = 0x1000 + FPMR_MAGIC = 0x46504d52 FPSIMD_MAGIC = 0x46508001 FS_IOC_ENABLE_VERITY = 0x40806685 FS_IOC_GETFLAGS = 0x80086601 @@ -151,9 +154,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go index 28e39afd..9476628f 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -152,9 +154,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go index cd66e92c..b9e85f3c 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go index c1595eba..a48b68a7 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go index ee9456b0..ea00e852 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go index 8cfca81e..91c64687 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x80 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x20 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go index 60b0deb3..8cbf38d6 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x20 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000000 FF1 = 0x4000 @@ -150,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go index f90aa728..a2df7341 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x20 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000000 FF1 = 0x4000 @@ -150,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go index ba9e0150..24791379 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x20 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000000 FF1 = 0x4000 @@ -150,9 +152,14 @@ const ( NL3 = 0x300 NLDLY = 0x300 NOFLSH = 0x80000000 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x4 ONLCR = 0x2 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go index 07cdfd6e..d265f146 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go index 2f1dd214..3f2d6443 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go @@ -78,6 +78,8 @@ const ( ECHOPRT = 0x400 EFD_CLOEXEC = 0x80000 EFD_NONBLOCK = 0x800 + EPIOCGPARAMS = 0x80088a02 + EPIOCSPARAMS = 0x40088a01 EPOLL_CLOEXEC = 0x80000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -148,9 +150,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x8008b705 NS_GET_NSTYPE = 0xb703 NS_GET_OWNER_UID = 0xb704 NS_GET_PARENT = 0xb702 + NS_GET_PID_FROM_PIDNS = 0x8004b706 + NS_GET_PID_IN_PIDNS = 0x8004b708 + NS_GET_TGID_FROM_PIDNS = 0x8004b707 + NS_GET_TGID_IN_PIDNS = 0x8004b709 NS_GET_USERNS = 0xb701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go index f40519d9..5d8b727a 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go @@ -82,6 +82,8 @@ const ( EFD_CLOEXEC = 0x400000 EFD_NONBLOCK = 0x4000 EMT_TAGOVF = 0x1 + EPIOCGPARAMS = 0x40088a02 + EPIOCSPARAMS = 0x80088a01 EPOLL_CLOEXEC = 0x400000 EXTPROC = 0x10000 FF1 = 0x8000 @@ -153,9 +155,14 @@ const ( NFDBITS = 0x40 NLDLY = 0x100 NOFLSH = 0x80 + NS_GET_MNTNS_ID = 0x4008b705 NS_GET_NSTYPE = 0x2000b703 NS_GET_OWNER_UID = 0x2000b704 NS_GET_PARENT = 0x2000b702 + NS_GET_PID_FROM_PIDNS = 0x4004b706 + NS_GET_PID_IN_PIDNS = 0x4004b708 + NS_GET_TGID_FROM_PIDNS = 0x4004b707 + NS_GET_TGID_IN_PIDNS = 0x4004b709 NS_GET_USERNS = 0x2000b701 OLCUC = 0x2 ONLCR = 0x4 diff --git a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go index da08b2ab..1ec2b140 100644 --- a/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go +++ b/vendor/golang.org/x/sys/unix/zerrors_zos_s390x.go @@ -581,6 +581,8 @@ const ( AT_EMPTY_PATH = 0x1000 AT_REMOVEDIR = 0x200 RENAME_NOREPLACE = 1 << 0 + ST_RDONLY = 1 + ST_NOSUID = 2 ) const ( diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go index ccb02f24..24b346e1 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go @@ -740,6 +740,54 @@ func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func renamexNp(from string, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall(libc_renamex_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flag)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renamex_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renamex_np renamex_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_renameatx_np_trampoline_addr, uintptr(fromfd), uintptr(unsafe.Pointer(_p0)), uintptr(tofd), uintptr(unsafe.Pointer(_p1)), uintptr(flag), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renameatx_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renameatx_np renameatx_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -760,6 +808,59 @@ var libc_sysctl_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func pthread_chdir_np(path string) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(path) + if err != nil { + return + } + _, _, e1 := syscall_syscall(libc_pthread_chdir_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_pthread_chdir_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_pthread_chdir_np pthread_chdir_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func pthread_fchdir_np(fd int) (err error) { + _, _, e1 := syscall_syscall(libc_pthread_fchdir_np_trampoline_addr, uintptr(fd), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_pthread_fchdir_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_pthread_fchdir_np pthread_fchdir_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s index 8b8bb284..ebd21310 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s @@ -223,11 +223,36 @@ TEXT libc_ioctl_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_ioctl_trampoline_addr(SB), RODATA, $8 DATA ·libc_ioctl_trampoline_addr(SB)/8, $libc_ioctl_trampoline<>(SB) +TEXT libc_renamex_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renamex_np(SB) +GLOBL ·libc_renamex_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renamex_np_trampoline_addr(SB)/8, $libc_renamex_np_trampoline<>(SB) + +TEXT libc_renameatx_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renameatx_np(SB) +GLOBL ·libc_renameatx_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renameatx_np_trampoline_addr(SB)/8, $libc_renameatx_np_trampoline<>(SB) + TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sysctl(SB) GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB) +TEXT libc_pthread_chdir_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_pthread_chdir_np(SB) +GLOBL ·libc_pthread_chdir_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_pthread_chdir_np_trampoline_addr(SB)/8, $libc_pthread_chdir_np_trampoline<>(SB) + +TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_pthread_fchdir_np(SB) +GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) + +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go index 1b40b997..824b9c2d 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go @@ -740,6 +740,54 @@ func ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func renamexNp(from string, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall(libc_renamex_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flag)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renamex_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renamex_np renamex_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func renameatxNp(fromfd int, from string, tofd int, to string, flag uint32) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(from) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(to) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_renameatx_np_trampoline_addr, uintptr(fromfd), uintptr(unsafe.Pointer(_p0)), uintptr(tofd), uintptr(unsafe.Pointer(_p1)), uintptr(flag), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_renameatx_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_renameatx_np renameatx_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { var _p0 unsafe.Pointer if len(mib) > 0 { @@ -760,6 +808,59 @@ var libc_sysctl_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func pthread_chdir_np(path string) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(path) + if err != nil { + return + } + _, _, e1 := syscall_syscall(libc_pthread_chdir_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_pthread_chdir_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_pthread_chdir_np pthread_chdir_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func pthread_fchdir_np(fd int) (err error) { + _, _, e1 := syscall_syscall(libc_pthread_fchdir_np_trampoline_addr, uintptr(fd), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_pthread_fchdir_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_pthread_fchdir_np pthread_fchdir_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func connectx(fd int, endpoints *SaEndpoints, associd SaeAssocID, flags uint32, iov []Iovec, n *uintptr, connid *SaeConnID) (err error) { + var _p0 unsafe.Pointer + if len(iov) > 0 { + _p0 = unsafe.Pointer(&iov[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := syscall_syscall9(libc_connectx_trampoline_addr, uintptr(fd), uintptr(unsafe.Pointer(endpoints)), uintptr(associd), uintptr(flags), uintptr(_p0), uintptr(len(iov)), uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(connid)), 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_connectx_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_connectx connectx "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s index 08362c1a..4f178a22 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s @@ -223,11 +223,36 @@ TEXT libc_ioctl_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_ioctl_trampoline_addr(SB), RODATA, $8 DATA ·libc_ioctl_trampoline_addr(SB)/8, $libc_ioctl_trampoline<>(SB) +TEXT libc_renamex_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renamex_np(SB) +GLOBL ·libc_renamex_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renamex_np_trampoline_addr(SB)/8, $libc_renamex_np_trampoline<>(SB) + +TEXT libc_renameatx_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_renameatx_np(SB) +GLOBL ·libc_renameatx_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_renameatx_np_trampoline_addr(SB)/8, $libc_renameatx_np_trampoline<>(SB) + TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sysctl(SB) GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB) +TEXT libc_pthread_chdir_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_pthread_chdir_np(SB) +GLOBL ·libc_pthread_chdir_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_pthread_chdir_np_trampoline_addr(SB)/8, $libc_pthread_chdir_np_trampoline<>(SB) + +TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_pthread_fchdir_np(SB) +GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) + +TEXT libc_connectx_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_connectx(SB) +GLOBL ·libc_connectx_trampoline_addr(SB), RODATA, $8 +DATA ·libc_connectx_trampoline_addr(SB)/8, $libc_connectx_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux.go b/vendor/golang.org/x/sys/unix/zsyscall_linux.go index 87d8612a..af30da55 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_linux.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_linux.go @@ -971,23 +971,6 @@ func Getpriority(which int, who int) (prio int, err error) { // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT -func Getrandom(buf []byte, flags int) (n int, err error) { - var _p0 unsafe.Pointer - if len(buf) > 0 { - _p0 = unsafe.Pointer(&buf[0]) - } else { - _p0 = unsafe.Pointer(&_zero) - } - r0, _, e1 := Syscall(SYS_GETRANDOM, uintptr(_p0), uintptr(len(buf)), uintptr(flags)) - n = int(r0) - if e1 != 0 { - err = errnoErr(e1) - } - return -} - -// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT - func Getrusage(who int, rusage *Rusage) (err error) { _, _, e1 := RawSyscall(SYS_GETRUSAGE, uintptr(who), uintptr(unsafe.Pointer(rusage)), 0) if e1 != 0 { @@ -2229,3 +2212,19 @@ func Cachestat(fd uint, crange *CachestatRange, cstat *Cachestat_t, flags uint) } return } + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func Mseal(b []byte, flags uint) (err error) { + var _p0 unsafe.Pointer + if len(b) > 0 { + _p0 = unsafe.Pointer(&b[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + _, _, e1 := Syscall(SYS_MSEAL, uintptr(_p0), uintptr(len(b)), uintptr(flags)) + if e1 != 0 { + err = errnoErr(e1) + } + return +} diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go index 9dc42410..1851df14 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s index 41b56173..0b43c693 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_386.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $4 DATA ·libc_mknodat_trampoline_addr(SB)/4, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $4 +DATA ·libc_mount_trampoline_addr(SB)/4, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $4 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go index 0d3a0751..e1ec0dbe 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s index 4019a656..880c6d6e 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_amd64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go index c39f7776..7c8452a6 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s index ac4af24f..b8ef95b0 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $4 DATA ·libc_mknodat_trampoline_addr(SB)/4, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $4 +DATA ·libc_mount_trampoline_addr(SB)/4, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $4 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go index 57571d07..2ffdf861 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s index f77d5321..2af3b5c7 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_arm64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go index e62963e6..1da08d52 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s index fae140b6..b7a25135 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_mips64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go index 00831354..6e85b0aa 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s index 9d1e0ff0..f15dadf0 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_ppc64.s @@ -555,6 +555,12 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + CALL libc_mount(SB) + RET +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 CALL libc_nanosleep(SB) RET diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go index 79029ed5..28b487df 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.go @@ -1493,6 +1493,30 @@ var libc_mknodat_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func Mount(fsType string, dir string, flags int, data unsafe.Pointer) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(fsType) + if err != nil { + return + } + var _p1 *byte + _p1, err = BytePtrFromString(dir) + if err != nil { + return + } + _, _, e1 := syscall_syscall6(libc_mount_trampoline_addr, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_p1)), uintptr(flags), uintptr(data), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_mount_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_mount mount "libc.so" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func Nanosleep(time *Timespec, leftover *Timespec) (err error) { _, _, e1 := syscall_syscall(libc_nanosleep_trampoline_addr, uintptr(unsafe.Pointer(time)), uintptr(unsafe.Pointer(leftover)), 0) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s index da115f9a..1e7f321e 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_openbsd_riscv64.s @@ -463,6 +463,11 @@ TEXT libc_mknodat_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_mknodat_trampoline_addr(SB), RODATA, $8 DATA ·libc_mknodat_trampoline_addr(SB)/8, $libc_mknodat_trampoline<>(SB) +TEXT libc_mount_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_mount(SB) +GLOBL ·libc_mount_trampoline_addr(SB), RODATA, $8 +DATA ·libc_mount_trampoline_addr(SB)/8, $libc_mount_trampoline<>(SB) + TEXT libc_nanosleep_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_nanosleep(SB) GLOBL ·libc_nanosleep_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go index 53aef5dc..524b0820 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_386.go @@ -457,4 +457,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go index 71d52476..f485dbf4 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_amd64.go @@ -341,6 +341,7 @@ const ( SYS_STATX = 332 SYS_IO_PGETEVENTS = 333 SYS_RSEQ = 334 + SYS_URETPROBE = 335 SYS_PIDFD_SEND_SIGNAL = 424 SYS_IO_URING_SETUP = 425 SYS_IO_URING_ENTER = 426 @@ -379,4 +380,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go index c7477061..70b35bf3 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm.go @@ -421,4 +421,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go index f96e214f..1893e2fe 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_arm64.go @@ -85,7 +85,7 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 - SYS_FSTATAT = 79 + SYS_NEWFSTATAT = 79 SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 @@ -324,4 +324,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go index 28425346..16a4017d 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_loong64.go @@ -84,6 +84,8 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 + SYS_NEWFSTATAT = 79 + SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 SYS_FDATASYNC = 83 @@ -318,4 +320,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go index d0953018..7e567f1e 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips.go @@ -441,4 +441,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 4459 SYS_LSM_SET_SELF_ATTR = 4460 SYS_LSM_LIST_MODULES = 4461 + SYS_MSEAL = 4462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go index 295c7f4b..38ae55e5 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64.go @@ -371,4 +371,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 5459 SYS_LSM_SET_SELF_ATTR = 5460 SYS_LSM_LIST_MODULES = 5461 + SYS_MSEAL = 5462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go index d1a9eaca..55e92e60 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mips64le.go @@ -371,4 +371,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 5459 SYS_LSM_SET_SELF_ATTR = 5460 SYS_LSM_LIST_MODULES = 5461 + SYS_MSEAL = 5462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go index bec157c3..60658d6a 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_mipsle.go @@ -441,4 +441,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 4459 SYS_LSM_SET_SELF_ATTR = 4460 SYS_LSM_LIST_MODULES = 4461 + SYS_MSEAL = 4462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go index 7ee7bdc4..e203e8a7 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc.go @@ -448,4 +448,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go index fad1f25b..5944b97d 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64.go @@ -420,4 +420,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go index 7d3e1635..c66d416d 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_ppc64le.go @@ -420,4 +420,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go index 0ed53ad9..a5459e76 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_riscv64.go @@ -84,7 +84,7 @@ const ( SYS_SPLICE = 76 SYS_TEE = 77 SYS_READLINKAT = 78 - SYS_FSTATAT = 79 + SYS_NEWFSTATAT = 79 SYS_FSTAT = 80 SYS_SYNC = 81 SYS_FSYNC = 82 @@ -325,4 +325,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go index 2fba04ad..01d86825 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_s390x.go @@ -386,4 +386,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go index 621d00d7..7b703e77 100644 --- a/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/zsysnum_linux_sparc64.go @@ -399,4 +399,5 @@ const ( SYS_LSM_GET_SELF_ATTR = 459 SYS_LSM_SET_SELF_ATTR = 460 SYS_LSM_LIST_MODULES = 461 + SYS_MSEAL = 462 ) diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go index 091d107f..d003c3d4 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go index 28ff4ef7..0d45a941 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go @@ -306,6 +306,19 @@ type XVSockPgen struct { type _Socklen uint32 +type SaeAssocID uint32 + +type SaeConnID uint32 + +type SaEndpoints struct { + Srcif uint32 + Srcaddr *RawSockaddr + Srcaddrlen uint32 + Dstaddr *RawSockaddr + Dstaddrlen uint32 + _ [4]byte +} + type Xucred struct { Version uint32 Uid uint32 diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go index 6cbd094a..51e13eb0 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_386.go @@ -625,6 +625,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go index 7c03b6ee..d002d8ef 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_amd64.go @@ -630,6 +630,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go index 422107ee..3f863d89 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm.go @@ -616,6 +616,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go index 505a12ac..61c72931 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_arm64.go @@ -610,6 +610,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go index cc986c79..b5d17414 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_freebsd_riscv64.go @@ -612,6 +612,7 @@ const ( POLLRDNORM = 0x40 POLLWRBAND = 0x100 POLLWRNORM = 0x4 + POLLRDHUP = 0x4000 ) type CapRights struct { diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index 0036746e..3a69e454 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -87,30 +87,35 @@ type StatxTimestamp struct { } type Statx_t struct { - Mask uint32 - Blksize uint32 - Attributes uint64 - Nlink uint32 - Uid uint32 - Gid uint32 - Mode uint16 - _ [1]uint16 - Ino uint64 - Size uint64 - Blocks uint64 - Attributes_mask uint64 - Atime StatxTimestamp - Btime StatxTimestamp - Ctime StatxTimestamp - Mtime StatxTimestamp - Rdev_major uint32 - Rdev_minor uint32 - Dev_major uint32 - Dev_minor uint32 - Mnt_id uint64 - Dio_mem_align uint32 - Dio_offset_align uint32 - _ [12]uint64 + Mask uint32 + Blksize uint32 + Attributes uint64 + Nlink uint32 + Uid uint32 + Gid uint32 + Mode uint16 + _ [1]uint16 + Ino uint64 + Size uint64 + Blocks uint64 + Attributes_mask uint64 + Atime StatxTimestamp + Btime StatxTimestamp + Ctime StatxTimestamp + Mtime StatxTimestamp + Rdev_major uint32 + Rdev_minor uint32 + Dev_major uint32 + Dev_minor uint32 + Mnt_id uint64 + Dio_mem_align uint32 + Dio_offset_align uint32 + Subvol uint64 + Atomic_write_unit_min uint32 + Atomic_write_unit_max uint32 + Atomic_write_segments_max uint32 + _ [1]uint32 + _ [9]uint64 } type Fsid struct { @@ -515,6 +520,29 @@ type TCPInfo struct { Total_rto_time uint32 } +type TCPVegasInfo struct { + Enabled uint32 + Rttcnt uint32 + Rtt uint32 + Minrtt uint32 +} + +type TCPDCTCPInfo struct { + Enabled uint16 + Ce_state uint16 + Alpha uint32 + Ab_ecn uint32 + Ab_tot uint32 +} + +type TCPBBRInfo struct { + Bw_lo uint32 + Bw_hi uint32 + Min_rtt uint32 + Pacing_gain uint32 + Cwnd_gain uint32 +} + type CanFilter struct { Id uint32 Mask uint32 @@ -556,6 +584,7 @@ const ( SizeofICMPv6Filter = 0x20 SizeofUcred = 0xc SizeofTCPInfo = 0xf8 + SizeofTCPCCInfo = 0x14 SizeofCanFilter = 0x8 SizeofTCPRepairOpt = 0x8 ) @@ -2485,7 +2514,7 @@ type XDPMmapOffsets struct { type XDPUmemReg struct { Addr uint64 Len uint64 - Chunk_size uint32 + Size uint32 Headroom uint32 Flags uint32 Tx_metadata_len uint32 @@ -3473,7 +3502,7 @@ const ( DEVLINK_PORT_FN_ATTR_STATE = 0x2 DEVLINK_PORT_FN_ATTR_OPSTATE = 0x3 DEVLINK_PORT_FN_ATTR_CAPS = 0x4 - DEVLINK_PORT_FUNCTION_ATTR_MAX = 0x5 + DEVLINK_PORT_FUNCTION_ATTR_MAX = 0x6 ) type FsverityDigest struct { @@ -3765,7 +3794,7 @@ const ( ETHTOOL_MSG_PSE_GET = 0x24 ETHTOOL_MSG_PSE_SET = 0x25 ETHTOOL_MSG_RSS_GET = 0x26 - ETHTOOL_MSG_USER_MAX = 0x2b + ETHTOOL_MSG_USER_MAX = 0x2c ETHTOOL_MSG_KERNEL_NONE = 0x0 ETHTOOL_MSG_STRSET_GET_REPLY = 0x1 ETHTOOL_MSG_LINKINFO_GET_REPLY = 0x2 @@ -3805,7 +3834,10 @@ const ( ETHTOOL_MSG_MODULE_NTF = 0x24 ETHTOOL_MSG_PSE_GET_REPLY = 0x25 ETHTOOL_MSG_RSS_GET_REPLY = 0x26 - ETHTOOL_MSG_KERNEL_MAX = 0x2b + ETHTOOL_MSG_KERNEL_MAX = 0x2c + ETHTOOL_FLAG_COMPACT_BITSETS = 0x1 + ETHTOOL_FLAG_OMIT_REPLY = 0x2 + ETHTOOL_FLAG_STATS = 0x4 ETHTOOL_A_HEADER_UNSPEC = 0x0 ETHTOOL_A_HEADER_DEV_INDEX = 0x1 ETHTOOL_A_HEADER_DEV_NAME = 0x2 @@ -3947,7 +3979,7 @@ const ( ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL = 0x17 ETHTOOL_A_COALESCE_USE_CQE_MODE_TX = 0x18 ETHTOOL_A_COALESCE_USE_CQE_MODE_RX = 0x19 - ETHTOOL_A_COALESCE_MAX = 0x1c + ETHTOOL_A_COALESCE_MAX = 0x1e ETHTOOL_A_PAUSE_UNSPEC = 0x0 ETHTOOL_A_PAUSE_HEADER = 0x1 ETHTOOL_A_PAUSE_AUTONEG = 0x2 @@ -3975,7 +4007,7 @@ const ( ETHTOOL_A_TSINFO_TX_TYPES = 0x3 ETHTOOL_A_TSINFO_RX_FILTERS = 0x4 ETHTOOL_A_TSINFO_PHC_INDEX = 0x5 - ETHTOOL_A_TSINFO_MAX = 0x5 + ETHTOOL_A_TSINFO_MAX = 0x6 ETHTOOL_A_CABLE_TEST_UNSPEC = 0x0 ETHTOOL_A_CABLE_TEST_HEADER = 0x1 ETHTOOL_A_CABLE_TEST_MAX = 0x1 @@ -4605,7 +4637,7 @@ const ( NL80211_ATTR_MAC_HINT = 0xc8 NL80211_ATTR_MAC_MASK = 0xd7 NL80211_ATTR_MAX_AP_ASSOC_STA = 0xca - NL80211_ATTR_MAX = 0x149 + NL80211_ATTR_MAX = 0x14c NL80211_ATTR_MAX_CRIT_PROT_DURATION = 0xb4 NL80211_ATTR_MAX_CSA_COUNTERS = 0xce NL80211_ATTR_MAX_MATCH_SETS = 0x85 @@ -5209,7 +5241,7 @@ const ( NL80211_FREQUENCY_ATTR_GO_CONCURRENT = 0xf NL80211_FREQUENCY_ATTR_INDOOR_ONLY = 0xe NL80211_FREQUENCY_ATTR_IR_CONCURRENT = 0xf - NL80211_FREQUENCY_ATTR_MAX = 0x1f + NL80211_FREQUENCY_ATTR_MAX = 0x21 NL80211_FREQUENCY_ATTR_MAX_TX_POWER = 0x6 NL80211_FREQUENCY_ATTR_NO_10MHZ = 0x11 NL80211_FREQUENCY_ATTR_NO_160MHZ = 0xc @@ -5703,7 +5735,7 @@ const ( NL80211_STA_FLAG_ASSOCIATED = 0x7 NL80211_STA_FLAG_AUTHENTICATED = 0x5 NL80211_STA_FLAG_AUTHORIZED = 0x1 - NL80211_STA_FLAG_MAX = 0x7 + NL80211_STA_FLAG_MAX = 0x8 NL80211_STA_FLAG_MAX_OLD_API = 0x6 NL80211_STA_FLAG_MFP = 0x4 NL80211_STA_FLAG_SHORT_PREAMBLE = 0x2 @@ -6001,3 +6033,34 @@ type CachestatRange struct { Off uint64 Len uint64 } + +const ( + SK_MEMINFO_RMEM_ALLOC = 0x0 + SK_MEMINFO_RCVBUF = 0x1 + SK_MEMINFO_WMEM_ALLOC = 0x2 + SK_MEMINFO_SNDBUF = 0x3 + SK_MEMINFO_FWD_ALLOC = 0x4 + SK_MEMINFO_WMEM_QUEUED = 0x5 + SK_MEMINFO_OPTMEM = 0x6 + SK_MEMINFO_BACKLOG = 0x7 + SK_MEMINFO_DROPS = 0x8 + SK_MEMINFO_VARS = 0x9 + SKNLGRP_NONE = 0x0 + SKNLGRP_INET_TCP_DESTROY = 0x1 + SKNLGRP_INET_UDP_DESTROY = 0x2 + SKNLGRP_INET6_TCP_DESTROY = 0x3 + SKNLGRP_INET6_UDP_DESTROY = 0x4 + SK_DIAG_BPF_STORAGE_REQ_NONE = 0x0 + SK_DIAG_BPF_STORAGE_REQ_MAP_FD = 0x1 + SK_DIAG_BPF_STORAGE_REP_NONE = 0x0 + SK_DIAG_BPF_STORAGE = 0x1 + SK_DIAG_BPF_STORAGE_NONE = 0x0 + SK_DIAG_BPF_STORAGE_PAD = 0x1 + SK_DIAG_BPF_STORAGE_MAP_ID = 0x2 + SK_DIAG_BPF_STORAGE_MAP_VALUE = 0x3 +) + +type SockDiagReq struct { + Family uint8 + Protocol uint8 +} diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go index 15adc041..ad05b51a 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go @@ -727,6 +727,37 @@ const ( RISCV_HWPROBE_EXT_ZBA = 0x8 RISCV_HWPROBE_EXT_ZBB = 0x10 RISCV_HWPROBE_EXT_ZBS = 0x20 + RISCV_HWPROBE_EXT_ZICBOZ = 0x40 + RISCV_HWPROBE_EXT_ZBC = 0x80 + RISCV_HWPROBE_EXT_ZBKB = 0x100 + RISCV_HWPROBE_EXT_ZBKC = 0x200 + RISCV_HWPROBE_EXT_ZBKX = 0x400 + RISCV_HWPROBE_EXT_ZKND = 0x800 + RISCV_HWPROBE_EXT_ZKNE = 0x1000 + RISCV_HWPROBE_EXT_ZKNH = 0x2000 + RISCV_HWPROBE_EXT_ZKSED = 0x4000 + RISCV_HWPROBE_EXT_ZKSH = 0x8000 + RISCV_HWPROBE_EXT_ZKT = 0x10000 + RISCV_HWPROBE_EXT_ZVBB = 0x20000 + RISCV_HWPROBE_EXT_ZVBC = 0x40000 + RISCV_HWPROBE_EXT_ZVKB = 0x80000 + RISCV_HWPROBE_EXT_ZVKG = 0x100000 + RISCV_HWPROBE_EXT_ZVKNED = 0x200000 + RISCV_HWPROBE_EXT_ZVKNHA = 0x400000 + RISCV_HWPROBE_EXT_ZVKNHB = 0x800000 + RISCV_HWPROBE_EXT_ZVKSED = 0x1000000 + RISCV_HWPROBE_EXT_ZVKSH = 0x2000000 + RISCV_HWPROBE_EXT_ZVKT = 0x4000000 + RISCV_HWPROBE_EXT_ZFH = 0x8000000 + RISCV_HWPROBE_EXT_ZFHMIN = 0x10000000 + RISCV_HWPROBE_EXT_ZIHINTNTL = 0x20000000 + RISCV_HWPROBE_EXT_ZVFH = 0x40000000 + RISCV_HWPROBE_EXT_ZVFHMIN = 0x80000000 + RISCV_HWPROBE_EXT_ZFA = 0x100000000 + RISCV_HWPROBE_EXT_ZTSO = 0x200000000 + RISCV_HWPROBE_EXT_ZACAS = 0x400000000 + RISCV_HWPROBE_EXT_ZICOND = 0x800000000 + RISCV_HWPROBE_EXT_ZIHINTPAUSE = 0x1000000000 RISCV_HWPROBE_KEY_CPUPERF_0 = 0x5 RISCV_HWPROBE_MISALIGNED_UNKNOWN = 0x0 RISCV_HWPROBE_MISALIGNED_EMULATED = 0x1 @@ -734,4 +765,6 @@ const ( RISCV_HWPROBE_MISALIGNED_FAST = 0x3 RISCV_HWPROBE_MISALIGNED_UNSUPPORTED = 0x4 RISCV_HWPROBE_MISALIGNED_MASK = 0x7 + RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE = 0x6 + RISCV_HWPROBE_WHICH_CPUS = 0x1 ) diff --git a/vendor/golang.org/x/sys/windows/dll_windows.go b/vendor/golang.org/x/sys/windows/dll_windows.go index 115341fb..4e613cf6 100644 --- a/vendor/golang.org/x/sys/windows/dll_windows.go +++ b/vendor/golang.org/x/sys/windows/dll_windows.go @@ -65,7 +65,7 @@ func LoadDLL(name string) (dll *DLL, err error) { return d, nil } -// MustLoadDLL is like LoadDLL but panics if load operation failes. +// MustLoadDLL is like LoadDLL but panics if load operation fails. func MustLoadDLL(name string) *DLL { d, e := LoadDLL(name) if e != nil { diff --git a/vendor/golang.org/x/sys/windows/security_windows.go b/vendor/golang.org/x/sys/windows/security_windows.go index 26be94a8..b6e1ab76 100644 --- a/vendor/golang.org/x/sys/windows/security_windows.go +++ b/vendor/golang.org/x/sys/windows/security_windows.go @@ -68,6 +68,7 @@ type UserInfo10 struct { //sys NetUserGetInfo(serverName *uint16, userName *uint16, level uint32, buf **byte) (neterr error) = netapi32.NetUserGetInfo //sys NetGetJoinInformation(server *uint16, name **uint16, bufType *uint32) (neterr error) = netapi32.NetGetJoinInformation //sys NetApiBufferFree(buf *byte) (neterr error) = netapi32.NetApiBufferFree +//sys NetUserEnum(serverName *uint16, level uint32, filter uint32, buf **byte, prefMaxLen uint32, entriesRead *uint32, totalEntries *uint32, resumeHandle *uint32) (neterr error) = netapi32.NetUserEnum const ( // do not reorder @@ -893,7 +894,7 @@ type ACL struct { aclRevision byte sbz1 byte aclSize uint16 - aceCount uint16 + AceCount uint16 sbz2 uint16 } @@ -1086,6 +1087,27 @@ type EXPLICIT_ACCESS struct { Trustee TRUSTEE } +// https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-ace_header +type ACE_HEADER struct { + AceType uint8 + AceFlags uint8 + AceSize uint16 +} + +// https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-access_allowed_ace +type ACCESS_ALLOWED_ACE struct { + Header ACE_HEADER + Mask ACCESS_MASK + SidStart uint32 +} + +const ( + // Constants for AceType + // https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-ace_header + ACCESS_ALLOWED_ACE_TYPE = 0 + ACCESS_DENIED_ACE_TYPE = 1 +) + // This type is the union inside of TRUSTEE and must be created using one of the TrusteeValueFrom* functions. type TrusteeValue uintptr @@ -1157,6 +1179,7 @@ type OBJECTS_AND_NAME struct { //sys makeSelfRelativeSD(absoluteSD *SECURITY_DESCRIPTOR, selfRelativeSD *SECURITY_DESCRIPTOR, selfRelativeSDSize *uint32) (err error) = advapi32.MakeSelfRelativeSD //sys setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCESS, oldACL *ACL, newACL **ACL) (ret error) = advapi32.SetEntriesInAclW +//sys GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (err error) = advapi32.GetAce // Control returns the security descriptor control bits. func (sd *SECURITY_DESCRIPTOR) Control() (control SECURITY_DESCRIPTOR_CONTROL, revision uint32, err error) { diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go index 6525c62f..5cee9a31 100644 --- a/vendor/golang.org/x/sys/windows/syscall_windows.go +++ b/vendor/golang.org/x/sys/windows/syscall_windows.go @@ -17,8 +17,10 @@ import ( "unsafe" ) -type Handle uintptr -type HWND uintptr +type ( + Handle uintptr + HWND uintptr +) const ( InvalidHandle = ^Handle(0) @@ -211,6 +213,10 @@ func NewCallbackCDecl(fn interface{}) uintptr { //sys OpenProcess(desiredAccess uint32, inheritHandle bool, processId uint32) (handle Handle, err error) //sys ShellExecute(hwnd Handle, verb *uint16, file *uint16, args *uint16, cwd *uint16, showCmd int32) (err error) [failretval<=32] = shell32.ShellExecuteW //sys GetWindowThreadProcessId(hwnd HWND, pid *uint32) (tid uint32, err error) = user32.GetWindowThreadProcessId +//sys LoadKeyboardLayout(name *uint16, flags uint32) (hkl Handle, err error) [failretval==0] = user32.LoadKeyboardLayoutW +//sys UnloadKeyboardLayout(hkl Handle) (err error) = user32.UnloadKeyboardLayout +//sys GetKeyboardLayout(tid uint32) (hkl Handle) = user32.GetKeyboardLayout +//sys ToUnicodeEx(vkey uint32, scancode uint32, keystate *byte, pwszBuff *uint16, cchBuff int32, flags uint32, hkl Handle) (ret int32) = user32.ToUnicodeEx //sys GetShellWindow() (shellWindow HWND) = user32.GetShellWindow //sys MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret int32, err error) [failretval==0] = user32.MessageBoxW //sys ExitWindowsEx(flags uint32, reason uint32) (err error) = user32.ExitWindowsEx @@ -307,6 +313,10 @@ func NewCallbackCDecl(fn interface{}) uintptr { //sys SetConsoleMode(console Handle, mode uint32) (err error) = kernel32.SetConsoleMode //sys GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) = kernel32.GetConsoleScreenBufferInfo //sys setConsoleCursorPosition(console Handle, position uint32) (err error) = kernel32.SetConsoleCursorPosition +//sys GetConsoleCP() (cp uint32, err error) = kernel32.GetConsoleCP +//sys GetConsoleOutputCP() (cp uint32, err error) = kernel32.GetConsoleOutputCP +//sys SetConsoleCP(cp uint32) (err error) = kernel32.SetConsoleCP +//sys SetConsoleOutputCP(cp uint32) (err error) = kernel32.SetConsoleOutputCP //sys WriteConsole(console Handle, buf *uint16, towrite uint32, written *uint32, reserved *byte) (err error) = kernel32.WriteConsoleW //sys ReadConsole(console Handle, buf *uint16, toread uint32, read *uint32, inputControl *byte) (err error) = kernel32.ReadConsoleW //sys resizePseudoConsole(pconsole Handle, size uint32) (hr error) = kernel32.ResizePseudoConsole @@ -1368,9 +1378,11 @@ func SetsockoptLinger(fd Handle, level, opt int, l *Linger) (err error) { func SetsockoptInet4Addr(fd Handle, level, opt int, value [4]byte) (err error) { return Setsockopt(fd, int32(level), int32(opt), (*byte)(unsafe.Pointer(&value[0])), 4) } + func SetsockoptIPMreq(fd Handle, level, opt int, mreq *IPMreq) (err error) { return Setsockopt(fd, int32(level), int32(opt), (*byte)(unsafe.Pointer(mreq)), int32(unsafe.Sizeof(*mreq))) } + func SetsockoptIPv6Mreq(fd Handle, level, opt int, mreq *IPv6Mreq) (err error) { return syscall.EWINDOWS } diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go index d8cb71db..7b97a154 100644 --- a/vendor/golang.org/x/sys/windows/types_windows.go +++ b/vendor/golang.org/x/sys/windows/types_windows.go @@ -1060,6 +1060,7 @@ const ( SIO_GET_EXTENSION_FUNCTION_POINTER = IOC_INOUT | IOC_WS2 | 6 SIO_KEEPALIVE_VALS = IOC_IN | IOC_VENDOR | 4 SIO_UDP_CONNRESET = IOC_IN | IOC_VENDOR | 12 + SIO_UDP_NETRESET = IOC_IN | IOC_VENDOR | 15 // cf. http://support.microsoft.com/default.aspx?scid=kb;en-us;257460 @@ -2003,7 +2004,21 @@ const ( MOVEFILE_FAIL_IF_NOT_TRACKABLE = 0x20 ) -const GAA_FLAG_INCLUDE_PREFIX = 0x00000010 +// Flags for GetAdaptersAddresses, see +// https://learn.microsoft.com/en-us/windows/win32/api/iphlpapi/nf-iphlpapi-getadaptersaddresses. +const ( + GAA_FLAG_SKIP_UNICAST = 0x1 + GAA_FLAG_SKIP_ANYCAST = 0x2 + GAA_FLAG_SKIP_MULTICAST = 0x4 + GAA_FLAG_SKIP_DNS_SERVER = 0x8 + GAA_FLAG_INCLUDE_PREFIX = 0x10 + GAA_FLAG_SKIP_FRIENDLY_NAME = 0x20 + GAA_FLAG_INCLUDE_WINS_INFO = 0x40 + GAA_FLAG_INCLUDE_GATEWAYS = 0x80 + GAA_FLAG_INCLUDE_ALL_INTERFACES = 0x100 + GAA_FLAG_INCLUDE_ALL_COMPARTMENTS = 0x200 + GAA_FLAG_INCLUDE_TUNNEL_BINDINGORDER = 0x400 +) const ( IF_TYPE_OTHER = 1 @@ -2017,6 +2032,50 @@ const ( IF_TYPE_IEEE1394 = 144 ) +// Enum NL_PREFIX_ORIGIN for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_prefix_origin +const ( + IpPrefixOriginOther = 0 + IpPrefixOriginManual = 1 + IpPrefixOriginWellKnown = 2 + IpPrefixOriginDhcp = 3 + IpPrefixOriginRouterAdvertisement = 4 + IpPrefixOriginUnchanged = 1 << 4 +) + +// Enum NL_SUFFIX_ORIGIN for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_suffix_origin +const ( + NlsoOther = 0 + NlsoManual = 1 + NlsoWellKnown = 2 + NlsoDhcp = 3 + NlsoLinkLayerAddress = 4 + NlsoRandom = 5 + IpSuffixOriginOther = 0 + IpSuffixOriginManual = 1 + IpSuffixOriginWellKnown = 2 + IpSuffixOriginDhcp = 3 + IpSuffixOriginLinkLayerAddress = 4 + IpSuffixOriginRandom = 5 + IpSuffixOriginUnchanged = 1 << 4 +) + +// Enum NL_DAD_STATE for [IpAdapterUnicastAddress], see +// https://learn.microsoft.com/en-us/windows/win32/api/nldef/ne-nldef-nl_dad_state +const ( + NldsInvalid = 0 + NldsTentative = 1 + NldsDuplicate = 2 + NldsDeprecated = 3 + NldsPreferred = 4 + IpDadStateInvalid = 0 + IpDadStateTentative = 1 + IpDadStateDuplicate = 2 + IpDadStateDeprecated = 3 + IpDadStatePreferred = 4 +) + type SocketAddress struct { Sockaddr *syscall.RawSockaddrAny SockaddrLength int32 @@ -3404,3 +3463,14 @@ type DCB struct { EvtChar byte wReserved1 uint16 } + +// Keyboard Layout Flags. +// See https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-loadkeyboardlayoutw +const ( + KLF_ACTIVATE = 0x00000001 + KLF_SUBSTITUTE_OK = 0x00000002 + KLF_REORDER = 0x00000008 + KLF_REPLACELANG = 0x00000010 + KLF_NOTELLSHELL = 0x00000080 + KLF_SETFORPROCESS = 0x00000100 +) diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go index 5c6035dd..4c2e1bdc 100644 --- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go +++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go @@ -91,6 +91,7 @@ var ( procEnumServicesStatusExW = modadvapi32.NewProc("EnumServicesStatusExW") procEqualSid = modadvapi32.NewProc("EqualSid") procFreeSid = modadvapi32.NewProc("FreeSid") + procGetAce = modadvapi32.NewProc("GetAce") procGetLengthSid = modadvapi32.NewProc("GetLengthSid") procGetNamedSecurityInfoW = modadvapi32.NewProc("GetNamedSecurityInfoW") procGetSecurityDescriptorControl = modadvapi32.NewProc("GetSecurityDescriptorControl") @@ -246,7 +247,9 @@ var ( procGetCommandLineW = modkernel32.NewProc("GetCommandLineW") procGetComputerNameExW = modkernel32.NewProc("GetComputerNameExW") procGetComputerNameW = modkernel32.NewProc("GetComputerNameW") + procGetConsoleCP = modkernel32.NewProc("GetConsoleCP") procGetConsoleMode = modkernel32.NewProc("GetConsoleMode") + procGetConsoleOutputCP = modkernel32.NewProc("GetConsoleOutputCP") procGetConsoleScreenBufferInfo = modkernel32.NewProc("GetConsoleScreenBufferInfo") procGetCurrentDirectoryW = modkernel32.NewProc("GetCurrentDirectoryW") procGetCurrentProcessId = modkernel32.NewProc("GetCurrentProcessId") @@ -346,8 +349,10 @@ var ( procSetCommMask = modkernel32.NewProc("SetCommMask") procSetCommState = modkernel32.NewProc("SetCommState") procSetCommTimeouts = modkernel32.NewProc("SetCommTimeouts") + procSetConsoleCP = modkernel32.NewProc("SetConsoleCP") procSetConsoleCursorPosition = modkernel32.NewProc("SetConsoleCursorPosition") procSetConsoleMode = modkernel32.NewProc("SetConsoleMode") + procSetConsoleOutputCP = modkernel32.NewProc("SetConsoleOutputCP") procSetCurrentDirectoryW = modkernel32.NewProc("SetCurrentDirectoryW") procSetDefaultDllDirectories = modkernel32.NewProc("SetDefaultDllDirectories") procSetDllDirectoryW = modkernel32.NewProc("SetDllDirectoryW") @@ -401,6 +406,7 @@ var ( procTransmitFile = modmswsock.NewProc("TransmitFile") procNetApiBufferFree = modnetapi32.NewProc("NetApiBufferFree") procNetGetJoinInformation = modnetapi32.NewProc("NetGetJoinInformation") + procNetUserEnum = modnetapi32.NewProc("NetUserEnum") procNetUserGetInfo = modnetapi32.NewProc("NetUserGetInfo") procNtCreateFile = modntdll.NewProc("NtCreateFile") procNtCreateNamedPipeFile = modntdll.NewProc("NtCreateNamedPipeFile") @@ -476,12 +482,16 @@ var ( procGetDesktopWindow = moduser32.NewProc("GetDesktopWindow") procGetForegroundWindow = moduser32.NewProc("GetForegroundWindow") procGetGUIThreadInfo = moduser32.NewProc("GetGUIThreadInfo") + procGetKeyboardLayout = moduser32.NewProc("GetKeyboardLayout") procGetShellWindow = moduser32.NewProc("GetShellWindow") procGetWindowThreadProcessId = moduser32.NewProc("GetWindowThreadProcessId") procIsWindow = moduser32.NewProc("IsWindow") procIsWindowUnicode = moduser32.NewProc("IsWindowUnicode") procIsWindowVisible = moduser32.NewProc("IsWindowVisible") + procLoadKeyboardLayoutW = moduser32.NewProc("LoadKeyboardLayoutW") procMessageBoxW = moduser32.NewProc("MessageBoxW") + procToUnicodeEx = moduser32.NewProc("ToUnicodeEx") + procUnloadKeyboardLayout = moduser32.NewProc("UnloadKeyboardLayout") procCreateEnvironmentBlock = moduserenv.NewProc("CreateEnvironmentBlock") procDestroyEnvironmentBlock = moduserenv.NewProc("DestroyEnvironmentBlock") procGetUserProfileDirectoryW = moduserenv.NewProc("GetUserProfileDirectoryW") @@ -787,6 +797,14 @@ func FreeSid(sid *SID) (err error) { return } +func GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (err error) { + r1, _, e1 := syscall.Syscall(procGetAce.Addr(), 3, uintptr(unsafe.Pointer(acl)), uintptr(aceIndex), uintptr(unsafe.Pointer(pAce))) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func GetLengthSid(sid *SID) (len uint32) { r0, _, _ := syscall.Syscall(procGetLengthSid.Addr(), 1, uintptr(unsafe.Pointer(sid)), 0, 0) len = uint32(r0) @@ -2148,6 +2166,15 @@ func GetComputerName(buf *uint16, n *uint32) (err error) { return } +func GetConsoleCP() (cp uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetConsoleCP.Addr(), 0, 0, 0, 0) + cp = uint32(r0) + if cp == 0 { + err = errnoErr(e1) + } + return +} + func GetConsoleMode(console Handle, mode *uint32) (err error) { r1, _, e1 := syscall.Syscall(procGetConsoleMode.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(mode)), 0) if r1 == 0 { @@ -2156,6 +2183,15 @@ func GetConsoleMode(console Handle, mode *uint32) (err error) { return } +func GetConsoleOutputCP() (cp uint32, err error) { + r0, _, e1 := syscall.Syscall(procGetConsoleOutputCP.Addr(), 0, 0, 0, 0) + cp = uint32(r0) + if cp == 0 { + err = errnoErr(e1) + } + return +} + func GetConsoleScreenBufferInfo(console Handle, info *ConsoleScreenBufferInfo) (err error) { r1, _, e1 := syscall.Syscall(procGetConsoleScreenBufferInfo.Addr(), 2, uintptr(console), uintptr(unsafe.Pointer(info)), 0) if r1 == 0 { @@ -3024,6 +3060,14 @@ func SetCommTimeouts(handle Handle, timeouts *CommTimeouts) (err error) { return } +func SetConsoleCP(cp uint32) (err error) { + r1, _, e1 := syscall.Syscall(procSetConsoleCP.Addr(), 1, uintptr(cp), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func setConsoleCursorPosition(console Handle, position uint32) (err error) { r1, _, e1 := syscall.Syscall(procSetConsoleCursorPosition.Addr(), 2, uintptr(console), uintptr(position), 0) if r1 == 0 { @@ -3040,6 +3084,14 @@ func SetConsoleMode(console Handle, mode uint32) (err error) { return } +func SetConsoleOutputCP(cp uint32) (err error) { + r1, _, e1 := syscall.Syscall(procSetConsoleOutputCP.Addr(), 1, uintptr(cp), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func SetCurrentDirectory(path *uint16) (err error) { r1, _, e1 := syscall.Syscall(procSetCurrentDirectoryW.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0) if r1 == 0 { @@ -3486,6 +3538,14 @@ func NetGetJoinInformation(server *uint16, name **uint16, bufType *uint32) (nete return } +func NetUserEnum(serverName *uint16, level uint32, filter uint32, buf **byte, prefMaxLen uint32, entriesRead *uint32, totalEntries *uint32, resumeHandle *uint32) (neterr error) { + r0, _, _ := syscall.Syscall9(procNetUserEnum.Addr(), 8, uintptr(unsafe.Pointer(serverName)), uintptr(level), uintptr(filter), uintptr(unsafe.Pointer(buf)), uintptr(prefMaxLen), uintptr(unsafe.Pointer(entriesRead)), uintptr(unsafe.Pointer(totalEntries)), uintptr(unsafe.Pointer(resumeHandle)), 0) + if r0 != 0 { + neterr = syscall.Errno(r0) + } + return +} + func NetUserGetInfo(serverName *uint16, userName *uint16, level uint32, buf **byte) (neterr error) { r0, _, _ := syscall.Syscall6(procNetUserGetInfo.Addr(), 4, uintptr(unsafe.Pointer(serverName)), uintptr(unsafe.Pointer(userName)), uintptr(level), uintptr(unsafe.Pointer(buf)), 0, 0) if r0 != 0 { @@ -4064,6 +4124,12 @@ func GetGUIThreadInfo(thread uint32, info *GUIThreadInfo) (err error) { return } +func GetKeyboardLayout(tid uint32) (hkl Handle) { + r0, _, _ := syscall.Syscall(procGetKeyboardLayout.Addr(), 1, uintptr(tid), 0, 0) + hkl = Handle(r0) + return +} + func GetShellWindow() (shellWindow HWND) { r0, _, _ := syscall.Syscall(procGetShellWindow.Addr(), 0, 0, 0, 0) shellWindow = HWND(r0) @@ -4097,6 +4163,15 @@ func IsWindowVisible(hwnd HWND) (isVisible bool) { return } +func LoadKeyboardLayout(name *uint16, flags uint32) (hkl Handle, err error) { + r0, _, e1 := syscall.Syscall(procLoadKeyboardLayoutW.Addr(), 2, uintptr(unsafe.Pointer(name)), uintptr(flags), 0) + hkl = Handle(r0) + if hkl == 0 { + err = errnoErr(e1) + } + return +} + func MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret int32, err error) { r0, _, e1 := syscall.Syscall6(procMessageBoxW.Addr(), 4, uintptr(hwnd), uintptr(unsafe.Pointer(text)), uintptr(unsafe.Pointer(caption)), uintptr(boxtype), 0, 0) ret = int32(r0) @@ -4106,6 +4181,20 @@ func MessageBox(hwnd HWND, text *uint16, caption *uint16, boxtype uint32) (ret i return } +func ToUnicodeEx(vkey uint32, scancode uint32, keystate *byte, pwszBuff *uint16, cchBuff int32, flags uint32, hkl Handle) (ret int32) { + r0, _, _ := syscall.Syscall9(procToUnicodeEx.Addr(), 7, uintptr(vkey), uintptr(scancode), uintptr(unsafe.Pointer(keystate)), uintptr(unsafe.Pointer(pwszBuff)), uintptr(cchBuff), uintptr(flags), uintptr(hkl), 0, 0) + ret = int32(r0) + return +} + +func UnloadKeyboardLayout(hkl Handle) (err error) { + r1, _, e1 := syscall.Syscall(procUnloadKeyboardLayout.Addr(), 1, uintptr(hkl), 0, 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func CreateEnvironmentBlock(block **uint16, token Token, inheritExisting bool) (err error) { var _p0 uint32 if inheritExisting { diff --git a/vendor/golang.org/x/term/LICENSE b/vendor/golang.org/x/term/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/term/LICENSE +++ b/vendor/golang.org/x/term/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/term/term_windows.go b/vendor/golang.org/x/term/term_windows.go index 465f5606..df6bf948 100644 --- a/vendor/golang.org/x/term/term_windows.go +++ b/vendor/golang.org/x/term/term_windows.go @@ -26,6 +26,7 @@ func makeRaw(fd int) (*State, error) { return nil, err } raw := st &^ (windows.ENABLE_ECHO_INPUT | windows.ENABLE_PROCESSED_INPUT | windows.ENABLE_LINE_INPUT | windows.ENABLE_PROCESSED_OUTPUT) + raw |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT if err := windows.SetConsoleMode(windows.Handle(fd), raw); err != nil { return nil, err } diff --git a/vendor/golang.org/x/text/LICENSE b/vendor/golang.org/x/text/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/text/LICENSE +++ b/vendor/golang.org/x/text/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/tools/LICENSE b/vendor/golang.org/x/tools/LICENSE index 6a66aea5..2a7cf70d 100644 --- a/vendor/golang.org/x/tools/LICENSE +++ b/vendor/golang.org/x/tools/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/tools/cmd/stringer/stringer.go b/vendor/golang.org/x/tools/cmd/stringer/stringer.go deleted file mode 100644 index 998d1a51..00000000 --- a/vendor/golang.org/x/tools/cmd/stringer/stringer.go +++ /dev/null @@ -1,657 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Stringer is a tool to automate the creation of methods that satisfy the fmt.Stringer -// interface. Given the name of a (signed or unsigned) integer type T that has constants -// defined, stringer will create a new self-contained Go source file implementing -// -// func (t T) String() string -// -// The file is created in the same package and directory as the package that defines T. -// It has helpful defaults designed for use with go generate. -// -// Stringer works best with constants that are consecutive values such as created using iota, -// but creates good code regardless. In the future it might also provide custom support for -// constant sets that are bit patterns. -// -// For example, given this snippet, -// -// package painkiller -// -// type Pill int -// -// const ( -// Placebo Pill = iota -// Aspirin -// Ibuprofen -// Paracetamol -// Acetaminophen = Paracetamol -// ) -// -// running this command -// -// stringer -type=Pill -// -// in the same directory will create the file pill_string.go, in package painkiller, -// containing a definition of -// -// func (Pill) String() string -// -// That method will translate the value of a Pill constant to the string representation -// of the respective constant name, so that the call fmt.Print(painkiller.Aspirin) will -// print the string "Aspirin". -// -// Typically this process would be run using go generate, like this: -// -// //go:generate stringer -type=Pill -// -// If multiple constants have the same value, the lexically first matching name will -// be used (in the example, Acetaminophen will print as "Paracetamol"). -// -// With no arguments, it processes the package in the current directory. -// Otherwise, the arguments must name a single directory holding a Go package -// or a set of Go source files that represent a single Go package. -// -// The -type flag accepts a comma-separated list of types so a single run can -// generate methods for multiple types. The default output file is t_string.go, -// where t is the lower-cased name of the first type listed. It can be overridden -// with the -output flag. -// -// The -linecomment flag tells stringer to generate the text of any line comment, trimmed -// of leading spaces, instead of the constant name. For instance, if the constants above had a -// Pill prefix, one could write -// -// PillAspirin // Aspirin -// -// to suppress it in the output. -package main // import "golang.org/x/tools/cmd/stringer" - -import ( - "bytes" - "flag" - "fmt" - "go/ast" - "go/constant" - "go/format" - "go/token" - "go/types" - "log" - "os" - "path/filepath" - "sort" - "strings" - - "golang.org/x/tools/go/packages" -) - -var ( - typeNames = flag.String("type", "", "comma-separated list of type names; must be set") - output = flag.String("output", "", "output file name; default srcdir/_string.go") - trimprefix = flag.String("trimprefix", "", "trim the `prefix` from the generated constant names") - linecomment = flag.Bool("linecomment", false, "use line comment text as printed text when present") - buildTags = flag.String("tags", "", "comma-separated list of build tags to apply") -) - -// Usage is a replacement usage function for the flags package. -func Usage() { - fmt.Fprintf(os.Stderr, "Usage of stringer:\n") - fmt.Fprintf(os.Stderr, "\tstringer [flags] -type T [directory]\n") - fmt.Fprintf(os.Stderr, "\tstringer [flags] -type T files... # Must be a single package\n") - fmt.Fprintf(os.Stderr, "For more information, see:\n") - fmt.Fprintf(os.Stderr, "\thttps://pkg.go.dev/golang.org/x/tools/cmd/stringer\n") - fmt.Fprintf(os.Stderr, "Flags:\n") - flag.PrintDefaults() -} - -func main() { - log.SetFlags(0) - log.SetPrefix("stringer: ") - flag.Usage = Usage - flag.Parse() - if len(*typeNames) == 0 { - flag.Usage() - os.Exit(2) - } - types := strings.Split(*typeNames, ",") - var tags []string - if len(*buildTags) > 0 { - tags = strings.Split(*buildTags, ",") - } - - // We accept either one directory or a list of files. Which do we have? - args := flag.Args() - if len(args) == 0 { - // Default: process whole package in current directory. - args = []string{"."} - } - - // Parse the package once. - var dir string - g := Generator{ - trimPrefix: *trimprefix, - lineComment: *linecomment, - } - // TODO(suzmue): accept other patterns for packages (directories, list of files, import paths, etc). - if len(args) == 1 && isDirectory(args[0]) { - dir = args[0] - } else { - if len(tags) != 0 { - log.Fatal("-tags option applies only to directories, not when files are specified") - } - dir = filepath.Dir(args[0]) - } - - g.parsePackage(args, tags) - - // Print the header and package clause. - g.Printf("// Code generated by \"stringer %s\"; DO NOT EDIT.\n", strings.Join(os.Args[1:], " ")) - g.Printf("\n") - g.Printf("package %s", g.pkg.name) - g.Printf("\n") - g.Printf("import \"strconv\"\n") // Used by all methods. - - // Run generate for each type. - for _, typeName := range types { - g.generate(typeName) - } - - // Format the output. - src := g.format() - - // Write to file. - outputName := *output - if outputName == "" { - baseName := fmt.Sprintf("%s_string.go", types[0]) - outputName = filepath.Join(dir, strings.ToLower(baseName)) - } - err := os.WriteFile(outputName, src, 0644) - if err != nil { - log.Fatalf("writing output: %s", err) - } -} - -// isDirectory reports whether the named file is a directory. -func isDirectory(name string) bool { - info, err := os.Stat(name) - if err != nil { - log.Fatal(err) - } - return info.IsDir() -} - -// Generator holds the state of the analysis. Primarily used to buffer -// the output for format.Source. -type Generator struct { - buf bytes.Buffer // Accumulated output. - pkg *Package // Package we are scanning. - - trimPrefix string - lineComment bool -} - -func (g *Generator) Printf(format string, args ...interface{}) { - fmt.Fprintf(&g.buf, format, args...) -} - -// File holds a single parsed file and associated data. -type File struct { - pkg *Package // Package to which this file belongs. - file *ast.File // Parsed AST. - // These fields are reset for each type being generated. - typeName string // Name of the constant type. - values []Value // Accumulator for constant values of that type. - - trimPrefix string - lineComment bool -} - -type Package struct { - name string - defs map[*ast.Ident]types.Object - files []*File -} - -// parsePackage analyzes the single package constructed from the patterns and tags. -// parsePackage exits if there is an error. -func (g *Generator) parsePackage(patterns []string, tags []string) { - cfg := &packages.Config{ - Mode: packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo | packages.NeedSyntax, - // TODO: Need to think about constants in test files. Maybe write type_string_test.go - // in a separate pass? For later. - Tests: false, - BuildFlags: []string{fmt.Sprintf("-tags=%s", strings.Join(tags, " "))}, - } - pkgs, err := packages.Load(cfg, patterns...) - if err != nil { - log.Fatal(err) - } - if len(pkgs) != 1 { - log.Fatalf("error: %d packages found", len(pkgs)) - } - g.addPackage(pkgs[0]) -} - -// addPackage adds a type checked Package and its syntax files to the generator. -func (g *Generator) addPackage(pkg *packages.Package) { - g.pkg = &Package{ - name: pkg.Name, - defs: pkg.TypesInfo.Defs, - files: make([]*File, len(pkg.Syntax)), - } - - for i, file := range pkg.Syntax { - g.pkg.files[i] = &File{ - file: file, - pkg: g.pkg, - trimPrefix: g.trimPrefix, - lineComment: g.lineComment, - } - } -} - -// generate produces the String method for the named type. -func (g *Generator) generate(typeName string) { - values := make([]Value, 0, 100) - for _, file := range g.pkg.files { - // Set the state for this run of the walker. - file.typeName = typeName - file.values = nil - if file.file != nil { - ast.Inspect(file.file, file.genDecl) - values = append(values, file.values...) - } - } - - if len(values) == 0 { - log.Fatalf("no values defined for type %s", typeName) - } - // Generate code that will fail if the constants change value. - g.Printf("func _() {\n") - g.Printf("\t// An \"invalid array index\" compiler error signifies that the constant values have changed.\n") - g.Printf("\t// Re-run the stringer command to generate them again.\n") - g.Printf("\tvar x [1]struct{}\n") - for _, v := range values { - g.Printf("\t_ = x[%s - %s]\n", v.originalName, v.str) - } - g.Printf("}\n") - runs := splitIntoRuns(values) - // The decision of which pattern to use depends on the number of - // runs in the numbers. If there's only one, it's easy. For more than - // one, there's a tradeoff between complexity and size of the data - // and code vs. the simplicity of a map. A map takes more space, - // but so does the code. The decision here (crossover at 10) is - // arbitrary, but considers that for large numbers of runs the cost - // of the linear scan in the switch might become important, and - // rather than use yet another algorithm such as binary search, - // we punt and use a map. In any case, the likelihood of a map - // being necessary for any realistic example other than bitmasks - // is very low. And bitmasks probably deserve their own analysis, - // to be done some other day. - switch { - case len(runs) == 1: - g.buildOneRun(runs, typeName) - case len(runs) <= 10: - g.buildMultipleRuns(runs, typeName) - default: - g.buildMap(runs, typeName) - } -} - -// splitIntoRuns breaks the values into runs of contiguous sequences. -// For example, given 1,2,3,5,6,7 it returns {1,2,3},{5,6,7}. -// The input slice is known to be non-empty. -func splitIntoRuns(values []Value) [][]Value { - // We use stable sort so the lexically first name is chosen for equal elements. - sort.Stable(byValue(values)) - // Remove duplicates. Stable sort has put the one we want to print first, - // so use that one. The String method won't care about which named constant - // was the argument, so the first name for the given value is the only one to keep. - // We need to do this because identical values would cause the switch or map - // to fail to compile. - j := 1 - for i := 1; i < len(values); i++ { - if values[i].value != values[i-1].value { - values[j] = values[i] - j++ - } - } - values = values[:j] - runs := make([][]Value, 0, 10) - for len(values) > 0 { - // One contiguous sequence per outer loop. - i := 1 - for i < len(values) && values[i].value == values[i-1].value+1 { - i++ - } - runs = append(runs, values[:i]) - values = values[i:] - } - return runs -} - -// format returns the gofmt-ed contents of the Generator's buffer. -func (g *Generator) format() []byte { - src, err := format.Source(g.buf.Bytes()) - if err != nil { - // Should never happen, but can arise when developing this code. - // The user can compile the output to see the error. - log.Printf("warning: internal error: invalid Go generated: %s", err) - log.Printf("warning: compile the package to analyze the error") - return g.buf.Bytes() - } - return src -} - -// Value represents a declared constant. -type Value struct { - originalName string // The name of the constant. - name string // The name with trimmed prefix. - // The value is stored as a bit pattern alone. The boolean tells us - // whether to interpret it as an int64 or a uint64; the only place - // this matters is when sorting. - // Much of the time the str field is all we need; it is printed - // by Value.String. - value uint64 // Will be converted to int64 when needed. - signed bool // Whether the constant is a signed type. - str string // The string representation given by the "go/constant" package. -} - -func (v *Value) String() string { - return v.str -} - -// byValue lets us sort the constants into increasing order. -// We take care in the Less method to sort in signed or unsigned order, -// as appropriate. -type byValue []Value - -func (b byValue) Len() int { return len(b) } -func (b byValue) Swap(i, j int) { b[i], b[j] = b[j], b[i] } -func (b byValue) Less(i, j int) bool { - if b[i].signed { - return int64(b[i].value) < int64(b[j].value) - } - return b[i].value < b[j].value -} - -// genDecl processes one declaration clause. -func (f *File) genDecl(node ast.Node) bool { - decl, ok := node.(*ast.GenDecl) - if !ok || decl.Tok != token.CONST { - // We only care about const declarations. - return true - } - // The name of the type of the constants we are declaring. - // Can change if this is a multi-element declaration. - typ := "" - // Loop over the elements of the declaration. Each element is a ValueSpec: - // a list of names possibly followed by a type, possibly followed by values. - // If the type and value are both missing, we carry down the type (and value, - // but the "go/types" package takes care of that). - for _, spec := range decl.Specs { - vspec := spec.(*ast.ValueSpec) // Guaranteed to succeed as this is CONST. - if vspec.Type == nil && len(vspec.Values) > 0 { - // "X = 1". With no type but a value. If the constant is untyped, - // skip this vspec and reset the remembered type. - typ = "" - - // If this is a simple type conversion, remember the type. - // We don't mind if this is actually a call; a qualified call won't - // be matched (that will be SelectorExpr, not Ident), and only unusual - // situations will result in a function call that appears to be - // a type conversion. - ce, ok := vspec.Values[0].(*ast.CallExpr) - if !ok { - continue - } - id, ok := ce.Fun.(*ast.Ident) - if !ok { - continue - } - typ = id.Name - } - if vspec.Type != nil { - // "X T". We have a type. Remember it. - ident, ok := vspec.Type.(*ast.Ident) - if !ok { - continue - } - typ = ident.Name - } - if typ != f.typeName { - // This is not the type we're looking for. - continue - } - // We now have a list of names (from one line of source code) all being - // declared with the desired type. - // Grab their names and actual values and store them in f.values. - for _, name := range vspec.Names { - if name.Name == "_" { - continue - } - // This dance lets the type checker find the values for us. It's a - // bit tricky: look up the object declared by the name, find its - // types.Const, and extract its value. - obj, ok := f.pkg.defs[name] - if !ok { - log.Fatalf("no value for constant %s", name) - } - info := obj.Type().Underlying().(*types.Basic).Info() - if info&types.IsInteger == 0 { - log.Fatalf("can't handle non-integer constant type %s", typ) - } - value := obj.(*types.Const).Val() // Guaranteed to succeed as this is CONST. - if value.Kind() != constant.Int { - log.Fatalf("can't happen: constant is not an integer %s", name) - } - i64, isInt := constant.Int64Val(value) - u64, isUint := constant.Uint64Val(value) - if !isInt && !isUint { - log.Fatalf("internal error: value of %s is not an integer: %s", name, value.String()) - } - if !isInt { - u64 = uint64(i64) - } - v := Value{ - originalName: name.Name, - value: u64, - signed: info&types.IsUnsigned == 0, - str: value.String(), - } - if c := vspec.Comment; f.lineComment && c != nil && len(c.List) == 1 { - v.name = strings.TrimSpace(c.Text()) - } else { - v.name = strings.TrimPrefix(v.originalName, f.trimPrefix) - } - f.values = append(f.values, v) - } - } - return false -} - -// Helpers - -// usize returns the number of bits of the smallest unsigned integer -// type that will hold n. Used to create the smallest possible slice of -// integers to use as indexes into the concatenated strings. -func usize(n int) int { - switch { - case n < 1<<8: - return 8 - case n < 1<<16: - return 16 - default: - // 2^32 is enough constants for anyone. - return 32 - } -} - -// declareIndexAndNameVars declares the index slices and concatenated names -// strings representing the runs of values. -func (g *Generator) declareIndexAndNameVars(runs [][]Value, typeName string) { - var indexes, names []string - for i, run := range runs { - index, name := g.createIndexAndNameDecl(run, typeName, fmt.Sprintf("_%d", i)) - if len(run) != 1 { - indexes = append(indexes, index) - } - names = append(names, name) - } - g.Printf("const (\n") - for _, name := range names { - g.Printf("\t%s\n", name) - } - g.Printf(")\n\n") - - if len(indexes) > 0 { - g.Printf("var (") - for _, index := range indexes { - g.Printf("\t%s\n", index) - } - g.Printf(")\n\n") - } -} - -// declareIndexAndNameVar is the single-run version of declareIndexAndNameVars -func (g *Generator) declareIndexAndNameVar(run []Value, typeName string) { - index, name := g.createIndexAndNameDecl(run, typeName, "") - g.Printf("const %s\n", name) - g.Printf("var %s\n", index) -} - -// createIndexAndNameDecl returns the pair of declarations for the run. The caller will add "const" and "var". -func (g *Generator) createIndexAndNameDecl(run []Value, typeName string, suffix string) (string, string) { - b := new(bytes.Buffer) - indexes := make([]int, len(run)) - for i := range run { - b.WriteString(run[i].name) - indexes[i] = b.Len() - } - nameConst := fmt.Sprintf("_%s_name%s = %q", typeName, suffix, b.String()) - nameLen := b.Len() - b.Reset() - fmt.Fprintf(b, "_%s_index%s = [...]uint%d{0, ", typeName, suffix, usize(nameLen)) - for i, v := range indexes { - if i > 0 { - fmt.Fprintf(b, ", ") - } - fmt.Fprintf(b, "%d", v) - } - fmt.Fprintf(b, "}") - return b.String(), nameConst -} - -// declareNameVars declares the concatenated names string representing all the values in the runs. -func (g *Generator) declareNameVars(runs [][]Value, typeName string, suffix string) { - g.Printf("const _%s_name%s = \"", typeName, suffix) - for _, run := range runs { - for i := range run { - g.Printf("%s", run[i].name) - } - } - g.Printf("\"\n") -} - -// buildOneRun generates the variables and String method for a single run of contiguous values. -func (g *Generator) buildOneRun(runs [][]Value, typeName string) { - values := runs[0] - g.Printf("\n") - g.declareIndexAndNameVar(values, typeName) - // The generated code is simple enough to write as a Printf format. - lessThanZero := "" - if values[0].signed { - lessThanZero = "i < 0 || " - } - if values[0].value == 0 { // Signed or unsigned, 0 is still 0. - g.Printf(stringOneRun, typeName, usize(len(values)), lessThanZero) - } else { - g.Printf(stringOneRunWithOffset, typeName, values[0].String(), usize(len(values)), lessThanZero) - } -} - -// Arguments to format are: -// -// [1]: type name -// [2]: size of index element (8 for uint8 etc.) -// [3]: less than zero check (for signed types) -const stringOneRun = `func (i %[1]s) String() string { - if %[3]si >= %[1]s(len(_%[1]s_index)-1) { - return "%[1]s(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _%[1]s_name[_%[1]s_index[i]:_%[1]s_index[i+1]] -} -` - -// Arguments to format are: -// [1]: type name -// [2]: lowest defined value for type, as a string -// [3]: size of index element (8 for uint8 etc.) -// [4]: less than zero check (for signed types) -/* - */ -const stringOneRunWithOffset = `func (i %[1]s) String() string { - i -= %[2]s - if %[4]si >= %[1]s(len(_%[1]s_index)-1) { - return "%[1]s(" + strconv.FormatInt(int64(i + %[2]s), 10) + ")" - } - return _%[1]s_name[_%[1]s_index[i] : _%[1]s_index[i+1]] -} -` - -// buildMultipleRuns generates the variables and String method for multiple runs of contiguous values. -// For this pattern, a single Printf format won't do. -func (g *Generator) buildMultipleRuns(runs [][]Value, typeName string) { - g.Printf("\n") - g.declareIndexAndNameVars(runs, typeName) - g.Printf("func (i %s) String() string {\n", typeName) - g.Printf("\tswitch {\n") - for i, values := range runs { - if len(values) == 1 { - g.Printf("\tcase i == %s:\n", &values[0]) - g.Printf("\t\treturn _%s_name_%d\n", typeName, i) - continue - } - if values[0].value == 0 && !values[0].signed { - // For an unsigned lower bound of 0, "0 <= i" would be redundant. - g.Printf("\tcase i <= %s:\n", &values[len(values)-1]) - } else { - g.Printf("\tcase %s <= i && i <= %s:\n", &values[0], &values[len(values)-1]) - } - if values[0].value != 0 { - g.Printf("\t\ti -= %s\n", &values[0]) - } - g.Printf("\t\treturn _%s_name_%d[_%s_index_%d[i]:_%s_index_%d[i+1]]\n", - typeName, i, typeName, i, typeName, i) - } - g.Printf("\tdefault:\n") - g.Printf("\t\treturn \"%s(\" + strconv.FormatInt(int64(i), 10) + \")\"\n", typeName) - g.Printf("\t}\n") - g.Printf("}\n") -} - -// buildMap handles the case where the space is so sparse a map is a reasonable fallback. -// It's a rare situation but has simple code. -func (g *Generator) buildMap(runs [][]Value, typeName string) { - g.Printf("\n") - g.declareNameVars(runs, typeName, "") - g.Printf("\nvar _%s_map = map[%s]string{\n", typeName, typeName) - n := 0 - for _, values := range runs { - for _, value := range values { - g.Printf("\t%s: _%s_name[%d:%d],\n", &value, typeName, n, n+len(value.name)) - n += len(value.name) - } - } - g.Printf("}\n\n") - g.Printf(stringMap, typeName) -} - -// Argument to format is the type name. -const stringMap = `func (i %[1]s) String() string { - if str, ok := _%[1]s_map[i]; ok { - return str - } - return "%[1]s(" + strconv.FormatInt(int64(i), 10) + ")" -} -` diff --git a/vendor/golang.org/x/tools/cover/profile.go b/vendor/golang.org/x/tools/cover/profile.go new file mode 100644 index 00000000..47a9a541 --- /dev/null +++ b/vendor/golang.org/x/tools/cover/profile.go @@ -0,0 +1,266 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package cover provides support for parsing coverage profiles +// generated by "go test -coverprofile=cover.out". +package cover // import "golang.org/x/tools/cover" + +import ( + "bufio" + "errors" + "fmt" + "io" + "math" + "os" + "sort" + "strconv" + "strings" +) + +// Profile represents the profiling data for a specific file. +type Profile struct { + FileName string + Mode string + Blocks []ProfileBlock +} + +// ProfileBlock represents a single block of profiling data. +type ProfileBlock struct { + StartLine, StartCol int + EndLine, EndCol int + NumStmt, Count int +} + +type byFileName []*Profile + +func (p byFileName) Len() int { return len(p) } +func (p byFileName) Less(i, j int) bool { return p[i].FileName < p[j].FileName } +func (p byFileName) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +// ParseProfiles parses profile data in the specified file and returns a +// Profile for each source file described therein. +func ParseProfiles(fileName string) ([]*Profile, error) { + pf, err := os.Open(fileName) + if err != nil { + return nil, err + } + defer pf.Close() + return ParseProfilesFromReader(pf) +} + +// ParseProfilesFromReader parses profile data from the Reader and +// returns a Profile for each source file described therein. +func ParseProfilesFromReader(rd io.Reader) ([]*Profile, error) { + // First line is "mode: foo", where foo is "set", "count", or "atomic". + // Rest of file is in the format + // encoding/base64/base64.go:34.44,37.40 3 1 + // where the fields are: name.go:line.column,line.column numberOfStatements count + files := make(map[string]*Profile) + s := bufio.NewScanner(rd) + mode := "" + for s.Scan() { + line := s.Text() + if mode == "" { + const p = "mode: " + if !strings.HasPrefix(line, p) || line == p { + return nil, fmt.Errorf("bad mode line: %v", line) + } + mode = line[len(p):] + continue + } + fn, b, err := parseLine(line) + if err != nil { + return nil, fmt.Errorf("line %q doesn't match expected format: %v", line, err) + } + p := files[fn] + if p == nil { + p = &Profile{ + FileName: fn, + Mode: mode, + } + files[fn] = p + } + p.Blocks = append(p.Blocks, b) + } + if err := s.Err(); err != nil { + return nil, err + } + for _, p := range files { + sort.Sort(blocksByStart(p.Blocks)) + // Merge samples from the same location. + j := 1 + for i := 1; i < len(p.Blocks); i++ { + b := p.Blocks[i] + last := p.Blocks[j-1] + if b.StartLine == last.StartLine && + b.StartCol == last.StartCol && + b.EndLine == last.EndLine && + b.EndCol == last.EndCol { + if b.NumStmt != last.NumStmt { + return nil, fmt.Errorf("inconsistent NumStmt: changed from %d to %d", last.NumStmt, b.NumStmt) + } + if mode == "set" { + p.Blocks[j-1].Count |= b.Count + } else { + p.Blocks[j-1].Count += b.Count + } + continue + } + p.Blocks[j] = b + j++ + } + p.Blocks = p.Blocks[:j] + } + // Generate a sorted slice. + profiles := make([]*Profile, 0, len(files)) + for _, profile := range files { + profiles = append(profiles, profile) + } + sort.Sort(byFileName(profiles)) + return profiles, nil +} + +// parseLine parses a line from a coverage file. +// It is equivalent to the regex +// ^(.+):([0-9]+)\.([0-9]+),([0-9]+)\.([0-9]+) ([0-9]+) ([0-9]+)$ +// +// However, it is much faster: https://golang.org/cl/179377 +func parseLine(l string) (fileName string, block ProfileBlock, err error) { + end := len(l) + + b := ProfileBlock{} + b.Count, end, err = seekBack(l, ' ', end, "Count") + if err != nil { + return "", b, err + } + b.NumStmt, end, err = seekBack(l, ' ', end, "NumStmt") + if err != nil { + return "", b, err + } + b.EndCol, end, err = seekBack(l, '.', end, "EndCol") + if err != nil { + return "", b, err + } + b.EndLine, end, err = seekBack(l, ',', end, "EndLine") + if err != nil { + return "", b, err + } + b.StartCol, end, err = seekBack(l, '.', end, "StartCol") + if err != nil { + return "", b, err + } + b.StartLine, end, err = seekBack(l, ':', end, "StartLine") + if err != nil { + return "", b, err + } + fn := l[0:end] + if fn == "" { + return "", b, errors.New("a FileName cannot be blank") + } + return fn, b, nil +} + +// seekBack searches backwards from end to find sep in l, then returns the +// value between sep and end as an integer. +// If seekBack fails, the returned error will reference what. +func seekBack(l string, sep byte, end int, what string) (value int, nextSep int, err error) { + // Since we're seeking backwards and we know only ASCII is legal for these values, + // we can ignore the possibility of non-ASCII characters. + for start := end - 1; start >= 0; start-- { + if l[start] == sep { + i, err := strconv.Atoi(l[start+1 : end]) + if err != nil { + return 0, 0, fmt.Errorf("couldn't parse %q: %v", what, err) + } + if i < 0 { + return 0, 0, fmt.Errorf("negative values are not allowed for %s, found %d", what, i) + } + return i, start, nil + } + } + return 0, 0, fmt.Errorf("couldn't find a %s before %s", string(sep), what) +} + +type blocksByStart []ProfileBlock + +func (b blocksByStart) Len() int { return len(b) } +func (b blocksByStart) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b blocksByStart) Less(i, j int) bool { + bi, bj := b[i], b[j] + return bi.StartLine < bj.StartLine || bi.StartLine == bj.StartLine && bi.StartCol < bj.StartCol +} + +// Boundary represents the position in a source file of the beginning or end of a +// block as reported by the coverage profile. In HTML mode, it will correspond to +// the opening or closing of a tag and will be used to colorize the source +type Boundary struct { + Offset int // Location as a byte offset in the source file. + Start bool // Is this the start of a block? + Count int // Event count from the cover profile. + Norm float64 // Count normalized to [0..1]. + Index int // Order in input file. +} + +// Boundaries returns a Profile as a set of Boundary objects within the provided src. +func (p *Profile) Boundaries(src []byte) (boundaries []Boundary) { + // Find maximum count. + max := 0 + for _, b := range p.Blocks { + if b.Count > max { + max = b.Count + } + } + // Divisor for normalization. + divisor := math.Log(float64(max)) + + // boundary returns a Boundary, populating the Norm field with a normalized Count. + index := 0 + boundary := func(offset int, start bool, count int) Boundary { + b := Boundary{Offset: offset, Start: start, Count: count, Index: index} + index++ + if !start || count == 0 { + return b + } + if max <= 1 { + b.Norm = 0.8 // Profile is in"set" mode; we want a heat map. Use cov8 in the CSS. + } else if count > 0 { + b.Norm = math.Log(float64(count)) / divisor + } + return b + } + + line, col := 1, 2 // TODO: Why is this 2? + for si, bi := 0, 0; si < len(src) && bi < len(p.Blocks); { + b := p.Blocks[bi] + if b.StartLine == line && b.StartCol == col { + boundaries = append(boundaries, boundary(si, true, b.Count)) + } + if b.EndLine == line && b.EndCol == col || line > b.EndLine { + boundaries = append(boundaries, boundary(si, false, 0)) + bi++ + continue // Don't advance through src; maybe the next block starts here. + } + if src[si] == '\n' { + line++ + col = 0 + } + col++ + si++ + } + sort.Sort(boundariesByPos(boundaries)) + return +} + +type boundariesByPos []Boundary + +func (b boundariesByPos) Len() int { return len(b) } +func (b boundariesByPos) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b boundariesByPos) Less(i, j int) bool { + if b[i].Offset == b[j].Offset { + // Boundaries at the same offset should be ordered according to + // their original position. + return b[i].Index < b[j].Index + } + return b[i].Offset < b[j].Offset +} diff --git a/vendor/golang.org/x/tools/go/ast/astutil/enclosing.go b/vendor/golang.org/x/tools/go/ast/astutil/enclosing.go index 9fa5aa19..6e34df46 100644 --- a/vendor/golang.org/x/tools/go/ast/astutil/enclosing.go +++ b/vendor/golang.org/x/tools/go/ast/astutil/enclosing.go @@ -11,8 +11,6 @@ import ( "go/ast" "go/token" "sort" - - "golang.org/x/tools/internal/typeparams" ) // PathEnclosingInterval returns the node that encloses the source @@ -108,8 +106,21 @@ func PathEnclosingInterval(root *ast.File, start, end token.Pos) (path []ast.Nod // Does augmented child strictly contain [start, end)? if augPos <= start && end <= augEnd { - _, isToken := child.(tokenNode) - return isToken || visit(child) + if is[tokenNode](child) { + return true + } + + // childrenOf elides the FuncType node beneath FuncDecl. + // Add it back here for TypeParams, Params, Results, + // all FieldLists). But we don't add it back for the "func" token + // even though it is is the tree at FuncDecl.Type.Func. + if decl, ok := node.(*ast.FuncDecl); ok { + if fields, ok := child.(*ast.FieldList); ok && fields != decl.Recv { + path = append(path, decl.Type) + } + } + + return visit(child) } // Does [start, end) overlap multiple children? @@ -315,6 +326,8 @@ func childrenOf(n ast.Node) []ast.Node { // // As a workaround, we inline the case for FuncType // here and order things correctly. + // We also need to insert the elided FuncType just + // before the 'visit' recursion. // children = nil // discard ast.Walk(FuncDecl) info subtrees children = append(children, tok(n.Type.Func, len("func"))) @@ -322,7 +335,7 @@ func childrenOf(n ast.Node) []ast.Node { children = append(children, n.Recv) } children = append(children, n.Name) - if tparams := typeparams.ForFuncType(n.Type); tparams != nil { + if tparams := n.Type.TypeParams; tparams != nil { children = append(children, tparams) } if n.Type.Params != nil { @@ -377,7 +390,7 @@ func childrenOf(n ast.Node) []ast.Node { tok(n.Lbrack, len("[")), tok(n.Rbrack, len("]"))) - case *typeparams.IndexListExpr: + case *ast.IndexListExpr: children = append(children, tok(n.Lbrack, len("[")), tok(n.Rbrack, len("]"))) @@ -588,7 +601,7 @@ func NodeDescription(n ast.Node) string { return "decrement statement" case *ast.IndexExpr: return "index expression" - case *typeparams.IndexListExpr: + case *ast.IndexListExpr: return "index list expression" case *ast.InterfaceType: return "interface type" @@ -634,3 +647,8 @@ func NodeDescription(n ast.Node) string { } panic(fmt.Sprintf("unexpected node type: %T", n)) } + +func is[T any](x any) bool { + _, ok := x.(T) + return ok +} diff --git a/vendor/golang.org/x/tools/go/ast/astutil/rewrite.go b/vendor/golang.org/x/tools/go/ast/astutil/rewrite.go index f430b21b..58934f76 100644 --- a/vendor/golang.org/x/tools/go/ast/astutil/rewrite.go +++ b/vendor/golang.org/x/tools/go/ast/astutil/rewrite.go @@ -9,8 +9,6 @@ import ( "go/ast" "reflect" "sort" - - "golang.org/x/tools/internal/typeparams" ) // An ApplyFunc is invoked by Apply for each node n, even if n is nil, @@ -252,7 +250,7 @@ func (a *application) apply(parent ast.Node, name string, iter *iterator, n ast. a.apply(n, "X", nil, n.X) a.apply(n, "Index", nil, n.Index) - case *typeparams.IndexListExpr: + case *ast.IndexListExpr: a.apply(n, "X", nil, n.X) a.applyList(n, "Indices") @@ -293,7 +291,7 @@ func (a *application) apply(parent ast.Node, name string, iter *iterator, n ast. a.apply(n, "Fields", nil, n.Fields) case *ast.FuncType: - if tparams := typeparams.ForFuncType(n); tparams != nil { + if tparams := n.TypeParams; tparams != nil { a.apply(n, "TypeParams", nil, tparams) } a.apply(n, "Params", nil, n.Params) @@ -408,7 +406,7 @@ func (a *application) apply(parent ast.Node, name string, iter *iterator, n ast. case *ast.TypeSpec: a.apply(n, "Doc", nil, n.Doc) a.apply(n, "Name", nil, n.Name) - if tparams := typeparams.ForTypeSpec(n); tparams != nil { + if tparams := n.TypeParams; tparams != nil { a.apply(n, "TypeParams", nil, tparams) } a.apply(n, "Type", nil, n.Type) diff --git a/vendor/golang.org/x/tools/go/ast/astutil/util.go b/vendor/golang.org/x/tools/go/ast/astutil/util.go index 919d5305..ca71e3e1 100644 --- a/vendor/golang.org/x/tools/go/ast/astutil/util.go +++ b/vendor/golang.org/x/tools/go/ast/astutil/util.go @@ -7,12 +7,5 @@ package astutil import "go/ast" // Unparen returns e with any enclosing parentheses stripped. -func Unparen(e ast.Expr) ast.Expr { - for { - p, ok := e.(*ast.ParenExpr) - if !ok { - return e - } - e = p.X - } -} +// Deprecated: use [ast.Unparen]. +func Unparen(e ast.Expr) ast.Expr { return ast.Unparen(e) } diff --git a/vendor/golang.org/x/tools/go/ast/inspector/inspector.go b/vendor/golang.org/x/tools/go/ast/inspector/inspector.go index 1fc1de0b..0e0ba4c0 100644 --- a/vendor/golang.org/x/tools/go/ast/inspector/inspector.go +++ b/vendor/golang.org/x/tools/go/ast/inspector/inspector.go @@ -73,6 +73,15 @@ func (in *Inspector) Preorder(types []ast.Node, f func(ast.Node)) { // check, Preorder is almost twice as fast as Nodes. The two // features seem to contribute similar slowdowns (~1.4x each). + // This function is equivalent to the PreorderSeq call below, + // but to avoid the additional dynamic call (which adds 13-35% + // to the benchmarks), we expand it out. + // + // in.PreorderSeq(types...)(func(n ast.Node) bool { + // f(n) + // return true + // }) + mask := maskOf(types) for i := 0; i < len(in.events); { ev := in.events[i] diff --git a/vendor/golang.org/x/tools/go/ast/inspector/iter.go b/vendor/golang.org/x/tools/go/ast/inspector/iter.go new file mode 100644 index 00000000..b7e95911 --- /dev/null +++ b/vendor/golang.org/x/tools/go/ast/inspector/iter.go @@ -0,0 +1,85 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.23 + +package inspector + +import ( + "go/ast" + "iter" +) + +// PreorderSeq returns an iterator that visits all the +// nodes of the files supplied to New in depth-first order. +// It visits each node n before n's children. +// The complete traversal sequence is determined by ast.Inspect. +// +// The types argument, if non-empty, enables type-based +// filtering of events: only nodes whose type matches an +// element of the types slice are included in the sequence. +func (in *Inspector) PreorderSeq(types ...ast.Node) iter.Seq[ast.Node] { + + // This implementation is identical to Preorder, + // except that it supports breaking out of the loop. + + return func(yield func(ast.Node) bool) { + mask := maskOf(types) + for i := 0; i < len(in.events); { + ev := in.events[i] + if ev.index > i { + // push + if ev.typ&mask != 0 { + if !yield(ev.node) { + break + } + } + pop := ev.index + if in.events[pop].typ&mask == 0 { + // Subtrees do not contain types: skip them and pop. + i = pop + 1 + continue + } + } + i++ + } + } +} + +// All[N] returns an iterator over all the nodes of type N. +// N must be a pointer-to-struct type that implements ast.Node. +// +// Example: +// +// for call := range All[*ast.CallExpr](in) { ... } +func All[N interface { + *S + ast.Node +}, S any](in *Inspector) iter.Seq[N] { + + // To avoid additional dynamic call overheads, + // we duplicate rather than call the logic of PreorderSeq. + + mask := typeOf((N)(nil)) + return func(yield func(N) bool) { + for i := 0; i < len(in.events); { + ev := in.events[i] + if ev.index > i { + // push + if ev.typ&mask != 0 { + if !yield(ev.node.(N)) { + break + } + } + pop := ev.index + if in.events[pop].typ&mask == 0 { + // Subtrees do not contain types: skip them and pop. + i = pop + 1 + continue + } + } + i++ + } + } +} diff --git a/vendor/golang.org/x/tools/go/ast/inspector/typeof.go b/vendor/golang.org/x/tools/go/ast/inspector/typeof.go index 703c8139..2a872f89 100644 --- a/vendor/golang.org/x/tools/go/ast/inspector/typeof.go +++ b/vendor/golang.org/x/tools/go/ast/inspector/typeof.go @@ -12,8 +12,6 @@ package inspector import ( "go/ast" "math" - - "golang.org/x/tools/internal/typeparams" ) const ( @@ -171,7 +169,7 @@ func typeOf(n ast.Node) uint64 { return 1 << nIncDecStmt case *ast.IndexExpr: return 1 << nIndexExpr - case *typeparams.IndexListExpr: + case *ast.IndexListExpr: return 1 << nIndexListExpr case *ast.InterfaceType: return 1 << nInterfaceType diff --git a/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go b/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go index 03543bd4..137cc8df 100644 --- a/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go +++ b/vendor/golang.org/x/tools/go/gcexportdata/gcexportdata.go @@ -47,7 +47,7 @@ import ( func Find(importPath, srcDir string) (filename, path string) { cmd := exec.Command("go", "list", "-json", "-export", "--", importPath) cmd.Dir = srcDir - out, err := cmd.CombinedOutput() + out, err := cmd.Output() if err != nil { return "", "" } diff --git a/vendor/golang.org/x/tools/go/internal/packagesdriver/sizes.go b/vendor/golang.org/x/tools/go/internal/packagesdriver/sizes.go deleted file mode 100644 index 0454cdd7..00000000 --- a/vendor/golang.org/x/tools/go/internal/packagesdriver/sizes.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package packagesdriver fetches type sizes for go/packages and go/analysis. -package packagesdriver - -import ( - "context" - "fmt" - "strings" - - "golang.org/x/tools/internal/gocommand" -) - -var debug = false - -func GetSizesForArgsGolist(ctx context.Context, inv gocommand.Invocation, gocmdRunner *gocommand.Runner) (string, string, error) { - inv.Verb = "list" - inv.Args = []string{"-f", "{{context.GOARCH}} {{context.Compiler}}", "--", "unsafe"} - stdout, stderr, friendlyErr, rawErr := gocmdRunner.RunRaw(ctx, inv) - var goarch, compiler string - if rawErr != nil { - if rawErrMsg := rawErr.Error(); strings.Contains(rawErrMsg, "cannot find main module") || strings.Contains(rawErrMsg, "go.mod file not found") { - // User's running outside of a module. All bets are off. Get GOARCH and guess compiler is gc. - // TODO(matloob): Is this a problem in practice? - inv.Verb = "env" - inv.Args = []string{"GOARCH"} - envout, enverr := gocmdRunner.Run(ctx, inv) - if enverr != nil { - return "", "", enverr - } - goarch = strings.TrimSpace(envout.String()) - compiler = "gc" - } else { - return "", "", friendlyErr - } - } else { - fields := strings.Fields(stdout.String()) - if len(fields) < 2 { - return "", "", fmt.Errorf("could not parse GOARCH and Go compiler in format \" \":\nstdout: <<%s>>\nstderr: <<%s>>", - stdout.String(), stderr.String()) - } - goarch = fields[0] - compiler = fields[1] - } - return compiler, goarch, nil -} diff --git a/vendor/golang.org/x/tools/go/packages/doc.go b/vendor/golang.org/x/tools/go/packages/doc.go index da4ab89f..f1931d10 100644 --- a/vendor/golang.org/x/tools/go/packages/doc.go +++ b/vendor/golang.org/x/tools/go/packages/doc.go @@ -5,12 +5,20 @@ /* Package packages loads Go packages for inspection and analysis. -The Load function takes as input a list of patterns and return a list of Package -structs describing individual packages matched by those patterns. -The LoadMode controls the amount of detail in the loaded packages. - -Load passes most patterns directly to the underlying build tool, -but all patterns with the prefix "query=", where query is a +The [Load] function takes as input a list of patterns and returns a +list of [Package] values describing individual packages matched by those +patterns. +A [Config] specifies configuration options, the most important of which is +the [LoadMode], which controls the amount of detail in the loaded packages. + +Load passes most patterns directly to the underlying build tool. +The default build tool is the go command. +Its supported patterns are described at +https://pkg.go.dev/cmd/go#hdr-Package_lists_and_patterns. +Other build systems may be supported by providing a "driver"; +see [The driver protocol]. + +All patterns with the prefix "query=", where query is a non-empty string of letters from [a-z], are reserved and may be interpreted as query operators. @@ -35,7 +43,7 @@ The Package struct provides basic information about the package, including - Imports, a map from source import strings to the Packages they name; - Types, the type information for the package's exported symbols; - Syntax, the parsed syntax trees for the package's source code; and - - TypeInfo, the result of a complete type-check of the package syntax trees. + - TypesInfo, the result of a complete type-check of the package syntax trees. (See the documentation for type Package for the complete list of fields and more detailed descriptions.) @@ -56,7 +64,7 @@ graph using the Imports fields. The Load function can be configured by passing a pointer to a Config as the first argument. A nil Config is equivalent to the zero Config, which -causes Load to run in LoadFiles mode, collecting minimal information. +causes Load to run in [LoadFiles] mode, collecting minimal information. See the documentation for type Config for details. As noted earlier, the Config.Mode controls the amount of detail @@ -64,9 +72,40 @@ reported about the loaded packages. See the documentation for type LoadMode for details. Most tools should pass their command-line arguments (after any flags) -uninterpreted to the loader, so that the loader can interpret them +uninterpreted to Load, so that it can interpret them according to the conventions of the underlying build system. + See the Example function for typical usage. + +# The driver protocol + +Load may be used to load Go packages even in Go projects that use +alternative build systems, by installing an appropriate "driver" +program for the build system and specifying its location in the +GOPACKAGESDRIVER environment variable. +For example, +https://github.com/bazelbuild/rules_go/wiki/Editor-and-tool-integration +explains how to use the driver for Bazel. + +The driver program is responsible for interpreting patterns in its +preferred notation and reporting information about the packages that +those patterns identify. Drivers must also support the special "file=" +and "pattern=" patterns described above. + +The patterns are provided as positional command-line arguments. A +JSON-encoded [DriverRequest] message providing additional information +is written to the driver's standard input. The driver must write a +JSON-encoded [DriverResponse] message to its standard output. (This +message differs from the JSON schema produced by 'go list'.) + +The value of the PWD environment variable seen by the driver process +is the preferred name of its working directory. (The working directory +may have other aliases due to symbolic links; see the comment on the +Dir field of [exec.Cmd] for related information.) +When the driver process emits in its response the name of a file +that is a descendant of this directory, it must use an absolute path +that has the value of PWD as a prefix, to ensure that the returned +filenames satisfy the original query. */ package packages // import "golang.org/x/tools/go/packages" @@ -168,14 +207,6 @@ Instead, ssadump no longer requests the runtime package, but seeks it among the dependencies of the user-specified packages, and emits an error if it is not found. -Overlays: The Overlay field in the Config allows providing alternate contents -for Go source files, by providing a mapping from file path to contents. -go/packages will pull in new imports added in overlay files when go/packages -is run in LoadImports mode or greater. -Overlay support for the go list driver isn't complete yet: if the file doesn't -exist on disk, it will only be recognized in an overlay if it is a non-test file -and the package would be reported even without the overlay. - Questions & Tasks - Add GOARCH/GOOS? diff --git a/vendor/golang.org/x/tools/go/packages/external.go b/vendor/golang.org/x/tools/go/packages/external.go index 7242a0a7..8f7afcb5 100644 --- a/vendor/golang.org/x/tools/go/packages/external.go +++ b/vendor/golang.org/x/tools/go/packages/external.go @@ -2,48 +2,87 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// This file enables an external tool to intercept package requests. -// If the tool is present then its results are used in preference to -// the go list command. - package packages +// This file defines the protocol that enables an external "driver" +// tool to supply package metadata in place of 'go list'. + import ( "bytes" "encoding/json" "fmt" - exec "golang.org/x/sys/execabs" "os" + "os/exec" "strings" ) -// The Driver Protocol +// DriverRequest defines the schema of a request for package metadata +// from an external driver program. The JSON-encoded DriverRequest +// message is provided to the driver program's standard input. The +// query patterns are provided as command-line arguments. // -// The driver, given the inputs to a call to Load, returns metadata about the packages specified. -// This allows for different build systems to support go/packages by telling go/packages how the -// packages' source is organized. -// The driver is a binary, either specified by the GOPACKAGESDRIVER environment variable or in -// the path as gopackagesdriver. It's given the inputs to load in its argv. See the package -// documentation in doc.go for the full description of the patterns that need to be supported. -// A driver receives as a JSON-serialized driverRequest struct in standard input and will -// produce a JSON-serialized driverResponse (see definition in packages.go) in its standard output. - -// driverRequest is used to provide the portion of Load's Config that is needed by a driver. -type driverRequest struct { +// See the package documentation for an overview. +type DriverRequest struct { Mode LoadMode `json:"mode"` + // Env specifies the environment the underlying build system should be run in. Env []string `json:"env"` + // BuildFlags are flags that should be passed to the underlying build system. BuildFlags []string `json:"build_flags"` + // Tests specifies whether the patterns should also return test packages. Tests bool `json:"tests"` - // Overlay maps file paths (relative to the driver's working directory) to the byte contents - // of overlay files. + + // Overlay maps file paths (relative to the driver's working directory) + // to the contents of overlay files (see Config.Overlay). Overlay map[string][]byte `json:"overlay"` } +// DriverResponse defines the schema of a response from an external +// driver program, providing the results of a query for package +// metadata. The driver program must write a JSON-encoded +// DriverResponse message to its standard output. +// +// See the package documentation for an overview. +type DriverResponse struct { + // NotHandled is returned if the request can't be handled by the current + // driver. If an external driver returns a response with NotHandled, the + // rest of the DriverResponse is ignored, and go/packages will fallback + // to the next driver. If go/packages is extended in the future to support + // lists of multiple drivers, go/packages will fall back to the next driver. + NotHandled bool + + // Compiler and Arch are the arguments pass of types.SizesFor + // to get a types.Sizes to use when type checking. + Compiler string + Arch string + + // Roots is the set of package IDs that make up the root packages. + // We have to encode this separately because when we encode a single package + // we cannot know if it is one of the roots as that requires knowledge of the + // graph it is part of. + Roots []string `json:",omitempty"` + + // Packages is the full set of packages in the graph. + // The packages are not connected into a graph. + // The Imports if populated will be stubs that only have their ID set. + // Imports will be connected and then type and syntax information added in a + // later pass (see refine). + Packages []*Package + + // GoVersion is the minor version number used by the driver + // (e.g. the go command on the PATH) when selecting .go files. + // Zero means unknown. + GoVersion int +} + +// driver is the type for functions that query the build system for the +// packages named by the patterns. +type driver func(cfg *Config, patterns ...string) (*DriverResponse, error) + // findExternalDriver returns the file path of a tool that supplies -// the build system package structure, or "" if not found." +// the build system package structure, or "" if not found. // If GOPACKAGESDRIVER is set in the environment findExternalTool returns its // value, otherwise it searches for a binary named gopackagesdriver on the PATH. func findExternalDriver(cfg *Config) driver { @@ -64,8 +103,8 @@ func findExternalDriver(cfg *Config) driver { return nil } } - return func(cfg *Config, words ...string) (*driverResponse, error) { - req, err := json.Marshal(driverRequest{ + return func(cfg *Config, words ...string) (*DriverResponse, error) { + req, err := json.Marshal(DriverRequest{ Mode: cfg.Mode, Env: cfg.Env, BuildFlags: cfg.BuildFlags, @@ -80,7 +119,19 @@ func findExternalDriver(cfg *Config) driver { stderr := new(bytes.Buffer) cmd := exec.CommandContext(cfg.Context, tool, words...) cmd.Dir = cfg.Dir - cmd.Env = cfg.Env + // The cwd gets resolved to the real path. On Darwin, where + // /tmp is a symlink, this breaks anything that expects the + // working directory to keep the original path, including the + // go command when dealing with modules. + // + // os.Getwd stdlib has a special feature where if the + // cwd and the PWD are the same node then it trusts + // the PWD, so by setting it in the env for the child + // process we fix up all the paths returned by the go + // command. + // + // (See similar trick in Invocation.run in ../../internal/gocommand/invoke.go) + cmd.Env = append(slicesClip(cfg.Env), "PWD="+cfg.Dir) cmd.Stdin = bytes.NewReader(req) cmd.Stdout = buf cmd.Stderr = stderr @@ -92,10 +143,14 @@ func findExternalDriver(cfg *Config) driver { fmt.Fprintf(os.Stderr, "%s stderr: <<%s>>\n", cmdDebugStr(cmd), stderr) } - var response driverResponse + var response DriverResponse if err := json.Unmarshal(buf.Bytes(), &response); err != nil { return nil, err } return &response, nil } } + +// slicesClip removes unused capacity from the slice, returning s[:len(s):len(s)]. +// TODO(adonovan): use go1.21 slices.Clip. +func slicesClip[S ~[]E, E any](s S) S { return s[:len(s):len(s)] } diff --git a/vendor/golang.org/x/tools/go/packages/golist.go b/vendor/golang.org/x/tools/go/packages/golist.go index b5de9cf9..1a3a5b44 100644 --- a/vendor/golang.org/x/tools/go/packages/golist.go +++ b/vendor/golang.org/x/tools/go/packages/golist.go @@ -9,9 +9,9 @@ import ( "context" "encoding/json" "fmt" - "io/ioutil" "log" "os" + "os/exec" "path" "path/filepath" "reflect" @@ -21,8 +21,6 @@ import ( "sync" "unicode" - exec "golang.org/x/sys/execabs" - "golang.org/x/tools/go/internal/packagesdriver" "golang.org/x/tools/internal/gocommand" "golang.org/x/tools/internal/packagesinternal" ) @@ -36,23 +34,23 @@ type goTooOldError struct { error } -// responseDeduper wraps a driverResponse, deduplicating its contents. +// responseDeduper wraps a DriverResponse, deduplicating its contents. type responseDeduper struct { seenRoots map[string]bool seenPackages map[string]*Package - dr *driverResponse + dr *DriverResponse } func newDeduper() *responseDeduper { return &responseDeduper{ - dr: &driverResponse{}, + dr: &DriverResponse{}, seenRoots: map[string]bool{}, seenPackages: map[string]*Package{}, } } -// addAll fills in r with a driverResponse. -func (r *responseDeduper) addAll(dr *driverResponse) { +// addAll fills in r with a DriverResponse. +func (r *responseDeduper) addAll(dr *DriverResponse) { for _, pkg := range dr.Packages { r.addPackage(pkg) } @@ -129,7 +127,7 @@ func (state *golistState) mustGetEnv() map[string]string { // goListDriver uses the go list command to interpret the patterns and produce // the build system package structure. // See driver for more details. -func goListDriver(cfg *Config, patterns ...string) (*driverResponse, error) { +func goListDriver(cfg *Config, patterns ...string) (_ *DriverResponse, err error) { // Make sure that any asynchronous go commands are killed when we return. parentCtx := cfg.Context if parentCtx == nil { @@ -147,16 +145,18 @@ func goListDriver(cfg *Config, patterns ...string) (*driverResponse, error) { } // Fill in response.Sizes asynchronously if necessary. - var sizeserr error - var sizeswg sync.WaitGroup if cfg.Mode&NeedTypesSizes != 0 || cfg.Mode&NeedTypes != 0 { - sizeswg.Add(1) + errCh := make(chan error) go func() { - compiler, arch, err := packagesdriver.GetSizesForArgsGolist(ctx, state.cfgInvocation(), cfg.gocmdRunner) - sizeserr = err + compiler, arch, err := getSizesForArgs(ctx, state.cfgInvocation(), cfg.gocmdRunner) response.dr.Compiler = compiler response.dr.Arch = arch - sizeswg.Done() + errCh <- err + }() + defer func() { + if sizesErr := <-errCh; sizesErr != nil { + err = sizesErr + } }() } @@ -209,87 +209,10 @@ extractQueries: } } - // Only use go/packages' overlay processing if we're using a Go version - // below 1.16. Otherwise, go list handles it. - if goVersion, err := state.getGoVersion(); err == nil && goVersion < 16 { - modifiedPkgs, needPkgs, err := state.processGolistOverlay(response) - if err != nil { - return nil, err - } - - var containsCandidates []string - if len(containFiles) > 0 { - containsCandidates = append(containsCandidates, modifiedPkgs...) - containsCandidates = append(containsCandidates, needPkgs...) - } - if err := state.addNeededOverlayPackages(response, needPkgs); err != nil { - return nil, err - } - // Check candidate packages for containFiles. - if len(containFiles) > 0 { - for _, id := range containsCandidates { - pkg, ok := response.seenPackages[id] - if !ok { - response.addPackage(&Package{ - ID: id, - Errors: []Error{{ - Kind: ListError, - Msg: fmt.Sprintf("package %s expected but not seen", id), - }}, - }) - continue - } - for _, f := range containFiles { - for _, g := range pkg.GoFiles { - if sameFile(f, g) { - response.addRoot(id) - } - } - } - } - } - // Add root for any package that matches a pattern. This applies only to - // packages that are modified by overlays, since they are not added as - // roots automatically. - for _, pattern := range restPatterns { - match := matchPattern(pattern) - for _, pkgID := range modifiedPkgs { - pkg, ok := response.seenPackages[pkgID] - if !ok { - continue - } - if match(pkg.PkgPath) { - response.addRoot(pkg.ID) - } - } - } - } - - sizeswg.Wait() - if sizeserr != nil { - return nil, sizeserr - } + // (We may yet return an error due to defer.) return response.dr, nil } -func (state *golistState) addNeededOverlayPackages(response *responseDeduper, pkgs []string) error { - if len(pkgs) == 0 { - return nil - } - dr, err := state.createDriverResponse(pkgs...) - if err != nil { - return err - } - for _, pkg := range dr.Packages { - response.addPackage(pkg) - } - _, needPkgs, err := state.processGolistOverlay(response) - if err != nil { - return err - } - return state.addNeededOverlayPackages(response, needPkgs) -} - func (state *golistState) runContainsQueries(response *responseDeduper, queries []string) error { for _, query := range queries { // TODO(matloob): Do only one query per directory. @@ -341,7 +264,7 @@ func (state *golistState) runContainsQueries(response *responseDeduper, queries // adhocPackage attempts to load or construct an ad-hoc package for a given // query, if the original call to the driver produced inadequate results. -func (state *golistState) adhocPackage(pattern, query string) (*driverResponse, error) { +func (state *golistState) adhocPackage(pattern, query string) (*DriverResponse, error) { response, err := state.createDriverResponse(query) if err != nil { return nil, err @@ -432,7 +355,7 @@ func otherFiles(p *jsonPackage) [][]string { // createDriverResponse uses the "go list" command to expand the pattern // words and return a response for the specified packages. -func (state *golistState) createDriverResponse(words ...string) (*driverResponse, error) { +func (state *golistState) createDriverResponse(words ...string) (*DriverResponse, error) { // go list uses the following identifiers in ImportPath and Imports: // // "p" -- importable package or main (command) @@ -459,7 +382,7 @@ func (state *golistState) createDriverResponse(words ...string) (*driverResponse pkgs := make(map[string]*Package) additionalErrors := make(map[string][]Error) // Decode the JSON and convert it to Package form. - response := &driverResponse{ + response := &DriverResponse{ GoVersion: goVersion, } for dec := json.NewDecoder(buf); dec.More(); { @@ -917,6 +840,7 @@ func (state *golistState) cfgInvocation() gocommand.Invocation { Env: cfg.Env, Logf: cfg.Logf, WorkingDir: cfg.Dir, + Overlay: cfg.goListOverlayFile, } } @@ -925,26 +849,6 @@ func (state *golistState) invokeGo(verb string, args ...string) (*bytes.Buffer, cfg := state.cfg inv := state.cfgInvocation() - - // For Go versions 1.16 and above, `go list` accepts overlays directly via - // the -overlay flag. Set it, if it's available. - // - // The check for "list" is not necessarily required, but we should avoid - // getting the go version if possible. - if verb == "list" { - goVersion, err := state.getGoVersion() - if err != nil { - return nil, err - } - if goVersion >= 16 { - filename, cleanup, err := state.writeOverlays() - if err != nil { - return nil, err - } - defer cleanup() - inv.Overlay = filename - } - } inv.Verb = verb inv.Args = args gocmdRunner := cfg.gocmdRunner @@ -1091,67 +995,6 @@ func (state *golistState) invokeGo(verb string, args ...string) (*bytes.Buffer, return stdout, nil } -// OverlayJSON is the format overlay files are expected to be in. -// The Replace map maps from overlaid paths to replacement paths: -// the Go command will forward all reads trying to open -// each overlaid path to its replacement path, or consider the overlaid -// path not to exist if the replacement path is empty. -// -// From golang/go#39958. -type OverlayJSON struct { - Replace map[string]string `json:"replace,omitempty"` -} - -// writeOverlays writes out files for go list's -overlay flag, as described -// above. -func (state *golistState) writeOverlays() (filename string, cleanup func(), err error) { - // Do nothing if there are no overlays in the config. - if len(state.cfg.Overlay) == 0 { - return "", func() {}, nil - } - dir, err := ioutil.TempDir("", "gopackages-*") - if err != nil { - return "", nil, err - } - // The caller must clean up this directory, unless this function returns an - // error. - cleanup = func() { - os.RemoveAll(dir) - } - defer func() { - if err != nil { - cleanup() - } - }() - overlays := map[string]string{} - for k, v := range state.cfg.Overlay { - // Create a unique filename for the overlaid files, to avoid - // creating nested directories. - noSeparator := strings.Join(strings.Split(filepath.ToSlash(k), "/"), "") - f, err := ioutil.TempFile(dir, fmt.Sprintf("*-%s", noSeparator)) - if err != nil { - return "", func() {}, err - } - if _, err := f.Write(v); err != nil { - return "", func() {}, err - } - if err := f.Close(); err != nil { - return "", func() {}, err - } - overlays[k] = f.Name() - } - b, err := json.Marshal(OverlayJSON{Replace: overlays}) - if err != nil { - return "", func() {}, err - } - // Write out the overlay file that contains the filepath mappings. - filename = filepath.Join(dir, "overlay.json") - if err := ioutil.WriteFile(filename, b, 0665); err != nil { - return "", func() {}, err - } - return filename, cleanup, nil -} - func containsGoFile(s []string) bool { for _, f := range s { if strings.HasSuffix(f, ".go") { @@ -1180,3 +1023,44 @@ func cmdDebugStr(cmd *exec.Cmd) string { } return fmt.Sprintf("GOROOT=%v GOPATH=%v GO111MODULE=%v GOPROXY=%v PWD=%v %v", env["GOROOT"], env["GOPATH"], env["GO111MODULE"], env["GOPROXY"], env["PWD"], strings.Join(args, " ")) } + +// getSizesForArgs queries 'go list' for the appropriate +// Compiler and GOARCH arguments to pass to [types.SizesFor]. +func getSizesForArgs(ctx context.Context, inv gocommand.Invocation, gocmdRunner *gocommand.Runner) (string, string, error) { + inv.Verb = "list" + inv.Args = []string{"-f", "{{context.GOARCH}} {{context.Compiler}}", "--", "unsafe"} + stdout, stderr, friendlyErr, rawErr := gocmdRunner.RunRaw(ctx, inv) + var goarch, compiler string + if rawErr != nil { + rawErrMsg := rawErr.Error() + if strings.Contains(rawErrMsg, "cannot find main module") || + strings.Contains(rawErrMsg, "go.mod file not found") { + // User's running outside of a module. + // All bets are off. Get GOARCH and guess compiler is gc. + // TODO(matloob): Is this a problem in practice? + inv.Verb = "env" + inv.Args = []string{"GOARCH"} + envout, enverr := gocmdRunner.Run(ctx, inv) + if enverr != nil { + return "", "", enverr + } + goarch = strings.TrimSpace(envout.String()) + compiler = "gc" + } else if friendlyErr != nil { + return "", "", friendlyErr + } else { + // This should be unreachable, but be defensive + // in case RunRaw's error results are inconsistent. + return "", "", rawErr + } + } else { + fields := strings.Fields(stdout.String()) + if len(fields) < 2 { + return "", "", fmt.Errorf("could not parse GOARCH and Go compiler in format \" \":\nstdout: <<%s>>\nstderr: <<%s>>", + stdout.String(), stderr.String()) + } + goarch = fields[0] + compiler = fields[1] + } + return compiler, goarch, nil +} diff --git a/vendor/golang.org/x/tools/go/packages/golist_overlay.go b/vendor/golang.org/x/tools/go/packages/golist_overlay.go index 9576b472..d823c474 100644 --- a/vendor/golang.org/x/tools/go/packages/golist_overlay.go +++ b/vendor/golang.org/x/tools/go/packages/golist_overlay.go @@ -6,314 +6,11 @@ package packages import ( "encoding/json" - "fmt" - "go/parser" - "go/token" - "os" "path/filepath" - "regexp" - "sort" - "strconv" - "strings" "golang.org/x/tools/internal/gocommand" ) -// processGolistOverlay provides rudimentary support for adding -// files that don't exist on disk to an overlay. The results can be -// sometimes incorrect. -// TODO(matloob): Handle unsupported cases, including the following: -// - determining the correct package to add given a new import path -func (state *golistState) processGolistOverlay(response *responseDeduper) (modifiedPkgs, needPkgs []string, err error) { - havePkgs := make(map[string]string) // importPath -> non-test package ID - needPkgsSet := make(map[string]bool) - modifiedPkgsSet := make(map[string]bool) - - pkgOfDir := make(map[string][]*Package) - for _, pkg := range response.dr.Packages { - // This is an approximation of import path to id. This can be - // wrong for tests, vendored packages, and a number of other cases. - havePkgs[pkg.PkgPath] = pkg.ID - dir, err := commonDir(pkg.GoFiles) - if err != nil { - return nil, nil, err - } - if dir != "" { - pkgOfDir[dir] = append(pkgOfDir[dir], pkg) - } - } - - // If no new imports are added, it is safe to avoid loading any needPkgs. - // Otherwise, it's hard to tell which package is actually being loaded - // (due to vendoring) and whether any modified package will show up - // in the transitive set of dependencies (because new imports are added, - // potentially modifying the transitive set of dependencies). - var overlayAddsImports bool - - // If both a package and its test package are created by the overlay, we - // need the real package first. Process all non-test files before test - // files, and make the whole process deterministic while we're at it. - var overlayFiles []string - for opath := range state.cfg.Overlay { - overlayFiles = append(overlayFiles, opath) - } - sort.Slice(overlayFiles, func(i, j int) bool { - iTest := strings.HasSuffix(overlayFiles[i], "_test.go") - jTest := strings.HasSuffix(overlayFiles[j], "_test.go") - if iTest != jTest { - return !iTest // non-tests are before tests. - } - return overlayFiles[i] < overlayFiles[j] - }) - for _, opath := range overlayFiles { - contents := state.cfg.Overlay[opath] - base := filepath.Base(opath) - dir := filepath.Dir(opath) - var pkg *Package // if opath belongs to both a package and its test variant, this will be the test variant - var testVariantOf *Package // if opath is a test file, this is the package it is testing - var fileExists bool - isTestFile := strings.HasSuffix(opath, "_test.go") - pkgName, ok := extractPackageName(opath, contents) - if !ok { - // Don't bother adding a file that doesn't even have a parsable package statement - // to the overlay. - continue - } - // If all the overlay files belong to a different package, change the - // package name to that package. - maybeFixPackageName(pkgName, isTestFile, pkgOfDir[dir]) - nextPackage: - for _, p := range response.dr.Packages { - if pkgName != p.Name && p.ID != "command-line-arguments" { - continue - } - for _, f := range p.GoFiles { - if !sameFile(filepath.Dir(f), dir) { - continue - } - // Make sure to capture information on the package's test variant, if needed. - if isTestFile && !hasTestFiles(p) { - // TODO(matloob): Are there packages other than the 'production' variant - // of a package that this can match? This shouldn't match the test main package - // because the file is generated in another directory. - testVariantOf = p - continue nextPackage - } else if !isTestFile && hasTestFiles(p) { - // We're examining a test variant, but the overlaid file is - // a non-test file. Because the overlay implementation - // (currently) only adds a file to one package, skip this - // package, so that we can add the file to the production - // variant of the package. (https://golang.org/issue/36857 - // tracks handling overlays on both the production and test - // variant of a package). - continue nextPackage - } - if pkg != nil && p != pkg && pkg.PkgPath == p.PkgPath { - // We have already seen the production version of the - // for which p is a test variant. - if hasTestFiles(p) { - testVariantOf = pkg - } - } - pkg = p - if filepath.Base(f) == base { - fileExists = true - } - } - } - // The overlay could have included an entirely new package or an - // ad-hoc package. An ad-hoc package is one that we have manually - // constructed from inadequate `go list` results for a file= query. - // It will have the ID command-line-arguments. - if pkg == nil || pkg.ID == "command-line-arguments" { - // Try to find the module or gopath dir the file is contained in. - // Then for modules, add the module opath to the beginning. - pkgPath, ok, err := state.getPkgPath(dir) - if err != nil { - return nil, nil, err - } - if !ok { - break - } - var forTest string // only set for x tests - isXTest := strings.HasSuffix(pkgName, "_test") - if isXTest { - forTest = pkgPath - pkgPath += "_test" - } - id := pkgPath - if isTestFile { - if isXTest { - id = fmt.Sprintf("%s [%s.test]", pkgPath, forTest) - } else { - id = fmt.Sprintf("%s [%s.test]", pkgPath, pkgPath) - } - } - if pkg != nil { - // TODO(rstambler): We should change the package's path and ID - // here. The only issue is that this messes with the roots. - } else { - // Try to reclaim a package with the same ID, if it exists in the response. - for _, p := range response.dr.Packages { - if reclaimPackage(p, id, opath, contents) { - pkg = p - break - } - } - // Otherwise, create a new package. - if pkg == nil { - pkg = &Package{ - PkgPath: pkgPath, - ID: id, - Name: pkgName, - Imports: make(map[string]*Package), - } - response.addPackage(pkg) - havePkgs[pkg.PkgPath] = id - // Add the production package's sources for a test variant. - if isTestFile && !isXTest && testVariantOf != nil { - pkg.GoFiles = append(pkg.GoFiles, testVariantOf.GoFiles...) - pkg.CompiledGoFiles = append(pkg.CompiledGoFiles, testVariantOf.CompiledGoFiles...) - // Add the package under test and its imports to the test variant. - pkg.forTest = testVariantOf.PkgPath - for k, v := range testVariantOf.Imports { - pkg.Imports[k] = &Package{ID: v.ID} - } - } - if isXTest { - pkg.forTest = forTest - } - } - } - } - if !fileExists { - pkg.GoFiles = append(pkg.GoFiles, opath) - // TODO(matloob): Adding the file to CompiledGoFiles can exhibit the wrong behavior - // if the file will be ignored due to its build tags. - pkg.CompiledGoFiles = append(pkg.CompiledGoFiles, opath) - modifiedPkgsSet[pkg.ID] = true - } - imports, err := extractImports(opath, contents) - if err != nil { - // Let the parser or type checker report errors later. - continue - } - for _, imp := range imports { - // TODO(rstambler): If the package is an x test and the import has - // a test variant, make sure to replace it. - if _, found := pkg.Imports[imp]; found { - continue - } - overlayAddsImports = true - id, ok := havePkgs[imp] - if !ok { - var err error - id, err = state.resolveImport(dir, imp) - if err != nil { - return nil, nil, err - } - } - pkg.Imports[imp] = &Package{ID: id} - // Add dependencies to the non-test variant version of this package as well. - if testVariantOf != nil { - testVariantOf.Imports[imp] = &Package{ID: id} - } - } - } - - // toPkgPath guesses the package path given the id. - toPkgPath := func(sourceDir, id string) (string, error) { - if i := strings.IndexByte(id, ' '); i >= 0 { - return state.resolveImport(sourceDir, id[:i]) - } - return state.resolveImport(sourceDir, id) - } - - // Now that new packages have been created, do another pass to determine - // the new set of missing packages. - for _, pkg := range response.dr.Packages { - for _, imp := range pkg.Imports { - if len(pkg.GoFiles) == 0 { - return nil, nil, fmt.Errorf("cannot resolve imports for package %q with no Go files", pkg.PkgPath) - } - pkgPath, err := toPkgPath(filepath.Dir(pkg.GoFiles[0]), imp.ID) - if err != nil { - return nil, nil, err - } - if _, ok := havePkgs[pkgPath]; !ok { - needPkgsSet[pkgPath] = true - } - } - } - - if overlayAddsImports { - needPkgs = make([]string, 0, len(needPkgsSet)) - for pkg := range needPkgsSet { - needPkgs = append(needPkgs, pkg) - } - } - modifiedPkgs = make([]string, 0, len(modifiedPkgsSet)) - for pkg := range modifiedPkgsSet { - modifiedPkgs = append(modifiedPkgs, pkg) - } - return modifiedPkgs, needPkgs, err -} - -// resolveImport finds the ID of a package given its import path. -// In particular, it will find the right vendored copy when in GOPATH mode. -func (state *golistState) resolveImport(sourceDir, importPath string) (string, error) { - env, err := state.getEnv() - if err != nil { - return "", err - } - if env["GOMOD"] != "" { - return importPath, nil - } - - searchDir := sourceDir - for { - vendorDir := filepath.Join(searchDir, "vendor") - exists, ok := state.vendorDirs[vendorDir] - if !ok { - info, err := os.Stat(vendorDir) - exists = err == nil && info.IsDir() - state.vendorDirs[vendorDir] = exists - } - - if exists { - vendoredPath := filepath.Join(vendorDir, importPath) - if info, err := os.Stat(vendoredPath); err == nil && info.IsDir() { - // We should probably check for .go files here, but shame on anyone who fools us. - path, ok, err := state.getPkgPath(vendoredPath) - if err != nil { - return "", err - } - if ok { - return path, nil - } - } - } - - // We know we've hit the top of the filesystem when we Dir / and get /, - // or C:\ and get C:\, etc. - next := filepath.Dir(searchDir) - if next == searchDir { - break - } - searchDir = next - } - return importPath, nil -} - -func hasTestFiles(p *Package) bool { - for _, f := range p.GoFiles { - if strings.HasSuffix(f, "_test.go") { - return true - } - } - return false -} - // determineRootDirs returns a mapping from absolute directories that could // contain code to their corresponding import path prefixes. func (state *golistState) determineRootDirs() (map[string]string, error) { @@ -384,192 +81,3 @@ func (state *golistState) determineRootDirsGOPATH() (map[string]string, error) { } return m, nil } - -func extractImports(filename string, contents []byte) ([]string, error) { - f, err := parser.ParseFile(token.NewFileSet(), filename, contents, parser.ImportsOnly) // TODO(matloob): reuse fileset? - if err != nil { - return nil, err - } - var res []string - for _, imp := range f.Imports { - quotedPath := imp.Path.Value - path, err := strconv.Unquote(quotedPath) - if err != nil { - return nil, err - } - res = append(res, path) - } - return res, nil -} - -// reclaimPackage attempts to reuse a package that failed to load in an overlay. -// -// If the package has errors and has no Name, GoFiles, or Imports, -// then it's possible that it doesn't yet exist on disk. -func reclaimPackage(pkg *Package, id string, filename string, contents []byte) bool { - // TODO(rstambler): Check the message of the actual error? - // It differs between $GOPATH and module mode. - if pkg.ID != id { - return false - } - if len(pkg.Errors) != 1 { - return false - } - if pkg.Name != "" || pkg.ExportFile != "" { - return false - } - if len(pkg.GoFiles) > 0 || len(pkg.CompiledGoFiles) > 0 || len(pkg.OtherFiles) > 0 { - return false - } - if len(pkg.Imports) > 0 { - return false - } - pkgName, ok := extractPackageName(filename, contents) - if !ok { - return false - } - pkg.Name = pkgName - pkg.Errors = nil - return true -} - -func extractPackageName(filename string, contents []byte) (string, bool) { - // TODO(rstambler): Check the message of the actual error? - // It differs between $GOPATH and module mode. - f, err := parser.ParseFile(token.NewFileSet(), filename, contents, parser.PackageClauseOnly) // TODO(matloob): reuse fileset? - if err != nil { - return "", false - } - return f.Name.Name, true -} - -// commonDir returns the directory that all files are in, "" if files is empty, -// or an error if they aren't in the same directory. -func commonDir(files []string) (string, error) { - seen := make(map[string]bool) - for _, f := range files { - seen[filepath.Dir(f)] = true - } - if len(seen) > 1 { - return "", fmt.Errorf("files (%v) are in more than one directory: %v", files, seen) - } - for k := range seen { - // seen has only one element; return it. - return k, nil - } - return "", nil // no files -} - -// It is possible that the files in the disk directory dir have a different package -// name from newName, which is deduced from the overlays. If they all have a different -// package name, and they all have the same package name, then that name becomes -// the package name. -// It returns true if it changes the package name, false otherwise. -func maybeFixPackageName(newName string, isTestFile bool, pkgsOfDir []*Package) { - names := make(map[string]int) - for _, p := range pkgsOfDir { - names[p.Name]++ - } - if len(names) != 1 { - // some files are in different packages - return - } - var oldName string - for k := range names { - oldName = k - } - if newName == oldName { - return - } - // We might have a case where all of the package names in the directory are - // the same, but the overlay file is for an x test, which belongs to its - // own package. If the x test does not yet exist on disk, we may not yet - // have its package name on disk, but we should not rename the packages. - // - // We use a heuristic to determine if this file belongs to an x test: - // The test file should have a package name whose package name has a _test - // suffix or looks like "newName_test". - maybeXTest := strings.HasPrefix(oldName+"_test", newName) || strings.HasSuffix(newName, "_test") - if isTestFile && maybeXTest { - return - } - for _, p := range pkgsOfDir { - p.Name = newName - } -} - -// This function is copy-pasted from -// https://github.com/golang/go/blob/9706f510a5e2754595d716bd64be8375997311fb/src/cmd/go/internal/search/search.go#L360. -// It should be deleted when we remove support for overlays from go/packages. -// -// NOTE: This does not handle any ./... or ./ style queries, as this function -// doesn't know the working directory. -// -// matchPattern(pattern)(name) reports whether -// name matches pattern. Pattern is a limited glob -// pattern in which '...' means 'any string' and there -// is no other special syntax. -// Unfortunately, there are two special cases. Quoting "go help packages": -// -// First, /... at the end of the pattern can match an empty string, -// so that net/... matches both net and packages in its subdirectories, like net/http. -// Second, any slash-separated pattern element containing a wildcard never -// participates in a match of the "vendor" element in the path of a vendored -// package, so that ./... does not match packages in subdirectories of -// ./vendor or ./mycode/vendor, but ./vendor/... and ./mycode/vendor/... do. -// Note, however, that a directory named vendor that itself contains code -// is not a vendored package: cmd/vendor would be a command named vendor, -// and the pattern cmd/... matches it. -func matchPattern(pattern string) func(name string) bool { - // Convert pattern to regular expression. - // The strategy for the trailing /... is to nest it in an explicit ? expression. - // The strategy for the vendor exclusion is to change the unmatchable - // vendor strings to a disallowed code point (vendorChar) and to use - // "(anything but that codepoint)*" as the implementation of the ... wildcard. - // This is a bit complicated but the obvious alternative, - // namely a hand-written search like in most shell glob matchers, - // is too easy to make accidentally exponential. - // Using package regexp guarantees linear-time matching. - - const vendorChar = "\x00" - - if strings.Contains(pattern, vendorChar) { - return func(name string) bool { return false } - } - - re := regexp.QuoteMeta(pattern) - re = replaceVendor(re, vendorChar) - switch { - case strings.HasSuffix(re, `/`+vendorChar+`/\.\.\.`): - re = strings.TrimSuffix(re, `/`+vendorChar+`/\.\.\.`) + `(/vendor|/` + vendorChar + `/\.\.\.)` - case re == vendorChar+`/\.\.\.`: - re = `(/vendor|/` + vendorChar + `/\.\.\.)` - case strings.HasSuffix(re, `/\.\.\.`): - re = strings.TrimSuffix(re, `/\.\.\.`) + `(/\.\.\.)?` - } - re = strings.ReplaceAll(re, `\.\.\.`, `[^`+vendorChar+`]*`) - - reg := regexp.MustCompile(`^` + re + `$`) - - return func(name string) bool { - if strings.Contains(name, vendorChar) { - return false - } - return reg.MatchString(replaceVendor(name, vendorChar)) - } -} - -// replaceVendor returns the result of replacing -// non-trailing vendor path elements in x with repl. -func replaceVendor(x, repl string) string { - if !strings.Contains(x, "vendor") { - return x - } - elem := strings.Split(x, "/") - for i := 0; i < len(elem)-1; i++ { - if elem[i] == "vendor" { - elem[i] = repl - } - } - return strings.Join(elem, "/") -} diff --git a/vendor/golang.org/x/tools/go/packages/loadmode_string.go b/vendor/golang.org/x/tools/go/packages/loadmode_string.go index 5c080d21..5fcad6ea 100644 --- a/vendor/golang.org/x/tools/go/packages/loadmode_string.go +++ b/vendor/golang.org/x/tools/go/packages/loadmode_string.go @@ -9,49 +9,46 @@ import ( "strings" ) -var allModes = []LoadMode{ - NeedName, - NeedFiles, - NeedCompiledGoFiles, - NeedImports, - NeedDeps, - NeedExportFile, - NeedTypes, - NeedSyntax, - NeedTypesInfo, - NeedTypesSizes, +var modes = [...]struct { + mode LoadMode + name string +}{ + {NeedName, "NeedName"}, + {NeedFiles, "NeedFiles"}, + {NeedCompiledGoFiles, "NeedCompiledGoFiles"}, + {NeedImports, "NeedImports"}, + {NeedDeps, "NeedDeps"}, + {NeedExportFile, "NeedExportFile"}, + {NeedTypes, "NeedTypes"}, + {NeedSyntax, "NeedSyntax"}, + {NeedTypesInfo, "NeedTypesInfo"}, + {NeedTypesSizes, "NeedTypesSizes"}, + {NeedModule, "NeedModule"}, + {NeedEmbedFiles, "NeedEmbedFiles"}, + {NeedEmbedPatterns, "NeedEmbedPatterns"}, } -var modeStrings = []string{ - "NeedName", - "NeedFiles", - "NeedCompiledGoFiles", - "NeedImports", - "NeedDeps", - "NeedExportFile", - "NeedTypes", - "NeedSyntax", - "NeedTypesInfo", - "NeedTypesSizes", -} - -func (mod LoadMode) String() string { - m := mod - if m == 0 { +func (mode LoadMode) String() string { + if mode == 0 { return "LoadMode(0)" } var out []string - for i, x := range allModes { - if x > m { - break + // named bits + for _, item := range modes { + if (mode & item.mode) != 0 { + mode ^= item.mode + out = append(out, item.name) } - if (m & x) != 0 { - out = append(out, modeStrings[i]) - m = m ^ x + } + // unnamed residue + if mode != 0 { + if out == nil { + return fmt.Sprintf("LoadMode(%#x)", int(mode)) } + out = append(out, fmt.Sprintf("%#x", int(mode))) } - if m != 0 { - out = append(out, "Unknown") + if len(out) == 1 { + return out[0] } - return fmt.Sprintf("LoadMode(%s)", strings.Join(out, "|")) + return "(" + strings.Join(out, "|") + ")" } diff --git a/vendor/golang.org/x/tools/go/packages/packages.go b/vendor/golang.org/x/tools/go/packages/packages.go index 124a6fe1..f227f1ba 100644 --- a/vendor/golang.org/x/tools/go/packages/packages.go +++ b/vendor/golang.org/x/tools/go/packages/packages.go @@ -9,6 +9,7 @@ package packages import ( "context" "encoding/json" + "errors" "fmt" "go/ast" "go/parser" @@ -16,7 +17,6 @@ import ( "go/token" "go/types" "io" - "io/ioutil" "log" "os" "path/filepath" @@ -25,20 +25,31 @@ import ( "sync" "time" + "golang.org/x/sync/errgroup" + "golang.org/x/tools/go/gcexportdata" "golang.org/x/tools/internal/gocommand" "golang.org/x/tools/internal/packagesinternal" - "golang.org/x/tools/internal/typeparams" "golang.org/x/tools/internal/typesinternal" + "golang.org/x/tools/internal/versions" ) // A LoadMode controls the amount of detail to return when loading. // The bits below can be combined to specify which fields should be // filled in the result packages. +// // The zero value is a special case, equivalent to combining // the NeedName, NeedFiles, and NeedCompiledGoFiles bits. +// // ID and Errors (if present) will always be filled. -// Load may return more information than requested. +// [Load] may return more information than requested. +// +// Unfortunately there are a number of open bugs related to +// interactions among the LoadMode bits: +// - https://github.com/golang/go/issues/56633 +// - https://github.com/golang/go/issues/56677 +// - https://github.com/golang/go/issues/58726 +// - https://github.com/golang/go/issues/63517 type LoadMode int const ( @@ -64,7 +75,7 @@ const ( // NeedTypes adds Types, Fset, and IllTyped. NeedTypes - // NeedSyntax adds Syntax. + // NeedSyntax adds Syntax and Fset. NeedSyntax // NeedTypesInfo adds TypesInfo. @@ -92,25 +103,37 @@ const ( // NeedEmbedPatterns adds EmbedPatterns. NeedEmbedPatterns + + // Be sure to update loadmode_string.go when adding new items! ) const ( + // LoadFiles loads the name and file names for the initial packages. + // // Deprecated: LoadFiles exists for historical compatibility // and should not be used. Please directly specify the needed fields using the Need values. LoadFiles = NeedName | NeedFiles | NeedCompiledGoFiles + // LoadImports loads the name, file names, and import mapping for the initial packages. + // // Deprecated: LoadImports exists for historical compatibility // and should not be used. Please directly specify the needed fields using the Need values. LoadImports = LoadFiles | NeedImports + // LoadTypes loads exported type information for the initial packages. + // // Deprecated: LoadTypes exists for historical compatibility // and should not be used. Please directly specify the needed fields using the Need values. LoadTypes = LoadImports | NeedTypes | NeedTypesSizes + // LoadSyntax loads typed syntax for the initial packages. + // // Deprecated: LoadSyntax exists for historical compatibility // and should not be used. Please directly specify the needed fields using the Need values. LoadSyntax = LoadTypes | NeedSyntax | NeedTypesInfo + // LoadAllSyntax loads typed syntax for the initial packages and all dependencies. + // // Deprecated: LoadAllSyntax exists for historical compatibility // and should not be used. Please directly specify the needed fields using the Need values. LoadAllSyntax = LoadSyntax | NeedDeps @@ -121,15 +144,21 @@ const ( // A Config specifies details about how packages should be loaded. // The zero value is a valid configuration. +// // Calls to Load do not modify this struct. +// +// TODO(adonovan): #67702: this is currently false: in fact, +// calls to [Load] do not modify the public fields of this struct, but +// may modify hidden fields, so concurrent calls to [Load] must not +// use the same Config. But perhaps we should reestablish the +// documented invariant. type Config struct { // Mode controls the level of information returned for each package. Mode LoadMode // Context specifies the context for the load operation. - // If the context is cancelled, the loader may stop early - // and return an ErrCancelled error. - // If Context is nil, the load cannot be cancelled. + // Cancelling the context may cause [Load] to abort and + // return an error. Context context.Context // Logf is the logger for the config. @@ -198,95 +227,209 @@ type Config struct { // setting Tests may have no effect. Tests bool - // Overlay provides a mapping of absolute file paths to file contents. - // If the file with the given path already exists, the parser will use the - // alternative file contents provided by the map. + // Overlay is a mapping from absolute file paths to file contents. // - // Overlays provide incomplete support for when a given file doesn't - // already exist on disk. See the package doc above for more details. + // For each map entry, [Load] uses the alternative file + // contents provided by the overlay mapping instead of reading + // from the file system. This mechanism can be used to enable + // editor-integrated tools to correctly analyze the contents + // of modified but unsaved buffers, for example. + // + // The overlay mapping is passed to the build system's driver + // (see "The driver protocol") so that it too can report + // consistent package metadata about unsaved files. However, + // drivers may vary in their level of support for overlays. Overlay map[string][]byte -} -// driver is the type for functions that query the build system for the -// packages named by the patterns. -type driver func(cfg *Config, patterns ...string) (*driverResponse, error) - -// driverResponse contains the results for a driver query. -type driverResponse struct { - // NotHandled is returned if the request can't be handled by the current - // driver. If an external driver returns a response with NotHandled, the - // rest of the driverResponse is ignored, and go/packages will fallback - // to the next driver. If go/packages is extended in the future to support - // lists of multiple drivers, go/packages will fall back to the next driver. - NotHandled bool - - // Compiler and Arch are the arguments pass of types.SizesFor - // to get a types.Sizes to use when type checking. - Compiler string - Arch string - - // Roots is the set of package IDs that make up the root packages. - // We have to encode this separately because when we encode a single package - // we cannot know if it is one of the roots as that requires knowledge of the - // graph it is part of. - Roots []string `json:",omitempty"` - - // Packages is the full set of packages in the graph. - // The packages are not connected into a graph. - // The Imports if populated will be stubs that only have their ID set. - // Imports will be connected and then type and syntax information added in a - // later pass (see refine). - Packages []*Package - - // GoVersion is the minor version number used by the driver - // (e.g. the go command on the PATH) when selecting .go files. - // Zero means unknown. - GoVersion int + // goListOverlayFile is the JSON file that encodes the Overlay + // mapping, used by 'go list -overlay=...' + goListOverlayFile string } // Load loads and returns the Go packages named by the given patterns. // -// Config specifies loading options; -// nil behaves the same as an empty Config. +// The cfg parameter specifies loading options; nil behaves the same as an empty [Config]. +// +// The [Config.Mode] field is a set of bits that determine what kinds +// of information should be computed and returned. Modes that require +// more information tend to be slower. See [LoadMode] for details +// and important caveats. Its zero value is equivalent to +// [NeedName] | [NeedFiles] | [NeedCompiledGoFiles]. +// +// Each call to Load returns a new set of [Package] instances. +// The Packages and their Imports form a directed acyclic graph. // -// Load returns an error if any of the patterns was invalid -// as defined by the underlying build system. +// If the [NeedTypes] mode flag was set, each call to Load uses a new +// [types.Importer], so [types.Object] and [types.Type] values from +// different calls to Load must not be mixed as they will have +// inconsistent notions of type identity. +// +// If any of the patterns was invalid as defined by the +// underlying build system, Load returns an error. // It may return an empty list of packages without an error, // for instance for an empty expansion of a valid wildcard. // Errors associated with a particular package are recorded in the // corresponding Package's Errors list, and do not cause Load to // return an error. Clients may need to handle such errors before -// proceeding with further analysis. The PrintErrors function is +// proceeding with further analysis. The [PrintErrors] function is // provided for convenient display of all errors. func Load(cfg *Config, patterns ...string) ([]*Package, error) { - l := newLoader(cfg) - response, err := defaultDriver(&l.Config, patterns...) + ld := newLoader(cfg) + response, external, err := defaultDriver(&ld.Config, patterns...) if err != nil { return nil, err } - l.sizes = types.SizesFor(response.Compiler, response.Arch) - return l.refine(response) + + ld.sizes = types.SizesFor(response.Compiler, response.Arch) + if ld.sizes == nil && ld.Config.Mode&(NeedTypes|NeedTypesSizes|NeedTypesInfo) != 0 { + // Type size information is needed but unavailable. + if external { + // An external driver may fail to populate the Compiler/GOARCH fields, + // especially since they are relatively new (see #63700). + // Provide a sensible fallback in this case. + ld.sizes = types.SizesFor("gc", runtime.GOARCH) + if ld.sizes == nil { // gccgo-only arch + ld.sizes = types.SizesFor("gc", "amd64") + } + } else { + // Go list should never fail to deliver accurate size information. + // Reject the whole Load since the error is the same for every package. + return nil, fmt.Errorf("can't determine type sizes for compiler %q on GOARCH %q", + response.Compiler, response.Arch) + } + } + + return ld.refine(response) } // defaultDriver is a driver that implements go/packages' fallback behavior. // It will try to request to an external driver, if one exists. If there's // no external driver, or the driver returns a response with NotHandled set, // defaultDriver will fall back to the go list driver. -func defaultDriver(cfg *Config, patterns ...string) (*driverResponse, error) { - driver := findExternalDriver(cfg) - if driver == nil { - driver = goListDriver +// The boolean result indicates that an external driver handled the request. +func defaultDriver(cfg *Config, patterns ...string) (*DriverResponse, bool, error) { + const ( + // windowsArgMax specifies the maximum command line length for + // the Windows' CreateProcess function. + windowsArgMax = 32767 + // maxEnvSize is a very rough estimation of the maximum environment + // size of a user. + maxEnvSize = 16384 + // safeArgMax specifies the maximum safe command line length to use + // by the underlying driver excl. the environment. We choose the Windows' + // ARG_MAX as the starting point because it's one of the lowest ARG_MAX + // constants out of the different supported platforms, + // e.g., https://www.in-ulm.de/~mascheck/various/argmax/#results. + safeArgMax = windowsArgMax - maxEnvSize + ) + chunks, err := splitIntoChunks(patterns, safeArgMax) + if err != nil { + return nil, false, err } - response, err := driver(cfg, patterns...) + + if driver := findExternalDriver(cfg); driver != nil { + response, err := callDriverOnChunks(driver, cfg, chunks) + if err != nil { + return nil, false, err + } else if !response.NotHandled { + return response, true, nil + } + // (fall through) + } + + // go list fallback + // + // Write overlays once, as there are many calls + // to 'go list' (one per chunk plus others too). + overlay, cleanupOverlay, err := gocommand.WriteOverlays(cfg.Overlay) + if err != nil { + return nil, false, err + } + defer cleanupOverlay() + cfg.goListOverlayFile = overlay + + response, err := callDriverOnChunks(goListDriver, cfg, chunks) if err != nil { - return response, err - } else if response.NotHandled { - return goListDriver(cfg, patterns...) + return nil, false, err + } + return response, false, err +} + +// splitIntoChunks chunks the slice so that the total number of characters +// in a chunk is no longer than argMax. +func splitIntoChunks(patterns []string, argMax int) ([][]string, error) { + if argMax <= 0 { + return nil, errors.New("failed to split patterns into chunks, negative safe argMax value") + } + var chunks [][]string + charsInChunk := 0 + nextChunkStart := 0 + for i, v := range patterns { + vChars := len(v) + if vChars > argMax { + // a single pattern is longer than the maximum safe ARG_MAX, hardly should happen + return nil, errors.New("failed to split patterns into chunks, a pattern is too long") + } + charsInChunk += vChars + 1 // +1 is for a whitespace between patterns that has to be counted too + if charsInChunk > argMax { + chunks = append(chunks, patterns[nextChunkStart:i]) + nextChunkStart = i + charsInChunk = vChars + } + } + // add the last chunk + if nextChunkStart < len(patterns) { + chunks = append(chunks, patterns[nextChunkStart:]) + } + return chunks, nil +} + +func callDriverOnChunks(driver driver, cfg *Config, chunks [][]string) (*DriverResponse, error) { + if len(chunks) == 0 { + return driver(cfg) + } + responses := make([]*DriverResponse, len(chunks)) + errNotHandled := errors.New("driver returned NotHandled") + var g errgroup.Group + for i, chunk := range chunks { + i := i + chunk := chunk + g.Go(func() (err error) { + responses[i], err = driver(cfg, chunk...) + if responses[i] != nil && responses[i].NotHandled { + err = errNotHandled + } + return err + }) + } + if err := g.Wait(); err != nil { + if errors.Is(err, errNotHandled) { + return &DriverResponse{NotHandled: true}, nil + } + return nil, err + } + return mergeResponses(responses...), nil +} + +func mergeResponses(responses ...*DriverResponse) *DriverResponse { + if len(responses) == 0 { + return nil + } + response := newDeduper() + response.dr.NotHandled = false + response.dr.Compiler = responses[0].Compiler + response.dr.Arch = responses[0].Arch + response.dr.GoVersion = responses[0].GoVersion + for _, v := range responses { + response.addAll(v) } - return response, nil + return response.dr } // A Package describes a loaded Go package. +// +// It also defines part of the JSON schema of [DriverResponse]. +// See the package documentation for an overview. type Package struct { // ID is a unique identifier for a package, // in a syntax provided by the underlying build system. @@ -345,19 +488,30 @@ type Package struct { // to corresponding loaded Packages. Imports map[string]*Package + // Module is the module information for the package if it exists. + // + // Note: it may be missing for std and cmd; see Go issue #65816. + Module *Module + + // -- The following fields are not part of the driver JSON schema. -- + // Types provides type information for the package. // The NeedTypes LoadMode bit sets this field for packages matching the // patterns; type information for dependencies may be missing or incomplete, // unless NeedDeps and NeedImports are also set. - Types *types.Package + // + // Each call to [Load] returns a consistent set of type + // symbols, as defined by the comment at [types.Identical]. + // Avoid mixing type information from two or more calls to [Load]. + Types *types.Package `json:"-"` // Fset provides position information for Types, TypesInfo, and Syntax. // It is set only when Types is set. - Fset *token.FileSet + Fset *token.FileSet `json:"-"` // IllTyped indicates whether the package or any dependency contains errors. // It is set only when Types is set. - IllTyped bool + IllTyped bool `json:"-"` // Syntax is the package's syntax trees, for the files listed in CompiledGoFiles. // @@ -367,26 +521,28 @@ type Package struct { // // Syntax is kept in the same order as CompiledGoFiles, with the caveat that nils are // removed. If parsing returned nil, Syntax may be shorter than CompiledGoFiles. - Syntax []*ast.File + Syntax []*ast.File `json:"-"` // TypesInfo provides type information about the package's syntax trees. // It is set only when Syntax is set. - TypesInfo *types.Info + TypesInfo *types.Info `json:"-"` // TypesSizes provides the effective size function for types in TypesInfo. - TypesSizes types.Sizes + TypesSizes types.Sizes `json:"-"` + + // -- internal -- // forTest is the package under test, if any. forTest string // depsErrors is the DepsErrors field from the go list response, if any. depsErrors []*packagesinternal.PackageError - - // module is the module information for the package if it exists. - Module *Module } // Module provides module information for a package. +// +// It also defines part of the JSON schema of [DriverResponse]. +// See the package documentation for an overview. type Module struct { Path string // module path Version string // module version @@ -412,12 +568,6 @@ func init() { packagesinternal.GetDepsErrors = func(p interface{}) []*packagesinternal.PackageError { return p.(*Package).depsErrors } - packagesinternal.GetGoCmdRunner = func(config interface{}) *gocommand.Runner { - return config.(*Config).gocmdRunner - } - packagesinternal.SetGoCmdRunner = func(config interface{}, runner *gocommand.Runner) { - config.(*Config).gocmdRunner = runner - } packagesinternal.SetModFile = func(config interface{}, value string) { config.(*Config).modFile = value } @@ -525,6 +675,7 @@ func (p *Package) UnmarshalJSON(b []byte) error { OtherFiles: flat.OtherFiles, EmbedFiles: flat.EmbedFiles, EmbedPatterns: flat.EmbedPatterns, + IgnoredFiles: flat.IgnoredFiles, ExportFile: flat.ExportFile, } if len(flat.Imports) > 0 { @@ -554,7 +705,7 @@ type loaderPackage struct { type loader struct { pkgs map[string]*loaderPackage Config - sizes types.Sizes + sizes types.Sizes // non-nil if needed by mode parseCache map[string]*parseValue parseCacheMu sync.Mutex exportMu sync.Mutex // enforces mutual exclusion of exportdata operations @@ -623,6 +774,7 @@ func newLoader(cfg *Config) *loader { // because we load source if export data is missing. if ld.ParseFile == nil { ld.ParseFile = func(fset *token.FileSet, filename string, src []byte) (*ast.File, error) { + // We implicitly promise to keep doing ast.Object resolution. :( const mode = parser.AllErrors | parser.ParseComments return parser.ParseFile(fset, filename, src, mode) } @@ -634,7 +786,7 @@ func newLoader(cfg *Config) *loader { // refine connects the supplied packages into a graph and then adds type // and syntax information as requested by the LoadMode. -func (ld *loader) refine(response *driverResponse) ([]*Package, error) { +func (ld *loader) refine(response *DriverResponse) ([]*Package, error) { roots := response.Roots rootMap := make(map[string]int, len(roots)) for i, root := range roots { @@ -679,39 +831,38 @@ func (ld *loader) refine(response *driverResponse) ([]*Package, error) { } } - // Materialize the import graph. - - const ( - white = 0 // new - grey = 1 // in progress - black = 2 // complete - ) - - // visit traverses the import graph, depth-first, - // and materializes the graph as Packages.Imports. - // - // Valid imports are saved in the Packages.Import map. - // Invalid imports (cycles and missing nodes) are saved in the importErrors map. - // Thus, even in the presence of both kinds of errors, the Import graph remains a DAG. - // - // visit returns whether the package needs src or has a transitive - // dependency on a package that does. These are the only packages - // for which we load source code. - var stack []*loaderPackage - var visit func(lpkg *loaderPackage) bool - var srcPkgs []*loaderPackage - visit = func(lpkg *loaderPackage) bool { - switch lpkg.color { - case black: - return lpkg.needsrc - case grey: - panic("internal error: grey node") - } - lpkg.color = grey - stack = append(stack, lpkg) // push - stubs := lpkg.Imports // the structure form has only stubs with the ID in the Imports - // If NeedImports isn't set, the imports fields will all be zeroed out. - if ld.Mode&NeedImports != 0 { + if ld.Mode&NeedImports != 0 { + // Materialize the import graph. + + const ( + white = 0 // new + grey = 1 // in progress + black = 2 // complete + ) + + // visit traverses the import graph, depth-first, + // and materializes the graph as Packages.Imports. + // + // Valid imports are saved in the Packages.Import map. + // Invalid imports (cycles and missing nodes) are saved in the importErrors map. + // Thus, even in the presence of both kinds of errors, + // the Import graph remains a DAG. + // + // visit returns whether the package needs src or has a transitive + // dependency on a package that does. These are the only packages + // for which we load source code. + var stack []*loaderPackage + var visit func(lpkg *loaderPackage) bool + visit = func(lpkg *loaderPackage) bool { + switch lpkg.color { + case black: + return lpkg.needsrc + case grey: + panic("internal error: grey node") + } + lpkg.color = grey + stack = append(stack, lpkg) // push + stubs := lpkg.Imports // the structure form has only stubs with the ID in the Imports lpkg.Imports = make(map[string]*Package, len(stubs)) for importPath, ipkg := range stubs { var importErr error @@ -735,40 +886,39 @@ func (ld *loader) refine(response *driverResponse) ([]*Package, error) { } lpkg.Imports[importPath] = imp.Package } - } - if lpkg.needsrc { - srcPkgs = append(srcPkgs, lpkg) - } - if ld.Mode&NeedTypesSizes != 0 { - lpkg.TypesSizes = ld.sizes - } - stack = stack[:len(stack)-1] // pop - lpkg.color = black - return lpkg.needsrc - } + // Complete type information is required for the + // immediate dependencies of each source package. + if lpkg.needsrc && ld.Mode&NeedTypes != 0 { + for _, ipkg := range lpkg.Imports { + ld.pkgs[ipkg.ID].needtypes = true + } + } - if ld.Mode&NeedImports == 0 { - // We do this to drop the stub import packages that we are not even going to try to resolve. - for _, lpkg := range initial { - lpkg.Imports = nil + // NeedTypeSizes causes TypeSizes to be set even + // on packages for which types aren't needed. + if ld.Mode&NeedTypesSizes != 0 { + lpkg.TypesSizes = ld.sizes + } + stack = stack[:len(stack)-1] // pop + lpkg.color = black + + return lpkg.needsrc } - } else { + // For each initial package, create its import DAG. for _, lpkg := range initial { visit(lpkg) } - } - if ld.Mode&NeedImports != 0 && ld.Mode&NeedTypes != 0 { - for _, lpkg := range srcPkgs { - // Complete type information is required for the - // immediate dependencies of each source package. - for _, ipkg := range lpkg.Imports { - imp := ld.pkgs[ipkg.ID] - imp.needtypes = true - } + + } else { + // !NeedImports: drop the stub (ID-only) import packages + // that we are not even going to try to resolve. + for _, lpkg := range initial { + lpkg.Imports = nil } } + // Load type data and syntax if needed, starting at // the initial packages (roots of the import DAG). if ld.Mode&NeedTypes != 0 || ld.Mode&NeedSyntax != 0 { @@ -783,6 +933,12 @@ func (ld *loader) refine(response *driverResponse) ([]*Package, error) { wg.Wait() } + // If the context is done, return its error and + // throw out [likely] incomplete packages. + if err := ld.Context.Err(); err != nil { + return nil, err + } + result := make([]*Package, len(initial)) for i, lpkg := range initial { result[i] = lpkg.Package @@ -816,12 +972,14 @@ func (ld *loader) refine(response *driverResponse) ([]*Package, error) { } if ld.requestedMode&NeedTypes == 0 { ld.pkgs[i].Types = nil - ld.pkgs[i].Fset = nil ld.pkgs[i].IllTyped = false } if ld.requestedMode&NeedSyntax == 0 { ld.pkgs[i].Syntax = nil } + if ld.requestedMode&NeedTypes == 0 && ld.requestedMode&NeedSyntax == 0 { + ld.pkgs[i].Fset = nil + } if ld.requestedMode&NeedTypesInfo == 0 { ld.pkgs[i].TypesInfo = nil } @@ -878,6 +1036,14 @@ func (ld *loader) loadPackage(lpkg *loaderPackage) { lpkg.Types = types.NewPackage(lpkg.PkgPath, lpkg.Name) lpkg.Fset = ld.Fset + // Start shutting down if the context is done and do not load + // source or export data files. + // Packages that import this one will have ld.Context.Err() != nil. + // ld.Context.Err() will be returned later by refine. + if ld.Context.Err() != nil { + return + } + // Subtle: we populate all Types fields with an empty Package // before loading export data so that export data processing // never has to create a types.Package for an indirect dependency, @@ -997,15 +1163,23 @@ func (ld *loader) loadPackage(lpkg *loaderPackage) { return } + // Start shutting down if the context is done and do not type check. + // Packages that import this one will have ld.Context.Err() != nil. + // ld.Context.Err() will be returned later by refine. + if ld.Context.Err() != nil { + return + } + lpkg.TypesInfo = &types.Info{ Types: make(map[ast.Expr]types.TypeAndValue), Defs: make(map[*ast.Ident]types.Object), Uses: make(map[*ast.Ident]types.Object), Implicits: make(map[ast.Node]types.Object), + Instances: make(map[*ast.Ident]types.Instance), Scopes: make(map[ast.Node]*types.Scope), Selections: make(map[*ast.SelectorExpr]*types.Selection), } - typeparams.InitInstanceInfo(lpkg.TypesInfo) + versions.InitFileVersions(lpkg.TypesInfo) lpkg.TypesSizes = ld.sizes importer := importerFunc(func(path string) (*types.Package, error) { @@ -1043,10 +1217,10 @@ func (ld *loader) loadPackage(lpkg *loaderPackage) { IgnoreFuncBodies: ld.Mode&NeedDeps == 0 && !lpkg.initial, Error: appendError, - Sizes: ld.sizes, + Sizes: ld.sizes, // may be nil } if lpkg.Module != nil && lpkg.Module.GoVersion != "" { - typesinternal.SetGoVersion(tc, "go"+lpkg.Module.GoVersion) + tc.GoVersion = "go" + lpkg.Module.GoVersion } if (ld.Mode & typecheckCgo) != 0 { if !typesinternal.SetUsesCgo(tc) { @@ -1057,10 +1231,24 @@ func (ld *loader) loadPackage(lpkg *loaderPackage) { return } } - types.NewChecker(tc, ld.Fset, lpkg.Types, lpkg.TypesInfo).Files(lpkg.Syntax) + typErr := types.NewChecker(tc, ld.Fset, lpkg.Types, lpkg.TypesInfo).Files(lpkg.Syntax) lpkg.importErrors = nil // no longer needed + // In go/types go1.21 and go1.22, Checker.Files failed fast with a + // a "too new" error, without calling tc.Error and without + // proceeding to type-check the package (#66525). + // We rely on the runtimeVersion error to give the suggested remedy. + if typErr != nil && len(lpkg.Errors) == 0 && len(lpkg.Syntax) > 0 { + if msg := typErr.Error(); strings.HasPrefix(msg, "package requires newer Go version") { + appendError(types.Error{ + Fset: ld.Fset, + Pos: lpkg.Syntax[0].Package, + Msg: msg, + }) + } + } + // If !Cgo, the type-checker uses FakeImportC mode, so // it doesn't invoke the importer for import "C", // nor report an error for the import, @@ -1082,6 +1270,12 @@ func (ld *loader) loadPackage(lpkg *loaderPackage) { } } + // If types.Checker.Files had an error that was unreported, + // make sure to report the unknown error so the package is illTyped. + if typErr != nil && len(lpkg.Errors) == 0 { + appendError(typErr) + } + // Record accumulated errors. illTyped := len(lpkg.Errors) > 0 if !illTyped { @@ -1127,7 +1321,7 @@ func (ld *loader) parseFile(filename string) (*ast.File, error) { var err error if src == nil { ioLimit <- true // wait - src, err = ioutil.ReadFile(filename) + src, err = os.ReadFile(filename) <-ioLimit // signal } if err != nil { @@ -1153,11 +1347,6 @@ func (ld *loader) parseFiles(filenames []string) ([]*ast.File, []error) { parsed := make([]*ast.File, n) errors := make([]error, n) for i, file := range filenames { - if ld.Config.Context.Err() != nil { - parsed[i] = nil - errors[i] = ld.Config.Context.Err() - continue - } wg.Add(1) go func(i int, filename string) { parsed[i], errors[i] = ld.parseFile(filename) @@ -1323,6 +1512,10 @@ func impliedLoadMode(loadMode LoadMode) LoadMode { // All these things require knowing the import graph. loadMode |= NeedImports } + if loadMode&NeedTypes != 0 { + // Types require the GoVersion from Module. + loadMode |= NeedModule + } return loadMode } diff --git a/vendor/golang.org/x/tools/go/packages/visit.go b/vendor/golang.org/x/tools/go/packages/visit.go index a1dcc40b..df14ffd9 100644 --- a/vendor/golang.org/x/tools/go/packages/visit.go +++ b/vendor/golang.org/x/tools/go/packages/visit.go @@ -49,11 +49,20 @@ func Visit(pkgs []*Package, pre func(*Package) bool, post func(*Package)) { // PrintErrors returns the number of errors printed. func PrintErrors(pkgs []*Package) int { var n int + errModules := make(map[*Module]bool) Visit(pkgs, nil, func(pkg *Package) { for _, err := range pkg.Errors { fmt.Fprintln(os.Stderr, err) n++ } + + // Print pkg.Module.Error once if present. + mod := pkg.Module + if mod != nil && mod.Error != nil && !errModules[mod] { + errModules[mod] = true + fmt.Fprintln(os.Stderr, mod.Error.Err) + n++ + } }) return n } diff --git a/vendor/golang.org/x/tools/go/types/objectpath/objectpath.go b/vendor/golang.org/x/tools/go/types/objectpath/objectpath.go index fa5834ba..a70b727f 100644 --- a/vendor/golang.org/x/tools/go/types/objectpath/objectpath.go +++ b/vendor/golang.org/x/tools/go/types/objectpath/objectpath.go @@ -26,15 +26,15 @@ package objectpath import ( "fmt" "go/types" - "sort" "strconv" "strings" - _ "unsafe" - "golang.org/x/tools/internal/typeparams" + "golang.org/x/tools/internal/aliases" "golang.org/x/tools/internal/typesinternal" ) +// TODO(adonovan): think about generic aliases. + // A Path is an opaque name that identifies a types.Object // relative to its package. Conceptually, the name consists of a // sequence of destructuring operations applied to the package scope @@ -51,7 +51,7 @@ type Path string // // PO package->object Package.Scope.Lookup // OT object->type Object.Type -// TT type->type Type.{Elem,Key,Params,Results,Underlying} [EKPRU] +// TT type->type Type.{Elem,Key,{,{,Recv}Type}Params,Results,Underlying,Rhs} [EKPRUTrCa] // TO type->object Type.{At,Field,Method,Obj} [AFMO] // // All valid paths start with a package and end at an object @@ -63,8 +63,8 @@ type Path string // - The only PO operator is Package.Scope.Lookup, which requires an identifier. // - The only OT operator is Object.Type, // which we encode as '.' because dot cannot appear in an identifier. -// - The TT operators are encoded as [EKPRUTC]; -// one of these (TypeParam) requires an integer operand, +// - The TT operators are encoded as [EKPRUTrCa]; +// two of these ({,Recv}TypeParams) require an integer operand, // which is encoded as a string of decimal digits. // - The TO operators are encoded as [AFMO]; // three of these (At,Field,Method) require an integer operand, @@ -98,19 +98,21 @@ const ( opType = '.' // .Type() (Object) // type->type operators - opElem = 'E' // .Elem() (Pointer, Slice, Array, Chan, Map) - opKey = 'K' // .Key() (Map) - opParams = 'P' // .Params() (Signature) - opResults = 'R' // .Results() (Signature) - opUnderlying = 'U' // .Underlying() (Named) - opTypeParam = 'T' // .TypeParams.At(i) (Named, Signature) - opConstraint = 'C' // .Constraint() (TypeParam) + opElem = 'E' // .Elem() (Pointer, Slice, Array, Chan, Map) + opKey = 'K' // .Key() (Map) + opParams = 'P' // .Params() (Signature) + opResults = 'R' // .Results() (Signature) + opUnderlying = 'U' // .Underlying() (Named) + opTypeParam = 'T' // .TypeParams.At(i) (Named, Signature) + opRecvTypeParam = 'r' // .RecvTypeParams.At(i) (Signature) + opConstraint = 'C' // .Constraint() (TypeParam) + opRhs = 'a' // .Rhs() (Alias) // type->object operators - opAt = 'A' // .At(i) (Tuple) - opField = 'F' // .Field(i) (Struct) - opMethod = 'M' // .Method(i) (Named or Interface; not Struct: "promoted" names are ignored) - opObj = 'O' // .Obj() (Named, TypeParam) + opAt = 'A' // .At(i) (Tuple) + opField = 'F' // .Field(i) (Struct) + opMethod = 'M' // .Method(i) (Named or Interface; not Struct: "promoted" names are ignored) + opObj = 'O' // .Obj() (Named, TypeParam) ) // For is equivalent to new(Encoder).For(obj). @@ -123,20 +125,7 @@ func For(obj types.Object) (Path, error) { // An Encoder amortizes the cost of encoding the paths of multiple objects. // The zero value of an Encoder is ready to use. type Encoder struct { - scopeMemo map[*types.Scope][]types.Object // memoization of scopeObjects - namedMethodsMemo map[*types.Named][]*types.Func // memoization of namedMethods() - skipMethodSorting bool -} - -// Expose back doors so that gopls can avoid method sorting, which can dominate -// analysis on certain repositories. -// -// TODO(golang/go#61443): remove this. -func init() { - typesinternal.SkipEncoderMethodSorting = func(enc interface{}) { - enc.(*Encoder).skipMethodSorting = true - } - typesinternal.ObjectpathObject = object + scopeMemo map[*types.Scope][]types.Object // memoization of scopeObjects } // For returns the path to an object relative to its package, @@ -239,7 +228,7 @@ func (enc *Encoder) For(obj types.Object) (Path, error) { // Reject obviously non-viable cases. switch obj := obj.(type) { case *types.TypeName: - if _, ok := obj.Type().(*typeparams.TypeParam); !ok { + if _, ok := types.Unalias(obj.Type()).(*types.TypeParam); !ok { // With the exception of type parameters, only package-level type names // have a path. return "", fmt.Errorf("no path for %v", obj) @@ -291,21 +280,26 @@ func (enc *Encoder) For(obj types.Object) (Path, error) { path = append(path, opType) T := o.Type() + if alias, ok := T.(*types.Alias); ok { + if r := findTypeParam(obj, aliases.TypeParams(alias), path, opTypeParam, nil); r != nil { + return Path(r), nil + } + if r := find(obj, aliases.Rhs(alias), append(path, opRhs), nil); r != nil { + return Path(r), nil + } - if tname.IsAlias() { - // type alias + } else if tname.IsAlias() { + // legacy alias if r := find(obj, T, path, nil); r != nil { return Path(r), nil } - } else { - if named, _ := T.(*types.Named); named != nil { - if r := findTypeParam(obj, typeparams.ForNamed(named), path, nil); r != nil { - // generic named type - return Path(r), nil - } - } + + } else if named, ok := T.(*types.Named); ok { // defined (named) type - if r := find(obj, T.Underlying(), append(path, opUnderlying), nil); r != nil { + if r := findTypeParam(obj, named.TypeParams(), path, opTypeParam, nil); r != nil { + return Path(r), nil + } + if r := find(obj, named.Underlying(), append(path, opUnderlying), nil); r != nil { return Path(r), nil } } @@ -326,33 +320,20 @@ func (enc *Encoder) For(obj types.Object) (Path, error) { } // Inspect declared methods of defined types. - if T, ok := o.Type().(*types.Named); ok { + if T, ok := types.Unalias(o.Type()).(*types.Named); ok { path = append(path, opType) - if !enc.skipMethodSorting { - // Note that method index here is always with respect - // to canonical ordering of methods, regardless of how - // they appear in the underlying type. - for i, m := range enc.namedMethods(T) { - path2 := appendOpArg(path, opMethod, i) - if m == obj { - return Path(path2), nil // found declared method - } - if r := find(obj, m.Type(), append(path2, opType), nil); r != nil { - return Path(r), nil - } + // The method index here is always with respect + // to the underlying go/types data structures, + // which ultimately derives from source order + // and must be preserved by export data. + for i := 0; i < T.NumMethods(); i++ { + m := T.Method(i) + path2 := appendOpArg(path, opMethod, i) + if m == obj { + return Path(path2), nil // found declared method } - } else { - // This branch must match the logic in the branch above, using go/types - // APIs without sorting. - for i := 0; i < T.NumMethods(); i++ { - m := T.Method(i) - path2 := appendOpArg(path, opMethod, i) - if m == obj { - return Path(path2), nil // found declared method - } - if r := find(obj, m.Type(), append(path2, opType), nil); r != nil { - return Path(r), nil - } + if r := find(obj, m.Type(), append(path2, opType), nil); r != nil { + return Path(r), nil } } } @@ -420,17 +401,12 @@ func (enc *Encoder) concreteMethod(meth *types.Func) (Path, bool) { // of objectpath will only be giving us origin methods, anyway, as referring // to instantiated methods is usually not useful. - if typeparams.OriginMethod(meth) != meth { + if meth.Origin() != meth { return "", false } - recvT := meth.Type().(*types.Signature).Recv().Type() - if ptr, ok := recvT.(*types.Pointer); ok { - recvT = ptr.Elem() - } - - named, ok := recvT.(*types.Named) - if !ok { + _, named := typesinternal.ReceiverNamed(meth.Type().(*types.Signature).Recv()) + if named == nil { return "", false } @@ -448,22 +424,13 @@ func (enc *Encoder) concreteMethod(meth *types.Func) (Path, bool) { path = append(path, name...) path = append(path, opType) - if !enc.skipMethodSorting { - for i, m := range enc.namedMethods(named) { - if m == meth { - path = appendOpArg(path, opMethod, i) - return Path(path), true - } - } - } else { - // This branch must match the logic of the branch above, using go/types - // APIs without sorting. - for i := 0; i < named.NumMethods(); i++ { - m := named.Method(i) - if m == meth { - path = appendOpArg(path, opMethod, i) - return Path(path), true - } + // Method indices are w.r.t. the go/types data structures, + // ultimately deriving from source order, + // which is preserved by export data. + for i := 0; i < named.NumMethods(); i++ { + if named.Method(i) == meth { + path = appendOpArg(path, opMethod, i) + return Path(path), true } } @@ -482,6 +449,8 @@ func (enc *Encoder) concreteMethod(meth *types.Func) (Path, bool) { // nil, it will be allocated as necessary. func find(obj types.Object, T types.Type, path []byte, seen map[*types.TypeName]bool) []byte { switch T := T.(type) { + case *types.Alias: + return find(obj, types.Unalias(T), path, seen) case *types.Basic, *types.Named: // Named types belonging to pkg were handled already, // so T must belong to another package. No path. @@ -500,7 +469,10 @@ func find(obj types.Object, T types.Type, path []byte, seen map[*types.TypeName] } return find(obj, T.Elem(), append(path, opElem), seen) case *types.Signature: - if r := findTypeParam(obj, typeparams.ForSignature(T), path, seen); r != nil { + if r := findTypeParam(obj, T.RecvTypeParams(), path, opRecvTypeParam, nil); r != nil { + return r + } + if r := findTypeParam(obj, T.TypeParams(), path, opTypeParam, seen); r != nil { return r } if r := find(obj, T.Params(), append(path, opParams), seen); r != nil { @@ -543,7 +515,7 @@ func find(obj types.Object, T types.Type, path []byte, seen map[*types.TypeName] } } return nil - case *typeparams.TypeParam: + case *types.TypeParam: name := T.Obj() if name == obj { return append(path, opObj) @@ -563,10 +535,10 @@ func find(obj types.Object, T types.Type, path []byte, seen map[*types.TypeName] panic(T) } -func findTypeParam(obj types.Object, list *typeparams.TypeParamList, path []byte, seen map[*types.TypeName]bool) []byte { +func findTypeParam(obj types.Object, list *types.TypeParamList, path []byte, op byte, seen map[*types.TypeName]bool) []byte { for i := 0; i < list.Len(); i++ { tparam := list.At(i) - path2 := appendOpArg(path, opTypeParam, i) + path2 := appendOpArg(path, op, i) if r := find(obj, tparam, path2, seen); r != nil { return r } @@ -576,12 +548,7 @@ func findTypeParam(obj types.Object, list *typeparams.TypeParamList, path []byte // Object returns the object denoted by path p within the package pkg. func Object(pkg *types.Package, p Path) (types.Object, error) { - return object(pkg, string(p), false) -} - -// Note: the skipMethodSorting parameter must match the value of -// Encoder.skipMethodSorting used during encoding. -func object(pkg *types.Package, pathstr string, skipMethodSorting bool) (types.Object, error) { + pathstr := string(p) if pathstr == "" { return nil, fmt.Errorf("empty path") } @@ -605,7 +572,7 @@ func object(pkg *types.Package, pathstr string, skipMethodSorting bool) (types.O } // abstraction of *types.{Named,Signature} type hasTypeParams interface { - TypeParams() *typeparams.TypeParamList + TypeParams() *types.TypeParamList } // abstraction of *types.{Named,TypeParam} type hasObj interface { @@ -623,10 +590,10 @@ func object(pkg *types.Package, pathstr string, skipMethodSorting bool) (types.O code := suffix[0] suffix = suffix[1:] - // Codes [AFM] have an integer operand. + // Codes [AFMTr] have an integer operand. var index int switch code { - case opAt, opField, opMethod, opTypeParam: + case opAt, opField, opMethod, opTypeParam, opRecvTypeParam: rest := strings.TrimLeft(suffix, "0123456789") numerals := suffix[:len(suffix)-len(rest)] suffix = rest @@ -659,6 +626,7 @@ func object(pkg *types.Package, pathstr string, skipMethodSorting bool) (types.O // Inv: t != nil, obj == nil + t = types.Unalias(t) switch code { case opElem: hasElem, ok := t.(hasElem) // Pointer, Slice, Array, Chan, Map @@ -695,6 +663,16 @@ func object(pkg *types.Package, pathstr string, skipMethodSorting bool) (types.O } t = named.Underlying() + case opRhs: + if alias, ok := t.(*types.Alias); ok { + t = aliases.Rhs(alias) + } else if false && aliases.Enabled() { + // The Enabled check is too expensive, so for now we + // simply assume that aliases are not enabled. + // TODO(adonovan): replace with "if true {" when go1.24 is assured. + return nil, fmt.Errorf("cannot apply %q to %s (got %T, want alias)", code, t, t) + } + case opTypeParam: hasTypeParams, ok := t.(hasTypeParams) // Named, Signature if !ok { @@ -706,8 +684,19 @@ func object(pkg *types.Package, pathstr string, skipMethodSorting bool) (types.O } t = tparams.At(index) + case opRecvTypeParam: + sig, ok := t.(*types.Signature) // Signature + if !ok { + return nil, fmt.Errorf("cannot apply %q to %s (got %T, want signature)", code, t, t) + } + rtparams := sig.RecvTypeParams() + if n := rtparams.Len(); index >= n { + return nil, fmt.Errorf("tuple index %d out of range [0-%d)", index, n) + } + t = rtparams.At(index) + case opConstraint: - tparam, ok := t.(*typeparams.TypeParam) + tparam, ok := t.(*types.TypeParam) if !ok { return nil, fmt.Errorf("cannot apply %q to %s (got %T, want type parameter)", code, t, t) } @@ -747,12 +736,7 @@ func object(pkg *types.Package, pathstr string, skipMethodSorting bool) (types.O if index >= t.NumMethods() { return nil, fmt.Errorf("method index %d out of range [0-%d)", index, t.NumMethods()) } - if skipMethodSorting { - obj = t.Method(index) - } else { - methods := namedMethods(t) // (unmemoized) - obj = methods[index] // Id-ordered - } + obj = t.Method(index) default: return nil, fmt.Errorf("cannot apply %q to %s (got %T, want interface or named)", code, t, t) @@ -772,6 +756,10 @@ func object(pkg *types.Package, pathstr string, skipMethodSorting bool) (types.O } } + if obj == nil { + panic(p) // path does not end in an object-valued operator + } + if obj.Pkg() != pkg { return nil, fmt.Errorf("path denotes %s, which belongs to a different package", obj) } @@ -779,33 +767,6 @@ func object(pkg *types.Package, pathstr string, skipMethodSorting bool) (types.O return obj, nil // success } -// namedMethods returns the methods of a Named type in ascending Id order. -func namedMethods(named *types.Named) []*types.Func { - methods := make([]*types.Func, named.NumMethods()) - for i := range methods { - methods[i] = named.Method(i) - } - sort.Slice(methods, func(i, j int) bool { - return methods[i].Id() < methods[j].Id() - }) - return methods -} - -// namedMethods is a memoization of the namedMethods function. Callers must not modify the result. -func (enc *Encoder) namedMethods(named *types.Named) []*types.Func { - m := enc.namedMethodsMemo - if m == nil { - m = make(map[*types.Named][]*types.Func) - enc.namedMethodsMemo = m - } - methods, ok := m[named] - if !ok { - methods = namedMethods(named) // allocates and sorts - m[named] = methods - } - return methods -} - // scopeObjects is a memoization of scope objects. // Callers must not modify the result. func (enc *Encoder) scopeObjects(scope *types.Scope) []types.Object { diff --git a/vendor/golang.org/x/tools/go/types/typeutil/callee.go b/vendor/golang.org/x/tools/go/types/typeutil/callee.go new file mode 100644 index 00000000..75438035 --- /dev/null +++ b/vendor/golang.org/x/tools/go/types/typeutil/callee.go @@ -0,0 +1,68 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package typeutil + +import ( + "go/ast" + "go/types" + + "golang.org/x/tools/internal/typeparams" +) + +// Callee returns the named target of a function call, if any: +// a function, method, builtin, or variable. +// +// Functions and methods may potentially have type parameters. +func Callee(info *types.Info, call *ast.CallExpr) types.Object { + fun := ast.Unparen(call.Fun) + + // Look through type instantiation if necessary. + isInstance := false + switch fun.(type) { + case *ast.IndexExpr, *ast.IndexListExpr: + // When extracting the callee from an *IndexExpr, we need to check that + // it is a *types.Func and not a *types.Var. + // Example: Don't match a slice m within the expression `m[0]()`. + isInstance = true + fun, _, _, _ = typeparams.UnpackIndexExpr(fun) + } + + var obj types.Object + switch fun := fun.(type) { + case *ast.Ident: + obj = info.Uses[fun] // type, var, builtin, or declared func + case *ast.SelectorExpr: + if sel, ok := info.Selections[fun]; ok { + obj = sel.Obj() // method or field + } else { + obj = info.Uses[fun.Sel] // qualified identifier? + } + } + if _, ok := obj.(*types.TypeName); ok { + return nil // T(x) is a conversion, not a call + } + // A Func is required to match instantiations. + if _, ok := obj.(*types.Func); isInstance && !ok { + return nil // Was not a Func. + } + return obj +} + +// StaticCallee returns the target (function or method) of a static function +// call, if any. It returns nil for calls to builtins. +// +// Note: for calls of instantiated functions and methods, StaticCallee returns +// the corresponding generic function or method on the generic type. +func StaticCallee(info *types.Info, call *ast.CallExpr) *types.Func { + if f, ok := Callee(info, call).(*types.Func); ok && !interfaceMethod(f) { + return f + } + return nil +} + +func interfaceMethod(f *types.Func) bool { + recv := f.Type().(*types.Signature).Recv() + return recv != nil && types.IsInterface(recv.Type()) +} diff --git a/vendor/golang.org/x/tools/go/types/typeutil/imports.go b/vendor/golang.org/x/tools/go/types/typeutil/imports.go new file mode 100644 index 00000000..b81ce0c3 --- /dev/null +++ b/vendor/golang.org/x/tools/go/types/typeutil/imports.go @@ -0,0 +1,30 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package typeutil + +import "go/types" + +// Dependencies returns all dependencies of the specified packages. +// +// Dependent packages appear in topological order: if package P imports +// package Q, Q appears earlier than P in the result. +// The algorithm follows import statements in the order they +// appear in the source code, so the result is a total order. +func Dependencies(pkgs ...*types.Package) []*types.Package { + var result []*types.Package + seen := make(map[*types.Package]bool) + var visit func(pkgs []*types.Package) + visit = func(pkgs []*types.Package) { + for _, p := range pkgs { + if !seen[p] { + seen[p] = true + visit(p.Imports()) + result = append(result, p) + } + } + } + visit(pkgs) + return result +} diff --git a/vendor/golang.org/x/tools/go/types/typeutil/map.go b/vendor/golang.org/x/tools/go/types/typeutil/map.go new file mode 100644 index 00000000..8d824f71 --- /dev/null +++ b/vendor/golang.org/x/tools/go/types/typeutil/map.go @@ -0,0 +1,517 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package typeutil defines various utilities for types, such as Map, +// a mapping from types.Type to any values. +package typeutil // import "golang.org/x/tools/go/types/typeutil" + +import ( + "bytes" + "fmt" + "go/types" + "reflect" + + "golang.org/x/tools/internal/typeparams" +) + +// Map is a hash-table-based mapping from types (types.Type) to +// arbitrary any values. The concrete types that implement +// the Type interface are pointers. Since they are not canonicalized, +// == cannot be used to check for equivalence, and thus we cannot +// simply use a Go map. +// +// Just as with map[K]V, a nil *Map is a valid empty map. +// +// Not thread-safe. +type Map struct { + hasher Hasher // shared by many Maps + table map[uint32][]entry // maps hash to bucket; entry.key==nil means unused + length int // number of map entries +} + +// entry is an entry (key/value association) in a hash bucket. +type entry struct { + key types.Type + value any +} + +// SetHasher sets the hasher used by Map. +// +// All Hashers are functionally equivalent but contain internal state +// used to cache the results of hashing previously seen types. +// +// A single Hasher created by MakeHasher() may be shared among many +// Maps. This is recommended if the instances have many keys in +// common, as it will amortize the cost of hash computation. +// +// A Hasher may grow without bound as new types are seen. Even when a +// type is deleted from the map, the Hasher never shrinks, since other +// types in the map may reference the deleted type indirectly. +// +// Hashers are not thread-safe, and read-only operations such as +// Map.Lookup require updates to the hasher, so a full Mutex lock (not a +// read-lock) is require around all Map operations if a shared +// hasher is accessed from multiple threads. +// +// If SetHasher is not called, the Map will create a private hasher at +// the first call to Insert. +func (m *Map) SetHasher(hasher Hasher) { + m.hasher = hasher +} + +// Delete removes the entry with the given key, if any. +// It returns true if the entry was found. +func (m *Map) Delete(key types.Type) bool { + if m != nil && m.table != nil { + hash := m.hasher.Hash(key) + bucket := m.table[hash] + for i, e := range bucket { + if e.key != nil && types.Identical(key, e.key) { + // We can't compact the bucket as it + // would disturb iterators. + bucket[i] = entry{} + m.length-- + return true + } + } + } + return false +} + +// At returns the map entry for the given key. +// The result is nil if the entry is not present. +func (m *Map) At(key types.Type) any { + if m != nil && m.table != nil { + for _, e := range m.table[m.hasher.Hash(key)] { + if e.key != nil && types.Identical(key, e.key) { + return e.value + } + } + } + return nil +} + +// Set sets the map entry for key to val, +// and returns the previous entry, if any. +func (m *Map) Set(key types.Type, value any) (prev any) { + if m.table != nil { + hash := m.hasher.Hash(key) + bucket := m.table[hash] + var hole *entry + for i, e := range bucket { + if e.key == nil { + hole = &bucket[i] + } else if types.Identical(key, e.key) { + prev = e.value + bucket[i].value = value + return + } + } + + if hole != nil { + *hole = entry{key, value} // overwrite deleted entry + } else { + m.table[hash] = append(bucket, entry{key, value}) + } + } else { + if m.hasher.memo == nil { + m.hasher = MakeHasher() + } + hash := m.hasher.Hash(key) + m.table = map[uint32][]entry{hash: {entry{key, value}}} + } + + m.length++ + return +} + +// Len returns the number of map entries. +func (m *Map) Len() int { + if m != nil { + return m.length + } + return 0 +} + +// Iterate calls function f on each entry in the map in unspecified order. +// +// If f should mutate the map, Iterate provides the same guarantees as +// Go maps: if f deletes a map entry that Iterate has not yet reached, +// f will not be invoked for it, but if f inserts a map entry that +// Iterate has not yet reached, whether or not f will be invoked for +// it is unspecified. +func (m *Map) Iterate(f func(key types.Type, value any)) { + if m != nil { + for _, bucket := range m.table { + for _, e := range bucket { + if e.key != nil { + f(e.key, e.value) + } + } + } + } +} + +// Keys returns a new slice containing the set of map keys. +// The order is unspecified. +func (m *Map) Keys() []types.Type { + keys := make([]types.Type, 0, m.Len()) + m.Iterate(func(key types.Type, _ any) { + keys = append(keys, key) + }) + return keys +} + +func (m *Map) toString(values bool) string { + if m == nil { + return "{}" + } + var buf bytes.Buffer + fmt.Fprint(&buf, "{") + sep := "" + m.Iterate(func(key types.Type, value any) { + fmt.Fprint(&buf, sep) + sep = ", " + fmt.Fprint(&buf, key) + if values { + fmt.Fprintf(&buf, ": %q", value) + } + }) + fmt.Fprint(&buf, "}") + return buf.String() +} + +// String returns a string representation of the map's entries. +// Values are printed using fmt.Sprintf("%v", v). +// Order is unspecified. +func (m *Map) String() string { + return m.toString(true) +} + +// KeysString returns a string representation of the map's key set. +// Order is unspecified. +func (m *Map) KeysString() string { + return m.toString(false) +} + +//////////////////////////////////////////////////////////////////////// +// Hasher + +// A Hasher maps each type to its hash value. +// For efficiency, a hasher uses memoization; thus its memory +// footprint grows monotonically over time. +// Hashers are not thread-safe. +// Hashers have reference semantics. +// Call MakeHasher to create a Hasher. +type Hasher struct { + memo map[types.Type]uint32 + + // ptrMap records pointer identity. + ptrMap map[any]uint32 + + // sigTParams holds type parameters from the signature being hashed. + // Signatures are considered identical modulo renaming of type parameters, so + // within the scope of a signature type the identity of the signature's type + // parameters is just their index. + // + // Since the language does not currently support referring to uninstantiated + // generic types or functions, and instantiated signatures do not have type + // parameter lists, we should never encounter a second non-empty type + // parameter list when hashing a generic signature. + sigTParams *types.TypeParamList +} + +// MakeHasher returns a new Hasher instance. +func MakeHasher() Hasher { + return Hasher{ + memo: make(map[types.Type]uint32), + ptrMap: make(map[any]uint32), + sigTParams: nil, + } +} + +// Hash computes a hash value for the given type t such that +// Identical(t, t') => Hash(t) == Hash(t'). +func (h Hasher) Hash(t types.Type) uint32 { + hash, ok := h.memo[t] + if !ok { + hash = h.hashFor(t) + h.memo[t] = hash + } + return hash +} + +// hashString computes the Fowler–Noll–Vo hash of s. +func hashString(s string) uint32 { + var h uint32 + for i := 0; i < len(s); i++ { + h ^= uint32(s[i]) + h *= 16777619 + } + return h +} + +// hashFor computes the hash of t. +func (h Hasher) hashFor(t types.Type) uint32 { + // See Identical for rationale. + switch t := t.(type) { + case *types.Basic: + return uint32(t.Kind()) + + case *types.Alias: + return h.Hash(types.Unalias(t)) + + case *types.Array: + return 9043 + 2*uint32(t.Len()) + 3*h.Hash(t.Elem()) + + case *types.Slice: + return 9049 + 2*h.Hash(t.Elem()) + + case *types.Struct: + var hash uint32 = 9059 + for i, n := 0, t.NumFields(); i < n; i++ { + f := t.Field(i) + if f.Anonymous() { + hash += 8861 + } + hash += hashString(t.Tag(i)) + hash += hashString(f.Name()) // (ignore f.Pkg) + hash += h.Hash(f.Type()) + } + return hash + + case *types.Pointer: + return 9067 + 2*h.Hash(t.Elem()) + + case *types.Signature: + var hash uint32 = 9091 + if t.Variadic() { + hash *= 8863 + } + + // Use a separate hasher for types inside of the signature, where type + // parameter identity is modified to be (index, constraint). We must use a + // new memo for this hasher as type identity may be affected by this + // masking. For example, in func[T any](*T), the identity of *T depends on + // whether we are mapping the argument in isolation, or recursively as part + // of hashing the signature. + // + // We should never encounter a generic signature while hashing another + // generic signature, but defensively set sigTParams only if h.mask is + // unset. + tparams := t.TypeParams() + if h.sigTParams == nil && tparams.Len() != 0 { + h = Hasher{ + // There may be something more efficient than discarding the existing + // memo, but it would require detecting whether types are 'tainted' by + // references to type parameters. + memo: make(map[types.Type]uint32), + // Re-using ptrMap ensures that pointer identity is preserved in this + // hasher. + ptrMap: h.ptrMap, + sigTParams: tparams, + } + } + + for i := 0; i < tparams.Len(); i++ { + tparam := tparams.At(i) + hash += 7 * h.Hash(tparam.Constraint()) + } + + return hash + 3*h.hashTuple(t.Params()) + 5*h.hashTuple(t.Results()) + + case *types.Union: + return h.hashUnion(t) + + case *types.Interface: + // Interfaces are identical if they have the same set of methods, with + // identical names and types, and they have the same set of type + // restrictions. See go/types.identical for more details. + var hash uint32 = 9103 + + // Hash methods. + for i, n := 0, t.NumMethods(); i < n; i++ { + // Method order is not significant. + // Ignore m.Pkg(). + m := t.Method(i) + // Use shallow hash on method signature to + // avoid anonymous interface cycles. + hash += 3*hashString(m.Name()) + 5*h.shallowHash(m.Type()) + } + + // Hash type restrictions. + terms, err := typeparams.InterfaceTermSet(t) + // if err != nil t has invalid type restrictions. + if err == nil { + hash += h.hashTermSet(terms) + } + + return hash + + case *types.Map: + return 9109 + 2*h.Hash(t.Key()) + 3*h.Hash(t.Elem()) + + case *types.Chan: + return 9127 + 2*uint32(t.Dir()) + 3*h.Hash(t.Elem()) + + case *types.Named: + hash := h.hashPtr(t.Obj()) + targs := t.TypeArgs() + for i := 0; i < targs.Len(); i++ { + targ := targs.At(i) + hash += 2 * h.Hash(targ) + } + return hash + + case *types.TypeParam: + return h.hashTypeParam(t) + + case *types.Tuple: + return h.hashTuple(t) + } + + panic(fmt.Sprintf("%T: %v", t, t)) +} + +func (h Hasher) hashTuple(tuple *types.Tuple) uint32 { + // See go/types.identicalTypes for rationale. + n := tuple.Len() + hash := 9137 + 2*uint32(n) + for i := 0; i < n; i++ { + hash += 3 * h.Hash(tuple.At(i).Type()) + } + return hash +} + +func (h Hasher) hashUnion(t *types.Union) uint32 { + // Hash type restrictions. + terms, err := typeparams.UnionTermSet(t) + // if err != nil t has invalid type restrictions. Fall back on a non-zero + // hash. + if err != nil { + return 9151 + } + return h.hashTermSet(terms) +} + +func (h Hasher) hashTermSet(terms []*types.Term) uint32 { + hash := 9157 + 2*uint32(len(terms)) + for _, term := range terms { + // term order is not significant. + termHash := h.Hash(term.Type()) + if term.Tilde() { + termHash *= 9161 + } + hash += 3 * termHash + } + return hash +} + +// hashTypeParam returns a hash of the type parameter t, with a hash value +// depending on whether t is contained in h.sigTParams. +// +// If h.sigTParams is set and contains t, then we are in the process of hashing +// a signature, and the hash value of t must depend only on t's index and +// constraint: signatures are considered identical modulo type parameter +// renaming. To avoid infinite recursion, we only hash the type parameter +// index, and rely on types.Identical to handle signatures where constraints +// are not identical. +// +// Otherwise the hash of t depends only on t's pointer identity. +func (h Hasher) hashTypeParam(t *types.TypeParam) uint32 { + if h.sigTParams != nil { + i := t.Index() + if i >= 0 && i < h.sigTParams.Len() && t == h.sigTParams.At(i) { + return 9173 + 3*uint32(i) + } + } + return h.hashPtr(t.Obj()) +} + +// hashPtr hashes the pointer identity of ptr. It uses h.ptrMap to ensure that +// pointers values are not dependent on the GC. +func (h Hasher) hashPtr(ptr any) uint32 { + if hash, ok := h.ptrMap[ptr]; ok { + return hash + } + hash := uint32(reflect.ValueOf(ptr).Pointer()) + h.ptrMap[ptr] = hash + return hash +} + +// shallowHash computes a hash of t without looking at any of its +// element Types, to avoid potential anonymous cycles in the types of +// interface methods. +// +// When an unnamed non-empty interface type appears anywhere among the +// arguments or results of an interface method, there is a potential +// for endless recursion. Consider: +// +// type X interface { m() []*interface { X } } +// +// The problem is that the Methods of the interface in m's result type +// include m itself; there is no mention of the named type X that +// might help us break the cycle. +// (See comment in go/types.identical, case *Interface, for more.) +func (h Hasher) shallowHash(t types.Type) uint32 { + // t is the type of an interface method (Signature), + // its params or results (Tuples), or their immediate + // elements (mostly Slice, Pointer, Basic, Named), + // so there's no need to optimize anything else. + switch t := t.(type) { + case *types.Alias: + return h.shallowHash(types.Unalias(t)) + + case *types.Signature: + var hash uint32 = 604171 + if t.Variadic() { + hash *= 971767 + } + // The Signature/Tuple recursion is always finite + // and invariably shallow. + return hash + 1062599*h.shallowHash(t.Params()) + 1282529*h.shallowHash(t.Results()) + + case *types.Tuple: + n := t.Len() + hash := 9137 + 2*uint32(n) + for i := 0; i < n; i++ { + hash += 53471161 * h.shallowHash(t.At(i).Type()) + } + return hash + + case *types.Basic: + return 45212177 * uint32(t.Kind()) + + case *types.Array: + return 1524181 + 2*uint32(t.Len()) + + case *types.Slice: + return 2690201 + + case *types.Struct: + return 3326489 + + case *types.Pointer: + return 4393139 + + case *types.Union: + return 562448657 + + case *types.Interface: + return 2124679 // no recursion here + + case *types.Map: + return 9109 + + case *types.Chan: + return 9127 + + case *types.Named: + return h.hashPtr(t.Obj()) + + case *types.TypeParam: + return h.hashPtr(t.Obj()) + } + panic(fmt.Sprintf("shallowHash: %T: %v", t, t)) +} diff --git a/vendor/golang.org/x/tools/go/types/typeutil/methodsetcache.go b/vendor/golang.org/x/tools/go/types/typeutil/methodsetcache.go new file mode 100644 index 00000000..f7666028 --- /dev/null +++ b/vendor/golang.org/x/tools/go/types/typeutil/methodsetcache.go @@ -0,0 +1,71 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file implements a cache of method sets. + +package typeutil + +import ( + "go/types" + "sync" +) + +// A MethodSetCache records the method set of each type T for which +// MethodSet(T) is called so that repeat queries are fast. +// The zero value is a ready-to-use cache instance. +type MethodSetCache struct { + mu sync.Mutex + named map[*types.Named]struct{ value, pointer *types.MethodSet } // method sets for named N and *N + others map[types.Type]*types.MethodSet // all other types +} + +// MethodSet returns the method set of type T. It is thread-safe. +// +// If cache is nil, this function is equivalent to types.NewMethodSet(T). +// Utility functions can thus expose an optional *MethodSetCache +// parameter to clients that care about performance. +func (cache *MethodSetCache) MethodSet(T types.Type) *types.MethodSet { + if cache == nil { + return types.NewMethodSet(T) + } + cache.mu.Lock() + defer cache.mu.Unlock() + + switch T := types.Unalias(T).(type) { + case *types.Named: + return cache.lookupNamed(T).value + + case *types.Pointer: + if N, ok := types.Unalias(T.Elem()).(*types.Named); ok { + return cache.lookupNamed(N).pointer + } + } + + // all other types + // (The map uses pointer equivalence, not type identity.) + mset := cache.others[T] + if mset == nil { + mset = types.NewMethodSet(T) + if cache.others == nil { + cache.others = make(map[types.Type]*types.MethodSet) + } + cache.others[T] = mset + } + return mset +} + +func (cache *MethodSetCache) lookupNamed(named *types.Named) struct{ value, pointer *types.MethodSet } { + if cache.named == nil { + cache.named = make(map[*types.Named]struct{ value, pointer *types.MethodSet }) + } + // Avoid recomputing mset(*T) for each distinct Pointer + // instance whose underlying type is a named type. + msets, ok := cache.named[named] + if !ok { + msets.value = types.NewMethodSet(named) + msets.pointer = types.NewMethodSet(types.NewPointer(named)) + cache.named[named] = msets + } + return msets +} diff --git a/vendor/golang.org/x/tools/go/types/typeutil/ui.go b/vendor/golang.org/x/tools/go/types/typeutil/ui.go new file mode 100644 index 00000000..9dda6a25 --- /dev/null +++ b/vendor/golang.org/x/tools/go/types/typeutil/ui.go @@ -0,0 +1,53 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package typeutil + +// This file defines utilities for user interfaces that display types. + +import ( + "go/types" +) + +// IntuitiveMethodSet returns the intuitive method set of a type T, +// which is the set of methods you can call on an addressable value of +// that type. +// +// The result always contains MethodSet(T), and is exactly MethodSet(T) +// for interface types and for pointer-to-concrete types. +// For all other concrete types T, the result additionally +// contains each method belonging to *T if there is no identically +// named method on T itself. +// +// This corresponds to user intuition about method sets; +// this function is intended only for user interfaces. +// +// The order of the result is as for types.MethodSet(T). +func IntuitiveMethodSet(T types.Type, msets *MethodSetCache) []*types.Selection { + isPointerToConcrete := func(T types.Type) bool { + ptr, ok := types.Unalias(T).(*types.Pointer) + return ok && !types.IsInterface(ptr.Elem()) + } + + var result []*types.Selection + mset := msets.MethodSet(T) + if types.IsInterface(T) || isPointerToConcrete(T) { + for i, n := 0, mset.Len(); i < n; i++ { + result = append(result, mset.At(i)) + } + } else { + // T is some other concrete type. + // Report methods of T and *T, preferring those of T. + pmset := msets.MethodSet(types.NewPointer(T)) + for i, n := 0, pmset.Len(); i < n; i++ { + meth := pmset.At(i) + if m := mset.Lookup(meth.Obj().Pkg(), meth.Obj().Name()); m != nil { + meth = m + } + result = append(result, meth) + } + + } + return result +} diff --git a/vendor/golang.org/x/tools/imports/forward.go b/vendor/golang.org/x/tools/imports/forward.go index d2547c74..cb6db889 100644 --- a/vendor/golang.org/x/tools/imports/forward.go +++ b/vendor/golang.org/x/tools/imports/forward.go @@ -7,8 +7,8 @@ package imports // import "golang.org/x/tools/imports" import ( - "io/ioutil" "log" + "os" "golang.org/x/tools/internal/gocommand" intimp "golang.org/x/tools/internal/imports" @@ -44,7 +44,7 @@ var LocalPrefix string func Process(filename string, src []byte, opt *Options) ([]byte, error) { var err error if src == nil { - src, err = ioutil.ReadFile(filename) + src, err = os.ReadFile(filename) if err != nil { return nil, err } diff --git a/vendor/golang.org/x/tools/internal/aliases/aliases.go b/vendor/golang.org/x/tools/internal/aliases/aliases.go new file mode 100644 index 00000000..b9425f5a --- /dev/null +++ b/vendor/golang.org/x/tools/internal/aliases/aliases.go @@ -0,0 +1,38 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package aliases + +import ( + "go/token" + "go/types" +) + +// Package aliases defines backward compatible shims +// for the types.Alias type representation added in 1.22. +// This defines placeholders for x/tools until 1.26. + +// NewAlias creates a new TypeName in Package pkg that +// is an alias for the type rhs. +// +// The enabled parameter determines whether the resulting [TypeName]'s +// type is an [types.Alias]. Its value must be the result of a call to +// [Enabled], which computes the effective value of +// GODEBUG=gotypesalias=... by invoking the type checker. The Enabled +// function is expensive and should be called once per task (e.g. +// package import), not once per call to NewAlias. +// +// Precondition: enabled || len(tparams)==0. +// If materialized aliases are disabled, there must not be any type parameters. +func NewAlias(enabled bool, pos token.Pos, pkg *types.Package, name string, rhs types.Type, tparams []*types.TypeParam) *types.TypeName { + if enabled { + tname := types.NewTypeName(pos, pkg, name, nil) + SetTypeParams(types.NewAlias(tname, rhs), tparams) + return tname + } + if len(tparams) > 0 { + panic("cannot create an alias with type parameters when gotypesalias is not enabled") + } + return types.NewTypeName(pos, pkg, name, rhs) +} diff --git a/vendor/golang.org/x/tools/internal/aliases/aliases_go122.go b/vendor/golang.org/x/tools/internal/aliases/aliases_go122.go new file mode 100644 index 00000000..7716a333 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/aliases/aliases_go122.go @@ -0,0 +1,80 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package aliases + +import ( + "go/ast" + "go/parser" + "go/token" + "go/types" +) + +// Rhs returns the type on the right-hand side of the alias declaration. +func Rhs(alias *types.Alias) types.Type { + if alias, ok := any(alias).(interface{ Rhs() types.Type }); ok { + return alias.Rhs() // go1.23+ + } + + // go1.22's Alias didn't have the Rhs method, + // so Unalias is the best we can do. + return types.Unalias(alias) +} + +// TypeParams returns the type parameter list of the alias. +func TypeParams(alias *types.Alias) *types.TypeParamList { + if alias, ok := any(alias).(interface{ TypeParams() *types.TypeParamList }); ok { + return alias.TypeParams() // go1.23+ + } + return nil +} + +// SetTypeParams sets the type parameters of the alias type. +func SetTypeParams(alias *types.Alias, tparams []*types.TypeParam) { + if alias, ok := any(alias).(interface { + SetTypeParams(tparams []*types.TypeParam) + }); ok { + alias.SetTypeParams(tparams) // go1.23+ + } else if len(tparams) > 0 { + panic("cannot set type parameters of an Alias type in go1.22") + } +} + +// TypeArgs returns the type arguments used to instantiate the Alias type. +func TypeArgs(alias *types.Alias) *types.TypeList { + if alias, ok := any(alias).(interface{ TypeArgs() *types.TypeList }); ok { + return alias.TypeArgs() // go1.23+ + } + return nil // empty (go1.22) +} + +// Origin returns the generic Alias type of which alias is an instance. +// If alias is not an instance of a generic alias, Origin returns alias. +func Origin(alias *types.Alias) *types.Alias { + if alias, ok := any(alias).(interface{ Origin() *types.Alias }); ok { + return alias.Origin() // go1.23+ + } + return alias // not an instance of a generic alias (go1.22) +} + +// Enabled reports whether [NewAlias] should create [types.Alias] types. +// +// This function is expensive! Call it sparingly. +func Enabled() bool { + // The only reliable way to compute the answer is to invoke go/types. + // We don't parse the GODEBUG environment variable, because + // (a) it's tricky to do so in a manner that is consistent + // with the godebug package; in particular, a simple + // substring check is not good enough. The value is a + // rightmost-wins list of options. But more importantly: + // (b) it is impossible to detect changes to the effective + // setting caused by os.Setenv("GODEBUG"), as happens in + // many tests. Therefore any attempt to cache the result + // is just incorrect. + fset := token.NewFileSet() + f, _ := parser.ParseFile(fset, "a.go", "package p; type A = int", parser.SkipObjectResolution) + pkg, _ := new(types.Config).Check("p", fset, []*ast.File{f}, nil) + _, enabled := pkg.Scope().Lookup("A").Type().(*types.Alias) + return enabled +} diff --git a/vendor/golang.org/x/tools/internal/event/keys/util.go b/vendor/golang.org/x/tools/internal/event/keys/util.go new file mode 100644 index 00000000..c0e8e731 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/event/keys/util.go @@ -0,0 +1,21 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package keys + +import ( + "sort" + "strings" +) + +// Join returns a canonical join of the keys in S: +// a sorted comma-separated string list. +func Join[S ~[]T, T ~string](s S) string { + strs := make([]string, 0, len(s)) + for _, v := range s { + strs = append(strs, string(v)) + } + sort.Strings(strs) + return strings.Join(strs, ",") +} diff --git a/vendor/golang.org/x/tools/internal/event/tag/tag.go b/vendor/golang.org/x/tools/internal/event/tag/tag.go deleted file mode 100644 index 581b26c2..00000000 --- a/vendor/golang.org/x/tools/internal/event/tag/tag.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package tag provides the labels used for telemetry throughout gopls. -package tag - -import ( - "golang.org/x/tools/internal/event/keys" -) - -var ( - // create the label keys we use - Method = keys.NewString("method", "") - StatusCode = keys.NewString("status.code", "") - StatusMessage = keys.NewString("status.message", "") - RPCID = keys.NewString("id", "") - RPCDirection = keys.NewString("direction", "") - File = keys.NewString("file", "") - Directory = keys.New("directory", "") - URI = keys.New("URI", "") - Package = keys.NewString("package", "") // sorted comma-separated list of Package IDs - PackagePath = keys.NewString("package_path", "") - Query = keys.New("query", "") - Snapshot = keys.NewUInt64("snapshot", "") - Operation = keys.NewString("operation", "") - - Position = keys.New("position", "") - Category = keys.NewString("category", "") - PackageCount = keys.NewInt("packages", "") - Files = keys.New("files", "") - Port = keys.NewInt("port", "") - Type = keys.New("type", "") - HoverKind = keys.NewString("hoverkind", "") - - NewServer = keys.NewString("new_server", "A new server was added") - EndServer = keys.NewString("end_server", "A server was shut down") - - ServerID = keys.NewString("server", "The server ID an event is related to") - Logfile = keys.NewString("logfile", "") - DebugAddress = keys.NewString("debug_address", "") - GoplsPath = keys.NewString("gopls_path", "") - ClientID = keys.NewString("client_id", "") - - Level = keys.NewInt("level", "The logging level") -) - -var ( - // create the stats we measure - Started = keys.NewInt64("started", "Count of started RPCs.") - ReceivedBytes = keys.NewInt64("received_bytes", "Bytes received.") //, unit.Bytes) - SentBytes = keys.NewInt64("sent_bytes", "Bytes sent.") //, unit.Bytes) - Latency = keys.NewFloat64("latency_ms", "Elapsed time in milliseconds") //, unit.Milliseconds) -) - -const ( - Inbound = "in" - Outbound = "out" -) diff --git a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk.go b/vendor/golang.org/x/tools/internal/fastwalk/fastwalk.go deleted file mode 100644 index c40c7e93..00000000 --- a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk.go +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package fastwalk provides a faster version of [filepath.Walk] for file system -// scanning tools. -package fastwalk - -import ( - "errors" - "os" - "path/filepath" - "runtime" - "sync" -) - -// ErrTraverseLink is used as a return value from WalkFuncs to indicate that the -// symlink named in the call may be traversed. -var ErrTraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory") - -// ErrSkipFiles is a used as a return value from WalkFuncs to indicate that the -// callback should not be called for any other files in the current directory. -// Child directories will still be traversed. -var ErrSkipFiles = errors.New("fastwalk: skip remaining files in directory") - -// Walk is a faster implementation of [filepath.Walk]. -// -// [filepath.Walk]'s design necessarily calls [os.Lstat] on each file, -// even if the caller needs less info. -// Many tools need only the type of each file. -// On some platforms, this information is provided directly by the readdir -// system call, avoiding the need to stat each file individually. -// fastwalk_unix.go contains a fork of the syscall routines. -// -// See golang.org/issue/16399. -// -// Walk walks the file tree rooted at root, calling walkFn for -// each file or directory in the tree, including root. -// -// If Walk returns [filepath.SkipDir], the directory is skipped. -// -// Unlike [filepath.Walk]: -// - file stat calls must be done by the user. -// The only provided metadata is the file type, which does not include -// any permission bits. -// - multiple goroutines stat the filesystem concurrently. The provided -// walkFn must be safe for concurrent use. -// - Walk can follow symlinks if walkFn returns the TraverseLink -// sentinel error. It is the walkFn's responsibility to prevent -// Walk from going into symlink cycles. -func Walk(root string, walkFn func(path string, typ os.FileMode) error) error { - // TODO(bradfitz): make numWorkers configurable? We used a - // minimum of 4 to give the kernel more info about multiple - // things we want, in hopes its I/O scheduling can take - // advantage of that. Hopefully most are in cache. Maybe 4 is - // even too low of a minimum. Profile more. - numWorkers := 4 - if n := runtime.NumCPU(); n > numWorkers { - numWorkers = n - } - - // Make sure to wait for all workers to finish, otherwise - // walkFn could still be called after returning. This Wait call - // runs after close(e.donec) below. - var wg sync.WaitGroup - defer wg.Wait() - - w := &walker{ - fn: walkFn, - enqueuec: make(chan walkItem, numWorkers), // buffered for performance - workc: make(chan walkItem, numWorkers), // buffered for performance - donec: make(chan struct{}), - - // buffered for correctness & not leaking goroutines: - resc: make(chan error, numWorkers), - } - defer close(w.donec) - - for i := 0; i < numWorkers; i++ { - wg.Add(1) - go w.doWork(&wg) - } - todo := []walkItem{{dir: root}} - out := 0 - for { - workc := w.workc - var workItem walkItem - if len(todo) == 0 { - workc = nil - } else { - workItem = todo[len(todo)-1] - } - select { - case workc <- workItem: - todo = todo[:len(todo)-1] - out++ - case it := <-w.enqueuec: - todo = append(todo, it) - case err := <-w.resc: - out-- - if err != nil { - return err - } - if out == 0 && len(todo) == 0 { - // It's safe to quit here, as long as the buffered - // enqueue channel isn't also readable, which might - // happen if the worker sends both another unit of - // work and its result before the other select was - // scheduled and both w.resc and w.enqueuec were - // readable. - select { - case it := <-w.enqueuec: - todo = append(todo, it) - default: - return nil - } - } - } - } -} - -// doWork reads directories as instructed (via workc) and runs the -// user's callback function. -func (w *walker) doWork(wg *sync.WaitGroup) { - defer wg.Done() - for { - select { - case <-w.donec: - return - case it := <-w.workc: - select { - case <-w.donec: - return - case w.resc <- w.walk(it.dir, !it.callbackDone): - } - } - } -} - -type walker struct { - fn func(path string, typ os.FileMode) error - - donec chan struct{} // closed on fastWalk's return - workc chan walkItem // to workers - enqueuec chan walkItem // from workers - resc chan error // from workers -} - -type walkItem struct { - dir string - callbackDone bool // callback already called; don't do it again -} - -func (w *walker) enqueue(it walkItem) { - select { - case w.enqueuec <- it: - case <-w.donec: - } -} - -func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error { - joined := dirName + string(os.PathSeparator) + baseName - if typ == os.ModeDir { - w.enqueue(walkItem{dir: joined}) - return nil - } - - err := w.fn(joined, typ) - if typ == os.ModeSymlink { - if err == ErrTraverseLink { - // Set callbackDone so we don't call it twice for both the - // symlink-as-symlink and the symlink-as-directory later: - w.enqueue(walkItem{dir: joined, callbackDone: true}) - return nil - } - if err == filepath.SkipDir { - // Permit SkipDir on symlinks too. - return nil - } - } - return err -} - -func (w *walker) walk(root string, runUserCallback bool) error { - if runUserCallback { - err := w.fn(root, os.ModeDir) - if err == filepath.SkipDir { - return nil - } - if err != nil { - return err - } - } - - return readDir(root, w.onDirEnt) -} diff --git a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_darwin.go b/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_darwin.go deleted file mode 100644 index 0ca55e0d..00000000 --- a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_darwin.go +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build darwin && cgo -// +build darwin,cgo - -package fastwalk - -/* -#include - -// fastwalk_readdir_r wraps readdir_r so that we don't have to pass a dirent** -// result pointer which triggers CGO's "Go pointer to Go pointer" check unless -// we allocat the result dirent* with malloc. -// -// fastwalk_readdir_r returns 0 on success, -1 upon reaching the end of the -// directory, or a positive error number to indicate failure. -static int fastwalk_readdir_r(DIR *fd, struct dirent *entry) { - struct dirent *result; - int ret = readdir_r(fd, entry, &result); - if (ret == 0 && result == NULL) { - ret = -1; // EOF - } - return ret; -} -*/ -import "C" - -import ( - "os" - "syscall" - "unsafe" -) - -func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error { - fd, err := openDir(dirName) - if err != nil { - return &os.PathError{Op: "opendir", Path: dirName, Err: err} - } - defer C.closedir(fd) - - skipFiles := false - var dirent syscall.Dirent - for { - ret := int(C.fastwalk_readdir_r(fd, (*C.struct_dirent)(unsafe.Pointer(&dirent)))) - if ret != 0 { - if ret == -1 { - break // EOF - } - if ret == int(syscall.EINTR) { - continue - } - return &os.PathError{Op: "readdir", Path: dirName, Err: syscall.Errno(ret)} - } - if dirent.Ino == 0 { - continue - } - typ := dtToType(dirent.Type) - if skipFiles && typ.IsRegular() { - continue - } - name := (*[len(syscall.Dirent{}.Name)]byte)(unsafe.Pointer(&dirent.Name))[:] - name = name[:dirent.Namlen] - for i, c := range name { - if c == 0 { - name = name[:i] - break - } - } - // Check for useless names before allocating a string. - if string(name) == "." || string(name) == ".." { - continue - } - if err := fn(dirName, string(name), typ); err != nil { - if err != ErrSkipFiles { - return err - } - skipFiles = true - } - } - - return nil -} - -func dtToType(typ uint8) os.FileMode { - switch typ { - case syscall.DT_BLK: - return os.ModeDevice - case syscall.DT_CHR: - return os.ModeDevice | os.ModeCharDevice - case syscall.DT_DIR: - return os.ModeDir - case syscall.DT_FIFO: - return os.ModeNamedPipe - case syscall.DT_LNK: - return os.ModeSymlink - case syscall.DT_REG: - return 0 - case syscall.DT_SOCK: - return os.ModeSocket - } - return ^os.FileMode(0) -} - -// openDir wraps opendir(3) and handles any EINTR errors. The returned *DIR -// needs to be closed with closedir(3). -func openDir(path string) (*C.DIR, error) { - name, err := syscall.BytePtrFromString(path) - if err != nil { - return nil, err - } - for { - fd, err := C.opendir((*C.char)(unsafe.Pointer(name))) - if err != syscall.EINTR { - return fd, err - } - } -} diff --git a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_fileno.go b/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_fileno.go deleted file mode 100644 index d58595db..00000000 --- a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_fileno.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build freebsd || openbsd || netbsd -// +build freebsd openbsd netbsd - -package fastwalk - -import "syscall" - -func direntInode(dirent *syscall.Dirent) uint64 { - return uint64(dirent.Fileno) -} diff --git a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_ino.go b/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_ino.go deleted file mode 100644 index d3922890..00000000 --- a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_ino.go +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build (linux || (darwin && !cgo)) && !appengine -// +build linux darwin,!cgo -// +build !appengine - -package fastwalk - -import "syscall" - -func direntInode(dirent *syscall.Dirent) uint64 { - return dirent.Ino -} diff --git a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_namlen_bsd.go b/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_namlen_bsd.go deleted file mode 100644 index 38a4db6a..00000000 --- a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_namlen_bsd.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build (darwin && !cgo) || freebsd || openbsd || netbsd -// +build darwin,!cgo freebsd openbsd netbsd - -package fastwalk - -import "syscall" - -func direntNamlen(dirent *syscall.Dirent) uint64 { - return uint64(dirent.Namlen) -} diff --git a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_namlen_linux.go b/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_namlen_linux.go deleted file mode 100644 index c82e57df..00000000 --- a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_dirent_namlen_linux.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build linux && !appengine -// +build linux,!appengine - -package fastwalk - -import ( - "bytes" - "syscall" - "unsafe" -) - -func direntNamlen(dirent *syscall.Dirent) uint64 { - const fixedHdr = uint16(unsafe.Offsetof(syscall.Dirent{}.Name)) - nameBuf := (*[unsafe.Sizeof(dirent.Name)]byte)(unsafe.Pointer(&dirent.Name[0])) - const nameBufLen = uint16(len(nameBuf)) - limit := dirent.Reclen - fixedHdr - if limit > nameBufLen { - limit = nameBufLen - } - nameLen := bytes.IndexByte(nameBuf[:limit], 0) - if nameLen < 0 { - panic("failed to find terminating 0 byte in dirent") - } - return uint64(nameLen) -} diff --git a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_portable.go b/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_portable.go deleted file mode 100644 index 085d3116..00000000 --- a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_portable.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build appengine || (!linux && !darwin && !freebsd && !openbsd && !netbsd) -// +build appengine !linux,!darwin,!freebsd,!openbsd,!netbsd - -package fastwalk - -import ( - "io/ioutil" - "os" -) - -// readDir calls fn for each directory entry in dirName. -// It does not descend into directories or follow symlinks. -// If fn returns a non-nil error, readDir returns with that error -// immediately. -func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error { - fis, err := ioutil.ReadDir(dirName) - if err != nil { - return err - } - skipFiles := false - for _, fi := range fis { - if fi.Mode().IsRegular() && skipFiles { - continue - } - if err := fn(dirName, fi.Name(), fi.Mode()&os.ModeType); err != nil { - if err == ErrSkipFiles { - skipFiles = true - continue - } - return err - } - } - return nil -} diff --git a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_unix.go b/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_unix.go deleted file mode 100644 index f12f1a73..00000000 --- a/vendor/golang.org/x/tools/internal/fastwalk/fastwalk_unix.go +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build (linux || freebsd || openbsd || netbsd || (darwin && !cgo)) && !appengine -// +build linux freebsd openbsd netbsd darwin,!cgo -// +build !appengine - -package fastwalk - -import ( - "fmt" - "os" - "syscall" - "unsafe" -) - -const blockSize = 8 << 10 - -// unknownFileMode is a sentinel (and bogus) os.FileMode -// value used to represent a syscall.DT_UNKNOWN Dirent.Type. -const unknownFileMode os.FileMode = os.ModeNamedPipe | os.ModeSocket | os.ModeDevice - -func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error { - fd, err := open(dirName, 0, 0) - if err != nil { - return &os.PathError{Op: "open", Path: dirName, Err: err} - } - defer syscall.Close(fd) - - // The buffer must be at least a block long. - buf := make([]byte, blockSize) // stack-allocated; doesn't escape - bufp := 0 // starting read position in buf - nbuf := 0 // end valid data in buf - skipFiles := false - for { - if bufp >= nbuf { - bufp = 0 - nbuf, err = readDirent(fd, buf) - if err != nil { - return os.NewSyscallError("readdirent", err) - } - if nbuf <= 0 { - return nil - } - } - consumed, name, typ := parseDirEnt(buf[bufp:nbuf]) - bufp += consumed - if name == "" || name == "." || name == ".." { - continue - } - // Fallback for filesystems (like old XFS) that don't - // support Dirent.Type and have DT_UNKNOWN (0) there - // instead. - if typ == unknownFileMode { - fi, err := os.Lstat(dirName + "/" + name) - if err != nil { - // It got deleted in the meantime. - if os.IsNotExist(err) { - continue - } - return err - } - typ = fi.Mode() & os.ModeType - } - if skipFiles && typ.IsRegular() { - continue - } - if err := fn(dirName, name, typ); err != nil { - if err == ErrSkipFiles { - skipFiles = true - continue - } - return err - } - } -} - -func parseDirEnt(buf []byte) (consumed int, name string, typ os.FileMode) { - // golang.org/issue/37269 - dirent := &syscall.Dirent{} - copy((*[unsafe.Sizeof(syscall.Dirent{})]byte)(unsafe.Pointer(dirent))[:], buf) - if v := unsafe.Offsetof(dirent.Reclen) + unsafe.Sizeof(dirent.Reclen); uintptr(len(buf)) < v { - panic(fmt.Sprintf("buf size of %d smaller than dirent header size %d", len(buf), v)) - } - if len(buf) < int(dirent.Reclen) { - panic(fmt.Sprintf("buf size %d < record length %d", len(buf), dirent.Reclen)) - } - consumed = int(dirent.Reclen) - if direntInode(dirent) == 0 { // File absent in directory. - return - } - switch dirent.Type { - case syscall.DT_REG: - typ = 0 - case syscall.DT_DIR: - typ = os.ModeDir - case syscall.DT_LNK: - typ = os.ModeSymlink - case syscall.DT_BLK: - typ = os.ModeDevice - case syscall.DT_FIFO: - typ = os.ModeNamedPipe - case syscall.DT_SOCK: - typ = os.ModeSocket - case syscall.DT_UNKNOWN: - typ = unknownFileMode - default: - // Skip weird things. - // It's probably a DT_WHT (http://lwn.net/Articles/325369/) - // or something. Revisit if/when this package is moved outside - // of goimports. goimports only cares about regular files, - // symlinks, and directories. - return - } - - nameBuf := (*[unsafe.Sizeof(dirent.Name)]byte)(unsafe.Pointer(&dirent.Name[0])) - nameLen := direntNamlen(dirent) - - // Special cases for common things: - if nameLen == 1 && nameBuf[0] == '.' { - name = "." - } else if nameLen == 2 && nameBuf[0] == '.' && nameBuf[1] == '.' { - name = ".." - } else { - name = string(nameBuf[:nameLen]) - } - return -} - -// According to https://golang.org/doc/go1.14#runtime -// A consequence of the implementation of preemption is that on Unix systems, including Linux and macOS -// systems, programs built with Go 1.14 will receive more signals than programs built with earlier releases. -// -// This causes syscall.Open and syscall.ReadDirent sometimes fail with EINTR errors. -// We need to retry in this case. -func open(path string, mode int, perm uint32) (fd int, err error) { - for { - fd, err := syscall.Open(path, mode, perm) - if err != syscall.EINTR { - return fd, err - } - } -} - -func readDirent(fd int, buf []byte) (n int, err error) { - for { - nbuf, err := syscall.ReadDirent(fd, buf) - if err != syscall.EINTR { - return nbuf, err - } - } -} diff --git a/vendor/golang.org/x/tools/internal/gcimporter/bimport.go b/vendor/golang.org/x/tools/internal/gcimporter/bimport.go index d98b0db2..d79a605e 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/bimport.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/bimport.go @@ -87,64 +87,3 @@ func chanDir(d int) types.ChanDir { return 0 } } - -var predeclOnce sync.Once -var predecl []types.Type // initialized lazily - -func predeclared() []types.Type { - predeclOnce.Do(func() { - // initialize lazily to be sure that all - // elements have been initialized before - predecl = []types.Type{ // basic types - types.Typ[types.Bool], - types.Typ[types.Int], - types.Typ[types.Int8], - types.Typ[types.Int16], - types.Typ[types.Int32], - types.Typ[types.Int64], - types.Typ[types.Uint], - types.Typ[types.Uint8], - types.Typ[types.Uint16], - types.Typ[types.Uint32], - types.Typ[types.Uint64], - types.Typ[types.Uintptr], - types.Typ[types.Float32], - types.Typ[types.Float64], - types.Typ[types.Complex64], - types.Typ[types.Complex128], - types.Typ[types.String], - - // basic type aliases - types.Universe.Lookup("byte").Type(), - types.Universe.Lookup("rune").Type(), - - // error - types.Universe.Lookup("error").Type(), - - // untyped types - types.Typ[types.UntypedBool], - types.Typ[types.UntypedInt], - types.Typ[types.UntypedRune], - types.Typ[types.UntypedFloat], - types.Typ[types.UntypedComplex], - types.Typ[types.UntypedString], - types.Typ[types.UntypedNil], - - // package unsafe - types.Typ[types.UnsafePointer], - - // invalid type - types.Typ[types.Invalid], // only appears in packages with errors - - // used internally by gc; never used by this package or in .a files - anyType{}, - } - predecl = append(predecl, additionalPredeclared()...) - }) - return predecl -} - -type anyType struct{} - -func (t anyType) Underlying() types.Type { return t } -func (t anyType) String() string { return "any" } diff --git a/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go b/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go index b1223713..e6c5d51f 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/gcimporter.go @@ -29,7 +29,6 @@ import ( "go/token" "go/types" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -221,7 +220,7 @@ func Import(packages map[string]*types.Package, path, srcDir string, lookup func switch hdr { case "$$B\n": var data []byte - data, err = ioutil.ReadAll(buf) + data, err = io.ReadAll(buf) if err != nil { break } @@ -233,14 +232,19 @@ func Import(packages map[string]*types.Package, path, srcDir string, lookup func // Select appropriate importer. if len(data) > 0 { switch data[0] { - case 'v', 'c', 'd': // binary, till go1.10 + case 'v', 'c', 'd': + // binary: emitted by cmd/compile till go1.10; obsolete. return nil, fmt.Errorf("binary (%c) import format is no longer supported", data[0]) - case 'i': // indexed, till go1.19 + case 'i': + // indexed: emitted by cmd/compile till go1.19; + // now used only for serializing go/types. + // See https://github.com/golang/go/issues/69491. _, pkg, err := IImportData(fset, packages, data[1:], id) return pkg, err - case 'u': // unified, from go1.20 + case 'u': + // unified: emitted by cmd/compile since go1.20. _, pkg, err := UImportData(fset, packages, data[1:size], id) return pkg, err @@ -260,13 +264,6 @@ func Import(packages map[string]*types.Package, path, srcDir string, lookup func return } -func deref(typ types.Type) types.Type { - if p, _ := typ.(*types.Pointer); p != nil { - return p.Elem() - } - return typ -} - type byPath []*types.Package func (a byPath) Len() int { return len(a) } diff --git a/vendor/golang.org/x/tools/internal/gcimporter/iexport.go b/vendor/golang.org/x/tools/internal/gcimporter/iexport.go index 6103dd71..1e19fbed 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/iexport.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/iexport.go @@ -2,9 +2,227 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Indexed binary package export. -// This file was derived from $GOROOT/src/cmd/compile/internal/gc/iexport.go; -// see that file for specification of the format. +// Indexed package export. +// +// The indexed export data format is an evolution of the previous +// binary export data format. Its chief contribution is introducing an +// index table, which allows efficient random access of individual +// declarations and inline function bodies. In turn, this allows +// avoiding unnecessary work for compilation units that import large +// packages. +// +// +// The top-level data format is structured as: +// +// Header struct { +// Tag byte // 'i' +// Version uvarint +// StringSize uvarint +// DataSize uvarint +// } +// +// Strings [StringSize]byte +// Data [DataSize]byte +// +// MainIndex []struct{ +// PkgPath stringOff +// PkgName stringOff +// PkgHeight uvarint +// +// Decls []struct{ +// Name stringOff +// Offset declOff +// } +// } +// +// Fingerprint [8]byte +// +// uvarint means a uint64 written out using uvarint encoding. +// +// []T means a uvarint followed by that many T objects. In other +// words: +// +// Len uvarint +// Elems [Len]T +// +// stringOff means a uvarint that indicates an offset within the +// Strings section. At that offset is another uvarint, followed by +// that many bytes, which form the string value. +// +// declOff means a uvarint that indicates an offset within the Data +// section where the associated declaration can be found. +// +// +// There are five kinds of declarations, distinguished by their first +// byte: +// +// type Var struct { +// Tag byte // 'V' +// Pos Pos +// Type typeOff +// } +// +// type Func struct { +// Tag byte // 'F' or 'G' +// Pos Pos +// TypeParams []typeOff // only present if Tag == 'G' +// Signature Signature +// } +// +// type Const struct { +// Tag byte // 'C' +// Pos Pos +// Value Value +// } +// +// type Type struct { +// Tag byte // 'T' or 'U' +// Pos Pos +// TypeParams []typeOff // only present if Tag == 'U' +// Underlying typeOff +// +// Methods []struct{ // omitted if Underlying is an interface type +// Pos Pos +// Name stringOff +// Recv Param +// Signature Signature +// } +// } +// +// type Alias struct { +// Tag byte // 'A' or 'B' +// Pos Pos +// TypeParams []typeOff // only present if Tag == 'B' +// Type typeOff +// } +// +// // "Automatic" declaration of each typeparam +// type TypeParam struct { +// Tag byte // 'P' +// Pos Pos +// Implicit bool +// Constraint typeOff +// } +// +// typeOff means a uvarint that either indicates a predeclared type, +// or an offset into the Data section. If the uvarint is less than +// predeclReserved, then it indicates the index into the predeclared +// types list (see predeclared in bexport.go for order). Otherwise, +// subtracting predeclReserved yields the offset of a type descriptor. +// +// Value means a type, kind, and type-specific value. See +// (*exportWriter).value for details. +// +// +// There are twelve kinds of type descriptors, distinguished by an itag: +// +// type DefinedType struct { +// Tag itag // definedType +// Name stringOff +// PkgPath stringOff +// } +// +// type PointerType struct { +// Tag itag // pointerType +// Elem typeOff +// } +// +// type SliceType struct { +// Tag itag // sliceType +// Elem typeOff +// } +// +// type ArrayType struct { +// Tag itag // arrayType +// Len uint64 +// Elem typeOff +// } +// +// type ChanType struct { +// Tag itag // chanType +// Dir uint64 // 1 RecvOnly; 2 SendOnly; 3 SendRecv +// Elem typeOff +// } +// +// type MapType struct { +// Tag itag // mapType +// Key typeOff +// Elem typeOff +// } +// +// type FuncType struct { +// Tag itag // signatureType +// PkgPath stringOff +// Signature Signature +// } +// +// type StructType struct { +// Tag itag // structType +// PkgPath stringOff +// Fields []struct { +// Pos Pos +// Name stringOff +// Type typeOff +// Embedded bool +// Note stringOff +// } +// } +// +// type InterfaceType struct { +// Tag itag // interfaceType +// PkgPath stringOff +// Embeddeds []struct { +// Pos Pos +// Type typeOff +// } +// Methods []struct { +// Pos Pos +// Name stringOff +// Signature Signature +// } +// } +// +// // Reference to a type param declaration +// type TypeParamType struct { +// Tag itag // typeParamType +// Name stringOff +// PkgPath stringOff +// } +// +// // Instantiation of a generic type (like List[T2] or List[int]) +// type InstanceType struct { +// Tag itag // instanceType +// Pos pos +// TypeArgs []typeOff +// BaseType typeOff +// } +// +// type UnionType struct { +// Tag itag // interfaceType +// Terms []struct { +// tilde bool +// Type typeOff +// } +// } +// +// +// +// type Signature struct { +// Params []Param +// Results []Param +// Variadic bool // omitted if Results is empty +// } +// +// type Param struct { +// Pos Pos +// Name stringOff +// Type typOff +// } +// +// +// Pos encodes a file:line:column triple, incorporating a simple delta +// encoding scheme within a data object. See exportWriter.pos for +// details. package gcimporter @@ -23,8 +241,7 @@ import ( "strings" "golang.org/x/tools/go/types/objectpath" - "golang.org/x/tools/internal/tokeninternal" - "golang.org/x/tools/internal/typeparams" + "golang.org/x/tools/internal/aliases" ) // IExportShallow encodes "shallow" export data for the specified package. @@ -223,7 +440,7 @@ func (p *iexporter) encodeFile(w *intWriter, file *token.File, needed []uint64) // Sort the set of needed offsets. Duplicates are harmless. sort.Slice(needed, func(i, j int) bool { return needed[i] < needed[j] }) - lines := tokeninternal.GetLines(file) // byte offset of each line start + lines := file.Lines() // byte offset of each line start w.uint64(uint64(len(lines))) // Rather than record the entire array of line start offsets, @@ -464,7 +681,7 @@ func (p *iexporter) doDecl(obj types.Object) { switch obj := obj.(type) { case *types.Var: - w.tag('V') + w.tag(varTag) w.pos(obj.Pos()) w.typ(obj.Type(), obj.Pkg()) @@ -481,10 +698,10 @@ func (p *iexporter) doDecl(obj types.Object) { } // Function. - if typeparams.ForSignature(sig).Len() == 0 { - w.tag('F') + if sig.TypeParams().Len() == 0 { + w.tag(funcTag) } else { - w.tag('G') + w.tag(genericFuncTag) } w.pos(obj.Pos()) // The tparam list of the function type is the declaration of the type @@ -494,27 +711,27 @@ func (p *iexporter) doDecl(obj types.Object) { // // While importing the type parameters, tparamList computes and records // their export name, so that it can be later used when writing the index. - if tparams := typeparams.ForSignature(sig); tparams.Len() > 0 { + if tparams := sig.TypeParams(); tparams.Len() > 0 { w.tparamList(obj.Name(), tparams, obj.Pkg()) } w.signature(sig) case *types.Const: - w.tag('C') + w.tag(constTag) w.pos(obj.Pos()) w.value(obj.Type(), obj.Val()) case *types.TypeName: t := obj.Type() - if tparam, ok := t.(*typeparams.TypeParam); ok { - w.tag('P') + if tparam, ok := types.Unalias(t).(*types.TypeParam); ok { + w.tag(typeParamTag) w.pos(obj.Pos()) constraint := tparam.Constraint() if p.version >= iexportVersionGo1_18 { implicit := false - if iface, _ := constraint.(*types.Interface); iface != nil { - implicit = typeparams.IsImplicit(iface) + if iface, _ := types.Unalias(constraint).(*types.Interface); iface != nil { + implicit = iface.IsImplicit() } w.bool(implicit) } @@ -523,8 +740,26 @@ func (p *iexporter) doDecl(obj types.Object) { } if obj.IsAlias() { - w.tag('A') + alias, materialized := t.(*types.Alias) // may fail when aliases are not enabled + + var tparams *types.TypeParamList + if materialized { + tparams = aliases.TypeParams(alias) + } + if tparams.Len() == 0 { + w.tag(aliasTag) + } else { + w.tag(genericAliasTag) + } w.pos(obj.Pos()) + if tparams.Len() > 0 { + w.tparamList(obj.Name(), tparams, obj.Pkg()) + } + if materialized { + // Preserve materialized aliases, + // even of non-exported types. + t = aliases.Rhs(alias) + } w.typ(t, obj.Pkg()) break } @@ -535,20 +770,20 @@ func (p *iexporter) doDecl(obj types.Object) { panic(internalErrorf("%s is not a defined type", t)) } - if typeparams.ForNamed(named).Len() == 0 { - w.tag('T') + if named.TypeParams().Len() == 0 { + w.tag(typeTag) } else { - w.tag('U') + w.tag(genericTypeTag) } w.pos(obj.Pos()) - if typeparams.ForNamed(named).Len() > 0 { + if named.TypeParams().Len() > 0 { // While importing the type parameters, tparamList computes and records // their export name, so that it can be later used when writing the index. - w.tparamList(obj.Name(), typeparams.ForNamed(named), obj.Pkg()) + w.tparamList(obj.Name(), named.TypeParams(), obj.Pkg()) } - underlying := obj.Type().Underlying() + underlying := named.Underlying() w.typ(underlying, obj.Pkg()) if types.IsInterface(t) { @@ -565,7 +800,7 @@ func (p *iexporter) doDecl(obj types.Object) { // Receiver type parameters are type arguments of the receiver type, so // their name must be qualified before exporting recv. - if rparams := typeparams.RecvTypeParams(sig); rparams.Len() > 0 { + if rparams := sig.RecvTypeParams(); rparams.Len() > 0 { prefix := obj.Name() + "." + m.Name() for i := 0; i < rparams.Len(); i++ { rparam := rparams.At(i) @@ -739,20 +974,31 @@ func (w *exportWriter) doTyp(t types.Type, pkg *types.Package) { }() } switch t := t.(type) { + case *types.Alias: + if targs := aliases.TypeArgs(t); targs.Len() > 0 { + w.startType(instanceType) + w.pos(t.Obj().Pos()) + w.typeList(targs, pkg) + w.typ(aliases.Origin(t), pkg) + return + } + w.startType(aliasType) + w.qualifiedType(t.Obj()) + case *types.Named: - if targs := typeparams.NamedTypeArgs(t); targs.Len() > 0 { + if targs := t.TypeArgs(); targs.Len() > 0 { w.startType(instanceType) // TODO(rfindley): investigate if this position is correct, and if it // matters. w.pos(t.Obj().Pos()) w.typeList(targs, pkg) - w.typ(typeparams.NamedTypeOrigin(t), pkg) + w.typ(t.Origin(), pkg) return } w.startType(definedType) w.qualifiedType(t.Obj()) - case *typeparams.TypeParam: + case *types.TypeParam: w.startType(typeParamType) w.qualifiedType(t.Obj()) @@ -844,7 +1090,7 @@ func (w *exportWriter) doTyp(t types.Type, pkg *types.Package) { for i := 0; i < n; i++ { ft := t.EmbeddedType(i) tPkg := pkg - if named, _ := ft.(*types.Named); named != nil { + if named, _ := types.Unalias(ft).(*types.Named); named != nil { w.pos(named.Obj().Pos()) } else { w.pos(token.NoPos) @@ -868,7 +1114,7 @@ func (w *exportWriter) doTyp(t types.Type, pkg *types.Package) { w.signature(sig) } - case *typeparams.Union: + case *types.Union: w.startType(unionType) nt := t.Len() w.uint64(uint64(nt)) @@ -948,14 +1194,14 @@ func (w *exportWriter) signature(sig *types.Signature) { } } -func (w *exportWriter) typeList(ts *typeparams.TypeList, pkg *types.Package) { +func (w *exportWriter) typeList(ts *types.TypeList, pkg *types.Package) { w.uint64(uint64(ts.Len())) for i := 0; i < ts.Len(); i++ { w.typ(ts.At(i), pkg) } } -func (w *exportWriter) tparamList(prefix string, list *typeparams.TypeParamList, pkg *types.Package) { +func (w *exportWriter) tparamList(prefix string, list *types.TypeParamList, pkg *types.Package) { ll := uint64(list.Len()) w.uint64(ll) for i := 0; i < list.Len(); i++ { @@ -973,7 +1219,7 @@ const blankMarker = "$" // differs from its actual object name: it is prefixed with a qualifier, and // blank type parameter names are disambiguated by their index in the type // parameter list. -func tparamExportName(prefix string, tparam *typeparams.TypeParam) string { +func tparamExportName(prefix string, tparam *types.TypeParam) string { assert(prefix != "") name := tparam.Obj().Name() if name == "_" { diff --git a/vendor/golang.org/x/tools/internal/gcimporter/iimport.go b/vendor/golang.org/x/tools/internal/gcimporter/iimport.go index 8e64cf64..21908a15 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/iimport.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/iimport.go @@ -3,7 +3,7 @@ // license that can be found in the LICENSE file. // Indexed package import. -// See cmd/compile/internal/gc/iexport.go for the export data format. +// See iexport.go for the export data format. // This file is a copy of $GOROOT/src/go/internal/gcimporter/iimport.go. @@ -22,7 +22,8 @@ import ( "strings" "golang.org/x/tools/go/types/objectpath" - "golang.org/x/tools/internal/typeparams" + "golang.org/x/tools/internal/aliases" + "golang.org/x/tools/internal/typesinternal" ) type intReader struct { @@ -52,6 +53,7 @@ const ( iexportVersionPosCol = 1 iexportVersionGo1_18 = 2 iexportVersionGenerics = 2 + iexportVersion = iexportVersionGenerics iexportVersionCurrent = 2 ) @@ -79,6 +81,20 @@ const ( typeParamType instanceType unionType + aliasType +) + +// Object tags +const ( + varTag = 'V' + funcTag = 'F' + genericFuncTag = 'G' + constTag = 'C' + aliasTag = 'A' + genericAliasTag = 'B' + typeParamTag = 'P' + typeTag = 'T' + genericTypeTag = 'U' ) // IImportData imports a package from the serialized package data @@ -195,6 +211,7 @@ func iimportCommon(fset *token.FileSet, getPackages GetPackagesFunc, data []byte p := iimporter{ version: int(version), ipath: path, + aliases: aliases.Enabled(), shallow: shallow, reportf: reportf, @@ -225,6 +242,7 @@ func iimportCommon(fset *token.FileSet, getPackages GetPackagesFunc, data []byte // Gather the relevant packages from the manifest. items := make([]GetPackagesItem, r.uint64()) + uniquePkgPaths := make(map[string]bool) for i := range items { pkgPathOff := r.uint64() pkgPath := p.stringAt(pkgPathOff) @@ -249,6 +267,12 @@ func iimportCommon(fset *token.FileSet, getPackages GetPackagesFunc, data []byte } items[i].nameIndex = nameIndex + + uniquePkgPaths[pkgPath] = true + } + // Debugging #63822; hypothesis: there are duplicate PkgPaths. + if len(uniquePkgPaths) != len(items) { + reportf("found duplicate PkgPaths while reading export data manifest: %v", items) } // Request packages all at once from the client, @@ -316,12 +340,12 @@ func iimportCommon(fset *token.FileSet, getPackages GetPackagesFunc, data []byte } // SetConstraint can't be called if the constraint type is not yet complete. - // When type params are created in the 'P' case of (*importReader).obj(), + // When type params are created in the typeParamTag case of (*importReader).obj(), // the associated constraint type may not be complete due to recursion. // Therefore, we defer calling SetConstraint there, and call it here instead // after all types are complete. for _, d := range p.later { - typeparams.SetTypeParamConstraint(d.t, d.constraint) + d.t.SetConstraint(d.constraint) } for _, typ := range p.interfaceList { @@ -339,7 +363,7 @@ func iimportCommon(fset *token.FileSet, getPackages GetPackagesFunc, data []byte } type setConstraintArgs struct { - t *typeparams.TypeParam + t *types.TypeParam constraint types.Type } @@ -347,6 +371,7 @@ type iimporter struct { version int ipath string + aliases bool shallow bool reportf ReportFunc // if non-nil, used to report bugs @@ -516,7 +541,7 @@ func canReuse(def *types.Named, rhs types.Type) bool { if def == nil { return true } - iface, _ := rhs.(*types.Interface) + iface, _ := types.Unalias(rhs).(*types.Interface) if iface == nil { return true } @@ -538,25 +563,29 @@ func (r *importReader) obj(name string) { pos := r.pos() switch tag { - case 'A': + case aliasTag, genericAliasTag: + var tparams []*types.TypeParam + if tag == genericAliasTag { + tparams = r.tparamList() + } typ := r.typ() + obj := aliases.NewAlias(r.p.aliases, pos, r.currPkg, name, typ, tparams) + r.declare(obj) - r.declare(types.NewTypeName(pos, r.currPkg, name, typ)) - - case 'C': + case constTag: typ, val := r.value() r.declare(types.NewConst(pos, r.currPkg, name, typ, val)) - case 'F', 'G': - var tparams []*typeparams.TypeParam - if tag == 'G' { + case funcTag, genericFuncTag: + var tparams []*types.TypeParam + if tag == genericFuncTag { tparams = r.tparamList() } sig := r.signature(nil, nil, tparams) r.declare(types.NewFunc(pos, r.currPkg, name, sig)) - case 'T', 'U': + case typeTag, genericTypeTag: // Types can be recursive. We need to setup a stub // declaration before recursing. obj := types.NewTypeName(pos, r.currPkg, name, nil) @@ -564,9 +593,9 @@ func (r *importReader) obj(name string) { // Declare obj before calling r.tparamList, so the new type name is recognized // if used in the constraint of one of its own typeparams (see #48280). r.declare(obj) - if tag == 'U' { + if tag == genericTypeTag { tparams := r.tparamList() - typeparams.SetForNamed(named, tparams) + named.SetTypeParams(tparams) } underlying := r.p.typAt(r.uint64(), named).Underlying() @@ -581,14 +610,13 @@ func (r *importReader) obj(name string) { // If the receiver has any targs, set those as the // rparams of the method (since those are the // typeparams being used in the method sig/body). - base := baseType(recv.Type()) - assert(base != nil) - targs := typeparams.NamedTypeArgs(base) - var rparams []*typeparams.TypeParam + _, recvNamed := typesinternal.ReceiverNamed(recv) + targs := recvNamed.TypeArgs() + var rparams []*types.TypeParam if targs.Len() > 0 { - rparams = make([]*typeparams.TypeParam, targs.Len()) + rparams = make([]*types.TypeParam, targs.Len()) for i := range rparams { - rparams[i] = targs.At(i).(*typeparams.TypeParam) + rparams[i] = types.Unalias(targs.At(i)).(*types.TypeParam) } } msig := r.signature(recv, rparams, nil) @@ -597,7 +625,7 @@ func (r *importReader) obj(name string) { } } - case 'P': + case typeParamTag: // We need to "declare" a typeparam in order to have a name that // can be referenced recursively (if needed) in the type param's // bound. @@ -606,7 +634,7 @@ func (r *importReader) obj(name string) { } name0 := tparamName(name) tn := types.NewTypeName(pos, r.currPkg, name0, nil) - t := typeparams.NewTypeParam(tn, nil) + t := types.NewTypeParam(tn, nil) // To handle recursive references to the typeparam within its // bound, save the partial type in tparamIndex before reading the bounds. @@ -618,11 +646,11 @@ func (r *importReader) obj(name string) { } constraint := r.typ() if implicit { - iface, _ := constraint.(*types.Interface) + iface, _ := types.Unalias(constraint).(*types.Interface) if iface == nil { errorf("non-interface constraint marked implicit") } - typeparams.MarkImplicit(iface) + iface.MarkImplicit() } // The constraint type may not be complete, if we // are in the middle of a type recursion involving type @@ -630,7 +658,7 @@ func (r *importReader) obj(name string) { // completely set up all types in ImportData. r.p.later = append(r.p.later, setConstraintArgs{t: t, constraint: constraint}) - case 'V': + case varTag: typ := r.typ() r.declare(types.NewVar(pos, r.currPkg, name, typ)) @@ -825,7 +853,7 @@ func (r *importReader) typ() types.Type { } func isInterface(t types.Type) bool { - _, ok := t.(*types.Interface) + _, ok := types.Unalias(t).(*types.Interface) return ok } @@ -835,7 +863,7 @@ func (r *importReader) string() string { return r.p.stringAt(r.uint64()) } func (r *importReader) doType(base *types.Named) (res types.Type) { k := r.kind() if debug { - r.p.trace("importing type %d (base: %s)", k, base) + r.p.trace("importing type %d (base: %v)", k, base) r.p.indent++ defer func() { r.p.indent-- @@ -847,7 +875,7 @@ func (r *importReader) doType(base *types.Named) (res types.Type) { errorf("unexpected kind tag in %q: %v", r.p.ipath, k) return nil - case definedType: + case aliasType, definedType: pkg, name := r.qualifiedIdent() r.p.doDecl(pkg, name) return pkg.Scope().Lookup(name).(*types.TypeName).Type() @@ -932,7 +960,7 @@ func (r *importReader) doType(base *types.Named) (res types.Type) { methods[i] = method } - typ := newInterface(methods, embeddeds) + typ := types.NewInterfaceType(methods, embeddeds) r.p.interfaceList = append(r.p.interfaceList, typ) return typ @@ -966,7 +994,7 @@ func (r *importReader) doType(base *types.Named) (res types.Type) { // The imported instantiated type doesn't include any methods, so // we must always use the methods of the base (orig) type. // TODO provide a non-nil *Environment - t, _ := typeparams.Instantiate(nil, baseType, targs, false) + t, _ := types.Instantiate(nil, baseType, targs, false) // Workaround for golang/go#61561. See the doc for instanceList for details. r.p.instanceList = append(r.p.instanceList, t) @@ -976,11 +1004,11 @@ func (r *importReader) doType(base *types.Named) (res types.Type) { if r.p.version < iexportVersionGenerics { errorf("unexpected instantiation type") } - terms := make([]*typeparams.Term, r.uint64()) + terms := make([]*types.Term, r.uint64()) for i := range terms { - terms[i] = typeparams.NewTerm(r.bool(), r.typ()) + terms[i] = types.NewTerm(r.bool(), r.typ()) } - return typeparams.NewUnion(terms) + return types.NewUnion(terms) } } @@ -1008,23 +1036,23 @@ func (r *importReader) objectPathObject() types.Object { return obj } -func (r *importReader) signature(recv *types.Var, rparams []*typeparams.TypeParam, tparams []*typeparams.TypeParam) *types.Signature { +func (r *importReader) signature(recv *types.Var, rparams []*types.TypeParam, tparams []*types.TypeParam) *types.Signature { params := r.paramList() results := r.paramList() variadic := params.Len() > 0 && r.bool() - return typeparams.NewSignatureType(recv, rparams, tparams, params, results, variadic) + return types.NewSignatureType(recv, rparams, tparams, params, results, variadic) } -func (r *importReader) tparamList() []*typeparams.TypeParam { +func (r *importReader) tparamList() []*types.TypeParam { n := r.uint64() if n == 0 { return nil } - xs := make([]*typeparams.TypeParam, n) + xs := make([]*types.TypeParam, n) for i := range xs { // Note: the standard library importer is tolerant of nil types here, // though would panic in SetTypeParams. - xs[i] = r.typ().(*typeparams.TypeParam) + xs[i] = types.Unalias(r.typ()).(*types.TypeParam) } return xs } @@ -1071,13 +1099,3 @@ func (r *importReader) byte() byte { } return x } - -func baseType(typ types.Type) *types.Named { - // pointer receivers are never types.Named types - if p, _ := typ.(*types.Pointer); p != nil { - typ = p.Elem() - } - // receiver base types are always (possibly generic) types.Named types - n, _ := typ.(*types.Named) - return n -} diff --git a/vendor/golang.org/x/tools/internal/gcimporter/newInterface10.go b/vendor/golang.org/x/tools/internal/gcimporter/newInterface10.go deleted file mode 100644 index 8b163e3d..00000000 --- a/vendor/golang.org/x/tools/internal/gcimporter/newInterface10.go +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !go1.11 -// +build !go1.11 - -package gcimporter - -import "go/types" - -func newInterface(methods []*types.Func, embeddeds []types.Type) *types.Interface { - named := make([]*types.Named, len(embeddeds)) - for i, e := range embeddeds { - var ok bool - named[i], ok = e.(*types.Named) - if !ok { - panic("embedding of non-defined interfaces in interfaces is not supported before Go 1.11") - } - } - return types.NewInterface(methods, named) -} diff --git a/vendor/golang.org/x/tools/internal/gcimporter/newInterface11.go b/vendor/golang.org/x/tools/internal/gcimporter/newInterface11.go deleted file mode 100644 index 49984f40..00000000 --- a/vendor/golang.org/x/tools/internal/gcimporter/newInterface11.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.11 -// +build go1.11 - -package gcimporter - -import "go/types" - -func newInterface(methods []*types.Func, embeddeds []types.Type) *types.Interface { - return types.NewInterfaceType(methods, embeddeds) -} diff --git a/vendor/golang.org/x/tools/internal/gcimporter/predeclared.go b/vendor/golang.org/x/tools/internal/gcimporter/predeclared.go new file mode 100644 index 00000000..907c8557 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/gcimporter/predeclared.go @@ -0,0 +1,91 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gcimporter + +import ( + "go/types" + "sync" +) + +// predecl is a cache for the predeclared types in types.Universe. +// +// Cache a distinct result based on the runtime value of any. +// The pointer value of the any type varies based on GODEBUG settings. +var predeclMu sync.Mutex +var predecl map[types.Type][]types.Type + +func predeclared() []types.Type { + anyt := types.Universe.Lookup("any").Type() + + predeclMu.Lock() + defer predeclMu.Unlock() + + if pre, ok := predecl[anyt]; ok { + return pre + } + + if predecl == nil { + predecl = make(map[types.Type][]types.Type) + } + + decls := []types.Type{ // basic types + types.Typ[types.Bool], + types.Typ[types.Int], + types.Typ[types.Int8], + types.Typ[types.Int16], + types.Typ[types.Int32], + types.Typ[types.Int64], + types.Typ[types.Uint], + types.Typ[types.Uint8], + types.Typ[types.Uint16], + types.Typ[types.Uint32], + types.Typ[types.Uint64], + types.Typ[types.Uintptr], + types.Typ[types.Float32], + types.Typ[types.Float64], + types.Typ[types.Complex64], + types.Typ[types.Complex128], + types.Typ[types.String], + + // basic type aliases + types.Universe.Lookup("byte").Type(), + types.Universe.Lookup("rune").Type(), + + // error + types.Universe.Lookup("error").Type(), + + // untyped types + types.Typ[types.UntypedBool], + types.Typ[types.UntypedInt], + types.Typ[types.UntypedRune], + types.Typ[types.UntypedFloat], + types.Typ[types.UntypedComplex], + types.Typ[types.UntypedString], + types.Typ[types.UntypedNil], + + // package unsafe + types.Typ[types.UnsafePointer], + + // invalid type + types.Typ[types.Invalid], // only appears in packages with errors + + // used internally by gc; never used by this package or in .a files + anyType{}, + + // comparable + types.Universe.Lookup("comparable").Type(), + + // any + anyt, + } + + predecl[anyt] = decls + return decls +} + +type anyType struct{} + +func (t anyType) Underlying() types.Type { return t } +func (t anyType) String() string { return "any" } diff --git a/vendor/golang.org/x/tools/internal/gcimporter/support_go117.go b/vendor/golang.org/x/tools/internal/gcimporter/support_go117.go deleted file mode 100644 index d892273e..00000000 --- a/vendor/golang.org/x/tools/internal/gcimporter/support_go117.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !go1.18 -// +build !go1.18 - -package gcimporter - -import "go/types" - -const iexportVersion = iexportVersionGo1_11 - -func additionalPredeclared() []types.Type { - return nil -} diff --git a/vendor/golang.org/x/tools/internal/gcimporter/support_go118.go b/vendor/golang.org/x/tools/internal/gcimporter/support_go118.go deleted file mode 100644 index edbe6ea7..00000000 --- a/vendor/golang.org/x/tools/internal/gcimporter/support_go118.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.18 -// +build go1.18 - -package gcimporter - -import "go/types" - -const iexportVersion = iexportVersionGenerics - -// additionalPredeclared returns additional predeclared types in go.1.18. -func additionalPredeclared() []types.Type { - return []types.Type{ - // comparable - types.Universe.Lookup("comparable").Type(), - - // any - types.Universe.Lookup("any").Type(), - } -} - -// See cmd/compile/internal/types.SplitVargenSuffix. -func splitVargenSuffix(name string) (base, suffix string) { - i := len(name) - for i > 0 && name[i-1] >= '0' && name[i-1] <= '9' { - i-- - } - const dot = "·" - if i >= len(dot) && name[i-len(dot):i] == dot { - i -= len(dot) - return name[:i], name[i:] - } - return name, "" -} diff --git a/vendor/golang.org/x/tools/internal/gcimporter/unified_no.go b/vendor/golang.org/x/tools/internal/gcimporter/unified_no.go deleted file mode 100644 index 286bf445..00000000 --- a/vendor/golang.org/x/tools/internal/gcimporter/unified_no.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !(go1.18 && goexperiment.unified) -// +build !go1.18 !goexperiment.unified - -package gcimporter - -const unifiedIR = false diff --git a/vendor/golang.org/x/tools/internal/gcimporter/unified_yes.go b/vendor/golang.org/x/tools/internal/gcimporter/unified_yes.go deleted file mode 100644 index b5d69ffb..00000000 --- a/vendor/golang.org/x/tools/internal/gcimporter/unified_yes.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.18 && goexperiment.unified -// +build go1.18,goexperiment.unified - -package gcimporter - -const unifiedIR = true diff --git a/vendor/golang.org/x/tools/internal/gcimporter/ureader_no.go b/vendor/golang.org/x/tools/internal/gcimporter/ureader_no.go deleted file mode 100644 index 8eb20729..00000000 --- a/vendor/golang.org/x/tools/internal/gcimporter/ureader_no.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !go1.18 -// +build !go1.18 - -package gcimporter - -import ( - "fmt" - "go/token" - "go/types" -) - -func UImportData(fset *token.FileSet, imports map[string]*types.Package, data []byte, path string) (_ int, pkg *types.Package, err error) { - err = fmt.Errorf("go/tools compiled with a Go version earlier than 1.18 cannot read unified IR export data") - return -} diff --git a/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go b/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go index b977435f..1db40861 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/ureader_yes.go @@ -4,9 +4,6 @@ // Derived from go/internal/gcimporter/ureader.go -//go:build go1.18 -// +build go1.18 - package gcimporter import ( @@ -16,6 +13,7 @@ import ( "sort" "strings" + "golang.org/x/tools/internal/aliases" "golang.org/x/tools/internal/pkgbits" ) @@ -28,6 +26,7 @@ type pkgReader struct { ctxt *types.Context imports map[string]*types.Package // previously imported packages, indexed by path + aliases bool // create types.Alias nodes // lazily initialized arrays corresponding to the unified IR // PosBase, Pkg, and Type sections, respectively. @@ -53,8 +52,7 @@ func (pr *pkgReader) later(fn func()) { // See cmd/compile/internal/noder.derivedInfo. type derivedInfo struct { - idx pkgbits.Index - needed bool + idx pkgbits.Index } // See cmd/compile/internal/noder.typeInfo. @@ -101,6 +99,7 @@ func readUnifiedPackage(fset *token.FileSet, ctxt *types.Context, imports map[st ctxt: ctxt, imports: imports, + aliases: aliases.Enabled(), posBases: make([]string, input.NumElems(pkgbits.RelocPosBase)), pkgs: make([]*types.Package, input.NumElems(pkgbits.RelocPkg)), @@ -110,13 +109,17 @@ func readUnifiedPackage(fset *token.FileSet, ctxt *types.Context, imports map[st r := pr.newReader(pkgbits.RelocMeta, pkgbits.PublicRootIdx, pkgbits.SyncPublic) pkg := r.pkg() - r.Bool() // has init + if r.Version().Has(pkgbits.HasInit) { + r.Bool() + } for i, n := 0, r.Len(); i < n; i++ { // As if r.obj(), but avoiding the Scope.Lookup call, // to avoid eager loading of imports. r.Sync(pkgbits.SyncObject) - assert(!r.Bool()) + if r.Version().Has(pkgbits.DerivedFuncInstance) { + assert(!r.Bool()) + } r.p.objIdx(r.Reloc(pkgbits.RelocObj)) assert(r.Len() == 0) } @@ -165,7 +168,7 @@ type readerDict struct { // tparams is a slice of the constructed TypeParams for the element. tparams []*types.TypeParam - // devived is a slice of types derived from tparams, which may be + // derived is a slice of types derived from tparams, which may be // instantiated while reading the current element. derived []derivedInfo derivedTypes []types.Type // lazily instantiated from derived @@ -471,7 +474,9 @@ func (r *reader) param() *types.Var { func (r *reader) obj() (types.Object, []types.Type) { r.Sync(pkgbits.SyncObject) - assert(!r.Bool()) + if r.Version().Has(pkgbits.DerivedFuncInstance) { + assert(!r.Bool()) + } pkg, name := r.p.objIdx(r.Reloc(pkgbits.RelocObj)) obj := pkgScope(pkg).Lookup(name) @@ -525,8 +530,12 @@ func (pr *pkgReader) objIdx(idx pkgbits.Index) (*types.Package, string) { case pkgbits.ObjAlias: pos := r.pos() + var tparams []*types.TypeParam + if r.Version().Has(pkgbits.AliasTypeParamNames) { + tparams = r.typeParamNames() + } typ := r.typ() - declare(types.NewTypeName(pos, objPkg, objName, typ)) + declare(aliases.NewAlias(r.p.aliases, pos, objPkg, objName, typ, tparams)) case pkgbits.ObjConst: pos := r.pos() @@ -553,7 +562,7 @@ func (pr *pkgReader) objIdx(idx pkgbits.Index) (*types.Package, string) { // If the underlying type is an interface, we need to // duplicate its methods so we can replace the receiver // parameter's type (#49906). - if iface, ok := underlying.(*types.Interface); ok && iface.NumExplicitMethods() != 0 { + if iface, ok := types.Unalias(underlying).(*types.Interface); ok && iface.NumExplicitMethods() != 0 { methods := make([]*types.Func, iface.NumExplicitMethods()) for i := range methods { fn := iface.ExplicitMethod(i) @@ -632,7 +641,10 @@ func (pr *pkgReader) objDictIdx(idx pkgbits.Index) *readerDict { dict.derived = make([]derivedInfo, r.Len()) dict.derivedTypes = make([]types.Type, len(dict.derived)) for i := range dict.derived { - dict.derived[i] = derivedInfo{r.Reloc(pkgbits.RelocType), r.Bool()} + dict.derived[i] = derivedInfo{idx: r.Reloc(pkgbits.RelocType)} + if r.Version().Has(pkgbits.DerivedInfoNeeded) { + assert(!r.Bool()) + } } pr.retireReader(r) @@ -726,3 +738,17 @@ func pkgScope(pkg *types.Package) *types.Scope { } return types.Universe } + +// See cmd/compile/internal/types.SplitVargenSuffix. +func splitVargenSuffix(name string) (base, suffix string) { + i := len(name) + for i > 0 && name[i-1] >= '0' && name[i-1] <= '9' { + i-- + } + const dot = "·" + if i >= len(dot) && name[i-len(dot):i] == dot { + i -= len(dot) + return name[:i], name[i:] + } + return name, "" +} diff --git a/vendor/golang.org/x/tools/internal/gocommand/invoke.go b/vendor/golang.org/x/tools/internal/gocommand/invoke.go index 53cf66da..e333efc8 100644 --- a/vendor/golang.org/x/tools/internal/gocommand/invoke.go +++ b/vendor/golang.org/x/tools/internal/gocommand/invoke.go @@ -8,12 +8,14 @@ package gocommand import ( "bytes" "context" + "encoding/json" "errors" "fmt" "io" "log" "os" - "reflect" + "os/exec" + "path/filepath" "regexp" "runtime" "strconv" @@ -21,12 +23,9 @@ import ( "sync" "time" - exec "golang.org/x/sys/execabs" - "golang.org/x/tools/internal/event" "golang.org/x/tools/internal/event/keys" "golang.org/x/tools/internal/event/label" - "golang.org/x/tools/internal/event/tag" ) // An Runner will run go command invocations and serialize @@ -56,11 +55,14 @@ func (runner *Runner) initialize() { // 1.14: go: updating go.mod: existing contents have changed since last read var modConcurrencyError = regexp.MustCompile(`go:.*go.mod.*contents have changed`) -// verb is an event label for the go command verb. -var verb = keys.NewString("verb", "go command verb") +// event keys for go command invocations +var ( + verb = keys.NewString("verb", "go command verb") + directory = keys.NewString("directory", "") +) func invLabels(inv Invocation) []label.Label { - return []label.Label{verb.Of(inv.Verb), tag.Directory.Of(inv.WorkingDir)} + return []label.Label{verb.Of(inv.Verb), directory.Of(inv.WorkingDir)} } // Run is a convenience wrapper around RunRaw. @@ -85,6 +87,7 @@ func (runner *Runner) RunPiped(ctx context.Context, inv Invocation, stdout, stde // RunRaw runs the invocation, serializing requests only if they fight over // go.mod changes. +// Postcondition: both error results have same nilness. func (runner *Runner) RunRaw(ctx context.Context, inv Invocation) (*bytes.Buffer, *bytes.Buffer, error, error) { ctx, done := event.Start(ctx, "gocommand.Runner.RunRaw", invLabels(inv)...) defer done() @@ -95,23 +98,24 @@ func (runner *Runner) RunRaw(ctx context.Context, inv Invocation) (*bytes.Buffer stdout, stderr, friendlyErr, err := runner.runConcurrent(ctx, inv) // If we encounter a load concurrency error, we need to retry serially. - if friendlyErr == nil || !modConcurrencyError.MatchString(friendlyErr.Error()) { - return stdout, stderr, friendlyErr, err + if friendlyErr != nil && modConcurrencyError.MatchString(friendlyErr.Error()) { + event.Error(ctx, "Load concurrency error, will retry serially", err) + + // Run serially by calling runPiped. + stdout.Reset() + stderr.Reset() + friendlyErr, err = runner.runPiped(ctx, inv, stdout, stderr) } - event.Error(ctx, "Load concurrency error, will retry serially", err) - // Run serially by calling runPiped. - stdout.Reset() - stderr.Reset() - friendlyErr, err = runner.runPiped(ctx, inv, stdout, stderr) return stdout, stderr, friendlyErr, err } +// Postcondition: both error results have same nilness. func (runner *Runner) runConcurrent(ctx context.Context, inv Invocation) (*bytes.Buffer, *bytes.Buffer, error, error) { // Wait for 1 worker to become available. select { case <-ctx.Done(): - return nil, nil, nil, ctx.Err() + return nil, nil, ctx.Err(), ctx.Err() case runner.inFlight <- struct{}{}: defer func() { <-runner.inFlight }() } @@ -121,6 +125,7 @@ func (runner *Runner) runConcurrent(ctx context.Context, inv Invocation) (*bytes return stdout, stderr, friendlyErr, err } +// Postcondition: both error results have same nilness. func (runner *Runner) runPiped(ctx context.Context, inv Invocation, stdout, stderr io.Writer) (error, error) { // Make sure the runner is always initialized. runner.initialize() @@ -129,7 +134,7 @@ func (runner *Runner) runPiped(ctx context.Context, inv Invocation, stdout, stde // runPiped commands. select { case <-ctx.Done(): - return nil, ctx.Err() + return ctx.Err(), ctx.Err() case runner.serialized <- struct{}{}: defer func() { <-runner.serialized }() } @@ -139,7 +144,7 @@ func (runner *Runner) runPiped(ctx context.Context, inv Invocation, stdout, stde for i := 0; i < maxInFlight; i++ { select { case <-ctx.Done(): - return nil, ctx.Err() + return ctx.Err(), ctx.Err() case runner.inFlight <- struct{}{}: // Make sure we always "return" any workers we took. defer func() { <-runner.inFlight }() @@ -156,12 +161,17 @@ type Invocation struct { BuildFlags []string // If ModFlag is set, the go command is invoked with -mod=ModFlag. + // TODO(rfindley): remove, in favor of Args. ModFlag string // If ModFile is set, the go command is invoked with -modfile=ModFile. + // TODO(rfindley): remove, in favor of Args. ModFile string - // If Overlay is set, the go command is invoked with -overlay=Overlay. + // Overlay is the name of the JSON overlay file that describes + // unsaved editor buffers; see [WriteOverlays]. + // If set, the go command is invoked with -overlay=Overlay. + // TODO(rfindley): remove, in favor of Args. Overlay string // If CleanEnv is set, the invocation will run only with the environment @@ -172,6 +182,7 @@ type Invocation struct { Logf func(format string, args ...interface{}) } +// Postcondition: both error results have same nilness. func (i *Invocation) runWithFriendlyError(ctx context.Context, stdout, stderr io.Writer) (friendlyError error, rawError error) { rawError = i.run(ctx, stdout, stderr) if rawError != nil { @@ -188,12 +199,14 @@ func (i *Invocation) runWithFriendlyError(ctx context.Context, stdout, stderr io return } -func (i *Invocation) run(ctx context.Context, stdout, stderr io.Writer) error { - log := i.Logf - if log == nil { - log = func(string, ...interface{}) {} +// logf logs if i.Logf is non-nil. +func (i *Invocation) logf(format string, args ...any) { + if i.Logf != nil { + i.Logf(format, args...) } +} +func (i *Invocation) run(ctx context.Context, stdout, stderr io.Writer) error { goArgs := []string{i.Verb} appendModFile := func() { @@ -236,23 +249,23 @@ func (i *Invocation) run(ctx context.Context, stdout, stderr io.Writer) error { cmd.Stdout = stdout cmd.Stderr = stderr - // cmd.WaitDelay was added only in go1.20 (see #50436). - if waitDelay := reflect.ValueOf(cmd).Elem().FieldByName("WaitDelay"); waitDelay.IsValid() { - // https://go.dev/issue/59541: don't wait forever copying stderr - // after the command has exited. - // After CL 484741 we copy stdout manually, so we we'll stop reading that as - // soon as ctx is done. However, we also don't want to wait around forever - // for stderr. Give a much-longer-than-reasonable delay and then assume that - // something has wedged in the kernel or runtime. - waitDelay.Set(reflect.ValueOf(30 * time.Second)) - } - - // On darwin the cwd gets resolved to the real path, which breaks anything that - // expects the working directory to keep the original path, including the + // https://go.dev/issue/59541: don't wait forever copying stderr + // after the command has exited. + // After CL 484741 we copy stdout manually, so we we'll stop reading that as + // soon as ctx is done. However, we also don't want to wait around forever + // for stderr. Give a much-longer-than-reasonable delay and then assume that + // something has wedged in the kernel or runtime. + cmd.WaitDelay = 30 * time.Second + + // The cwd gets resolved to the real path. On Darwin, where + // /tmp is a symlink, this breaks anything that expects the + // working directory to keep the original path, including the // go command when dealing with modules. - // The Go stdlib has a special feature where if the cwd and the PWD are the - // same node then it trusts the PWD, so by setting it in the env for the child - // process we fix up all the paths returned by the go command. + // + // os.Getwd has a special feature where if the cwd and the PWD + // are the same node then it trusts the PWD, so by setting it + // in the env for the child process we fix up all the paths + // returned by the go command. if !i.CleanEnv { cmd.Env = os.Environ() } @@ -262,7 +275,12 @@ func (i *Invocation) run(ctx context.Context, stdout, stderr io.Writer) error { cmd.Dir = i.WorkingDir } - defer func(start time.Time) { log("%s for %v", time.Since(start), cmdDebugStr(cmd)) }(time.Now()) + debugStr := cmdDebugStr(cmd) + i.logf("starting %v", debugStr) + start := time.Now() + defer func() { + i.logf("%s for %v", time.Since(start), debugStr) + }() return runCmdContext(ctx, cmd) } @@ -343,6 +361,7 @@ func runCmdContext(ctx context.Context, cmd *exec.Cmd) (err error) { } } + startTime := time.Now() err = cmd.Start() if stdoutW != nil { // The child process has inherited the pipe file, @@ -369,7 +388,7 @@ func runCmdContext(ctx context.Context, cmd *exec.Cmd) (err error) { case err := <-resChan: return err case <-timer.C: - HandleHangingGoCommand(cmd.Process) + HandleHangingGoCommand(startTime, cmd) case <-ctx.Done(): } } else { @@ -403,7 +422,7 @@ func runCmdContext(ctx context.Context, cmd *exec.Cmd) (err error) { return <-resChan } -func HandleHangingGoCommand(proc *os.Process) { +func HandleHangingGoCommand(start time.Time, cmd *exec.Cmd) { switch runtime.GOOS { case "linux", "darwin", "freebsd", "netbsd": fmt.Fprintln(os.Stderr, `DETECTED A HANGING GO COMMAND @@ -436,7 +455,7 @@ See golang/go#54461 for more details.`) panic(fmt.Sprintf("running %s: %v", listFiles, err)) } } - panic(fmt.Sprintf("detected hanging go command (pid %d): see golang/go#54461 for more details", proc.Pid)) + panic(fmt.Sprintf("detected hanging go command (golang/go#54461); waited %s\n\tcommand:%s\n\tpid:%d", time.Since(start), cmd, cmd.Process.Pid)) } func cmdDebugStr(cmd *exec.Cmd) string { @@ -460,3 +479,73 @@ func cmdDebugStr(cmd *exec.Cmd) string { } return fmt.Sprintf("GOROOT=%v GOPATH=%v GO111MODULE=%v GOPROXY=%v PWD=%v %v", env["GOROOT"], env["GOPATH"], env["GO111MODULE"], env["GOPROXY"], env["PWD"], strings.Join(args, " ")) } + +// WriteOverlays writes each value in the overlay (see the Overlay +// field of go/packages.Config) to a temporary file and returns the name +// of a JSON file describing the mapping that is suitable for the "go +// list -overlay" flag. +// +// On success, the caller must call the cleanup function exactly once +// when the files are no longer needed. +func WriteOverlays(overlay map[string][]byte) (filename string, cleanup func(), err error) { + // Do nothing if there are no overlays in the config. + if len(overlay) == 0 { + return "", func() {}, nil + } + + dir, err := os.MkdirTemp("", "gocommand-*") + if err != nil { + return "", nil, err + } + + // The caller must clean up this directory, + // unless this function returns an error. + // (The cleanup operand of each return + // statement below is ignored.) + defer func() { + cleanup = func() { + os.RemoveAll(dir) + } + if err != nil { + cleanup() + cleanup = nil + } + }() + + // Write each map entry to a temporary file. + overlays := make(map[string]string) + for k, v := range overlay { + // Use a unique basename for each file (001-foo.go), + // to avoid creating nested directories. + base := fmt.Sprintf("%d-%s", 1+len(overlays), filepath.Base(k)) + filename := filepath.Join(dir, base) + err := os.WriteFile(filename, v, 0666) + if err != nil { + return "", nil, err + } + overlays[k] = filename + } + + // Write the JSON overlay file that maps logical file names to temp files. + // + // OverlayJSON is the format overlay files are expected to be in. + // The Replace map maps from overlaid paths to replacement paths: + // the Go command will forward all reads trying to open + // each overlaid path to its replacement path, or consider the overlaid + // path not to exist if the replacement path is empty. + // + // From golang/go#39958. + type OverlayJSON struct { + Replace map[string]string `json:"replace,omitempty"` + } + b, err := json.Marshal(OverlayJSON{Replace: overlays}) + if err != nil { + return "", nil, err + } + filename = filepath.Join(dir, "overlay.json") + if err := os.WriteFile(filename, b, 0666); err != nil { + return "", nil, err + } + + return filename, nil, nil +} diff --git a/vendor/golang.org/x/tools/internal/gocommand/vendor.go b/vendor/golang.org/x/tools/internal/gocommand/vendor.go index 2d3d408c..e38d1fb4 100644 --- a/vendor/golang.org/x/tools/internal/gocommand/vendor.go +++ b/vendor/golang.org/x/tools/internal/gocommand/vendor.go @@ -107,3 +107,57 @@ func getMainModuleAnd114(ctx context.Context, inv Invocation, r *Runner) (*Modul } return mod, lines[4] == "go1.14", nil } + +// WorkspaceVendorEnabled reports whether workspace vendoring is enabled. It takes a *Runner to execute Go commands +// with the supplied context.Context and Invocation. The Invocation can contain pre-defined fields, +// of which only Verb and Args are modified to run the appropriate Go command. +// Inspired by setDefaultBuildMod in modload/init.go +func WorkspaceVendorEnabled(ctx context.Context, inv Invocation, r *Runner) (bool, []*ModuleJSON, error) { + inv.Verb = "env" + inv.Args = []string{"GOWORK"} + stdout, err := r.Run(ctx, inv) + if err != nil { + return false, nil, err + } + goWork := string(bytes.TrimSpace(stdout.Bytes())) + if fi, err := os.Stat(filepath.Join(filepath.Dir(goWork), "vendor")); err == nil && fi.IsDir() { + mainMods, err := getWorkspaceMainModules(ctx, inv, r) + if err != nil { + return false, nil, err + } + return true, mainMods, nil + } + return false, nil, nil +} + +// getWorkspaceMainModules gets the main modules' information. +// This is the information needed to figure out if vendoring should be enabled. +func getWorkspaceMainModules(ctx context.Context, inv Invocation, r *Runner) ([]*ModuleJSON, error) { + const format = `{{.Path}} +{{.Dir}} +{{.GoMod}} +{{.GoVersion}} +` + inv.Verb = "list" + inv.Args = []string{"-m", "-f", format} + stdout, err := r.Run(ctx, inv) + if err != nil { + return nil, err + } + + lines := strings.Split(strings.TrimSuffix(stdout.String(), "\n"), "\n") + if len(lines) < 4 { + return nil, fmt.Errorf("unexpected stdout: %q", stdout.String()) + } + mods := make([]*ModuleJSON, 0, len(lines)/4) + for i := 0; i < len(lines); i += 4 { + mods = append(mods, &ModuleJSON{ + Path: lines[i], + Dir: lines[i+1], + GoMod: lines[i+2], + GoVersion: lines[i+3], + Main: true, + }) + } + return mods, nil +} diff --git a/vendor/golang.org/x/tools/internal/gopathwalk/walk.go b/vendor/golang.org/x/tools/internal/gopathwalk/walk.go index 452e342c..83615155 100644 --- a/vendor/golang.org/x/tools/internal/gopathwalk/walk.go +++ b/vendor/golang.org/x/tools/internal/gopathwalk/walk.go @@ -9,21 +9,27 @@ package gopathwalk import ( "bufio" "bytes" - "log" + "io" + "io/fs" "os" "path/filepath" + "runtime" "strings" + "sync" "time" - - "golang.org/x/tools/internal/fastwalk" ) // Options controls the behavior of a Walk call. type Options struct { // If Logf is non-nil, debug logging is enabled through this function. Logf func(format string, args ...interface{}) + // Search module caches. Also disables legacy goimports ignore rules. ModulesEnabled bool + + // Maximum number of concurrent calls to user-provided callbacks, + // or 0 for GOMAXPROCS. + Concurrency int } // RootType indicates the type of a Root. @@ -44,22 +50,28 @@ type Root struct { Type RootType } -// Walk walks Go source directories ($GOROOT, $GOPATH, etc) to find packages. -// For each package found, add will be called (concurrently) with the absolute +// Walk concurrently walks Go source directories ($GOROOT, $GOPATH, etc) to find packages. +// +// For each package found, add will be called with the absolute // paths of the containing source directory and the package directory. -// add will be called concurrently. +// +// Unlike filepath.WalkDir, Walk follows symbolic links +// (while guarding against cycles). func Walk(roots []Root, add func(root Root, dir string), opts Options) { WalkSkip(roots, add, func(Root, string) bool { return false }, opts) } -// WalkSkip walks Go source directories ($GOROOT, $GOPATH, etc) to find packages. -// For each package found, add will be called (concurrently) with the absolute +// WalkSkip concurrently walks Go source directories ($GOROOT, $GOPATH, etc) to +// find packages. +// +// For each package found, add will be called with the absolute // paths of the containing source directory and the package directory. -// For each directory that will be scanned, skip will be called (concurrently) +// For each directory that will be scanned, skip will be called // with the absolute paths of the containing source directory and the directory. // If skip returns false on a directory it will be processed. -// add will be called concurrently. -// skip will be called concurrently. +// +// Unlike filepath.WalkDir, WalkSkip follows symbolic links +// (while guarding against cycles). func WalkSkip(roots []Root, add func(root Root, dir string), skip func(root Root, dir string) bool, opts Options) { for _, root := range roots { walkDir(root, add, skip, opts) @@ -68,34 +80,51 @@ func WalkSkip(roots []Root, add func(root Root, dir string), skip func(root Root // walkDir creates a walker and starts fastwalk with this walker. func walkDir(root Root, add func(Root, string), skip func(root Root, dir string) bool, opts Options) { + if opts.Logf == nil { + opts.Logf = func(format string, args ...interface{}) {} + } if _, err := os.Stat(root.Path); os.IsNotExist(err) { - if opts.Logf != nil { - opts.Logf("skipping nonexistent directory: %v", root.Path) - } + opts.Logf("skipping nonexistent directory: %v", root.Path) return } start := time.Now() - if opts.Logf != nil { - opts.Logf("scanning %s", root.Path) + opts.Logf("scanning %s", root.Path) + + concurrency := opts.Concurrency + if concurrency == 0 { + // The walk be either CPU-bound or I/O-bound, depending on what the + // caller-supplied add function does and the details of the user's platform + // and machine. Rather than trying to fine-tune the concurrency level for a + // specific environment, we default to GOMAXPROCS: it is likely to be a good + // choice for a CPU-bound add function, and if it is instead I/O-bound, then + // dealing with I/O saturation is arguably the job of the kernel and/or + // runtime. (Oversaturating I/O seems unlikely to harm performance as badly + // as failing to saturate would.) + concurrency = runtime.GOMAXPROCS(0) } w := &walker{ root: root, add: add, skip: skip, opts: opts, + sem: make(chan struct{}, concurrency), } w.init() - if err := fastwalk.Walk(root.Path, w.walk); err != nil { - logf := opts.Logf - if logf == nil { - logf = log.Printf - } - logf("scanning directory %v: %v", root.Path, err) - } - if opts.Logf != nil { - opts.Logf("scanned %s in %v", root.Path, time.Since(start)) + w.sem <- struct{}{} + path := root.Path + if path == "" { + path = "." } + if fi, err := os.Lstat(path); err == nil { + w.walk(path, nil, fs.FileInfoToDirEntry(fi)) + } else { + w.opts.Logf("scanning directory %v: %v", root.Path, err) + } + <-w.sem + w.walking.Wait() + + opts.Logf("scanned %s in %v", root.Path, time.Since(start)) } // walker is the callback for fastwalk.Walk. @@ -105,7 +134,18 @@ type walker struct { skip func(Root, string) bool // The callback that will be invoked for every dir. dir is skipped if it returns true. opts Options // Options passed to Walk by the user. - ignoredDirs []os.FileInfo // The ignored directories, loaded from .goimportsignore files. + walking sync.WaitGroup + sem chan struct{} // Channel of semaphore tokens; send to acquire, receive to release. + ignoredDirs []string + + added sync.Map // map[string]bool +} + +// A symlinkList is a linked list of os.FileInfos for parent directories +// reached via symlinks. +type symlinkList struct { + info os.FileInfo + prev *symlinkList } // init initializes the walker based on its Options @@ -121,14 +161,8 @@ func (w *walker) init() { for _, p := range ignoredPaths { full := filepath.Join(w.root.Path, p) - if fi, err := os.Stat(full); err == nil { - w.ignoredDirs = append(w.ignoredDirs, fi) - if w.opts.Logf != nil { - w.opts.Logf("Directory added to ignore list: %s", full) - } - } else if w.opts.Logf != nil { - w.opts.Logf("Error statting ignored directory: %v", err) - } + w.ignoredDirs = append(w.ignoredDirs, full) + w.opts.Logf("Directory added to ignore list: %s", full) } } @@ -138,12 +172,10 @@ func (w *walker) init() { func (w *walker) getIgnoredDirs(path string) []string { file := filepath.Join(path, ".goimportsignore") slurp, err := os.ReadFile(file) - if w.opts.Logf != nil { - if err != nil { - w.opts.Logf("%v", err) - } else { - w.opts.Logf("Read %s", file) - } + if err != nil { + w.opts.Logf("%v", err) + } else { + w.opts.Logf("Read %s", file) } if err != nil { return nil @@ -162,9 +194,9 @@ func (w *walker) getIgnoredDirs(path string) []string { } // shouldSkipDir reports whether the file should be skipped or not. -func (w *walker) shouldSkipDir(fi os.FileInfo, dir string) bool { +func (w *walker) shouldSkipDir(dir string) bool { for _, ignoredDir := range w.ignoredDirs { - if os.SameFile(fi, ignoredDir) { + if dir == ignoredDir { return true } } @@ -176,85 +208,130 @@ func (w *walker) shouldSkipDir(fi os.FileInfo, dir string) bool { } // walk walks through the given path. -func (w *walker) walk(path string, typ os.FileMode) error { - if typ.IsRegular() { - dir := filepath.Dir(path) - if dir == w.root.Path && (w.root.Type == RootGOROOT || w.root.Type == RootGOPATH) { - // Doesn't make sense to have regular files - // directly in your $GOPATH/src or $GOROOT/src. - return fastwalk.ErrSkipFiles - } - if !strings.HasSuffix(path, ".go") { - return nil +// +// Errors are logged if w.opts.Logf is non-nil, but otherwise ignored. +func (w *walker) walk(path string, pathSymlinks *symlinkList, d fs.DirEntry) { + if d.Type()&os.ModeSymlink != 0 { + // Walk the symlink's target rather than the symlink itself. + // + // (Note that os.Stat, unlike the lower-lever os.Readlink, + // follows arbitrarily many layers of symlinks, so it will eventually + // reach either a non-symlink or a nonexistent target.) + // + // TODO(bcmills): 'go list all' itself ignores symlinks within GOROOT/src + // and GOPATH/src. Do we really need to traverse them here? If so, why? + + fi, err := os.Stat(path) + if err != nil { + w.opts.Logf("%v", err) + return } - w.add(w.root, dir) - return fastwalk.ErrSkipFiles - } - if typ == os.ModeDir { - base := filepath.Base(path) - if base == "" || base[0] == '.' || base[0] == '_' || - base == "testdata" || - (w.root.Type == RootGOROOT && w.opts.ModulesEnabled && base == "vendor") || - (!w.opts.ModulesEnabled && base == "node_modules") { - return filepath.SkipDir + // Avoid walking symlink cycles: if we have already followed a symlink to + // this directory as a parent of itself, don't follow it again. + // + // This doesn't catch the first time through a cycle, but it also minimizes + // the number of extra stat calls we make if we *don't* encounter a cycle. + // Since we don't actually expect to encounter symlink cycles in practice, + // this seems like the right tradeoff. + for parent := pathSymlinks; parent != nil; parent = parent.prev { + if os.SameFile(fi, parent.info) { + return + } } - fi, err := os.Lstat(path) - if err == nil && w.shouldSkipDir(fi, path) { - return filepath.SkipDir + + pathSymlinks = &symlinkList{ + info: fi, + prev: pathSymlinks, } - return nil + d = fs.FileInfoToDirEntry(fi) } - if typ == os.ModeSymlink { - base := filepath.Base(path) - if strings.HasPrefix(base, ".#") { - // Emacs noise. - return nil + + if d.Type().IsRegular() { + if !strings.HasSuffix(path, ".go") { + return } - if w.shouldTraverse(path) { - return fastwalk.ErrTraverseLink + + dir := filepath.Dir(path) + if dir == w.root.Path && (w.root.Type == RootGOROOT || w.root.Type == RootGOPATH) { + // Doesn't make sense to have regular files + // directly in your $GOPATH/src or $GOROOT/src. + // + // TODO(bcmills): there are many levels of directory within + // RootModuleCache where this also wouldn't make sense, + // Can we generalize this to any directory without a corresponding + // import path? + return } - } - return nil -} -// shouldTraverse reports whether the symlink fi, found in dir, -// should be followed. It makes sure symlinks were never visited -// before to avoid symlink loops. -func (w *walker) shouldTraverse(path string) bool { - ts, err := os.Stat(path) - if err != nil { - logf := w.opts.Logf - if logf == nil { - logf = log.Printf + if _, dup := w.added.LoadOrStore(dir, true); !dup { + w.add(w.root, dir) } - logf("%v", err) - return false } - if !ts.IsDir() { - return false + + if !d.IsDir() { + return + } + + base := filepath.Base(path) + if base == "" || base[0] == '.' || base[0] == '_' || + base == "testdata" || + (w.root.Type == RootGOROOT && w.opts.ModulesEnabled && base == "vendor") || + (!w.opts.ModulesEnabled && base == "node_modules") || + w.shouldSkipDir(path) { + return } - if w.shouldSkipDir(ts, filepath.Dir(path)) { - return false + + // Read the directory and walk its entries. + + f, err := os.Open(path) + if err != nil { + w.opts.Logf("%v", err) + return } - // Check for symlink loops by statting each directory component - // and seeing if any are the same file as ts. + defer f.Close() + for { - parent := filepath.Dir(path) - if parent == path { - // Made it to the root without seeing a cycle. - // Use this symlink. - return true - } - parentInfo, err := os.Stat(parent) + // We impose an arbitrary limit on the number of ReadDir results per + // directory to limit the amount of memory consumed for stale or upcoming + // directory entries. The limit trades off CPU (number of syscalls to read + // the whole directory) against RAM (reachable directory entries other than + // the one currently being processed). + // + // Since we process the directories recursively, we will end up maintaining + // a slice of entries for each level of the directory tree. + // (Compare https://go.dev/issue/36197.) + ents, err := f.ReadDir(1024) if err != nil { - return false + if err != io.EOF { + w.opts.Logf("%v", err) + } + break } - if os.SameFile(ts, parentInfo) { - // Cycle. Don't traverse. - return false + + for _, d := range ents { + nextPath := filepath.Join(path, d.Name()) + if d.IsDir() { + select { + case w.sem <- struct{}{}: + // Got a new semaphore token, so we can traverse the directory concurrently. + d := d + w.walking.Add(1) + go func() { + defer func() { + <-w.sem + w.walking.Done() + }() + w.walk(nextPath, pathSymlinks, d) + }() + continue + + default: + // No tokens available, so traverse serially. + } + } + + w.walk(nextPath, pathSymlinks, d) } - path = parent } - } diff --git a/vendor/golang.org/x/tools/internal/imports/fix.go b/vendor/golang.org/x/tools/internal/imports/fix.go index d4f1b4e8..c1510817 100644 --- a/vendor/golang.org/x/tools/internal/imports/fix.go +++ b/vendor/golang.org/x/tools/internal/imports/fix.go @@ -13,6 +13,8 @@ import ( "go/build" "go/parser" "go/token" + "go/types" + "io/fs" "io/ioutil" "os" "path" @@ -25,10 +27,12 @@ import ( "unicode" "unicode/utf8" + "golang.org/x/sync/errgroup" "golang.org/x/tools/go/ast/astutil" "golang.org/x/tools/internal/event" "golang.org/x/tools/internal/gocommand" "golang.org/x/tools/internal/gopathwalk" + "golang.org/x/tools/internal/stdlib" ) // importToGroup is a list of functions which map from an import path to @@ -101,19 +105,25 @@ type packageInfo struct { // parseOtherFiles parses all the Go files in srcDir except filename, including // test files if filename looks like a test. -func parseOtherFiles(fset *token.FileSet, srcDir, filename string) []*ast.File { +// +// It returns an error only if ctx is cancelled. Files with parse errors are +// ignored. +func parseOtherFiles(ctx context.Context, fset *token.FileSet, srcDir, filename string) ([]*ast.File, error) { // This could use go/packages but it doesn't buy much, and it fails // with https://golang.org/issue/26296 in LoadFiles mode in some cases. considerTests := strings.HasSuffix(filename, "_test.go") fileBase := filepath.Base(filename) - packageFileInfos, err := ioutil.ReadDir(srcDir) + packageFileInfos, err := os.ReadDir(srcDir) if err != nil { - return nil + return nil, ctx.Err() } var files []*ast.File for _, fi := range packageFileInfos { + if ctx.Err() != nil { + return nil, ctx.Err() + } if fi.Name() == fileBase || !strings.HasSuffix(fi.Name(), ".go") { continue } @@ -121,7 +131,7 @@ func parseOtherFiles(fset *token.FileSet, srcDir, filename string) []*ast.File { continue } - f, err := parser.ParseFile(fset, filepath.Join(srcDir, fi.Name()), nil, 0) + f, err := parser.ParseFile(fset, filepath.Join(srcDir, fi.Name()), nil, parser.SkipObjectResolution) if err != nil { continue } @@ -129,7 +139,7 @@ func parseOtherFiles(fset *token.FileSet, srcDir, filename string) []*ast.File { files = append(files, f) } - return files + return files, ctx.Err() } // addGlobals puts the names of package vars into the provided map. @@ -253,7 +263,7 @@ type pass struct { otherFiles []*ast.File // sibling files. // Intermediate state, generated by load. - existingImports map[string]*ImportInfo + existingImports map[string][]*ImportInfo allRefs references missingRefs references @@ -298,6 +308,20 @@ func (p *pass) loadPackageNames(imports []*ImportInfo) error { return nil } +// if there is a trailing major version, remove it +func withoutVersion(nm string) string { + if v := path.Base(nm); len(v) > 0 && v[0] == 'v' { + if _, err := strconv.Atoi(v[1:]); err == nil { + // this is, for instance, called with rand/v2 and returns rand + if len(v) < len(nm) { + xnm := nm[:len(nm)-len(v)-1] + return path.Base(xnm) + } + } + } + return nm +} + // importIdentifier returns the identifier that imp will introduce. It will // guess if the package name has not been loaded, e.g. because the source // is not available. @@ -307,7 +331,7 @@ func (p *pass) importIdentifier(imp *ImportInfo) string { } known := p.knownPackages[imp.ImportPath] if known != nil && known.name != "" { - return known.name + return withoutVersion(known.name) } return ImportPathToAssumedName(imp.ImportPath) } @@ -318,7 +342,7 @@ func (p *pass) importIdentifier(imp *ImportInfo) string { func (p *pass) load() ([]*ImportFix, bool) { p.knownPackages = map[string]*packageInfo{} p.missingRefs = references{} - p.existingImports = map[string]*ImportInfo{} + p.existingImports = map[string][]*ImportInfo{} // Load basic information about the file in question. p.allRefs = collectReferences(p.f) @@ -342,14 +366,12 @@ func (p *pass) load() ([]*ImportFix, bool) { if p.loadRealPackageNames { err := p.loadPackageNames(append(imports, p.candidates...)) if err != nil { - if p.env.Logf != nil { - p.env.Logf("loading package names: %v", err) - } + p.env.logf("loading package names: %v", err) return nil, false } } for _, imp := range imports { - p.existingImports[p.importIdentifier(imp)] = imp + p.existingImports[p.importIdentifier(imp)] = append(p.existingImports[p.importIdentifier(imp)], imp) } // Find missing references. @@ -388,31 +410,33 @@ func (p *pass) fix() ([]*ImportFix, bool) { // Found everything, or giving up. Add the new imports and remove any unused. var fixes []*ImportFix - for _, imp := range p.existingImports { - // We deliberately ignore globals here, because we can't be sure - // they're in the same package. People do things like put multiple - // main packages in the same directory, and we don't want to - // remove imports if they happen to have the same name as a var in - // a different package. - if _, ok := p.allRefs[p.importIdentifier(imp)]; !ok { - fixes = append(fixes, &ImportFix{ - StmtInfo: *imp, - IdentName: p.importIdentifier(imp), - FixType: DeleteImport, - }) - continue - } + for _, identifierImports := range p.existingImports { + for _, imp := range identifierImports { + // We deliberately ignore globals here, because we can't be sure + // they're in the same package. People do things like put multiple + // main packages in the same directory, and we don't want to + // remove imports if they happen to have the same name as a var in + // a different package. + if _, ok := p.allRefs[p.importIdentifier(imp)]; !ok { + fixes = append(fixes, &ImportFix{ + StmtInfo: *imp, + IdentName: p.importIdentifier(imp), + FixType: DeleteImport, + }) + continue + } - // An existing import may need to update its import name to be correct. - if name := p.importSpecName(imp); name != imp.Name { - fixes = append(fixes, &ImportFix{ - StmtInfo: ImportInfo{ - Name: name, - ImportPath: imp.ImportPath, - }, - IdentName: p.importIdentifier(imp), - FixType: SetImportName, - }) + // An existing import may need to update its import name to be correct. + if name := p.importSpecName(imp); name != imp.Name { + fixes = append(fixes, &ImportFix{ + StmtInfo: ImportInfo{ + Name: name, + ImportPath: imp.ImportPath, + }, + IdentName: p.importIdentifier(imp), + FixType: SetImportName, + }) + } } } // Collecting fixes involved map iteration, so sort for stability. See @@ -507,9 +531,9 @@ func (p *pass) assumeSiblingImportsValid() { } for left, rights := range refs { if imp, ok := importsByName[left]; ok { - if m, ok := stdlib[imp.ImportPath]; ok { + if m, ok := stdlib.PackageSymbols[imp.ImportPath]; ok { // We have the stdlib in memory; no need to guess. - rights = copyExports(m) + rights = symbolNameSet(m) } p.addCandidate(imp, &packageInfo{ // no name; we already know it. @@ -541,6 +565,8 @@ func (p *pass) addCandidate(imp *ImportInfo, pkg *packageInfo) { // // This is declared as a variable rather than a function so goimports can // easily be extended by adding a file with an init function. +// +// DO NOT REMOVE: used internally at Google. var fixImports = fixImportsDefault func fixImportsDefault(fset *token.FileSet, f *ast.File, filename string, env *ProcessEnv) error { @@ -560,9 +586,7 @@ func getFixes(ctx context.Context, fset *token.FileSet, f *ast.File, filename st return nil, err } srcDir := filepath.Dir(abs) - if env.Logf != nil { - env.Logf("fixImports(filename=%q), abs=%q, srcDir=%q ...", filename, abs, srcDir) - } + env.logf("fixImports(filename=%q), abs=%q, srcDir=%q ...", filename, abs, srcDir) // First pass: looking only at f, and using the naive algorithm to // derive package names from import paths, see if the file is already @@ -573,7 +597,10 @@ func getFixes(ctx context.Context, fset *token.FileSet, f *ast.File, filename st return fixes, nil } - otherFiles := parseOtherFiles(fset, srcDir, filename) + otherFiles, err := parseOtherFiles(ctx, fset, srcDir, filename) + if err != nil { + return nil, err + } // Second pass: add information from other files in the same package, // like their package vars and imports. @@ -637,7 +664,7 @@ func getCandidatePkgs(ctx context.Context, wrappedCallback *scanCallback, filena dupCheck := map[string]struct{}{} // Start off with the standard library. - for importPath, exports := range stdlib { + for importPath, symbols := range stdlib.PackageSymbols { p := &pkg{ dir: filepath.Join(goenv["GOROOT"], "src", importPath), importPathShort: importPath, @@ -646,6 +673,13 @@ func getCandidatePkgs(ctx context.Context, wrappedCallback *scanCallback, filena } dupCheck[importPath] = struct{}{} if notSelf(p) && wrappedCallback.dirFound(p) && wrappedCallback.packageNameLoaded(p) { + var exports []stdlib.Symbol + for _, sym := range symbols { + switch sym.Kind { + case stdlib.Func, stdlib.Type, stdlib.Var, stdlib.Const: + exports = append(exports, sym) + } + } wrappedCallback.exportsLoaded(p, exports) } } @@ -666,7 +700,7 @@ func getCandidatePkgs(ctx context.Context, wrappedCallback *scanCallback, filena dupCheck[pkg.importPathShort] = struct{}{} return notSelf(pkg) && wrappedCallback.packageNameLoaded(pkg) }, - exportsLoaded: func(pkg *pkg, exports []string) { + exportsLoaded: func(pkg *pkg, exports []stdlib.Symbol) { // If we're an x_test, load the package under test's test variant. if strings.HasSuffix(filePkg, "_test") && pkg.dir == filepath.Dir(filename) { var err error @@ -697,20 +731,21 @@ func ScoreImportPaths(ctx context.Context, env *ProcessEnv, paths []string) (map return result, nil } -func PrimeCache(ctx context.Context, env *ProcessEnv) error { +func PrimeCache(ctx context.Context, resolver Resolver) error { // Fully scan the disk for directories, but don't actually read any Go files. callback := &scanCallback{ - rootFound: func(gopathwalk.Root) bool { - return true + rootFound: func(root gopathwalk.Root) bool { + // See getCandidatePkgs: walking GOROOT is apparently expensive and + // unnecessary. + return root.Type != gopathwalk.RootGOROOT }, dirFound: func(pkg *pkg) bool { return false }, - packageNameLoaded: func(pkg *pkg) bool { - return false - }, + // packageNameLoaded and exportsLoaded must never be called. } - return getCandidatePkgs(ctx, callback, "", "", env) + + return resolver.scan(ctx, callback) } func candidateImportName(pkg *pkg) string { @@ -790,7 +825,7 @@ func GetImportPaths(ctx context.Context, wrapped func(ImportFix), searchPrefix, // A PackageExport is a package and its exports. type PackageExport struct { Fix *ImportFix - Exports []string + Exports []stdlib.Symbol } // GetPackageExports returns all known packages with name pkg and their exports. @@ -805,8 +840,8 @@ func GetPackageExports(ctx context.Context, wrapped func(PackageExport), searchP packageNameLoaded: func(pkg *pkg) bool { return pkg.packageName == searchPkg }, - exportsLoaded: func(pkg *pkg, exports []string) { - sort.Strings(exports) + exportsLoaded: func(pkg *pkg, exports []stdlib.Symbol) { + sortSymbols(exports) wrapped(PackageExport{ Fix: &ImportFix{ StmtInfo: ImportInfo{ @@ -824,16 +859,45 @@ func GetPackageExports(ctx context.Context, wrapped func(PackageExport), searchP return getCandidatePkgs(ctx, callback, filename, filePkg, env) } -var requiredGoEnvVars = []string{"GO111MODULE", "GOFLAGS", "GOINSECURE", "GOMOD", "GOMODCACHE", "GONOPROXY", "GONOSUMDB", "GOPATH", "GOPROXY", "GOROOT", "GOSUMDB", "GOWORK"} +// TODO(rfindley): we should depend on GOOS and GOARCH, to provide accurate +// imports when doing cross-platform development. +var requiredGoEnvVars = []string{ + "GO111MODULE", + "GOFLAGS", + "GOINSECURE", + "GOMOD", + "GOMODCACHE", + "GONOPROXY", + "GONOSUMDB", + "GOPATH", + "GOPROXY", + "GOROOT", + "GOSUMDB", + "GOWORK", +} // ProcessEnv contains environment variables and settings that affect the use of // the go command, the go/build package, etc. +// +// ...a ProcessEnv *also* overwrites its Env along with derived state in the +// form of the resolver. And because it is lazily initialized, an env may just +// be broken and unusable, but there is no way for the caller to detect that: +// all queries will just fail. +// +// TODO(rfindley): refactor this package so that this type (perhaps renamed to +// just Env or Config) is an immutable configuration struct, to be exchanged +// for an initialized object via a constructor that returns an error. Perhaps +// the signature should be `func NewResolver(*Env) (*Resolver, error)`, where +// resolver is a concrete type used for resolving imports. Via this +// refactoring, we can avoid the need to call ProcessEnv.init and +// ProcessEnv.GoEnv everywhere, and implicitly fix all the places where this +// these are misused. Also, we'd delegate the caller the decision of how to +// handle a broken environment. type ProcessEnv struct { GocmdRunner *gocommand.Runner BuildFlags []string ModFlag string - ModFile string // SkipPathInScan returns true if the path should be skipped from scans of // the RootCurrentModule root type. The function argument is a clean, @@ -843,7 +907,7 @@ type ProcessEnv struct { // Env overrides the OS environment, and can be used to specify // GOPROXY, GO111MODULE, etc. PATH cannot be set here, because // exec.Command will not honor it. - // Specifying all of RequiredGoEnvVars avoids a call to `go env`. + // Specifying all of requiredGoEnvVars avoids a call to `go env`. Env map[string]string WorkingDir string @@ -851,9 +915,17 @@ type ProcessEnv struct { // If Logf is non-nil, debug logging is enabled through this function. Logf func(format string, args ...interface{}) - initialized bool + // If set, ModCache holds a shared cache of directory info to use across + // multiple ProcessEnvs. + ModCache *DirInfoCache + + initialized bool // see TODO above - resolver Resolver + // resolver and resolverErr are lazily evaluated (see GetResolver). + // This is unclean, but see the big TODO in the docstring for ProcessEnv + // above: for now, we can't be sure that the ProcessEnv is fully initialized. + resolver Resolver + resolverErr error } func (e *ProcessEnv) goEnv() (map[string]string, error) { @@ -933,20 +1005,43 @@ func (e *ProcessEnv) env() []string { } func (e *ProcessEnv) GetResolver() (Resolver, error) { - if e.resolver != nil { - return e.resolver, nil - } if err := e.init(); err != nil { return nil, err } - if len(e.Env["GOMOD"]) == 0 && len(e.Env["GOWORK"]) == 0 { - e.resolver = newGopathResolver(e) - return e.resolver, nil + + if e.resolver == nil && e.resolverErr == nil { + // TODO(rfindley): we should only use a gopathResolver here if the working + // directory is actually *in* GOPATH. (I seem to recall an open gopls issue + // for this behavior, but I can't find it). + // + // For gopls, we can optionally explicitly choose a resolver type, since we + // already know the view type. + if len(e.Env["GOMOD"]) == 0 && len(e.Env["GOWORK"]) == 0 { + e.resolver = newGopathResolver(e) + e.logf("created gopath resolver") + } else if r, err := newModuleResolver(e, e.ModCache); err != nil { + e.resolverErr = err + e.logf("failed to create module resolver: %v", err) + } else { + e.resolver = Resolver(r) + e.logf("created module resolver") + } + } + + return e.resolver, e.resolverErr +} + +// logf logs if e.Logf is non-nil. +func (e *ProcessEnv) logf(format string, args ...any) { + if e.Logf != nil { + e.Logf(format, args...) } - e.resolver = newModuleResolver(e) - return e.resolver, nil } +// buildContext returns the build.Context to use for matching files. +// +// TODO(rfindley): support dynamic GOOS, GOARCH here, when doing cross-platform +// development. func (e *ProcessEnv) buildContext() (*build.Context, error) { ctx := build.Default goenv, err := e.goEnv() @@ -996,24 +1091,40 @@ func addStdlibCandidates(pass *pass, refs references) error { if err != nil { return err } + localbase := func(nm string) string { + ans := path.Base(nm) + if ans[0] == 'v' { + // this is called, for instance, with math/rand/v2 and returns rand/v2 + if _, err := strconv.Atoi(ans[1:]); err == nil { + ix := strings.LastIndex(nm, ans) + more := path.Base(nm[:ix]) + ans = path.Join(more, ans) + } + } + return ans + } add := func(pkg string) { // Prevent self-imports. if path.Base(pkg) == pass.f.Name.Name && filepath.Join(goenv["GOROOT"], "src", pkg) == pass.srcDir { return } - exports := copyExports(stdlib[pkg]) + exports := symbolNameSet(stdlib.PackageSymbols[pkg]) pass.addCandidate( &ImportInfo{ImportPath: pkg}, - &packageInfo{name: path.Base(pkg), exports: exports}) + &packageInfo{name: localbase(pkg), exports: exports}) } for left := range refs { if left == "rand" { - // Make sure we try crypto/rand before math/rand. + // Make sure we try crypto/rand before any version of math/rand as both have Int() + // and our policy is to recommend crypto add("crypto/rand") - add("math/rand") + // if the user's no later than go1.21, this should be "math/rand" + // but we have no way of figuring out what the user is using + // TODO: investigate using the toolchain version to disambiguate in the stdlib + add("math/rand/v2") continue } - for importPath := range stdlib { + for importPath := range stdlib.PackageSymbols { if path.Base(importPath) == left { add(importPath) } @@ -1026,15 +1137,23 @@ func addStdlibCandidates(pass *pass, refs references) error { type Resolver interface { // loadPackageNames loads the package names in importPaths. loadPackageNames(importPaths []string, srcDir string) (map[string]string, error) + // scan works with callback to search for packages. See scanCallback for details. scan(ctx context.Context, callback *scanCallback) error - // loadExports returns the set of exported symbols in the package at dir. - // loadExports may be called concurrently. - loadExports(ctx context.Context, pkg *pkg, includeTest bool) (string, []string, error) + + // loadExports returns the package name and set of exported symbols in the + // package at dir. loadExports may be called concurrently. + loadExports(ctx context.Context, pkg *pkg, includeTest bool) (string, []stdlib.Symbol, error) + // scoreImportPath returns the relevance for an import path. scoreImportPath(ctx context.Context, path string) float64 - ClearForNewScan() + // ClearForNewScan returns a new Resolver based on the receiver that has + // cleared its internal caches of directory contents. + // + // The new resolver should be primed and then set via + // [ProcessEnv.UpdateResolver]. + ClearForNewScan() Resolver } // A scanCallback controls a call to scan and receives its results. @@ -1053,7 +1172,7 @@ type scanCallback struct { // If it returns true, the package's exports will be loaded. packageNameLoaded func(pkg *pkg) bool // exportsLoaded is called when a package's exports have been loaded. - exportsLoaded func(pkg *pkg, exports []string) + exportsLoaded func(pkg *pkg, exports []stdlib.Symbol) } func addExternalCandidates(ctx context.Context, pass *pass, refs references, filename string) error { @@ -1091,7 +1210,7 @@ func addExternalCandidates(ctx context.Context, pass *pass, refs references, fil if err != nil { return err } - if err = resolver.scan(context.Background(), callback); err != nil { + if err = resolver.scan(ctx, callback); err != nil { return err } @@ -1100,57 +1219,66 @@ func addExternalCandidates(ctx context.Context, pass *pass, refs references, fil imp *ImportInfo pkg *packageInfo } - results := make(chan result, len(refs)) + results := make([]*result, len(refs)) - ctx, cancel := context.WithCancel(context.TODO()) - var wg sync.WaitGroup - defer func() { - cancel() - wg.Wait() - }() - var ( - firstErr error - firstErrOnce sync.Once - ) - for pkgName, symbols := range refs { - wg.Add(1) - go func(pkgName string, symbols map[string]bool) { - defer wg.Done() + g, ctx := errgroup.WithContext(ctx) + + searcher := symbolSearcher{ + logf: pass.env.logf, + srcDir: pass.srcDir, + xtest: strings.HasSuffix(pass.f.Name.Name, "_test"), + loadExports: resolver.loadExports, + } - found, err := findImport(ctx, pass, found[pkgName], pkgName, symbols, filename) + i := 0 + for pkgName, symbols := range refs { + index := i // claim an index in results + i++ + pkgName := pkgName + symbols := symbols + g.Go(func() error { + found, err := searcher.search(ctx, found[pkgName], pkgName, symbols) if err != nil { - firstErrOnce.Do(func() { - firstErr = err - cancel() - }) - return + return err } - if found == nil { - return // No matching package. + return nil // No matching package. } imp := &ImportInfo{ ImportPath: found.importPathShort, } - pkg := &packageInfo{ name: pkgName, exports: symbols, } - results <- result{imp, pkg} - }(pkgName, symbols) + results[index] = &result{imp, pkg} + return nil + }) + } + if err := g.Wait(); err != nil { + return err } - go func() { - wg.Wait() - close(results) - }() - for result := range results { + for _, result := range results { + if result == nil { + continue + } + // Don't offer completions that would shadow predeclared + // names, such as github.com/coreos/etcd/error. + if types.Universe.Lookup(result.pkg.name) != nil { // predeclared + // Ideally we would skip this candidate only + // if the predeclared name is actually + // referenced by the file, but that's a lot + // trickier to compute and would still create + // an import that is likely to surprise the + // user before long. + continue + } pass.addCandidate(result.imp, result.pkg) } - return firstErr + return nil } // notIdentifier reports whether ch is an invalid identifier character. @@ -1190,31 +1318,22 @@ func ImportPathToAssumedName(importPath string) string { type gopathResolver struct { env *ProcessEnv walked bool - cache *dirInfoCache + cache *DirInfoCache scanSema chan struct{} // scanSema prevents concurrent scans. } func newGopathResolver(env *ProcessEnv) *gopathResolver { r := &gopathResolver{ - env: env, - cache: &dirInfoCache{ - dirs: map[string]*directoryPackageInfo{}, - listeners: map[*int]cacheListener{}, - }, + env: env, + cache: NewDirInfoCache(), scanSema: make(chan struct{}, 1), } r.scanSema <- struct{}{} return r } -func (r *gopathResolver) ClearForNewScan() { - <-r.scanSema - r.cache = &dirInfoCache{ - dirs: map[string]*directoryPackageInfo{}, - listeners: map[*int]cacheListener{}, - } - r.walked = false - r.scanSema <- struct{}{} +func (r *gopathResolver) ClearForNewScan() Resolver { + return newGopathResolver(r.env) } func (r *gopathResolver) loadPackageNames(importPaths []string, srcDir string) (map[string]string, error) { @@ -1232,7 +1351,7 @@ func (r *gopathResolver) loadPackageNames(importPaths []string, srcDir string) ( // importPathToName finds out the actual package name, as declared in its .go files. func importPathToName(bctx *build.Context, importPath, srcDir string) string { // Fast path for standard library without going to disk. - if _, ok := stdlib[importPath]; ok { + if stdlib.HasPackage(importPath) { return path.Base(importPath) // stdlib packages always match their paths. } @@ -1430,7 +1549,7 @@ func (r *gopathResolver) scan(ctx context.Context, callback *scanCallback) error } func (r *gopathResolver) scoreImportPath(ctx context.Context, path string) float64 { - if _, ok := stdlib[path]; ok { + if stdlib.HasPackage(path) { return MaxRelevance } return MaxRelevance - 1 @@ -1447,7 +1566,7 @@ func filterRoots(roots []gopathwalk.Root, include func(gopathwalk.Root) bool) [] return result } -func (r *gopathResolver) loadExports(ctx context.Context, pkg *pkg, includeTest bool) (string, []string, error) { +func (r *gopathResolver) loadExports(ctx context.Context, pkg *pkg, includeTest bool) (string, []stdlib.Symbol, error) { if info, ok := r.cache.Load(pkg.dir); ok && !includeTest { return r.cache.CacheExports(ctx, r.env, info) } @@ -1467,13 +1586,13 @@ func VendorlessPath(ipath string) string { return ipath } -func loadExportsFromFiles(ctx context.Context, env *ProcessEnv, dir string, includeTest bool) (string, []string, error) { +func loadExportsFromFiles(ctx context.Context, env *ProcessEnv, dir string, includeTest bool) (string, []stdlib.Symbol, error) { // Look for non-test, buildable .go files which could provide exports. - all, err := ioutil.ReadDir(dir) + all, err := os.ReadDir(dir) if err != nil { return "", nil, err } - var files []os.FileInfo + var files []fs.DirEntry for _, fi := range all { name := fi.Name() if !strings.HasSuffix(name, ".go") || (!includeTest && strings.HasSuffix(name, "_test.go")) { @@ -1491,7 +1610,7 @@ func loadExportsFromFiles(ctx context.Context, env *ProcessEnv, dir string, incl } var pkgName string - var exports []string + var exports []stdlib.Symbol fset := token.NewFileSet() for _, fi := range files { select { @@ -1501,11 +1620,10 @@ func loadExportsFromFiles(ctx context.Context, env *ProcessEnv, dir string, incl } fullFile := filepath.Join(dir, fi.Name()) + // Legacy ast.Object resolution is needed here. f, err := parser.ParseFile(fset, fullFile, nil, 0) if err != nil { - if env.Logf != nil { - env.Logf("error parsing %v: %v", fullFile, err) - } + env.logf("error parsing %v: %v", fullFile, err) continue } if f.Name.Name == "documentation" { @@ -1518,40 +1636,72 @@ func loadExportsFromFiles(ctx context.Context, env *ProcessEnv, dir string, incl continue } pkgName = f.Name.Name - for name := range f.Scope.Objects { + for name, obj := range f.Scope.Objects { if ast.IsExported(name) { - exports = append(exports, name) + var kind stdlib.Kind + switch obj.Kind { + case ast.Con: + kind = stdlib.Const + case ast.Typ: + kind = stdlib.Type + case ast.Var: + kind = stdlib.Var + case ast.Fun: + kind = stdlib.Func + } + exports = append(exports, stdlib.Symbol{ + Name: name, + Kind: kind, + Version: 0, // unknown; be permissive + }) } } } + sortSymbols(exports) - if env.Logf != nil { - sortedExports := append([]string(nil), exports...) - sort.Strings(sortedExports) - env.Logf("loaded exports in dir %v (package %v): %v", dir, pkgName, strings.Join(sortedExports, ", ")) - } + env.logf("loaded exports in dir %v (package %v): %v", dir, pkgName, exports) return pkgName, exports, nil } -// findImport searches for a package with the given symbols. -// If no package is found, findImport returns ("", false, nil) -func findImport(ctx context.Context, pass *pass, candidates []pkgDistance, pkgName string, symbols map[string]bool, filename string) (*pkg, error) { +func sortSymbols(syms []stdlib.Symbol) { + sort.Slice(syms, func(i, j int) bool { + return syms[i].Name < syms[j].Name + }) +} + +// A symbolSearcher searches for a package with a set of symbols, among a set +// of candidates. See [symbolSearcher.search]. +// +// The search occurs within the scope of a single file, with context captured +// in srcDir and xtest. +type symbolSearcher struct { + logf func(string, ...any) + srcDir string // directory containing the file + xtest bool // if set, the file containing is an x_test file + loadExports func(ctx context.Context, pkg *pkg, includeTest bool) (string, []stdlib.Symbol, error) +} + +// search searches the provided candidates for a package containing all +// exported symbols. +// +// If successful, returns the resulting package. +func (s *symbolSearcher) search(ctx context.Context, candidates []pkgDistance, pkgName string, symbols map[string]bool) (*pkg, error) { // Sort the candidates by their import package length, // assuming that shorter package names are better than long // ones. Note that this sorts by the de-vendored name, so // there's no "penalty" for vendoring. sort.Sort(byDistanceOrImportPathShortLength(candidates)) - if pass.env.Logf != nil { + if s.logf != nil { for i, c := range candidates { - pass.env.Logf("%s candidate %d/%d: %v in %v", pkgName, i+1, len(candidates), c.pkg.importPathShort, c.pkg.dir) + s.logf("%s candidate %d/%d: %v in %v", pkgName, i+1, len(candidates), c.pkg.importPathShort, c.pkg.dir) } } - resolver, err := pass.env.GetResolver() - if err != nil { - return nil, err - } - // Collect exports for packages with matching names. + // Arrange rescv so that we can we can await results in order of relevance + // and exit as soon as we find the first match. + // + // Search with bounded concurrency, returning as soon as the first result + // among rescv is non-nil. rescv := make([]chan *pkg, len(candidates)) for i := range candidates { rescv[i] = make(chan *pkg, 1) @@ -1559,6 +1709,7 @@ func findImport(ctx context.Context, pass *pass, candidates []pkgDistance, pkgNa const maxConcurrentPackageImport = 4 loadExportsSem := make(chan struct{}, maxConcurrentPackageImport) + // Ensure that all work is completed at exit. ctx, cancel := context.WithCancel(ctx) var wg sync.WaitGroup defer func() { @@ -1566,6 +1717,7 @@ func findImport(ctx context.Context, pass *pass, candidates []pkgDistance, pkgNa wg.Wait() }() + // Start the search. wg.Add(1) go func() { defer wg.Done() @@ -1576,55 +1728,67 @@ func findImport(ctx context.Context, pass *pass, candidates []pkgDistance, pkgNa return } + i := i + c := c wg.Add(1) - go func(c pkgDistance, resc chan<- *pkg) { + go func() { defer func() { <-loadExportsSem wg.Done() }() - - if pass.env.Logf != nil { - pass.env.Logf("loading exports in dir %s (seeking package %s)", c.pkg.dir, pkgName) + if s.logf != nil { + s.logf("loading exports in dir %s (seeking package %s)", c.pkg.dir, pkgName) } - // If we're an x_test, load the package under test's test variant. - includeTest := strings.HasSuffix(pass.f.Name.Name, "_test") && c.pkg.dir == pass.srcDir - _, exports, err := resolver.loadExports(ctx, c.pkg, includeTest) + pkg, err := s.searchOne(ctx, c, symbols) if err != nil { - if pass.env.Logf != nil { - pass.env.Logf("loading exports in dir %s (seeking package %s): %v", c.pkg.dir, pkgName, err) - } - resc <- nil - return - } - - exportsMap := make(map[string]bool, len(exports)) - for _, sym := range exports { - exportsMap[sym] = true - } - - // If it doesn't have the right - // symbols, send nil to mean no match. - for symbol := range symbols { - if !exportsMap[symbol] { - resc <- nil - return + if s.logf != nil && ctx.Err() == nil { + s.logf("loading exports in dir %s (seeking package %s): %v", c.pkg.dir, pkgName, err) } + pkg = nil } - resc <- c.pkg - }(c, rescv[i]) + rescv[i] <- pkg // may be nil + }() } }() + // Await the first (best) result. for _, resc := range rescv { - pkg := <-resc - if pkg == nil { - continue + select { + case r := <-resc: + if r != nil { + return r, nil + } + case <-ctx.Done(): + return nil, ctx.Err() } - return pkg, nil } return nil, nil } +func (s *symbolSearcher) searchOne(ctx context.Context, c pkgDistance, symbols map[string]bool) (*pkg, error) { + if ctx.Err() != nil { + return nil, ctx.Err() + } + // If we're considering the package under test from an x_test, load the + // test variant. + includeTest := s.xtest && c.pkg.dir == s.srcDir + _, exports, err := s.loadExports(ctx, c.pkg, includeTest) + if err != nil { + return nil, err + } + + exportsMap := make(map[string]bool, len(exports)) + for _, sym := range exports { + exportsMap[sym.Name] = true + } + for symbol := range symbols { + if !exportsMap[symbol] { + return nil, nil // no match + } + } + return c.pkg, nil +} + // pkgIsCandidate reports whether pkg is a candidate for satisfying the // finding which package pkgIdent in the file named by filename is trying // to refer to. @@ -1644,58 +1808,24 @@ func pkgIsCandidate(filename string, refs references, pkg *pkg) bool { } // Speed optimization to minimize disk I/O: - // the last two components on disk must contain the - // package name somewhere. // - // This permits mismatch naming like directory - // "go-foo" being package "foo", or "pkg.v3" being "pkg", - // or directory "google.golang.org/api/cloudbilling/v1" - // being package "cloudbilling", but doesn't - // permit a directory "foo" to be package - // "bar", which is strongly discouraged - // anyway. There's no reason goimports needs - // to be slow just to accommodate that. + // Use the matchesPath heuristic to filter to package paths that could + // reasonably match a dangling reference. + // + // This permits mismatch naming like directory "go-foo" being package "foo", + // or "pkg.v3" being "pkg", or directory + // "google.golang.org/api/cloudbilling/v1" being package "cloudbilling", but + // doesn't permit a directory "foo" to be package "bar", which is strongly + // discouraged anyway. There's no reason goimports needs to be slow just to + // accommodate that. for pkgIdent := range refs { - lastTwo := lastTwoComponents(pkg.importPathShort) - if strings.Contains(lastTwo, pkgIdent) { + if matchesPath(pkgIdent, pkg.importPathShort) { return true } - if hasHyphenOrUpperASCII(lastTwo) && !hasHyphenOrUpperASCII(pkgIdent) { - lastTwo = lowerASCIIAndRemoveHyphen(lastTwo) - if strings.Contains(lastTwo, pkgIdent) { - return true - } - } } return false } -func hasHyphenOrUpperASCII(s string) bool { - for i := 0; i < len(s); i++ { - b := s[i] - if b == '-' || ('A' <= b && b <= 'Z') { - return true - } - } - return false -} - -func lowerASCIIAndRemoveHyphen(s string) (ret string) { - buf := make([]byte, 0, len(s)) - for i := 0; i < len(s); i++ { - b := s[i] - switch { - case b == '-': - continue - case 'A' <= b && b <= 'Z': - buf = append(buf, b+('a'-'A')) - default: - buf = append(buf, b) - } - } - return string(buf) -} - // canUse reports whether the package in dir is usable from filename, // respecting the Go "internal" and "vendor" visibility rules. func canUse(filename, dir string) bool { @@ -1736,19 +1866,84 @@ func canUse(filename, dir string) bool { return !strings.Contains(relSlash, "/vendor/") && !strings.Contains(relSlash, "/internal/") && !strings.HasSuffix(relSlash, "/internal") } -// lastTwoComponents returns at most the last two path components -// of v, using either / or \ as the path separator. -func lastTwoComponents(v string) string { +// matchesPath reports whether ident may match a potential package name +// referred to by path, using heuristics to filter out unidiomatic package +// names. +// +// Specifically, it checks whether either of the last two '/'- or '\'-delimited +// path segments matches the identifier. The segment-matching heuristic must +// allow for various conventions around segment naming, including go-foo, +// foo-go, and foo.v3. To handle all of these, matching considers both (1) the +// entire segment, ignoring '-' and '.', as well as (2) the last subsegment +// separated by '-' or '.'. So the segment foo-go matches all of the following +// identifiers: foo, go, and foogo. All matches are case insensitive (for ASCII +// identifiers). +// +// See the docstring for [pkgIsCandidate] for an explanation of how this +// heuristic filters potential candidate packages. +func matchesPath(ident, path string) bool { + // Ignore case, for ASCII. + lowerIfASCII := func(b byte) byte { + if 'A' <= b && b <= 'Z' { + return b + ('a' - 'A') + } + return b + } + + // match reports whether path[start:end] matches ident, ignoring [.-]. + match := func(start, end int) bool { + ii := len(ident) - 1 // current byte in ident + pi := end - 1 // current byte in path + for ; pi >= start && ii >= 0; pi-- { + pb := path[pi] + if pb == '-' || pb == '.' { + continue + } + pb = lowerIfASCII(pb) + ib := lowerIfASCII(ident[ii]) + if pb != ib { + return false + } + ii-- + } + return ii < 0 && pi < start // all bytes matched + } + + // segmentEnd and subsegmentEnd hold the end points of the current segment + // and subsegment intervals. + segmentEnd := len(path) + subsegmentEnd := len(path) + + // Count slashes; we only care about the last two segments. nslash := 0 - for i := len(v) - 1; i >= 0; i-- { - if v[i] == '/' || v[i] == '\\' { + + for i := len(path) - 1; i >= 0; i-- { + switch b := path[i]; b { + // TODO(rfindley): we handle backlashes here only because the previous + // heuristic handled backslashes. This is perhaps overly defensive, but is + // the result of many lessons regarding Chesterton's fence and the + // goimports codebase. + // + // However, this function is only ever called with something called an + // 'importPath'. Is it possible that this is a real import path, and + // therefore we need only consider forward slashes? + case '/', '\\': + if match(i+1, segmentEnd) || match(i+1, subsegmentEnd) { + return true + } nslash++ if nslash == 2 { - return v[i:] + return false // did not match above + } + segmentEnd, subsegmentEnd = i, i // reset + case '-', '.': + if match(i+1, subsegmentEnd) { + return true } + subsegmentEnd = i } } - return v + return match(0, segmentEnd) || match(0, subsegmentEnd) } type visitFn func(node ast.Node) ast.Visitor @@ -1757,10 +1952,13 @@ func (fn visitFn) Visit(node ast.Node) ast.Visitor { return fn(node) } -func copyExports(pkg []string) map[string]bool { - m := make(map[string]bool, len(pkg)) - for _, v := range pkg { - m[v] = true +func symbolNameSet(symbols []stdlib.Symbol) map[string]bool { + names := make(map[string]bool) + for _, sym := range symbols { + switch sym.Kind { + case stdlib.Const, stdlib.Var, stdlib.Type, stdlib.Func: + names[sym.Name] = true + } } - return m + return names } diff --git a/vendor/golang.org/x/tools/internal/imports/imports.go b/vendor/golang.org/x/tools/internal/imports/imports.go index 58e637b9..ff6b59a5 100644 --- a/vendor/golang.org/x/tools/internal/imports/imports.go +++ b/vendor/golang.org/x/tools/internal/imports/imports.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:generate go run mkstdlib.go - // Package imports implements a Go pretty-printer (like package "go/format") // that also adds or removes import statements as necessary. package imports @@ -88,7 +86,7 @@ func ApplyFixes(fixes []*ImportFix, filename string, src []byte, opt *Options, e // Don't use parse() -- we don't care about fragments or statement lists // here, and we need to work with unparseable files. fileSet := token.NewFileSet() - parserMode := parser.Mode(0) + parserMode := parser.SkipObjectResolution if opt.Comments { parserMode |= parser.ParseComments } @@ -109,7 +107,7 @@ func ApplyFixes(fixes []*ImportFix, filename string, src []byte, opt *Options, e } // formatFile formats the file syntax tree. -// It may mutate the token.FileSet. +// It may mutate the token.FileSet and the ast.File. // // If an adjust function is provided, it is called after formatting // with the original source (formatFile's src parameter) and the @@ -167,7 +165,7 @@ func formatFile(fset *token.FileSet, file *ast.File, src []byte, adjust func(ori // parse parses src, which was read from filename, // as a Go source file or statement list. func parse(fset *token.FileSet, filename string, src []byte, opt *Options) (*ast.File, func(orig, src []byte) []byte, error) { - parserMode := parser.Mode(0) + var parserMode parser.Mode // legacy ast.Object resolution is required here if opt.Comments { parserMode |= parser.ParseComments } @@ -236,7 +234,7 @@ func parse(fset *token.FileSet, filename string, src []byte, opt *Options) (*ast src = src[:len(src)-len("}\n")] // Gofmt has also indented the function body one level. // Remove that indent. - src = bytes.Replace(src, []byte("\n\t"), []byte("\n"), -1) + src = bytes.ReplaceAll(src, []byte("\n\t"), []byte("\n")) return matchSpace(orig, src) } return file, adjust, nil diff --git a/vendor/golang.org/x/tools/internal/imports/mod.go b/vendor/golang.org/x/tools/internal/imports/mod.go index 977d2389..8555e3f8 100644 --- a/vendor/golang.org/x/tools/internal/imports/mod.go +++ b/vendor/golang.org/x/tools/internal/imports/mod.go @@ -9,7 +9,6 @@ import ( "context" "encoding/json" "fmt" - "io/ioutil" "os" "path" "path/filepath" @@ -22,78 +21,138 @@ import ( "golang.org/x/tools/internal/event" "golang.org/x/tools/internal/gocommand" "golang.org/x/tools/internal/gopathwalk" + "golang.org/x/tools/internal/stdlib" ) -// ModuleResolver implements resolver for modules using the go command as little -// as feasible. +// Notes(rfindley): ModuleResolver appears to be heavily optimized for scanning +// as fast as possible, which is desirable for a call to goimports from the +// command line, but it doesn't work as well for gopls, where it suffers from +// slow startup (golang/go#44863) and intermittent hanging (golang/go#59216), +// both caused by populating the cache, albeit in slightly different ways. +// +// A high level list of TODOs: +// - Optimize the scan itself, as there is some redundancy statting and +// reading go.mod files. +// - Invert the relationship between ProcessEnv and Resolver (see the +// docstring of ProcessEnv). +// - Make it easier to use an external resolver implementation. +// +// Smaller TODOs are annotated in the code below. + +// ModuleResolver implements the Resolver interface for a workspace using +// modules. +// +// A goal of the ModuleResolver is to invoke the Go command as little as +// possible. To this end, it runs the Go command only for listing module +// information (i.e. `go list -m -e -json ...`). Package scanning, the process +// of loading package information for the modules, is implemented internally +// via the scan method. +// +// It has two types of state: the state derived from the go command, which +// is populated by init, and the state derived from scans, which is populated +// via scan. A root is considered scanned if it has been walked to discover +// directories. However, if the scan did not require additional information +// from the directory (such as package name or exports), the directory +// information itself may be partially populated. It will be lazily filled in +// as needed by scans, using the scanCallback. type ModuleResolver struct { - env *ProcessEnv - moduleCacheDir string - dummyVendorMod *gocommand.ModuleJSON // If vendoring is enabled, the pseudo-module that represents the /vendor directory. - roots []gopathwalk.Root - scanSema chan struct{} // scanSema prevents concurrent scans and guards scannedRoots. - scannedRoots map[gopathwalk.Root]bool - - initialized bool - mains []*gocommand.ModuleJSON - mainByDir map[string]*gocommand.ModuleJSON - modsByModPath []*gocommand.ModuleJSON // All modules, ordered by # of path components in module Path... - modsByDir []*gocommand.ModuleJSON // ...or number of path components in their Dir. - - // moduleCacheCache stores information about the module cache. - moduleCacheCache *dirInfoCache - otherCache *dirInfoCache + env *ProcessEnv + + // Module state, populated during construction + dummyVendorMod *gocommand.ModuleJSON // if vendoring is enabled, a pseudo-module to represent the /vendor directory + moduleCacheDir string // GOMODCACHE, inferred from GOPATH if unset + roots []gopathwalk.Root // roots to scan, in approximate order of importance + mains []*gocommand.ModuleJSON // main modules + mainByDir map[string]*gocommand.ModuleJSON // module information by dir, to join with roots + modsByModPath []*gocommand.ModuleJSON // all modules, ordered by # of path components in their module path + modsByDir []*gocommand.ModuleJSON // ...or by the number of path components in their Dir. + + // Scanning state, populated by scan + + // scanSema prevents concurrent scans, and guards scannedRoots and the cache + // fields below (though the caches themselves are concurrency safe). + // Receive to acquire, send to release. + scanSema chan struct{} + scannedRoots map[gopathwalk.Root]bool // if true, root has been walked + + // Caches of directory info, populated by scans and scan callbacks + // + // moduleCacheCache stores cached information about roots in the module + // cache, which are immutable and therefore do not need to be invalidated. + // + // otherCache stores information about all other roots (even GOROOT), which + // may change. + moduleCacheCache *DirInfoCache + otherCache *DirInfoCache } -func newModuleResolver(e *ProcessEnv) *ModuleResolver { +// newModuleResolver returns a new module-aware goimports resolver. +// +// Note: use caution when modifying this constructor: changes must also be +// reflected in ModuleResolver.ClearForNewScan. +func newModuleResolver(e *ProcessEnv, moduleCacheCache *DirInfoCache) (*ModuleResolver, error) { r := &ModuleResolver{ env: e, scanSema: make(chan struct{}, 1), } - r.scanSema <- struct{}{} - return r -} - -func (r *ModuleResolver) init() error { - if r.initialized { - return nil - } + r.scanSema <- struct{}{} // release goenv, err := r.env.goEnv() if err != nil { - return err + return nil, err } + + // TODO(rfindley): can we refactor to share logic with r.env.invokeGo? inv := gocommand.Invocation{ BuildFlags: r.env.BuildFlags, ModFlag: r.env.ModFlag, - ModFile: r.env.ModFile, Env: r.env.env(), Logf: r.env.Logf, WorkingDir: r.env.WorkingDir, } vendorEnabled := false - var mainModVendor *gocommand.ModuleJSON - - // Module vendor directories are ignored in workspace mode: - // https://go.googlesource.com/proposal/+/master/design/45713-workspace.md - if len(r.env.Env["GOWORK"]) == 0 { + var mainModVendor *gocommand.ModuleJSON // for module vendoring + var mainModsVendor []*gocommand.ModuleJSON // for workspace vendoring + + goWork := r.env.Env["GOWORK"] + if len(goWork) == 0 { + // TODO(rfindley): VendorEnabled runs the go command to get GOFLAGS, but + // they should be available from the ProcessEnv. Can we avoid the redundant + // invocation? vendorEnabled, mainModVendor, err = gocommand.VendorEnabled(context.TODO(), inv, r.env.GocmdRunner) if err != nil { - return err + return nil, err + } + } else { + vendorEnabled, mainModsVendor, err = gocommand.WorkspaceVendorEnabled(context.Background(), inv, r.env.GocmdRunner) + if err != nil { + return nil, err } } - if mainModVendor != nil && vendorEnabled { - // Vendor mode is on, so all the non-Main modules are irrelevant, - // and we need to search /vendor for everything. - r.mains = []*gocommand.ModuleJSON{mainModVendor} - r.dummyVendorMod = &gocommand.ModuleJSON{ - Path: "", - Dir: filepath.Join(mainModVendor.Dir, "vendor"), + if vendorEnabled { + if mainModVendor != nil { + // Module vendor mode is on, so all the non-Main modules are irrelevant, + // and we need to search /vendor for everything. + r.mains = []*gocommand.ModuleJSON{mainModVendor} + r.dummyVendorMod = &gocommand.ModuleJSON{ + Path: "", + Dir: filepath.Join(mainModVendor.Dir, "vendor"), + } + r.modsByModPath = []*gocommand.ModuleJSON{mainModVendor, r.dummyVendorMod} + r.modsByDir = []*gocommand.ModuleJSON{mainModVendor, r.dummyVendorMod} + } else { + // Workspace vendor mode is on, so all the non-Main modules are irrelevant, + // and we need to search /vendor for everything. + r.mains = mainModsVendor + r.dummyVendorMod = &gocommand.ModuleJSON{ + Path: "", + Dir: filepath.Join(filepath.Dir(goWork), "vendor"), + } + r.modsByModPath = append(append([]*gocommand.ModuleJSON{}, mainModsVendor...), r.dummyVendorMod) + r.modsByDir = append(append([]*gocommand.ModuleJSON{}, mainModsVendor...), r.dummyVendorMod) } - r.modsByModPath = []*gocommand.ModuleJSON{mainModVendor, r.dummyVendorMod} - r.modsByDir = []*gocommand.ModuleJSON{mainModVendor, r.dummyVendorMod} } else { // Vendor mode is off, so run go list -m ... to find everything. err := r.initAllMods() @@ -101,19 +160,14 @@ func (r *ModuleResolver) init() error { // GO111MODULE=on. Other errors are fatal. if err != nil { if errMsg := err.Error(); !strings.Contains(errMsg, "working directory is not part of a module") && !strings.Contains(errMsg, "go.mod file not found") { - return err + return nil, err } } } - if gmc := r.env.Env["GOMODCACHE"]; gmc != "" { - r.moduleCacheDir = gmc - } else { - gopaths := filepath.SplitList(goenv["GOPATH"]) - if len(gopaths) == 0 { - return fmt.Errorf("empty GOPATH") - } - r.moduleCacheDir = filepath.Join(gopaths[0], "/pkg/mod") + r.moduleCacheDir = gomodcacheForEnv(goenv) + if r.moduleCacheDir == "" { + return nil, fmt.Errorf("cannot resolve GOMODCACHE") } sort.Slice(r.modsByModPath, func(i, j int) bool { @@ -129,8 +183,9 @@ func (r *ModuleResolver) init() error { return count(j) < count(i) // descending order }) - r.roots = []gopathwalk.Root{ - {Path: filepath.Join(goenv["GOROOT"], "/src"), Type: gopathwalk.RootGOROOT}, + r.roots = []gopathwalk.Root{} + if goenv["GOROOT"] != "" { // "" happens in tests + r.roots = append(r.roots, gopathwalk.Root{Path: filepath.Join(goenv["GOROOT"], "/src"), Type: gopathwalk.RootGOROOT}) } r.mainByDir = make(map[string]*gocommand.ModuleJSON) for _, main := range r.mains { @@ -142,7 +197,11 @@ func (r *ModuleResolver) init() error { } else { addDep := func(mod *gocommand.ModuleJSON) { if mod.Replace == nil { - // This is redundant with the cache, but we'll skip it cheaply enough. + // This is redundant with the cache, but we'll skip it cheaply enough + // when we encounter it in the module cache scan. + // + // Including it at a lower index in r.roots than the module cache dir + // helps prioritize matches from within existing dependencies. r.roots = append(r.roots, gopathwalk.Root{Path: mod.Dir, Type: gopathwalk.RootModuleCache}) } else { r.roots = append(r.roots, gopathwalk.Root{Path: mod.Dir, Type: gopathwalk.RootOther}) @@ -159,24 +218,43 @@ func (r *ModuleResolver) init() error { addDep(mod) } } + // If provided, share the moduleCacheCache. + // + // TODO(rfindley): The module cache is immutable. However, the loaded + // exports do depend on GOOS and GOARCH. Fortunately, the + // ProcessEnv.buildContext does not adjust these from build.DefaultContext + // (even though it should). So for now, this is OK to share, but we need to + // add logic for handling GOOS/GOARCH. + r.moduleCacheCache = moduleCacheCache r.roots = append(r.roots, gopathwalk.Root{Path: r.moduleCacheDir, Type: gopathwalk.RootModuleCache}) } r.scannedRoots = map[gopathwalk.Root]bool{} if r.moduleCacheCache == nil { - r.moduleCacheCache = &dirInfoCache{ - dirs: map[string]*directoryPackageInfo{}, - listeners: map[*int]cacheListener{}, - } + r.moduleCacheCache = NewDirInfoCache() } - if r.otherCache == nil { - r.otherCache = &dirInfoCache{ - dirs: map[string]*directoryPackageInfo{}, - listeners: map[*int]cacheListener{}, - } - } - r.initialized = true - return nil + r.otherCache = NewDirInfoCache() + return r, nil +} + +// gomodcacheForEnv returns the GOMODCACHE value to use based on the given env +// map, which must have GOMODCACHE and GOPATH populated. +// +// TODO(rfindley): this is defensive refactoring. +// 1. Is this even relevant anymore? Can't we just read GOMODCACHE. +// 2. Use this to separate module cache scanning from other scanning. +func gomodcacheForEnv(goenv map[string]string) string { + if gmc := goenv["GOMODCACHE"]; gmc != "" { + // golang/go#67156: ensure that the module cache is clean, since it is + // assumed as a prefix to directories scanned by gopathwalk, which are + // themselves clean. + return filepath.Clean(gmc) + } + gopaths := filepath.SplitList(goenv["GOPATH"]) + if len(gopaths) == 0 { + return "" + } + return filepath.Join(gopaths[0], "/pkg/mod") } func (r *ModuleResolver) initAllMods() error { @@ -190,9 +268,7 @@ func (r *ModuleResolver) initAllMods() error { return err } if mod.Dir == "" { - if r.env.Logf != nil { - r.env.Logf("module %v has not been downloaded and will be ignored", mod.Path) - } + r.env.logf("module %v has not been downloaded and will be ignored", mod.Path) // Can't do anything with a module that's not downloaded. continue } @@ -207,30 +283,86 @@ func (r *ModuleResolver) initAllMods() error { return nil } -func (r *ModuleResolver) ClearForNewScan() { - <-r.scanSema - r.scannedRoots = map[gopathwalk.Root]bool{} - r.otherCache = &dirInfoCache{ - dirs: map[string]*directoryPackageInfo{}, - listeners: map[*int]cacheListener{}, +// ClearForNewScan invalidates the last scan. +// +// It preserves the set of roots, but forgets about the set of directories. +// Though it forgets the set of module cache directories, it remembers their +// contents, since they are assumed to be immutable. +func (r *ModuleResolver) ClearForNewScan() Resolver { + <-r.scanSema // acquire r, to guard scannedRoots + r2 := &ModuleResolver{ + env: r.env, + dummyVendorMod: r.dummyVendorMod, + moduleCacheDir: r.moduleCacheDir, + roots: r.roots, + mains: r.mains, + mainByDir: r.mainByDir, + modsByModPath: r.modsByModPath, + + scanSema: make(chan struct{}, 1), + scannedRoots: make(map[gopathwalk.Root]bool), + otherCache: NewDirInfoCache(), + moduleCacheCache: r.moduleCacheCache, + } + r2.scanSema <- struct{}{} // r2 must start released + // Invalidate root scans. We don't need to invalidate module cache roots, + // because they are immutable. + // (We don't support a use case where GOMODCACHE is cleaned in the middle of + // e.g. a gopls session: the user must restart gopls to get accurate + // imports.) + // + // Scanning for new directories in GOMODCACHE should be handled elsewhere, + // via a call to ScanModuleCache. + for _, root := range r.roots { + if root.Type == gopathwalk.RootModuleCache && r.scannedRoots[root] { + r2.scannedRoots[root] = true + } } - r.scanSema <- struct{}{} + r.scanSema <- struct{}{} // release r + return r2 } -func (r *ModuleResolver) ClearForNewMod() { - <-r.scanSema - *r = ModuleResolver{ - env: r.env, - moduleCacheCache: r.moduleCacheCache, - otherCache: r.otherCache, - scanSema: r.scanSema, +// ClearModuleInfo invalidates resolver state that depends on go.mod file +// contents (essentially, the output of go list -m -json ...). +// +// Notably, it does not forget directory contents, which are reset +// asynchronously via ClearForNewScan. +// +// If the ProcessEnv is a GOPATH environment, ClearModuleInfo is a no op. +// +// TODO(rfindley): move this to a new env.go, consolidating ProcessEnv methods. +func (e *ProcessEnv) ClearModuleInfo() { + if r, ok := e.resolver.(*ModuleResolver); ok { + resolver, err := newModuleResolver(e, e.ModCache) + if err != nil { + e.resolver = nil + e.resolverErr = err + return + } + + <-r.scanSema // acquire (guards caches) + resolver.moduleCacheCache = r.moduleCacheCache + resolver.otherCache = r.otherCache + r.scanSema <- struct{}{} // release + + e.UpdateResolver(resolver) } - r.init() - r.scanSema <- struct{}{} } -// findPackage returns the module and directory that contains the package at -// the given import path, or returns nil, "" if no module is in scope. +// UpdateResolver sets the resolver for the ProcessEnv to use in imports +// operations. Only for use with the result of [Resolver.ClearForNewScan]. +// +// TODO(rfindley): this awkward API is a result of the (arguably) inverted +// relationship between configuration and state described in the doc comment +// for [ProcessEnv]. +func (e *ProcessEnv) UpdateResolver(r Resolver) { + e.resolver = r + e.resolverErr = nil +} + +// findPackage returns the module and directory from within the main modules +// and their dependencies that contains the package at the given import path, +// or returns nil, "" if no module is in scope. func (r *ModuleResolver) findPackage(importPath string) (*gocommand.ModuleJSON, string) { // This can't find packages in the stdlib, but that's harmless for all // the existing code paths. @@ -265,7 +397,7 @@ func (r *ModuleResolver) findPackage(importPath string) (*gocommand.ModuleJSON, } // Not cached. Read the filesystem. - pkgFiles, err := ioutil.ReadDir(pkgDir) + pkgFiles, err := os.ReadDir(pkgDir) if err != nil { continue } @@ -296,10 +428,6 @@ func (r *ModuleResolver) cacheStore(info directoryPackageInfo) { } } -func (r *ModuleResolver) cacheKeys() []string { - return append(r.moduleCacheCache.Keys(), r.otherCache.Keys()...) -} - // cachePackageName caches the package name for a dir already in the cache. func (r *ModuleResolver) cachePackageName(info directoryPackageInfo) (string, error) { if info.rootType == gopathwalk.RootModuleCache { @@ -308,7 +436,7 @@ func (r *ModuleResolver) cachePackageName(info directoryPackageInfo) (string, er return r.otherCache.CachePackageName(info) } -func (r *ModuleResolver) cacheExports(ctx context.Context, env *ProcessEnv, info directoryPackageInfo) (string, []string, error) { +func (r *ModuleResolver) cacheExports(ctx context.Context, env *ProcessEnv, info directoryPackageInfo) (string, []stdlib.Symbol, error) { if info.rootType == gopathwalk.RootModuleCache { return r.moduleCacheCache.CacheExports(ctx, env, info) } @@ -368,15 +496,15 @@ func (r *ModuleResolver) dirIsNestedModule(dir string, mod *gocommand.ModuleJSON return modDir != mod.Dir } -func (r *ModuleResolver) modInfo(dir string) (modDir string, modName string) { - readModName := func(modFile string) string { - modBytes, err := ioutil.ReadFile(modFile) - if err != nil { - return "" - } - return modulePath(modBytes) +func readModName(modFile string) string { + modBytes, err := os.ReadFile(modFile) + if err != nil { + return "" } + return modulePath(modBytes) +} +func (r *ModuleResolver) modInfo(dir string) (modDir, modName string) { if r.dirInModuleCache(dir) { if matches := modCacheRegexp.FindStringSubmatch(dir); len(matches) == 3 { index := strings.Index(dir, matches[1]+"@"+matches[2]) @@ -410,11 +538,9 @@ func (r *ModuleResolver) dirInModuleCache(dir string) bool { } func (r *ModuleResolver) loadPackageNames(importPaths []string, srcDir string) (map[string]string, error) { - if err := r.init(); err != nil { - return nil, err - } names := map[string]string{} for _, path := range importPaths { + // TODO(rfindley): shouldn't this use the dirInfoCache? _, packageDir := r.findPackage(path) if packageDir == "" { continue @@ -432,10 +558,6 @@ func (r *ModuleResolver) scan(ctx context.Context, callback *scanCallback) error ctx, done := event.Start(ctx, "imports.ModuleResolver.scan") defer done() - if err := r.init(); err != nil { - return err - } - processDir := func(info directoryPackageInfo) { // Skip this directory if we were not able to get the package information successfully. if scanned, err := info.reachedStatus(directoryScanned); !scanned || err != nil { @@ -445,18 +567,18 @@ func (r *ModuleResolver) scan(ctx context.Context, callback *scanCallback) error if err != nil { return } - if !callback.dirFound(pkg) { return } + pkg.packageName, err = r.cachePackageName(info) if err != nil { return } - if !callback.packageNameLoaded(pkg) { return } + _, exports, err := r.loadExports(ctx, pkg, false) if err != nil { return @@ -495,7 +617,6 @@ func (r *ModuleResolver) scan(ctx context.Context, callback *scanCallback) error return packageScanned } - // Add anything new to the cache, and process it if we're still listening. add := func(root gopathwalk.Root, dir string) { r.cacheStore(r.scanDirForPackage(root, dir)) } @@ -510,9 +631,9 @@ func (r *ModuleResolver) scan(ctx context.Context, callback *scanCallback) error select { case <-ctx.Done(): return - case <-r.scanSema: + case <-r.scanSema: // acquire } - defer func() { r.scanSema <- struct{}{} }() + defer func() { r.scanSema <- struct{}{} }() // release // We have the lock on r.scannedRoots, and no other scans can run. for _, root := range roots { if ctx.Err() != nil { @@ -535,7 +656,7 @@ func (r *ModuleResolver) scan(ctx context.Context, callback *scanCallback) error } func (r *ModuleResolver) scoreImportPath(ctx context.Context, path string) float64 { - if _, ok := stdlib[path]; ok { + if stdlib.HasPackage(path) { return MaxRelevance } mod, _ := r.findPackage(path) @@ -613,10 +734,7 @@ func (r *ModuleResolver) canonicalize(info directoryPackageInfo) (*pkg, error) { return res, nil } -func (r *ModuleResolver) loadExports(ctx context.Context, pkg *pkg, includeTest bool) (string, []string, error) { - if err := r.init(); err != nil { - return "", nil, err - } +func (r *ModuleResolver) loadExports(ctx context.Context, pkg *pkg, includeTest bool) (string, []stdlib.Symbol, error) { if info, ok := r.cacheLoad(pkg.dir); ok && !includeTest { return r.cacheExports(ctx, r.env, info) } @@ -625,8 +743,8 @@ func (r *ModuleResolver) loadExports(ctx context.Context, pkg *pkg, includeTest func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) directoryPackageInfo { subdir := "" - if dir != root.Path { - subdir = dir[len(root.Path)+len("/"):] + if prefix := root.Path + string(filepath.Separator); strings.HasPrefix(dir, prefix) { + subdir = dir[len(prefix):] } importPath := filepath.ToSlash(subdir) if strings.HasPrefix(importPath, "vendor/") { @@ -649,9 +767,7 @@ func (r *ModuleResolver) scanDirForPackage(root gopathwalk.Root, dir string) dir } modPath, err := module.UnescapePath(filepath.ToSlash(matches[1])) if err != nil { - if r.env.Logf != nil { - r.env.Logf("decoding module cache path %q: %v", subdir, err) - } + r.env.logf("decoding module cache path %q: %v", subdir, err) return directoryPackageInfo{ status: directoryScanned, err: fmt.Errorf("decoding module cache path %q: %v", subdir, err), diff --git a/vendor/golang.org/x/tools/internal/imports/mod_cache.go b/vendor/golang.org/x/tools/internal/imports/mod_cache.go index 45690abb..b1192696 100644 --- a/vendor/golang.org/x/tools/internal/imports/mod_cache.go +++ b/vendor/golang.org/x/tools/internal/imports/mod_cache.go @@ -7,9 +7,14 @@ package imports import ( "context" "fmt" + "path" + "path/filepath" + "strings" "sync" + "golang.org/x/mod/module" "golang.org/x/tools/internal/gopathwalk" + "golang.org/x/tools/internal/stdlib" ) // To find packages to import, the resolver needs to know about all of @@ -39,6 +44,8 @@ const ( exportsLoaded ) +// directoryPackageInfo holds (possibly incomplete) information about packages +// contained in a given directory. type directoryPackageInfo struct { // status indicates the extent to which this struct has been filled in. status directoryPackageStatus @@ -63,8 +70,11 @@ type directoryPackageInfo struct { packageName string // the package name, as declared in the source. // Set when status >= exportsLoaded. - - exports []string + // TODO(rfindley): it's hard to see this, but exports depend implicitly on + // the default build context GOOS and GOARCH. + // + // We can make this explicit, and key exports by GOOS, GOARCH. + exports []stdlib.Symbol } // reachedStatus returns true when info has a status at least target and any error associated with @@ -79,7 +89,7 @@ func (info *directoryPackageInfo) reachedStatus(target directoryPackageStatus) ( return true, nil } -// dirInfoCache is a concurrency safe map for storing information about +// DirInfoCache is a concurrency-safe map for storing information about // directories that may contain packages. // // The information in this cache is built incrementally. Entries are initialized in scan. @@ -92,21 +102,26 @@ func (info *directoryPackageInfo) reachedStatus(target directoryPackageStatus) ( // The information in the cache is not expected to change for the cache's // lifetime, so there is no protection against competing writes. Users should // take care not to hold the cache across changes to the underlying files. -// -// TODO(suzmue): consider other concurrency strategies and data structures (RWLocks, sync.Map, etc) -type dirInfoCache struct { +type DirInfoCache struct { mu sync.Mutex // dirs stores information about packages in directories, keyed by absolute path. dirs map[string]*directoryPackageInfo listeners map[*int]cacheListener } +func NewDirInfoCache() *DirInfoCache { + return &DirInfoCache{ + dirs: make(map[string]*directoryPackageInfo), + listeners: make(map[*int]cacheListener), + } +} + type cacheListener func(directoryPackageInfo) // ScanAndListen calls listener on all the items in the cache, and on anything // newly added. The returned stop function waits for all in-flight callbacks to // finish and blocks new ones. -func (d *dirInfoCache) ScanAndListen(ctx context.Context, listener cacheListener) func() { +func (d *DirInfoCache) ScanAndListen(ctx context.Context, listener cacheListener) func() { ctx, cancel := context.WithCancel(ctx) // Flushing out all the callbacks is tricky without knowing how many there @@ -162,8 +177,10 @@ func (d *dirInfoCache) ScanAndListen(ctx context.Context, listener cacheListener } // Store stores the package info for dir. -func (d *dirInfoCache) Store(dir string, info directoryPackageInfo) { +func (d *DirInfoCache) Store(dir string, info directoryPackageInfo) { d.mu.Lock() + // TODO(rfindley, golang/go#59216): should we overwrite an existing entry? + // That seems incorrect as the cache should be idempotent. _, old := d.dirs[dir] d.dirs[dir] = &info var listeners []cacheListener @@ -180,7 +197,7 @@ func (d *dirInfoCache) Store(dir string, info directoryPackageInfo) { } // Load returns a copy of the directoryPackageInfo for absolute directory dir. -func (d *dirInfoCache) Load(dir string) (directoryPackageInfo, bool) { +func (d *DirInfoCache) Load(dir string) (directoryPackageInfo, bool) { d.mu.Lock() defer d.mu.Unlock() info, ok := d.dirs[dir] @@ -191,7 +208,7 @@ func (d *dirInfoCache) Load(dir string) (directoryPackageInfo, bool) { } // Keys returns the keys currently present in d. -func (d *dirInfoCache) Keys() (keys []string) { +func (d *DirInfoCache) Keys() (keys []string) { d.mu.Lock() defer d.mu.Unlock() for key := range d.dirs { @@ -200,7 +217,7 @@ func (d *dirInfoCache) Keys() (keys []string) { return keys } -func (d *dirInfoCache) CachePackageName(info directoryPackageInfo) (string, error) { +func (d *DirInfoCache) CachePackageName(info directoryPackageInfo) (string, error) { if loaded, err := info.reachedStatus(nameLoaded); loaded { return info.packageName, err } @@ -213,7 +230,7 @@ func (d *dirInfoCache) CachePackageName(info directoryPackageInfo) (string, erro return info.packageName, info.err } -func (d *dirInfoCache) CacheExports(ctx context.Context, env *ProcessEnv, info directoryPackageInfo) (string, []string, error) { +func (d *DirInfoCache) CacheExports(ctx context.Context, env *ProcessEnv, info directoryPackageInfo) (string, []stdlib.Symbol, error) { if reached, _ := info.reachedStatus(exportsLoaded); reached { return info.packageName, info.exports, info.err } @@ -234,3 +251,81 @@ func (d *dirInfoCache) CacheExports(ctx context.Context, env *ProcessEnv, info d d.Store(info.dir, info) return info.packageName, info.exports, info.err } + +// ScanModuleCache walks the given directory, which must be a GOMODCACHE value, +// for directory package information, storing the results in cache. +func ScanModuleCache(dir string, cache *DirInfoCache, logf func(string, ...any)) { + // Note(rfindley): it's hard to see, but this function attempts to implement + // just the side effects on cache of calling PrimeCache with a ProcessEnv + // that has the given dir as its GOMODCACHE. + // + // Teasing out the control flow, we see that we can avoid any handling of + // vendor/ and can infer module info entirely from the path, simplifying the + // logic here. + + root := gopathwalk.Root{ + Path: filepath.Clean(dir), + Type: gopathwalk.RootModuleCache, + } + + directoryInfo := func(root gopathwalk.Root, dir string) directoryPackageInfo { + // This is a copy of ModuleResolver.scanDirForPackage, trimmed down to + // logic that applies to a module cache directory. + + subdir := "" + if dir != root.Path { + subdir = dir[len(root.Path)+len("/"):] + } + + matches := modCacheRegexp.FindStringSubmatch(subdir) + if len(matches) == 0 { + return directoryPackageInfo{ + status: directoryScanned, + err: fmt.Errorf("invalid module cache path: %v", subdir), + } + } + modPath, err := module.UnescapePath(filepath.ToSlash(matches[1])) + if err != nil { + if logf != nil { + logf("decoding module cache path %q: %v", subdir, err) + } + return directoryPackageInfo{ + status: directoryScanned, + err: fmt.Errorf("decoding module cache path %q: %v", subdir, err), + } + } + importPath := path.Join(modPath, filepath.ToSlash(matches[3])) + index := strings.Index(dir, matches[1]+"@"+matches[2]) + modDir := filepath.Join(dir[:index], matches[1]+"@"+matches[2]) + modName := readModName(filepath.Join(modDir, "go.mod")) + return directoryPackageInfo{ + status: directoryScanned, + dir: dir, + rootType: root.Type, + nonCanonicalImportPath: importPath, + moduleDir: modDir, + moduleName: modName, + } + } + + add := func(root gopathwalk.Root, dir string) { + info := directoryInfo(root, dir) + cache.Store(info.dir, info) + } + + skip := func(_ gopathwalk.Root, dir string) bool { + // Skip directories that have already been scanned. + // + // Note that gopathwalk only adds "package" directories, which must contain + // a .go file, and all such package directories in the module cache are + // immutable. So if we can load a dir, it can be skipped. + info, ok := cache.Load(dir) + if !ok { + return false + } + packageScanned, _ := info.reachedStatus(directoryScanned) + return packageScanned + } + + gopathwalk.WalkSkip([]gopathwalk.Root{root}, add, skip, gopathwalk.Options{Logf: logf, ModulesEnabled: true}) +} diff --git a/vendor/golang.org/x/tools/internal/imports/sortimports.go b/vendor/golang.org/x/tools/internal/imports/sortimports.go index 1a0a7ebd..da8194fd 100644 --- a/vendor/golang.org/x/tools/internal/imports/sortimports.go +++ b/vendor/golang.org/x/tools/internal/imports/sortimports.go @@ -18,7 +18,7 @@ import ( // sortImports sorts runs of consecutive import lines in import blocks in f. // It also removes duplicate imports when it is possible to do so without data loss. // -// It may mutate the token.File. +// It may mutate the token.File and the ast.File. func sortImports(localPrefix string, tokFile *token.File, f *ast.File) { for i, d := range f.Decls { d, ok := d.(*ast.GenDecl) diff --git a/vendor/golang.org/x/tools/internal/imports/zstdlib.go b/vendor/golang.org/x/tools/internal/imports/zstdlib.go deleted file mode 100644 index 9f992c2b..00000000 --- a/vendor/golang.org/x/tools/internal/imports/zstdlib.go +++ /dev/null @@ -1,11345 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Code generated by mkstdlib.go. DO NOT EDIT. - -package imports - -var stdlib = map[string][]string{ - "archive/tar": { - "ErrFieldTooLong", - "ErrHeader", - "ErrInsecurePath", - "ErrWriteAfterClose", - "ErrWriteTooLong", - "FileInfoHeader", - "Format", - "FormatGNU", - "FormatPAX", - "FormatUSTAR", - "FormatUnknown", - "Header", - "NewReader", - "NewWriter", - "Reader", - "TypeBlock", - "TypeChar", - "TypeCont", - "TypeDir", - "TypeFifo", - "TypeGNULongLink", - "TypeGNULongName", - "TypeGNUSparse", - "TypeLink", - "TypeReg", - "TypeRegA", - "TypeSymlink", - "TypeXGlobalHeader", - "TypeXHeader", - "Writer", - }, - "archive/zip": { - "Compressor", - "Decompressor", - "Deflate", - "ErrAlgorithm", - "ErrChecksum", - "ErrFormat", - "ErrInsecurePath", - "File", - "FileHeader", - "FileInfoHeader", - "NewReader", - "NewWriter", - "OpenReader", - "ReadCloser", - "Reader", - "RegisterCompressor", - "RegisterDecompressor", - "Store", - "Writer", - }, - "bufio": { - "ErrAdvanceTooFar", - "ErrBadReadCount", - "ErrBufferFull", - "ErrFinalToken", - "ErrInvalidUnreadByte", - "ErrInvalidUnreadRune", - "ErrNegativeAdvance", - "ErrNegativeCount", - "ErrTooLong", - "MaxScanTokenSize", - "NewReadWriter", - "NewReader", - "NewReaderSize", - "NewScanner", - "NewWriter", - "NewWriterSize", - "ReadWriter", - "Reader", - "ScanBytes", - "ScanLines", - "ScanRunes", - "ScanWords", - "Scanner", - "SplitFunc", - "Writer", - }, - "bytes": { - "Buffer", - "Clone", - "Compare", - "Contains", - "ContainsAny", - "ContainsFunc", - "ContainsRune", - "Count", - "Cut", - "CutPrefix", - "CutSuffix", - "Equal", - "EqualFold", - "ErrTooLarge", - "Fields", - "FieldsFunc", - "HasPrefix", - "HasSuffix", - "Index", - "IndexAny", - "IndexByte", - "IndexFunc", - "IndexRune", - "Join", - "LastIndex", - "LastIndexAny", - "LastIndexByte", - "LastIndexFunc", - "Map", - "MinRead", - "NewBuffer", - "NewBufferString", - "NewReader", - "Reader", - "Repeat", - "Replace", - "ReplaceAll", - "Runes", - "Split", - "SplitAfter", - "SplitAfterN", - "SplitN", - "Title", - "ToLower", - "ToLowerSpecial", - "ToTitle", - "ToTitleSpecial", - "ToUpper", - "ToUpperSpecial", - "ToValidUTF8", - "Trim", - "TrimFunc", - "TrimLeft", - "TrimLeftFunc", - "TrimPrefix", - "TrimRight", - "TrimRightFunc", - "TrimSpace", - "TrimSuffix", - }, - "cmp": { - "Compare", - "Less", - "Ordered", - }, - "compress/bzip2": { - "NewReader", - "StructuralError", - }, - "compress/flate": { - "BestCompression", - "BestSpeed", - "CorruptInputError", - "DefaultCompression", - "HuffmanOnly", - "InternalError", - "NewReader", - "NewReaderDict", - "NewWriter", - "NewWriterDict", - "NoCompression", - "ReadError", - "Reader", - "Resetter", - "WriteError", - "Writer", - }, - "compress/gzip": { - "BestCompression", - "BestSpeed", - "DefaultCompression", - "ErrChecksum", - "ErrHeader", - "Header", - "HuffmanOnly", - "NewReader", - "NewWriter", - "NewWriterLevel", - "NoCompression", - "Reader", - "Writer", - }, - "compress/lzw": { - "LSB", - "MSB", - "NewReader", - "NewWriter", - "Order", - "Reader", - "Writer", - }, - "compress/zlib": { - "BestCompression", - "BestSpeed", - "DefaultCompression", - "ErrChecksum", - "ErrDictionary", - "ErrHeader", - "HuffmanOnly", - "NewReader", - "NewReaderDict", - "NewWriter", - "NewWriterLevel", - "NewWriterLevelDict", - "NoCompression", - "Resetter", - "Writer", - }, - "container/heap": { - "Fix", - "Init", - "Interface", - "Pop", - "Push", - "Remove", - }, - "container/list": { - "Element", - "List", - "New", - }, - "container/ring": { - "New", - "Ring", - }, - "context": { - "AfterFunc", - "Background", - "CancelCauseFunc", - "CancelFunc", - "Canceled", - "Cause", - "Context", - "DeadlineExceeded", - "TODO", - "WithCancel", - "WithCancelCause", - "WithDeadline", - "WithDeadlineCause", - "WithTimeout", - "WithTimeoutCause", - "WithValue", - "WithoutCancel", - }, - "crypto": { - "BLAKE2b_256", - "BLAKE2b_384", - "BLAKE2b_512", - "BLAKE2s_256", - "Decrypter", - "DecrypterOpts", - "Hash", - "MD4", - "MD5", - "MD5SHA1", - "PrivateKey", - "PublicKey", - "RIPEMD160", - "RegisterHash", - "SHA1", - "SHA224", - "SHA256", - "SHA384", - "SHA3_224", - "SHA3_256", - "SHA3_384", - "SHA3_512", - "SHA512", - "SHA512_224", - "SHA512_256", - "Signer", - "SignerOpts", - }, - "crypto/aes": { - "BlockSize", - "KeySizeError", - "NewCipher", - }, - "crypto/cipher": { - "AEAD", - "Block", - "BlockMode", - "NewCBCDecrypter", - "NewCBCEncrypter", - "NewCFBDecrypter", - "NewCFBEncrypter", - "NewCTR", - "NewGCM", - "NewGCMWithNonceSize", - "NewGCMWithTagSize", - "NewOFB", - "Stream", - "StreamReader", - "StreamWriter", - }, - "crypto/des": { - "BlockSize", - "KeySizeError", - "NewCipher", - "NewTripleDESCipher", - }, - "crypto/dsa": { - "ErrInvalidPublicKey", - "GenerateKey", - "GenerateParameters", - "L1024N160", - "L2048N224", - "L2048N256", - "L3072N256", - "ParameterSizes", - "Parameters", - "PrivateKey", - "PublicKey", - "Sign", - "Verify", - }, - "crypto/ecdh": { - "Curve", - "P256", - "P384", - "P521", - "PrivateKey", - "PublicKey", - "X25519", - }, - "crypto/ecdsa": { - "GenerateKey", - "PrivateKey", - "PublicKey", - "Sign", - "SignASN1", - "Verify", - "VerifyASN1", - }, - "crypto/ed25519": { - "GenerateKey", - "NewKeyFromSeed", - "Options", - "PrivateKey", - "PrivateKeySize", - "PublicKey", - "PublicKeySize", - "SeedSize", - "Sign", - "SignatureSize", - "Verify", - "VerifyWithOptions", - }, - "crypto/elliptic": { - "Curve", - "CurveParams", - "GenerateKey", - "Marshal", - "MarshalCompressed", - "P224", - "P256", - "P384", - "P521", - "Unmarshal", - "UnmarshalCompressed", - }, - "crypto/hmac": { - "Equal", - "New", - }, - "crypto/md5": { - "BlockSize", - "New", - "Size", - "Sum", - }, - "crypto/rand": { - "Int", - "Prime", - "Read", - "Reader", - }, - "crypto/rc4": { - "Cipher", - "KeySizeError", - "NewCipher", - }, - "crypto/rsa": { - "CRTValue", - "DecryptOAEP", - "DecryptPKCS1v15", - "DecryptPKCS1v15SessionKey", - "EncryptOAEP", - "EncryptPKCS1v15", - "ErrDecryption", - "ErrMessageTooLong", - "ErrVerification", - "GenerateKey", - "GenerateMultiPrimeKey", - "OAEPOptions", - "PKCS1v15DecryptOptions", - "PSSOptions", - "PSSSaltLengthAuto", - "PSSSaltLengthEqualsHash", - "PrecomputedValues", - "PrivateKey", - "PublicKey", - "SignPKCS1v15", - "SignPSS", - "VerifyPKCS1v15", - "VerifyPSS", - }, - "crypto/sha1": { - "BlockSize", - "New", - "Size", - "Sum", - }, - "crypto/sha256": { - "BlockSize", - "New", - "New224", - "Size", - "Size224", - "Sum224", - "Sum256", - }, - "crypto/sha512": { - "BlockSize", - "New", - "New384", - "New512_224", - "New512_256", - "Size", - "Size224", - "Size256", - "Size384", - "Sum384", - "Sum512", - "Sum512_224", - "Sum512_256", - }, - "crypto/subtle": { - "ConstantTimeByteEq", - "ConstantTimeCompare", - "ConstantTimeCopy", - "ConstantTimeEq", - "ConstantTimeLessOrEq", - "ConstantTimeSelect", - "XORBytes", - }, - "crypto/tls": { - "AlertError", - "Certificate", - "CertificateRequestInfo", - "CertificateVerificationError", - "CipherSuite", - "CipherSuiteName", - "CipherSuites", - "Client", - "ClientAuthType", - "ClientHelloInfo", - "ClientSessionCache", - "ClientSessionState", - "Config", - "Conn", - "ConnectionState", - "CurveID", - "CurveP256", - "CurveP384", - "CurveP521", - "Dial", - "DialWithDialer", - "Dialer", - "ECDSAWithP256AndSHA256", - "ECDSAWithP384AndSHA384", - "ECDSAWithP521AndSHA512", - "ECDSAWithSHA1", - "Ed25519", - "InsecureCipherSuites", - "Listen", - "LoadX509KeyPair", - "NewLRUClientSessionCache", - "NewListener", - "NewResumptionState", - "NoClientCert", - "PKCS1WithSHA1", - "PKCS1WithSHA256", - "PKCS1WithSHA384", - "PKCS1WithSHA512", - "PSSWithSHA256", - "PSSWithSHA384", - "PSSWithSHA512", - "ParseSessionState", - "QUICClient", - "QUICConfig", - "QUICConn", - "QUICEncryptionLevel", - "QUICEncryptionLevelApplication", - "QUICEncryptionLevelEarly", - "QUICEncryptionLevelHandshake", - "QUICEncryptionLevelInitial", - "QUICEvent", - "QUICEventKind", - "QUICHandshakeDone", - "QUICNoEvent", - "QUICRejectedEarlyData", - "QUICServer", - "QUICSessionTicketOptions", - "QUICSetReadSecret", - "QUICSetWriteSecret", - "QUICTransportParameters", - "QUICTransportParametersRequired", - "QUICWriteData", - "RecordHeaderError", - "RenegotiateFreelyAsClient", - "RenegotiateNever", - "RenegotiateOnceAsClient", - "RenegotiationSupport", - "RequestClientCert", - "RequireAndVerifyClientCert", - "RequireAnyClientCert", - "Server", - "SessionState", - "SignatureScheme", - "TLS_AES_128_GCM_SHA256", - "TLS_AES_256_GCM_SHA384", - "TLS_CHACHA20_POLY1305_SHA256", - "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA", - "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256", - "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", - "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA", - "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", - "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305", - "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256", - "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA", - "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA", - "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA", - "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256", - "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", - "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA", - "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", - "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305", - "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256", - "TLS_ECDHE_RSA_WITH_RC4_128_SHA", - "TLS_FALLBACK_SCSV", - "TLS_RSA_WITH_3DES_EDE_CBC_SHA", - "TLS_RSA_WITH_AES_128_CBC_SHA", - "TLS_RSA_WITH_AES_128_CBC_SHA256", - "TLS_RSA_WITH_AES_128_GCM_SHA256", - "TLS_RSA_WITH_AES_256_CBC_SHA", - "TLS_RSA_WITH_AES_256_GCM_SHA384", - "TLS_RSA_WITH_RC4_128_SHA", - "VerifyClientCertIfGiven", - "VersionName", - "VersionSSL30", - "VersionTLS10", - "VersionTLS11", - "VersionTLS12", - "VersionTLS13", - "X25519", - "X509KeyPair", - }, - "crypto/x509": { - "CANotAuthorizedForExtKeyUsage", - "CANotAuthorizedForThisName", - "CertPool", - "Certificate", - "CertificateInvalidError", - "CertificateRequest", - "ConstraintViolationError", - "CreateCertificate", - "CreateCertificateRequest", - "CreateRevocationList", - "DSA", - "DSAWithSHA1", - "DSAWithSHA256", - "DecryptPEMBlock", - "ECDSA", - "ECDSAWithSHA1", - "ECDSAWithSHA256", - "ECDSAWithSHA384", - "ECDSAWithSHA512", - "Ed25519", - "EncryptPEMBlock", - "ErrUnsupportedAlgorithm", - "Expired", - "ExtKeyUsage", - "ExtKeyUsageAny", - "ExtKeyUsageClientAuth", - "ExtKeyUsageCodeSigning", - "ExtKeyUsageEmailProtection", - "ExtKeyUsageIPSECEndSystem", - "ExtKeyUsageIPSECTunnel", - "ExtKeyUsageIPSECUser", - "ExtKeyUsageMicrosoftCommercialCodeSigning", - "ExtKeyUsageMicrosoftKernelCodeSigning", - "ExtKeyUsageMicrosoftServerGatedCrypto", - "ExtKeyUsageNetscapeServerGatedCrypto", - "ExtKeyUsageOCSPSigning", - "ExtKeyUsageServerAuth", - "ExtKeyUsageTimeStamping", - "HostnameError", - "IncompatibleUsage", - "IncorrectPasswordError", - "InsecureAlgorithmError", - "InvalidReason", - "IsEncryptedPEMBlock", - "KeyUsage", - "KeyUsageCRLSign", - "KeyUsageCertSign", - "KeyUsageContentCommitment", - "KeyUsageDataEncipherment", - "KeyUsageDecipherOnly", - "KeyUsageDigitalSignature", - "KeyUsageEncipherOnly", - "KeyUsageKeyAgreement", - "KeyUsageKeyEncipherment", - "MD2WithRSA", - "MD5WithRSA", - "MarshalECPrivateKey", - "MarshalPKCS1PrivateKey", - "MarshalPKCS1PublicKey", - "MarshalPKCS8PrivateKey", - "MarshalPKIXPublicKey", - "NameConstraintsWithoutSANs", - "NameMismatch", - "NewCertPool", - "NotAuthorizedToSign", - "PEMCipher", - "PEMCipher3DES", - "PEMCipherAES128", - "PEMCipherAES192", - "PEMCipherAES256", - "PEMCipherDES", - "ParseCRL", - "ParseCertificate", - "ParseCertificateRequest", - "ParseCertificates", - "ParseDERCRL", - "ParseECPrivateKey", - "ParsePKCS1PrivateKey", - "ParsePKCS1PublicKey", - "ParsePKCS8PrivateKey", - "ParsePKIXPublicKey", - "ParseRevocationList", - "PublicKeyAlgorithm", - "PureEd25519", - "RSA", - "RevocationList", - "RevocationListEntry", - "SHA1WithRSA", - "SHA256WithRSA", - "SHA256WithRSAPSS", - "SHA384WithRSA", - "SHA384WithRSAPSS", - "SHA512WithRSA", - "SHA512WithRSAPSS", - "SetFallbackRoots", - "SignatureAlgorithm", - "SystemCertPool", - "SystemRootsError", - "TooManyConstraints", - "TooManyIntermediates", - "UnconstrainedName", - "UnhandledCriticalExtension", - "UnknownAuthorityError", - "UnknownPublicKeyAlgorithm", - "UnknownSignatureAlgorithm", - "VerifyOptions", - }, - "crypto/x509/pkix": { - "AlgorithmIdentifier", - "AttributeTypeAndValue", - "AttributeTypeAndValueSET", - "CertificateList", - "Extension", - "Name", - "RDNSequence", - "RelativeDistinguishedNameSET", - "RevokedCertificate", - "TBSCertificateList", - }, - "database/sql": { - "ColumnType", - "Conn", - "DB", - "DBStats", - "Drivers", - "ErrConnDone", - "ErrNoRows", - "ErrTxDone", - "IsolationLevel", - "LevelDefault", - "LevelLinearizable", - "LevelReadCommitted", - "LevelReadUncommitted", - "LevelRepeatableRead", - "LevelSerializable", - "LevelSnapshot", - "LevelWriteCommitted", - "Named", - "NamedArg", - "NullBool", - "NullByte", - "NullFloat64", - "NullInt16", - "NullInt32", - "NullInt64", - "NullString", - "NullTime", - "Open", - "OpenDB", - "Out", - "RawBytes", - "Register", - "Result", - "Row", - "Rows", - "Scanner", - "Stmt", - "Tx", - "TxOptions", - }, - "database/sql/driver": { - "Bool", - "ColumnConverter", - "Conn", - "ConnBeginTx", - "ConnPrepareContext", - "Connector", - "DefaultParameterConverter", - "Driver", - "DriverContext", - "ErrBadConn", - "ErrRemoveArgument", - "ErrSkip", - "Execer", - "ExecerContext", - "Int32", - "IsScanValue", - "IsValue", - "IsolationLevel", - "NamedValue", - "NamedValueChecker", - "NotNull", - "Null", - "Pinger", - "Queryer", - "QueryerContext", - "Result", - "ResultNoRows", - "Rows", - "RowsAffected", - "RowsColumnTypeDatabaseTypeName", - "RowsColumnTypeLength", - "RowsColumnTypeNullable", - "RowsColumnTypePrecisionScale", - "RowsColumnTypeScanType", - "RowsNextResultSet", - "SessionResetter", - "Stmt", - "StmtExecContext", - "StmtQueryContext", - "String", - "Tx", - "TxOptions", - "Validator", - "Value", - "ValueConverter", - "Valuer", - }, - "debug/buildinfo": { - "BuildInfo", - "Read", - "ReadFile", - }, - "debug/dwarf": { - "AddrType", - "ArrayType", - "Attr", - "AttrAbstractOrigin", - "AttrAccessibility", - "AttrAddrBase", - "AttrAddrClass", - "AttrAlignment", - "AttrAllocated", - "AttrArtificial", - "AttrAssociated", - "AttrBaseTypes", - "AttrBinaryScale", - "AttrBitOffset", - "AttrBitSize", - "AttrByteSize", - "AttrCallAllCalls", - "AttrCallAllSourceCalls", - "AttrCallAllTailCalls", - "AttrCallColumn", - "AttrCallDataLocation", - "AttrCallDataValue", - "AttrCallFile", - "AttrCallLine", - "AttrCallOrigin", - "AttrCallPC", - "AttrCallParameter", - "AttrCallReturnPC", - "AttrCallTailCall", - "AttrCallTarget", - "AttrCallTargetClobbered", - "AttrCallValue", - "AttrCalling", - "AttrCommonRef", - "AttrCompDir", - "AttrConstExpr", - "AttrConstValue", - "AttrContainingType", - "AttrCount", - "AttrDataBitOffset", - "AttrDataLocation", - "AttrDataMemberLoc", - "AttrDecimalScale", - "AttrDecimalSign", - "AttrDeclColumn", - "AttrDeclFile", - "AttrDeclLine", - "AttrDeclaration", - "AttrDefaultValue", - "AttrDefaulted", - "AttrDeleted", - "AttrDescription", - "AttrDigitCount", - "AttrDiscr", - "AttrDiscrList", - "AttrDiscrValue", - "AttrDwoName", - "AttrElemental", - "AttrEncoding", - "AttrEndianity", - "AttrEntrypc", - "AttrEnumClass", - "AttrExplicit", - "AttrExportSymbols", - "AttrExtension", - "AttrExternal", - "AttrFrameBase", - "AttrFriend", - "AttrHighpc", - "AttrIdentifierCase", - "AttrImport", - "AttrInline", - "AttrIsOptional", - "AttrLanguage", - "AttrLinkageName", - "AttrLocation", - "AttrLoclistsBase", - "AttrLowerBound", - "AttrLowpc", - "AttrMacroInfo", - "AttrMacros", - "AttrMainSubprogram", - "AttrMutable", - "AttrName", - "AttrNamelistItem", - "AttrNoreturn", - "AttrObjectPointer", - "AttrOrdering", - "AttrPictureString", - "AttrPriority", - "AttrProducer", - "AttrPrototyped", - "AttrPure", - "AttrRanges", - "AttrRank", - "AttrRecursive", - "AttrReference", - "AttrReturnAddr", - "AttrRnglistsBase", - "AttrRvalueReference", - "AttrSegment", - "AttrSibling", - "AttrSignature", - "AttrSmall", - "AttrSpecification", - "AttrStartScope", - "AttrStaticLink", - "AttrStmtList", - "AttrStrOffsetsBase", - "AttrStride", - "AttrStrideSize", - "AttrStringLength", - "AttrStringLengthBitSize", - "AttrStringLengthByteSize", - "AttrThreadsScaled", - "AttrTrampoline", - "AttrType", - "AttrUpperBound", - "AttrUseLocation", - "AttrUseUTF8", - "AttrVarParam", - "AttrVirtuality", - "AttrVisibility", - "AttrVtableElemLoc", - "BasicType", - "BoolType", - "CharType", - "Class", - "ClassAddrPtr", - "ClassAddress", - "ClassBlock", - "ClassConstant", - "ClassExprLoc", - "ClassFlag", - "ClassLinePtr", - "ClassLocList", - "ClassLocListPtr", - "ClassMacPtr", - "ClassRangeListPtr", - "ClassReference", - "ClassReferenceAlt", - "ClassReferenceSig", - "ClassRngList", - "ClassRngListsPtr", - "ClassStrOffsetsPtr", - "ClassString", - "ClassStringAlt", - "ClassUnknown", - "CommonType", - "ComplexType", - "Data", - "DecodeError", - "DotDotDotType", - "Entry", - "EnumType", - "EnumValue", - "ErrUnknownPC", - "Field", - "FloatType", - "FuncType", - "IntType", - "LineEntry", - "LineFile", - "LineReader", - "LineReaderPos", - "New", - "Offset", - "PtrType", - "QualType", - "Reader", - "StructField", - "StructType", - "Tag", - "TagAccessDeclaration", - "TagArrayType", - "TagAtomicType", - "TagBaseType", - "TagCallSite", - "TagCallSiteParameter", - "TagCatchDwarfBlock", - "TagClassType", - "TagCoarrayType", - "TagCommonDwarfBlock", - "TagCommonInclusion", - "TagCompileUnit", - "TagCondition", - "TagConstType", - "TagConstant", - "TagDwarfProcedure", - "TagDynamicType", - "TagEntryPoint", - "TagEnumerationType", - "TagEnumerator", - "TagFileType", - "TagFormalParameter", - "TagFriend", - "TagGenericSubrange", - "TagImmutableType", - "TagImportedDeclaration", - "TagImportedModule", - "TagImportedUnit", - "TagInheritance", - "TagInlinedSubroutine", - "TagInterfaceType", - "TagLabel", - "TagLexDwarfBlock", - "TagMember", - "TagModule", - "TagMutableType", - "TagNamelist", - "TagNamelistItem", - "TagNamespace", - "TagPackedType", - "TagPartialUnit", - "TagPointerType", - "TagPtrToMemberType", - "TagReferenceType", - "TagRestrictType", - "TagRvalueReferenceType", - "TagSetType", - "TagSharedType", - "TagSkeletonUnit", - "TagStringType", - "TagStructType", - "TagSubprogram", - "TagSubrangeType", - "TagSubroutineType", - "TagTemplateAlias", - "TagTemplateTypeParameter", - "TagTemplateValueParameter", - "TagThrownType", - "TagTryDwarfBlock", - "TagTypeUnit", - "TagTypedef", - "TagUnionType", - "TagUnspecifiedParameters", - "TagUnspecifiedType", - "TagVariable", - "TagVariant", - "TagVariantPart", - "TagVolatileType", - "TagWithStmt", - "Type", - "TypedefType", - "UcharType", - "UintType", - "UnspecifiedType", - "UnsupportedType", - "VoidType", - }, - "debug/elf": { - "ARM_MAGIC_TRAMP_NUMBER", - "COMPRESS_HIOS", - "COMPRESS_HIPROC", - "COMPRESS_LOOS", - "COMPRESS_LOPROC", - "COMPRESS_ZLIB", - "COMPRESS_ZSTD", - "Chdr32", - "Chdr64", - "Class", - "CompressionType", - "DF_1_CONFALT", - "DF_1_DIRECT", - "DF_1_DISPRELDNE", - "DF_1_DISPRELPND", - "DF_1_EDITED", - "DF_1_ENDFILTEE", - "DF_1_GLOBAL", - "DF_1_GLOBAUDIT", - "DF_1_GROUP", - "DF_1_IGNMULDEF", - "DF_1_INITFIRST", - "DF_1_INTERPOSE", - "DF_1_KMOD", - "DF_1_LOADFLTR", - "DF_1_NOCOMMON", - "DF_1_NODEFLIB", - "DF_1_NODELETE", - "DF_1_NODIRECT", - "DF_1_NODUMP", - "DF_1_NOHDR", - "DF_1_NOKSYMS", - "DF_1_NOOPEN", - "DF_1_NORELOC", - "DF_1_NOW", - "DF_1_ORIGIN", - "DF_1_PIE", - "DF_1_SINGLETON", - "DF_1_STUB", - "DF_1_SYMINTPOSE", - "DF_1_TRANS", - "DF_1_WEAKFILTER", - "DF_BIND_NOW", - "DF_ORIGIN", - "DF_STATIC_TLS", - "DF_SYMBOLIC", - "DF_TEXTREL", - "DT_ADDRRNGHI", - "DT_ADDRRNGLO", - "DT_AUDIT", - "DT_AUXILIARY", - "DT_BIND_NOW", - "DT_CHECKSUM", - "DT_CONFIG", - "DT_DEBUG", - "DT_DEPAUDIT", - "DT_ENCODING", - "DT_FEATURE", - "DT_FILTER", - "DT_FINI", - "DT_FINI_ARRAY", - "DT_FINI_ARRAYSZ", - "DT_FLAGS", - "DT_FLAGS_1", - "DT_GNU_CONFLICT", - "DT_GNU_CONFLICTSZ", - "DT_GNU_HASH", - "DT_GNU_LIBLIST", - "DT_GNU_LIBLISTSZ", - "DT_GNU_PRELINKED", - "DT_HASH", - "DT_HIOS", - "DT_HIPROC", - "DT_INIT", - "DT_INIT_ARRAY", - "DT_INIT_ARRAYSZ", - "DT_JMPREL", - "DT_LOOS", - "DT_LOPROC", - "DT_MIPS_AUX_DYNAMIC", - "DT_MIPS_BASE_ADDRESS", - "DT_MIPS_COMPACT_SIZE", - "DT_MIPS_CONFLICT", - "DT_MIPS_CONFLICTNO", - "DT_MIPS_CXX_FLAGS", - "DT_MIPS_DELTA_CLASS", - "DT_MIPS_DELTA_CLASSSYM", - "DT_MIPS_DELTA_CLASSSYM_NO", - "DT_MIPS_DELTA_CLASS_NO", - "DT_MIPS_DELTA_INSTANCE", - "DT_MIPS_DELTA_INSTANCE_NO", - "DT_MIPS_DELTA_RELOC", - "DT_MIPS_DELTA_RELOC_NO", - "DT_MIPS_DELTA_SYM", - "DT_MIPS_DELTA_SYM_NO", - "DT_MIPS_DYNSTR_ALIGN", - "DT_MIPS_FLAGS", - "DT_MIPS_GOTSYM", - "DT_MIPS_GP_VALUE", - "DT_MIPS_HIDDEN_GOTIDX", - "DT_MIPS_HIPAGENO", - "DT_MIPS_ICHECKSUM", - "DT_MIPS_INTERFACE", - "DT_MIPS_INTERFACE_SIZE", - "DT_MIPS_IVERSION", - "DT_MIPS_LIBLIST", - "DT_MIPS_LIBLISTNO", - "DT_MIPS_LOCALPAGE_GOTIDX", - "DT_MIPS_LOCAL_GOTIDX", - "DT_MIPS_LOCAL_GOTNO", - "DT_MIPS_MSYM", - "DT_MIPS_OPTIONS", - "DT_MIPS_PERF_SUFFIX", - "DT_MIPS_PIXIE_INIT", - "DT_MIPS_PLTGOT", - "DT_MIPS_PROTECTED_GOTIDX", - "DT_MIPS_RLD_MAP", - "DT_MIPS_RLD_MAP_REL", - "DT_MIPS_RLD_TEXT_RESOLVE_ADDR", - "DT_MIPS_RLD_VERSION", - "DT_MIPS_RWPLT", - "DT_MIPS_SYMBOL_LIB", - "DT_MIPS_SYMTABNO", - "DT_MIPS_TIME_STAMP", - "DT_MIPS_UNREFEXTNO", - "DT_MOVEENT", - "DT_MOVESZ", - "DT_MOVETAB", - "DT_NEEDED", - "DT_NULL", - "DT_PLTGOT", - "DT_PLTPAD", - "DT_PLTPADSZ", - "DT_PLTREL", - "DT_PLTRELSZ", - "DT_POSFLAG_1", - "DT_PPC64_GLINK", - "DT_PPC64_OPD", - "DT_PPC64_OPDSZ", - "DT_PPC64_OPT", - "DT_PPC_GOT", - "DT_PPC_OPT", - "DT_PREINIT_ARRAY", - "DT_PREINIT_ARRAYSZ", - "DT_REL", - "DT_RELA", - "DT_RELACOUNT", - "DT_RELAENT", - "DT_RELASZ", - "DT_RELCOUNT", - "DT_RELENT", - "DT_RELSZ", - "DT_RPATH", - "DT_RUNPATH", - "DT_SONAME", - "DT_SPARC_REGISTER", - "DT_STRSZ", - "DT_STRTAB", - "DT_SYMBOLIC", - "DT_SYMENT", - "DT_SYMINENT", - "DT_SYMINFO", - "DT_SYMINSZ", - "DT_SYMTAB", - "DT_SYMTAB_SHNDX", - "DT_TEXTREL", - "DT_TLSDESC_GOT", - "DT_TLSDESC_PLT", - "DT_USED", - "DT_VALRNGHI", - "DT_VALRNGLO", - "DT_VERDEF", - "DT_VERDEFNUM", - "DT_VERNEED", - "DT_VERNEEDNUM", - "DT_VERSYM", - "Data", - "Dyn32", - "Dyn64", - "DynFlag", - "DynFlag1", - "DynTag", - "EI_ABIVERSION", - "EI_CLASS", - "EI_DATA", - "EI_NIDENT", - "EI_OSABI", - "EI_PAD", - "EI_VERSION", - "ELFCLASS32", - "ELFCLASS64", - "ELFCLASSNONE", - "ELFDATA2LSB", - "ELFDATA2MSB", - "ELFDATANONE", - "ELFMAG", - "ELFOSABI_86OPEN", - "ELFOSABI_AIX", - "ELFOSABI_ARM", - "ELFOSABI_AROS", - "ELFOSABI_CLOUDABI", - "ELFOSABI_FENIXOS", - "ELFOSABI_FREEBSD", - "ELFOSABI_HPUX", - "ELFOSABI_HURD", - "ELFOSABI_IRIX", - "ELFOSABI_LINUX", - "ELFOSABI_MODESTO", - "ELFOSABI_NETBSD", - "ELFOSABI_NONE", - "ELFOSABI_NSK", - "ELFOSABI_OPENBSD", - "ELFOSABI_OPENVMS", - "ELFOSABI_SOLARIS", - "ELFOSABI_STANDALONE", - "ELFOSABI_TRU64", - "EM_386", - "EM_486", - "EM_56800EX", - "EM_68HC05", - "EM_68HC08", - "EM_68HC11", - "EM_68HC12", - "EM_68HC16", - "EM_68K", - "EM_78KOR", - "EM_8051", - "EM_860", - "EM_88K", - "EM_960", - "EM_AARCH64", - "EM_ALPHA", - "EM_ALPHA_STD", - "EM_ALTERA_NIOS2", - "EM_AMDGPU", - "EM_ARC", - "EM_ARCA", - "EM_ARC_COMPACT", - "EM_ARC_COMPACT2", - "EM_ARM", - "EM_AVR", - "EM_AVR32", - "EM_BA1", - "EM_BA2", - "EM_BLACKFIN", - "EM_BPF", - "EM_C166", - "EM_CDP", - "EM_CE", - "EM_CLOUDSHIELD", - "EM_COGE", - "EM_COLDFIRE", - "EM_COOL", - "EM_COREA_1ST", - "EM_COREA_2ND", - "EM_CR", - "EM_CR16", - "EM_CRAYNV2", - "EM_CRIS", - "EM_CRX", - "EM_CSR_KALIMBA", - "EM_CUDA", - "EM_CYPRESS_M8C", - "EM_D10V", - "EM_D30V", - "EM_DSP24", - "EM_DSPIC30F", - "EM_DXP", - "EM_ECOG1", - "EM_ECOG16", - "EM_ECOG1X", - "EM_ECOG2", - "EM_ETPU", - "EM_EXCESS", - "EM_F2MC16", - "EM_FIREPATH", - "EM_FR20", - "EM_FR30", - "EM_FT32", - "EM_FX66", - "EM_H8S", - "EM_H8_300", - "EM_H8_300H", - "EM_H8_500", - "EM_HUANY", - "EM_IA_64", - "EM_INTEL205", - "EM_INTEL206", - "EM_INTEL207", - "EM_INTEL208", - "EM_INTEL209", - "EM_IP2K", - "EM_JAVELIN", - "EM_K10M", - "EM_KM32", - "EM_KMX16", - "EM_KMX32", - "EM_KMX8", - "EM_KVARC", - "EM_L10M", - "EM_LANAI", - "EM_LATTICEMICO32", - "EM_LOONGARCH", - "EM_M16C", - "EM_M32", - "EM_M32C", - "EM_M32R", - "EM_MANIK", - "EM_MAX", - "EM_MAXQ30", - "EM_MCHP_PIC", - "EM_MCST_ELBRUS", - "EM_ME16", - "EM_METAG", - "EM_MICROBLAZE", - "EM_MIPS", - "EM_MIPS_RS3_LE", - "EM_MIPS_RS4_BE", - "EM_MIPS_X", - "EM_MMA", - "EM_MMDSP_PLUS", - "EM_MMIX", - "EM_MN10200", - "EM_MN10300", - "EM_MOXIE", - "EM_MSP430", - "EM_NCPU", - "EM_NDR1", - "EM_NDS32", - "EM_NONE", - "EM_NORC", - "EM_NS32K", - "EM_OPEN8", - "EM_OPENRISC", - "EM_PARISC", - "EM_PCP", - "EM_PDP10", - "EM_PDP11", - "EM_PDSP", - "EM_PJ", - "EM_PPC", - "EM_PPC64", - "EM_PRISM", - "EM_QDSP6", - "EM_R32C", - "EM_RCE", - "EM_RH32", - "EM_RISCV", - "EM_RL78", - "EM_RS08", - "EM_RX", - "EM_S370", - "EM_S390", - "EM_SCORE7", - "EM_SEP", - "EM_SE_C17", - "EM_SE_C33", - "EM_SH", - "EM_SHARC", - "EM_SLE9X", - "EM_SNP1K", - "EM_SPARC", - "EM_SPARC32PLUS", - "EM_SPARCV9", - "EM_ST100", - "EM_ST19", - "EM_ST200", - "EM_ST7", - "EM_ST9PLUS", - "EM_STARCORE", - "EM_STM8", - "EM_STXP7X", - "EM_SVX", - "EM_TILE64", - "EM_TILEGX", - "EM_TILEPRO", - "EM_TINYJ", - "EM_TI_ARP32", - "EM_TI_C2000", - "EM_TI_C5500", - "EM_TI_C6000", - "EM_TI_PRU", - "EM_TMM_GPP", - "EM_TPC", - "EM_TRICORE", - "EM_TRIMEDIA", - "EM_TSK3000", - "EM_UNICORE", - "EM_V800", - "EM_V850", - "EM_VAX", - "EM_VIDEOCORE", - "EM_VIDEOCORE3", - "EM_VIDEOCORE5", - "EM_VISIUM", - "EM_VPP500", - "EM_X86_64", - "EM_XCORE", - "EM_XGATE", - "EM_XIMO16", - "EM_XTENSA", - "EM_Z80", - "EM_ZSP", - "ET_CORE", - "ET_DYN", - "ET_EXEC", - "ET_HIOS", - "ET_HIPROC", - "ET_LOOS", - "ET_LOPROC", - "ET_NONE", - "ET_REL", - "EV_CURRENT", - "EV_NONE", - "ErrNoSymbols", - "File", - "FileHeader", - "FormatError", - "Header32", - "Header64", - "ImportedSymbol", - "Machine", - "NT_FPREGSET", - "NT_PRPSINFO", - "NT_PRSTATUS", - "NType", - "NewFile", - "OSABI", - "Open", - "PF_MASKOS", - "PF_MASKPROC", - "PF_R", - "PF_W", - "PF_X", - "PT_AARCH64_ARCHEXT", - "PT_AARCH64_UNWIND", - "PT_ARM_ARCHEXT", - "PT_ARM_EXIDX", - "PT_DYNAMIC", - "PT_GNU_EH_FRAME", - "PT_GNU_MBIND_HI", - "PT_GNU_MBIND_LO", - "PT_GNU_PROPERTY", - "PT_GNU_RELRO", - "PT_GNU_STACK", - "PT_HIOS", - "PT_HIPROC", - "PT_INTERP", - "PT_LOAD", - "PT_LOOS", - "PT_LOPROC", - "PT_MIPS_ABIFLAGS", - "PT_MIPS_OPTIONS", - "PT_MIPS_REGINFO", - "PT_MIPS_RTPROC", - "PT_NOTE", - "PT_NULL", - "PT_OPENBSD_BOOTDATA", - "PT_OPENBSD_RANDOMIZE", - "PT_OPENBSD_WXNEEDED", - "PT_PAX_FLAGS", - "PT_PHDR", - "PT_S390_PGSTE", - "PT_SHLIB", - "PT_SUNWSTACK", - "PT_SUNW_EH_FRAME", - "PT_TLS", - "Prog", - "Prog32", - "Prog64", - "ProgFlag", - "ProgHeader", - "ProgType", - "R_386", - "R_386_16", - "R_386_32", - "R_386_32PLT", - "R_386_8", - "R_386_COPY", - "R_386_GLOB_DAT", - "R_386_GOT32", - "R_386_GOT32X", - "R_386_GOTOFF", - "R_386_GOTPC", - "R_386_IRELATIVE", - "R_386_JMP_SLOT", - "R_386_NONE", - "R_386_PC16", - "R_386_PC32", - "R_386_PC8", - "R_386_PLT32", - "R_386_RELATIVE", - "R_386_SIZE32", - "R_386_TLS_DESC", - "R_386_TLS_DESC_CALL", - "R_386_TLS_DTPMOD32", - "R_386_TLS_DTPOFF32", - "R_386_TLS_GD", - "R_386_TLS_GD_32", - "R_386_TLS_GD_CALL", - "R_386_TLS_GD_POP", - "R_386_TLS_GD_PUSH", - "R_386_TLS_GOTDESC", - "R_386_TLS_GOTIE", - "R_386_TLS_IE", - "R_386_TLS_IE_32", - "R_386_TLS_LDM", - "R_386_TLS_LDM_32", - "R_386_TLS_LDM_CALL", - "R_386_TLS_LDM_POP", - "R_386_TLS_LDM_PUSH", - "R_386_TLS_LDO_32", - "R_386_TLS_LE", - "R_386_TLS_LE_32", - "R_386_TLS_TPOFF", - "R_386_TLS_TPOFF32", - "R_390", - "R_390_12", - "R_390_16", - "R_390_20", - "R_390_32", - "R_390_64", - "R_390_8", - "R_390_COPY", - "R_390_GLOB_DAT", - "R_390_GOT12", - "R_390_GOT16", - "R_390_GOT20", - "R_390_GOT32", - "R_390_GOT64", - "R_390_GOTENT", - "R_390_GOTOFF", - "R_390_GOTOFF16", - "R_390_GOTOFF64", - "R_390_GOTPC", - "R_390_GOTPCDBL", - "R_390_GOTPLT12", - "R_390_GOTPLT16", - "R_390_GOTPLT20", - "R_390_GOTPLT32", - "R_390_GOTPLT64", - "R_390_GOTPLTENT", - "R_390_GOTPLTOFF16", - "R_390_GOTPLTOFF32", - "R_390_GOTPLTOFF64", - "R_390_JMP_SLOT", - "R_390_NONE", - "R_390_PC16", - "R_390_PC16DBL", - "R_390_PC32", - "R_390_PC32DBL", - "R_390_PC64", - "R_390_PLT16DBL", - "R_390_PLT32", - "R_390_PLT32DBL", - "R_390_PLT64", - "R_390_RELATIVE", - "R_390_TLS_DTPMOD", - "R_390_TLS_DTPOFF", - "R_390_TLS_GD32", - "R_390_TLS_GD64", - "R_390_TLS_GDCALL", - "R_390_TLS_GOTIE12", - "R_390_TLS_GOTIE20", - "R_390_TLS_GOTIE32", - "R_390_TLS_GOTIE64", - "R_390_TLS_IE32", - "R_390_TLS_IE64", - "R_390_TLS_IEENT", - "R_390_TLS_LDCALL", - "R_390_TLS_LDM32", - "R_390_TLS_LDM64", - "R_390_TLS_LDO32", - "R_390_TLS_LDO64", - "R_390_TLS_LE32", - "R_390_TLS_LE64", - "R_390_TLS_LOAD", - "R_390_TLS_TPOFF", - "R_AARCH64", - "R_AARCH64_ABS16", - "R_AARCH64_ABS32", - "R_AARCH64_ABS64", - "R_AARCH64_ADD_ABS_LO12_NC", - "R_AARCH64_ADR_GOT_PAGE", - "R_AARCH64_ADR_PREL_LO21", - "R_AARCH64_ADR_PREL_PG_HI21", - "R_AARCH64_ADR_PREL_PG_HI21_NC", - "R_AARCH64_CALL26", - "R_AARCH64_CONDBR19", - "R_AARCH64_COPY", - "R_AARCH64_GLOB_DAT", - "R_AARCH64_GOT_LD_PREL19", - "R_AARCH64_IRELATIVE", - "R_AARCH64_JUMP26", - "R_AARCH64_JUMP_SLOT", - "R_AARCH64_LD64_GOTOFF_LO15", - "R_AARCH64_LD64_GOTPAGE_LO15", - "R_AARCH64_LD64_GOT_LO12_NC", - "R_AARCH64_LDST128_ABS_LO12_NC", - "R_AARCH64_LDST16_ABS_LO12_NC", - "R_AARCH64_LDST32_ABS_LO12_NC", - "R_AARCH64_LDST64_ABS_LO12_NC", - "R_AARCH64_LDST8_ABS_LO12_NC", - "R_AARCH64_LD_PREL_LO19", - "R_AARCH64_MOVW_SABS_G0", - "R_AARCH64_MOVW_SABS_G1", - "R_AARCH64_MOVW_SABS_G2", - "R_AARCH64_MOVW_UABS_G0", - "R_AARCH64_MOVW_UABS_G0_NC", - "R_AARCH64_MOVW_UABS_G1", - "R_AARCH64_MOVW_UABS_G1_NC", - "R_AARCH64_MOVW_UABS_G2", - "R_AARCH64_MOVW_UABS_G2_NC", - "R_AARCH64_MOVW_UABS_G3", - "R_AARCH64_NONE", - "R_AARCH64_NULL", - "R_AARCH64_P32_ABS16", - "R_AARCH64_P32_ABS32", - "R_AARCH64_P32_ADD_ABS_LO12_NC", - "R_AARCH64_P32_ADR_GOT_PAGE", - "R_AARCH64_P32_ADR_PREL_LO21", - "R_AARCH64_P32_ADR_PREL_PG_HI21", - "R_AARCH64_P32_CALL26", - "R_AARCH64_P32_CONDBR19", - "R_AARCH64_P32_COPY", - "R_AARCH64_P32_GLOB_DAT", - "R_AARCH64_P32_GOT_LD_PREL19", - "R_AARCH64_P32_IRELATIVE", - "R_AARCH64_P32_JUMP26", - "R_AARCH64_P32_JUMP_SLOT", - "R_AARCH64_P32_LD32_GOT_LO12_NC", - "R_AARCH64_P32_LDST128_ABS_LO12_NC", - "R_AARCH64_P32_LDST16_ABS_LO12_NC", - "R_AARCH64_P32_LDST32_ABS_LO12_NC", - "R_AARCH64_P32_LDST64_ABS_LO12_NC", - "R_AARCH64_P32_LDST8_ABS_LO12_NC", - "R_AARCH64_P32_LD_PREL_LO19", - "R_AARCH64_P32_MOVW_SABS_G0", - "R_AARCH64_P32_MOVW_UABS_G0", - "R_AARCH64_P32_MOVW_UABS_G0_NC", - "R_AARCH64_P32_MOVW_UABS_G1", - "R_AARCH64_P32_PREL16", - "R_AARCH64_P32_PREL32", - "R_AARCH64_P32_RELATIVE", - "R_AARCH64_P32_TLSDESC", - "R_AARCH64_P32_TLSDESC_ADD_LO12_NC", - "R_AARCH64_P32_TLSDESC_ADR_PAGE21", - "R_AARCH64_P32_TLSDESC_ADR_PREL21", - "R_AARCH64_P32_TLSDESC_CALL", - "R_AARCH64_P32_TLSDESC_LD32_LO12_NC", - "R_AARCH64_P32_TLSDESC_LD_PREL19", - "R_AARCH64_P32_TLSGD_ADD_LO12_NC", - "R_AARCH64_P32_TLSGD_ADR_PAGE21", - "R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21", - "R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC", - "R_AARCH64_P32_TLSIE_LD_GOTTPREL_PREL19", - "R_AARCH64_P32_TLSLE_ADD_TPREL_HI12", - "R_AARCH64_P32_TLSLE_ADD_TPREL_LO12", - "R_AARCH64_P32_TLSLE_ADD_TPREL_LO12_NC", - "R_AARCH64_P32_TLSLE_MOVW_TPREL_G0", - "R_AARCH64_P32_TLSLE_MOVW_TPREL_G0_NC", - "R_AARCH64_P32_TLSLE_MOVW_TPREL_G1", - "R_AARCH64_P32_TLS_DTPMOD", - "R_AARCH64_P32_TLS_DTPREL", - "R_AARCH64_P32_TLS_TPREL", - "R_AARCH64_P32_TSTBR14", - "R_AARCH64_PREL16", - "R_AARCH64_PREL32", - "R_AARCH64_PREL64", - "R_AARCH64_RELATIVE", - "R_AARCH64_TLSDESC", - "R_AARCH64_TLSDESC_ADD", - "R_AARCH64_TLSDESC_ADD_LO12_NC", - "R_AARCH64_TLSDESC_ADR_PAGE21", - "R_AARCH64_TLSDESC_ADR_PREL21", - "R_AARCH64_TLSDESC_CALL", - "R_AARCH64_TLSDESC_LD64_LO12_NC", - "R_AARCH64_TLSDESC_LDR", - "R_AARCH64_TLSDESC_LD_PREL19", - "R_AARCH64_TLSDESC_OFF_G0_NC", - "R_AARCH64_TLSDESC_OFF_G1", - "R_AARCH64_TLSGD_ADD_LO12_NC", - "R_AARCH64_TLSGD_ADR_PAGE21", - "R_AARCH64_TLSGD_ADR_PREL21", - "R_AARCH64_TLSGD_MOVW_G0_NC", - "R_AARCH64_TLSGD_MOVW_G1", - "R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21", - "R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC", - "R_AARCH64_TLSIE_LD_GOTTPREL_PREL19", - "R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC", - "R_AARCH64_TLSIE_MOVW_GOTTPREL_G1", - "R_AARCH64_TLSLD_ADR_PAGE21", - "R_AARCH64_TLSLD_ADR_PREL21", - "R_AARCH64_TLSLD_LDST128_DTPREL_LO12", - "R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC", - "R_AARCH64_TLSLE_ADD_TPREL_HI12", - "R_AARCH64_TLSLE_ADD_TPREL_LO12", - "R_AARCH64_TLSLE_ADD_TPREL_LO12_NC", - "R_AARCH64_TLSLE_LDST128_TPREL_LO12", - "R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC", - "R_AARCH64_TLSLE_MOVW_TPREL_G0", - "R_AARCH64_TLSLE_MOVW_TPREL_G0_NC", - "R_AARCH64_TLSLE_MOVW_TPREL_G1", - "R_AARCH64_TLSLE_MOVW_TPREL_G1_NC", - "R_AARCH64_TLSLE_MOVW_TPREL_G2", - "R_AARCH64_TLS_DTPMOD64", - "R_AARCH64_TLS_DTPREL64", - "R_AARCH64_TLS_TPREL64", - "R_AARCH64_TSTBR14", - "R_ALPHA", - "R_ALPHA_BRADDR", - "R_ALPHA_COPY", - "R_ALPHA_GLOB_DAT", - "R_ALPHA_GPDISP", - "R_ALPHA_GPREL32", - "R_ALPHA_GPRELHIGH", - "R_ALPHA_GPRELLOW", - "R_ALPHA_GPVALUE", - "R_ALPHA_HINT", - "R_ALPHA_IMMED_BR_HI32", - "R_ALPHA_IMMED_GP_16", - "R_ALPHA_IMMED_GP_HI32", - "R_ALPHA_IMMED_LO32", - "R_ALPHA_IMMED_SCN_HI32", - "R_ALPHA_JMP_SLOT", - "R_ALPHA_LITERAL", - "R_ALPHA_LITUSE", - "R_ALPHA_NONE", - "R_ALPHA_OP_PRSHIFT", - "R_ALPHA_OP_PSUB", - "R_ALPHA_OP_PUSH", - "R_ALPHA_OP_STORE", - "R_ALPHA_REFLONG", - "R_ALPHA_REFQUAD", - "R_ALPHA_RELATIVE", - "R_ALPHA_SREL16", - "R_ALPHA_SREL32", - "R_ALPHA_SREL64", - "R_ARM", - "R_ARM_ABS12", - "R_ARM_ABS16", - "R_ARM_ABS32", - "R_ARM_ABS32_NOI", - "R_ARM_ABS8", - "R_ARM_ALU_PCREL_15_8", - "R_ARM_ALU_PCREL_23_15", - "R_ARM_ALU_PCREL_7_0", - "R_ARM_ALU_PC_G0", - "R_ARM_ALU_PC_G0_NC", - "R_ARM_ALU_PC_G1", - "R_ARM_ALU_PC_G1_NC", - "R_ARM_ALU_PC_G2", - "R_ARM_ALU_SBREL_19_12_NC", - "R_ARM_ALU_SBREL_27_20_CK", - "R_ARM_ALU_SB_G0", - "R_ARM_ALU_SB_G0_NC", - "R_ARM_ALU_SB_G1", - "R_ARM_ALU_SB_G1_NC", - "R_ARM_ALU_SB_G2", - "R_ARM_AMP_VCALL9", - "R_ARM_BASE_ABS", - "R_ARM_CALL", - "R_ARM_COPY", - "R_ARM_GLOB_DAT", - "R_ARM_GNU_VTENTRY", - "R_ARM_GNU_VTINHERIT", - "R_ARM_GOT32", - "R_ARM_GOTOFF", - "R_ARM_GOTOFF12", - "R_ARM_GOTPC", - "R_ARM_GOTRELAX", - "R_ARM_GOT_ABS", - "R_ARM_GOT_BREL12", - "R_ARM_GOT_PREL", - "R_ARM_IRELATIVE", - "R_ARM_JUMP24", - "R_ARM_JUMP_SLOT", - "R_ARM_LDC_PC_G0", - "R_ARM_LDC_PC_G1", - "R_ARM_LDC_PC_G2", - "R_ARM_LDC_SB_G0", - "R_ARM_LDC_SB_G1", - "R_ARM_LDC_SB_G2", - "R_ARM_LDRS_PC_G0", - "R_ARM_LDRS_PC_G1", - "R_ARM_LDRS_PC_G2", - "R_ARM_LDRS_SB_G0", - "R_ARM_LDRS_SB_G1", - "R_ARM_LDRS_SB_G2", - "R_ARM_LDR_PC_G1", - "R_ARM_LDR_PC_G2", - "R_ARM_LDR_SBREL_11_10_NC", - "R_ARM_LDR_SB_G0", - "R_ARM_LDR_SB_G1", - "R_ARM_LDR_SB_G2", - "R_ARM_ME_TOO", - "R_ARM_MOVT_ABS", - "R_ARM_MOVT_BREL", - "R_ARM_MOVT_PREL", - "R_ARM_MOVW_ABS_NC", - "R_ARM_MOVW_BREL", - "R_ARM_MOVW_BREL_NC", - "R_ARM_MOVW_PREL_NC", - "R_ARM_NONE", - "R_ARM_PC13", - "R_ARM_PC24", - "R_ARM_PLT32", - "R_ARM_PLT32_ABS", - "R_ARM_PREL31", - "R_ARM_PRIVATE_0", - "R_ARM_PRIVATE_1", - "R_ARM_PRIVATE_10", - "R_ARM_PRIVATE_11", - "R_ARM_PRIVATE_12", - "R_ARM_PRIVATE_13", - "R_ARM_PRIVATE_14", - "R_ARM_PRIVATE_15", - "R_ARM_PRIVATE_2", - "R_ARM_PRIVATE_3", - "R_ARM_PRIVATE_4", - "R_ARM_PRIVATE_5", - "R_ARM_PRIVATE_6", - "R_ARM_PRIVATE_7", - "R_ARM_PRIVATE_8", - "R_ARM_PRIVATE_9", - "R_ARM_RABS32", - "R_ARM_RBASE", - "R_ARM_REL32", - "R_ARM_REL32_NOI", - "R_ARM_RELATIVE", - "R_ARM_RPC24", - "R_ARM_RREL32", - "R_ARM_RSBREL32", - "R_ARM_RXPC25", - "R_ARM_SBREL31", - "R_ARM_SBREL32", - "R_ARM_SWI24", - "R_ARM_TARGET1", - "R_ARM_TARGET2", - "R_ARM_THM_ABS5", - "R_ARM_THM_ALU_ABS_G0_NC", - "R_ARM_THM_ALU_ABS_G1_NC", - "R_ARM_THM_ALU_ABS_G2_NC", - "R_ARM_THM_ALU_ABS_G3", - "R_ARM_THM_ALU_PREL_11_0", - "R_ARM_THM_GOT_BREL12", - "R_ARM_THM_JUMP11", - "R_ARM_THM_JUMP19", - "R_ARM_THM_JUMP24", - "R_ARM_THM_JUMP6", - "R_ARM_THM_JUMP8", - "R_ARM_THM_MOVT_ABS", - "R_ARM_THM_MOVT_BREL", - "R_ARM_THM_MOVT_PREL", - "R_ARM_THM_MOVW_ABS_NC", - "R_ARM_THM_MOVW_BREL", - "R_ARM_THM_MOVW_BREL_NC", - "R_ARM_THM_MOVW_PREL_NC", - "R_ARM_THM_PC12", - "R_ARM_THM_PC22", - "R_ARM_THM_PC8", - "R_ARM_THM_RPC22", - "R_ARM_THM_SWI8", - "R_ARM_THM_TLS_CALL", - "R_ARM_THM_TLS_DESCSEQ16", - "R_ARM_THM_TLS_DESCSEQ32", - "R_ARM_THM_XPC22", - "R_ARM_TLS_CALL", - "R_ARM_TLS_DESCSEQ", - "R_ARM_TLS_DTPMOD32", - "R_ARM_TLS_DTPOFF32", - "R_ARM_TLS_GD32", - "R_ARM_TLS_GOTDESC", - "R_ARM_TLS_IE12GP", - "R_ARM_TLS_IE32", - "R_ARM_TLS_LDM32", - "R_ARM_TLS_LDO12", - "R_ARM_TLS_LDO32", - "R_ARM_TLS_LE12", - "R_ARM_TLS_LE32", - "R_ARM_TLS_TPOFF32", - "R_ARM_V4BX", - "R_ARM_XPC25", - "R_INFO", - "R_INFO32", - "R_LARCH", - "R_LARCH_32", - "R_LARCH_32_PCREL", - "R_LARCH_64", - "R_LARCH_ABS64_HI12", - "R_LARCH_ABS64_LO20", - "R_LARCH_ABS_HI20", - "R_LARCH_ABS_LO12", - "R_LARCH_ADD16", - "R_LARCH_ADD24", - "R_LARCH_ADD32", - "R_LARCH_ADD64", - "R_LARCH_ADD8", - "R_LARCH_B16", - "R_LARCH_B21", - "R_LARCH_B26", - "R_LARCH_COPY", - "R_LARCH_GNU_VTENTRY", - "R_LARCH_GNU_VTINHERIT", - "R_LARCH_GOT64_HI12", - "R_LARCH_GOT64_LO20", - "R_LARCH_GOT64_PC_HI12", - "R_LARCH_GOT64_PC_LO20", - "R_LARCH_GOT_HI20", - "R_LARCH_GOT_LO12", - "R_LARCH_GOT_PC_HI20", - "R_LARCH_GOT_PC_LO12", - "R_LARCH_IRELATIVE", - "R_LARCH_JUMP_SLOT", - "R_LARCH_MARK_LA", - "R_LARCH_MARK_PCREL", - "R_LARCH_NONE", - "R_LARCH_PCALA64_HI12", - "R_LARCH_PCALA64_LO20", - "R_LARCH_PCALA_HI20", - "R_LARCH_PCALA_LO12", - "R_LARCH_RELATIVE", - "R_LARCH_RELAX", - "R_LARCH_SOP_ADD", - "R_LARCH_SOP_AND", - "R_LARCH_SOP_ASSERT", - "R_LARCH_SOP_IF_ELSE", - "R_LARCH_SOP_NOT", - "R_LARCH_SOP_POP_32_S_0_10_10_16_S2", - "R_LARCH_SOP_POP_32_S_0_5_10_16_S2", - "R_LARCH_SOP_POP_32_S_10_12", - "R_LARCH_SOP_POP_32_S_10_16", - "R_LARCH_SOP_POP_32_S_10_16_S2", - "R_LARCH_SOP_POP_32_S_10_5", - "R_LARCH_SOP_POP_32_S_5_20", - "R_LARCH_SOP_POP_32_U", - "R_LARCH_SOP_POP_32_U_10_12", - "R_LARCH_SOP_PUSH_ABSOLUTE", - "R_LARCH_SOP_PUSH_DUP", - "R_LARCH_SOP_PUSH_GPREL", - "R_LARCH_SOP_PUSH_PCREL", - "R_LARCH_SOP_PUSH_PLT_PCREL", - "R_LARCH_SOP_PUSH_TLS_GD", - "R_LARCH_SOP_PUSH_TLS_GOT", - "R_LARCH_SOP_PUSH_TLS_TPREL", - "R_LARCH_SOP_SL", - "R_LARCH_SOP_SR", - "R_LARCH_SOP_SUB", - "R_LARCH_SUB16", - "R_LARCH_SUB24", - "R_LARCH_SUB32", - "R_LARCH_SUB64", - "R_LARCH_SUB8", - "R_LARCH_TLS_DTPMOD32", - "R_LARCH_TLS_DTPMOD64", - "R_LARCH_TLS_DTPREL32", - "R_LARCH_TLS_DTPREL64", - "R_LARCH_TLS_GD_HI20", - "R_LARCH_TLS_GD_PC_HI20", - "R_LARCH_TLS_IE64_HI12", - "R_LARCH_TLS_IE64_LO20", - "R_LARCH_TLS_IE64_PC_HI12", - "R_LARCH_TLS_IE64_PC_LO20", - "R_LARCH_TLS_IE_HI20", - "R_LARCH_TLS_IE_LO12", - "R_LARCH_TLS_IE_PC_HI20", - "R_LARCH_TLS_IE_PC_LO12", - "R_LARCH_TLS_LD_HI20", - "R_LARCH_TLS_LD_PC_HI20", - "R_LARCH_TLS_LE64_HI12", - "R_LARCH_TLS_LE64_LO20", - "R_LARCH_TLS_LE_HI20", - "R_LARCH_TLS_LE_LO12", - "R_LARCH_TLS_TPREL32", - "R_LARCH_TLS_TPREL64", - "R_MIPS", - "R_MIPS_16", - "R_MIPS_26", - "R_MIPS_32", - "R_MIPS_64", - "R_MIPS_ADD_IMMEDIATE", - "R_MIPS_CALL16", - "R_MIPS_CALL_HI16", - "R_MIPS_CALL_LO16", - "R_MIPS_DELETE", - "R_MIPS_GOT16", - "R_MIPS_GOT_DISP", - "R_MIPS_GOT_HI16", - "R_MIPS_GOT_LO16", - "R_MIPS_GOT_OFST", - "R_MIPS_GOT_PAGE", - "R_MIPS_GPREL16", - "R_MIPS_GPREL32", - "R_MIPS_HI16", - "R_MIPS_HIGHER", - "R_MIPS_HIGHEST", - "R_MIPS_INSERT_A", - "R_MIPS_INSERT_B", - "R_MIPS_JALR", - "R_MIPS_LITERAL", - "R_MIPS_LO16", - "R_MIPS_NONE", - "R_MIPS_PC16", - "R_MIPS_PJUMP", - "R_MIPS_REL16", - "R_MIPS_REL32", - "R_MIPS_RELGOT", - "R_MIPS_SCN_DISP", - "R_MIPS_SHIFT5", - "R_MIPS_SHIFT6", - "R_MIPS_SUB", - "R_MIPS_TLS_DTPMOD32", - "R_MIPS_TLS_DTPMOD64", - "R_MIPS_TLS_DTPREL32", - "R_MIPS_TLS_DTPREL64", - "R_MIPS_TLS_DTPREL_HI16", - "R_MIPS_TLS_DTPREL_LO16", - "R_MIPS_TLS_GD", - "R_MIPS_TLS_GOTTPREL", - "R_MIPS_TLS_LDM", - "R_MIPS_TLS_TPREL32", - "R_MIPS_TLS_TPREL64", - "R_MIPS_TLS_TPREL_HI16", - "R_MIPS_TLS_TPREL_LO16", - "R_PPC", - "R_PPC64", - "R_PPC64_ADDR14", - "R_PPC64_ADDR14_BRNTAKEN", - "R_PPC64_ADDR14_BRTAKEN", - "R_PPC64_ADDR16", - "R_PPC64_ADDR16_DS", - "R_PPC64_ADDR16_HA", - "R_PPC64_ADDR16_HI", - "R_PPC64_ADDR16_HIGH", - "R_PPC64_ADDR16_HIGHA", - "R_PPC64_ADDR16_HIGHER", - "R_PPC64_ADDR16_HIGHER34", - "R_PPC64_ADDR16_HIGHERA", - "R_PPC64_ADDR16_HIGHERA34", - "R_PPC64_ADDR16_HIGHEST", - "R_PPC64_ADDR16_HIGHEST34", - "R_PPC64_ADDR16_HIGHESTA", - "R_PPC64_ADDR16_HIGHESTA34", - "R_PPC64_ADDR16_LO", - "R_PPC64_ADDR16_LO_DS", - "R_PPC64_ADDR24", - "R_PPC64_ADDR32", - "R_PPC64_ADDR64", - "R_PPC64_ADDR64_LOCAL", - "R_PPC64_COPY", - "R_PPC64_D28", - "R_PPC64_D34", - "R_PPC64_D34_HA30", - "R_PPC64_D34_HI30", - "R_PPC64_D34_LO", - "R_PPC64_DTPMOD64", - "R_PPC64_DTPREL16", - "R_PPC64_DTPREL16_DS", - "R_PPC64_DTPREL16_HA", - "R_PPC64_DTPREL16_HI", - "R_PPC64_DTPREL16_HIGH", - "R_PPC64_DTPREL16_HIGHA", - "R_PPC64_DTPREL16_HIGHER", - "R_PPC64_DTPREL16_HIGHERA", - "R_PPC64_DTPREL16_HIGHEST", - "R_PPC64_DTPREL16_HIGHESTA", - "R_PPC64_DTPREL16_LO", - "R_PPC64_DTPREL16_LO_DS", - "R_PPC64_DTPREL34", - "R_PPC64_DTPREL64", - "R_PPC64_ENTRY", - "R_PPC64_GLOB_DAT", - "R_PPC64_GNU_VTENTRY", - "R_PPC64_GNU_VTINHERIT", - "R_PPC64_GOT16", - "R_PPC64_GOT16_DS", - "R_PPC64_GOT16_HA", - "R_PPC64_GOT16_HI", - "R_PPC64_GOT16_LO", - "R_PPC64_GOT16_LO_DS", - "R_PPC64_GOT_DTPREL16_DS", - "R_PPC64_GOT_DTPREL16_HA", - "R_PPC64_GOT_DTPREL16_HI", - "R_PPC64_GOT_DTPREL16_LO_DS", - "R_PPC64_GOT_DTPREL_PCREL34", - "R_PPC64_GOT_PCREL34", - "R_PPC64_GOT_TLSGD16", - "R_PPC64_GOT_TLSGD16_HA", - "R_PPC64_GOT_TLSGD16_HI", - "R_PPC64_GOT_TLSGD16_LO", - "R_PPC64_GOT_TLSGD_PCREL34", - "R_PPC64_GOT_TLSLD16", - "R_PPC64_GOT_TLSLD16_HA", - "R_PPC64_GOT_TLSLD16_HI", - "R_PPC64_GOT_TLSLD16_LO", - "R_PPC64_GOT_TLSLD_PCREL34", - "R_PPC64_GOT_TPREL16_DS", - "R_PPC64_GOT_TPREL16_HA", - "R_PPC64_GOT_TPREL16_HI", - "R_PPC64_GOT_TPREL16_LO_DS", - "R_PPC64_GOT_TPREL_PCREL34", - "R_PPC64_IRELATIVE", - "R_PPC64_JMP_IREL", - "R_PPC64_JMP_SLOT", - "R_PPC64_NONE", - "R_PPC64_PCREL28", - "R_PPC64_PCREL34", - "R_PPC64_PCREL_OPT", - "R_PPC64_PLT16_HA", - "R_PPC64_PLT16_HI", - "R_PPC64_PLT16_LO", - "R_PPC64_PLT16_LO_DS", - "R_PPC64_PLT32", - "R_PPC64_PLT64", - "R_PPC64_PLTCALL", - "R_PPC64_PLTCALL_NOTOC", - "R_PPC64_PLTGOT16", - "R_PPC64_PLTGOT16_DS", - "R_PPC64_PLTGOT16_HA", - "R_PPC64_PLTGOT16_HI", - "R_PPC64_PLTGOT16_LO", - "R_PPC64_PLTGOT_LO_DS", - "R_PPC64_PLTREL32", - "R_PPC64_PLTREL64", - "R_PPC64_PLTSEQ", - "R_PPC64_PLTSEQ_NOTOC", - "R_PPC64_PLT_PCREL34", - "R_PPC64_PLT_PCREL34_NOTOC", - "R_PPC64_REL14", - "R_PPC64_REL14_BRNTAKEN", - "R_PPC64_REL14_BRTAKEN", - "R_PPC64_REL16", - "R_PPC64_REL16DX_HA", - "R_PPC64_REL16_HA", - "R_PPC64_REL16_HI", - "R_PPC64_REL16_HIGH", - "R_PPC64_REL16_HIGHA", - "R_PPC64_REL16_HIGHER", - "R_PPC64_REL16_HIGHER34", - "R_PPC64_REL16_HIGHERA", - "R_PPC64_REL16_HIGHERA34", - "R_PPC64_REL16_HIGHEST", - "R_PPC64_REL16_HIGHEST34", - "R_PPC64_REL16_HIGHESTA", - "R_PPC64_REL16_HIGHESTA34", - "R_PPC64_REL16_LO", - "R_PPC64_REL24", - "R_PPC64_REL24_NOTOC", - "R_PPC64_REL24_P9NOTOC", - "R_PPC64_REL30", - "R_PPC64_REL32", - "R_PPC64_REL64", - "R_PPC64_RELATIVE", - "R_PPC64_SECTOFF", - "R_PPC64_SECTOFF_DS", - "R_PPC64_SECTOFF_HA", - "R_PPC64_SECTOFF_HI", - "R_PPC64_SECTOFF_LO", - "R_PPC64_SECTOFF_LO_DS", - "R_PPC64_TLS", - "R_PPC64_TLSGD", - "R_PPC64_TLSLD", - "R_PPC64_TOC", - "R_PPC64_TOC16", - "R_PPC64_TOC16_DS", - "R_PPC64_TOC16_HA", - "R_PPC64_TOC16_HI", - "R_PPC64_TOC16_LO", - "R_PPC64_TOC16_LO_DS", - "R_PPC64_TOCSAVE", - "R_PPC64_TPREL16", - "R_PPC64_TPREL16_DS", - "R_PPC64_TPREL16_HA", - "R_PPC64_TPREL16_HI", - "R_PPC64_TPREL16_HIGH", - "R_PPC64_TPREL16_HIGHA", - "R_PPC64_TPREL16_HIGHER", - "R_PPC64_TPREL16_HIGHERA", - "R_PPC64_TPREL16_HIGHEST", - "R_PPC64_TPREL16_HIGHESTA", - "R_PPC64_TPREL16_LO", - "R_PPC64_TPREL16_LO_DS", - "R_PPC64_TPREL34", - "R_PPC64_TPREL64", - "R_PPC64_UADDR16", - "R_PPC64_UADDR32", - "R_PPC64_UADDR64", - "R_PPC_ADDR14", - "R_PPC_ADDR14_BRNTAKEN", - "R_PPC_ADDR14_BRTAKEN", - "R_PPC_ADDR16", - "R_PPC_ADDR16_HA", - "R_PPC_ADDR16_HI", - "R_PPC_ADDR16_LO", - "R_PPC_ADDR24", - "R_PPC_ADDR32", - "R_PPC_COPY", - "R_PPC_DTPMOD32", - "R_PPC_DTPREL16", - "R_PPC_DTPREL16_HA", - "R_PPC_DTPREL16_HI", - "R_PPC_DTPREL16_LO", - "R_PPC_DTPREL32", - "R_PPC_EMB_BIT_FLD", - "R_PPC_EMB_MRKREF", - "R_PPC_EMB_NADDR16", - "R_PPC_EMB_NADDR16_HA", - "R_PPC_EMB_NADDR16_HI", - "R_PPC_EMB_NADDR16_LO", - "R_PPC_EMB_NADDR32", - "R_PPC_EMB_RELSDA", - "R_PPC_EMB_RELSEC16", - "R_PPC_EMB_RELST_HA", - "R_PPC_EMB_RELST_HI", - "R_PPC_EMB_RELST_LO", - "R_PPC_EMB_SDA21", - "R_PPC_EMB_SDA2I16", - "R_PPC_EMB_SDA2REL", - "R_PPC_EMB_SDAI16", - "R_PPC_GLOB_DAT", - "R_PPC_GOT16", - "R_PPC_GOT16_HA", - "R_PPC_GOT16_HI", - "R_PPC_GOT16_LO", - "R_PPC_GOT_TLSGD16", - "R_PPC_GOT_TLSGD16_HA", - "R_PPC_GOT_TLSGD16_HI", - "R_PPC_GOT_TLSGD16_LO", - "R_PPC_GOT_TLSLD16", - "R_PPC_GOT_TLSLD16_HA", - "R_PPC_GOT_TLSLD16_HI", - "R_PPC_GOT_TLSLD16_LO", - "R_PPC_GOT_TPREL16", - "R_PPC_GOT_TPREL16_HA", - "R_PPC_GOT_TPREL16_HI", - "R_PPC_GOT_TPREL16_LO", - "R_PPC_JMP_SLOT", - "R_PPC_LOCAL24PC", - "R_PPC_NONE", - "R_PPC_PLT16_HA", - "R_PPC_PLT16_HI", - "R_PPC_PLT16_LO", - "R_PPC_PLT32", - "R_PPC_PLTREL24", - "R_PPC_PLTREL32", - "R_PPC_REL14", - "R_PPC_REL14_BRNTAKEN", - "R_PPC_REL14_BRTAKEN", - "R_PPC_REL24", - "R_PPC_REL32", - "R_PPC_RELATIVE", - "R_PPC_SDAREL16", - "R_PPC_SECTOFF", - "R_PPC_SECTOFF_HA", - "R_PPC_SECTOFF_HI", - "R_PPC_SECTOFF_LO", - "R_PPC_TLS", - "R_PPC_TPREL16", - "R_PPC_TPREL16_HA", - "R_PPC_TPREL16_HI", - "R_PPC_TPREL16_LO", - "R_PPC_TPREL32", - "R_PPC_UADDR16", - "R_PPC_UADDR32", - "R_RISCV", - "R_RISCV_32", - "R_RISCV_32_PCREL", - "R_RISCV_64", - "R_RISCV_ADD16", - "R_RISCV_ADD32", - "R_RISCV_ADD64", - "R_RISCV_ADD8", - "R_RISCV_ALIGN", - "R_RISCV_BRANCH", - "R_RISCV_CALL", - "R_RISCV_CALL_PLT", - "R_RISCV_COPY", - "R_RISCV_GNU_VTENTRY", - "R_RISCV_GNU_VTINHERIT", - "R_RISCV_GOT_HI20", - "R_RISCV_GPREL_I", - "R_RISCV_GPREL_S", - "R_RISCV_HI20", - "R_RISCV_JAL", - "R_RISCV_JUMP_SLOT", - "R_RISCV_LO12_I", - "R_RISCV_LO12_S", - "R_RISCV_NONE", - "R_RISCV_PCREL_HI20", - "R_RISCV_PCREL_LO12_I", - "R_RISCV_PCREL_LO12_S", - "R_RISCV_RELATIVE", - "R_RISCV_RELAX", - "R_RISCV_RVC_BRANCH", - "R_RISCV_RVC_JUMP", - "R_RISCV_RVC_LUI", - "R_RISCV_SET16", - "R_RISCV_SET32", - "R_RISCV_SET6", - "R_RISCV_SET8", - "R_RISCV_SUB16", - "R_RISCV_SUB32", - "R_RISCV_SUB6", - "R_RISCV_SUB64", - "R_RISCV_SUB8", - "R_RISCV_TLS_DTPMOD32", - "R_RISCV_TLS_DTPMOD64", - "R_RISCV_TLS_DTPREL32", - "R_RISCV_TLS_DTPREL64", - "R_RISCV_TLS_GD_HI20", - "R_RISCV_TLS_GOT_HI20", - "R_RISCV_TLS_TPREL32", - "R_RISCV_TLS_TPREL64", - "R_RISCV_TPREL_ADD", - "R_RISCV_TPREL_HI20", - "R_RISCV_TPREL_I", - "R_RISCV_TPREL_LO12_I", - "R_RISCV_TPREL_LO12_S", - "R_RISCV_TPREL_S", - "R_SPARC", - "R_SPARC_10", - "R_SPARC_11", - "R_SPARC_13", - "R_SPARC_16", - "R_SPARC_22", - "R_SPARC_32", - "R_SPARC_5", - "R_SPARC_6", - "R_SPARC_64", - "R_SPARC_7", - "R_SPARC_8", - "R_SPARC_COPY", - "R_SPARC_DISP16", - "R_SPARC_DISP32", - "R_SPARC_DISP64", - "R_SPARC_DISP8", - "R_SPARC_GLOB_DAT", - "R_SPARC_GLOB_JMP", - "R_SPARC_GOT10", - "R_SPARC_GOT13", - "R_SPARC_GOT22", - "R_SPARC_H44", - "R_SPARC_HH22", - "R_SPARC_HI22", - "R_SPARC_HIPLT22", - "R_SPARC_HIX22", - "R_SPARC_HM10", - "R_SPARC_JMP_SLOT", - "R_SPARC_L44", - "R_SPARC_LM22", - "R_SPARC_LO10", - "R_SPARC_LOPLT10", - "R_SPARC_LOX10", - "R_SPARC_M44", - "R_SPARC_NONE", - "R_SPARC_OLO10", - "R_SPARC_PC10", - "R_SPARC_PC22", - "R_SPARC_PCPLT10", - "R_SPARC_PCPLT22", - "R_SPARC_PCPLT32", - "R_SPARC_PC_HH22", - "R_SPARC_PC_HM10", - "R_SPARC_PC_LM22", - "R_SPARC_PLT32", - "R_SPARC_PLT64", - "R_SPARC_REGISTER", - "R_SPARC_RELATIVE", - "R_SPARC_UA16", - "R_SPARC_UA32", - "R_SPARC_UA64", - "R_SPARC_WDISP16", - "R_SPARC_WDISP19", - "R_SPARC_WDISP22", - "R_SPARC_WDISP30", - "R_SPARC_WPLT30", - "R_SYM32", - "R_SYM64", - "R_TYPE32", - "R_TYPE64", - "R_X86_64", - "R_X86_64_16", - "R_X86_64_32", - "R_X86_64_32S", - "R_X86_64_64", - "R_X86_64_8", - "R_X86_64_COPY", - "R_X86_64_DTPMOD64", - "R_X86_64_DTPOFF32", - "R_X86_64_DTPOFF64", - "R_X86_64_GLOB_DAT", - "R_X86_64_GOT32", - "R_X86_64_GOT64", - "R_X86_64_GOTOFF64", - "R_X86_64_GOTPC32", - "R_X86_64_GOTPC32_TLSDESC", - "R_X86_64_GOTPC64", - "R_X86_64_GOTPCREL", - "R_X86_64_GOTPCREL64", - "R_X86_64_GOTPCRELX", - "R_X86_64_GOTPLT64", - "R_X86_64_GOTTPOFF", - "R_X86_64_IRELATIVE", - "R_X86_64_JMP_SLOT", - "R_X86_64_NONE", - "R_X86_64_PC16", - "R_X86_64_PC32", - "R_X86_64_PC32_BND", - "R_X86_64_PC64", - "R_X86_64_PC8", - "R_X86_64_PLT32", - "R_X86_64_PLT32_BND", - "R_X86_64_PLTOFF64", - "R_X86_64_RELATIVE", - "R_X86_64_RELATIVE64", - "R_X86_64_REX_GOTPCRELX", - "R_X86_64_SIZE32", - "R_X86_64_SIZE64", - "R_X86_64_TLSDESC", - "R_X86_64_TLSDESC_CALL", - "R_X86_64_TLSGD", - "R_X86_64_TLSLD", - "R_X86_64_TPOFF32", - "R_X86_64_TPOFF64", - "Rel32", - "Rel64", - "Rela32", - "Rela64", - "SHF_ALLOC", - "SHF_COMPRESSED", - "SHF_EXECINSTR", - "SHF_GROUP", - "SHF_INFO_LINK", - "SHF_LINK_ORDER", - "SHF_MASKOS", - "SHF_MASKPROC", - "SHF_MERGE", - "SHF_OS_NONCONFORMING", - "SHF_STRINGS", - "SHF_TLS", - "SHF_WRITE", - "SHN_ABS", - "SHN_COMMON", - "SHN_HIOS", - "SHN_HIPROC", - "SHN_HIRESERVE", - "SHN_LOOS", - "SHN_LOPROC", - "SHN_LORESERVE", - "SHN_UNDEF", - "SHN_XINDEX", - "SHT_DYNAMIC", - "SHT_DYNSYM", - "SHT_FINI_ARRAY", - "SHT_GNU_ATTRIBUTES", - "SHT_GNU_HASH", - "SHT_GNU_LIBLIST", - "SHT_GNU_VERDEF", - "SHT_GNU_VERNEED", - "SHT_GNU_VERSYM", - "SHT_GROUP", - "SHT_HASH", - "SHT_HIOS", - "SHT_HIPROC", - "SHT_HIUSER", - "SHT_INIT_ARRAY", - "SHT_LOOS", - "SHT_LOPROC", - "SHT_LOUSER", - "SHT_MIPS_ABIFLAGS", - "SHT_NOBITS", - "SHT_NOTE", - "SHT_NULL", - "SHT_PREINIT_ARRAY", - "SHT_PROGBITS", - "SHT_REL", - "SHT_RELA", - "SHT_SHLIB", - "SHT_STRTAB", - "SHT_SYMTAB", - "SHT_SYMTAB_SHNDX", - "STB_GLOBAL", - "STB_HIOS", - "STB_HIPROC", - "STB_LOCAL", - "STB_LOOS", - "STB_LOPROC", - "STB_WEAK", - "STT_COMMON", - "STT_FILE", - "STT_FUNC", - "STT_HIOS", - "STT_HIPROC", - "STT_LOOS", - "STT_LOPROC", - "STT_NOTYPE", - "STT_OBJECT", - "STT_SECTION", - "STT_TLS", - "STV_DEFAULT", - "STV_HIDDEN", - "STV_INTERNAL", - "STV_PROTECTED", - "ST_BIND", - "ST_INFO", - "ST_TYPE", - "ST_VISIBILITY", - "Section", - "Section32", - "Section64", - "SectionFlag", - "SectionHeader", - "SectionIndex", - "SectionType", - "Sym32", - "Sym32Size", - "Sym64", - "Sym64Size", - "SymBind", - "SymType", - "SymVis", - "Symbol", - "Type", - "Version", - }, - "debug/gosym": { - "DecodingError", - "Func", - "LineTable", - "NewLineTable", - "NewTable", - "Obj", - "Sym", - "Table", - "UnknownFileError", - "UnknownLineError", - }, - "debug/macho": { - "ARM64_RELOC_ADDEND", - "ARM64_RELOC_BRANCH26", - "ARM64_RELOC_GOT_LOAD_PAGE21", - "ARM64_RELOC_GOT_LOAD_PAGEOFF12", - "ARM64_RELOC_PAGE21", - "ARM64_RELOC_PAGEOFF12", - "ARM64_RELOC_POINTER_TO_GOT", - "ARM64_RELOC_SUBTRACTOR", - "ARM64_RELOC_TLVP_LOAD_PAGE21", - "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", - "ARM64_RELOC_UNSIGNED", - "ARM_RELOC_BR24", - "ARM_RELOC_HALF", - "ARM_RELOC_HALF_SECTDIFF", - "ARM_RELOC_LOCAL_SECTDIFF", - "ARM_RELOC_PAIR", - "ARM_RELOC_PB_LA_PTR", - "ARM_RELOC_SECTDIFF", - "ARM_RELOC_VANILLA", - "ARM_THUMB_32BIT_BRANCH", - "ARM_THUMB_RELOC_BR22", - "Cpu", - "Cpu386", - "CpuAmd64", - "CpuArm", - "CpuArm64", - "CpuPpc", - "CpuPpc64", - "Dylib", - "DylibCmd", - "Dysymtab", - "DysymtabCmd", - "ErrNotFat", - "FatArch", - "FatArchHeader", - "FatFile", - "File", - "FileHeader", - "FlagAllModsBound", - "FlagAllowStackExecution", - "FlagAppExtensionSafe", - "FlagBindAtLoad", - "FlagBindsToWeak", - "FlagCanonical", - "FlagDeadStrippableDylib", - "FlagDyldLink", - "FlagForceFlat", - "FlagHasTLVDescriptors", - "FlagIncrLink", - "FlagLazyInit", - "FlagNoFixPrebinding", - "FlagNoHeapExecution", - "FlagNoMultiDefs", - "FlagNoReexportedDylibs", - "FlagNoUndefs", - "FlagPIE", - "FlagPrebindable", - "FlagPrebound", - "FlagRootSafe", - "FlagSetuidSafe", - "FlagSplitSegs", - "FlagSubsectionsViaSymbols", - "FlagTwoLevel", - "FlagWeakDefines", - "FormatError", - "GENERIC_RELOC_LOCAL_SECTDIFF", - "GENERIC_RELOC_PAIR", - "GENERIC_RELOC_PB_LA_PTR", - "GENERIC_RELOC_SECTDIFF", - "GENERIC_RELOC_TLV", - "GENERIC_RELOC_VANILLA", - "Load", - "LoadBytes", - "LoadCmd", - "LoadCmdDylib", - "LoadCmdDylinker", - "LoadCmdDysymtab", - "LoadCmdRpath", - "LoadCmdSegment", - "LoadCmdSegment64", - "LoadCmdSymtab", - "LoadCmdThread", - "LoadCmdUnixThread", - "Magic32", - "Magic64", - "MagicFat", - "NewFatFile", - "NewFile", - "Nlist32", - "Nlist64", - "Open", - "OpenFat", - "Regs386", - "RegsAMD64", - "Reloc", - "RelocTypeARM", - "RelocTypeARM64", - "RelocTypeGeneric", - "RelocTypeX86_64", - "Rpath", - "RpathCmd", - "Section", - "Section32", - "Section64", - "SectionHeader", - "Segment", - "Segment32", - "Segment64", - "SegmentHeader", - "Symbol", - "Symtab", - "SymtabCmd", - "Thread", - "Type", - "TypeBundle", - "TypeDylib", - "TypeExec", - "TypeObj", - "X86_64_RELOC_BRANCH", - "X86_64_RELOC_GOT", - "X86_64_RELOC_GOT_LOAD", - "X86_64_RELOC_SIGNED", - "X86_64_RELOC_SIGNED_1", - "X86_64_RELOC_SIGNED_2", - "X86_64_RELOC_SIGNED_4", - "X86_64_RELOC_SUBTRACTOR", - "X86_64_RELOC_TLV", - "X86_64_RELOC_UNSIGNED", - }, - "debug/pe": { - "COFFSymbol", - "COFFSymbolAuxFormat5", - "COFFSymbolSize", - "DataDirectory", - "File", - "FileHeader", - "FormatError", - "IMAGE_COMDAT_SELECT_ANY", - "IMAGE_COMDAT_SELECT_ASSOCIATIVE", - "IMAGE_COMDAT_SELECT_EXACT_MATCH", - "IMAGE_COMDAT_SELECT_LARGEST", - "IMAGE_COMDAT_SELECT_NODUPLICATES", - "IMAGE_COMDAT_SELECT_SAME_SIZE", - "IMAGE_DIRECTORY_ENTRY_ARCHITECTURE", - "IMAGE_DIRECTORY_ENTRY_BASERELOC", - "IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT", - "IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR", - "IMAGE_DIRECTORY_ENTRY_DEBUG", - "IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT", - "IMAGE_DIRECTORY_ENTRY_EXCEPTION", - "IMAGE_DIRECTORY_ENTRY_EXPORT", - "IMAGE_DIRECTORY_ENTRY_GLOBALPTR", - "IMAGE_DIRECTORY_ENTRY_IAT", - "IMAGE_DIRECTORY_ENTRY_IMPORT", - "IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG", - "IMAGE_DIRECTORY_ENTRY_RESOURCE", - "IMAGE_DIRECTORY_ENTRY_SECURITY", - "IMAGE_DIRECTORY_ENTRY_TLS", - "IMAGE_DLLCHARACTERISTICS_APPCONTAINER", - "IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE", - "IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY", - "IMAGE_DLLCHARACTERISTICS_GUARD_CF", - "IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA", - "IMAGE_DLLCHARACTERISTICS_NO_BIND", - "IMAGE_DLLCHARACTERISTICS_NO_ISOLATION", - "IMAGE_DLLCHARACTERISTICS_NO_SEH", - "IMAGE_DLLCHARACTERISTICS_NX_COMPAT", - "IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE", - "IMAGE_DLLCHARACTERISTICS_WDM_DRIVER", - "IMAGE_FILE_32BIT_MACHINE", - "IMAGE_FILE_AGGRESIVE_WS_TRIM", - "IMAGE_FILE_BYTES_REVERSED_HI", - "IMAGE_FILE_BYTES_REVERSED_LO", - "IMAGE_FILE_DEBUG_STRIPPED", - "IMAGE_FILE_DLL", - "IMAGE_FILE_EXECUTABLE_IMAGE", - "IMAGE_FILE_LARGE_ADDRESS_AWARE", - "IMAGE_FILE_LINE_NUMS_STRIPPED", - "IMAGE_FILE_LOCAL_SYMS_STRIPPED", - "IMAGE_FILE_MACHINE_AM33", - "IMAGE_FILE_MACHINE_AMD64", - "IMAGE_FILE_MACHINE_ARM", - "IMAGE_FILE_MACHINE_ARM64", - "IMAGE_FILE_MACHINE_ARMNT", - "IMAGE_FILE_MACHINE_EBC", - "IMAGE_FILE_MACHINE_I386", - "IMAGE_FILE_MACHINE_IA64", - "IMAGE_FILE_MACHINE_LOONGARCH32", - "IMAGE_FILE_MACHINE_LOONGARCH64", - "IMAGE_FILE_MACHINE_M32R", - "IMAGE_FILE_MACHINE_MIPS16", - "IMAGE_FILE_MACHINE_MIPSFPU", - "IMAGE_FILE_MACHINE_MIPSFPU16", - "IMAGE_FILE_MACHINE_POWERPC", - "IMAGE_FILE_MACHINE_POWERPCFP", - "IMAGE_FILE_MACHINE_R4000", - "IMAGE_FILE_MACHINE_RISCV128", - "IMAGE_FILE_MACHINE_RISCV32", - "IMAGE_FILE_MACHINE_RISCV64", - "IMAGE_FILE_MACHINE_SH3", - "IMAGE_FILE_MACHINE_SH3DSP", - "IMAGE_FILE_MACHINE_SH4", - "IMAGE_FILE_MACHINE_SH5", - "IMAGE_FILE_MACHINE_THUMB", - "IMAGE_FILE_MACHINE_UNKNOWN", - "IMAGE_FILE_MACHINE_WCEMIPSV2", - "IMAGE_FILE_NET_RUN_FROM_SWAP", - "IMAGE_FILE_RELOCS_STRIPPED", - "IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP", - "IMAGE_FILE_SYSTEM", - "IMAGE_FILE_UP_SYSTEM_ONLY", - "IMAGE_SCN_CNT_CODE", - "IMAGE_SCN_CNT_INITIALIZED_DATA", - "IMAGE_SCN_CNT_UNINITIALIZED_DATA", - "IMAGE_SCN_LNK_COMDAT", - "IMAGE_SCN_MEM_DISCARDABLE", - "IMAGE_SCN_MEM_EXECUTE", - "IMAGE_SCN_MEM_READ", - "IMAGE_SCN_MEM_WRITE", - "IMAGE_SUBSYSTEM_EFI_APPLICATION", - "IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER", - "IMAGE_SUBSYSTEM_EFI_ROM", - "IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER", - "IMAGE_SUBSYSTEM_NATIVE", - "IMAGE_SUBSYSTEM_NATIVE_WINDOWS", - "IMAGE_SUBSYSTEM_OS2_CUI", - "IMAGE_SUBSYSTEM_POSIX_CUI", - "IMAGE_SUBSYSTEM_UNKNOWN", - "IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION", - "IMAGE_SUBSYSTEM_WINDOWS_CE_GUI", - "IMAGE_SUBSYSTEM_WINDOWS_CUI", - "IMAGE_SUBSYSTEM_WINDOWS_GUI", - "IMAGE_SUBSYSTEM_XBOX", - "ImportDirectory", - "NewFile", - "Open", - "OptionalHeader32", - "OptionalHeader64", - "Reloc", - "Section", - "SectionHeader", - "SectionHeader32", - "StringTable", - "Symbol", - }, - "debug/plan9obj": { - "ErrNoSymbols", - "File", - "FileHeader", - "Magic386", - "Magic64", - "MagicAMD64", - "MagicARM", - "NewFile", - "Open", - "Section", - "SectionHeader", - "Sym", - }, - "embed": { - "FS", - }, - "encoding": { - "BinaryMarshaler", - "BinaryUnmarshaler", - "TextMarshaler", - "TextUnmarshaler", - }, - "encoding/ascii85": { - "CorruptInputError", - "Decode", - "Encode", - "MaxEncodedLen", - "NewDecoder", - "NewEncoder", - }, - "encoding/asn1": { - "BitString", - "ClassApplication", - "ClassContextSpecific", - "ClassPrivate", - "ClassUniversal", - "Enumerated", - "Flag", - "Marshal", - "MarshalWithParams", - "NullBytes", - "NullRawValue", - "ObjectIdentifier", - "RawContent", - "RawValue", - "StructuralError", - "SyntaxError", - "TagBMPString", - "TagBitString", - "TagBoolean", - "TagEnum", - "TagGeneralString", - "TagGeneralizedTime", - "TagIA5String", - "TagInteger", - "TagNull", - "TagNumericString", - "TagOID", - "TagOctetString", - "TagPrintableString", - "TagSequence", - "TagSet", - "TagT61String", - "TagUTCTime", - "TagUTF8String", - "Unmarshal", - "UnmarshalWithParams", - }, - "encoding/base32": { - "CorruptInputError", - "Encoding", - "HexEncoding", - "NewDecoder", - "NewEncoder", - "NewEncoding", - "NoPadding", - "StdEncoding", - "StdPadding", - }, - "encoding/base64": { - "CorruptInputError", - "Encoding", - "NewDecoder", - "NewEncoder", - "NewEncoding", - "NoPadding", - "RawStdEncoding", - "RawURLEncoding", - "StdEncoding", - "StdPadding", - "URLEncoding", - }, - "encoding/binary": { - "AppendByteOrder", - "AppendUvarint", - "AppendVarint", - "BigEndian", - "ByteOrder", - "LittleEndian", - "MaxVarintLen16", - "MaxVarintLen32", - "MaxVarintLen64", - "NativeEndian", - "PutUvarint", - "PutVarint", - "Read", - "ReadUvarint", - "ReadVarint", - "Size", - "Uvarint", - "Varint", - "Write", - }, - "encoding/csv": { - "ErrBareQuote", - "ErrFieldCount", - "ErrQuote", - "ErrTrailingComma", - "NewReader", - "NewWriter", - "ParseError", - "Reader", - "Writer", - }, - "encoding/gob": { - "CommonType", - "Decoder", - "Encoder", - "GobDecoder", - "GobEncoder", - "NewDecoder", - "NewEncoder", - "Register", - "RegisterName", - }, - "encoding/hex": { - "Decode", - "DecodeString", - "DecodedLen", - "Dump", - "Dumper", - "Encode", - "EncodeToString", - "EncodedLen", - "ErrLength", - "InvalidByteError", - "NewDecoder", - "NewEncoder", - }, - "encoding/json": { - "Compact", - "Decoder", - "Delim", - "Encoder", - "HTMLEscape", - "Indent", - "InvalidUTF8Error", - "InvalidUnmarshalError", - "Marshal", - "MarshalIndent", - "Marshaler", - "MarshalerError", - "NewDecoder", - "NewEncoder", - "Number", - "RawMessage", - "SyntaxError", - "Token", - "Unmarshal", - "UnmarshalFieldError", - "UnmarshalTypeError", - "Unmarshaler", - "UnsupportedTypeError", - "UnsupportedValueError", - "Valid", - }, - "encoding/pem": { - "Block", - "Decode", - "Encode", - "EncodeToMemory", - }, - "encoding/xml": { - "Attr", - "CharData", - "Comment", - "CopyToken", - "Decoder", - "Directive", - "Encoder", - "EndElement", - "Escape", - "EscapeText", - "HTMLAutoClose", - "HTMLEntity", - "Header", - "Marshal", - "MarshalIndent", - "Marshaler", - "MarshalerAttr", - "Name", - "NewDecoder", - "NewEncoder", - "NewTokenDecoder", - "ProcInst", - "StartElement", - "SyntaxError", - "TagPathError", - "Token", - "TokenReader", - "Unmarshal", - "UnmarshalError", - "Unmarshaler", - "UnmarshalerAttr", - "UnsupportedTypeError", - }, - "errors": { - "As", - "ErrUnsupported", - "Is", - "Join", - "New", - "Unwrap", - }, - "expvar": { - "Do", - "Float", - "Func", - "Get", - "Handler", - "Int", - "KeyValue", - "Map", - "NewFloat", - "NewInt", - "NewMap", - "NewString", - "Publish", - "String", - "Var", - }, - "flag": { - "Arg", - "Args", - "Bool", - "BoolFunc", - "BoolVar", - "CommandLine", - "ContinueOnError", - "Duration", - "DurationVar", - "ErrHelp", - "ErrorHandling", - "ExitOnError", - "Flag", - "FlagSet", - "Float64", - "Float64Var", - "Func", - "Getter", - "Int", - "Int64", - "Int64Var", - "IntVar", - "Lookup", - "NArg", - "NFlag", - "NewFlagSet", - "PanicOnError", - "Parse", - "Parsed", - "PrintDefaults", - "Set", - "String", - "StringVar", - "TextVar", - "Uint", - "Uint64", - "Uint64Var", - "UintVar", - "UnquoteUsage", - "Usage", - "Value", - "Var", - "Visit", - "VisitAll", - }, - "fmt": { - "Append", - "Appendf", - "Appendln", - "Errorf", - "FormatString", - "Formatter", - "Fprint", - "Fprintf", - "Fprintln", - "Fscan", - "Fscanf", - "Fscanln", - "GoStringer", - "Print", - "Printf", - "Println", - "Scan", - "ScanState", - "Scanf", - "Scanln", - "Scanner", - "Sprint", - "Sprintf", - "Sprintln", - "Sscan", - "Sscanf", - "Sscanln", - "State", - "Stringer", - }, - "go/ast": { - "ArrayType", - "AssignStmt", - "Bad", - "BadDecl", - "BadExpr", - "BadStmt", - "BasicLit", - "BinaryExpr", - "BlockStmt", - "BranchStmt", - "CallExpr", - "CaseClause", - "ChanDir", - "ChanType", - "CommClause", - "Comment", - "CommentGroup", - "CommentMap", - "CompositeLit", - "Con", - "Decl", - "DeclStmt", - "DeferStmt", - "Ellipsis", - "EmptyStmt", - "Expr", - "ExprStmt", - "Field", - "FieldFilter", - "FieldList", - "File", - "FileExports", - "Filter", - "FilterDecl", - "FilterFile", - "FilterFuncDuplicates", - "FilterImportDuplicates", - "FilterPackage", - "FilterUnassociatedComments", - "ForStmt", - "Fprint", - "Fun", - "FuncDecl", - "FuncLit", - "FuncType", - "GenDecl", - "GoStmt", - "Ident", - "IfStmt", - "ImportSpec", - "Importer", - "IncDecStmt", - "IndexExpr", - "IndexListExpr", - "Inspect", - "InterfaceType", - "IsExported", - "IsGenerated", - "KeyValueExpr", - "LabeledStmt", - "Lbl", - "MapType", - "MergeMode", - "MergePackageFiles", - "NewCommentMap", - "NewIdent", - "NewObj", - "NewPackage", - "NewScope", - "Node", - "NotNilFilter", - "ObjKind", - "Object", - "Package", - "PackageExports", - "ParenExpr", - "Pkg", - "Print", - "RECV", - "RangeStmt", - "ReturnStmt", - "SEND", - "Scope", - "SelectStmt", - "SelectorExpr", - "SendStmt", - "SliceExpr", - "SortImports", - "Spec", - "StarExpr", - "Stmt", - "StructType", - "SwitchStmt", - "Typ", - "TypeAssertExpr", - "TypeSpec", - "TypeSwitchStmt", - "UnaryExpr", - "ValueSpec", - "Var", - "Visitor", - "Walk", - }, - "go/build": { - "AllowBinary", - "ArchChar", - "Context", - "Default", - "Directive", - "FindOnly", - "IgnoreVendor", - "Import", - "ImportComment", - "ImportDir", - "ImportMode", - "IsLocalImport", - "MultiplePackageError", - "NoGoError", - "Package", - "ToolDir", - }, - "go/build/constraint": { - "AndExpr", - "Expr", - "GoVersion", - "IsGoBuild", - "IsPlusBuild", - "NotExpr", - "OrExpr", - "Parse", - "PlusBuildLines", - "SyntaxError", - "TagExpr", - }, - "go/constant": { - "BinaryOp", - "BitLen", - "Bool", - "BoolVal", - "Bytes", - "Compare", - "Complex", - "Denom", - "Float", - "Float32Val", - "Float64Val", - "Imag", - "Int", - "Int64Val", - "Kind", - "Make", - "MakeBool", - "MakeFloat64", - "MakeFromBytes", - "MakeFromLiteral", - "MakeImag", - "MakeInt64", - "MakeString", - "MakeUint64", - "MakeUnknown", - "Num", - "Real", - "Shift", - "Sign", - "String", - "StringVal", - "ToComplex", - "ToFloat", - "ToInt", - "Uint64Val", - "UnaryOp", - "Unknown", - "Val", - "Value", - }, - "go/doc": { - "AllDecls", - "AllMethods", - "Example", - "Examples", - "Filter", - "Func", - "IllegalPrefixes", - "IsPredeclared", - "Mode", - "New", - "NewFromFiles", - "Note", - "Package", - "PreserveAST", - "Synopsis", - "ToHTML", - "ToText", - "Type", - "Value", - }, - "go/doc/comment": { - "Block", - "Code", - "DefaultLookupPackage", - "Doc", - "DocLink", - "Heading", - "Italic", - "Link", - "LinkDef", - "List", - "ListItem", - "Paragraph", - "Parser", - "Plain", - "Printer", - "Text", - }, - "go/format": { - "Node", - "Source", - }, - "go/importer": { - "Default", - "For", - "ForCompiler", - "Lookup", - }, - "go/parser": { - "AllErrors", - "DeclarationErrors", - "ImportsOnly", - "Mode", - "PackageClauseOnly", - "ParseComments", - "ParseDir", - "ParseExpr", - "ParseExprFrom", - "ParseFile", - "SkipObjectResolution", - "SpuriousErrors", - "Trace", - }, - "go/printer": { - "CommentedNode", - "Config", - "Fprint", - "Mode", - "RawFormat", - "SourcePos", - "TabIndent", - "UseSpaces", - }, - "go/scanner": { - "Error", - "ErrorHandler", - "ErrorList", - "Mode", - "PrintError", - "ScanComments", - "Scanner", - }, - "go/token": { - "ADD", - "ADD_ASSIGN", - "AND", - "AND_ASSIGN", - "AND_NOT", - "AND_NOT_ASSIGN", - "ARROW", - "ASSIGN", - "BREAK", - "CASE", - "CHAN", - "CHAR", - "COLON", - "COMMA", - "COMMENT", - "CONST", - "CONTINUE", - "DEC", - "DEFAULT", - "DEFER", - "DEFINE", - "ELLIPSIS", - "ELSE", - "EOF", - "EQL", - "FALLTHROUGH", - "FLOAT", - "FOR", - "FUNC", - "File", - "FileSet", - "GEQ", - "GO", - "GOTO", - "GTR", - "HighestPrec", - "IDENT", - "IF", - "ILLEGAL", - "IMAG", - "IMPORT", - "INC", - "INT", - "INTERFACE", - "IsExported", - "IsIdentifier", - "IsKeyword", - "LAND", - "LBRACE", - "LBRACK", - "LEQ", - "LOR", - "LPAREN", - "LSS", - "Lookup", - "LowestPrec", - "MAP", - "MUL", - "MUL_ASSIGN", - "NEQ", - "NOT", - "NewFileSet", - "NoPos", - "OR", - "OR_ASSIGN", - "PACKAGE", - "PERIOD", - "Pos", - "Position", - "QUO", - "QUO_ASSIGN", - "RANGE", - "RBRACE", - "RBRACK", - "REM", - "REM_ASSIGN", - "RETURN", - "RPAREN", - "SELECT", - "SEMICOLON", - "SHL", - "SHL_ASSIGN", - "SHR", - "SHR_ASSIGN", - "STRING", - "STRUCT", - "SUB", - "SUB_ASSIGN", - "SWITCH", - "TILDE", - "TYPE", - "Token", - "UnaryPrec", - "VAR", - "XOR", - "XOR_ASSIGN", - }, - "go/types": { - "ArgumentError", - "Array", - "AssertableTo", - "AssignableTo", - "Basic", - "BasicInfo", - "BasicKind", - "Bool", - "Builtin", - "Byte", - "Chan", - "ChanDir", - "CheckExpr", - "Checker", - "Comparable", - "Complex128", - "Complex64", - "Config", - "Const", - "Context", - "ConvertibleTo", - "DefPredeclaredTestFuncs", - "Default", - "Error", - "Eval", - "ExprString", - "FieldVal", - "Float32", - "Float64", - "Func", - "Id", - "Identical", - "IdenticalIgnoreTags", - "Implements", - "ImportMode", - "Importer", - "ImporterFrom", - "Info", - "Initializer", - "Instance", - "Instantiate", - "Int", - "Int16", - "Int32", - "Int64", - "Int8", - "Interface", - "Invalid", - "IsBoolean", - "IsComplex", - "IsConstType", - "IsFloat", - "IsInteger", - "IsInterface", - "IsNumeric", - "IsOrdered", - "IsString", - "IsUnsigned", - "IsUntyped", - "Label", - "LookupFieldOrMethod", - "Map", - "MethodExpr", - "MethodSet", - "MethodVal", - "MissingMethod", - "Named", - "NewArray", - "NewChan", - "NewChecker", - "NewConst", - "NewContext", - "NewField", - "NewFunc", - "NewInterface", - "NewInterfaceType", - "NewLabel", - "NewMap", - "NewMethodSet", - "NewNamed", - "NewPackage", - "NewParam", - "NewPkgName", - "NewPointer", - "NewScope", - "NewSignature", - "NewSignatureType", - "NewSlice", - "NewStruct", - "NewTerm", - "NewTuple", - "NewTypeName", - "NewTypeParam", - "NewUnion", - "NewVar", - "Nil", - "Object", - "ObjectString", - "Package", - "PkgName", - "Pointer", - "Qualifier", - "RecvOnly", - "RelativeTo", - "Rune", - "Satisfies", - "Scope", - "Selection", - "SelectionKind", - "SelectionString", - "SendOnly", - "SendRecv", - "Signature", - "Sizes", - "SizesFor", - "Slice", - "StdSizes", - "String", - "Struct", - "Term", - "Tuple", - "Typ", - "Type", - "TypeAndValue", - "TypeList", - "TypeName", - "TypeParam", - "TypeParamList", - "TypeString", - "Uint", - "Uint16", - "Uint32", - "Uint64", - "Uint8", - "Uintptr", - "Union", - "Universe", - "Unsafe", - "UnsafePointer", - "UntypedBool", - "UntypedComplex", - "UntypedFloat", - "UntypedInt", - "UntypedNil", - "UntypedRune", - "UntypedString", - "Var", - "WriteExpr", - "WriteSignature", - "WriteType", - }, - "hash": { - "Hash", - "Hash32", - "Hash64", - }, - "hash/adler32": { - "Checksum", - "New", - "Size", - }, - "hash/crc32": { - "Castagnoli", - "Checksum", - "ChecksumIEEE", - "IEEE", - "IEEETable", - "Koopman", - "MakeTable", - "New", - "NewIEEE", - "Size", - "Table", - "Update", - }, - "hash/crc64": { - "Checksum", - "ECMA", - "ISO", - "MakeTable", - "New", - "Size", - "Table", - "Update", - }, - "hash/fnv": { - "New128", - "New128a", - "New32", - "New32a", - "New64", - "New64a", - }, - "hash/maphash": { - "Bytes", - "Hash", - "MakeSeed", - "Seed", - "String", - }, - "html": { - "EscapeString", - "UnescapeString", - }, - "html/template": { - "CSS", - "ErrAmbigContext", - "ErrBadHTML", - "ErrBranchEnd", - "ErrEndContext", - "ErrJSTemplate", - "ErrNoSuchTemplate", - "ErrOutputContext", - "ErrPartialCharset", - "ErrPartialEscape", - "ErrPredefinedEscaper", - "ErrRangeLoopReentry", - "ErrSlashAmbig", - "Error", - "ErrorCode", - "FuncMap", - "HTML", - "HTMLAttr", - "HTMLEscape", - "HTMLEscapeString", - "HTMLEscaper", - "IsTrue", - "JS", - "JSEscape", - "JSEscapeString", - "JSEscaper", - "JSStr", - "Must", - "New", - "OK", - "ParseFS", - "ParseFiles", - "ParseGlob", - "Srcset", - "Template", - "URL", - "URLQueryEscaper", - }, - "image": { - "Alpha", - "Alpha16", - "Black", - "CMYK", - "Config", - "Decode", - "DecodeConfig", - "ErrFormat", - "Gray", - "Gray16", - "Image", - "NRGBA", - "NRGBA64", - "NYCbCrA", - "NewAlpha", - "NewAlpha16", - "NewCMYK", - "NewGray", - "NewGray16", - "NewNRGBA", - "NewNRGBA64", - "NewNYCbCrA", - "NewPaletted", - "NewRGBA", - "NewRGBA64", - "NewUniform", - "NewYCbCr", - "Opaque", - "Paletted", - "PalettedImage", - "Point", - "Pt", - "RGBA", - "RGBA64", - "RGBA64Image", - "Rect", - "Rectangle", - "RegisterFormat", - "Transparent", - "Uniform", - "White", - "YCbCr", - "YCbCrSubsampleRatio", - "YCbCrSubsampleRatio410", - "YCbCrSubsampleRatio411", - "YCbCrSubsampleRatio420", - "YCbCrSubsampleRatio422", - "YCbCrSubsampleRatio440", - "YCbCrSubsampleRatio444", - "ZP", - "ZR", - }, - "image/color": { - "Alpha", - "Alpha16", - "Alpha16Model", - "AlphaModel", - "Black", - "CMYK", - "CMYKModel", - "CMYKToRGB", - "Color", - "Gray", - "Gray16", - "Gray16Model", - "GrayModel", - "Model", - "ModelFunc", - "NRGBA", - "NRGBA64", - "NRGBA64Model", - "NRGBAModel", - "NYCbCrA", - "NYCbCrAModel", - "Opaque", - "Palette", - "RGBA", - "RGBA64", - "RGBA64Model", - "RGBAModel", - "RGBToCMYK", - "RGBToYCbCr", - "Transparent", - "White", - "YCbCr", - "YCbCrModel", - "YCbCrToRGB", - }, - "image/color/palette": { - "Plan9", - "WebSafe", - }, - "image/draw": { - "Draw", - "DrawMask", - "Drawer", - "FloydSteinberg", - "Image", - "Op", - "Over", - "Quantizer", - "RGBA64Image", - "Src", - }, - "image/gif": { - "Decode", - "DecodeAll", - "DecodeConfig", - "DisposalBackground", - "DisposalNone", - "DisposalPrevious", - "Encode", - "EncodeAll", - "GIF", - "Options", - }, - "image/jpeg": { - "Decode", - "DecodeConfig", - "DefaultQuality", - "Encode", - "FormatError", - "Options", - "Reader", - "UnsupportedError", - }, - "image/png": { - "BestCompression", - "BestSpeed", - "CompressionLevel", - "Decode", - "DecodeConfig", - "DefaultCompression", - "Encode", - "Encoder", - "EncoderBuffer", - "EncoderBufferPool", - "FormatError", - "NoCompression", - "UnsupportedError", - }, - "index/suffixarray": { - "Index", - "New", - }, - "io": { - "ByteReader", - "ByteScanner", - "ByteWriter", - "Closer", - "Copy", - "CopyBuffer", - "CopyN", - "Discard", - "EOF", - "ErrClosedPipe", - "ErrNoProgress", - "ErrShortBuffer", - "ErrShortWrite", - "ErrUnexpectedEOF", - "LimitReader", - "LimitedReader", - "MultiReader", - "MultiWriter", - "NewOffsetWriter", - "NewSectionReader", - "NopCloser", - "OffsetWriter", - "Pipe", - "PipeReader", - "PipeWriter", - "ReadAll", - "ReadAtLeast", - "ReadCloser", - "ReadFull", - "ReadSeekCloser", - "ReadSeeker", - "ReadWriteCloser", - "ReadWriteSeeker", - "ReadWriter", - "Reader", - "ReaderAt", - "ReaderFrom", - "RuneReader", - "RuneScanner", - "SectionReader", - "SeekCurrent", - "SeekEnd", - "SeekStart", - "Seeker", - "StringWriter", - "TeeReader", - "WriteCloser", - "WriteSeeker", - "WriteString", - "Writer", - "WriterAt", - "WriterTo", - }, - "io/fs": { - "DirEntry", - "ErrClosed", - "ErrExist", - "ErrInvalid", - "ErrNotExist", - "ErrPermission", - "FS", - "File", - "FileInfo", - "FileInfoToDirEntry", - "FileMode", - "FormatDirEntry", - "FormatFileInfo", - "Glob", - "GlobFS", - "ModeAppend", - "ModeCharDevice", - "ModeDevice", - "ModeDir", - "ModeExclusive", - "ModeIrregular", - "ModeNamedPipe", - "ModePerm", - "ModeSetgid", - "ModeSetuid", - "ModeSocket", - "ModeSticky", - "ModeSymlink", - "ModeTemporary", - "ModeType", - "PathError", - "ReadDir", - "ReadDirFS", - "ReadDirFile", - "ReadFile", - "ReadFileFS", - "SkipAll", - "SkipDir", - "Stat", - "StatFS", - "Sub", - "SubFS", - "ValidPath", - "WalkDir", - "WalkDirFunc", - }, - "io/ioutil": { - "Discard", - "NopCloser", - "ReadAll", - "ReadDir", - "ReadFile", - "TempDir", - "TempFile", - "WriteFile", - }, - "log": { - "Default", - "Fatal", - "Fatalf", - "Fatalln", - "Flags", - "LUTC", - "Ldate", - "Llongfile", - "Lmicroseconds", - "Lmsgprefix", - "Logger", - "Lshortfile", - "LstdFlags", - "Ltime", - "New", - "Output", - "Panic", - "Panicf", - "Panicln", - "Prefix", - "Print", - "Printf", - "Println", - "SetFlags", - "SetOutput", - "SetPrefix", - "Writer", - }, - "log/slog": { - "Any", - "AnyValue", - "Attr", - "Bool", - "BoolValue", - "Debug", - "DebugContext", - "Default", - "Duration", - "DurationValue", - "Error", - "ErrorContext", - "Float64", - "Float64Value", - "Group", - "GroupValue", - "Handler", - "HandlerOptions", - "Info", - "InfoContext", - "Int", - "Int64", - "Int64Value", - "IntValue", - "JSONHandler", - "Kind", - "KindAny", - "KindBool", - "KindDuration", - "KindFloat64", - "KindGroup", - "KindInt64", - "KindLogValuer", - "KindString", - "KindTime", - "KindUint64", - "Level", - "LevelDebug", - "LevelError", - "LevelInfo", - "LevelKey", - "LevelVar", - "LevelWarn", - "Leveler", - "Log", - "LogAttrs", - "LogValuer", - "Logger", - "MessageKey", - "New", - "NewJSONHandler", - "NewLogLogger", - "NewRecord", - "NewTextHandler", - "Record", - "SetDefault", - "Source", - "SourceKey", - "String", - "StringValue", - "TextHandler", - "Time", - "TimeKey", - "TimeValue", - "Uint64", - "Uint64Value", - "Value", - "Warn", - "WarnContext", - "With", - }, - "log/syslog": { - "Dial", - "LOG_ALERT", - "LOG_AUTH", - "LOG_AUTHPRIV", - "LOG_CRIT", - "LOG_CRON", - "LOG_DAEMON", - "LOG_DEBUG", - "LOG_EMERG", - "LOG_ERR", - "LOG_FTP", - "LOG_INFO", - "LOG_KERN", - "LOG_LOCAL0", - "LOG_LOCAL1", - "LOG_LOCAL2", - "LOG_LOCAL3", - "LOG_LOCAL4", - "LOG_LOCAL5", - "LOG_LOCAL6", - "LOG_LOCAL7", - "LOG_LPR", - "LOG_MAIL", - "LOG_NEWS", - "LOG_NOTICE", - "LOG_SYSLOG", - "LOG_USER", - "LOG_UUCP", - "LOG_WARNING", - "New", - "NewLogger", - "Priority", - "Writer", - }, - "maps": { - "Clone", - "Copy", - "DeleteFunc", - "Equal", - "EqualFunc", - }, - "math": { - "Abs", - "Acos", - "Acosh", - "Asin", - "Asinh", - "Atan", - "Atan2", - "Atanh", - "Cbrt", - "Ceil", - "Copysign", - "Cos", - "Cosh", - "Dim", - "E", - "Erf", - "Erfc", - "Erfcinv", - "Erfinv", - "Exp", - "Exp2", - "Expm1", - "FMA", - "Float32bits", - "Float32frombits", - "Float64bits", - "Float64frombits", - "Floor", - "Frexp", - "Gamma", - "Hypot", - "Ilogb", - "Inf", - "IsInf", - "IsNaN", - "J0", - "J1", - "Jn", - "Ldexp", - "Lgamma", - "Ln10", - "Ln2", - "Log", - "Log10", - "Log10E", - "Log1p", - "Log2", - "Log2E", - "Logb", - "Max", - "MaxFloat32", - "MaxFloat64", - "MaxInt", - "MaxInt16", - "MaxInt32", - "MaxInt64", - "MaxInt8", - "MaxUint", - "MaxUint16", - "MaxUint32", - "MaxUint64", - "MaxUint8", - "Min", - "MinInt", - "MinInt16", - "MinInt32", - "MinInt64", - "MinInt8", - "Mod", - "Modf", - "NaN", - "Nextafter", - "Nextafter32", - "Phi", - "Pi", - "Pow", - "Pow10", - "Remainder", - "Round", - "RoundToEven", - "Signbit", - "Sin", - "Sincos", - "Sinh", - "SmallestNonzeroFloat32", - "SmallestNonzeroFloat64", - "Sqrt", - "Sqrt2", - "SqrtE", - "SqrtPhi", - "SqrtPi", - "Tan", - "Tanh", - "Trunc", - "Y0", - "Y1", - "Yn", - }, - "math/big": { - "Above", - "Accuracy", - "AwayFromZero", - "Below", - "ErrNaN", - "Exact", - "Float", - "Int", - "Jacobi", - "MaxBase", - "MaxExp", - "MaxPrec", - "MinExp", - "NewFloat", - "NewInt", - "NewRat", - "ParseFloat", - "Rat", - "RoundingMode", - "ToNearestAway", - "ToNearestEven", - "ToNegativeInf", - "ToPositiveInf", - "ToZero", - "Word", - }, - "math/bits": { - "Add", - "Add32", - "Add64", - "Div", - "Div32", - "Div64", - "LeadingZeros", - "LeadingZeros16", - "LeadingZeros32", - "LeadingZeros64", - "LeadingZeros8", - "Len", - "Len16", - "Len32", - "Len64", - "Len8", - "Mul", - "Mul32", - "Mul64", - "OnesCount", - "OnesCount16", - "OnesCount32", - "OnesCount64", - "OnesCount8", - "Rem", - "Rem32", - "Rem64", - "Reverse", - "Reverse16", - "Reverse32", - "Reverse64", - "Reverse8", - "ReverseBytes", - "ReverseBytes16", - "ReverseBytes32", - "ReverseBytes64", - "RotateLeft", - "RotateLeft16", - "RotateLeft32", - "RotateLeft64", - "RotateLeft8", - "Sub", - "Sub32", - "Sub64", - "TrailingZeros", - "TrailingZeros16", - "TrailingZeros32", - "TrailingZeros64", - "TrailingZeros8", - "UintSize", - }, - "math/cmplx": { - "Abs", - "Acos", - "Acosh", - "Asin", - "Asinh", - "Atan", - "Atanh", - "Conj", - "Cos", - "Cosh", - "Cot", - "Exp", - "Inf", - "IsInf", - "IsNaN", - "Log", - "Log10", - "NaN", - "Phase", - "Polar", - "Pow", - "Rect", - "Sin", - "Sinh", - "Sqrt", - "Tan", - "Tanh", - }, - "math/rand": { - "ExpFloat64", - "Float32", - "Float64", - "Int", - "Int31", - "Int31n", - "Int63", - "Int63n", - "Intn", - "New", - "NewSource", - "NewZipf", - "NormFloat64", - "Perm", - "Rand", - "Read", - "Seed", - "Shuffle", - "Source", - "Source64", - "Uint32", - "Uint64", - "Zipf", - }, - "mime": { - "AddExtensionType", - "BEncoding", - "ErrInvalidMediaParameter", - "ExtensionsByType", - "FormatMediaType", - "ParseMediaType", - "QEncoding", - "TypeByExtension", - "WordDecoder", - "WordEncoder", - }, - "mime/multipart": { - "ErrMessageTooLarge", - "File", - "FileHeader", - "Form", - "NewReader", - "NewWriter", - "Part", - "Reader", - "Writer", - }, - "mime/quotedprintable": { - "NewReader", - "NewWriter", - "Reader", - "Writer", - }, - "net": { - "Addr", - "AddrError", - "Buffers", - "CIDRMask", - "Conn", - "DNSConfigError", - "DNSError", - "DefaultResolver", - "Dial", - "DialIP", - "DialTCP", - "DialTimeout", - "DialUDP", - "DialUnix", - "Dialer", - "ErrClosed", - "ErrWriteToConnected", - "Error", - "FileConn", - "FileListener", - "FilePacketConn", - "FlagBroadcast", - "FlagLoopback", - "FlagMulticast", - "FlagPointToPoint", - "FlagRunning", - "FlagUp", - "Flags", - "HardwareAddr", - "IP", - "IPAddr", - "IPConn", - "IPMask", - "IPNet", - "IPv4", - "IPv4Mask", - "IPv4allrouter", - "IPv4allsys", - "IPv4bcast", - "IPv4len", - "IPv4zero", - "IPv6interfacelocalallnodes", - "IPv6len", - "IPv6linklocalallnodes", - "IPv6linklocalallrouters", - "IPv6loopback", - "IPv6unspecified", - "IPv6zero", - "Interface", - "InterfaceAddrs", - "InterfaceByIndex", - "InterfaceByName", - "Interfaces", - "InvalidAddrError", - "JoinHostPort", - "Listen", - "ListenConfig", - "ListenIP", - "ListenMulticastUDP", - "ListenPacket", - "ListenTCP", - "ListenUDP", - "ListenUnix", - "ListenUnixgram", - "Listener", - "LookupAddr", - "LookupCNAME", - "LookupHost", - "LookupIP", - "LookupMX", - "LookupNS", - "LookupPort", - "LookupSRV", - "LookupTXT", - "MX", - "NS", - "OpError", - "PacketConn", - "ParseCIDR", - "ParseError", - "ParseIP", - "ParseMAC", - "Pipe", - "ResolveIPAddr", - "ResolveTCPAddr", - "ResolveUDPAddr", - "ResolveUnixAddr", - "Resolver", - "SRV", - "SplitHostPort", - "TCPAddr", - "TCPAddrFromAddrPort", - "TCPConn", - "TCPListener", - "UDPAddr", - "UDPAddrFromAddrPort", - "UDPConn", - "UnixAddr", - "UnixConn", - "UnixListener", - "UnknownNetworkError", - }, - "net/http": { - "AllowQuerySemicolons", - "CanonicalHeaderKey", - "Client", - "CloseNotifier", - "ConnState", - "Cookie", - "CookieJar", - "DefaultClient", - "DefaultMaxHeaderBytes", - "DefaultMaxIdleConnsPerHost", - "DefaultServeMux", - "DefaultTransport", - "DetectContentType", - "Dir", - "ErrAbortHandler", - "ErrBodyNotAllowed", - "ErrBodyReadAfterClose", - "ErrContentLength", - "ErrHandlerTimeout", - "ErrHeaderTooLong", - "ErrHijacked", - "ErrLineTooLong", - "ErrMissingBoundary", - "ErrMissingContentLength", - "ErrMissingFile", - "ErrNoCookie", - "ErrNoLocation", - "ErrNotMultipart", - "ErrNotSupported", - "ErrSchemeMismatch", - "ErrServerClosed", - "ErrShortBody", - "ErrSkipAltProtocol", - "ErrUnexpectedTrailer", - "ErrUseLastResponse", - "ErrWriteAfterFlush", - "Error", - "FS", - "File", - "FileServer", - "FileSystem", - "Flusher", - "Get", - "Handle", - "HandleFunc", - "Handler", - "HandlerFunc", - "Head", - "Header", - "Hijacker", - "ListenAndServe", - "ListenAndServeTLS", - "LocalAddrContextKey", - "MaxBytesError", - "MaxBytesHandler", - "MaxBytesReader", - "MethodConnect", - "MethodDelete", - "MethodGet", - "MethodHead", - "MethodOptions", - "MethodPatch", - "MethodPost", - "MethodPut", - "MethodTrace", - "NewFileTransport", - "NewRequest", - "NewRequestWithContext", - "NewResponseController", - "NewServeMux", - "NoBody", - "NotFound", - "NotFoundHandler", - "ParseHTTPVersion", - "ParseTime", - "Post", - "PostForm", - "ProtocolError", - "ProxyFromEnvironment", - "ProxyURL", - "PushOptions", - "Pusher", - "ReadRequest", - "ReadResponse", - "Redirect", - "RedirectHandler", - "Request", - "Response", - "ResponseController", - "ResponseWriter", - "RoundTripper", - "SameSite", - "SameSiteDefaultMode", - "SameSiteLaxMode", - "SameSiteNoneMode", - "SameSiteStrictMode", - "Serve", - "ServeContent", - "ServeFile", - "ServeMux", - "ServeTLS", - "Server", - "ServerContextKey", - "SetCookie", - "StateActive", - "StateClosed", - "StateHijacked", - "StateIdle", - "StateNew", - "StatusAccepted", - "StatusAlreadyReported", - "StatusBadGateway", - "StatusBadRequest", - "StatusConflict", - "StatusContinue", - "StatusCreated", - "StatusEarlyHints", - "StatusExpectationFailed", - "StatusFailedDependency", - "StatusForbidden", - "StatusFound", - "StatusGatewayTimeout", - "StatusGone", - "StatusHTTPVersionNotSupported", - "StatusIMUsed", - "StatusInsufficientStorage", - "StatusInternalServerError", - "StatusLengthRequired", - "StatusLocked", - "StatusLoopDetected", - "StatusMethodNotAllowed", - "StatusMisdirectedRequest", - "StatusMovedPermanently", - "StatusMultiStatus", - "StatusMultipleChoices", - "StatusNetworkAuthenticationRequired", - "StatusNoContent", - "StatusNonAuthoritativeInfo", - "StatusNotAcceptable", - "StatusNotExtended", - "StatusNotFound", - "StatusNotImplemented", - "StatusNotModified", - "StatusOK", - "StatusPartialContent", - "StatusPaymentRequired", - "StatusPermanentRedirect", - "StatusPreconditionFailed", - "StatusPreconditionRequired", - "StatusProcessing", - "StatusProxyAuthRequired", - "StatusRequestEntityTooLarge", - "StatusRequestHeaderFieldsTooLarge", - "StatusRequestTimeout", - "StatusRequestURITooLong", - "StatusRequestedRangeNotSatisfiable", - "StatusResetContent", - "StatusSeeOther", - "StatusServiceUnavailable", - "StatusSwitchingProtocols", - "StatusTeapot", - "StatusTemporaryRedirect", - "StatusText", - "StatusTooEarly", - "StatusTooManyRequests", - "StatusUnauthorized", - "StatusUnavailableForLegalReasons", - "StatusUnprocessableEntity", - "StatusUnsupportedMediaType", - "StatusUpgradeRequired", - "StatusUseProxy", - "StatusVariantAlsoNegotiates", - "StripPrefix", - "TimeFormat", - "TimeoutHandler", - "TrailerPrefix", - "Transport", - }, - "net/http/cgi": { - "Handler", - "Request", - "RequestFromMap", - "Serve", - }, - "net/http/cookiejar": { - "Jar", - "New", - "Options", - "PublicSuffixList", - }, - "net/http/fcgi": { - "ErrConnClosed", - "ErrRequestAborted", - "ProcessEnv", - "Serve", - }, - "net/http/httptest": { - "DefaultRemoteAddr", - "NewRecorder", - "NewRequest", - "NewServer", - "NewTLSServer", - "NewUnstartedServer", - "ResponseRecorder", - "Server", - }, - "net/http/httptrace": { - "ClientTrace", - "ContextClientTrace", - "DNSDoneInfo", - "DNSStartInfo", - "GotConnInfo", - "WithClientTrace", - "WroteRequestInfo", - }, - "net/http/httputil": { - "BufferPool", - "ClientConn", - "DumpRequest", - "DumpRequestOut", - "DumpResponse", - "ErrClosed", - "ErrLineTooLong", - "ErrPersistEOF", - "ErrPipeline", - "NewChunkedReader", - "NewChunkedWriter", - "NewClientConn", - "NewProxyClientConn", - "NewServerConn", - "NewSingleHostReverseProxy", - "ProxyRequest", - "ReverseProxy", - "ServerConn", - }, - "net/http/pprof": { - "Cmdline", - "Handler", - "Index", - "Profile", - "Symbol", - "Trace", - }, - "net/mail": { - "Address", - "AddressParser", - "ErrHeaderNotPresent", - "Header", - "Message", - "ParseAddress", - "ParseAddressList", - "ParseDate", - "ReadMessage", - }, - "net/netip": { - "Addr", - "AddrFrom16", - "AddrFrom4", - "AddrFromSlice", - "AddrPort", - "AddrPortFrom", - "IPv4Unspecified", - "IPv6LinkLocalAllNodes", - "IPv6LinkLocalAllRouters", - "IPv6Loopback", - "IPv6Unspecified", - "MustParseAddr", - "MustParseAddrPort", - "MustParsePrefix", - "ParseAddr", - "ParseAddrPort", - "ParsePrefix", - "Prefix", - "PrefixFrom", - }, - "net/rpc": { - "Accept", - "Call", - "Client", - "ClientCodec", - "DefaultDebugPath", - "DefaultRPCPath", - "DefaultServer", - "Dial", - "DialHTTP", - "DialHTTPPath", - "ErrShutdown", - "HandleHTTP", - "NewClient", - "NewClientWithCodec", - "NewServer", - "Register", - "RegisterName", - "Request", - "Response", - "ServeCodec", - "ServeConn", - "ServeRequest", - "Server", - "ServerCodec", - "ServerError", - }, - "net/rpc/jsonrpc": { - "Dial", - "NewClient", - "NewClientCodec", - "NewServerCodec", - "ServeConn", - }, - "net/smtp": { - "Auth", - "CRAMMD5Auth", - "Client", - "Dial", - "NewClient", - "PlainAuth", - "SendMail", - "ServerInfo", - }, - "net/textproto": { - "CanonicalMIMEHeaderKey", - "Conn", - "Dial", - "Error", - "MIMEHeader", - "NewConn", - "NewReader", - "NewWriter", - "Pipeline", - "ProtocolError", - "Reader", - "TrimBytes", - "TrimString", - "Writer", - }, - "net/url": { - "Error", - "EscapeError", - "InvalidHostError", - "JoinPath", - "Parse", - "ParseQuery", - "ParseRequestURI", - "PathEscape", - "PathUnescape", - "QueryEscape", - "QueryUnescape", - "URL", - "User", - "UserPassword", - "Userinfo", - "Values", - }, - "os": { - "Args", - "Chdir", - "Chmod", - "Chown", - "Chtimes", - "Clearenv", - "Create", - "CreateTemp", - "DevNull", - "DirEntry", - "DirFS", - "Environ", - "ErrClosed", - "ErrDeadlineExceeded", - "ErrExist", - "ErrInvalid", - "ErrNoDeadline", - "ErrNotExist", - "ErrPermission", - "ErrProcessDone", - "Executable", - "Exit", - "Expand", - "ExpandEnv", - "File", - "FileInfo", - "FileMode", - "FindProcess", - "Getegid", - "Getenv", - "Geteuid", - "Getgid", - "Getgroups", - "Getpagesize", - "Getpid", - "Getppid", - "Getuid", - "Getwd", - "Hostname", - "Interrupt", - "IsExist", - "IsNotExist", - "IsPathSeparator", - "IsPermission", - "IsTimeout", - "Kill", - "Lchown", - "Link", - "LinkError", - "LookupEnv", - "Lstat", - "Mkdir", - "MkdirAll", - "MkdirTemp", - "ModeAppend", - "ModeCharDevice", - "ModeDevice", - "ModeDir", - "ModeExclusive", - "ModeIrregular", - "ModeNamedPipe", - "ModePerm", - "ModeSetgid", - "ModeSetuid", - "ModeSocket", - "ModeSticky", - "ModeSymlink", - "ModeTemporary", - "ModeType", - "NewFile", - "NewSyscallError", - "O_APPEND", - "O_CREATE", - "O_EXCL", - "O_RDONLY", - "O_RDWR", - "O_SYNC", - "O_TRUNC", - "O_WRONLY", - "Open", - "OpenFile", - "PathError", - "PathListSeparator", - "PathSeparator", - "Pipe", - "ProcAttr", - "Process", - "ProcessState", - "ReadDir", - "ReadFile", - "Readlink", - "Remove", - "RemoveAll", - "Rename", - "SEEK_CUR", - "SEEK_END", - "SEEK_SET", - "SameFile", - "Setenv", - "Signal", - "StartProcess", - "Stat", - "Stderr", - "Stdin", - "Stdout", - "Symlink", - "SyscallError", - "TempDir", - "Truncate", - "Unsetenv", - "UserCacheDir", - "UserConfigDir", - "UserHomeDir", - "WriteFile", - }, - "os/exec": { - "Cmd", - "Command", - "CommandContext", - "ErrDot", - "ErrNotFound", - "ErrWaitDelay", - "Error", - "ExitError", - "LookPath", - }, - "os/signal": { - "Ignore", - "Ignored", - "Notify", - "NotifyContext", - "Reset", - "Stop", - }, - "os/user": { - "Current", - "Group", - "Lookup", - "LookupGroup", - "LookupGroupId", - "LookupId", - "UnknownGroupError", - "UnknownGroupIdError", - "UnknownUserError", - "UnknownUserIdError", - "User", - }, - "path": { - "Base", - "Clean", - "Dir", - "ErrBadPattern", - "Ext", - "IsAbs", - "Join", - "Match", - "Split", - }, - "path/filepath": { - "Abs", - "Base", - "Clean", - "Dir", - "ErrBadPattern", - "EvalSymlinks", - "Ext", - "FromSlash", - "Glob", - "HasPrefix", - "IsAbs", - "IsLocal", - "Join", - "ListSeparator", - "Match", - "Rel", - "Separator", - "SkipAll", - "SkipDir", - "Split", - "SplitList", - "ToSlash", - "VolumeName", - "Walk", - "WalkDir", - "WalkFunc", - }, - "plugin": { - "Open", - "Plugin", - "Symbol", - }, - "reflect": { - "Append", - "AppendSlice", - "Array", - "ArrayOf", - "Bool", - "BothDir", - "Chan", - "ChanDir", - "ChanOf", - "Complex128", - "Complex64", - "Copy", - "DeepEqual", - "Float32", - "Float64", - "Func", - "FuncOf", - "Indirect", - "Int", - "Int16", - "Int32", - "Int64", - "Int8", - "Interface", - "Invalid", - "Kind", - "MakeChan", - "MakeFunc", - "MakeMap", - "MakeMapWithSize", - "MakeSlice", - "Map", - "MapIter", - "MapOf", - "Method", - "New", - "NewAt", - "Pointer", - "PointerTo", - "Ptr", - "PtrTo", - "RecvDir", - "Select", - "SelectCase", - "SelectDefault", - "SelectDir", - "SelectRecv", - "SelectSend", - "SendDir", - "Slice", - "SliceHeader", - "SliceOf", - "String", - "StringHeader", - "Struct", - "StructField", - "StructOf", - "StructTag", - "Swapper", - "Type", - "TypeOf", - "Uint", - "Uint16", - "Uint32", - "Uint64", - "Uint8", - "Uintptr", - "UnsafePointer", - "Value", - "ValueError", - "ValueOf", - "VisibleFields", - "Zero", - }, - "regexp": { - "Compile", - "CompilePOSIX", - "Match", - "MatchReader", - "MatchString", - "MustCompile", - "MustCompilePOSIX", - "QuoteMeta", - "Regexp", - }, - "regexp/syntax": { - "ClassNL", - "Compile", - "DotNL", - "EmptyBeginLine", - "EmptyBeginText", - "EmptyEndLine", - "EmptyEndText", - "EmptyNoWordBoundary", - "EmptyOp", - "EmptyOpContext", - "EmptyWordBoundary", - "ErrInternalError", - "ErrInvalidCharClass", - "ErrInvalidCharRange", - "ErrInvalidEscape", - "ErrInvalidNamedCapture", - "ErrInvalidPerlOp", - "ErrInvalidRepeatOp", - "ErrInvalidRepeatSize", - "ErrInvalidUTF8", - "ErrLarge", - "ErrMissingBracket", - "ErrMissingParen", - "ErrMissingRepeatArgument", - "ErrNestingDepth", - "ErrTrailingBackslash", - "ErrUnexpectedParen", - "Error", - "ErrorCode", - "Flags", - "FoldCase", - "Inst", - "InstAlt", - "InstAltMatch", - "InstCapture", - "InstEmptyWidth", - "InstFail", - "InstMatch", - "InstNop", - "InstOp", - "InstRune", - "InstRune1", - "InstRuneAny", - "InstRuneAnyNotNL", - "IsWordChar", - "Literal", - "MatchNL", - "NonGreedy", - "OneLine", - "Op", - "OpAlternate", - "OpAnyChar", - "OpAnyCharNotNL", - "OpBeginLine", - "OpBeginText", - "OpCapture", - "OpCharClass", - "OpConcat", - "OpEmptyMatch", - "OpEndLine", - "OpEndText", - "OpLiteral", - "OpNoMatch", - "OpNoWordBoundary", - "OpPlus", - "OpQuest", - "OpRepeat", - "OpStar", - "OpWordBoundary", - "POSIX", - "Parse", - "Perl", - "PerlX", - "Prog", - "Regexp", - "Simple", - "UnicodeGroups", - "WasDollar", - }, - "runtime": { - "BlockProfile", - "BlockProfileRecord", - "Breakpoint", - "CPUProfile", - "Caller", - "Callers", - "CallersFrames", - "Compiler", - "Error", - "Frame", - "Frames", - "Func", - "FuncForPC", - "GC", - "GOARCH", - "GOMAXPROCS", - "GOOS", - "GOROOT", - "Goexit", - "GoroutineProfile", - "Gosched", - "KeepAlive", - "LockOSThread", - "MemProfile", - "MemProfileRate", - "MemProfileRecord", - "MemStats", - "MutexProfile", - "NumCPU", - "NumCgoCall", - "NumGoroutine", - "PanicNilError", - "Pinner", - "ReadMemStats", - "ReadTrace", - "SetBlockProfileRate", - "SetCPUProfileRate", - "SetCgoTraceback", - "SetFinalizer", - "SetMutexProfileFraction", - "Stack", - "StackRecord", - "StartTrace", - "StopTrace", - "ThreadCreateProfile", - "TypeAssertionError", - "UnlockOSThread", - "Version", - }, - "runtime/cgo": { - "Handle", - "Incomplete", - "NewHandle", - }, - "runtime/coverage": { - "ClearCounters", - "WriteCounters", - "WriteCountersDir", - "WriteMeta", - "WriteMetaDir", - }, - "runtime/debug": { - "BuildInfo", - "BuildSetting", - "FreeOSMemory", - "GCStats", - "Module", - "ParseBuildInfo", - "PrintStack", - "ReadBuildInfo", - "ReadGCStats", - "SetGCPercent", - "SetMaxStack", - "SetMaxThreads", - "SetMemoryLimit", - "SetPanicOnFault", - "SetTraceback", - "Stack", - "WriteHeapDump", - }, - "runtime/metrics": { - "All", - "Description", - "Float64Histogram", - "KindBad", - "KindFloat64", - "KindFloat64Histogram", - "KindUint64", - "Read", - "Sample", - "Value", - "ValueKind", - }, - "runtime/pprof": { - "Do", - "ForLabels", - "Label", - "LabelSet", - "Labels", - "Lookup", - "NewProfile", - "Profile", - "Profiles", - "SetGoroutineLabels", - "StartCPUProfile", - "StopCPUProfile", - "WithLabels", - "WriteHeapProfile", - }, - "runtime/trace": { - "IsEnabled", - "Log", - "Logf", - "NewTask", - "Region", - "Start", - "StartRegion", - "Stop", - "Task", - "WithRegion", - }, - "slices": { - "BinarySearch", - "BinarySearchFunc", - "Clip", - "Clone", - "Compact", - "CompactFunc", - "Compare", - "CompareFunc", - "Contains", - "ContainsFunc", - "Delete", - "DeleteFunc", - "Equal", - "EqualFunc", - "Grow", - "Index", - "IndexFunc", - "Insert", - "IsSorted", - "IsSortedFunc", - "Max", - "MaxFunc", - "Min", - "MinFunc", - "Replace", - "Reverse", - "Sort", - "SortFunc", - "SortStableFunc", - }, - "sort": { - "Find", - "Float64Slice", - "Float64s", - "Float64sAreSorted", - "IntSlice", - "Interface", - "Ints", - "IntsAreSorted", - "IsSorted", - "Reverse", - "Search", - "SearchFloat64s", - "SearchInts", - "SearchStrings", - "Slice", - "SliceIsSorted", - "SliceStable", - "Sort", - "Stable", - "StringSlice", - "Strings", - "StringsAreSorted", - }, - "strconv": { - "AppendBool", - "AppendFloat", - "AppendInt", - "AppendQuote", - "AppendQuoteRune", - "AppendQuoteRuneToASCII", - "AppendQuoteRuneToGraphic", - "AppendQuoteToASCII", - "AppendQuoteToGraphic", - "AppendUint", - "Atoi", - "CanBackquote", - "ErrRange", - "ErrSyntax", - "FormatBool", - "FormatComplex", - "FormatFloat", - "FormatInt", - "FormatUint", - "IntSize", - "IsGraphic", - "IsPrint", - "Itoa", - "NumError", - "ParseBool", - "ParseComplex", - "ParseFloat", - "ParseInt", - "ParseUint", - "Quote", - "QuoteRune", - "QuoteRuneToASCII", - "QuoteRuneToGraphic", - "QuoteToASCII", - "QuoteToGraphic", - "QuotedPrefix", - "Unquote", - "UnquoteChar", - }, - "strings": { - "Builder", - "Clone", - "Compare", - "Contains", - "ContainsAny", - "ContainsFunc", - "ContainsRune", - "Count", - "Cut", - "CutPrefix", - "CutSuffix", - "EqualFold", - "Fields", - "FieldsFunc", - "HasPrefix", - "HasSuffix", - "Index", - "IndexAny", - "IndexByte", - "IndexFunc", - "IndexRune", - "Join", - "LastIndex", - "LastIndexAny", - "LastIndexByte", - "LastIndexFunc", - "Map", - "NewReader", - "NewReplacer", - "Reader", - "Repeat", - "Replace", - "ReplaceAll", - "Replacer", - "Split", - "SplitAfter", - "SplitAfterN", - "SplitN", - "Title", - "ToLower", - "ToLowerSpecial", - "ToTitle", - "ToTitleSpecial", - "ToUpper", - "ToUpperSpecial", - "ToValidUTF8", - "Trim", - "TrimFunc", - "TrimLeft", - "TrimLeftFunc", - "TrimPrefix", - "TrimRight", - "TrimRightFunc", - "TrimSpace", - "TrimSuffix", - }, - "sync": { - "Cond", - "Locker", - "Map", - "Mutex", - "NewCond", - "Once", - "OnceFunc", - "OnceValue", - "OnceValues", - "Pool", - "RWMutex", - "WaitGroup", - }, - "sync/atomic": { - "AddInt32", - "AddInt64", - "AddUint32", - "AddUint64", - "AddUintptr", - "Bool", - "CompareAndSwapInt32", - "CompareAndSwapInt64", - "CompareAndSwapPointer", - "CompareAndSwapUint32", - "CompareAndSwapUint64", - "CompareAndSwapUintptr", - "Int32", - "Int64", - "LoadInt32", - "LoadInt64", - "LoadPointer", - "LoadUint32", - "LoadUint64", - "LoadUintptr", - "Pointer", - "StoreInt32", - "StoreInt64", - "StorePointer", - "StoreUint32", - "StoreUint64", - "StoreUintptr", - "SwapInt32", - "SwapInt64", - "SwapPointer", - "SwapUint32", - "SwapUint64", - "SwapUintptr", - "Uint32", - "Uint64", - "Uintptr", - "Value", - }, - "syscall": { - "AF_ALG", - "AF_APPLETALK", - "AF_ARP", - "AF_ASH", - "AF_ATM", - "AF_ATMPVC", - "AF_ATMSVC", - "AF_AX25", - "AF_BLUETOOTH", - "AF_BRIDGE", - "AF_CAIF", - "AF_CAN", - "AF_CCITT", - "AF_CHAOS", - "AF_CNT", - "AF_COIP", - "AF_DATAKIT", - "AF_DECnet", - "AF_DLI", - "AF_E164", - "AF_ECMA", - "AF_ECONET", - "AF_ENCAP", - "AF_FILE", - "AF_HYLINK", - "AF_IEEE80211", - "AF_IEEE802154", - "AF_IMPLINK", - "AF_INET", - "AF_INET6", - "AF_INET6_SDP", - "AF_INET_SDP", - "AF_IPX", - "AF_IRDA", - "AF_ISDN", - "AF_ISO", - "AF_IUCV", - "AF_KEY", - "AF_LAT", - "AF_LINK", - "AF_LLC", - "AF_LOCAL", - "AF_MAX", - "AF_MPLS", - "AF_NATM", - "AF_NDRV", - "AF_NETBEUI", - "AF_NETBIOS", - "AF_NETGRAPH", - "AF_NETLINK", - "AF_NETROM", - "AF_NS", - "AF_OROUTE", - "AF_OSI", - "AF_PACKET", - "AF_PHONET", - "AF_PPP", - "AF_PPPOX", - "AF_PUP", - "AF_RDS", - "AF_RESERVED_36", - "AF_ROSE", - "AF_ROUTE", - "AF_RXRPC", - "AF_SCLUSTER", - "AF_SECURITY", - "AF_SIP", - "AF_SLOW", - "AF_SNA", - "AF_SYSTEM", - "AF_TIPC", - "AF_UNIX", - "AF_UNSPEC", - "AF_UTUN", - "AF_VENDOR00", - "AF_VENDOR01", - "AF_VENDOR02", - "AF_VENDOR03", - "AF_VENDOR04", - "AF_VENDOR05", - "AF_VENDOR06", - "AF_VENDOR07", - "AF_VENDOR08", - "AF_VENDOR09", - "AF_VENDOR10", - "AF_VENDOR11", - "AF_VENDOR12", - "AF_VENDOR13", - "AF_VENDOR14", - "AF_VENDOR15", - "AF_VENDOR16", - "AF_VENDOR17", - "AF_VENDOR18", - "AF_VENDOR19", - "AF_VENDOR20", - "AF_VENDOR21", - "AF_VENDOR22", - "AF_VENDOR23", - "AF_VENDOR24", - "AF_VENDOR25", - "AF_VENDOR26", - "AF_VENDOR27", - "AF_VENDOR28", - "AF_VENDOR29", - "AF_VENDOR30", - "AF_VENDOR31", - "AF_VENDOR32", - "AF_VENDOR33", - "AF_VENDOR34", - "AF_VENDOR35", - "AF_VENDOR36", - "AF_VENDOR37", - "AF_VENDOR38", - "AF_VENDOR39", - "AF_VENDOR40", - "AF_VENDOR41", - "AF_VENDOR42", - "AF_VENDOR43", - "AF_VENDOR44", - "AF_VENDOR45", - "AF_VENDOR46", - "AF_VENDOR47", - "AF_WANPIPE", - "AF_X25", - "AI_CANONNAME", - "AI_NUMERICHOST", - "AI_PASSIVE", - "APPLICATION_ERROR", - "ARPHRD_ADAPT", - "ARPHRD_APPLETLK", - "ARPHRD_ARCNET", - "ARPHRD_ASH", - "ARPHRD_ATM", - "ARPHRD_AX25", - "ARPHRD_BIF", - "ARPHRD_CHAOS", - "ARPHRD_CISCO", - "ARPHRD_CSLIP", - "ARPHRD_CSLIP6", - "ARPHRD_DDCMP", - "ARPHRD_DLCI", - "ARPHRD_ECONET", - "ARPHRD_EETHER", - "ARPHRD_ETHER", - "ARPHRD_EUI64", - "ARPHRD_FCAL", - "ARPHRD_FCFABRIC", - "ARPHRD_FCPL", - "ARPHRD_FCPP", - "ARPHRD_FDDI", - "ARPHRD_FRAD", - "ARPHRD_FRELAY", - "ARPHRD_HDLC", - "ARPHRD_HIPPI", - "ARPHRD_HWX25", - "ARPHRD_IEEE1394", - "ARPHRD_IEEE802", - "ARPHRD_IEEE80211", - "ARPHRD_IEEE80211_PRISM", - "ARPHRD_IEEE80211_RADIOTAP", - "ARPHRD_IEEE802154", - "ARPHRD_IEEE802154_PHY", - "ARPHRD_IEEE802_TR", - "ARPHRD_INFINIBAND", - "ARPHRD_IPDDP", - "ARPHRD_IPGRE", - "ARPHRD_IRDA", - "ARPHRD_LAPB", - "ARPHRD_LOCALTLK", - "ARPHRD_LOOPBACK", - "ARPHRD_METRICOM", - "ARPHRD_NETROM", - "ARPHRD_NONE", - "ARPHRD_PIMREG", - "ARPHRD_PPP", - "ARPHRD_PRONET", - "ARPHRD_RAWHDLC", - "ARPHRD_ROSE", - "ARPHRD_RSRVD", - "ARPHRD_SIT", - "ARPHRD_SKIP", - "ARPHRD_SLIP", - "ARPHRD_SLIP6", - "ARPHRD_STRIP", - "ARPHRD_TUNNEL", - "ARPHRD_TUNNEL6", - "ARPHRD_VOID", - "ARPHRD_X25", - "AUTHTYPE_CLIENT", - "AUTHTYPE_SERVER", - "Accept", - "Accept4", - "AcceptEx", - "Access", - "Acct", - "AddrinfoW", - "Adjtime", - "Adjtimex", - "AllThreadsSyscall", - "AllThreadsSyscall6", - "AttachLsf", - "B0", - "B1000000", - "B110", - "B115200", - "B1152000", - "B1200", - "B134", - "B14400", - "B150", - "B1500000", - "B1800", - "B19200", - "B200", - "B2000000", - "B230400", - "B2400", - "B2500000", - "B28800", - "B300", - "B3000000", - "B3500000", - "B38400", - "B4000000", - "B460800", - "B4800", - "B50", - "B500000", - "B57600", - "B576000", - "B600", - "B7200", - "B75", - "B76800", - "B921600", - "B9600", - "BASE_PROTOCOL", - "BIOCFEEDBACK", - "BIOCFLUSH", - "BIOCGBLEN", - "BIOCGDIRECTION", - "BIOCGDIRFILT", - "BIOCGDLT", - "BIOCGDLTLIST", - "BIOCGETBUFMODE", - "BIOCGETIF", - "BIOCGETZMAX", - "BIOCGFEEDBACK", - "BIOCGFILDROP", - "BIOCGHDRCMPLT", - "BIOCGRSIG", - "BIOCGRTIMEOUT", - "BIOCGSEESENT", - "BIOCGSTATS", - "BIOCGSTATSOLD", - "BIOCGTSTAMP", - "BIOCIMMEDIATE", - "BIOCLOCK", - "BIOCPROMISC", - "BIOCROTZBUF", - "BIOCSBLEN", - "BIOCSDIRECTION", - "BIOCSDIRFILT", - "BIOCSDLT", - "BIOCSETBUFMODE", - "BIOCSETF", - "BIOCSETFNR", - "BIOCSETIF", - "BIOCSETWF", - "BIOCSETZBUF", - "BIOCSFEEDBACK", - "BIOCSFILDROP", - "BIOCSHDRCMPLT", - "BIOCSRSIG", - "BIOCSRTIMEOUT", - "BIOCSSEESENT", - "BIOCSTCPF", - "BIOCSTSTAMP", - "BIOCSUDPF", - "BIOCVERSION", - "BPF_A", - "BPF_ABS", - "BPF_ADD", - "BPF_ALIGNMENT", - "BPF_ALIGNMENT32", - "BPF_ALU", - "BPF_AND", - "BPF_B", - "BPF_BUFMODE_BUFFER", - "BPF_BUFMODE_ZBUF", - "BPF_DFLTBUFSIZE", - "BPF_DIRECTION_IN", - "BPF_DIRECTION_OUT", - "BPF_DIV", - "BPF_H", - "BPF_IMM", - "BPF_IND", - "BPF_JA", - "BPF_JEQ", - "BPF_JGE", - "BPF_JGT", - "BPF_JMP", - "BPF_JSET", - "BPF_K", - "BPF_LD", - "BPF_LDX", - "BPF_LEN", - "BPF_LSH", - "BPF_MAJOR_VERSION", - "BPF_MAXBUFSIZE", - "BPF_MAXINSNS", - "BPF_MEM", - "BPF_MEMWORDS", - "BPF_MINBUFSIZE", - "BPF_MINOR_VERSION", - "BPF_MISC", - "BPF_MSH", - "BPF_MUL", - "BPF_NEG", - "BPF_OR", - "BPF_RELEASE", - "BPF_RET", - "BPF_RSH", - "BPF_ST", - "BPF_STX", - "BPF_SUB", - "BPF_TAX", - "BPF_TXA", - "BPF_T_BINTIME", - "BPF_T_BINTIME_FAST", - "BPF_T_BINTIME_MONOTONIC", - "BPF_T_BINTIME_MONOTONIC_FAST", - "BPF_T_FAST", - "BPF_T_FLAG_MASK", - "BPF_T_FORMAT_MASK", - "BPF_T_MICROTIME", - "BPF_T_MICROTIME_FAST", - "BPF_T_MICROTIME_MONOTONIC", - "BPF_T_MICROTIME_MONOTONIC_FAST", - "BPF_T_MONOTONIC", - "BPF_T_MONOTONIC_FAST", - "BPF_T_NANOTIME", - "BPF_T_NANOTIME_FAST", - "BPF_T_NANOTIME_MONOTONIC", - "BPF_T_NANOTIME_MONOTONIC_FAST", - "BPF_T_NONE", - "BPF_T_NORMAL", - "BPF_W", - "BPF_X", - "BRKINT", - "Bind", - "BindToDevice", - "BpfBuflen", - "BpfDatalink", - "BpfHdr", - "BpfHeadercmpl", - "BpfInsn", - "BpfInterface", - "BpfJump", - "BpfProgram", - "BpfStat", - "BpfStats", - "BpfStmt", - "BpfTimeout", - "BpfTimeval", - "BpfVersion", - "BpfZbuf", - "BpfZbufHeader", - "ByHandleFileInformation", - "BytePtrFromString", - "ByteSliceFromString", - "CCR0_FLUSH", - "CERT_CHAIN_POLICY_AUTHENTICODE", - "CERT_CHAIN_POLICY_AUTHENTICODE_TS", - "CERT_CHAIN_POLICY_BASE", - "CERT_CHAIN_POLICY_BASIC_CONSTRAINTS", - "CERT_CHAIN_POLICY_EV", - "CERT_CHAIN_POLICY_MICROSOFT_ROOT", - "CERT_CHAIN_POLICY_NT_AUTH", - "CERT_CHAIN_POLICY_SSL", - "CERT_E_CN_NO_MATCH", - "CERT_E_EXPIRED", - "CERT_E_PURPOSE", - "CERT_E_ROLE", - "CERT_E_UNTRUSTEDROOT", - "CERT_STORE_ADD_ALWAYS", - "CERT_STORE_DEFER_CLOSE_UNTIL_LAST_FREE_FLAG", - "CERT_STORE_PROV_MEMORY", - "CERT_TRUST_HAS_EXCLUDED_NAME_CONSTRAINT", - "CERT_TRUST_HAS_NOT_DEFINED_NAME_CONSTRAINT", - "CERT_TRUST_HAS_NOT_PERMITTED_NAME_CONSTRAINT", - "CERT_TRUST_HAS_NOT_SUPPORTED_CRITICAL_EXT", - "CERT_TRUST_HAS_NOT_SUPPORTED_NAME_CONSTRAINT", - "CERT_TRUST_INVALID_BASIC_CONSTRAINTS", - "CERT_TRUST_INVALID_EXTENSION", - "CERT_TRUST_INVALID_NAME_CONSTRAINTS", - "CERT_TRUST_INVALID_POLICY_CONSTRAINTS", - "CERT_TRUST_IS_CYCLIC", - "CERT_TRUST_IS_EXPLICIT_DISTRUST", - "CERT_TRUST_IS_NOT_SIGNATURE_VALID", - "CERT_TRUST_IS_NOT_TIME_VALID", - "CERT_TRUST_IS_NOT_VALID_FOR_USAGE", - "CERT_TRUST_IS_OFFLINE_REVOCATION", - "CERT_TRUST_IS_REVOKED", - "CERT_TRUST_IS_UNTRUSTED_ROOT", - "CERT_TRUST_NO_ERROR", - "CERT_TRUST_NO_ISSUANCE_CHAIN_POLICY", - "CERT_TRUST_REVOCATION_STATUS_UNKNOWN", - "CFLUSH", - "CLOCAL", - "CLONE_CHILD_CLEARTID", - "CLONE_CHILD_SETTID", - "CLONE_CLEAR_SIGHAND", - "CLONE_CSIGNAL", - "CLONE_DETACHED", - "CLONE_FILES", - "CLONE_FS", - "CLONE_INTO_CGROUP", - "CLONE_IO", - "CLONE_NEWCGROUP", - "CLONE_NEWIPC", - "CLONE_NEWNET", - "CLONE_NEWNS", - "CLONE_NEWPID", - "CLONE_NEWTIME", - "CLONE_NEWUSER", - "CLONE_NEWUTS", - "CLONE_PARENT", - "CLONE_PARENT_SETTID", - "CLONE_PID", - "CLONE_PIDFD", - "CLONE_PTRACE", - "CLONE_SETTLS", - "CLONE_SIGHAND", - "CLONE_SYSVSEM", - "CLONE_THREAD", - "CLONE_UNTRACED", - "CLONE_VFORK", - "CLONE_VM", - "CPUID_CFLUSH", - "CREAD", - "CREATE_ALWAYS", - "CREATE_NEW", - "CREATE_NEW_PROCESS_GROUP", - "CREATE_UNICODE_ENVIRONMENT", - "CRYPT_DEFAULT_CONTAINER_OPTIONAL", - "CRYPT_DELETEKEYSET", - "CRYPT_MACHINE_KEYSET", - "CRYPT_NEWKEYSET", - "CRYPT_SILENT", - "CRYPT_VERIFYCONTEXT", - "CS5", - "CS6", - "CS7", - "CS8", - "CSIZE", - "CSTART", - "CSTATUS", - "CSTOP", - "CSTOPB", - "CSUSP", - "CTL_MAXNAME", - "CTL_NET", - "CTL_QUERY", - "CTRL_BREAK_EVENT", - "CTRL_CLOSE_EVENT", - "CTRL_C_EVENT", - "CTRL_LOGOFF_EVENT", - "CTRL_SHUTDOWN_EVENT", - "CancelIo", - "CancelIoEx", - "CertAddCertificateContextToStore", - "CertChainContext", - "CertChainElement", - "CertChainPara", - "CertChainPolicyPara", - "CertChainPolicyStatus", - "CertCloseStore", - "CertContext", - "CertCreateCertificateContext", - "CertEnhKeyUsage", - "CertEnumCertificatesInStore", - "CertFreeCertificateChain", - "CertFreeCertificateContext", - "CertGetCertificateChain", - "CertInfo", - "CertOpenStore", - "CertOpenSystemStore", - "CertRevocationCrlInfo", - "CertRevocationInfo", - "CertSimpleChain", - "CertTrustListInfo", - "CertTrustStatus", - "CertUsageMatch", - "CertVerifyCertificateChainPolicy", - "Chdir", - "CheckBpfVersion", - "Chflags", - "Chmod", - "Chown", - "Chroot", - "Clearenv", - "Close", - "CloseHandle", - "CloseOnExec", - "Closesocket", - "CmsgLen", - "CmsgSpace", - "Cmsghdr", - "CommandLineToArgv", - "ComputerName", - "Conn", - "Connect", - "ConnectEx", - "ConvertSidToStringSid", - "ConvertStringSidToSid", - "CopySid", - "Creat", - "CreateDirectory", - "CreateFile", - "CreateFileMapping", - "CreateHardLink", - "CreateIoCompletionPort", - "CreatePipe", - "CreateProcess", - "CreateProcessAsUser", - "CreateSymbolicLink", - "CreateToolhelp32Snapshot", - "Credential", - "CryptAcquireContext", - "CryptGenRandom", - "CryptReleaseContext", - "DIOCBSFLUSH", - "DIOCOSFPFLUSH", - "DLL", - "DLLError", - "DLT_A429", - "DLT_A653_ICM", - "DLT_AIRONET_HEADER", - "DLT_AOS", - "DLT_APPLE_IP_OVER_IEEE1394", - "DLT_ARCNET", - "DLT_ARCNET_LINUX", - "DLT_ATM_CLIP", - "DLT_ATM_RFC1483", - "DLT_AURORA", - "DLT_AX25", - "DLT_AX25_KISS", - "DLT_BACNET_MS_TP", - "DLT_BLUETOOTH_HCI_H4", - "DLT_BLUETOOTH_HCI_H4_WITH_PHDR", - "DLT_CAN20B", - "DLT_CAN_SOCKETCAN", - "DLT_CHAOS", - "DLT_CHDLC", - "DLT_CISCO_IOS", - "DLT_C_HDLC", - "DLT_C_HDLC_WITH_DIR", - "DLT_DBUS", - "DLT_DECT", - "DLT_DOCSIS", - "DLT_DVB_CI", - "DLT_ECONET", - "DLT_EN10MB", - "DLT_EN3MB", - "DLT_ENC", - "DLT_ERF", - "DLT_ERF_ETH", - "DLT_ERF_POS", - "DLT_FC_2", - "DLT_FC_2_WITH_FRAME_DELIMS", - "DLT_FDDI", - "DLT_FLEXRAY", - "DLT_FRELAY", - "DLT_FRELAY_WITH_DIR", - "DLT_GCOM_SERIAL", - "DLT_GCOM_T1E1", - "DLT_GPF_F", - "DLT_GPF_T", - "DLT_GPRS_LLC", - "DLT_GSMTAP_ABIS", - "DLT_GSMTAP_UM", - "DLT_HDLC", - "DLT_HHDLC", - "DLT_HIPPI", - "DLT_IBM_SN", - "DLT_IBM_SP", - "DLT_IEEE802", - "DLT_IEEE802_11", - "DLT_IEEE802_11_RADIO", - "DLT_IEEE802_11_RADIO_AVS", - "DLT_IEEE802_15_4", - "DLT_IEEE802_15_4_LINUX", - "DLT_IEEE802_15_4_NOFCS", - "DLT_IEEE802_15_4_NONASK_PHY", - "DLT_IEEE802_16_MAC_CPS", - "DLT_IEEE802_16_MAC_CPS_RADIO", - "DLT_IPFILTER", - "DLT_IPMB", - "DLT_IPMB_LINUX", - "DLT_IPNET", - "DLT_IPOIB", - "DLT_IPV4", - "DLT_IPV6", - "DLT_IP_OVER_FC", - "DLT_JUNIPER_ATM1", - "DLT_JUNIPER_ATM2", - "DLT_JUNIPER_ATM_CEMIC", - "DLT_JUNIPER_CHDLC", - "DLT_JUNIPER_ES", - "DLT_JUNIPER_ETHER", - "DLT_JUNIPER_FIBRECHANNEL", - "DLT_JUNIPER_FRELAY", - "DLT_JUNIPER_GGSN", - "DLT_JUNIPER_ISM", - "DLT_JUNIPER_MFR", - "DLT_JUNIPER_MLFR", - "DLT_JUNIPER_MLPPP", - "DLT_JUNIPER_MONITOR", - "DLT_JUNIPER_PIC_PEER", - "DLT_JUNIPER_PPP", - "DLT_JUNIPER_PPPOE", - "DLT_JUNIPER_PPPOE_ATM", - "DLT_JUNIPER_SERVICES", - "DLT_JUNIPER_SRX_E2E", - "DLT_JUNIPER_ST", - "DLT_JUNIPER_VP", - "DLT_JUNIPER_VS", - "DLT_LAPB_WITH_DIR", - "DLT_LAPD", - "DLT_LIN", - "DLT_LINUX_EVDEV", - "DLT_LINUX_IRDA", - "DLT_LINUX_LAPD", - "DLT_LINUX_PPP_WITHDIRECTION", - "DLT_LINUX_SLL", - "DLT_LOOP", - "DLT_LTALK", - "DLT_MATCHING_MAX", - "DLT_MATCHING_MIN", - "DLT_MFR", - "DLT_MOST", - "DLT_MPEG_2_TS", - "DLT_MPLS", - "DLT_MTP2", - "DLT_MTP2_WITH_PHDR", - "DLT_MTP3", - "DLT_MUX27010", - "DLT_NETANALYZER", - "DLT_NETANALYZER_TRANSPARENT", - "DLT_NFC_LLCP", - "DLT_NFLOG", - "DLT_NG40", - "DLT_NULL", - "DLT_PCI_EXP", - "DLT_PFLOG", - "DLT_PFSYNC", - "DLT_PPI", - "DLT_PPP", - "DLT_PPP_BSDOS", - "DLT_PPP_ETHER", - "DLT_PPP_PPPD", - "DLT_PPP_SERIAL", - "DLT_PPP_WITH_DIR", - "DLT_PPP_WITH_DIRECTION", - "DLT_PRISM_HEADER", - "DLT_PRONET", - "DLT_RAIF1", - "DLT_RAW", - "DLT_RAWAF_MASK", - "DLT_RIO", - "DLT_SCCP", - "DLT_SITA", - "DLT_SLIP", - "DLT_SLIP_BSDOS", - "DLT_STANAG_5066_D_PDU", - "DLT_SUNATM", - "DLT_SYMANTEC_FIREWALL", - "DLT_TZSP", - "DLT_USB", - "DLT_USB_LINUX", - "DLT_USB_LINUX_MMAPPED", - "DLT_USER0", - "DLT_USER1", - "DLT_USER10", - "DLT_USER11", - "DLT_USER12", - "DLT_USER13", - "DLT_USER14", - "DLT_USER15", - "DLT_USER2", - "DLT_USER3", - "DLT_USER4", - "DLT_USER5", - "DLT_USER6", - "DLT_USER7", - "DLT_USER8", - "DLT_USER9", - "DLT_WIHART", - "DLT_X2E_SERIAL", - "DLT_X2E_XORAYA", - "DNSMXData", - "DNSPTRData", - "DNSRecord", - "DNSSRVData", - "DNSTXTData", - "DNS_INFO_NO_RECORDS", - "DNS_TYPE_A", - "DNS_TYPE_A6", - "DNS_TYPE_AAAA", - "DNS_TYPE_ADDRS", - "DNS_TYPE_AFSDB", - "DNS_TYPE_ALL", - "DNS_TYPE_ANY", - "DNS_TYPE_ATMA", - "DNS_TYPE_AXFR", - "DNS_TYPE_CERT", - "DNS_TYPE_CNAME", - "DNS_TYPE_DHCID", - "DNS_TYPE_DNAME", - "DNS_TYPE_DNSKEY", - "DNS_TYPE_DS", - "DNS_TYPE_EID", - "DNS_TYPE_GID", - "DNS_TYPE_GPOS", - "DNS_TYPE_HINFO", - "DNS_TYPE_ISDN", - "DNS_TYPE_IXFR", - "DNS_TYPE_KEY", - "DNS_TYPE_KX", - "DNS_TYPE_LOC", - "DNS_TYPE_MAILA", - "DNS_TYPE_MAILB", - "DNS_TYPE_MB", - "DNS_TYPE_MD", - "DNS_TYPE_MF", - "DNS_TYPE_MG", - "DNS_TYPE_MINFO", - "DNS_TYPE_MR", - "DNS_TYPE_MX", - "DNS_TYPE_NAPTR", - "DNS_TYPE_NBSTAT", - "DNS_TYPE_NIMLOC", - "DNS_TYPE_NS", - "DNS_TYPE_NSAP", - "DNS_TYPE_NSAPPTR", - "DNS_TYPE_NSEC", - "DNS_TYPE_NULL", - "DNS_TYPE_NXT", - "DNS_TYPE_OPT", - "DNS_TYPE_PTR", - "DNS_TYPE_PX", - "DNS_TYPE_RP", - "DNS_TYPE_RRSIG", - "DNS_TYPE_RT", - "DNS_TYPE_SIG", - "DNS_TYPE_SINK", - "DNS_TYPE_SOA", - "DNS_TYPE_SRV", - "DNS_TYPE_TEXT", - "DNS_TYPE_TKEY", - "DNS_TYPE_TSIG", - "DNS_TYPE_UID", - "DNS_TYPE_UINFO", - "DNS_TYPE_UNSPEC", - "DNS_TYPE_WINS", - "DNS_TYPE_WINSR", - "DNS_TYPE_WKS", - "DNS_TYPE_X25", - "DT_BLK", - "DT_CHR", - "DT_DIR", - "DT_FIFO", - "DT_LNK", - "DT_REG", - "DT_SOCK", - "DT_UNKNOWN", - "DT_WHT", - "DUPLICATE_CLOSE_SOURCE", - "DUPLICATE_SAME_ACCESS", - "DeleteFile", - "DetachLsf", - "DeviceIoControl", - "Dirent", - "DnsNameCompare", - "DnsQuery", - "DnsRecordListFree", - "DnsSectionAdditional", - "DnsSectionAnswer", - "DnsSectionAuthority", - "DnsSectionQuestion", - "Dup", - "Dup2", - "Dup3", - "DuplicateHandle", - "E2BIG", - "EACCES", - "EADDRINUSE", - "EADDRNOTAVAIL", - "EADV", - "EAFNOSUPPORT", - "EAGAIN", - "EALREADY", - "EAUTH", - "EBADARCH", - "EBADE", - "EBADEXEC", - "EBADF", - "EBADFD", - "EBADMACHO", - "EBADMSG", - "EBADR", - "EBADRPC", - "EBADRQC", - "EBADSLT", - "EBFONT", - "EBUSY", - "ECANCELED", - "ECAPMODE", - "ECHILD", - "ECHO", - "ECHOCTL", - "ECHOE", - "ECHOK", - "ECHOKE", - "ECHONL", - "ECHOPRT", - "ECHRNG", - "ECOMM", - "ECONNABORTED", - "ECONNREFUSED", - "ECONNRESET", - "EDEADLK", - "EDEADLOCK", - "EDESTADDRREQ", - "EDEVERR", - "EDOM", - "EDOOFUS", - "EDOTDOT", - "EDQUOT", - "EEXIST", - "EFAULT", - "EFBIG", - "EFER_LMA", - "EFER_LME", - "EFER_NXE", - "EFER_SCE", - "EFTYPE", - "EHOSTDOWN", - "EHOSTUNREACH", - "EHWPOISON", - "EIDRM", - "EILSEQ", - "EINPROGRESS", - "EINTR", - "EINVAL", - "EIO", - "EIPSEC", - "EISCONN", - "EISDIR", - "EISNAM", - "EKEYEXPIRED", - "EKEYREJECTED", - "EKEYREVOKED", - "EL2HLT", - "EL2NSYNC", - "EL3HLT", - "EL3RST", - "ELAST", - "ELF_NGREG", - "ELF_PRARGSZ", - "ELIBACC", - "ELIBBAD", - "ELIBEXEC", - "ELIBMAX", - "ELIBSCN", - "ELNRNG", - "ELOOP", - "EMEDIUMTYPE", - "EMFILE", - "EMLINK", - "EMSGSIZE", - "EMT_TAGOVF", - "EMULTIHOP", - "EMUL_ENABLED", - "EMUL_LINUX", - "EMUL_LINUX32", - "EMUL_MAXID", - "EMUL_NATIVE", - "ENAMETOOLONG", - "ENAVAIL", - "ENDRUNDISC", - "ENEEDAUTH", - "ENETDOWN", - "ENETRESET", - "ENETUNREACH", - "ENFILE", - "ENOANO", - "ENOATTR", - "ENOBUFS", - "ENOCSI", - "ENODATA", - "ENODEV", - "ENOENT", - "ENOEXEC", - "ENOKEY", - "ENOLCK", - "ENOLINK", - "ENOMEDIUM", - "ENOMEM", - "ENOMSG", - "ENONET", - "ENOPKG", - "ENOPOLICY", - "ENOPROTOOPT", - "ENOSPC", - "ENOSR", - "ENOSTR", - "ENOSYS", - "ENOTBLK", - "ENOTCAPABLE", - "ENOTCONN", - "ENOTDIR", - "ENOTEMPTY", - "ENOTNAM", - "ENOTRECOVERABLE", - "ENOTSOCK", - "ENOTSUP", - "ENOTTY", - "ENOTUNIQ", - "ENXIO", - "EN_SW_CTL_INF", - "EN_SW_CTL_PREC", - "EN_SW_CTL_ROUND", - "EN_SW_DATACHAIN", - "EN_SW_DENORM", - "EN_SW_INVOP", - "EN_SW_OVERFLOW", - "EN_SW_PRECLOSS", - "EN_SW_UNDERFLOW", - "EN_SW_ZERODIV", - "EOPNOTSUPP", - "EOVERFLOW", - "EOWNERDEAD", - "EPERM", - "EPFNOSUPPORT", - "EPIPE", - "EPOLLERR", - "EPOLLET", - "EPOLLHUP", - "EPOLLIN", - "EPOLLMSG", - "EPOLLONESHOT", - "EPOLLOUT", - "EPOLLPRI", - "EPOLLRDBAND", - "EPOLLRDHUP", - "EPOLLRDNORM", - "EPOLLWRBAND", - "EPOLLWRNORM", - "EPOLL_CLOEXEC", - "EPOLL_CTL_ADD", - "EPOLL_CTL_DEL", - "EPOLL_CTL_MOD", - "EPOLL_NONBLOCK", - "EPROCLIM", - "EPROCUNAVAIL", - "EPROGMISMATCH", - "EPROGUNAVAIL", - "EPROTO", - "EPROTONOSUPPORT", - "EPROTOTYPE", - "EPWROFF", - "EQFULL", - "ERANGE", - "EREMCHG", - "EREMOTE", - "EREMOTEIO", - "ERESTART", - "ERFKILL", - "EROFS", - "ERPCMISMATCH", - "ERROR_ACCESS_DENIED", - "ERROR_ALREADY_EXISTS", - "ERROR_BROKEN_PIPE", - "ERROR_BUFFER_OVERFLOW", - "ERROR_DIR_NOT_EMPTY", - "ERROR_ENVVAR_NOT_FOUND", - "ERROR_FILE_EXISTS", - "ERROR_FILE_NOT_FOUND", - "ERROR_HANDLE_EOF", - "ERROR_INSUFFICIENT_BUFFER", - "ERROR_IO_PENDING", - "ERROR_MOD_NOT_FOUND", - "ERROR_MORE_DATA", - "ERROR_NETNAME_DELETED", - "ERROR_NOT_FOUND", - "ERROR_NO_MORE_FILES", - "ERROR_OPERATION_ABORTED", - "ERROR_PATH_NOT_FOUND", - "ERROR_PRIVILEGE_NOT_HELD", - "ERROR_PROC_NOT_FOUND", - "ESHLIBVERS", - "ESHUTDOWN", - "ESOCKTNOSUPPORT", - "ESPIPE", - "ESRCH", - "ESRMNT", - "ESTALE", - "ESTRPIPE", - "ETHERCAP_JUMBO_MTU", - "ETHERCAP_VLAN_HWTAGGING", - "ETHERCAP_VLAN_MTU", - "ETHERMIN", - "ETHERMTU", - "ETHERMTU_JUMBO", - "ETHERTYPE_8023", - "ETHERTYPE_AARP", - "ETHERTYPE_ACCTON", - "ETHERTYPE_AEONIC", - "ETHERTYPE_ALPHA", - "ETHERTYPE_AMBER", - "ETHERTYPE_AMOEBA", - "ETHERTYPE_AOE", - "ETHERTYPE_APOLLO", - "ETHERTYPE_APOLLODOMAIN", - "ETHERTYPE_APPLETALK", - "ETHERTYPE_APPLITEK", - "ETHERTYPE_ARGONAUT", - "ETHERTYPE_ARP", - "ETHERTYPE_AT", - "ETHERTYPE_ATALK", - "ETHERTYPE_ATOMIC", - "ETHERTYPE_ATT", - "ETHERTYPE_ATTSTANFORD", - "ETHERTYPE_AUTOPHON", - "ETHERTYPE_AXIS", - "ETHERTYPE_BCLOOP", - "ETHERTYPE_BOFL", - "ETHERTYPE_CABLETRON", - "ETHERTYPE_CHAOS", - "ETHERTYPE_COMDESIGN", - "ETHERTYPE_COMPUGRAPHIC", - "ETHERTYPE_COUNTERPOINT", - "ETHERTYPE_CRONUS", - "ETHERTYPE_CRONUSVLN", - "ETHERTYPE_DCA", - "ETHERTYPE_DDE", - "ETHERTYPE_DEBNI", - "ETHERTYPE_DECAM", - "ETHERTYPE_DECCUST", - "ETHERTYPE_DECDIAG", - "ETHERTYPE_DECDNS", - "ETHERTYPE_DECDTS", - "ETHERTYPE_DECEXPER", - "ETHERTYPE_DECLAST", - "ETHERTYPE_DECLTM", - "ETHERTYPE_DECMUMPS", - "ETHERTYPE_DECNETBIOS", - "ETHERTYPE_DELTACON", - "ETHERTYPE_DIDDLE", - "ETHERTYPE_DLOG1", - "ETHERTYPE_DLOG2", - "ETHERTYPE_DN", - "ETHERTYPE_DOGFIGHT", - "ETHERTYPE_DSMD", - "ETHERTYPE_ECMA", - "ETHERTYPE_ENCRYPT", - "ETHERTYPE_ES", - "ETHERTYPE_EXCELAN", - "ETHERTYPE_EXPERDATA", - "ETHERTYPE_FLIP", - "ETHERTYPE_FLOWCONTROL", - "ETHERTYPE_FRARP", - "ETHERTYPE_GENDYN", - "ETHERTYPE_HAYES", - "ETHERTYPE_HIPPI_FP", - "ETHERTYPE_HITACHI", - "ETHERTYPE_HP", - "ETHERTYPE_IEEEPUP", - "ETHERTYPE_IEEEPUPAT", - "ETHERTYPE_IMLBL", - "ETHERTYPE_IMLBLDIAG", - "ETHERTYPE_IP", - "ETHERTYPE_IPAS", - "ETHERTYPE_IPV6", - "ETHERTYPE_IPX", - "ETHERTYPE_IPXNEW", - "ETHERTYPE_KALPANA", - "ETHERTYPE_LANBRIDGE", - "ETHERTYPE_LANPROBE", - "ETHERTYPE_LAT", - "ETHERTYPE_LBACK", - "ETHERTYPE_LITTLE", - "ETHERTYPE_LLDP", - "ETHERTYPE_LOGICRAFT", - "ETHERTYPE_LOOPBACK", - "ETHERTYPE_MATRA", - "ETHERTYPE_MAX", - "ETHERTYPE_MERIT", - "ETHERTYPE_MICP", - "ETHERTYPE_MOPDL", - "ETHERTYPE_MOPRC", - "ETHERTYPE_MOTOROLA", - "ETHERTYPE_MPLS", - "ETHERTYPE_MPLS_MCAST", - "ETHERTYPE_MUMPS", - "ETHERTYPE_NBPCC", - "ETHERTYPE_NBPCLAIM", - "ETHERTYPE_NBPCLREQ", - "ETHERTYPE_NBPCLRSP", - "ETHERTYPE_NBPCREQ", - "ETHERTYPE_NBPCRSP", - "ETHERTYPE_NBPDG", - "ETHERTYPE_NBPDGB", - "ETHERTYPE_NBPDLTE", - "ETHERTYPE_NBPRAR", - "ETHERTYPE_NBPRAS", - "ETHERTYPE_NBPRST", - "ETHERTYPE_NBPSCD", - "ETHERTYPE_NBPVCD", - "ETHERTYPE_NBS", - "ETHERTYPE_NCD", - "ETHERTYPE_NESTAR", - "ETHERTYPE_NETBEUI", - "ETHERTYPE_NOVELL", - "ETHERTYPE_NS", - "ETHERTYPE_NSAT", - "ETHERTYPE_NSCOMPAT", - "ETHERTYPE_NTRAILER", - "ETHERTYPE_OS9", - "ETHERTYPE_OS9NET", - "ETHERTYPE_PACER", - "ETHERTYPE_PAE", - "ETHERTYPE_PCS", - "ETHERTYPE_PLANNING", - "ETHERTYPE_PPP", - "ETHERTYPE_PPPOE", - "ETHERTYPE_PPPOEDISC", - "ETHERTYPE_PRIMENTS", - "ETHERTYPE_PUP", - "ETHERTYPE_PUPAT", - "ETHERTYPE_QINQ", - "ETHERTYPE_RACAL", - "ETHERTYPE_RATIONAL", - "ETHERTYPE_RAWFR", - "ETHERTYPE_RCL", - "ETHERTYPE_RDP", - "ETHERTYPE_RETIX", - "ETHERTYPE_REVARP", - "ETHERTYPE_SCA", - "ETHERTYPE_SECTRA", - "ETHERTYPE_SECUREDATA", - "ETHERTYPE_SGITW", - "ETHERTYPE_SG_BOUNCE", - "ETHERTYPE_SG_DIAG", - "ETHERTYPE_SG_NETGAMES", - "ETHERTYPE_SG_RESV", - "ETHERTYPE_SIMNET", - "ETHERTYPE_SLOW", - "ETHERTYPE_SLOWPROTOCOLS", - "ETHERTYPE_SNA", - "ETHERTYPE_SNMP", - "ETHERTYPE_SONIX", - "ETHERTYPE_SPIDER", - "ETHERTYPE_SPRITE", - "ETHERTYPE_STP", - "ETHERTYPE_TALARIS", - "ETHERTYPE_TALARISMC", - "ETHERTYPE_TCPCOMP", - "ETHERTYPE_TCPSM", - "ETHERTYPE_TEC", - "ETHERTYPE_TIGAN", - "ETHERTYPE_TRAIL", - "ETHERTYPE_TRANSETHER", - "ETHERTYPE_TYMSHARE", - "ETHERTYPE_UBBST", - "ETHERTYPE_UBDEBUG", - "ETHERTYPE_UBDIAGLOOP", - "ETHERTYPE_UBDL", - "ETHERTYPE_UBNIU", - "ETHERTYPE_UBNMC", - "ETHERTYPE_VALID", - "ETHERTYPE_VARIAN", - "ETHERTYPE_VAXELN", - "ETHERTYPE_VEECO", - "ETHERTYPE_VEXP", - "ETHERTYPE_VGLAB", - "ETHERTYPE_VINES", - "ETHERTYPE_VINESECHO", - "ETHERTYPE_VINESLOOP", - "ETHERTYPE_VITAL", - "ETHERTYPE_VLAN", - "ETHERTYPE_VLTLMAN", - "ETHERTYPE_VPROD", - "ETHERTYPE_VURESERVED", - "ETHERTYPE_WATERLOO", - "ETHERTYPE_WELLFLEET", - "ETHERTYPE_X25", - "ETHERTYPE_X75", - "ETHERTYPE_XNSSM", - "ETHERTYPE_XTP", - "ETHER_ADDR_LEN", - "ETHER_ALIGN", - "ETHER_CRC_LEN", - "ETHER_CRC_POLY_BE", - "ETHER_CRC_POLY_LE", - "ETHER_HDR_LEN", - "ETHER_MAX_DIX_LEN", - "ETHER_MAX_LEN", - "ETHER_MAX_LEN_JUMBO", - "ETHER_MIN_LEN", - "ETHER_PPPOE_ENCAP_LEN", - "ETHER_TYPE_LEN", - "ETHER_VLAN_ENCAP_LEN", - "ETH_P_1588", - "ETH_P_8021Q", - "ETH_P_802_2", - "ETH_P_802_3", - "ETH_P_AARP", - "ETH_P_ALL", - "ETH_P_AOE", - "ETH_P_ARCNET", - "ETH_P_ARP", - "ETH_P_ATALK", - "ETH_P_ATMFATE", - "ETH_P_ATMMPOA", - "ETH_P_AX25", - "ETH_P_BPQ", - "ETH_P_CAIF", - "ETH_P_CAN", - "ETH_P_CONTROL", - "ETH_P_CUST", - "ETH_P_DDCMP", - "ETH_P_DEC", - "ETH_P_DIAG", - "ETH_P_DNA_DL", - "ETH_P_DNA_RC", - "ETH_P_DNA_RT", - "ETH_P_DSA", - "ETH_P_ECONET", - "ETH_P_EDSA", - "ETH_P_FCOE", - "ETH_P_FIP", - "ETH_P_HDLC", - "ETH_P_IEEE802154", - "ETH_P_IEEEPUP", - "ETH_P_IEEEPUPAT", - "ETH_P_IP", - "ETH_P_IPV6", - "ETH_P_IPX", - "ETH_P_IRDA", - "ETH_P_LAT", - "ETH_P_LINK_CTL", - "ETH_P_LOCALTALK", - "ETH_P_LOOP", - "ETH_P_MOBITEX", - "ETH_P_MPLS_MC", - "ETH_P_MPLS_UC", - "ETH_P_PAE", - "ETH_P_PAUSE", - "ETH_P_PHONET", - "ETH_P_PPPTALK", - "ETH_P_PPP_DISC", - "ETH_P_PPP_MP", - "ETH_P_PPP_SES", - "ETH_P_PUP", - "ETH_P_PUPAT", - "ETH_P_RARP", - "ETH_P_SCA", - "ETH_P_SLOW", - "ETH_P_SNAP", - "ETH_P_TEB", - "ETH_P_TIPC", - "ETH_P_TRAILER", - "ETH_P_TR_802_2", - "ETH_P_WAN_PPP", - "ETH_P_WCCP", - "ETH_P_X25", - "ETIME", - "ETIMEDOUT", - "ETOOMANYREFS", - "ETXTBSY", - "EUCLEAN", - "EUNATCH", - "EUSERS", - "EVFILT_AIO", - "EVFILT_FS", - "EVFILT_LIO", - "EVFILT_MACHPORT", - "EVFILT_PROC", - "EVFILT_READ", - "EVFILT_SIGNAL", - "EVFILT_SYSCOUNT", - "EVFILT_THREADMARKER", - "EVFILT_TIMER", - "EVFILT_USER", - "EVFILT_VM", - "EVFILT_VNODE", - "EVFILT_WRITE", - "EV_ADD", - "EV_CLEAR", - "EV_DELETE", - "EV_DISABLE", - "EV_DISPATCH", - "EV_DROP", - "EV_ENABLE", - "EV_EOF", - "EV_ERROR", - "EV_FLAG0", - "EV_FLAG1", - "EV_ONESHOT", - "EV_OOBAND", - "EV_POLL", - "EV_RECEIPT", - "EV_SYSFLAGS", - "EWINDOWS", - "EWOULDBLOCK", - "EXDEV", - "EXFULL", - "EXTA", - "EXTB", - "EXTPROC", - "Environ", - "EpollCreate", - "EpollCreate1", - "EpollCtl", - "EpollEvent", - "EpollWait", - "Errno", - "EscapeArg", - "Exchangedata", - "Exec", - "Exit", - "ExitProcess", - "FD_CLOEXEC", - "FD_SETSIZE", - "FILE_ACTION_ADDED", - "FILE_ACTION_MODIFIED", - "FILE_ACTION_REMOVED", - "FILE_ACTION_RENAMED_NEW_NAME", - "FILE_ACTION_RENAMED_OLD_NAME", - "FILE_APPEND_DATA", - "FILE_ATTRIBUTE_ARCHIVE", - "FILE_ATTRIBUTE_DIRECTORY", - "FILE_ATTRIBUTE_HIDDEN", - "FILE_ATTRIBUTE_NORMAL", - "FILE_ATTRIBUTE_READONLY", - "FILE_ATTRIBUTE_REPARSE_POINT", - "FILE_ATTRIBUTE_SYSTEM", - "FILE_BEGIN", - "FILE_CURRENT", - "FILE_END", - "FILE_FLAG_BACKUP_SEMANTICS", - "FILE_FLAG_OPEN_REPARSE_POINT", - "FILE_FLAG_OVERLAPPED", - "FILE_LIST_DIRECTORY", - "FILE_MAP_COPY", - "FILE_MAP_EXECUTE", - "FILE_MAP_READ", - "FILE_MAP_WRITE", - "FILE_NOTIFY_CHANGE_ATTRIBUTES", - "FILE_NOTIFY_CHANGE_CREATION", - "FILE_NOTIFY_CHANGE_DIR_NAME", - "FILE_NOTIFY_CHANGE_FILE_NAME", - "FILE_NOTIFY_CHANGE_LAST_ACCESS", - "FILE_NOTIFY_CHANGE_LAST_WRITE", - "FILE_NOTIFY_CHANGE_SIZE", - "FILE_SHARE_DELETE", - "FILE_SHARE_READ", - "FILE_SHARE_WRITE", - "FILE_SKIP_COMPLETION_PORT_ON_SUCCESS", - "FILE_SKIP_SET_EVENT_ON_HANDLE", - "FILE_TYPE_CHAR", - "FILE_TYPE_DISK", - "FILE_TYPE_PIPE", - "FILE_TYPE_REMOTE", - "FILE_TYPE_UNKNOWN", - "FILE_WRITE_ATTRIBUTES", - "FLUSHO", - "FORMAT_MESSAGE_ALLOCATE_BUFFER", - "FORMAT_MESSAGE_ARGUMENT_ARRAY", - "FORMAT_MESSAGE_FROM_HMODULE", - "FORMAT_MESSAGE_FROM_STRING", - "FORMAT_MESSAGE_FROM_SYSTEM", - "FORMAT_MESSAGE_IGNORE_INSERTS", - "FORMAT_MESSAGE_MAX_WIDTH_MASK", - "FSCTL_GET_REPARSE_POINT", - "F_ADDFILESIGS", - "F_ADDSIGS", - "F_ALLOCATEALL", - "F_ALLOCATECONTIG", - "F_CANCEL", - "F_CHKCLEAN", - "F_CLOSEM", - "F_DUP2FD", - "F_DUP2FD_CLOEXEC", - "F_DUPFD", - "F_DUPFD_CLOEXEC", - "F_EXLCK", - "F_FINDSIGS", - "F_FLUSH_DATA", - "F_FREEZE_FS", - "F_FSCTL", - "F_FSDIRMASK", - "F_FSIN", - "F_FSINOUT", - "F_FSOUT", - "F_FSPRIV", - "F_FSVOID", - "F_FULLFSYNC", - "F_GETCODEDIR", - "F_GETFD", - "F_GETFL", - "F_GETLEASE", - "F_GETLK", - "F_GETLK64", - "F_GETLKPID", - "F_GETNOSIGPIPE", - "F_GETOWN", - "F_GETOWN_EX", - "F_GETPATH", - "F_GETPATH_MTMINFO", - "F_GETPIPE_SZ", - "F_GETPROTECTIONCLASS", - "F_GETPROTECTIONLEVEL", - "F_GETSIG", - "F_GLOBAL_NOCACHE", - "F_LOCK", - "F_LOG2PHYS", - "F_LOG2PHYS_EXT", - "F_MARKDEPENDENCY", - "F_MAXFD", - "F_NOCACHE", - "F_NODIRECT", - "F_NOTIFY", - "F_OGETLK", - "F_OK", - "F_OSETLK", - "F_OSETLKW", - "F_PARAM_MASK", - "F_PARAM_MAX", - "F_PATHPKG_CHECK", - "F_PEOFPOSMODE", - "F_PREALLOCATE", - "F_RDADVISE", - "F_RDAHEAD", - "F_RDLCK", - "F_READAHEAD", - "F_READBOOTSTRAP", - "F_SETBACKINGSTORE", - "F_SETFD", - "F_SETFL", - "F_SETLEASE", - "F_SETLK", - "F_SETLK64", - "F_SETLKW", - "F_SETLKW64", - "F_SETLKWTIMEOUT", - "F_SETLK_REMOTE", - "F_SETNOSIGPIPE", - "F_SETOWN", - "F_SETOWN_EX", - "F_SETPIPE_SZ", - "F_SETPROTECTIONCLASS", - "F_SETSIG", - "F_SETSIZE", - "F_SHLCK", - "F_SINGLE_WRITER", - "F_TEST", - "F_THAW_FS", - "F_TLOCK", - "F_TRANSCODEKEY", - "F_ULOCK", - "F_UNLCK", - "F_UNLCKSYS", - "F_VOLPOSMODE", - "F_WRITEBOOTSTRAP", - "F_WRLCK", - "Faccessat", - "Fallocate", - "Fbootstraptransfer_t", - "Fchdir", - "Fchflags", - "Fchmod", - "Fchmodat", - "Fchown", - "Fchownat", - "FcntlFlock", - "FdSet", - "Fdatasync", - "FileNotifyInformation", - "Filetime", - "FindClose", - "FindFirstFile", - "FindNextFile", - "Flock", - "Flock_t", - "FlushBpf", - "FlushFileBuffers", - "FlushViewOfFile", - "ForkExec", - "ForkLock", - "FormatMessage", - "Fpathconf", - "FreeAddrInfoW", - "FreeEnvironmentStrings", - "FreeLibrary", - "Fsid", - "Fstat", - "Fstatat", - "Fstatfs", - "Fstore_t", - "Fsync", - "Ftruncate", - "FullPath", - "Futimes", - "Futimesat", - "GENERIC_ALL", - "GENERIC_EXECUTE", - "GENERIC_READ", - "GENERIC_WRITE", - "GUID", - "GetAcceptExSockaddrs", - "GetAdaptersInfo", - "GetAddrInfoW", - "GetCommandLine", - "GetComputerName", - "GetConsoleMode", - "GetCurrentDirectory", - "GetCurrentProcess", - "GetEnvironmentStrings", - "GetEnvironmentVariable", - "GetExitCodeProcess", - "GetFileAttributes", - "GetFileAttributesEx", - "GetFileExInfoStandard", - "GetFileExMaxInfoLevel", - "GetFileInformationByHandle", - "GetFileType", - "GetFullPathName", - "GetHostByName", - "GetIfEntry", - "GetLastError", - "GetLengthSid", - "GetLongPathName", - "GetProcAddress", - "GetProcessTimes", - "GetProtoByName", - "GetQueuedCompletionStatus", - "GetServByName", - "GetShortPathName", - "GetStartupInfo", - "GetStdHandle", - "GetSystemTimeAsFileTime", - "GetTempPath", - "GetTimeZoneInformation", - "GetTokenInformation", - "GetUserNameEx", - "GetUserProfileDirectory", - "GetVersion", - "Getcwd", - "Getdents", - "Getdirentries", - "Getdtablesize", - "Getegid", - "Getenv", - "Geteuid", - "Getfsstat", - "Getgid", - "Getgroups", - "Getpagesize", - "Getpeername", - "Getpgid", - "Getpgrp", - "Getpid", - "Getppid", - "Getpriority", - "Getrlimit", - "Getrusage", - "Getsid", - "Getsockname", - "Getsockopt", - "GetsockoptByte", - "GetsockoptICMPv6Filter", - "GetsockoptIPMreq", - "GetsockoptIPMreqn", - "GetsockoptIPv6MTUInfo", - "GetsockoptIPv6Mreq", - "GetsockoptInet4Addr", - "GetsockoptInt", - "GetsockoptUcred", - "Gettid", - "Gettimeofday", - "Getuid", - "Getwd", - "Getxattr", - "HANDLE_FLAG_INHERIT", - "HKEY_CLASSES_ROOT", - "HKEY_CURRENT_CONFIG", - "HKEY_CURRENT_USER", - "HKEY_DYN_DATA", - "HKEY_LOCAL_MACHINE", - "HKEY_PERFORMANCE_DATA", - "HKEY_USERS", - "HUPCL", - "Handle", - "Hostent", - "ICANON", - "ICMP6_FILTER", - "ICMPV6_FILTER", - "ICMPv6Filter", - "ICRNL", - "IEXTEN", - "IFAN_ARRIVAL", - "IFAN_DEPARTURE", - "IFA_ADDRESS", - "IFA_ANYCAST", - "IFA_BROADCAST", - "IFA_CACHEINFO", - "IFA_F_DADFAILED", - "IFA_F_DEPRECATED", - "IFA_F_HOMEADDRESS", - "IFA_F_NODAD", - "IFA_F_OPTIMISTIC", - "IFA_F_PERMANENT", - "IFA_F_SECONDARY", - "IFA_F_TEMPORARY", - "IFA_F_TENTATIVE", - "IFA_LABEL", - "IFA_LOCAL", - "IFA_MAX", - "IFA_MULTICAST", - "IFA_ROUTE", - "IFA_UNSPEC", - "IFF_ALLMULTI", - "IFF_ALTPHYS", - "IFF_AUTOMEDIA", - "IFF_BROADCAST", - "IFF_CANTCHANGE", - "IFF_CANTCONFIG", - "IFF_DEBUG", - "IFF_DRV_OACTIVE", - "IFF_DRV_RUNNING", - "IFF_DYING", - "IFF_DYNAMIC", - "IFF_LINK0", - "IFF_LINK1", - "IFF_LINK2", - "IFF_LOOPBACK", - "IFF_MASTER", - "IFF_MONITOR", - "IFF_MULTICAST", - "IFF_NOARP", - "IFF_NOTRAILERS", - "IFF_NO_PI", - "IFF_OACTIVE", - "IFF_ONE_QUEUE", - "IFF_POINTOPOINT", - "IFF_POINTTOPOINT", - "IFF_PORTSEL", - "IFF_PPROMISC", - "IFF_PROMISC", - "IFF_RENAMING", - "IFF_RUNNING", - "IFF_SIMPLEX", - "IFF_SLAVE", - "IFF_SMART", - "IFF_STATICARP", - "IFF_TAP", - "IFF_TUN", - "IFF_TUN_EXCL", - "IFF_UP", - "IFF_VNET_HDR", - "IFLA_ADDRESS", - "IFLA_BROADCAST", - "IFLA_COST", - "IFLA_IFALIAS", - "IFLA_IFNAME", - "IFLA_LINK", - "IFLA_LINKINFO", - "IFLA_LINKMODE", - "IFLA_MAP", - "IFLA_MASTER", - "IFLA_MAX", - "IFLA_MTU", - "IFLA_NET_NS_PID", - "IFLA_OPERSTATE", - "IFLA_PRIORITY", - "IFLA_PROTINFO", - "IFLA_QDISC", - "IFLA_STATS", - "IFLA_TXQLEN", - "IFLA_UNSPEC", - "IFLA_WEIGHT", - "IFLA_WIRELESS", - "IFNAMSIZ", - "IFT_1822", - "IFT_A12MPPSWITCH", - "IFT_AAL2", - "IFT_AAL5", - "IFT_ADSL", - "IFT_AFLANE8023", - "IFT_AFLANE8025", - "IFT_ARAP", - "IFT_ARCNET", - "IFT_ARCNETPLUS", - "IFT_ASYNC", - "IFT_ATM", - "IFT_ATMDXI", - "IFT_ATMFUNI", - "IFT_ATMIMA", - "IFT_ATMLOGICAL", - "IFT_ATMRADIO", - "IFT_ATMSUBINTERFACE", - "IFT_ATMVCIENDPT", - "IFT_ATMVIRTUAL", - "IFT_BGPPOLICYACCOUNTING", - "IFT_BLUETOOTH", - "IFT_BRIDGE", - "IFT_BSC", - "IFT_CARP", - "IFT_CCTEMUL", - "IFT_CELLULAR", - "IFT_CEPT", - "IFT_CES", - "IFT_CHANNEL", - "IFT_CNR", - "IFT_COFFEE", - "IFT_COMPOSITELINK", - "IFT_DCN", - "IFT_DIGITALPOWERLINE", - "IFT_DIGITALWRAPPEROVERHEADCHANNEL", - "IFT_DLSW", - "IFT_DOCSCABLEDOWNSTREAM", - "IFT_DOCSCABLEMACLAYER", - "IFT_DOCSCABLEUPSTREAM", - "IFT_DOCSCABLEUPSTREAMCHANNEL", - "IFT_DS0", - "IFT_DS0BUNDLE", - "IFT_DS1FDL", - "IFT_DS3", - "IFT_DTM", - "IFT_DUMMY", - "IFT_DVBASILN", - "IFT_DVBASIOUT", - "IFT_DVBRCCDOWNSTREAM", - "IFT_DVBRCCMACLAYER", - "IFT_DVBRCCUPSTREAM", - "IFT_ECONET", - "IFT_ENC", - "IFT_EON", - "IFT_EPLRS", - "IFT_ESCON", - "IFT_ETHER", - "IFT_FAITH", - "IFT_FAST", - "IFT_FASTETHER", - "IFT_FASTETHERFX", - "IFT_FDDI", - "IFT_FIBRECHANNEL", - "IFT_FRAMERELAYINTERCONNECT", - "IFT_FRAMERELAYMPI", - "IFT_FRDLCIENDPT", - "IFT_FRELAY", - "IFT_FRELAYDCE", - "IFT_FRF16MFRBUNDLE", - "IFT_FRFORWARD", - "IFT_G703AT2MB", - "IFT_G703AT64K", - "IFT_GIF", - "IFT_GIGABITETHERNET", - "IFT_GR303IDT", - "IFT_GR303RDT", - "IFT_H323GATEKEEPER", - "IFT_H323PROXY", - "IFT_HDH1822", - "IFT_HDLC", - "IFT_HDSL2", - "IFT_HIPERLAN2", - "IFT_HIPPI", - "IFT_HIPPIINTERFACE", - "IFT_HOSTPAD", - "IFT_HSSI", - "IFT_HY", - "IFT_IBM370PARCHAN", - "IFT_IDSL", - "IFT_IEEE1394", - "IFT_IEEE80211", - "IFT_IEEE80212", - "IFT_IEEE8023ADLAG", - "IFT_IFGSN", - "IFT_IMT", - "IFT_INFINIBAND", - "IFT_INTERLEAVE", - "IFT_IP", - "IFT_IPFORWARD", - "IFT_IPOVERATM", - "IFT_IPOVERCDLC", - "IFT_IPOVERCLAW", - "IFT_IPSWITCH", - "IFT_IPXIP", - "IFT_ISDN", - "IFT_ISDNBASIC", - "IFT_ISDNPRIMARY", - "IFT_ISDNS", - "IFT_ISDNU", - "IFT_ISO88022LLC", - "IFT_ISO88023", - "IFT_ISO88024", - "IFT_ISO88025", - "IFT_ISO88025CRFPINT", - "IFT_ISO88025DTR", - "IFT_ISO88025FIBER", - "IFT_ISO88026", - "IFT_ISUP", - "IFT_L2VLAN", - "IFT_L3IPVLAN", - "IFT_L3IPXVLAN", - "IFT_LAPB", - "IFT_LAPD", - "IFT_LAPF", - "IFT_LINEGROUP", - "IFT_LOCALTALK", - "IFT_LOOP", - "IFT_MEDIAMAILOVERIP", - "IFT_MFSIGLINK", - "IFT_MIOX25", - "IFT_MODEM", - "IFT_MPC", - "IFT_MPLS", - "IFT_MPLSTUNNEL", - "IFT_MSDSL", - "IFT_MVL", - "IFT_MYRINET", - "IFT_NFAS", - "IFT_NSIP", - "IFT_OPTICALCHANNEL", - "IFT_OPTICALTRANSPORT", - "IFT_OTHER", - "IFT_P10", - "IFT_P80", - "IFT_PARA", - "IFT_PDP", - "IFT_PFLOG", - "IFT_PFLOW", - "IFT_PFSYNC", - "IFT_PLC", - "IFT_PON155", - "IFT_PON622", - "IFT_POS", - "IFT_PPP", - "IFT_PPPMULTILINKBUNDLE", - "IFT_PROPATM", - "IFT_PROPBWAP2MP", - "IFT_PROPCNLS", - "IFT_PROPDOCSWIRELESSDOWNSTREAM", - "IFT_PROPDOCSWIRELESSMACLAYER", - "IFT_PROPDOCSWIRELESSUPSTREAM", - "IFT_PROPMUX", - "IFT_PROPVIRTUAL", - "IFT_PROPWIRELESSP2P", - "IFT_PTPSERIAL", - "IFT_PVC", - "IFT_Q2931", - "IFT_QLLC", - "IFT_RADIOMAC", - "IFT_RADSL", - "IFT_REACHDSL", - "IFT_RFC1483", - "IFT_RS232", - "IFT_RSRB", - "IFT_SDLC", - "IFT_SDSL", - "IFT_SHDSL", - "IFT_SIP", - "IFT_SIPSIG", - "IFT_SIPTG", - "IFT_SLIP", - "IFT_SMDSDXI", - "IFT_SMDSICIP", - "IFT_SONET", - "IFT_SONETOVERHEADCHANNEL", - "IFT_SONETPATH", - "IFT_SONETVT", - "IFT_SRP", - "IFT_SS7SIGLINK", - "IFT_STACKTOSTACK", - "IFT_STARLAN", - "IFT_STF", - "IFT_T1", - "IFT_TDLC", - "IFT_TELINK", - "IFT_TERMPAD", - "IFT_TR008", - "IFT_TRANSPHDLC", - "IFT_TUNNEL", - "IFT_ULTRA", - "IFT_USB", - "IFT_V11", - "IFT_V35", - "IFT_V36", - "IFT_V37", - "IFT_VDSL", - "IFT_VIRTUALIPADDRESS", - "IFT_VIRTUALTG", - "IFT_VOICEDID", - "IFT_VOICEEM", - "IFT_VOICEEMFGD", - "IFT_VOICEENCAP", - "IFT_VOICEFGDEANA", - "IFT_VOICEFXO", - "IFT_VOICEFXS", - "IFT_VOICEOVERATM", - "IFT_VOICEOVERCABLE", - "IFT_VOICEOVERFRAMERELAY", - "IFT_VOICEOVERIP", - "IFT_X213", - "IFT_X25", - "IFT_X25DDN", - "IFT_X25HUNTGROUP", - "IFT_X25MLP", - "IFT_X25PLE", - "IFT_XETHER", - "IGNBRK", - "IGNCR", - "IGNORE", - "IGNPAR", - "IMAXBEL", - "INFINITE", - "INLCR", - "INPCK", - "INVALID_FILE_ATTRIBUTES", - "IN_ACCESS", - "IN_ALL_EVENTS", - "IN_ATTRIB", - "IN_CLASSA_HOST", - "IN_CLASSA_MAX", - "IN_CLASSA_NET", - "IN_CLASSA_NSHIFT", - "IN_CLASSB_HOST", - "IN_CLASSB_MAX", - "IN_CLASSB_NET", - "IN_CLASSB_NSHIFT", - "IN_CLASSC_HOST", - "IN_CLASSC_NET", - "IN_CLASSC_NSHIFT", - "IN_CLASSD_HOST", - "IN_CLASSD_NET", - "IN_CLASSD_NSHIFT", - "IN_CLOEXEC", - "IN_CLOSE", - "IN_CLOSE_NOWRITE", - "IN_CLOSE_WRITE", - "IN_CREATE", - "IN_DELETE", - "IN_DELETE_SELF", - "IN_DONT_FOLLOW", - "IN_EXCL_UNLINK", - "IN_IGNORED", - "IN_ISDIR", - "IN_LINKLOCALNETNUM", - "IN_LOOPBACKNET", - "IN_MASK_ADD", - "IN_MODIFY", - "IN_MOVE", - "IN_MOVED_FROM", - "IN_MOVED_TO", - "IN_MOVE_SELF", - "IN_NONBLOCK", - "IN_ONESHOT", - "IN_ONLYDIR", - "IN_OPEN", - "IN_Q_OVERFLOW", - "IN_RFC3021_HOST", - "IN_RFC3021_MASK", - "IN_RFC3021_NET", - "IN_RFC3021_NSHIFT", - "IN_UNMOUNT", - "IOC_IN", - "IOC_INOUT", - "IOC_OUT", - "IOC_VENDOR", - "IOC_WS2", - "IO_REPARSE_TAG_SYMLINK", - "IPMreq", - "IPMreqn", - "IPPROTO_3PC", - "IPPROTO_ADFS", - "IPPROTO_AH", - "IPPROTO_AHIP", - "IPPROTO_APES", - "IPPROTO_ARGUS", - "IPPROTO_AX25", - "IPPROTO_BHA", - "IPPROTO_BLT", - "IPPROTO_BRSATMON", - "IPPROTO_CARP", - "IPPROTO_CFTP", - "IPPROTO_CHAOS", - "IPPROTO_CMTP", - "IPPROTO_COMP", - "IPPROTO_CPHB", - "IPPROTO_CPNX", - "IPPROTO_DCCP", - "IPPROTO_DDP", - "IPPROTO_DGP", - "IPPROTO_DIVERT", - "IPPROTO_DIVERT_INIT", - "IPPROTO_DIVERT_RESP", - "IPPROTO_DONE", - "IPPROTO_DSTOPTS", - "IPPROTO_EGP", - "IPPROTO_EMCON", - "IPPROTO_ENCAP", - "IPPROTO_EON", - "IPPROTO_ESP", - "IPPROTO_ETHERIP", - "IPPROTO_FRAGMENT", - "IPPROTO_GGP", - "IPPROTO_GMTP", - "IPPROTO_GRE", - "IPPROTO_HELLO", - "IPPROTO_HMP", - "IPPROTO_HOPOPTS", - "IPPROTO_ICMP", - "IPPROTO_ICMPV6", - "IPPROTO_IDP", - "IPPROTO_IDPR", - "IPPROTO_IDRP", - "IPPROTO_IGMP", - "IPPROTO_IGP", - "IPPROTO_IGRP", - "IPPROTO_IL", - "IPPROTO_INLSP", - "IPPROTO_INP", - "IPPROTO_IP", - "IPPROTO_IPCOMP", - "IPPROTO_IPCV", - "IPPROTO_IPEIP", - "IPPROTO_IPIP", - "IPPROTO_IPPC", - "IPPROTO_IPV4", - "IPPROTO_IPV6", - "IPPROTO_IPV6_ICMP", - "IPPROTO_IRTP", - "IPPROTO_KRYPTOLAN", - "IPPROTO_LARP", - "IPPROTO_LEAF1", - "IPPROTO_LEAF2", - "IPPROTO_MAX", - "IPPROTO_MAXID", - "IPPROTO_MEAS", - "IPPROTO_MH", - "IPPROTO_MHRP", - "IPPROTO_MICP", - "IPPROTO_MOBILE", - "IPPROTO_MPLS", - "IPPROTO_MTP", - "IPPROTO_MUX", - "IPPROTO_ND", - "IPPROTO_NHRP", - "IPPROTO_NONE", - "IPPROTO_NSP", - "IPPROTO_NVPII", - "IPPROTO_OLD_DIVERT", - "IPPROTO_OSPFIGP", - "IPPROTO_PFSYNC", - "IPPROTO_PGM", - "IPPROTO_PIGP", - "IPPROTO_PIM", - "IPPROTO_PRM", - "IPPROTO_PUP", - "IPPROTO_PVP", - "IPPROTO_RAW", - "IPPROTO_RCCMON", - "IPPROTO_RDP", - "IPPROTO_ROUTING", - "IPPROTO_RSVP", - "IPPROTO_RVD", - "IPPROTO_SATEXPAK", - "IPPROTO_SATMON", - "IPPROTO_SCCSP", - "IPPROTO_SCTP", - "IPPROTO_SDRP", - "IPPROTO_SEND", - "IPPROTO_SEP", - "IPPROTO_SKIP", - "IPPROTO_SPACER", - "IPPROTO_SRPC", - "IPPROTO_ST", - "IPPROTO_SVMTP", - "IPPROTO_SWIPE", - "IPPROTO_TCF", - "IPPROTO_TCP", - "IPPROTO_TLSP", - "IPPROTO_TP", - "IPPROTO_TPXX", - "IPPROTO_TRUNK1", - "IPPROTO_TRUNK2", - "IPPROTO_TTP", - "IPPROTO_UDP", - "IPPROTO_UDPLITE", - "IPPROTO_VINES", - "IPPROTO_VISA", - "IPPROTO_VMTP", - "IPPROTO_VRRP", - "IPPROTO_WBEXPAK", - "IPPROTO_WBMON", - "IPPROTO_WSN", - "IPPROTO_XNET", - "IPPROTO_XTP", - "IPV6_2292DSTOPTS", - "IPV6_2292HOPLIMIT", - "IPV6_2292HOPOPTS", - "IPV6_2292NEXTHOP", - "IPV6_2292PKTINFO", - "IPV6_2292PKTOPTIONS", - "IPV6_2292RTHDR", - "IPV6_ADDRFORM", - "IPV6_ADD_MEMBERSHIP", - "IPV6_AUTHHDR", - "IPV6_AUTH_LEVEL", - "IPV6_AUTOFLOWLABEL", - "IPV6_BINDANY", - "IPV6_BINDV6ONLY", - "IPV6_BOUND_IF", - "IPV6_CHECKSUM", - "IPV6_DEFAULT_MULTICAST_HOPS", - "IPV6_DEFAULT_MULTICAST_LOOP", - "IPV6_DEFHLIM", - "IPV6_DONTFRAG", - "IPV6_DROP_MEMBERSHIP", - "IPV6_DSTOPTS", - "IPV6_ESP_NETWORK_LEVEL", - "IPV6_ESP_TRANS_LEVEL", - "IPV6_FAITH", - "IPV6_FLOWINFO_MASK", - "IPV6_FLOWLABEL_MASK", - "IPV6_FRAGTTL", - "IPV6_FW_ADD", - "IPV6_FW_DEL", - "IPV6_FW_FLUSH", - "IPV6_FW_GET", - "IPV6_FW_ZERO", - "IPV6_HLIMDEC", - "IPV6_HOPLIMIT", - "IPV6_HOPOPTS", - "IPV6_IPCOMP_LEVEL", - "IPV6_IPSEC_POLICY", - "IPV6_JOIN_ANYCAST", - "IPV6_JOIN_GROUP", - "IPV6_LEAVE_ANYCAST", - "IPV6_LEAVE_GROUP", - "IPV6_MAXHLIM", - "IPV6_MAXOPTHDR", - "IPV6_MAXPACKET", - "IPV6_MAX_GROUP_SRC_FILTER", - "IPV6_MAX_MEMBERSHIPS", - "IPV6_MAX_SOCK_SRC_FILTER", - "IPV6_MIN_MEMBERSHIPS", - "IPV6_MMTU", - "IPV6_MSFILTER", - "IPV6_MTU", - "IPV6_MTU_DISCOVER", - "IPV6_MULTICAST_HOPS", - "IPV6_MULTICAST_IF", - "IPV6_MULTICAST_LOOP", - "IPV6_NEXTHOP", - "IPV6_OPTIONS", - "IPV6_PATHMTU", - "IPV6_PIPEX", - "IPV6_PKTINFO", - "IPV6_PMTUDISC_DO", - "IPV6_PMTUDISC_DONT", - "IPV6_PMTUDISC_PROBE", - "IPV6_PMTUDISC_WANT", - "IPV6_PORTRANGE", - "IPV6_PORTRANGE_DEFAULT", - "IPV6_PORTRANGE_HIGH", - "IPV6_PORTRANGE_LOW", - "IPV6_PREFER_TEMPADDR", - "IPV6_RECVDSTOPTS", - "IPV6_RECVDSTPORT", - "IPV6_RECVERR", - "IPV6_RECVHOPLIMIT", - "IPV6_RECVHOPOPTS", - "IPV6_RECVPATHMTU", - "IPV6_RECVPKTINFO", - "IPV6_RECVRTHDR", - "IPV6_RECVTCLASS", - "IPV6_ROUTER_ALERT", - "IPV6_RTABLE", - "IPV6_RTHDR", - "IPV6_RTHDRDSTOPTS", - "IPV6_RTHDR_LOOSE", - "IPV6_RTHDR_STRICT", - "IPV6_RTHDR_TYPE_0", - "IPV6_RXDSTOPTS", - "IPV6_RXHOPOPTS", - "IPV6_SOCKOPT_RESERVED1", - "IPV6_TCLASS", - "IPV6_UNICAST_HOPS", - "IPV6_USE_MIN_MTU", - "IPV6_V6ONLY", - "IPV6_VERSION", - "IPV6_VERSION_MASK", - "IPV6_XFRM_POLICY", - "IP_ADD_MEMBERSHIP", - "IP_ADD_SOURCE_MEMBERSHIP", - "IP_AUTH_LEVEL", - "IP_BINDANY", - "IP_BLOCK_SOURCE", - "IP_BOUND_IF", - "IP_DEFAULT_MULTICAST_LOOP", - "IP_DEFAULT_MULTICAST_TTL", - "IP_DF", - "IP_DIVERTFL", - "IP_DONTFRAG", - "IP_DROP_MEMBERSHIP", - "IP_DROP_SOURCE_MEMBERSHIP", - "IP_DUMMYNET3", - "IP_DUMMYNET_CONFIGURE", - "IP_DUMMYNET_DEL", - "IP_DUMMYNET_FLUSH", - "IP_DUMMYNET_GET", - "IP_EF", - "IP_ERRORMTU", - "IP_ESP_NETWORK_LEVEL", - "IP_ESP_TRANS_LEVEL", - "IP_FAITH", - "IP_FREEBIND", - "IP_FW3", - "IP_FW_ADD", - "IP_FW_DEL", - "IP_FW_FLUSH", - "IP_FW_GET", - "IP_FW_NAT_CFG", - "IP_FW_NAT_DEL", - "IP_FW_NAT_GET_CONFIG", - "IP_FW_NAT_GET_LOG", - "IP_FW_RESETLOG", - "IP_FW_TABLE_ADD", - "IP_FW_TABLE_DEL", - "IP_FW_TABLE_FLUSH", - "IP_FW_TABLE_GETSIZE", - "IP_FW_TABLE_LIST", - "IP_FW_ZERO", - "IP_HDRINCL", - "IP_IPCOMP_LEVEL", - "IP_IPSECFLOWINFO", - "IP_IPSEC_LOCAL_AUTH", - "IP_IPSEC_LOCAL_CRED", - "IP_IPSEC_LOCAL_ID", - "IP_IPSEC_POLICY", - "IP_IPSEC_REMOTE_AUTH", - "IP_IPSEC_REMOTE_CRED", - "IP_IPSEC_REMOTE_ID", - "IP_MAXPACKET", - "IP_MAX_GROUP_SRC_FILTER", - "IP_MAX_MEMBERSHIPS", - "IP_MAX_SOCK_MUTE_FILTER", - "IP_MAX_SOCK_SRC_FILTER", - "IP_MAX_SOURCE_FILTER", - "IP_MF", - "IP_MINFRAGSIZE", - "IP_MINTTL", - "IP_MIN_MEMBERSHIPS", - "IP_MSFILTER", - "IP_MSS", - "IP_MTU", - "IP_MTU_DISCOVER", - "IP_MULTICAST_IF", - "IP_MULTICAST_IFINDEX", - "IP_MULTICAST_LOOP", - "IP_MULTICAST_TTL", - "IP_MULTICAST_VIF", - "IP_NAT__XXX", - "IP_OFFMASK", - "IP_OLD_FW_ADD", - "IP_OLD_FW_DEL", - "IP_OLD_FW_FLUSH", - "IP_OLD_FW_GET", - "IP_OLD_FW_RESETLOG", - "IP_OLD_FW_ZERO", - "IP_ONESBCAST", - "IP_OPTIONS", - "IP_ORIGDSTADDR", - "IP_PASSSEC", - "IP_PIPEX", - "IP_PKTINFO", - "IP_PKTOPTIONS", - "IP_PMTUDISC", - "IP_PMTUDISC_DO", - "IP_PMTUDISC_DONT", - "IP_PMTUDISC_PROBE", - "IP_PMTUDISC_WANT", - "IP_PORTRANGE", - "IP_PORTRANGE_DEFAULT", - "IP_PORTRANGE_HIGH", - "IP_PORTRANGE_LOW", - "IP_RECVDSTADDR", - "IP_RECVDSTPORT", - "IP_RECVERR", - "IP_RECVIF", - "IP_RECVOPTS", - "IP_RECVORIGDSTADDR", - "IP_RECVPKTINFO", - "IP_RECVRETOPTS", - "IP_RECVRTABLE", - "IP_RECVTOS", - "IP_RECVTTL", - "IP_RETOPTS", - "IP_RF", - "IP_ROUTER_ALERT", - "IP_RSVP_OFF", - "IP_RSVP_ON", - "IP_RSVP_VIF_OFF", - "IP_RSVP_VIF_ON", - "IP_RTABLE", - "IP_SENDSRCADDR", - "IP_STRIPHDR", - "IP_TOS", - "IP_TRAFFIC_MGT_BACKGROUND", - "IP_TRANSPARENT", - "IP_TTL", - "IP_UNBLOCK_SOURCE", - "IP_XFRM_POLICY", - "IPv6MTUInfo", - "IPv6Mreq", - "ISIG", - "ISTRIP", - "IUCLC", - "IUTF8", - "IXANY", - "IXOFF", - "IXON", - "IfAddrmsg", - "IfAnnounceMsghdr", - "IfData", - "IfInfomsg", - "IfMsghdr", - "IfaMsghdr", - "IfmaMsghdr", - "IfmaMsghdr2", - "ImplementsGetwd", - "Inet4Pktinfo", - "Inet6Pktinfo", - "InotifyAddWatch", - "InotifyEvent", - "InotifyInit", - "InotifyInit1", - "InotifyRmWatch", - "InterfaceAddrMessage", - "InterfaceAnnounceMessage", - "InterfaceInfo", - "InterfaceMessage", - "InterfaceMulticastAddrMessage", - "InvalidHandle", - "Ioperm", - "Iopl", - "Iovec", - "IpAdapterInfo", - "IpAddrString", - "IpAddressString", - "IpMaskString", - "Issetugid", - "KEY_ALL_ACCESS", - "KEY_CREATE_LINK", - "KEY_CREATE_SUB_KEY", - "KEY_ENUMERATE_SUB_KEYS", - "KEY_EXECUTE", - "KEY_NOTIFY", - "KEY_QUERY_VALUE", - "KEY_READ", - "KEY_SET_VALUE", - "KEY_WOW64_32KEY", - "KEY_WOW64_64KEY", - "KEY_WRITE", - "Kevent", - "Kevent_t", - "Kill", - "Klogctl", - "Kqueue", - "LANG_ENGLISH", - "LAYERED_PROTOCOL", - "LCNT_OVERLOAD_FLUSH", - "LINUX_REBOOT_CMD_CAD_OFF", - "LINUX_REBOOT_CMD_CAD_ON", - "LINUX_REBOOT_CMD_HALT", - "LINUX_REBOOT_CMD_KEXEC", - "LINUX_REBOOT_CMD_POWER_OFF", - "LINUX_REBOOT_CMD_RESTART", - "LINUX_REBOOT_CMD_RESTART2", - "LINUX_REBOOT_CMD_SW_SUSPEND", - "LINUX_REBOOT_MAGIC1", - "LINUX_REBOOT_MAGIC2", - "LOCK_EX", - "LOCK_NB", - "LOCK_SH", - "LOCK_UN", - "LazyDLL", - "LazyProc", - "Lchown", - "Linger", - "Link", - "Listen", - "Listxattr", - "LoadCancelIoEx", - "LoadConnectEx", - "LoadCreateSymbolicLink", - "LoadDLL", - "LoadGetAddrInfo", - "LoadLibrary", - "LoadSetFileCompletionNotificationModes", - "LocalFree", - "Log2phys_t", - "LookupAccountName", - "LookupAccountSid", - "LookupSID", - "LsfJump", - "LsfSocket", - "LsfStmt", - "Lstat", - "MADV_AUTOSYNC", - "MADV_CAN_REUSE", - "MADV_CORE", - "MADV_DOFORK", - "MADV_DONTFORK", - "MADV_DONTNEED", - "MADV_FREE", - "MADV_FREE_REUSABLE", - "MADV_FREE_REUSE", - "MADV_HUGEPAGE", - "MADV_HWPOISON", - "MADV_MERGEABLE", - "MADV_NOCORE", - "MADV_NOHUGEPAGE", - "MADV_NORMAL", - "MADV_NOSYNC", - "MADV_PROTECT", - "MADV_RANDOM", - "MADV_REMOVE", - "MADV_SEQUENTIAL", - "MADV_SPACEAVAIL", - "MADV_UNMERGEABLE", - "MADV_WILLNEED", - "MADV_ZERO_WIRED_PAGES", - "MAP_32BIT", - "MAP_ALIGNED_SUPER", - "MAP_ALIGNMENT_16MB", - "MAP_ALIGNMENT_1TB", - "MAP_ALIGNMENT_256TB", - "MAP_ALIGNMENT_4GB", - "MAP_ALIGNMENT_64KB", - "MAP_ALIGNMENT_64PB", - "MAP_ALIGNMENT_MASK", - "MAP_ALIGNMENT_SHIFT", - "MAP_ANON", - "MAP_ANONYMOUS", - "MAP_COPY", - "MAP_DENYWRITE", - "MAP_EXECUTABLE", - "MAP_FILE", - "MAP_FIXED", - "MAP_FLAGMASK", - "MAP_GROWSDOWN", - "MAP_HASSEMAPHORE", - "MAP_HUGETLB", - "MAP_INHERIT", - "MAP_INHERIT_COPY", - "MAP_INHERIT_DEFAULT", - "MAP_INHERIT_DONATE_COPY", - "MAP_INHERIT_NONE", - "MAP_INHERIT_SHARE", - "MAP_JIT", - "MAP_LOCKED", - "MAP_NOCACHE", - "MAP_NOCORE", - "MAP_NOEXTEND", - "MAP_NONBLOCK", - "MAP_NORESERVE", - "MAP_NOSYNC", - "MAP_POPULATE", - "MAP_PREFAULT_READ", - "MAP_PRIVATE", - "MAP_RENAME", - "MAP_RESERVED0080", - "MAP_RESERVED0100", - "MAP_SHARED", - "MAP_STACK", - "MAP_TRYFIXED", - "MAP_TYPE", - "MAP_WIRED", - "MAXIMUM_REPARSE_DATA_BUFFER_SIZE", - "MAXLEN_IFDESCR", - "MAXLEN_PHYSADDR", - "MAX_ADAPTER_ADDRESS_LENGTH", - "MAX_ADAPTER_DESCRIPTION_LENGTH", - "MAX_ADAPTER_NAME_LENGTH", - "MAX_COMPUTERNAME_LENGTH", - "MAX_INTERFACE_NAME_LEN", - "MAX_LONG_PATH", - "MAX_PATH", - "MAX_PROTOCOL_CHAIN", - "MCL_CURRENT", - "MCL_FUTURE", - "MNT_DETACH", - "MNT_EXPIRE", - "MNT_FORCE", - "MSG_BCAST", - "MSG_CMSG_CLOEXEC", - "MSG_COMPAT", - "MSG_CONFIRM", - "MSG_CONTROLMBUF", - "MSG_CTRUNC", - "MSG_DONTROUTE", - "MSG_DONTWAIT", - "MSG_EOF", - "MSG_EOR", - "MSG_ERRQUEUE", - "MSG_FASTOPEN", - "MSG_FIN", - "MSG_FLUSH", - "MSG_HAVEMORE", - "MSG_HOLD", - "MSG_IOVUSRSPACE", - "MSG_LENUSRSPACE", - "MSG_MCAST", - "MSG_MORE", - "MSG_NAMEMBUF", - "MSG_NBIO", - "MSG_NEEDSA", - "MSG_NOSIGNAL", - "MSG_NOTIFICATION", - "MSG_OOB", - "MSG_PEEK", - "MSG_PROXY", - "MSG_RCVMORE", - "MSG_RST", - "MSG_SEND", - "MSG_SYN", - "MSG_TRUNC", - "MSG_TRYHARD", - "MSG_USERFLAGS", - "MSG_WAITALL", - "MSG_WAITFORONE", - "MSG_WAITSTREAM", - "MS_ACTIVE", - "MS_ASYNC", - "MS_BIND", - "MS_DEACTIVATE", - "MS_DIRSYNC", - "MS_INVALIDATE", - "MS_I_VERSION", - "MS_KERNMOUNT", - "MS_KILLPAGES", - "MS_MANDLOCK", - "MS_MGC_MSK", - "MS_MGC_VAL", - "MS_MOVE", - "MS_NOATIME", - "MS_NODEV", - "MS_NODIRATIME", - "MS_NOEXEC", - "MS_NOSUID", - "MS_NOUSER", - "MS_POSIXACL", - "MS_PRIVATE", - "MS_RDONLY", - "MS_REC", - "MS_RELATIME", - "MS_REMOUNT", - "MS_RMT_MASK", - "MS_SHARED", - "MS_SILENT", - "MS_SLAVE", - "MS_STRICTATIME", - "MS_SYNC", - "MS_SYNCHRONOUS", - "MS_UNBINDABLE", - "Madvise", - "MapViewOfFile", - "MaxTokenInfoClass", - "Mclpool", - "MibIfRow", - "Mkdir", - "Mkdirat", - "Mkfifo", - "Mknod", - "Mknodat", - "Mlock", - "Mlockall", - "Mmap", - "Mount", - "MoveFile", - "Mprotect", - "Msghdr", - "Munlock", - "Munlockall", - "Munmap", - "MustLoadDLL", - "NAME_MAX", - "NETLINK_ADD_MEMBERSHIP", - "NETLINK_AUDIT", - "NETLINK_BROADCAST_ERROR", - "NETLINK_CONNECTOR", - "NETLINK_DNRTMSG", - "NETLINK_DROP_MEMBERSHIP", - "NETLINK_ECRYPTFS", - "NETLINK_FIB_LOOKUP", - "NETLINK_FIREWALL", - "NETLINK_GENERIC", - "NETLINK_INET_DIAG", - "NETLINK_IP6_FW", - "NETLINK_ISCSI", - "NETLINK_KOBJECT_UEVENT", - "NETLINK_NETFILTER", - "NETLINK_NFLOG", - "NETLINK_NO_ENOBUFS", - "NETLINK_PKTINFO", - "NETLINK_RDMA", - "NETLINK_ROUTE", - "NETLINK_SCSITRANSPORT", - "NETLINK_SELINUX", - "NETLINK_UNUSED", - "NETLINK_USERSOCK", - "NETLINK_XFRM", - "NET_RT_DUMP", - "NET_RT_DUMP2", - "NET_RT_FLAGS", - "NET_RT_IFLIST", - "NET_RT_IFLIST2", - "NET_RT_IFLISTL", - "NET_RT_IFMALIST", - "NET_RT_MAXID", - "NET_RT_OIFLIST", - "NET_RT_OOIFLIST", - "NET_RT_STAT", - "NET_RT_STATS", - "NET_RT_TABLE", - "NET_RT_TRASH", - "NLA_ALIGNTO", - "NLA_F_NESTED", - "NLA_F_NET_BYTEORDER", - "NLA_HDRLEN", - "NLMSG_ALIGNTO", - "NLMSG_DONE", - "NLMSG_ERROR", - "NLMSG_HDRLEN", - "NLMSG_MIN_TYPE", - "NLMSG_NOOP", - "NLMSG_OVERRUN", - "NLM_F_ACK", - "NLM_F_APPEND", - "NLM_F_ATOMIC", - "NLM_F_CREATE", - "NLM_F_DUMP", - "NLM_F_ECHO", - "NLM_F_EXCL", - "NLM_F_MATCH", - "NLM_F_MULTI", - "NLM_F_REPLACE", - "NLM_F_REQUEST", - "NLM_F_ROOT", - "NOFLSH", - "NOTE_ABSOLUTE", - "NOTE_ATTRIB", - "NOTE_BACKGROUND", - "NOTE_CHILD", - "NOTE_CRITICAL", - "NOTE_DELETE", - "NOTE_EOF", - "NOTE_EXEC", - "NOTE_EXIT", - "NOTE_EXITSTATUS", - "NOTE_EXIT_CSERROR", - "NOTE_EXIT_DECRYPTFAIL", - "NOTE_EXIT_DETAIL", - "NOTE_EXIT_DETAIL_MASK", - "NOTE_EXIT_MEMORY", - "NOTE_EXIT_REPARENTED", - "NOTE_EXTEND", - "NOTE_FFAND", - "NOTE_FFCOPY", - "NOTE_FFCTRLMASK", - "NOTE_FFLAGSMASK", - "NOTE_FFNOP", - "NOTE_FFOR", - "NOTE_FORK", - "NOTE_LEEWAY", - "NOTE_LINK", - "NOTE_LOWAT", - "NOTE_NONE", - "NOTE_NSECONDS", - "NOTE_PCTRLMASK", - "NOTE_PDATAMASK", - "NOTE_REAP", - "NOTE_RENAME", - "NOTE_RESOURCEEND", - "NOTE_REVOKE", - "NOTE_SECONDS", - "NOTE_SIGNAL", - "NOTE_TRACK", - "NOTE_TRACKERR", - "NOTE_TRIGGER", - "NOTE_TRUNCATE", - "NOTE_USECONDS", - "NOTE_VM_ERROR", - "NOTE_VM_PRESSURE", - "NOTE_VM_PRESSURE_SUDDEN_TERMINATE", - "NOTE_VM_PRESSURE_TERMINATE", - "NOTE_WRITE", - "NameCanonical", - "NameCanonicalEx", - "NameDisplay", - "NameDnsDomain", - "NameFullyQualifiedDN", - "NameSamCompatible", - "NameServicePrincipal", - "NameUniqueId", - "NameUnknown", - "NameUserPrincipal", - "Nanosleep", - "NetApiBufferFree", - "NetGetJoinInformation", - "NetSetupDomainName", - "NetSetupUnjoined", - "NetSetupUnknownStatus", - "NetSetupWorkgroupName", - "NetUserGetInfo", - "NetlinkMessage", - "NetlinkRIB", - "NetlinkRouteAttr", - "NetlinkRouteRequest", - "NewCallback", - "NewCallbackCDecl", - "NewLazyDLL", - "NlAttr", - "NlMsgerr", - "NlMsghdr", - "NsecToFiletime", - "NsecToTimespec", - "NsecToTimeval", - "Ntohs", - "OCRNL", - "OFDEL", - "OFILL", - "OFIOGETBMAP", - "OID_PKIX_KP_SERVER_AUTH", - "OID_SERVER_GATED_CRYPTO", - "OID_SGC_NETSCAPE", - "OLCUC", - "ONLCR", - "ONLRET", - "ONOCR", - "ONOEOT", - "OPEN_ALWAYS", - "OPEN_EXISTING", - "OPOST", - "O_ACCMODE", - "O_ALERT", - "O_ALT_IO", - "O_APPEND", - "O_ASYNC", - "O_CLOEXEC", - "O_CREAT", - "O_DIRECT", - "O_DIRECTORY", - "O_DP_GETRAWENCRYPTED", - "O_DSYNC", - "O_EVTONLY", - "O_EXCL", - "O_EXEC", - "O_EXLOCK", - "O_FSYNC", - "O_LARGEFILE", - "O_NDELAY", - "O_NOATIME", - "O_NOCTTY", - "O_NOFOLLOW", - "O_NONBLOCK", - "O_NOSIGPIPE", - "O_POPUP", - "O_RDONLY", - "O_RDWR", - "O_RSYNC", - "O_SHLOCK", - "O_SYMLINK", - "O_SYNC", - "O_TRUNC", - "O_TTY_INIT", - "O_WRONLY", - "Open", - "OpenCurrentProcessToken", - "OpenProcess", - "OpenProcessToken", - "Openat", - "Overlapped", - "PACKET_ADD_MEMBERSHIP", - "PACKET_BROADCAST", - "PACKET_DROP_MEMBERSHIP", - "PACKET_FASTROUTE", - "PACKET_HOST", - "PACKET_LOOPBACK", - "PACKET_MR_ALLMULTI", - "PACKET_MR_MULTICAST", - "PACKET_MR_PROMISC", - "PACKET_MULTICAST", - "PACKET_OTHERHOST", - "PACKET_OUTGOING", - "PACKET_RECV_OUTPUT", - "PACKET_RX_RING", - "PACKET_STATISTICS", - "PAGE_EXECUTE_READ", - "PAGE_EXECUTE_READWRITE", - "PAGE_EXECUTE_WRITECOPY", - "PAGE_READONLY", - "PAGE_READWRITE", - "PAGE_WRITECOPY", - "PARENB", - "PARMRK", - "PARODD", - "PENDIN", - "PFL_HIDDEN", - "PFL_MATCHES_PROTOCOL_ZERO", - "PFL_MULTIPLE_PROTO_ENTRIES", - "PFL_NETWORKDIRECT_PROVIDER", - "PFL_RECOMMENDED_PROTO_ENTRY", - "PF_FLUSH", - "PKCS_7_ASN_ENCODING", - "PMC5_PIPELINE_FLUSH", - "PRIO_PGRP", - "PRIO_PROCESS", - "PRIO_USER", - "PRI_IOFLUSH", - "PROCESS_QUERY_INFORMATION", - "PROCESS_TERMINATE", - "PROT_EXEC", - "PROT_GROWSDOWN", - "PROT_GROWSUP", - "PROT_NONE", - "PROT_READ", - "PROT_WRITE", - "PROV_DH_SCHANNEL", - "PROV_DSS", - "PROV_DSS_DH", - "PROV_EC_ECDSA_FULL", - "PROV_EC_ECDSA_SIG", - "PROV_EC_ECNRA_FULL", - "PROV_EC_ECNRA_SIG", - "PROV_FORTEZZA", - "PROV_INTEL_SEC", - "PROV_MS_EXCHANGE", - "PROV_REPLACE_OWF", - "PROV_RNG", - "PROV_RSA_AES", - "PROV_RSA_FULL", - "PROV_RSA_SCHANNEL", - "PROV_RSA_SIG", - "PROV_SPYRUS_LYNKS", - "PROV_SSL", - "PR_CAPBSET_DROP", - "PR_CAPBSET_READ", - "PR_CLEAR_SECCOMP_FILTER", - "PR_ENDIAN_BIG", - "PR_ENDIAN_LITTLE", - "PR_ENDIAN_PPC_LITTLE", - "PR_FPEMU_NOPRINT", - "PR_FPEMU_SIGFPE", - "PR_FP_EXC_ASYNC", - "PR_FP_EXC_DISABLED", - "PR_FP_EXC_DIV", - "PR_FP_EXC_INV", - "PR_FP_EXC_NONRECOV", - "PR_FP_EXC_OVF", - "PR_FP_EXC_PRECISE", - "PR_FP_EXC_RES", - "PR_FP_EXC_SW_ENABLE", - "PR_FP_EXC_UND", - "PR_GET_DUMPABLE", - "PR_GET_ENDIAN", - "PR_GET_FPEMU", - "PR_GET_FPEXC", - "PR_GET_KEEPCAPS", - "PR_GET_NAME", - "PR_GET_PDEATHSIG", - "PR_GET_SECCOMP", - "PR_GET_SECCOMP_FILTER", - "PR_GET_SECUREBITS", - "PR_GET_TIMERSLACK", - "PR_GET_TIMING", - "PR_GET_TSC", - "PR_GET_UNALIGN", - "PR_MCE_KILL", - "PR_MCE_KILL_CLEAR", - "PR_MCE_KILL_DEFAULT", - "PR_MCE_KILL_EARLY", - "PR_MCE_KILL_GET", - "PR_MCE_KILL_LATE", - "PR_MCE_KILL_SET", - "PR_SECCOMP_FILTER_EVENT", - "PR_SECCOMP_FILTER_SYSCALL", - "PR_SET_DUMPABLE", - "PR_SET_ENDIAN", - "PR_SET_FPEMU", - "PR_SET_FPEXC", - "PR_SET_KEEPCAPS", - "PR_SET_NAME", - "PR_SET_PDEATHSIG", - "PR_SET_PTRACER", - "PR_SET_SECCOMP", - "PR_SET_SECCOMP_FILTER", - "PR_SET_SECUREBITS", - "PR_SET_TIMERSLACK", - "PR_SET_TIMING", - "PR_SET_TSC", - "PR_SET_UNALIGN", - "PR_TASK_PERF_EVENTS_DISABLE", - "PR_TASK_PERF_EVENTS_ENABLE", - "PR_TIMING_STATISTICAL", - "PR_TIMING_TIMESTAMP", - "PR_TSC_ENABLE", - "PR_TSC_SIGSEGV", - "PR_UNALIGN_NOPRINT", - "PR_UNALIGN_SIGBUS", - "PTRACE_ARCH_PRCTL", - "PTRACE_ATTACH", - "PTRACE_CONT", - "PTRACE_DETACH", - "PTRACE_EVENT_CLONE", - "PTRACE_EVENT_EXEC", - "PTRACE_EVENT_EXIT", - "PTRACE_EVENT_FORK", - "PTRACE_EVENT_VFORK", - "PTRACE_EVENT_VFORK_DONE", - "PTRACE_GETCRUNCHREGS", - "PTRACE_GETEVENTMSG", - "PTRACE_GETFPREGS", - "PTRACE_GETFPXREGS", - "PTRACE_GETHBPREGS", - "PTRACE_GETREGS", - "PTRACE_GETREGSET", - "PTRACE_GETSIGINFO", - "PTRACE_GETVFPREGS", - "PTRACE_GETWMMXREGS", - "PTRACE_GET_THREAD_AREA", - "PTRACE_KILL", - "PTRACE_OLDSETOPTIONS", - "PTRACE_O_MASK", - "PTRACE_O_TRACECLONE", - "PTRACE_O_TRACEEXEC", - "PTRACE_O_TRACEEXIT", - "PTRACE_O_TRACEFORK", - "PTRACE_O_TRACESYSGOOD", - "PTRACE_O_TRACEVFORK", - "PTRACE_O_TRACEVFORKDONE", - "PTRACE_PEEKDATA", - "PTRACE_PEEKTEXT", - "PTRACE_PEEKUSR", - "PTRACE_POKEDATA", - "PTRACE_POKETEXT", - "PTRACE_POKEUSR", - "PTRACE_SETCRUNCHREGS", - "PTRACE_SETFPREGS", - "PTRACE_SETFPXREGS", - "PTRACE_SETHBPREGS", - "PTRACE_SETOPTIONS", - "PTRACE_SETREGS", - "PTRACE_SETREGSET", - "PTRACE_SETSIGINFO", - "PTRACE_SETVFPREGS", - "PTRACE_SETWMMXREGS", - "PTRACE_SET_SYSCALL", - "PTRACE_SET_THREAD_AREA", - "PTRACE_SINGLEBLOCK", - "PTRACE_SINGLESTEP", - "PTRACE_SYSCALL", - "PTRACE_SYSEMU", - "PTRACE_SYSEMU_SINGLESTEP", - "PTRACE_TRACEME", - "PT_ATTACH", - "PT_ATTACHEXC", - "PT_CONTINUE", - "PT_DATA_ADDR", - "PT_DENY_ATTACH", - "PT_DETACH", - "PT_FIRSTMACH", - "PT_FORCEQUOTA", - "PT_KILL", - "PT_MASK", - "PT_READ_D", - "PT_READ_I", - "PT_READ_U", - "PT_SIGEXC", - "PT_STEP", - "PT_TEXT_ADDR", - "PT_TEXT_END_ADDR", - "PT_THUPDATE", - "PT_TRACE_ME", - "PT_WRITE_D", - "PT_WRITE_I", - "PT_WRITE_U", - "ParseDirent", - "ParseNetlinkMessage", - "ParseNetlinkRouteAttr", - "ParseRoutingMessage", - "ParseRoutingSockaddr", - "ParseSocketControlMessage", - "ParseUnixCredentials", - "ParseUnixRights", - "PathMax", - "Pathconf", - "Pause", - "Pipe", - "Pipe2", - "PivotRoot", - "Pointer", - "PostQueuedCompletionStatus", - "Pread", - "Proc", - "ProcAttr", - "Process32First", - "Process32Next", - "ProcessEntry32", - "ProcessInformation", - "Protoent", - "PtraceAttach", - "PtraceCont", - "PtraceDetach", - "PtraceGetEventMsg", - "PtraceGetRegs", - "PtracePeekData", - "PtracePeekText", - "PtracePokeData", - "PtracePokeText", - "PtraceRegs", - "PtraceSetOptions", - "PtraceSetRegs", - "PtraceSingleStep", - "PtraceSyscall", - "Pwrite", - "REG_BINARY", - "REG_DWORD", - "REG_DWORD_BIG_ENDIAN", - "REG_DWORD_LITTLE_ENDIAN", - "REG_EXPAND_SZ", - "REG_FULL_RESOURCE_DESCRIPTOR", - "REG_LINK", - "REG_MULTI_SZ", - "REG_NONE", - "REG_QWORD", - "REG_QWORD_LITTLE_ENDIAN", - "REG_RESOURCE_LIST", - "REG_RESOURCE_REQUIREMENTS_LIST", - "REG_SZ", - "RLIMIT_AS", - "RLIMIT_CORE", - "RLIMIT_CPU", - "RLIMIT_CPU_USAGE_MONITOR", - "RLIMIT_DATA", - "RLIMIT_FSIZE", - "RLIMIT_NOFILE", - "RLIMIT_STACK", - "RLIM_INFINITY", - "RTAX_ADVMSS", - "RTAX_AUTHOR", - "RTAX_BRD", - "RTAX_CWND", - "RTAX_DST", - "RTAX_FEATURES", - "RTAX_FEATURE_ALLFRAG", - "RTAX_FEATURE_ECN", - "RTAX_FEATURE_SACK", - "RTAX_FEATURE_TIMESTAMP", - "RTAX_GATEWAY", - "RTAX_GENMASK", - "RTAX_HOPLIMIT", - "RTAX_IFA", - "RTAX_IFP", - "RTAX_INITCWND", - "RTAX_INITRWND", - "RTAX_LABEL", - "RTAX_LOCK", - "RTAX_MAX", - "RTAX_MTU", - "RTAX_NETMASK", - "RTAX_REORDERING", - "RTAX_RTO_MIN", - "RTAX_RTT", - "RTAX_RTTVAR", - "RTAX_SRC", - "RTAX_SRCMASK", - "RTAX_SSTHRESH", - "RTAX_TAG", - "RTAX_UNSPEC", - "RTAX_WINDOW", - "RTA_ALIGNTO", - "RTA_AUTHOR", - "RTA_BRD", - "RTA_CACHEINFO", - "RTA_DST", - "RTA_FLOW", - "RTA_GATEWAY", - "RTA_GENMASK", - "RTA_IFA", - "RTA_IFP", - "RTA_IIF", - "RTA_LABEL", - "RTA_MAX", - "RTA_METRICS", - "RTA_MULTIPATH", - "RTA_NETMASK", - "RTA_OIF", - "RTA_PREFSRC", - "RTA_PRIORITY", - "RTA_SRC", - "RTA_SRCMASK", - "RTA_TABLE", - "RTA_TAG", - "RTA_UNSPEC", - "RTCF_DIRECTSRC", - "RTCF_DOREDIRECT", - "RTCF_LOG", - "RTCF_MASQ", - "RTCF_NAT", - "RTCF_VALVE", - "RTF_ADDRCLASSMASK", - "RTF_ADDRCONF", - "RTF_ALLONLINK", - "RTF_ANNOUNCE", - "RTF_BLACKHOLE", - "RTF_BROADCAST", - "RTF_CACHE", - "RTF_CLONED", - "RTF_CLONING", - "RTF_CONDEMNED", - "RTF_DEFAULT", - "RTF_DELCLONE", - "RTF_DONE", - "RTF_DYNAMIC", - "RTF_FLOW", - "RTF_FMASK", - "RTF_GATEWAY", - "RTF_GWFLAG_COMPAT", - "RTF_HOST", - "RTF_IFREF", - "RTF_IFSCOPE", - "RTF_INTERFACE", - "RTF_IRTT", - "RTF_LINKRT", - "RTF_LLDATA", - "RTF_LLINFO", - "RTF_LOCAL", - "RTF_MASK", - "RTF_MODIFIED", - "RTF_MPATH", - "RTF_MPLS", - "RTF_MSS", - "RTF_MTU", - "RTF_MULTICAST", - "RTF_NAT", - "RTF_NOFORWARD", - "RTF_NONEXTHOP", - "RTF_NOPMTUDISC", - "RTF_PERMANENT_ARP", - "RTF_PINNED", - "RTF_POLICY", - "RTF_PRCLONING", - "RTF_PROTO1", - "RTF_PROTO2", - "RTF_PROTO3", - "RTF_PROXY", - "RTF_REINSTATE", - "RTF_REJECT", - "RTF_RNH_LOCKED", - "RTF_ROUTER", - "RTF_SOURCE", - "RTF_SRC", - "RTF_STATIC", - "RTF_STICKY", - "RTF_THROW", - "RTF_TUNNEL", - "RTF_UP", - "RTF_USETRAILERS", - "RTF_WASCLONED", - "RTF_WINDOW", - "RTF_XRESOLVE", - "RTM_ADD", - "RTM_BASE", - "RTM_CHANGE", - "RTM_CHGADDR", - "RTM_DELACTION", - "RTM_DELADDR", - "RTM_DELADDRLABEL", - "RTM_DELETE", - "RTM_DELLINK", - "RTM_DELMADDR", - "RTM_DELNEIGH", - "RTM_DELQDISC", - "RTM_DELROUTE", - "RTM_DELRULE", - "RTM_DELTCLASS", - "RTM_DELTFILTER", - "RTM_DESYNC", - "RTM_F_CLONED", - "RTM_F_EQUALIZE", - "RTM_F_NOTIFY", - "RTM_F_PREFIX", - "RTM_GET", - "RTM_GET2", - "RTM_GETACTION", - "RTM_GETADDR", - "RTM_GETADDRLABEL", - "RTM_GETANYCAST", - "RTM_GETDCB", - "RTM_GETLINK", - "RTM_GETMULTICAST", - "RTM_GETNEIGH", - "RTM_GETNEIGHTBL", - "RTM_GETQDISC", - "RTM_GETROUTE", - "RTM_GETRULE", - "RTM_GETTCLASS", - "RTM_GETTFILTER", - "RTM_IEEE80211", - "RTM_IFANNOUNCE", - "RTM_IFINFO", - "RTM_IFINFO2", - "RTM_LLINFO_UPD", - "RTM_LOCK", - "RTM_LOSING", - "RTM_MAX", - "RTM_MAXSIZE", - "RTM_MISS", - "RTM_NEWACTION", - "RTM_NEWADDR", - "RTM_NEWADDRLABEL", - "RTM_NEWLINK", - "RTM_NEWMADDR", - "RTM_NEWMADDR2", - "RTM_NEWNDUSEROPT", - "RTM_NEWNEIGH", - "RTM_NEWNEIGHTBL", - "RTM_NEWPREFIX", - "RTM_NEWQDISC", - "RTM_NEWROUTE", - "RTM_NEWRULE", - "RTM_NEWTCLASS", - "RTM_NEWTFILTER", - "RTM_NR_FAMILIES", - "RTM_NR_MSGTYPES", - "RTM_OIFINFO", - "RTM_OLDADD", - "RTM_OLDDEL", - "RTM_OOIFINFO", - "RTM_REDIRECT", - "RTM_RESOLVE", - "RTM_RTTUNIT", - "RTM_SETDCB", - "RTM_SETGATE", - "RTM_SETLINK", - "RTM_SETNEIGHTBL", - "RTM_VERSION", - "RTNH_ALIGNTO", - "RTNH_F_DEAD", - "RTNH_F_ONLINK", - "RTNH_F_PERVASIVE", - "RTNLGRP_IPV4_IFADDR", - "RTNLGRP_IPV4_MROUTE", - "RTNLGRP_IPV4_ROUTE", - "RTNLGRP_IPV4_RULE", - "RTNLGRP_IPV6_IFADDR", - "RTNLGRP_IPV6_IFINFO", - "RTNLGRP_IPV6_MROUTE", - "RTNLGRP_IPV6_PREFIX", - "RTNLGRP_IPV6_ROUTE", - "RTNLGRP_IPV6_RULE", - "RTNLGRP_LINK", - "RTNLGRP_ND_USEROPT", - "RTNLGRP_NEIGH", - "RTNLGRP_NONE", - "RTNLGRP_NOTIFY", - "RTNLGRP_TC", - "RTN_ANYCAST", - "RTN_BLACKHOLE", - "RTN_BROADCAST", - "RTN_LOCAL", - "RTN_MAX", - "RTN_MULTICAST", - "RTN_NAT", - "RTN_PROHIBIT", - "RTN_THROW", - "RTN_UNICAST", - "RTN_UNREACHABLE", - "RTN_UNSPEC", - "RTN_XRESOLVE", - "RTPROT_BIRD", - "RTPROT_BOOT", - "RTPROT_DHCP", - "RTPROT_DNROUTED", - "RTPROT_GATED", - "RTPROT_KERNEL", - "RTPROT_MRT", - "RTPROT_NTK", - "RTPROT_RA", - "RTPROT_REDIRECT", - "RTPROT_STATIC", - "RTPROT_UNSPEC", - "RTPROT_XORP", - "RTPROT_ZEBRA", - "RTV_EXPIRE", - "RTV_HOPCOUNT", - "RTV_MTU", - "RTV_RPIPE", - "RTV_RTT", - "RTV_RTTVAR", - "RTV_SPIPE", - "RTV_SSTHRESH", - "RTV_WEIGHT", - "RT_CACHING_CONTEXT", - "RT_CLASS_DEFAULT", - "RT_CLASS_LOCAL", - "RT_CLASS_MAIN", - "RT_CLASS_MAX", - "RT_CLASS_UNSPEC", - "RT_DEFAULT_FIB", - "RT_NORTREF", - "RT_SCOPE_HOST", - "RT_SCOPE_LINK", - "RT_SCOPE_NOWHERE", - "RT_SCOPE_SITE", - "RT_SCOPE_UNIVERSE", - "RT_TABLEID_MAX", - "RT_TABLE_COMPAT", - "RT_TABLE_DEFAULT", - "RT_TABLE_LOCAL", - "RT_TABLE_MAIN", - "RT_TABLE_MAX", - "RT_TABLE_UNSPEC", - "RUSAGE_CHILDREN", - "RUSAGE_SELF", - "RUSAGE_THREAD", - "Radvisory_t", - "RawConn", - "RawSockaddr", - "RawSockaddrAny", - "RawSockaddrDatalink", - "RawSockaddrInet4", - "RawSockaddrInet6", - "RawSockaddrLinklayer", - "RawSockaddrNetlink", - "RawSockaddrUnix", - "RawSyscall", - "RawSyscall6", - "Read", - "ReadConsole", - "ReadDirectoryChanges", - "ReadDirent", - "ReadFile", - "Readlink", - "Reboot", - "Recvfrom", - "Recvmsg", - "RegCloseKey", - "RegEnumKeyEx", - "RegOpenKeyEx", - "RegQueryInfoKey", - "RegQueryValueEx", - "RemoveDirectory", - "Removexattr", - "Rename", - "Renameat", - "Revoke", - "Rlimit", - "Rmdir", - "RouteMessage", - "RouteRIB", - "RoutingMessage", - "RtAttr", - "RtGenmsg", - "RtMetrics", - "RtMsg", - "RtMsghdr", - "RtNexthop", - "Rusage", - "SCM_BINTIME", - "SCM_CREDENTIALS", - "SCM_CREDS", - "SCM_RIGHTS", - "SCM_TIMESTAMP", - "SCM_TIMESTAMPING", - "SCM_TIMESTAMPNS", - "SCM_TIMESTAMP_MONOTONIC", - "SHUT_RD", - "SHUT_RDWR", - "SHUT_WR", - "SID", - "SIDAndAttributes", - "SIGABRT", - "SIGALRM", - "SIGBUS", - "SIGCHLD", - "SIGCLD", - "SIGCONT", - "SIGEMT", - "SIGFPE", - "SIGHUP", - "SIGILL", - "SIGINFO", - "SIGINT", - "SIGIO", - "SIGIOT", - "SIGKILL", - "SIGLIBRT", - "SIGLWP", - "SIGPIPE", - "SIGPOLL", - "SIGPROF", - "SIGPWR", - "SIGQUIT", - "SIGSEGV", - "SIGSTKFLT", - "SIGSTOP", - "SIGSYS", - "SIGTERM", - "SIGTHR", - "SIGTRAP", - "SIGTSTP", - "SIGTTIN", - "SIGTTOU", - "SIGUNUSED", - "SIGURG", - "SIGUSR1", - "SIGUSR2", - "SIGVTALRM", - "SIGWINCH", - "SIGXCPU", - "SIGXFSZ", - "SIOCADDDLCI", - "SIOCADDMULTI", - "SIOCADDRT", - "SIOCAIFADDR", - "SIOCAIFGROUP", - "SIOCALIFADDR", - "SIOCARPIPLL", - "SIOCATMARK", - "SIOCAUTOADDR", - "SIOCAUTONETMASK", - "SIOCBRDGADD", - "SIOCBRDGADDS", - "SIOCBRDGARL", - "SIOCBRDGDADDR", - "SIOCBRDGDEL", - "SIOCBRDGDELS", - "SIOCBRDGFLUSH", - "SIOCBRDGFRL", - "SIOCBRDGGCACHE", - "SIOCBRDGGFD", - "SIOCBRDGGHT", - "SIOCBRDGGIFFLGS", - "SIOCBRDGGMA", - "SIOCBRDGGPARAM", - "SIOCBRDGGPRI", - "SIOCBRDGGRL", - "SIOCBRDGGSIFS", - "SIOCBRDGGTO", - "SIOCBRDGIFS", - "SIOCBRDGRTS", - "SIOCBRDGSADDR", - "SIOCBRDGSCACHE", - "SIOCBRDGSFD", - "SIOCBRDGSHT", - "SIOCBRDGSIFCOST", - "SIOCBRDGSIFFLGS", - "SIOCBRDGSIFPRIO", - "SIOCBRDGSMA", - "SIOCBRDGSPRI", - "SIOCBRDGSPROTO", - "SIOCBRDGSTO", - "SIOCBRDGSTXHC", - "SIOCDARP", - "SIOCDELDLCI", - "SIOCDELMULTI", - "SIOCDELRT", - "SIOCDEVPRIVATE", - "SIOCDIFADDR", - "SIOCDIFGROUP", - "SIOCDIFPHYADDR", - "SIOCDLIFADDR", - "SIOCDRARP", - "SIOCGARP", - "SIOCGDRVSPEC", - "SIOCGETKALIVE", - "SIOCGETLABEL", - "SIOCGETPFLOW", - "SIOCGETPFSYNC", - "SIOCGETSGCNT", - "SIOCGETVIFCNT", - "SIOCGETVLAN", - "SIOCGHIWAT", - "SIOCGIFADDR", - "SIOCGIFADDRPREF", - "SIOCGIFALIAS", - "SIOCGIFALTMTU", - "SIOCGIFASYNCMAP", - "SIOCGIFBOND", - "SIOCGIFBR", - "SIOCGIFBRDADDR", - "SIOCGIFCAP", - "SIOCGIFCONF", - "SIOCGIFCOUNT", - "SIOCGIFDATA", - "SIOCGIFDESCR", - "SIOCGIFDEVMTU", - "SIOCGIFDLT", - "SIOCGIFDSTADDR", - "SIOCGIFENCAP", - "SIOCGIFFIB", - "SIOCGIFFLAGS", - "SIOCGIFGATTR", - "SIOCGIFGENERIC", - "SIOCGIFGMEMB", - "SIOCGIFGROUP", - "SIOCGIFHARDMTU", - "SIOCGIFHWADDR", - "SIOCGIFINDEX", - "SIOCGIFKPI", - "SIOCGIFMAC", - "SIOCGIFMAP", - "SIOCGIFMEDIA", - "SIOCGIFMEM", - "SIOCGIFMETRIC", - "SIOCGIFMTU", - "SIOCGIFNAME", - "SIOCGIFNETMASK", - "SIOCGIFPDSTADDR", - "SIOCGIFPFLAGS", - "SIOCGIFPHYS", - "SIOCGIFPRIORITY", - "SIOCGIFPSRCADDR", - "SIOCGIFRDOMAIN", - "SIOCGIFRTLABEL", - "SIOCGIFSLAVE", - "SIOCGIFSTATUS", - "SIOCGIFTIMESLOT", - "SIOCGIFTXQLEN", - "SIOCGIFVLAN", - "SIOCGIFWAKEFLAGS", - "SIOCGIFXFLAGS", - "SIOCGLIFADDR", - "SIOCGLIFPHYADDR", - "SIOCGLIFPHYRTABLE", - "SIOCGLIFPHYTTL", - "SIOCGLINKSTR", - "SIOCGLOWAT", - "SIOCGPGRP", - "SIOCGPRIVATE_0", - "SIOCGPRIVATE_1", - "SIOCGRARP", - "SIOCGSPPPPARAMS", - "SIOCGSTAMP", - "SIOCGSTAMPNS", - "SIOCGVH", - "SIOCGVNETID", - "SIOCIFCREATE", - "SIOCIFCREATE2", - "SIOCIFDESTROY", - "SIOCIFGCLONERS", - "SIOCINITIFADDR", - "SIOCPROTOPRIVATE", - "SIOCRSLVMULTI", - "SIOCRTMSG", - "SIOCSARP", - "SIOCSDRVSPEC", - "SIOCSETKALIVE", - "SIOCSETLABEL", - "SIOCSETPFLOW", - "SIOCSETPFSYNC", - "SIOCSETVLAN", - "SIOCSHIWAT", - "SIOCSIFADDR", - "SIOCSIFADDRPREF", - "SIOCSIFALTMTU", - "SIOCSIFASYNCMAP", - "SIOCSIFBOND", - "SIOCSIFBR", - "SIOCSIFBRDADDR", - "SIOCSIFCAP", - "SIOCSIFDESCR", - "SIOCSIFDSTADDR", - "SIOCSIFENCAP", - "SIOCSIFFIB", - "SIOCSIFFLAGS", - "SIOCSIFGATTR", - "SIOCSIFGENERIC", - "SIOCSIFHWADDR", - "SIOCSIFHWBROADCAST", - "SIOCSIFKPI", - "SIOCSIFLINK", - "SIOCSIFLLADDR", - "SIOCSIFMAC", - "SIOCSIFMAP", - "SIOCSIFMEDIA", - "SIOCSIFMEM", - "SIOCSIFMETRIC", - "SIOCSIFMTU", - "SIOCSIFNAME", - "SIOCSIFNETMASK", - "SIOCSIFPFLAGS", - "SIOCSIFPHYADDR", - "SIOCSIFPHYS", - "SIOCSIFPRIORITY", - "SIOCSIFRDOMAIN", - "SIOCSIFRTLABEL", - "SIOCSIFRVNET", - "SIOCSIFSLAVE", - "SIOCSIFTIMESLOT", - "SIOCSIFTXQLEN", - "SIOCSIFVLAN", - "SIOCSIFVNET", - "SIOCSIFXFLAGS", - "SIOCSLIFPHYADDR", - "SIOCSLIFPHYRTABLE", - "SIOCSLIFPHYTTL", - "SIOCSLINKSTR", - "SIOCSLOWAT", - "SIOCSPGRP", - "SIOCSRARP", - "SIOCSSPPPPARAMS", - "SIOCSVH", - "SIOCSVNETID", - "SIOCZIFDATA", - "SIO_GET_EXTENSION_FUNCTION_POINTER", - "SIO_GET_INTERFACE_LIST", - "SIO_KEEPALIVE_VALS", - "SIO_UDP_CONNRESET", - "SOCK_CLOEXEC", - "SOCK_DCCP", - "SOCK_DGRAM", - "SOCK_FLAGS_MASK", - "SOCK_MAXADDRLEN", - "SOCK_NONBLOCK", - "SOCK_NOSIGPIPE", - "SOCK_PACKET", - "SOCK_RAW", - "SOCK_RDM", - "SOCK_SEQPACKET", - "SOCK_STREAM", - "SOL_AAL", - "SOL_ATM", - "SOL_DECNET", - "SOL_ICMPV6", - "SOL_IP", - "SOL_IPV6", - "SOL_IRDA", - "SOL_PACKET", - "SOL_RAW", - "SOL_SOCKET", - "SOL_TCP", - "SOL_X25", - "SOMAXCONN", - "SO_ACCEPTCONN", - "SO_ACCEPTFILTER", - "SO_ATTACH_FILTER", - "SO_BINDANY", - "SO_BINDTODEVICE", - "SO_BINTIME", - "SO_BROADCAST", - "SO_BSDCOMPAT", - "SO_DEBUG", - "SO_DETACH_FILTER", - "SO_DOMAIN", - "SO_DONTROUTE", - "SO_DONTTRUNC", - "SO_ERROR", - "SO_KEEPALIVE", - "SO_LABEL", - "SO_LINGER", - "SO_LINGER_SEC", - "SO_LISTENINCQLEN", - "SO_LISTENQLEN", - "SO_LISTENQLIMIT", - "SO_MARK", - "SO_NETPROC", - "SO_NKE", - "SO_NOADDRERR", - "SO_NOHEADER", - "SO_NOSIGPIPE", - "SO_NOTIFYCONFLICT", - "SO_NO_CHECK", - "SO_NO_DDP", - "SO_NO_OFFLOAD", - "SO_NP_EXTENSIONS", - "SO_NREAD", - "SO_NUMRCVPKT", - "SO_NWRITE", - "SO_OOBINLINE", - "SO_OVERFLOWED", - "SO_PASSCRED", - "SO_PASSSEC", - "SO_PEERCRED", - "SO_PEERLABEL", - "SO_PEERNAME", - "SO_PEERSEC", - "SO_PRIORITY", - "SO_PROTOCOL", - "SO_PROTOTYPE", - "SO_RANDOMPORT", - "SO_RCVBUF", - "SO_RCVBUFFORCE", - "SO_RCVLOWAT", - "SO_RCVTIMEO", - "SO_RESTRICTIONS", - "SO_RESTRICT_DENYIN", - "SO_RESTRICT_DENYOUT", - "SO_RESTRICT_DENYSET", - "SO_REUSEADDR", - "SO_REUSEPORT", - "SO_REUSESHAREUID", - "SO_RTABLE", - "SO_RXQ_OVFL", - "SO_SECURITY_AUTHENTICATION", - "SO_SECURITY_ENCRYPTION_NETWORK", - "SO_SECURITY_ENCRYPTION_TRANSPORT", - "SO_SETFIB", - "SO_SNDBUF", - "SO_SNDBUFFORCE", - "SO_SNDLOWAT", - "SO_SNDTIMEO", - "SO_SPLICE", - "SO_TIMESTAMP", - "SO_TIMESTAMPING", - "SO_TIMESTAMPNS", - "SO_TIMESTAMP_MONOTONIC", - "SO_TYPE", - "SO_UPCALLCLOSEWAIT", - "SO_UPDATE_ACCEPT_CONTEXT", - "SO_UPDATE_CONNECT_CONTEXT", - "SO_USELOOPBACK", - "SO_USER_COOKIE", - "SO_VENDOR", - "SO_WANTMORE", - "SO_WANTOOBFLAG", - "SSLExtraCertChainPolicyPara", - "STANDARD_RIGHTS_ALL", - "STANDARD_RIGHTS_EXECUTE", - "STANDARD_RIGHTS_READ", - "STANDARD_RIGHTS_REQUIRED", - "STANDARD_RIGHTS_WRITE", - "STARTF_USESHOWWINDOW", - "STARTF_USESTDHANDLES", - "STD_ERROR_HANDLE", - "STD_INPUT_HANDLE", - "STD_OUTPUT_HANDLE", - "SUBLANG_ENGLISH_US", - "SW_FORCEMINIMIZE", - "SW_HIDE", - "SW_MAXIMIZE", - "SW_MINIMIZE", - "SW_NORMAL", - "SW_RESTORE", - "SW_SHOW", - "SW_SHOWDEFAULT", - "SW_SHOWMAXIMIZED", - "SW_SHOWMINIMIZED", - "SW_SHOWMINNOACTIVE", - "SW_SHOWNA", - "SW_SHOWNOACTIVATE", - "SW_SHOWNORMAL", - "SYMBOLIC_LINK_FLAG_DIRECTORY", - "SYNCHRONIZE", - "SYSCTL_VERSION", - "SYSCTL_VERS_0", - "SYSCTL_VERS_1", - "SYSCTL_VERS_MASK", - "SYS_ABORT2", - "SYS_ACCEPT", - "SYS_ACCEPT4", - "SYS_ACCEPT_NOCANCEL", - "SYS_ACCESS", - "SYS_ACCESS_EXTENDED", - "SYS_ACCT", - "SYS_ADD_KEY", - "SYS_ADD_PROFIL", - "SYS_ADJFREQ", - "SYS_ADJTIME", - "SYS_ADJTIMEX", - "SYS_AFS_SYSCALL", - "SYS_AIO_CANCEL", - "SYS_AIO_ERROR", - "SYS_AIO_FSYNC", - "SYS_AIO_MLOCK", - "SYS_AIO_READ", - "SYS_AIO_RETURN", - "SYS_AIO_SUSPEND", - "SYS_AIO_SUSPEND_NOCANCEL", - "SYS_AIO_WAITCOMPLETE", - "SYS_AIO_WRITE", - "SYS_ALARM", - "SYS_ARCH_PRCTL", - "SYS_ARM_FADVISE64_64", - "SYS_ARM_SYNC_FILE_RANGE", - "SYS_ATGETMSG", - "SYS_ATPGETREQ", - "SYS_ATPGETRSP", - "SYS_ATPSNDREQ", - "SYS_ATPSNDRSP", - "SYS_ATPUTMSG", - "SYS_ATSOCKET", - "SYS_AUDIT", - "SYS_AUDITCTL", - "SYS_AUDITON", - "SYS_AUDIT_SESSION_JOIN", - "SYS_AUDIT_SESSION_PORT", - "SYS_AUDIT_SESSION_SELF", - "SYS_BDFLUSH", - "SYS_BIND", - "SYS_BINDAT", - "SYS_BREAK", - "SYS_BRK", - "SYS_BSDTHREAD_CREATE", - "SYS_BSDTHREAD_REGISTER", - "SYS_BSDTHREAD_TERMINATE", - "SYS_CAPGET", - "SYS_CAPSET", - "SYS_CAP_ENTER", - "SYS_CAP_FCNTLS_GET", - "SYS_CAP_FCNTLS_LIMIT", - "SYS_CAP_GETMODE", - "SYS_CAP_GETRIGHTS", - "SYS_CAP_IOCTLS_GET", - "SYS_CAP_IOCTLS_LIMIT", - "SYS_CAP_NEW", - "SYS_CAP_RIGHTS_GET", - "SYS_CAP_RIGHTS_LIMIT", - "SYS_CHDIR", - "SYS_CHFLAGS", - "SYS_CHFLAGSAT", - "SYS_CHMOD", - "SYS_CHMOD_EXTENDED", - "SYS_CHOWN", - "SYS_CHOWN32", - "SYS_CHROOT", - "SYS_CHUD", - "SYS_CLOCK_ADJTIME", - "SYS_CLOCK_GETCPUCLOCKID2", - "SYS_CLOCK_GETRES", - "SYS_CLOCK_GETTIME", - "SYS_CLOCK_NANOSLEEP", - "SYS_CLOCK_SETTIME", - "SYS_CLONE", - "SYS_CLOSE", - "SYS_CLOSEFROM", - "SYS_CLOSE_NOCANCEL", - "SYS_CONNECT", - "SYS_CONNECTAT", - "SYS_CONNECT_NOCANCEL", - "SYS_COPYFILE", - "SYS_CPUSET", - "SYS_CPUSET_GETAFFINITY", - "SYS_CPUSET_GETID", - "SYS_CPUSET_SETAFFINITY", - "SYS_CPUSET_SETID", - "SYS_CREAT", - "SYS_CREATE_MODULE", - "SYS_CSOPS", - "SYS_CSOPS_AUDITTOKEN", - "SYS_DELETE", - "SYS_DELETE_MODULE", - "SYS_DUP", - "SYS_DUP2", - "SYS_DUP3", - "SYS_EACCESS", - "SYS_EPOLL_CREATE", - "SYS_EPOLL_CREATE1", - "SYS_EPOLL_CTL", - "SYS_EPOLL_CTL_OLD", - "SYS_EPOLL_PWAIT", - "SYS_EPOLL_WAIT", - "SYS_EPOLL_WAIT_OLD", - "SYS_EVENTFD", - "SYS_EVENTFD2", - "SYS_EXCHANGEDATA", - "SYS_EXECVE", - "SYS_EXIT", - "SYS_EXIT_GROUP", - "SYS_EXTATTRCTL", - "SYS_EXTATTR_DELETE_FD", - "SYS_EXTATTR_DELETE_FILE", - "SYS_EXTATTR_DELETE_LINK", - "SYS_EXTATTR_GET_FD", - "SYS_EXTATTR_GET_FILE", - "SYS_EXTATTR_GET_LINK", - "SYS_EXTATTR_LIST_FD", - "SYS_EXTATTR_LIST_FILE", - "SYS_EXTATTR_LIST_LINK", - "SYS_EXTATTR_SET_FD", - "SYS_EXTATTR_SET_FILE", - "SYS_EXTATTR_SET_LINK", - "SYS_FACCESSAT", - "SYS_FADVISE64", - "SYS_FADVISE64_64", - "SYS_FALLOCATE", - "SYS_FANOTIFY_INIT", - "SYS_FANOTIFY_MARK", - "SYS_FCHDIR", - "SYS_FCHFLAGS", - "SYS_FCHMOD", - "SYS_FCHMODAT", - "SYS_FCHMOD_EXTENDED", - "SYS_FCHOWN", - "SYS_FCHOWN32", - "SYS_FCHOWNAT", - "SYS_FCHROOT", - "SYS_FCNTL", - "SYS_FCNTL64", - "SYS_FCNTL_NOCANCEL", - "SYS_FDATASYNC", - "SYS_FEXECVE", - "SYS_FFCLOCK_GETCOUNTER", - "SYS_FFCLOCK_GETESTIMATE", - "SYS_FFCLOCK_SETESTIMATE", - "SYS_FFSCTL", - "SYS_FGETATTRLIST", - "SYS_FGETXATTR", - "SYS_FHOPEN", - "SYS_FHSTAT", - "SYS_FHSTATFS", - "SYS_FILEPORT_MAKEFD", - "SYS_FILEPORT_MAKEPORT", - "SYS_FKTRACE", - "SYS_FLISTXATTR", - "SYS_FLOCK", - "SYS_FORK", - "SYS_FPATHCONF", - "SYS_FREEBSD6_FTRUNCATE", - "SYS_FREEBSD6_LSEEK", - "SYS_FREEBSD6_MMAP", - "SYS_FREEBSD6_PREAD", - "SYS_FREEBSD6_PWRITE", - "SYS_FREEBSD6_TRUNCATE", - "SYS_FREMOVEXATTR", - "SYS_FSCTL", - "SYS_FSETATTRLIST", - "SYS_FSETXATTR", - "SYS_FSGETPATH", - "SYS_FSTAT", - "SYS_FSTAT64", - "SYS_FSTAT64_EXTENDED", - "SYS_FSTATAT", - "SYS_FSTATAT64", - "SYS_FSTATFS", - "SYS_FSTATFS64", - "SYS_FSTATV", - "SYS_FSTATVFS1", - "SYS_FSTAT_EXTENDED", - "SYS_FSYNC", - "SYS_FSYNC_NOCANCEL", - "SYS_FSYNC_RANGE", - "SYS_FTIME", - "SYS_FTRUNCATE", - "SYS_FTRUNCATE64", - "SYS_FUTEX", - "SYS_FUTIMENS", - "SYS_FUTIMES", - "SYS_FUTIMESAT", - "SYS_GETATTRLIST", - "SYS_GETAUDIT", - "SYS_GETAUDIT_ADDR", - "SYS_GETAUID", - "SYS_GETCONTEXT", - "SYS_GETCPU", - "SYS_GETCWD", - "SYS_GETDENTS", - "SYS_GETDENTS64", - "SYS_GETDIRENTRIES", - "SYS_GETDIRENTRIES64", - "SYS_GETDIRENTRIESATTR", - "SYS_GETDTABLECOUNT", - "SYS_GETDTABLESIZE", - "SYS_GETEGID", - "SYS_GETEGID32", - "SYS_GETEUID", - "SYS_GETEUID32", - "SYS_GETFH", - "SYS_GETFSSTAT", - "SYS_GETFSSTAT64", - "SYS_GETGID", - "SYS_GETGID32", - "SYS_GETGROUPS", - "SYS_GETGROUPS32", - "SYS_GETHOSTUUID", - "SYS_GETITIMER", - "SYS_GETLCID", - "SYS_GETLOGIN", - "SYS_GETLOGINCLASS", - "SYS_GETPEERNAME", - "SYS_GETPGID", - "SYS_GETPGRP", - "SYS_GETPID", - "SYS_GETPMSG", - "SYS_GETPPID", - "SYS_GETPRIORITY", - "SYS_GETRESGID", - "SYS_GETRESGID32", - "SYS_GETRESUID", - "SYS_GETRESUID32", - "SYS_GETRLIMIT", - "SYS_GETRTABLE", - "SYS_GETRUSAGE", - "SYS_GETSGROUPS", - "SYS_GETSID", - "SYS_GETSOCKNAME", - "SYS_GETSOCKOPT", - "SYS_GETTHRID", - "SYS_GETTID", - "SYS_GETTIMEOFDAY", - "SYS_GETUID", - "SYS_GETUID32", - "SYS_GETVFSSTAT", - "SYS_GETWGROUPS", - "SYS_GETXATTR", - "SYS_GET_KERNEL_SYMS", - "SYS_GET_MEMPOLICY", - "SYS_GET_ROBUST_LIST", - "SYS_GET_THREAD_AREA", - "SYS_GSSD_SYSCALL", - "SYS_GTTY", - "SYS_IDENTITYSVC", - "SYS_IDLE", - "SYS_INITGROUPS", - "SYS_INIT_MODULE", - "SYS_INOTIFY_ADD_WATCH", - "SYS_INOTIFY_INIT", - "SYS_INOTIFY_INIT1", - "SYS_INOTIFY_RM_WATCH", - "SYS_IOCTL", - "SYS_IOPERM", - "SYS_IOPL", - "SYS_IOPOLICYSYS", - "SYS_IOPRIO_GET", - "SYS_IOPRIO_SET", - "SYS_IO_CANCEL", - "SYS_IO_DESTROY", - "SYS_IO_GETEVENTS", - "SYS_IO_SETUP", - "SYS_IO_SUBMIT", - "SYS_IPC", - "SYS_ISSETUGID", - "SYS_JAIL", - "SYS_JAIL_ATTACH", - "SYS_JAIL_GET", - "SYS_JAIL_REMOVE", - "SYS_JAIL_SET", - "SYS_KAS_INFO", - "SYS_KDEBUG_TRACE", - "SYS_KENV", - "SYS_KEVENT", - "SYS_KEVENT64", - "SYS_KEXEC_LOAD", - "SYS_KEYCTL", - "SYS_KILL", - "SYS_KLDFIND", - "SYS_KLDFIRSTMOD", - "SYS_KLDLOAD", - "SYS_KLDNEXT", - "SYS_KLDSTAT", - "SYS_KLDSYM", - "SYS_KLDUNLOAD", - "SYS_KLDUNLOADF", - "SYS_KMQ_NOTIFY", - "SYS_KMQ_OPEN", - "SYS_KMQ_SETATTR", - "SYS_KMQ_TIMEDRECEIVE", - "SYS_KMQ_TIMEDSEND", - "SYS_KMQ_UNLINK", - "SYS_KQUEUE", - "SYS_KQUEUE1", - "SYS_KSEM_CLOSE", - "SYS_KSEM_DESTROY", - "SYS_KSEM_GETVALUE", - "SYS_KSEM_INIT", - "SYS_KSEM_OPEN", - "SYS_KSEM_POST", - "SYS_KSEM_TIMEDWAIT", - "SYS_KSEM_TRYWAIT", - "SYS_KSEM_UNLINK", - "SYS_KSEM_WAIT", - "SYS_KTIMER_CREATE", - "SYS_KTIMER_DELETE", - "SYS_KTIMER_GETOVERRUN", - "SYS_KTIMER_GETTIME", - "SYS_KTIMER_SETTIME", - "SYS_KTRACE", - "SYS_LCHFLAGS", - "SYS_LCHMOD", - "SYS_LCHOWN", - "SYS_LCHOWN32", - "SYS_LEDGER", - "SYS_LGETFH", - "SYS_LGETXATTR", - "SYS_LINK", - "SYS_LINKAT", - "SYS_LIO_LISTIO", - "SYS_LISTEN", - "SYS_LISTXATTR", - "SYS_LLISTXATTR", - "SYS_LOCK", - "SYS_LOOKUP_DCOOKIE", - "SYS_LPATHCONF", - "SYS_LREMOVEXATTR", - "SYS_LSEEK", - "SYS_LSETXATTR", - "SYS_LSTAT", - "SYS_LSTAT64", - "SYS_LSTAT64_EXTENDED", - "SYS_LSTATV", - "SYS_LSTAT_EXTENDED", - "SYS_LUTIMES", - "SYS_MAC_SYSCALL", - "SYS_MADVISE", - "SYS_MADVISE1", - "SYS_MAXSYSCALL", - "SYS_MBIND", - "SYS_MIGRATE_PAGES", - "SYS_MINCORE", - "SYS_MINHERIT", - "SYS_MKCOMPLEX", - "SYS_MKDIR", - "SYS_MKDIRAT", - "SYS_MKDIR_EXTENDED", - "SYS_MKFIFO", - "SYS_MKFIFOAT", - "SYS_MKFIFO_EXTENDED", - "SYS_MKNOD", - "SYS_MKNODAT", - "SYS_MLOCK", - "SYS_MLOCKALL", - "SYS_MMAP", - "SYS_MMAP2", - "SYS_MODCTL", - "SYS_MODFIND", - "SYS_MODFNEXT", - "SYS_MODIFY_LDT", - "SYS_MODNEXT", - "SYS_MODSTAT", - "SYS_MODWATCH", - "SYS_MOUNT", - "SYS_MOVE_PAGES", - "SYS_MPROTECT", - "SYS_MPX", - "SYS_MQUERY", - "SYS_MQ_GETSETATTR", - "SYS_MQ_NOTIFY", - "SYS_MQ_OPEN", - "SYS_MQ_TIMEDRECEIVE", - "SYS_MQ_TIMEDSEND", - "SYS_MQ_UNLINK", - "SYS_MREMAP", - "SYS_MSGCTL", - "SYS_MSGGET", - "SYS_MSGRCV", - "SYS_MSGRCV_NOCANCEL", - "SYS_MSGSND", - "SYS_MSGSND_NOCANCEL", - "SYS_MSGSYS", - "SYS_MSYNC", - "SYS_MSYNC_NOCANCEL", - "SYS_MUNLOCK", - "SYS_MUNLOCKALL", - "SYS_MUNMAP", - "SYS_NAME_TO_HANDLE_AT", - "SYS_NANOSLEEP", - "SYS_NEWFSTATAT", - "SYS_NFSCLNT", - "SYS_NFSSERVCTL", - "SYS_NFSSVC", - "SYS_NFSTAT", - "SYS_NICE", - "SYS_NLM_SYSCALL", - "SYS_NLSTAT", - "SYS_NMOUNT", - "SYS_NSTAT", - "SYS_NTP_ADJTIME", - "SYS_NTP_GETTIME", - "SYS_NUMA_GETAFFINITY", - "SYS_NUMA_SETAFFINITY", - "SYS_OABI_SYSCALL_BASE", - "SYS_OBREAK", - "SYS_OLDFSTAT", - "SYS_OLDLSTAT", - "SYS_OLDOLDUNAME", - "SYS_OLDSTAT", - "SYS_OLDUNAME", - "SYS_OPEN", - "SYS_OPENAT", - "SYS_OPENBSD_POLL", - "SYS_OPEN_BY_HANDLE_AT", - "SYS_OPEN_DPROTECTED_NP", - "SYS_OPEN_EXTENDED", - "SYS_OPEN_NOCANCEL", - "SYS_OVADVISE", - "SYS_PACCEPT", - "SYS_PATHCONF", - "SYS_PAUSE", - "SYS_PCICONFIG_IOBASE", - "SYS_PCICONFIG_READ", - "SYS_PCICONFIG_WRITE", - "SYS_PDFORK", - "SYS_PDGETPID", - "SYS_PDKILL", - "SYS_PERF_EVENT_OPEN", - "SYS_PERSONALITY", - "SYS_PID_HIBERNATE", - "SYS_PID_RESUME", - "SYS_PID_SHUTDOWN_SOCKETS", - "SYS_PID_SUSPEND", - "SYS_PIPE", - "SYS_PIPE2", - "SYS_PIVOT_ROOT", - "SYS_PMC_CONTROL", - "SYS_PMC_GET_INFO", - "SYS_POLL", - "SYS_POLLTS", - "SYS_POLL_NOCANCEL", - "SYS_POSIX_FADVISE", - "SYS_POSIX_FALLOCATE", - "SYS_POSIX_OPENPT", - "SYS_POSIX_SPAWN", - "SYS_PPOLL", - "SYS_PRCTL", - "SYS_PREAD", - "SYS_PREAD64", - "SYS_PREADV", - "SYS_PREAD_NOCANCEL", - "SYS_PRLIMIT64", - "SYS_PROCCTL", - "SYS_PROCESS_POLICY", - "SYS_PROCESS_VM_READV", - "SYS_PROCESS_VM_WRITEV", - "SYS_PROC_INFO", - "SYS_PROF", - "SYS_PROFIL", - "SYS_PSELECT", - "SYS_PSELECT6", - "SYS_PSET_ASSIGN", - "SYS_PSET_CREATE", - "SYS_PSET_DESTROY", - "SYS_PSYNCH_CVBROAD", - "SYS_PSYNCH_CVCLRPREPOST", - "SYS_PSYNCH_CVSIGNAL", - "SYS_PSYNCH_CVWAIT", - "SYS_PSYNCH_MUTEXDROP", - "SYS_PSYNCH_MUTEXWAIT", - "SYS_PSYNCH_RW_DOWNGRADE", - "SYS_PSYNCH_RW_LONGRDLOCK", - "SYS_PSYNCH_RW_RDLOCK", - "SYS_PSYNCH_RW_UNLOCK", - "SYS_PSYNCH_RW_UNLOCK2", - "SYS_PSYNCH_RW_UPGRADE", - "SYS_PSYNCH_RW_WRLOCK", - "SYS_PSYNCH_RW_YIELDWRLOCK", - "SYS_PTRACE", - "SYS_PUTPMSG", - "SYS_PWRITE", - "SYS_PWRITE64", - "SYS_PWRITEV", - "SYS_PWRITE_NOCANCEL", - "SYS_QUERY_MODULE", - "SYS_QUOTACTL", - "SYS_RASCTL", - "SYS_RCTL_ADD_RULE", - "SYS_RCTL_GET_LIMITS", - "SYS_RCTL_GET_RACCT", - "SYS_RCTL_GET_RULES", - "SYS_RCTL_REMOVE_RULE", - "SYS_READ", - "SYS_READAHEAD", - "SYS_READDIR", - "SYS_READLINK", - "SYS_READLINKAT", - "SYS_READV", - "SYS_READV_NOCANCEL", - "SYS_READ_NOCANCEL", - "SYS_REBOOT", - "SYS_RECV", - "SYS_RECVFROM", - "SYS_RECVFROM_NOCANCEL", - "SYS_RECVMMSG", - "SYS_RECVMSG", - "SYS_RECVMSG_NOCANCEL", - "SYS_REMAP_FILE_PAGES", - "SYS_REMOVEXATTR", - "SYS_RENAME", - "SYS_RENAMEAT", - "SYS_REQUEST_KEY", - "SYS_RESTART_SYSCALL", - "SYS_REVOKE", - "SYS_RFORK", - "SYS_RMDIR", - "SYS_RTPRIO", - "SYS_RTPRIO_THREAD", - "SYS_RT_SIGACTION", - "SYS_RT_SIGPENDING", - "SYS_RT_SIGPROCMASK", - "SYS_RT_SIGQUEUEINFO", - "SYS_RT_SIGRETURN", - "SYS_RT_SIGSUSPEND", - "SYS_RT_SIGTIMEDWAIT", - "SYS_RT_TGSIGQUEUEINFO", - "SYS_SBRK", - "SYS_SCHED_GETAFFINITY", - "SYS_SCHED_GETPARAM", - "SYS_SCHED_GETSCHEDULER", - "SYS_SCHED_GET_PRIORITY_MAX", - "SYS_SCHED_GET_PRIORITY_MIN", - "SYS_SCHED_RR_GET_INTERVAL", - "SYS_SCHED_SETAFFINITY", - "SYS_SCHED_SETPARAM", - "SYS_SCHED_SETSCHEDULER", - "SYS_SCHED_YIELD", - "SYS_SCTP_GENERIC_RECVMSG", - "SYS_SCTP_GENERIC_SENDMSG", - "SYS_SCTP_GENERIC_SENDMSG_IOV", - "SYS_SCTP_PEELOFF", - "SYS_SEARCHFS", - "SYS_SECURITY", - "SYS_SELECT", - "SYS_SELECT_NOCANCEL", - "SYS_SEMCONFIG", - "SYS_SEMCTL", - "SYS_SEMGET", - "SYS_SEMOP", - "SYS_SEMSYS", - "SYS_SEMTIMEDOP", - "SYS_SEM_CLOSE", - "SYS_SEM_DESTROY", - "SYS_SEM_GETVALUE", - "SYS_SEM_INIT", - "SYS_SEM_OPEN", - "SYS_SEM_POST", - "SYS_SEM_TRYWAIT", - "SYS_SEM_UNLINK", - "SYS_SEM_WAIT", - "SYS_SEM_WAIT_NOCANCEL", - "SYS_SEND", - "SYS_SENDFILE", - "SYS_SENDFILE64", - "SYS_SENDMMSG", - "SYS_SENDMSG", - "SYS_SENDMSG_NOCANCEL", - "SYS_SENDTO", - "SYS_SENDTO_NOCANCEL", - "SYS_SETATTRLIST", - "SYS_SETAUDIT", - "SYS_SETAUDIT_ADDR", - "SYS_SETAUID", - "SYS_SETCONTEXT", - "SYS_SETDOMAINNAME", - "SYS_SETEGID", - "SYS_SETEUID", - "SYS_SETFIB", - "SYS_SETFSGID", - "SYS_SETFSGID32", - "SYS_SETFSUID", - "SYS_SETFSUID32", - "SYS_SETGID", - "SYS_SETGID32", - "SYS_SETGROUPS", - "SYS_SETGROUPS32", - "SYS_SETHOSTNAME", - "SYS_SETITIMER", - "SYS_SETLCID", - "SYS_SETLOGIN", - "SYS_SETLOGINCLASS", - "SYS_SETNS", - "SYS_SETPGID", - "SYS_SETPRIORITY", - "SYS_SETPRIVEXEC", - "SYS_SETREGID", - "SYS_SETREGID32", - "SYS_SETRESGID", - "SYS_SETRESGID32", - "SYS_SETRESUID", - "SYS_SETRESUID32", - "SYS_SETREUID", - "SYS_SETREUID32", - "SYS_SETRLIMIT", - "SYS_SETRTABLE", - "SYS_SETSGROUPS", - "SYS_SETSID", - "SYS_SETSOCKOPT", - "SYS_SETTID", - "SYS_SETTID_WITH_PID", - "SYS_SETTIMEOFDAY", - "SYS_SETUID", - "SYS_SETUID32", - "SYS_SETWGROUPS", - "SYS_SETXATTR", - "SYS_SET_MEMPOLICY", - "SYS_SET_ROBUST_LIST", - "SYS_SET_THREAD_AREA", - "SYS_SET_TID_ADDRESS", - "SYS_SGETMASK", - "SYS_SHARED_REGION_CHECK_NP", - "SYS_SHARED_REGION_MAP_AND_SLIDE_NP", - "SYS_SHMAT", - "SYS_SHMCTL", - "SYS_SHMDT", - "SYS_SHMGET", - "SYS_SHMSYS", - "SYS_SHM_OPEN", - "SYS_SHM_UNLINK", - "SYS_SHUTDOWN", - "SYS_SIGACTION", - "SYS_SIGALTSTACK", - "SYS_SIGNAL", - "SYS_SIGNALFD", - "SYS_SIGNALFD4", - "SYS_SIGPENDING", - "SYS_SIGPROCMASK", - "SYS_SIGQUEUE", - "SYS_SIGQUEUEINFO", - "SYS_SIGRETURN", - "SYS_SIGSUSPEND", - "SYS_SIGSUSPEND_NOCANCEL", - "SYS_SIGTIMEDWAIT", - "SYS_SIGWAIT", - "SYS_SIGWAITINFO", - "SYS_SOCKET", - "SYS_SOCKETCALL", - "SYS_SOCKETPAIR", - "SYS_SPLICE", - "SYS_SSETMASK", - "SYS_SSTK", - "SYS_STACK_SNAPSHOT", - "SYS_STAT", - "SYS_STAT64", - "SYS_STAT64_EXTENDED", - "SYS_STATFS", - "SYS_STATFS64", - "SYS_STATV", - "SYS_STATVFS1", - "SYS_STAT_EXTENDED", - "SYS_STIME", - "SYS_STTY", - "SYS_SWAPCONTEXT", - "SYS_SWAPCTL", - "SYS_SWAPOFF", - "SYS_SWAPON", - "SYS_SYMLINK", - "SYS_SYMLINKAT", - "SYS_SYNC", - "SYS_SYNCFS", - "SYS_SYNC_FILE_RANGE", - "SYS_SYSARCH", - "SYS_SYSCALL", - "SYS_SYSCALL_BASE", - "SYS_SYSFS", - "SYS_SYSINFO", - "SYS_SYSLOG", - "SYS_TEE", - "SYS_TGKILL", - "SYS_THREAD_SELFID", - "SYS_THR_CREATE", - "SYS_THR_EXIT", - "SYS_THR_KILL", - "SYS_THR_KILL2", - "SYS_THR_NEW", - "SYS_THR_SELF", - "SYS_THR_SET_NAME", - "SYS_THR_SUSPEND", - "SYS_THR_WAKE", - "SYS_TIME", - "SYS_TIMERFD_CREATE", - "SYS_TIMERFD_GETTIME", - "SYS_TIMERFD_SETTIME", - "SYS_TIMER_CREATE", - "SYS_TIMER_DELETE", - "SYS_TIMER_GETOVERRUN", - "SYS_TIMER_GETTIME", - "SYS_TIMER_SETTIME", - "SYS_TIMES", - "SYS_TKILL", - "SYS_TRUNCATE", - "SYS_TRUNCATE64", - "SYS_TUXCALL", - "SYS_UGETRLIMIT", - "SYS_ULIMIT", - "SYS_UMASK", - "SYS_UMASK_EXTENDED", - "SYS_UMOUNT", - "SYS_UMOUNT2", - "SYS_UNAME", - "SYS_UNDELETE", - "SYS_UNLINK", - "SYS_UNLINKAT", - "SYS_UNMOUNT", - "SYS_UNSHARE", - "SYS_USELIB", - "SYS_USTAT", - "SYS_UTIME", - "SYS_UTIMENSAT", - "SYS_UTIMES", - "SYS_UTRACE", - "SYS_UUIDGEN", - "SYS_VADVISE", - "SYS_VFORK", - "SYS_VHANGUP", - "SYS_VM86", - "SYS_VM86OLD", - "SYS_VMSPLICE", - "SYS_VM_PRESSURE_MONITOR", - "SYS_VSERVER", - "SYS_WAIT4", - "SYS_WAIT4_NOCANCEL", - "SYS_WAIT6", - "SYS_WAITEVENT", - "SYS_WAITID", - "SYS_WAITID_NOCANCEL", - "SYS_WAITPID", - "SYS_WATCHEVENT", - "SYS_WORKQ_KERNRETURN", - "SYS_WORKQ_OPEN", - "SYS_WRITE", - "SYS_WRITEV", - "SYS_WRITEV_NOCANCEL", - "SYS_WRITE_NOCANCEL", - "SYS_YIELD", - "SYS__LLSEEK", - "SYS__LWP_CONTINUE", - "SYS__LWP_CREATE", - "SYS__LWP_CTL", - "SYS__LWP_DETACH", - "SYS__LWP_EXIT", - "SYS__LWP_GETNAME", - "SYS__LWP_GETPRIVATE", - "SYS__LWP_KILL", - "SYS__LWP_PARK", - "SYS__LWP_SELF", - "SYS__LWP_SETNAME", - "SYS__LWP_SETPRIVATE", - "SYS__LWP_SUSPEND", - "SYS__LWP_UNPARK", - "SYS__LWP_UNPARK_ALL", - "SYS__LWP_WAIT", - "SYS__LWP_WAKEUP", - "SYS__NEWSELECT", - "SYS__PSET_BIND", - "SYS__SCHED_GETAFFINITY", - "SYS__SCHED_GETPARAM", - "SYS__SCHED_SETAFFINITY", - "SYS__SCHED_SETPARAM", - "SYS__SYSCTL", - "SYS__UMTX_LOCK", - "SYS__UMTX_OP", - "SYS__UMTX_UNLOCK", - "SYS___ACL_ACLCHECK_FD", - "SYS___ACL_ACLCHECK_FILE", - "SYS___ACL_ACLCHECK_LINK", - "SYS___ACL_DELETE_FD", - "SYS___ACL_DELETE_FILE", - "SYS___ACL_DELETE_LINK", - "SYS___ACL_GET_FD", - "SYS___ACL_GET_FILE", - "SYS___ACL_GET_LINK", - "SYS___ACL_SET_FD", - "SYS___ACL_SET_FILE", - "SYS___ACL_SET_LINK", - "SYS___CAP_RIGHTS_GET", - "SYS___CLONE", - "SYS___DISABLE_THREADSIGNAL", - "SYS___GETCWD", - "SYS___GETLOGIN", - "SYS___GET_TCB", - "SYS___MAC_EXECVE", - "SYS___MAC_GETFSSTAT", - "SYS___MAC_GET_FD", - "SYS___MAC_GET_FILE", - "SYS___MAC_GET_LCID", - "SYS___MAC_GET_LCTX", - "SYS___MAC_GET_LINK", - "SYS___MAC_GET_MOUNT", - "SYS___MAC_GET_PID", - "SYS___MAC_GET_PROC", - "SYS___MAC_MOUNT", - "SYS___MAC_SET_FD", - "SYS___MAC_SET_FILE", - "SYS___MAC_SET_LCTX", - "SYS___MAC_SET_LINK", - "SYS___MAC_SET_PROC", - "SYS___MAC_SYSCALL", - "SYS___OLD_SEMWAIT_SIGNAL", - "SYS___OLD_SEMWAIT_SIGNAL_NOCANCEL", - "SYS___POSIX_CHOWN", - "SYS___POSIX_FCHOWN", - "SYS___POSIX_LCHOWN", - "SYS___POSIX_RENAME", - "SYS___PTHREAD_CANCELED", - "SYS___PTHREAD_CHDIR", - "SYS___PTHREAD_FCHDIR", - "SYS___PTHREAD_KILL", - "SYS___PTHREAD_MARKCANCEL", - "SYS___PTHREAD_SIGMASK", - "SYS___QUOTACTL", - "SYS___SEMCTL", - "SYS___SEMWAIT_SIGNAL", - "SYS___SEMWAIT_SIGNAL_NOCANCEL", - "SYS___SETLOGIN", - "SYS___SETUGID", - "SYS___SET_TCB", - "SYS___SIGACTION_SIGTRAMP", - "SYS___SIGTIMEDWAIT", - "SYS___SIGWAIT", - "SYS___SIGWAIT_NOCANCEL", - "SYS___SYSCTL", - "SYS___TFORK", - "SYS___THREXIT", - "SYS___THRSIGDIVERT", - "SYS___THRSLEEP", - "SYS___THRWAKEUP", - "S_ARCH1", - "S_ARCH2", - "S_BLKSIZE", - "S_IEXEC", - "S_IFBLK", - "S_IFCHR", - "S_IFDIR", - "S_IFIFO", - "S_IFLNK", - "S_IFMT", - "S_IFREG", - "S_IFSOCK", - "S_IFWHT", - "S_IREAD", - "S_IRGRP", - "S_IROTH", - "S_IRUSR", - "S_IRWXG", - "S_IRWXO", - "S_IRWXU", - "S_ISGID", - "S_ISTXT", - "S_ISUID", - "S_ISVTX", - "S_IWGRP", - "S_IWOTH", - "S_IWRITE", - "S_IWUSR", - "S_IXGRP", - "S_IXOTH", - "S_IXUSR", - "S_LOGIN_SET", - "SecurityAttributes", - "Seek", - "Select", - "Sendfile", - "Sendmsg", - "SendmsgN", - "Sendto", - "Servent", - "SetBpf", - "SetBpfBuflen", - "SetBpfDatalink", - "SetBpfHeadercmpl", - "SetBpfImmediate", - "SetBpfInterface", - "SetBpfPromisc", - "SetBpfTimeout", - "SetCurrentDirectory", - "SetEndOfFile", - "SetEnvironmentVariable", - "SetFileAttributes", - "SetFileCompletionNotificationModes", - "SetFilePointer", - "SetFileTime", - "SetHandleInformation", - "SetKevent", - "SetLsfPromisc", - "SetNonblock", - "Setdomainname", - "Setegid", - "Setenv", - "Seteuid", - "Setfsgid", - "Setfsuid", - "Setgid", - "Setgroups", - "Sethostname", - "Setlogin", - "Setpgid", - "Setpriority", - "Setprivexec", - "Setregid", - "Setresgid", - "Setresuid", - "Setreuid", - "Setrlimit", - "Setsid", - "Setsockopt", - "SetsockoptByte", - "SetsockoptICMPv6Filter", - "SetsockoptIPMreq", - "SetsockoptIPMreqn", - "SetsockoptIPv6Mreq", - "SetsockoptInet4Addr", - "SetsockoptInt", - "SetsockoptLinger", - "SetsockoptString", - "SetsockoptTimeval", - "Settimeofday", - "Setuid", - "Setxattr", - "Shutdown", - "SidTypeAlias", - "SidTypeComputer", - "SidTypeDeletedAccount", - "SidTypeDomain", - "SidTypeGroup", - "SidTypeInvalid", - "SidTypeLabel", - "SidTypeUnknown", - "SidTypeUser", - "SidTypeWellKnownGroup", - "Signal", - "SizeofBpfHdr", - "SizeofBpfInsn", - "SizeofBpfProgram", - "SizeofBpfStat", - "SizeofBpfVersion", - "SizeofBpfZbuf", - "SizeofBpfZbufHeader", - "SizeofCmsghdr", - "SizeofICMPv6Filter", - "SizeofIPMreq", - "SizeofIPMreqn", - "SizeofIPv6MTUInfo", - "SizeofIPv6Mreq", - "SizeofIfAddrmsg", - "SizeofIfAnnounceMsghdr", - "SizeofIfData", - "SizeofIfInfomsg", - "SizeofIfMsghdr", - "SizeofIfaMsghdr", - "SizeofIfmaMsghdr", - "SizeofIfmaMsghdr2", - "SizeofInet4Pktinfo", - "SizeofInet6Pktinfo", - "SizeofInotifyEvent", - "SizeofLinger", - "SizeofMsghdr", - "SizeofNlAttr", - "SizeofNlMsgerr", - "SizeofNlMsghdr", - "SizeofRtAttr", - "SizeofRtGenmsg", - "SizeofRtMetrics", - "SizeofRtMsg", - "SizeofRtMsghdr", - "SizeofRtNexthop", - "SizeofSockFilter", - "SizeofSockFprog", - "SizeofSockaddrAny", - "SizeofSockaddrDatalink", - "SizeofSockaddrInet4", - "SizeofSockaddrInet6", - "SizeofSockaddrLinklayer", - "SizeofSockaddrNetlink", - "SizeofSockaddrUnix", - "SizeofTCPInfo", - "SizeofUcred", - "SlicePtrFromStrings", - "SockFilter", - "SockFprog", - "Sockaddr", - "SockaddrDatalink", - "SockaddrGen", - "SockaddrInet4", - "SockaddrInet6", - "SockaddrLinklayer", - "SockaddrNetlink", - "SockaddrUnix", - "Socket", - "SocketControlMessage", - "SocketDisableIPv6", - "Socketpair", - "Splice", - "StartProcess", - "StartupInfo", - "Stat", - "Stat_t", - "Statfs", - "Statfs_t", - "Stderr", - "Stdin", - "Stdout", - "StringBytePtr", - "StringByteSlice", - "StringSlicePtr", - "StringToSid", - "StringToUTF16", - "StringToUTF16Ptr", - "Symlink", - "Sync", - "SyncFileRange", - "SysProcAttr", - "SysProcIDMap", - "Syscall", - "Syscall12", - "Syscall15", - "Syscall18", - "Syscall6", - "Syscall9", - "SyscallN", - "Sysctl", - "SysctlUint32", - "Sysctlnode", - "Sysinfo", - "Sysinfo_t", - "Systemtime", - "TCGETS", - "TCIFLUSH", - "TCIOFLUSH", - "TCOFLUSH", - "TCPInfo", - "TCPKeepalive", - "TCP_CA_NAME_MAX", - "TCP_CONGCTL", - "TCP_CONGESTION", - "TCP_CONNECTIONTIMEOUT", - "TCP_CORK", - "TCP_DEFER_ACCEPT", - "TCP_ENABLE_ECN", - "TCP_INFO", - "TCP_KEEPALIVE", - "TCP_KEEPCNT", - "TCP_KEEPIDLE", - "TCP_KEEPINIT", - "TCP_KEEPINTVL", - "TCP_LINGER2", - "TCP_MAXBURST", - "TCP_MAXHLEN", - "TCP_MAXOLEN", - "TCP_MAXSEG", - "TCP_MAXWIN", - "TCP_MAX_SACK", - "TCP_MAX_WINSHIFT", - "TCP_MD5SIG", - "TCP_MD5SIG_MAXKEYLEN", - "TCP_MINMSS", - "TCP_MINMSSOVERLOAD", - "TCP_MSS", - "TCP_NODELAY", - "TCP_NOOPT", - "TCP_NOPUSH", - "TCP_NOTSENT_LOWAT", - "TCP_NSTATES", - "TCP_QUICKACK", - "TCP_RXT_CONNDROPTIME", - "TCP_RXT_FINDROP", - "TCP_SACK_ENABLE", - "TCP_SENDMOREACKS", - "TCP_SYNCNT", - "TCP_VENDOR", - "TCP_WINDOW_CLAMP", - "TCSAFLUSH", - "TCSETS", - "TF_DISCONNECT", - "TF_REUSE_SOCKET", - "TF_USE_DEFAULT_WORKER", - "TF_USE_KERNEL_APC", - "TF_USE_SYSTEM_THREAD", - "TF_WRITE_BEHIND", - "TH32CS_INHERIT", - "TH32CS_SNAPALL", - "TH32CS_SNAPHEAPLIST", - "TH32CS_SNAPMODULE", - "TH32CS_SNAPMODULE32", - "TH32CS_SNAPPROCESS", - "TH32CS_SNAPTHREAD", - "TIME_ZONE_ID_DAYLIGHT", - "TIME_ZONE_ID_STANDARD", - "TIME_ZONE_ID_UNKNOWN", - "TIOCCBRK", - "TIOCCDTR", - "TIOCCONS", - "TIOCDCDTIMESTAMP", - "TIOCDRAIN", - "TIOCDSIMICROCODE", - "TIOCEXCL", - "TIOCEXT", - "TIOCFLAG_CDTRCTS", - "TIOCFLAG_CLOCAL", - "TIOCFLAG_CRTSCTS", - "TIOCFLAG_MDMBUF", - "TIOCFLAG_PPS", - "TIOCFLAG_SOFTCAR", - "TIOCFLUSH", - "TIOCGDEV", - "TIOCGDRAINWAIT", - "TIOCGETA", - "TIOCGETD", - "TIOCGFLAGS", - "TIOCGICOUNT", - "TIOCGLCKTRMIOS", - "TIOCGLINED", - "TIOCGPGRP", - "TIOCGPTN", - "TIOCGQSIZE", - "TIOCGRANTPT", - "TIOCGRS485", - "TIOCGSERIAL", - "TIOCGSID", - "TIOCGSIZE", - "TIOCGSOFTCAR", - "TIOCGTSTAMP", - "TIOCGWINSZ", - "TIOCINQ", - "TIOCIXOFF", - "TIOCIXON", - "TIOCLINUX", - "TIOCMBIC", - "TIOCMBIS", - "TIOCMGDTRWAIT", - "TIOCMGET", - "TIOCMIWAIT", - "TIOCMODG", - "TIOCMODS", - "TIOCMSDTRWAIT", - "TIOCMSET", - "TIOCM_CAR", - "TIOCM_CD", - "TIOCM_CTS", - "TIOCM_DCD", - "TIOCM_DSR", - "TIOCM_DTR", - "TIOCM_LE", - "TIOCM_RI", - "TIOCM_RNG", - "TIOCM_RTS", - "TIOCM_SR", - "TIOCM_ST", - "TIOCNOTTY", - "TIOCNXCL", - "TIOCOUTQ", - "TIOCPKT", - "TIOCPKT_DATA", - "TIOCPKT_DOSTOP", - "TIOCPKT_FLUSHREAD", - "TIOCPKT_FLUSHWRITE", - "TIOCPKT_IOCTL", - "TIOCPKT_NOSTOP", - "TIOCPKT_START", - "TIOCPKT_STOP", - "TIOCPTMASTER", - "TIOCPTMGET", - "TIOCPTSNAME", - "TIOCPTYGNAME", - "TIOCPTYGRANT", - "TIOCPTYUNLK", - "TIOCRCVFRAME", - "TIOCREMOTE", - "TIOCSBRK", - "TIOCSCONS", - "TIOCSCTTY", - "TIOCSDRAINWAIT", - "TIOCSDTR", - "TIOCSERCONFIG", - "TIOCSERGETLSR", - "TIOCSERGETMULTI", - "TIOCSERGSTRUCT", - "TIOCSERGWILD", - "TIOCSERSETMULTI", - "TIOCSERSWILD", - "TIOCSER_TEMT", - "TIOCSETA", - "TIOCSETAF", - "TIOCSETAW", - "TIOCSETD", - "TIOCSFLAGS", - "TIOCSIG", - "TIOCSLCKTRMIOS", - "TIOCSLINED", - "TIOCSPGRP", - "TIOCSPTLCK", - "TIOCSQSIZE", - "TIOCSRS485", - "TIOCSSERIAL", - "TIOCSSIZE", - "TIOCSSOFTCAR", - "TIOCSTART", - "TIOCSTAT", - "TIOCSTI", - "TIOCSTOP", - "TIOCSTSTAMP", - "TIOCSWINSZ", - "TIOCTIMESTAMP", - "TIOCUCNTL", - "TIOCVHANGUP", - "TIOCXMTFRAME", - "TOKEN_ADJUST_DEFAULT", - "TOKEN_ADJUST_GROUPS", - "TOKEN_ADJUST_PRIVILEGES", - "TOKEN_ADJUST_SESSIONID", - "TOKEN_ALL_ACCESS", - "TOKEN_ASSIGN_PRIMARY", - "TOKEN_DUPLICATE", - "TOKEN_EXECUTE", - "TOKEN_IMPERSONATE", - "TOKEN_QUERY", - "TOKEN_QUERY_SOURCE", - "TOKEN_READ", - "TOKEN_WRITE", - "TOSTOP", - "TRUNCATE_EXISTING", - "TUNATTACHFILTER", - "TUNDETACHFILTER", - "TUNGETFEATURES", - "TUNGETIFF", - "TUNGETSNDBUF", - "TUNGETVNETHDRSZ", - "TUNSETDEBUG", - "TUNSETGROUP", - "TUNSETIFF", - "TUNSETLINK", - "TUNSETNOCSUM", - "TUNSETOFFLOAD", - "TUNSETOWNER", - "TUNSETPERSIST", - "TUNSETSNDBUF", - "TUNSETTXFILTER", - "TUNSETVNETHDRSZ", - "Tee", - "TerminateProcess", - "Termios", - "Tgkill", - "Time", - "Time_t", - "Times", - "Timespec", - "TimespecToNsec", - "Timeval", - "Timeval32", - "TimevalToNsec", - "Timex", - "Timezoneinformation", - "Tms", - "Token", - "TokenAccessInformation", - "TokenAuditPolicy", - "TokenDefaultDacl", - "TokenElevation", - "TokenElevationType", - "TokenGroups", - "TokenGroupsAndPrivileges", - "TokenHasRestrictions", - "TokenImpersonationLevel", - "TokenIntegrityLevel", - "TokenLinkedToken", - "TokenLogonSid", - "TokenMandatoryPolicy", - "TokenOrigin", - "TokenOwner", - "TokenPrimaryGroup", - "TokenPrivileges", - "TokenRestrictedSids", - "TokenSandBoxInert", - "TokenSessionId", - "TokenSessionReference", - "TokenSource", - "TokenStatistics", - "TokenType", - "TokenUIAccess", - "TokenUser", - "TokenVirtualizationAllowed", - "TokenVirtualizationEnabled", - "Tokenprimarygroup", - "Tokenuser", - "TranslateAccountName", - "TranslateName", - "TransmitFile", - "TransmitFileBuffers", - "Truncate", - "UNIX_PATH_MAX", - "USAGE_MATCH_TYPE_AND", - "USAGE_MATCH_TYPE_OR", - "UTF16FromString", - "UTF16PtrFromString", - "UTF16ToString", - "Ucred", - "Umask", - "Uname", - "Undelete", - "UnixCredentials", - "UnixRights", - "Unlink", - "Unlinkat", - "UnmapViewOfFile", - "Unmount", - "Unsetenv", - "Unshare", - "UserInfo10", - "Ustat", - "Ustat_t", - "Utimbuf", - "Utime", - "Utimes", - "UtimesNano", - "Utsname", - "VDISCARD", - "VDSUSP", - "VEOF", - "VEOL", - "VEOL2", - "VERASE", - "VERASE2", - "VINTR", - "VKILL", - "VLNEXT", - "VMIN", - "VQUIT", - "VREPRINT", - "VSTART", - "VSTATUS", - "VSTOP", - "VSUSP", - "VSWTC", - "VT0", - "VT1", - "VTDLY", - "VTIME", - "VWERASE", - "VirtualLock", - "VirtualUnlock", - "WAIT_ABANDONED", - "WAIT_FAILED", - "WAIT_OBJECT_0", - "WAIT_TIMEOUT", - "WALL", - "WALLSIG", - "WALTSIG", - "WCLONE", - "WCONTINUED", - "WCOREFLAG", - "WEXITED", - "WLINUXCLONE", - "WNOHANG", - "WNOTHREAD", - "WNOWAIT", - "WNOZOMBIE", - "WOPTSCHECKED", - "WORDSIZE", - "WSABuf", - "WSACleanup", - "WSADESCRIPTION_LEN", - "WSAData", - "WSAEACCES", - "WSAECONNABORTED", - "WSAECONNRESET", - "WSAEnumProtocols", - "WSAID_CONNECTEX", - "WSAIoctl", - "WSAPROTOCOL_LEN", - "WSAProtocolChain", - "WSAProtocolInfo", - "WSARecv", - "WSARecvFrom", - "WSASYS_STATUS_LEN", - "WSASend", - "WSASendTo", - "WSASendto", - "WSAStartup", - "WSTOPPED", - "WTRAPPED", - "WUNTRACED", - "Wait4", - "WaitForSingleObject", - "WaitStatus", - "Win32FileAttributeData", - "Win32finddata", - "Write", - "WriteConsole", - "WriteFile", - "X509_ASN_ENCODING", - "XCASE", - "XP1_CONNECTIONLESS", - "XP1_CONNECT_DATA", - "XP1_DISCONNECT_DATA", - "XP1_EXPEDITED_DATA", - "XP1_GRACEFUL_CLOSE", - "XP1_GUARANTEED_DELIVERY", - "XP1_GUARANTEED_ORDER", - "XP1_IFS_HANDLES", - "XP1_MESSAGE_ORIENTED", - "XP1_MULTIPOINT_CONTROL_PLANE", - "XP1_MULTIPOINT_DATA_PLANE", - "XP1_PARTIAL_MESSAGE", - "XP1_PSEUDO_STREAM", - "XP1_QOS_SUPPORTED", - "XP1_SAN_SUPPORT_SDP", - "XP1_SUPPORT_BROADCAST", - "XP1_SUPPORT_MULTIPOINT", - "XP1_UNI_RECV", - "XP1_UNI_SEND", - }, - "syscall/js": { - "CopyBytesToGo", - "CopyBytesToJS", - "Error", - "Func", - "FuncOf", - "Global", - "Null", - "Type", - "TypeBoolean", - "TypeFunction", - "TypeNull", - "TypeNumber", - "TypeObject", - "TypeString", - "TypeSymbol", - "TypeUndefined", - "Undefined", - "Value", - "ValueError", - "ValueOf", - }, - "testing": { - "AllocsPerRun", - "B", - "Benchmark", - "BenchmarkResult", - "Cover", - "CoverBlock", - "CoverMode", - "Coverage", - "F", - "Init", - "InternalBenchmark", - "InternalExample", - "InternalFuzzTarget", - "InternalTest", - "M", - "Main", - "MainStart", - "PB", - "RegisterCover", - "RunBenchmarks", - "RunExamples", - "RunTests", - "Short", - "T", - "TB", - "Testing", - "Verbose", - }, - "testing/fstest": { - "MapFS", - "MapFile", - "TestFS", - }, - "testing/iotest": { - "DataErrReader", - "ErrReader", - "ErrTimeout", - "HalfReader", - "NewReadLogger", - "NewWriteLogger", - "OneByteReader", - "TestReader", - "TimeoutReader", - "TruncateWriter", - }, - "testing/quick": { - "Check", - "CheckEqual", - "CheckEqualError", - "CheckError", - "Config", - "Generator", - "SetupError", - "Value", - }, - "testing/slogtest": { - "TestHandler", - }, - "text/scanner": { - "Char", - "Comment", - "EOF", - "Float", - "GoTokens", - "GoWhitespace", - "Ident", - "Int", - "Position", - "RawString", - "ScanChars", - "ScanComments", - "ScanFloats", - "ScanIdents", - "ScanInts", - "ScanRawStrings", - "ScanStrings", - "Scanner", - "SkipComments", - "String", - "TokenString", - }, - "text/tabwriter": { - "AlignRight", - "Debug", - "DiscardEmptyColumns", - "Escape", - "FilterHTML", - "NewWriter", - "StripEscape", - "TabIndent", - "Writer", - }, - "text/template": { - "ExecError", - "FuncMap", - "HTMLEscape", - "HTMLEscapeString", - "HTMLEscaper", - "IsTrue", - "JSEscape", - "JSEscapeString", - "JSEscaper", - "Must", - "New", - "ParseFS", - "ParseFiles", - "ParseGlob", - "Template", - "URLQueryEscaper", - }, - "text/template/parse": { - "ActionNode", - "BoolNode", - "BranchNode", - "BreakNode", - "ChainNode", - "CommandNode", - "CommentNode", - "ContinueNode", - "DotNode", - "FieldNode", - "IdentifierNode", - "IfNode", - "IsEmptyTree", - "ListNode", - "Mode", - "New", - "NewIdentifier", - "NilNode", - "Node", - "NodeAction", - "NodeBool", - "NodeBreak", - "NodeChain", - "NodeCommand", - "NodeComment", - "NodeContinue", - "NodeDot", - "NodeField", - "NodeIdentifier", - "NodeIf", - "NodeList", - "NodeNil", - "NodeNumber", - "NodePipe", - "NodeRange", - "NodeString", - "NodeTemplate", - "NodeText", - "NodeType", - "NodeVariable", - "NodeWith", - "NumberNode", - "Parse", - "ParseComments", - "PipeNode", - "Pos", - "RangeNode", - "SkipFuncCheck", - "StringNode", - "TemplateNode", - "TextNode", - "Tree", - "VariableNode", - "WithNode", - }, - "time": { - "ANSIC", - "After", - "AfterFunc", - "April", - "August", - "Date", - "DateOnly", - "DateTime", - "December", - "Duration", - "February", - "FixedZone", - "Friday", - "Hour", - "January", - "July", - "June", - "Kitchen", - "Layout", - "LoadLocation", - "LoadLocationFromTZData", - "Local", - "Location", - "March", - "May", - "Microsecond", - "Millisecond", - "Minute", - "Monday", - "Month", - "Nanosecond", - "NewTicker", - "NewTimer", - "November", - "Now", - "October", - "Parse", - "ParseDuration", - "ParseError", - "ParseInLocation", - "RFC1123", - "RFC1123Z", - "RFC3339", - "RFC3339Nano", - "RFC822", - "RFC822Z", - "RFC850", - "RubyDate", - "Saturday", - "Second", - "September", - "Since", - "Sleep", - "Stamp", - "StampMicro", - "StampMilli", - "StampNano", - "Sunday", - "Thursday", - "Tick", - "Ticker", - "Time", - "TimeOnly", - "Timer", - "Tuesday", - "UTC", - "Unix", - "UnixDate", - "UnixMicro", - "UnixMilli", - "Until", - "Wednesday", - "Weekday", - }, - "unicode": { - "ASCII_Hex_Digit", - "Adlam", - "Ahom", - "Anatolian_Hieroglyphs", - "Arabic", - "Armenian", - "Avestan", - "AzeriCase", - "Balinese", - "Bamum", - "Bassa_Vah", - "Batak", - "Bengali", - "Bhaiksuki", - "Bidi_Control", - "Bopomofo", - "Brahmi", - "Braille", - "Buginese", - "Buhid", - "C", - "Canadian_Aboriginal", - "Carian", - "CaseRange", - "CaseRanges", - "Categories", - "Caucasian_Albanian", - "Cc", - "Cf", - "Chakma", - "Cham", - "Cherokee", - "Chorasmian", - "Co", - "Common", - "Coptic", - "Cs", - "Cuneiform", - "Cypriot", - "Cypro_Minoan", - "Cyrillic", - "Dash", - "Deprecated", - "Deseret", - "Devanagari", - "Diacritic", - "Digit", - "Dives_Akuru", - "Dogra", - "Duployan", - "Egyptian_Hieroglyphs", - "Elbasan", - "Elymaic", - "Ethiopic", - "Extender", - "FoldCategory", - "FoldScript", - "Georgian", - "Glagolitic", - "Gothic", - "Grantha", - "GraphicRanges", - "Greek", - "Gujarati", - "Gunjala_Gondi", - "Gurmukhi", - "Han", - "Hangul", - "Hanifi_Rohingya", - "Hanunoo", - "Hatran", - "Hebrew", - "Hex_Digit", - "Hiragana", - "Hyphen", - "IDS_Binary_Operator", - "IDS_Trinary_Operator", - "Ideographic", - "Imperial_Aramaic", - "In", - "Inherited", - "Inscriptional_Pahlavi", - "Inscriptional_Parthian", - "Is", - "IsControl", - "IsDigit", - "IsGraphic", - "IsLetter", - "IsLower", - "IsMark", - "IsNumber", - "IsOneOf", - "IsPrint", - "IsPunct", - "IsSpace", - "IsSymbol", - "IsTitle", - "IsUpper", - "Javanese", - "Join_Control", - "Kaithi", - "Kannada", - "Katakana", - "Kawi", - "Kayah_Li", - "Kharoshthi", - "Khitan_Small_Script", - "Khmer", - "Khojki", - "Khudawadi", - "L", - "Lao", - "Latin", - "Lepcha", - "Letter", - "Limbu", - "Linear_A", - "Linear_B", - "Lisu", - "Ll", - "Lm", - "Lo", - "Logical_Order_Exception", - "Lower", - "LowerCase", - "Lt", - "Lu", - "Lycian", - "Lydian", - "M", - "Mahajani", - "Makasar", - "Malayalam", - "Mandaic", - "Manichaean", - "Marchen", - "Mark", - "Masaram_Gondi", - "MaxASCII", - "MaxCase", - "MaxLatin1", - "MaxRune", - "Mc", - "Me", - "Medefaidrin", - "Meetei_Mayek", - "Mende_Kikakui", - "Meroitic_Cursive", - "Meroitic_Hieroglyphs", - "Miao", - "Mn", - "Modi", - "Mongolian", - "Mro", - "Multani", - "Myanmar", - "N", - "Nabataean", - "Nag_Mundari", - "Nandinagari", - "Nd", - "New_Tai_Lue", - "Newa", - "Nko", - "Nl", - "No", - "Noncharacter_Code_Point", - "Number", - "Nushu", - "Nyiakeng_Puachue_Hmong", - "Ogham", - "Ol_Chiki", - "Old_Hungarian", - "Old_Italic", - "Old_North_Arabian", - "Old_Permic", - "Old_Persian", - "Old_Sogdian", - "Old_South_Arabian", - "Old_Turkic", - "Old_Uyghur", - "Oriya", - "Osage", - "Osmanya", - "Other", - "Other_Alphabetic", - "Other_Default_Ignorable_Code_Point", - "Other_Grapheme_Extend", - "Other_ID_Continue", - "Other_ID_Start", - "Other_Lowercase", - "Other_Math", - "Other_Uppercase", - "P", - "Pahawh_Hmong", - "Palmyrene", - "Pattern_Syntax", - "Pattern_White_Space", - "Pau_Cin_Hau", - "Pc", - "Pd", - "Pe", - "Pf", - "Phags_Pa", - "Phoenician", - "Pi", - "Po", - "Prepended_Concatenation_Mark", - "PrintRanges", - "Properties", - "Ps", - "Psalter_Pahlavi", - "Punct", - "Quotation_Mark", - "Radical", - "Range16", - "Range32", - "RangeTable", - "Regional_Indicator", - "Rejang", - "ReplacementChar", - "Runic", - "S", - "STerm", - "Samaritan", - "Saurashtra", - "Sc", - "Scripts", - "Sentence_Terminal", - "Sharada", - "Shavian", - "Siddham", - "SignWriting", - "SimpleFold", - "Sinhala", - "Sk", - "Sm", - "So", - "Soft_Dotted", - "Sogdian", - "Sora_Sompeng", - "Soyombo", - "Space", - "SpecialCase", - "Sundanese", - "Syloti_Nagri", - "Symbol", - "Syriac", - "Tagalog", - "Tagbanwa", - "Tai_Le", - "Tai_Tham", - "Tai_Viet", - "Takri", - "Tamil", - "Tangsa", - "Tangut", - "Telugu", - "Terminal_Punctuation", - "Thaana", - "Thai", - "Tibetan", - "Tifinagh", - "Tirhuta", - "Title", - "TitleCase", - "To", - "ToLower", - "ToTitle", - "ToUpper", - "Toto", - "TurkishCase", - "Ugaritic", - "Unified_Ideograph", - "Upper", - "UpperCase", - "UpperLower", - "Vai", - "Variation_Selector", - "Version", - "Vithkuqi", - "Wancho", - "Warang_Citi", - "White_Space", - "Yezidi", - "Yi", - "Z", - "Zanabazar_Square", - "Zl", - "Zp", - "Zs", - }, - "unicode/utf16": { - "AppendRune", - "Decode", - "DecodeRune", - "Encode", - "EncodeRune", - "IsSurrogate", - }, - "unicode/utf8": { - "AppendRune", - "DecodeLastRune", - "DecodeLastRuneInString", - "DecodeRune", - "DecodeRuneInString", - "EncodeRune", - "FullRune", - "FullRuneInString", - "MaxRune", - "RuneCount", - "RuneCountInString", - "RuneError", - "RuneLen", - "RuneSelf", - "RuneStart", - "UTFMax", - "Valid", - "ValidRune", - "ValidString", - }, - "unsafe": { - "Add", - "Alignof", - "Offsetof", - "Pointer", - "Sizeof", - "Slice", - "SliceData", - "String", - "StringData", - }, -} diff --git a/vendor/golang.org/x/tools/internal/packagesinternal/packages.go b/vendor/golang.org/x/tools/internal/packagesinternal/packages.go index d9950b1f..44719de1 100644 --- a/vendor/golang.org/x/tools/internal/packagesinternal/packages.go +++ b/vendor/golang.org/x/tools/internal/packagesinternal/packages.go @@ -5,10 +5,6 @@ // Package packagesinternal exposes internal-only fields from go/packages. package packagesinternal -import ( - "golang.org/x/tools/internal/gocommand" -) - var GetForTest = func(p interface{}) string { return "" } var GetDepsErrors = func(p interface{}) []*PackageError { return nil } @@ -18,10 +14,6 @@ type PackageError struct { Err string // the error itself } -var GetGoCmdRunner = func(config interface{}) *gocommand.Runner { return nil } - -var SetGoCmdRunner = func(config interface{}, runner *gocommand.Runner) {} - var TypecheckCgo int var DepsErrors int // must be set as a LoadMode to call GetDepsErrors var ForTest int // must be set as a LoadMode to call GetForTest diff --git a/vendor/golang.org/x/tools/internal/pkgbits/decoder.go b/vendor/golang.org/x/tools/internal/pkgbits/decoder.go index b92e8e6e..f6cb37c5 100644 --- a/vendor/golang.org/x/tools/internal/pkgbits/decoder.go +++ b/vendor/golang.org/x/tools/internal/pkgbits/decoder.go @@ -21,7 +21,7 @@ import ( // export data. type PkgDecoder struct { // version is the file format version. - version uint32 + version Version // sync indicates whether the file uses sync markers. sync bool @@ -68,8 +68,6 @@ func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync } // NewPkgDecoder returns a PkgDecoder initialized to read the Unified // IR export data from input. pkgPath is the package path for the // compilation unit that produced the export data. -// -// TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014. func NewPkgDecoder(pkgPath, input string) PkgDecoder { pr := PkgDecoder{ pkgPath: pkgPath, @@ -80,14 +78,15 @@ func NewPkgDecoder(pkgPath, input string) PkgDecoder { r := strings.NewReader(input) - assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil) + var ver uint32 + assert(binary.Read(r, binary.LittleEndian, &ver) == nil) + pr.version = Version(ver) - switch pr.version { - default: - panic(fmt.Errorf("unsupported version: %v", pr.version)) - case 0: - // no flags - case 1: + if pr.version >= numVersions { + panic(fmt.Errorf("cannot decode %q, export data version %d is greater than maximum supported version %d", pkgPath, pr.version, numVersions-1)) + } + + if pr.version.Has(Flags) { var flags uint32 assert(binary.Read(r, binary.LittleEndian, &flags) == nil) pr.sync = flags&flagSyncMarkers != 0 @@ -102,7 +101,9 @@ func NewPkgDecoder(pkgPath, input string) PkgDecoder { assert(err == nil) pr.elemData = input[pos:] - assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1])) + + const fingerprintSize = 8 + assert(len(pr.elemData)-fingerprintSize == int(pr.elemEnds[len(pr.elemEnds)-1])) return pr } @@ -136,7 +137,7 @@ func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int { absIdx += int(pr.elemEndsEnds[k-1]) } if absIdx >= int(pr.elemEndsEnds[k]) { - errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds) + panicf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds) } return absIdx } @@ -193,9 +194,7 @@ func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder { Idx: idx, } - // TODO(mdempsky) r.data.Reset(...) after #44505 is resolved. - r.Data = *strings.NewReader(pr.DataIdx(k, idx)) - + r.Data.Reset(pr.DataIdx(k, idx)) r.Sync(SyncRelocs) r.Relocs = make([]RelocEnt, r.Len()) for i := range r.Relocs { @@ -244,7 +243,7 @@ type Decoder struct { func (r *Decoder) checkErr(err error) { if err != nil { - errorf("unexpected decoding error: %w", err) + panicf("unexpected decoding error: %w", err) } } @@ -515,3 +514,6 @@ func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) { return path, name, tag } + +// Version reports the version of the bitstream. +func (w *Decoder) Version() Version { return w.common.version } diff --git a/vendor/golang.org/x/tools/internal/pkgbits/encoder.go b/vendor/golang.org/x/tools/internal/pkgbits/encoder.go index 6482617a..c17a1239 100644 --- a/vendor/golang.org/x/tools/internal/pkgbits/encoder.go +++ b/vendor/golang.org/x/tools/internal/pkgbits/encoder.go @@ -12,18 +12,15 @@ import ( "io" "math/big" "runtime" + "strings" ) -// currentVersion is the current version number. -// -// - v0: initial prototype -// -// - v1: adds the flags uint32 word -const currentVersion uint32 = 1 - // A PkgEncoder provides methods for encoding a package's Unified IR // export data. type PkgEncoder struct { + // version of the bitstream. + version Version + // elems holds the bitstream for previously encoded elements. elems [numRelocs][]string @@ -47,8 +44,9 @@ func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 } // export data files, but can help diagnosing desync errors in // higher-level Unified IR reader/writer code. If syncFrames is // negative, then sync markers are omitted entirely. -func NewPkgEncoder(syncFrames int) PkgEncoder { +func NewPkgEncoder(version Version, syncFrames int) PkgEncoder { return PkgEncoder{ + version: version, stringsIdx: make(map[string]Index), syncFrames: syncFrames, } @@ -64,13 +62,15 @@ func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) { assert(binary.Write(out, binary.LittleEndian, x) == nil) } - writeUint32(currentVersion) + writeUint32(uint32(pw.version)) - var flags uint32 - if pw.SyncMarkers() { - flags |= flagSyncMarkers + if pw.version.Has(Flags) { + var flags uint32 + if pw.SyncMarkers() { + flags |= flagSyncMarkers + } + writeUint32(flags) } - writeUint32(flags) // Write elemEndsEnds. var sum uint32 @@ -159,7 +159,7 @@ type Encoder struct { // Flush finalizes the element's bitstream and returns its Index. func (w *Encoder) Flush() Index { - var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved + var sb strings.Builder // Backup the data so we write the relocations at the front. var tmp bytes.Buffer @@ -189,7 +189,7 @@ func (w *Encoder) Flush() Index { func (w *Encoder) checkErr(err error) { if err != nil { - errorf("unexpected encoding error: %v", err) + panicf("unexpected encoding error: %v", err) } } @@ -320,8 +320,14 @@ func (w *Encoder) Code(c Code) { // section (if not already present), and then writing a relocation // into the element bitstream. func (w *Encoder) String(s string) { + w.StringRef(w.p.StringIdx(s)) +} + +// StringRef writes a reference to the given index, which must be a +// previously encoded string value. +func (w *Encoder) StringRef(idx Index) { w.Sync(SyncString) - w.Reloc(RelocString, w.p.StringIdx(s)) + w.Reloc(RelocString, idx) } // Strings encodes and writes a variable-length slice of strings into @@ -348,7 +354,7 @@ func (w *Encoder) Value(val constant.Value) { func (w *Encoder) scalar(val constant.Value) { switch v := constant.Val(val).(type) { default: - errorf("unhandled %v (%v)", val, val.Kind()) + panicf("unhandled %v (%v)", val, val.Kind()) case bool: w.Code(ValBool) w.Bool(v) @@ -381,3 +387,6 @@ func (w *Encoder) bigFloat(v *big.Float) { b := v.Append(nil, 'p', -1) w.String(string(b)) // TODO: More efficient encoding. } + +// Version reports the version of the bitstream. +func (w *Encoder) Version() Version { return w.p.version } diff --git a/vendor/golang.org/x/tools/internal/pkgbits/frames_go1.go b/vendor/golang.org/x/tools/internal/pkgbits/frames_go1.go deleted file mode 100644 index 5294f6a6..00000000 --- a/vendor/golang.org/x/tools/internal/pkgbits/frames_go1.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !go1.7 -// +build !go1.7 - -// TODO(mdempsky): Remove after #44505 is resolved - -package pkgbits - -import "runtime" - -func walkFrames(pcs []uintptr, visit frameVisitor) { - for _, pc := range pcs { - fn := runtime.FuncForPC(pc) - file, line := fn.FileLine(pc) - - visit(file, line, fn.Name(), pc-fn.Entry()) - } -} diff --git a/vendor/golang.org/x/tools/internal/pkgbits/frames_go17.go b/vendor/golang.org/x/tools/internal/pkgbits/frames_go17.go deleted file mode 100644 index 2324ae7a..00000000 --- a/vendor/golang.org/x/tools/internal/pkgbits/frames_go17.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.7 -// +build go1.7 - -package pkgbits - -import "runtime" - -// walkFrames calls visit for each call frame represented by pcs. -// -// pcs should be a slice of PCs, as returned by runtime.Callers. -func walkFrames(pcs []uintptr, visit frameVisitor) { - if len(pcs) == 0 { - return - } - - frames := runtime.CallersFrames(pcs) - for { - frame, more := frames.Next() - visit(frame.File, frame.Line, frame.Function, frame.PC-frame.Entry) - if !more { - return - } - } -} diff --git a/vendor/golang.org/x/tools/internal/pkgbits/support.go b/vendor/golang.org/x/tools/internal/pkgbits/support.go index ad26d3b2..50534a29 100644 --- a/vendor/golang.org/x/tools/internal/pkgbits/support.go +++ b/vendor/golang.org/x/tools/internal/pkgbits/support.go @@ -12,6 +12,6 @@ func assert(b bool) { } } -func errorf(format string, args ...interface{}) { +func panicf(format string, args ...any) { panic(fmt.Errorf(format, args...)) } diff --git a/vendor/golang.org/x/tools/internal/pkgbits/sync.go b/vendor/golang.org/x/tools/internal/pkgbits/sync.go index 5bd51ef7..1520b73a 100644 --- a/vendor/golang.org/x/tools/internal/pkgbits/sync.go +++ b/vendor/golang.org/x/tools/internal/pkgbits/sync.go @@ -6,6 +6,7 @@ package pkgbits import ( "fmt" + "runtime" "strings" ) @@ -23,6 +24,24 @@ func fmtFrames(pcs ...uintptr) []string { type frameVisitor func(file string, line int, name string, offset uintptr) +// walkFrames calls visit for each call frame represented by pcs. +// +// pcs should be a slice of PCs, as returned by runtime.Callers. +func walkFrames(pcs []uintptr, visit frameVisitor) { + if len(pcs) == 0 { + return + } + + frames := runtime.CallersFrames(pcs) + for { + frame, more := frames.Next() + visit(frame.File, frame.Line, frame.Function, frame.PC-frame.Entry) + if !more { + return + } + } +} + // SyncMarker is an enum type that represents markers that may be // written to export data to ensure the reader and writer stay // synchronized. @@ -110,4 +129,8 @@ const ( SyncStmtsEnd SyncLabel SyncOptLabel + + SyncMultiExpr + SyncRType + SyncConvRTTI ) diff --git a/vendor/golang.org/x/tools/internal/pkgbits/syncmarker_string.go b/vendor/golang.org/x/tools/internal/pkgbits/syncmarker_string.go index 4a5b0ca5..582ad56d 100644 --- a/vendor/golang.org/x/tools/internal/pkgbits/syncmarker_string.go +++ b/vendor/golang.org/x/tools/internal/pkgbits/syncmarker_string.go @@ -74,11 +74,14 @@ func _() { _ = x[SyncStmtsEnd-64] _ = x[SyncLabel-65] _ = x[SyncOptLabel-66] + _ = x[SyncMultiExpr-67] + _ = x[SyncRType-68] + _ = x[SyncConvRTTI-69] } -const _SyncMarker_name = "EOFBoolInt64Uint64StringValueValRelocsRelocUseRelocPublicPosPosBaseObjectObject1PkgPkgDefMethodTypeTypeIdxTypeParamNamesSignatureParamsParamCodeObjSymLocalIdentSelectorPrivateFuncExtVarExtTypeExtPragmaExprListExprsExprExprTypeAssignOpFuncLitCompLitDeclFuncBodyOpenScopeCloseScopeCloseAnotherScopeDeclNamesDeclNameStmtsBlockStmtIfStmtForStmtSwitchStmtRangeStmtCaseClauseCommClauseSelectStmtDeclsLabeledStmtUseObjLocalAddLocalLinknameStmt1StmtsEndLabelOptLabel" +const _SyncMarker_name = "EOFBoolInt64Uint64StringValueValRelocsRelocUseRelocPublicPosPosBaseObjectObject1PkgPkgDefMethodTypeTypeIdxTypeParamNamesSignatureParamsParamCodeObjSymLocalIdentSelectorPrivateFuncExtVarExtTypeExtPragmaExprListExprsExprExprTypeAssignOpFuncLitCompLitDeclFuncBodyOpenScopeCloseScopeCloseAnotherScopeDeclNamesDeclNameStmtsBlockStmtIfStmtForStmtSwitchStmtRangeStmtCaseClauseCommClauseSelectStmtDeclsLabeledStmtUseObjLocalAddLocalLinknameStmt1StmtsEndLabelOptLabelMultiExprRTypeConvRTTI" -var _SyncMarker_index = [...]uint16{0, 3, 7, 12, 18, 24, 29, 32, 38, 43, 51, 57, 60, 67, 73, 80, 83, 89, 95, 99, 106, 120, 129, 135, 140, 147, 150, 160, 168, 175, 182, 188, 195, 201, 209, 214, 218, 226, 232, 234, 241, 248, 252, 260, 269, 279, 296, 305, 313, 318, 327, 333, 340, 350, 359, 369, 379, 389, 394, 405, 416, 424, 432, 437, 445, 450, 458} +var _SyncMarker_index = [...]uint16{0, 3, 7, 12, 18, 24, 29, 32, 38, 43, 51, 57, 60, 67, 73, 80, 83, 89, 95, 99, 106, 120, 129, 135, 140, 147, 150, 160, 168, 175, 182, 188, 195, 201, 209, 214, 218, 226, 232, 234, 241, 248, 252, 260, 269, 279, 296, 305, 313, 318, 327, 333, 340, 350, 359, 369, 379, 389, 394, 405, 416, 424, 432, 437, 445, 450, 458, 467, 472, 480} func (i SyncMarker) String() string { i -= 1 diff --git a/vendor/golang.org/x/tools/internal/pkgbits/version.go b/vendor/golang.org/x/tools/internal/pkgbits/version.go new file mode 100644 index 00000000..53af9df2 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/pkgbits/version.go @@ -0,0 +1,85 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package pkgbits + +// Version indicates a version of a unified IR bitstream. +// Each Version indicates the addition, removal, or change of +// new data in the bitstream. +// +// These are serialized to disk and the interpretation remains fixed. +type Version uint32 + +const ( + // V0: initial prototype. + // + // All data that is not assigned a Field is in version V0 + // and has not been deprecated. + V0 Version = iota + + // V1: adds the Flags uint32 word + V1 + + // V2: removes unused legacy fields and supports type parameters for aliases. + // - remove the legacy "has init" bool from the public root + // - remove obj's "derived func instance" bool + // - add a TypeParamNames field to ObjAlias + // - remove derived info "needed" bool + V2 + + numVersions = iota +) + +// Field denotes a unit of data in the serialized unified IR bitstream. +// It is conceptually a like field in a structure. +// +// We only really need Fields when the data may or may not be present +// in a stream based on the Version of the bitstream. +// +// Unlike much of pkgbits, Fields are not serialized and +// can change values as needed. +type Field int + +const ( + // Flags in a uint32 in the header of a bitstream + // that is used to indicate whether optional features are enabled. + Flags Field = iota + + // Deprecated: HasInit was a bool indicating whether a package + // has any init functions. + HasInit + + // Deprecated: DerivedFuncInstance was a bool indicating + // whether an object was a function instance. + DerivedFuncInstance + + // ObjAlias has a list of TypeParamNames. + AliasTypeParamNames + + // Deprecated: DerivedInfoNeeded was a bool indicating + // whether a type was a derived type. + DerivedInfoNeeded + + numFields = iota +) + +// introduced is the version a field was added. +var introduced = [numFields]Version{ + Flags: V1, + AliasTypeParamNames: V2, +} + +// removed is the version a field was removed in or 0 for fields +// that have not yet been deprecated. +// (So removed[f]-1 is the last version it is included in.) +var removed = [numFields]Version{ + HasInit: V2, + DerivedFuncInstance: V2, + DerivedInfoNeeded: V2, +} + +// Has reports whether field f is present in a bitstream at version v. +func (v Version) Has(f Field) bool { + return introduced[f] <= v && (v < removed[f] || removed[f] == V0) +} diff --git a/vendor/golang.org/x/tools/internal/stdlib/manifest.go b/vendor/golang.org/x/tools/internal/stdlib/manifest.go new file mode 100644 index 00000000..cdaac9ab --- /dev/null +++ b/vendor/golang.org/x/tools/internal/stdlib/manifest.go @@ -0,0 +1,17431 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by generate.go. DO NOT EDIT. + +package stdlib + +var PackageSymbols = map[string][]Symbol{ + "archive/tar": { + {"(*Header).FileInfo", Method, 1}, + {"(*Reader).Next", Method, 0}, + {"(*Reader).Read", Method, 0}, + {"(*Writer).AddFS", Method, 22}, + {"(*Writer).Close", Method, 0}, + {"(*Writer).Flush", Method, 0}, + {"(*Writer).Write", Method, 0}, + {"(*Writer).WriteHeader", Method, 0}, + {"(Format).String", Method, 10}, + {"ErrFieldTooLong", Var, 0}, + {"ErrHeader", Var, 0}, + {"ErrInsecurePath", Var, 20}, + {"ErrWriteAfterClose", Var, 0}, + {"ErrWriteTooLong", Var, 0}, + {"FileInfoHeader", Func, 1}, + {"FileInfoNames", Type, 23}, + {"Format", Type, 10}, + {"FormatGNU", Const, 10}, + {"FormatPAX", Const, 10}, + {"FormatUSTAR", Const, 10}, + {"FormatUnknown", Const, 10}, + {"Header", Type, 0}, + {"Header.AccessTime", Field, 0}, + {"Header.ChangeTime", Field, 0}, + {"Header.Devmajor", Field, 0}, + {"Header.Devminor", Field, 0}, + {"Header.Format", Field, 10}, + {"Header.Gid", Field, 0}, + {"Header.Gname", Field, 0}, + {"Header.Linkname", Field, 0}, + {"Header.ModTime", Field, 0}, + {"Header.Mode", Field, 0}, + {"Header.Name", Field, 0}, + {"Header.PAXRecords", Field, 10}, + {"Header.Size", Field, 0}, + {"Header.Typeflag", Field, 0}, + {"Header.Uid", Field, 0}, + {"Header.Uname", Field, 0}, + {"Header.Xattrs", Field, 3}, + {"NewReader", Func, 0}, + {"NewWriter", Func, 0}, + {"Reader", Type, 0}, + {"TypeBlock", Const, 0}, + {"TypeChar", Const, 0}, + {"TypeCont", Const, 0}, + {"TypeDir", Const, 0}, + {"TypeFifo", Const, 0}, + {"TypeGNULongLink", Const, 1}, + {"TypeGNULongName", Const, 1}, + {"TypeGNUSparse", Const, 3}, + {"TypeLink", Const, 0}, + {"TypeReg", Const, 0}, + {"TypeRegA", Const, 0}, + {"TypeSymlink", Const, 0}, + {"TypeXGlobalHeader", Const, 0}, + {"TypeXHeader", Const, 0}, + {"Writer", Type, 0}, + }, + "archive/zip": { + {"(*File).DataOffset", Method, 2}, + {"(*File).FileInfo", Method, 0}, + {"(*File).ModTime", Method, 0}, + {"(*File).Mode", Method, 0}, + {"(*File).Open", Method, 0}, + {"(*File).OpenRaw", Method, 17}, + {"(*File).SetModTime", Method, 0}, + {"(*File).SetMode", Method, 0}, + {"(*FileHeader).FileInfo", Method, 0}, + {"(*FileHeader).ModTime", Method, 0}, + {"(*FileHeader).Mode", Method, 0}, + {"(*FileHeader).SetModTime", Method, 0}, + {"(*FileHeader).SetMode", Method, 0}, + {"(*ReadCloser).Close", Method, 0}, + {"(*ReadCloser).Open", Method, 16}, + {"(*ReadCloser).RegisterDecompressor", Method, 6}, + {"(*Reader).Open", Method, 16}, + {"(*Reader).RegisterDecompressor", Method, 6}, + {"(*Writer).AddFS", Method, 22}, + {"(*Writer).Close", Method, 0}, + {"(*Writer).Copy", Method, 17}, + {"(*Writer).Create", Method, 0}, + {"(*Writer).CreateHeader", Method, 0}, + {"(*Writer).CreateRaw", Method, 17}, + {"(*Writer).Flush", Method, 4}, + {"(*Writer).RegisterCompressor", Method, 6}, + {"(*Writer).SetComment", Method, 10}, + {"(*Writer).SetOffset", Method, 5}, + {"Compressor", Type, 2}, + {"Decompressor", Type, 2}, + {"Deflate", Const, 0}, + {"ErrAlgorithm", Var, 0}, + {"ErrChecksum", Var, 0}, + {"ErrFormat", Var, 0}, + {"ErrInsecurePath", Var, 20}, + {"File", Type, 0}, + {"File.FileHeader", Field, 0}, + {"FileHeader", Type, 0}, + {"FileHeader.CRC32", Field, 0}, + {"FileHeader.Comment", Field, 0}, + {"FileHeader.CompressedSize", Field, 0}, + {"FileHeader.CompressedSize64", Field, 1}, + {"FileHeader.CreatorVersion", Field, 0}, + {"FileHeader.ExternalAttrs", Field, 0}, + {"FileHeader.Extra", Field, 0}, + {"FileHeader.Flags", Field, 0}, + {"FileHeader.Method", Field, 0}, + {"FileHeader.Modified", Field, 10}, + {"FileHeader.ModifiedDate", Field, 0}, + {"FileHeader.ModifiedTime", Field, 0}, + {"FileHeader.Name", Field, 0}, + {"FileHeader.NonUTF8", Field, 10}, + {"FileHeader.ReaderVersion", Field, 0}, + {"FileHeader.UncompressedSize", Field, 0}, + {"FileHeader.UncompressedSize64", Field, 1}, + {"FileInfoHeader", Func, 0}, + {"NewReader", Func, 0}, + {"NewWriter", Func, 0}, + {"OpenReader", Func, 0}, + {"ReadCloser", Type, 0}, + {"ReadCloser.Reader", Field, 0}, + {"Reader", Type, 0}, + {"Reader.Comment", Field, 0}, + {"Reader.File", Field, 0}, + {"RegisterCompressor", Func, 2}, + {"RegisterDecompressor", Func, 2}, + {"Store", Const, 0}, + {"Writer", Type, 0}, + }, + "bufio": { + {"(*Reader).Buffered", Method, 0}, + {"(*Reader).Discard", Method, 5}, + {"(*Reader).Peek", Method, 0}, + {"(*Reader).Read", Method, 0}, + {"(*Reader).ReadByte", Method, 0}, + {"(*Reader).ReadBytes", Method, 0}, + {"(*Reader).ReadLine", Method, 0}, + {"(*Reader).ReadRune", Method, 0}, + {"(*Reader).ReadSlice", Method, 0}, + {"(*Reader).ReadString", Method, 0}, + {"(*Reader).Reset", Method, 2}, + {"(*Reader).Size", Method, 10}, + {"(*Reader).UnreadByte", Method, 0}, + {"(*Reader).UnreadRune", Method, 0}, + {"(*Reader).WriteTo", Method, 1}, + {"(*Scanner).Buffer", Method, 6}, + {"(*Scanner).Bytes", Method, 1}, + {"(*Scanner).Err", Method, 1}, + {"(*Scanner).Scan", Method, 1}, + {"(*Scanner).Split", Method, 1}, + {"(*Scanner).Text", Method, 1}, + {"(*Writer).Available", Method, 0}, + {"(*Writer).AvailableBuffer", Method, 18}, + {"(*Writer).Buffered", Method, 0}, + {"(*Writer).Flush", Method, 0}, + {"(*Writer).ReadFrom", Method, 1}, + {"(*Writer).Reset", Method, 2}, + {"(*Writer).Size", Method, 10}, + {"(*Writer).Write", Method, 0}, + {"(*Writer).WriteByte", Method, 0}, + {"(*Writer).WriteRune", Method, 0}, + {"(*Writer).WriteString", Method, 0}, + {"(ReadWriter).Available", Method, 0}, + {"(ReadWriter).AvailableBuffer", Method, 18}, + {"(ReadWriter).Discard", Method, 5}, + {"(ReadWriter).Flush", Method, 0}, + {"(ReadWriter).Peek", Method, 0}, + {"(ReadWriter).Read", Method, 0}, + {"(ReadWriter).ReadByte", Method, 0}, + {"(ReadWriter).ReadBytes", Method, 0}, + {"(ReadWriter).ReadFrom", Method, 1}, + {"(ReadWriter).ReadLine", Method, 0}, + {"(ReadWriter).ReadRune", Method, 0}, + {"(ReadWriter).ReadSlice", Method, 0}, + {"(ReadWriter).ReadString", Method, 0}, + {"(ReadWriter).UnreadByte", Method, 0}, + {"(ReadWriter).UnreadRune", Method, 0}, + {"(ReadWriter).Write", Method, 0}, + {"(ReadWriter).WriteByte", Method, 0}, + {"(ReadWriter).WriteRune", Method, 0}, + {"(ReadWriter).WriteString", Method, 0}, + {"(ReadWriter).WriteTo", Method, 1}, + {"ErrAdvanceTooFar", Var, 1}, + {"ErrBadReadCount", Var, 15}, + {"ErrBufferFull", Var, 0}, + {"ErrFinalToken", Var, 6}, + {"ErrInvalidUnreadByte", Var, 0}, + {"ErrInvalidUnreadRune", Var, 0}, + {"ErrNegativeAdvance", Var, 1}, + {"ErrNegativeCount", Var, 0}, + {"ErrTooLong", Var, 1}, + {"MaxScanTokenSize", Const, 1}, + {"NewReadWriter", Func, 0}, + {"NewReader", Func, 0}, + {"NewReaderSize", Func, 0}, + {"NewScanner", Func, 1}, + {"NewWriter", Func, 0}, + {"NewWriterSize", Func, 0}, + {"ReadWriter", Type, 0}, + {"ReadWriter.Reader", Field, 0}, + {"ReadWriter.Writer", Field, 0}, + {"Reader", Type, 0}, + {"ScanBytes", Func, 1}, + {"ScanLines", Func, 1}, + {"ScanRunes", Func, 1}, + {"ScanWords", Func, 1}, + {"Scanner", Type, 1}, + {"SplitFunc", Type, 1}, + {"Writer", Type, 0}, + }, + "bytes": { + {"(*Buffer).Available", Method, 21}, + {"(*Buffer).AvailableBuffer", Method, 21}, + {"(*Buffer).Bytes", Method, 0}, + {"(*Buffer).Cap", Method, 5}, + {"(*Buffer).Grow", Method, 1}, + {"(*Buffer).Len", Method, 0}, + {"(*Buffer).Next", Method, 0}, + {"(*Buffer).Read", Method, 0}, + {"(*Buffer).ReadByte", Method, 0}, + {"(*Buffer).ReadBytes", Method, 0}, + {"(*Buffer).ReadFrom", Method, 0}, + {"(*Buffer).ReadRune", Method, 0}, + {"(*Buffer).ReadString", Method, 0}, + {"(*Buffer).Reset", Method, 0}, + {"(*Buffer).String", Method, 0}, + {"(*Buffer).Truncate", Method, 0}, + {"(*Buffer).UnreadByte", Method, 0}, + {"(*Buffer).UnreadRune", Method, 0}, + {"(*Buffer).Write", Method, 0}, + {"(*Buffer).WriteByte", Method, 0}, + {"(*Buffer).WriteRune", Method, 0}, + {"(*Buffer).WriteString", Method, 0}, + {"(*Buffer).WriteTo", Method, 0}, + {"(*Reader).Len", Method, 0}, + {"(*Reader).Read", Method, 0}, + {"(*Reader).ReadAt", Method, 0}, + {"(*Reader).ReadByte", Method, 0}, + {"(*Reader).ReadRune", Method, 0}, + {"(*Reader).Reset", Method, 7}, + {"(*Reader).Seek", Method, 0}, + {"(*Reader).Size", Method, 5}, + {"(*Reader).UnreadByte", Method, 0}, + {"(*Reader).UnreadRune", Method, 0}, + {"(*Reader).WriteTo", Method, 1}, + {"Buffer", Type, 0}, + {"Clone", Func, 20}, + {"Compare", Func, 0}, + {"Contains", Func, 0}, + {"ContainsAny", Func, 7}, + {"ContainsFunc", Func, 21}, + {"ContainsRune", Func, 7}, + {"Count", Func, 0}, + {"Cut", Func, 18}, + {"CutPrefix", Func, 20}, + {"CutSuffix", Func, 20}, + {"Equal", Func, 0}, + {"EqualFold", Func, 0}, + {"ErrTooLarge", Var, 0}, + {"Fields", Func, 0}, + {"FieldsFunc", Func, 0}, + {"HasPrefix", Func, 0}, + {"HasSuffix", Func, 0}, + {"Index", Func, 0}, + {"IndexAny", Func, 0}, + {"IndexByte", Func, 0}, + {"IndexFunc", Func, 0}, + {"IndexRune", Func, 0}, + {"Join", Func, 0}, + {"LastIndex", Func, 0}, + {"LastIndexAny", Func, 0}, + {"LastIndexByte", Func, 5}, + {"LastIndexFunc", Func, 0}, + {"Map", Func, 0}, + {"MinRead", Const, 0}, + {"NewBuffer", Func, 0}, + {"NewBufferString", Func, 0}, + {"NewReader", Func, 0}, + {"Reader", Type, 0}, + {"Repeat", Func, 0}, + {"Replace", Func, 0}, + {"ReplaceAll", Func, 12}, + {"Runes", Func, 0}, + {"Split", Func, 0}, + {"SplitAfter", Func, 0}, + {"SplitAfterN", Func, 0}, + {"SplitN", Func, 0}, + {"Title", Func, 0}, + {"ToLower", Func, 0}, + {"ToLowerSpecial", Func, 0}, + {"ToTitle", Func, 0}, + {"ToTitleSpecial", Func, 0}, + {"ToUpper", Func, 0}, + {"ToUpperSpecial", Func, 0}, + {"ToValidUTF8", Func, 13}, + {"Trim", Func, 0}, + {"TrimFunc", Func, 0}, + {"TrimLeft", Func, 0}, + {"TrimLeftFunc", Func, 0}, + {"TrimPrefix", Func, 1}, + {"TrimRight", Func, 0}, + {"TrimRightFunc", Func, 0}, + {"TrimSpace", Func, 0}, + {"TrimSuffix", Func, 1}, + }, + "cmp": { + {"Compare", Func, 21}, + {"Less", Func, 21}, + {"Or", Func, 22}, + {"Ordered", Type, 21}, + }, + "compress/bzip2": { + {"(StructuralError).Error", Method, 0}, + {"NewReader", Func, 0}, + {"StructuralError", Type, 0}, + }, + "compress/flate": { + {"(*ReadError).Error", Method, 0}, + {"(*WriteError).Error", Method, 0}, + {"(*Writer).Close", Method, 0}, + {"(*Writer).Flush", Method, 0}, + {"(*Writer).Reset", Method, 2}, + {"(*Writer).Write", Method, 0}, + {"(CorruptInputError).Error", Method, 0}, + {"(InternalError).Error", Method, 0}, + {"BestCompression", Const, 0}, + {"BestSpeed", Const, 0}, + {"CorruptInputError", Type, 0}, + {"DefaultCompression", Const, 0}, + {"HuffmanOnly", Const, 7}, + {"InternalError", Type, 0}, + {"NewReader", Func, 0}, + {"NewReaderDict", Func, 0}, + {"NewWriter", Func, 0}, + {"NewWriterDict", Func, 0}, + {"NoCompression", Const, 0}, + {"ReadError", Type, 0}, + {"ReadError.Err", Field, 0}, + {"ReadError.Offset", Field, 0}, + {"Reader", Type, 0}, + {"Resetter", Type, 4}, + {"WriteError", Type, 0}, + {"WriteError.Err", Field, 0}, + {"WriteError.Offset", Field, 0}, + {"Writer", Type, 0}, + }, + "compress/gzip": { + {"(*Reader).Close", Method, 0}, + {"(*Reader).Multistream", Method, 4}, + {"(*Reader).Read", Method, 0}, + {"(*Reader).Reset", Method, 3}, + {"(*Writer).Close", Method, 0}, + {"(*Writer).Flush", Method, 1}, + {"(*Writer).Reset", Method, 2}, + {"(*Writer).Write", Method, 0}, + {"BestCompression", Const, 0}, + {"BestSpeed", Const, 0}, + {"DefaultCompression", Const, 0}, + {"ErrChecksum", Var, 0}, + {"ErrHeader", Var, 0}, + {"Header", Type, 0}, + {"Header.Comment", Field, 0}, + {"Header.Extra", Field, 0}, + {"Header.ModTime", Field, 0}, + {"Header.Name", Field, 0}, + {"Header.OS", Field, 0}, + {"HuffmanOnly", Const, 8}, + {"NewReader", Func, 0}, + {"NewWriter", Func, 0}, + {"NewWriterLevel", Func, 0}, + {"NoCompression", Const, 0}, + {"Reader", Type, 0}, + {"Reader.Header", Field, 0}, + {"Writer", Type, 0}, + {"Writer.Header", Field, 0}, + }, + "compress/lzw": { + {"(*Reader).Close", Method, 17}, + {"(*Reader).Read", Method, 17}, + {"(*Reader).Reset", Method, 17}, + {"(*Writer).Close", Method, 17}, + {"(*Writer).Reset", Method, 17}, + {"(*Writer).Write", Method, 17}, + {"LSB", Const, 0}, + {"MSB", Const, 0}, + {"NewReader", Func, 0}, + {"NewWriter", Func, 0}, + {"Order", Type, 0}, + {"Reader", Type, 17}, + {"Writer", Type, 17}, + }, + "compress/zlib": { + {"(*Writer).Close", Method, 0}, + {"(*Writer).Flush", Method, 0}, + {"(*Writer).Reset", Method, 2}, + {"(*Writer).Write", Method, 0}, + {"BestCompression", Const, 0}, + {"BestSpeed", Const, 0}, + {"DefaultCompression", Const, 0}, + {"ErrChecksum", Var, 0}, + {"ErrDictionary", Var, 0}, + {"ErrHeader", Var, 0}, + {"HuffmanOnly", Const, 8}, + {"NewReader", Func, 0}, + {"NewReaderDict", Func, 0}, + {"NewWriter", Func, 0}, + {"NewWriterLevel", Func, 0}, + {"NewWriterLevelDict", Func, 0}, + {"NoCompression", Const, 0}, + {"Resetter", Type, 4}, + {"Writer", Type, 0}, + }, + "container/heap": { + {"Fix", Func, 2}, + {"Init", Func, 0}, + {"Interface", Type, 0}, + {"Pop", Func, 0}, + {"Push", Func, 0}, + {"Remove", Func, 0}, + }, + "container/list": { + {"(*Element).Next", Method, 0}, + {"(*Element).Prev", Method, 0}, + {"(*List).Back", Method, 0}, + {"(*List).Front", Method, 0}, + {"(*List).Init", Method, 0}, + {"(*List).InsertAfter", Method, 0}, + {"(*List).InsertBefore", Method, 0}, + {"(*List).Len", Method, 0}, + {"(*List).MoveAfter", Method, 2}, + {"(*List).MoveBefore", Method, 2}, + {"(*List).MoveToBack", Method, 0}, + {"(*List).MoveToFront", Method, 0}, + {"(*List).PushBack", Method, 0}, + {"(*List).PushBackList", Method, 0}, + {"(*List).PushFront", Method, 0}, + {"(*List).PushFrontList", Method, 0}, + {"(*List).Remove", Method, 0}, + {"Element", Type, 0}, + {"Element.Value", Field, 0}, + {"List", Type, 0}, + {"New", Func, 0}, + }, + "container/ring": { + {"(*Ring).Do", Method, 0}, + {"(*Ring).Len", Method, 0}, + {"(*Ring).Link", Method, 0}, + {"(*Ring).Move", Method, 0}, + {"(*Ring).Next", Method, 0}, + {"(*Ring).Prev", Method, 0}, + {"(*Ring).Unlink", Method, 0}, + {"New", Func, 0}, + {"Ring", Type, 0}, + {"Ring.Value", Field, 0}, + }, + "context": { + {"AfterFunc", Func, 21}, + {"Background", Func, 7}, + {"CancelCauseFunc", Type, 20}, + {"CancelFunc", Type, 7}, + {"Canceled", Var, 7}, + {"Cause", Func, 20}, + {"Context", Type, 7}, + {"DeadlineExceeded", Var, 7}, + {"TODO", Func, 7}, + {"WithCancel", Func, 7}, + {"WithCancelCause", Func, 20}, + {"WithDeadline", Func, 7}, + {"WithDeadlineCause", Func, 21}, + {"WithTimeout", Func, 7}, + {"WithTimeoutCause", Func, 21}, + {"WithValue", Func, 7}, + {"WithoutCancel", Func, 21}, + }, + "crypto": { + {"(Hash).Available", Method, 0}, + {"(Hash).HashFunc", Method, 4}, + {"(Hash).New", Method, 0}, + {"(Hash).Size", Method, 0}, + {"(Hash).String", Method, 15}, + {"BLAKE2b_256", Const, 9}, + {"BLAKE2b_384", Const, 9}, + {"BLAKE2b_512", Const, 9}, + {"BLAKE2s_256", Const, 9}, + {"Decrypter", Type, 5}, + {"DecrypterOpts", Type, 5}, + {"Hash", Type, 0}, + {"MD4", Const, 0}, + {"MD5", Const, 0}, + {"MD5SHA1", Const, 0}, + {"PrivateKey", Type, 0}, + {"PublicKey", Type, 2}, + {"RIPEMD160", Const, 0}, + {"RegisterHash", Func, 0}, + {"SHA1", Const, 0}, + {"SHA224", Const, 0}, + {"SHA256", Const, 0}, + {"SHA384", Const, 0}, + {"SHA3_224", Const, 4}, + {"SHA3_256", Const, 4}, + {"SHA3_384", Const, 4}, + {"SHA3_512", Const, 4}, + {"SHA512", Const, 0}, + {"SHA512_224", Const, 5}, + {"SHA512_256", Const, 5}, + {"Signer", Type, 4}, + {"SignerOpts", Type, 4}, + }, + "crypto/aes": { + {"(KeySizeError).Error", Method, 0}, + {"BlockSize", Const, 0}, + {"KeySizeError", Type, 0}, + {"NewCipher", Func, 0}, + }, + "crypto/cipher": { + {"(StreamReader).Read", Method, 0}, + {"(StreamWriter).Close", Method, 0}, + {"(StreamWriter).Write", Method, 0}, + {"AEAD", Type, 2}, + {"Block", Type, 0}, + {"BlockMode", Type, 0}, + {"NewCBCDecrypter", Func, 0}, + {"NewCBCEncrypter", Func, 0}, + {"NewCFBDecrypter", Func, 0}, + {"NewCFBEncrypter", Func, 0}, + {"NewCTR", Func, 0}, + {"NewGCM", Func, 2}, + {"NewGCMWithNonceSize", Func, 5}, + {"NewGCMWithTagSize", Func, 11}, + {"NewOFB", Func, 0}, + {"Stream", Type, 0}, + {"StreamReader", Type, 0}, + {"StreamReader.R", Field, 0}, + {"StreamReader.S", Field, 0}, + {"StreamWriter", Type, 0}, + {"StreamWriter.Err", Field, 0}, + {"StreamWriter.S", Field, 0}, + {"StreamWriter.W", Field, 0}, + }, + "crypto/des": { + {"(KeySizeError).Error", Method, 0}, + {"BlockSize", Const, 0}, + {"KeySizeError", Type, 0}, + {"NewCipher", Func, 0}, + {"NewTripleDESCipher", Func, 0}, + }, + "crypto/dsa": { + {"ErrInvalidPublicKey", Var, 0}, + {"GenerateKey", Func, 0}, + {"GenerateParameters", Func, 0}, + {"L1024N160", Const, 0}, + {"L2048N224", Const, 0}, + {"L2048N256", Const, 0}, + {"L3072N256", Const, 0}, + {"ParameterSizes", Type, 0}, + {"Parameters", Type, 0}, + {"Parameters.G", Field, 0}, + {"Parameters.P", Field, 0}, + {"Parameters.Q", Field, 0}, + {"PrivateKey", Type, 0}, + {"PrivateKey.PublicKey", Field, 0}, + {"PrivateKey.X", Field, 0}, + {"PublicKey", Type, 0}, + {"PublicKey.Parameters", Field, 0}, + {"PublicKey.Y", Field, 0}, + {"Sign", Func, 0}, + {"Verify", Func, 0}, + }, + "crypto/ecdh": { + {"(*PrivateKey).Bytes", Method, 20}, + {"(*PrivateKey).Curve", Method, 20}, + {"(*PrivateKey).ECDH", Method, 20}, + {"(*PrivateKey).Equal", Method, 20}, + {"(*PrivateKey).Public", Method, 20}, + {"(*PrivateKey).PublicKey", Method, 20}, + {"(*PublicKey).Bytes", Method, 20}, + {"(*PublicKey).Curve", Method, 20}, + {"(*PublicKey).Equal", Method, 20}, + {"Curve", Type, 20}, + {"P256", Func, 20}, + {"P384", Func, 20}, + {"P521", Func, 20}, + {"PrivateKey", Type, 20}, + {"PublicKey", Type, 20}, + {"X25519", Func, 20}, + }, + "crypto/ecdsa": { + {"(*PrivateKey).ECDH", Method, 20}, + {"(*PrivateKey).Equal", Method, 15}, + {"(*PrivateKey).Public", Method, 4}, + {"(*PrivateKey).Sign", Method, 4}, + {"(*PublicKey).ECDH", Method, 20}, + {"(*PublicKey).Equal", Method, 15}, + {"(PrivateKey).Add", Method, 0}, + {"(PrivateKey).Double", Method, 0}, + {"(PrivateKey).IsOnCurve", Method, 0}, + {"(PrivateKey).Params", Method, 0}, + {"(PrivateKey).ScalarBaseMult", Method, 0}, + {"(PrivateKey).ScalarMult", Method, 0}, + {"(PublicKey).Add", Method, 0}, + {"(PublicKey).Double", Method, 0}, + {"(PublicKey).IsOnCurve", Method, 0}, + {"(PublicKey).Params", Method, 0}, + {"(PublicKey).ScalarBaseMult", Method, 0}, + {"(PublicKey).ScalarMult", Method, 0}, + {"GenerateKey", Func, 0}, + {"PrivateKey", Type, 0}, + {"PrivateKey.D", Field, 0}, + {"PrivateKey.PublicKey", Field, 0}, + {"PublicKey", Type, 0}, + {"PublicKey.Curve", Field, 0}, + {"PublicKey.X", Field, 0}, + {"PublicKey.Y", Field, 0}, + {"Sign", Func, 0}, + {"SignASN1", Func, 15}, + {"Verify", Func, 0}, + {"VerifyASN1", Func, 15}, + }, + "crypto/ed25519": { + {"(*Options).HashFunc", Method, 20}, + {"(PrivateKey).Equal", Method, 15}, + {"(PrivateKey).Public", Method, 13}, + {"(PrivateKey).Seed", Method, 13}, + {"(PrivateKey).Sign", Method, 13}, + {"(PublicKey).Equal", Method, 15}, + {"GenerateKey", Func, 13}, + {"NewKeyFromSeed", Func, 13}, + {"Options", Type, 20}, + {"Options.Context", Field, 20}, + {"Options.Hash", Field, 20}, + {"PrivateKey", Type, 13}, + {"PrivateKeySize", Const, 13}, + {"PublicKey", Type, 13}, + {"PublicKeySize", Const, 13}, + {"SeedSize", Const, 13}, + {"Sign", Func, 13}, + {"SignatureSize", Const, 13}, + {"Verify", Func, 13}, + {"VerifyWithOptions", Func, 20}, + }, + "crypto/elliptic": { + {"(*CurveParams).Add", Method, 0}, + {"(*CurveParams).Double", Method, 0}, + {"(*CurveParams).IsOnCurve", Method, 0}, + {"(*CurveParams).Params", Method, 0}, + {"(*CurveParams).ScalarBaseMult", Method, 0}, + {"(*CurveParams).ScalarMult", Method, 0}, + {"Curve", Type, 0}, + {"CurveParams", Type, 0}, + {"CurveParams.B", Field, 0}, + {"CurveParams.BitSize", Field, 0}, + {"CurveParams.Gx", Field, 0}, + {"CurveParams.Gy", Field, 0}, + {"CurveParams.N", Field, 0}, + {"CurveParams.Name", Field, 5}, + {"CurveParams.P", Field, 0}, + {"GenerateKey", Func, 0}, + {"Marshal", Func, 0}, + {"MarshalCompressed", Func, 15}, + {"P224", Func, 0}, + {"P256", Func, 0}, + {"P384", Func, 0}, + {"P521", Func, 0}, + {"Unmarshal", Func, 0}, + {"UnmarshalCompressed", Func, 15}, + }, + "crypto/hmac": { + {"Equal", Func, 1}, + {"New", Func, 0}, + }, + "crypto/md5": { + {"BlockSize", Const, 0}, + {"New", Func, 0}, + {"Size", Const, 0}, + {"Sum", Func, 2}, + }, + "crypto/rand": { + {"Int", Func, 0}, + {"Prime", Func, 0}, + {"Read", Func, 0}, + {"Reader", Var, 0}, + }, + "crypto/rc4": { + {"(*Cipher).Reset", Method, 0}, + {"(*Cipher).XORKeyStream", Method, 0}, + {"(KeySizeError).Error", Method, 0}, + {"Cipher", Type, 0}, + {"KeySizeError", Type, 0}, + {"NewCipher", Func, 0}, + }, + "crypto/rsa": { + {"(*PSSOptions).HashFunc", Method, 4}, + {"(*PrivateKey).Decrypt", Method, 5}, + {"(*PrivateKey).Equal", Method, 15}, + {"(*PrivateKey).Precompute", Method, 0}, + {"(*PrivateKey).Public", Method, 4}, + {"(*PrivateKey).Sign", Method, 4}, + {"(*PrivateKey).Size", Method, 11}, + {"(*PrivateKey).Validate", Method, 0}, + {"(*PublicKey).Equal", Method, 15}, + {"(*PublicKey).Size", Method, 11}, + {"CRTValue", Type, 0}, + {"CRTValue.Coeff", Field, 0}, + {"CRTValue.Exp", Field, 0}, + {"CRTValue.R", Field, 0}, + {"DecryptOAEP", Func, 0}, + {"DecryptPKCS1v15", Func, 0}, + {"DecryptPKCS1v15SessionKey", Func, 0}, + {"EncryptOAEP", Func, 0}, + {"EncryptPKCS1v15", Func, 0}, + {"ErrDecryption", Var, 0}, + {"ErrMessageTooLong", Var, 0}, + {"ErrVerification", Var, 0}, + {"GenerateKey", Func, 0}, + {"GenerateMultiPrimeKey", Func, 0}, + {"OAEPOptions", Type, 5}, + {"OAEPOptions.Hash", Field, 5}, + {"OAEPOptions.Label", Field, 5}, + {"OAEPOptions.MGFHash", Field, 20}, + {"PKCS1v15DecryptOptions", Type, 5}, + {"PKCS1v15DecryptOptions.SessionKeyLen", Field, 5}, + {"PSSOptions", Type, 2}, + {"PSSOptions.Hash", Field, 4}, + {"PSSOptions.SaltLength", Field, 2}, + {"PSSSaltLengthAuto", Const, 2}, + {"PSSSaltLengthEqualsHash", Const, 2}, + {"PrecomputedValues", Type, 0}, + {"PrecomputedValues.CRTValues", Field, 0}, + {"PrecomputedValues.Dp", Field, 0}, + {"PrecomputedValues.Dq", Field, 0}, + {"PrecomputedValues.Qinv", Field, 0}, + {"PrivateKey", Type, 0}, + {"PrivateKey.D", Field, 0}, + {"PrivateKey.Precomputed", Field, 0}, + {"PrivateKey.Primes", Field, 0}, + {"PrivateKey.PublicKey", Field, 0}, + {"PublicKey", Type, 0}, + {"PublicKey.E", Field, 0}, + {"PublicKey.N", Field, 0}, + {"SignPKCS1v15", Func, 0}, + {"SignPSS", Func, 2}, + {"VerifyPKCS1v15", Func, 0}, + {"VerifyPSS", Func, 2}, + }, + "crypto/sha1": { + {"BlockSize", Const, 0}, + {"New", Func, 0}, + {"Size", Const, 0}, + {"Sum", Func, 2}, + }, + "crypto/sha256": { + {"BlockSize", Const, 0}, + {"New", Func, 0}, + {"New224", Func, 0}, + {"Size", Const, 0}, + {"Size224", Const, 0}, + {"Sum224", Func, 2}, + {"Sum256", Func, 2}, + }, + "crypto/sha512": { + {"BlockSize", Const, 0}, + {"New", Func, 0}, + {"New384", Func, 0}, + {"New512_224", Func, 5}, + {"New512_256", Func, 5}, + {"Size", Const, 0}, + {"Size224", Const, 5}, + {"Size256", Const, 5}, + {"Size384", Const, 0}, + {"Sum384", Func, 2}, + {"Sum512", Func, 2}, + {"Sum512_224", Func, 5}, + {"Sum512_256", Func, 5}, + }, + "crypto/subtle": { + {"ConstantTimeByteEq", Func, 0}, + {"ConstantTimeCompare", Func, 0}, + {"ConstantTimeCopy", Func, 0}, + {"ConstantTimeEq", Func, 0}, + {"ConstantTimeLessOrEq", Func, 2}, + {"ConstantTimeSelect", Func, 0}, + {"XORBytes", Func, 20}, + }, + "crypto/tls": { + {"(*CertificateRequestInfo).Context", Method, 17}, + {"(*CertificateRequestInfo).SupportsCertificate", Method, 14}, + {"(*CertificateVerificationError).Error", Method, 20}, + {"(*CertificateVerificationError).Unwrap", Method, 20}, + {"(*ClientHelloInfo).Context", Method, 17}, + {"(*ClientHelloInfo).SupportsCertificate", Method, 14}, + {"(*ClientSessionState).ResumptionState", Method, 21}, + {"(*Config).BuildNameToCertificate", Method, 0}, + {"(*Config).Clone", Method, 8}, + {"(*Config).DecryptTicket", Method, 21}, + {"(*Config).EncryptTicket", Method, 21}, + {"(*Config).SetSessionTicketKeys", Method, 5}, + {"(*Conn).Close", Method, 0}, + {"(*Conn).CloseWrite", Method, 8}, + {"(*Conn).ConnectionState", Method, 0}, + {"(*Conn).Handshake", Method, 0}, + {"(*Conn).HandshakeContext", Method, 17}, + {"(*Conn).LocalAddr", Method, 0}, + {"(*Conn).NetConn", Method, 18}, + {"(*Conn).OCSPResponse", Method, 0}, + {"(*Conn).Read", Method, 0}, + {"(*Conn).RemoteAddr", Method, 0}, + {"(*Conn).SetDeadline", Method, 0}, + {"(*Conn).SetReadDeadline", Method, 0}, + {"(*Conn).SetWriteDeadline", Method, 0}, + {"(*Conn).VerifyHostname", Method, 0}, + {"(*Conn).Write", Method, 0}, + {"(*ConnectionState).ExportKeyingMaterial", Method, 11}, + {"(*Dialer).Dial", Method, 15}, + {"(*Dialer).DialContext", Method, 15}, + {"(*ECHRejectionError).Error", Method, 23}, + {"(*QUICConn).Close", Method, 21}, + {"(*QUICConn).ConnectionState", Method, 21}, + {"(*QUICConn).HandleData", Method, 21}, + {"(*QUICConn).NextEvent", Method, 21}, + {"(*QUICConn).SendSessionTicket", Method, 21}, + {"(*QUICConn).SetTransportParameters", Method, 21}, + {"(*QUICConn).Start", Method, 21}, + {"(*QUICConn).StoreSession", Method, 23}, + {"(*SessionState).Bytes", Method, 21}, + {"(AlertError).Error", Method, 21}, + {"(ClientAuthType).String", Method, 15}, + {"(CurveID).String", Method, 15}, + {"(QUICEncryptionLevel).String", Method, 21}, + {"(RecordHeaderError).Error", Method, 6}, + {"(SignatureScheme).String", Method, 15}, + {"AlertError", Type, 21}, + {"Certificate", Type, 0}, + {"Certificate.Certificate", Field, 0}, + {"Certificate.Leaf", Field, 0}, + {"Certificate.OCSPStaple", Field, 0}, + {"Certificate.PrivateKey", Field, 0}, + {"Certificate.SignedCertificateTimestamps", Field, 5}, + {"Certificate.SupportedSignatureAlgorithms", Field, 14}, + {"CertificateRequestInfo", Type, 8}, + {"CertificateRequestInfo.AcceptableCAs", Field, 8}, + {"CertificateRequestInfo.SignatureSchemes", Field, 8}, + {"CertificateRequestInfo.Version", Field, 14}, + {"CertificateVerificationError", Type, 20}, + {"CertificateVerificationError.Err", Field, 20}, + {"CertificateVerificationError.UnverifiedCertificates", Field, 20}, + {"CipherSuite", Type, 14}, + {"CipherSuite.ID", Field, 14}, + {"CipherSuite.Insecure", Field, 14}, + {"CipherSuite.Name", Field, 14}, + {"CipherSuite.SupportedVersions", Field, 14}, + {"CipherSuiteName", Func, 14}, + {"CipherSuites", Func, 14}, + {"Client", Func, 0}, + {"ClientAuthType", Type, 0}, + {"ClientHelloInfo", Type, 4}, + {"ClientHelloInfo.CipherSuites", Field, 4}, + {"ClientHelloInfo.Conn", Field, 8}, + {"ClientHelloInfo.ServerName", Field, 4}, + {"ClientHelloInfo.SignatureSchemes", Field, 8}, + {"ClientHelloInfo.SupportedCurves", Field, 4}, + {"ClientHelloInfo.SupportedPoints", Field, 4}, + {"ClientHelloInfo.SupportedProtos", Field, 8}, + {"ClientHelloInfo.SupportedVersions", Field, 8}, + {"ClientSessionCache", Type, 3}, + {"ClientSessionState", Type, 3}, + {"Config", Type, 0}, + {"Config.Certificates", Field, 0}, + {"Config.CipherSuites", Field, 0}, + {"Config.ClientAuth", Field, 0}, + {"Config.ClientCAs", Field, 0}, + {"Config.ClientSessionCache", Field, 3}, + {"Config.CurvePreferences", Field, 3}, + {"Config.DynamicRecordSizingDisabled", Field, 7}, + {"Config.EncryptedClientHelloConfigList", Field, 23}, + {"Config.EncryptedClientHelloRejectionVerify", Field, 23}, + {"Config.GetCertificate", Field, 4}, + {"Config.GetClientCertificate", Field, 8}, + {"Config.GetConfigForClient", Field, 8}, + {"Config.InsecureSkipVerify", Field, 0}, + {"Config.KeyLogWriter", Field, 8}, + {"Config.MaxVersion", Field, 2}, + {"Config.MinVersion", Field, 2}, + {"Config.NameToCertificate", Field, 0}, + {"Config.NextProtos", Field, 0}, + {"Config.PreferServerCipherSuites", Field, 1}, + {"Config.Rand", Field, 0}, + {"Config.Renegotiation", Field, 7}, + {"Config.RootCAs", Field, 0}, + {"Config.ServerName", Field, 0}, + {"Config.SessionTicketKey", Field, 1}, + {"Config.SessionTicketsDisabled", Field, 1}, + {"Config.Time", Field, 0}, + {"Config.UnwrapSession", Field, 21}, + {"Config.VerifyConnection", Field, 15}, + {"Config.VerifyPeerCertificate", Field, 8}, + {"Config.WrapSession", Field, 21}, + {"Conn", Type, 0}, + {"ConnectionState", Type, 0}, + {"ConnectionState.CipherSuite", Field, 0}, + {"ConnectionState.DidResume", Field, 1}, + {"ConnectionState.ECHAccepted", Field, 23}, + {"ConnectionState.HandshakeComplete", Field, 0}, + {"ConnectionState.NegotiatedProtocol", Field, 0}, + {"ConnectionState.NegotiatedProtocolIsMutual", Field, 0}, + {"ConnectionState.OCSPResponse", Field, 5}, + {"ConnectionState.PeerCertificates", Field, 0}, + {"ConnectionState.ServerName", Field, 0}, + {"ConnectionState.SignedCertificateTimestamps", Field, 5}, + {"ConnectionState.TLSUnique", Field, 4}, + {"ConnectionState.VerifiedChains", Field, 0}, + {"ConnectionState.Version", Field, 3}, + {"CurveID", Type, 3}, + {"CurveP256", Const, 3}, + {"CurveP384", Const, 3}, + {"CurveP521", Const, 3}, + {"Dial", Func, 0}, + {"DialWithDialer", Func, 3}, + {"Dialer", Type, 15}, + {"Dialer.Config", Field, 15}, + {"Dialer.NetDialer", Field, 15}, + {"ECDSAWithP256AndSHA256", Const, 8}, + {"ECDSAWithP384AndSHA384", Const, 8}, + {"ECDSAWithP521AndSHA512", Const, 8}, + {"ECDSAWithSHA1", Const, 10}, + {"ECHRejectionError", Type, 23}, + {"ECHRejectionError.RetryConfigList", Field, 23}, + {"Ed25519", Const, 13}, + {"InsecureCipherSuites", Func, 14}, + {"Listen", Func, 0}, + {"LoadX509KeyPair", Func, 0}, + {"NewLRUClientSessionCache", Func, 3}, + {"NewListener", Func, 0}, + {"NewResumptionState", Func, 21}, + {"NoClientCert", Const, 0}, + {"PKCS1WithSHA1", Const, 8}, + {"PKCS1WithSHA256", Const, 8}, + {"PKCS1WithSHA384", Const, 8}, + {"PKCS1WithSHA512", Const, 8}, + {"PSSWithSHA256", Const, 8}, + {"PSSWithSHA384", Const, 8}, + {"PSSWithSHA512", Const, 8}, + {"ParseSessionState", Func, 21}, + {"QUICClient", Func, 21}, + {"QUICConfig", Type, 21}, + {"QUICConfig.EnableSessionEvents", Field, 23}, + {"QUICConfig.TLSConfig", Field, 21}, + {"QUICConn", Type, 21}, + {"QUICEncryptionLevel", Type, 21}, + {"QUICEncryptionLevelApplication", Const, 21}, + {"QUICEncryptionLevelEarly", Const, 21}, + {"QUICEncryptionLevelHandshake", Const, 21}, + {"QUICEncryptionLevelInitial", Const, 21}, + {"QUICEvent", Type, 21}, + {"QUICEvent.Data", Field, 21}, + {"QUICEvent.Kind", Field, 21}, + {"QUICEvent.Level", Field, 21}, + {"QUICEvent.SessionState", Field, 23}, + {"QUICEvent.Suite", Field, 21}, + {"QUICEventKind", Type, 21}, + {"QUICHandshakeDone", Const, 21}, + {"QUICNoEvent", Const, 21}, + {"QUICRejectedEarlyData", Const, 21}, + {"QUICResumeSession", Const, 23}, + {"QUICServer", Func, 21}, + {"QUICSessionTicketOptions", Type, 21}, + {"QUICSessionTicketOptions.EarlyData", Field, 21}, + {"QUICSessionTicketOptions.Extra", Field, 23}, + {"QUICSetReadSecret", Const, 21}, + {"QUICSetWriteSecret", Const, 21}, + {"QUICStoreSession", Const, 23}, + {"QUICTransportParameters", Const, 21}, + {"QUICTransportParametersRequired", Const, 21}, + {"QUICWriteData", Const, 21}, + {"RecordHeaderError", Type, 6}, + {"RecordHeaderError.Conn", Field, 12}, + {"RecordHeaderError.Msg", Field, 6}, + {"RecordHeaderError.RecordHeader", Field, 6}, + {"RenegotiateFreelyAsClient", Const, 7}, + {"RenegotiateNever", Const, 7}, + {"RenegotiateOnceAsClient", Const, 7}, + {"RenegotiationSupport", Type, 7}, + {"RequestClientCert", Const, 0}, + {"RequireAndVerifyClientCert", Const, 0}, + {"RequireAnyClientCert", Const, 0}, + {"Server", Func, 0}, + {"SessionState", Type, 21}, + {"SessionState.EarlyData", Field, 21}, + {"SessionState.Extra", Field, 21}, + {"SignatureScheme", Type, 8}, + {"TLS_AES_128_GCM_SHA256", Const, 12}, + {"TLS_AES_256_GCM_SHA384", Const, 12}, + {"TLS_CHACHA20_POLY1305_SHA256", Const, 12}, + {"TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA", Const, 2}, + {"TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256", Const, 8}, + {"TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", Const, 2}, + {"TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA", Const, 2}, + {"TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", Const, 5}, + {"TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305", Const, 8}, + {"TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256", Const, 14}, + {"TLS_ECDHE_ECDSA_WITH_RC4_128_SHA", Const, 2}, + {"TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA", Const, 0}, + {"TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA", Const, 0}, + {"TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256", Const, 8}, + {"TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", Const, 2}, + {"TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA", Const, 1}, + {"TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", Const, 5}, + {"TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305", Const, 8}, + {"TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256", Const, 14}, + {"TLS_ECDHE_RSA_WITH_RC4_128_SHA", Const, 0}, + {"TLS_FALLBACK_SCSV", Const, 4}, + {"TLS_RSA_WITH_3DES_EDE_CBC_SHA", Const, 0}, + {"TLS_RSA_WITH_AES_128_CBC_SHA", Const, 0}, + {"TLS_RSA_WITH_AES_128_CBC_SHA256", Const, 8}, + {"TLS_RSA_WITH_AES_128_GCM_SHA256", Const, 6}, + {"TLS_RSA_WITH_AES_256_CBC_SHA", Const, 1}, + {"TLS_RSA_WITH_AES_256_GCM_SHA384", Const, 6}, + {"TLS_RSA_WITH_RC4_128_SHA", Const, 0}, + {"VerifyClientCertIfGiven", Const, 0}, + {"VersionName", Func, 21}, + {"VersionSSL30", Const, 2}, + {"VersionTLS10", Const, 2}, + {"VersionTLS11", Const, 2}, + {"VersionTLS12", Const, 2}, + {"VersionTLS13", Const, 12}, + {"X25519", Const, 8}, + {"X509KeyPair", Func, 0}, + }, + "crypto/x509": { + {"(*CertPool).AddCert", Method, 0}, + {"(*CertPool).AddCertWithConstraint", Method, 22}, + {"(*CertPool).AppendCertsFromPEM", Method, 0}, + {"(*CertPool).Clone", Method, 19}, + {"(*CertPool).Equal", Method, 19}, + {"(*CertPool).Subjects", Method, 0}, + {"(*Certificate).CheckCRLSignature", Method, 0}, + {"(*Certificate).CheckSignature", Method, 0}, + {"(*Certificate).CheckSignatureFrom", Method, 0}, + {"(*Certificate).CreateCRL", Method, 0}, + {"(*Certificate).Equal", Method, 0}, + {"(*Certificate).Verify", Method, 0}, + {"(*Certificate).VerifyHostname", Method, 0}, + {"(*CertificateRequest).CheckSignature", Method, 5}, + {"(*OID).UnmarshalBinary", Method, 23}, + {"(*OID).UnmarshalText", Method, 23}, + {"(*RevocationList).CheckSignatureFrom", Method, 19}, + {"(CertificateInvalidError).Error", Method, 0}, + {"(ConstraintViolationError).Error", Method, 0}, + {"(HostnameError).Error", Method, 0}, + {"(InsecureAlgorithmError).Error", Method, 6}, + {"(OID).Equal", Method, 22}, + {"(OID).EqualASN1OID", Method, 22}, + {"(OID).MarshalBinary", Method, 23}, + {"(OID).MarshalText", Method, 23}, + {"(OID).String", Method, 22}, + {"(PublicKeyAlgorithm).String", Method, 10}, + {"(SignatureAlgorithm).String", Method, 6}, + {"(SystemRootsError).Error", Method, 1}, + {"(SystemRootsError).Unwrap", Method, 16}, + {"(UnhandledCriticalExtension).Error", Method, 0}, + {"(UnknownAuthorityError).Error", Method, 0}, + {"CANotAuthorizedForExtKeyUsage", Const, 10}, + {"CANotAuthorizedForThisName", Const, 0}, + {"CertPool", Type, 0}, + {"Certificate", Type, 0}, + {"Certificate.AuthorityKeyId", Field, 0}, + {"Certificate.BasicConstraintsValid", Field, 0}, + {"Certificate.CRLDistributionPoints", Field, 2}, + {"Certificate.DNSNames", Field, 0}, + {"Certificate.EmailAddresses", Field, 0}, + {"Certificate.ExcludedDNSDomains", Field, 9}, + {"Certificate.ExcludedEmailAddresses", Field, 10}, + {"Certificate.ExcludedIPRanges", Field, 10}, + {"Certificate.ExcludedURIDomains", Field, 10}, + {"Certificate.ExtKeyUsage", Field, 0}, + {"Certificate.Extensions", Field, 2}, + {"Certificate.ExtraExtensions", Field, 2}, + {"Certificate.IPAddresses", Field, 1}, + {"Certificate.IsCA", Field, 0}, + {"Certificate.Issuer", Field, 0}, + {"Certificate.IssuingCertificateURL", Field, 2}, + {"Certificate.KeyUsage", Field, 0}, + {"Certificate.MaxPathLen", Field, 0}, + {"Certificate.MaxPathLenZero", Field, 4}, + {"Certificate.NotAfter", Field, 0}, + {"Certificate.NotBefore", Field, 0}, + {"Certificate.OCSPServer", Field, 2}, + {"Certificate.PermittedDNSDomains", Field, 0}, + {"Certificate.PermittedDNSDomainsCritical", Field, 0}, + {"Certificate.PermittedEmailAddresses", Field, 10}, + {"Certificate.PermittedIPRanges", Field, 10}, + {"Certificate.PermittedURIDomains", Field, 10}, + {"Certificate.Policies", Field, 22}, + {"Certificate.PolicyIdentifiers", Field, 0}, + {"Certificate.PublicKey", Field, 0}, + {"Certificate.PublicKeyAlgorithm", Field, 0}, + {"Certificate.Raw", Field, 0}, + {"Certificate.RawIssuer", Field, 0}, + {"Certificate.RawSubject", Field, 0}, + {"Certificate.RawSubjectPublicKeyInfo", Field, 0}, + {"Certificate.RawTBSCertificate", Field, 0}, + {"Certificate.SerialNumber", Field, 0}, + {"Certificate.Signature", Field, 0}, + {"Certificate.SignatureAlgorithm", Field, 0}, + {"Certificate.Subject", Field, 0}, + {"Certificate.SubjectKeyId", Field, 0}, + {"Certificate.URIs", Field, 10}, + {"Certificate.UnhandledCriticalExtensions", Field, 5}, + {"Certificate.UnknownExtKeyUsage", Field, 0}, + {"Certificate.Version", Field, 0}, + {"CertificateInvalidError", Type, 0}, + {"CertificateInvalidError.Cert", Field, 0}, + {"CertificateInvalidError.Detail", Field, 10}, + {"CertificateInvalidError.Reason", Field, 0}, + {"CertificateRequest", Type, 3}, + {"CertificateRequest.Attributes", Field, 3}, + {"CertificateRequest.DNSNames", Field, 3}, + {"CertificateRequest.EmailAddresses", Field, 3}, + {"CertificateRequest.Extensions", Field, 3}, + {"CertificateRequest.ExtraExtensions", Field, 3}, + {"CertificateRequest.IPAddresses", Field, 3}, + {"CertificateRequest.PublicKey", Field, 3}, + {"CertificateRequest.PublicKeyAlgorithm", Field, 3}, + {"CertificateRequest.Raw", Field, 3}, + {"CertificateRequest.RawSubject", Field, 3}, + {"CertificateRequest.RawSubjectPublicKeyInfo", Field, 3}, + {"CertificateRequest.RawTBSCertificateRequest", Field, 3}, + {"CertificateRequest.Signature", Field, 3}, + {"CertificateRequest.SignatureAlgorithm", Field, 3}, + {"CertificateRequest.Subject", Field, 3}, + {"CertificateRequest.URIs", Field, 10}, + {"CertificateRequest.Version", Field, 3}, + {"ConstraintViolationError", Type, 0}, + {"CreateCertificate", Func, 0}, + {"CreateCertificateRequest", Func, 3}, + {"CreateRevocationList", Func, 15}, + {"DSA", Const, 0}, + {"DSAWithSHA1", Const, 0}, + {"DSAWithSHA256", Const, 0}, + {"DecryptPEMBlock", Func, 1}, + {"ECDSA", Const, 1}, + {"ECDSAWithSHA1", Const, 1}, + {"ECDSAWithSHA256", Const, 1}, + {"ECDSAWithSHA384", Const, 1}, + {"ECDSAWithSHA512", Const, 1}, + {"Ed25519", Const, 13}, + {"EncryptPEMBlock", Func, 1}, + {"ErrUnsupportedAlgorithm", Var, 0}, + {"Expired", Const, 0}, + {"ExtKeyUsage", Type, 0}, + {"ExtKeyUsageAny", Const, 0}, + {"ExtKeyUsageClientAuth", Const, 0}, + {"ExtKeyUsageCodeSigning", Const, 0}, + {"ExtKeyUsageEmailProtection", Const, 0}, + {"ExtKeyUsageIPSECEndSystem", Const, 1}, + {"ExtKeyUsageIPSECTunnel", Const, 1}, + {"ExtKeyUsageIPSECUser", Const, 1}, + {"ExtKeyUsageMicrosoftCommercialCodeSigning", Const, 10}, + {"ExtKeyUsageMicrosoftKernelCodeSigning", Const, 10}, + {"ExtKeyUsageMicrosoftServerGatedCrypto", Const, 1}, + {"ExtKeyUsageNetscapeServerGatedCrypto", Const, 1}, + {"ExtKeyUsageOCSPSigning", Const, 0}, + {"ExtKeyUsageServerAuth", Const, 0}, + {"ExtKeyUsageTimeStamping", Const, 0}, + {"HostnameError", Type, 0}, + {"HostnameError.Certificate", Field, 0}, + {"HostnameError.Host", Field, 0}, + {"IncompatibleUsage", Const, 1}, + {"IncorrectPasswordError", Var, 1}, + {"InsecureAlgorithmError", Type, 6}, + {"InvalidReason", Type, 0}, + {"IsEncryptedPEMBlock", Func, 1}, + {"KeyUsage", Type, 0}, + {"KeyUsageCRLSign", Const, 0}, + {"KeyUsageCertSign", Const, 0}, + {"KeyUsageContentCommitment", Const, 0}, + {"KeyUsageDataEncipherment", Const, 0}, + {"KeyUsageDecipherOnly", Const, 0}, + {"KeyUsageDigitalSignature", Const, 0}, + {"KeyUsageEncipherOnly", Const, 0}, + {"KeyUsageKeyAgreement", Const, 0}, + {"KeyUsageKeyEncipherment", Const, 0}, + {"MD2WithRSA", Const, 0}, + {"MD5WithRSA", Const, 0}, + {"MarshalECPrivateKey", Func, 2}, + {"MarshalPKCS1PrivateKey", Func, 0}, + {"MarshalPKCS1PublicKey", Func, 10}, + {"MarshalPKCS8PrivateKey", Func, 10}, + {"MarshalPKIXPublicKey", Func, 0}, + {"NameConstraintsWithoutSANs", Const, 10}, + {"NameMismatch", Const, 8}, + {"NewCertPool", Func, 0}, + {"NotAuthorizedToSign", Const, 0}, + {"OID", Type, 22}, + {"OIDFromInts", Func, 22}, + {"PEMCipher", Type, 1}, + {"PEMCipher3DES", Const, 1}, + {"PEMCipherAES128", Const, 1}, + {"PEMCipherAES192", Const, 1}, + {"PEMCipherAES256", Const, 1}, + {"PEMCipherDES", Const, 1}, + {"ParseCRL", Func, 0}, + {"ParseCertificate", Func, 0}, + {"ParseCertificateRequest", Func, 3}, + {"ParseCertificates", Func, 0}, + {"ParseDERCRL", Func, 0}, + {"ParseECPrivateKey", Func, 1}, + {"ParseOID", Func, 23}, + {"ParsePKCS1PrivateKey", Func, 0}, + {"ParsePKCS1PublicKey", Func, 10}, + {"ParsePKCS8PrivateKey", Func, 0}, + {"ParsePKIXPublicKey", Func, 0}, + {"ParseRevocationList", Func, 19}, + {"PublicKeyAlgorithm", Type, 0}, + {"PureEd25519", Const, 13}, + {"RSA", Const, 0}, + {"RevocationList", Type, 15}, + {"RevocationList.AuthorityKeyId", Field, 19}, + {"RevocationList.Extensions", Field, 19}, + {"RevocationList.ExtraExtensions", Field, 15}, + {"RevocationList.Issuer", Field, 19}, + {"RevocationList.NextUpdate", Field, 15}, + {"RevocationList.Number", Field, 15}, + {"RevocationList.Raw", Field, 19}, + {"RevocationList.RawIssuer", Field, 19}, + {"RevocationList.RawTBSRevocationList", Field, 19}, + {"RevocationList.RevokedCertificateEntries", Field, 21}, + {"RevocationList.RevokedCertificates", Field, 15}, + {"RevocationList.Signature", Field, 19}, + {"RevocationList.SignatureAlgorithm", Field, 15}, + {"RevocationList.ThisUpdate", Field, 15}, + {"RevocationListEntry", Type, 21}, + {"RevocationListEntry.Extensions", Field, 21}, + {"RevocationListEntry.ExtraExtensions", Field, 21}, + {"RevocationListEntry.Raw", Field, 21}, + {"RevocationListEntry.ReasonCode", Field, 21}, + {"RevocationListEntry.RevocationTime", Field, 21}, + {"RevocationListEntry.SerialNumber", Field, 21}, + {"SHA1WithRSA", Const, 0}, + {"SHA256WithRSA", Const, 0}, + {"SHA256WithRSAPSS", Const, 8}, + {"SHA384WithRSA", Const, 0}, + {"SHA384WithRSAPSS", Const, 8}, + {"SHA512WithRSA", Const, 0}, + {"SHA512WithRSAPSS", Const, 8}, + {"SetFallbackRoots", Func, 20}, + {"SignatureAlgorithm", Type, 0}, + {"SystemCertPool", Func, 7}, + {"SystemRootsError", Type, 1}, + {"SystemRootsError.Err", Field, 7}, + {"TooManyConstraints", Const, 10}, + {"TooManyIntermediates", Const, 0}, + {"UnconstrainedName", Const, 10}, + {"UnhandledCriticalExtension", Type, 0}, + {"UnknownAuthorityError", Type, 0}, + {"UnknownAuthorityError.Cert", Field, 8}, + {"UnknownPublicKeyAlgorithm", Const, 0}, + {"UnknownSignatureAlgorithm", Const, 0}, + {"VerifyOptions", Type, 0}, + {"VerifyOptions.CurrentTime", Field, 0}, + {"VerifyOptions.DNSName", Field, 0}, + {"VerifyOptions.Intermediates", Field, 0}, + {"VerifyOptions.KeyUsages", Field, 1}, + {"VerifyOptions.MaxConstraintComparisions", Field, 10}, + {"VerifyOptions.Roots", Field, 0}, + }, + "crypto/x509/pkix": { + {"(*CertificateList).HasExpired", Method, 0}, + {"(*Name).FillFromRDNSequence", Method, 0}, + {"(Name).String", Method, 10}, + {"(Name).ToRDNSequence", Method, 0}, + {"(RDNSequence).String", Method, 10}, + {"AlgorithmIdentifier", Type, 0}, + {"AlgorithmIdentifier.Algorithm", Field, 0}, + {"AlgorithmIdentifier.Parameters", Field, 0}, + {"AttributeTypeAndValue", Type, 0}, + {"AttributeTypeAndValue.Type", Field, 0}, + {"AttributeTypeAndValue.Value", Field, 0}, + {"AttributeTypeAndValueSET", Type, 3}, + {"AttributeTypeAndValueSET.Type", Field, 3}, + {"AttributeTypeAndValueSET.Value", Field, 3}, + {"CertificateList", Type, 0}, + {"CertificateList.SignatureAlgorithm", Field, 0}, + {"CertificateList.SignatureValue", Field, 0}, + {"CertificateList.TBSCertList", Field, 0}, + {"Extension", Type, 0}, + {"Extension.Critical", Field, 0}, + {"Extension.Id", Field, 0}, + {"Extension.Value", Field, 0}, + {"Name", Type, 0}, + {"Name.CommonName", Field, 0}, + {"Name.Country", Field, 0}, + {"Name.ExtraNames", Field, 5}, + {"Name.Locality", Field, 0}, + {"Name.Names", Field, 0}, + {"Name.Organization", Field, 0}, + {"Name.OrganizationalUnit", Field, 0}, + {"Name.PostalCode", Field, 0}, + {"Name.Province", Field, 0}, + {"Name.SerialNumber", Field, 0}, + {"Name.StreetAddress", Field, 0}, + {"RDNSequence", Type, 0}, + {"RelativeDistinguishedNameSET", Type, 0}, + {"RevokedCertificate", Type, 0}, + {"RevokedCertificate.Extensions", Field, 0}, + {"RevokedCertificate.RevocationTime", Field, 0}, + {"RevokedCertificate.SerialNumber", Field, 0}, + {"TBSCertificateList", Type, 0}, + {"TBSCertificateList.Extensions", Field, 0}, + {"TBSCertificateList.Issuer", Field, 0}, + {"TBSCertificateList.NextUpdate", Field, 0}, + {"TBSCertificateList.Raw", Field, 0}, + {"TBSCertificateList.RevokedCertificates", Field, 0}, + {"TBSCertificateList.Signature", Field, 0}, + {"TBSCertificateList.ThisUpdate", Field, 0}, + {"TBSCertificateList.Version", Field, 0}, + }, + "database/sql": { + {"(*ColumnType).DatabaseTypeName", Method, 8}, + {"(*ColumnType).DecimalSize", Method, 8}, + {"(*ColumnType).Length", Method, 8}, + {"(*ColumnType).Name", Method, 8}, + {"(*ColumnType).Nullable", Method, 8}, + {"(*ColumnType).ScanType", Method, 8}, + {"(*Conn).BeginTx", Method, 9}, + {"(*Conn).Close", Method, 9}, + {"(*Conn).ExecContext", Method, 9}, + {"(*Conn).PingContext", Method, 9}, + {"(*Conn).PrepareContext", Method, 9}, + {"(*Conn).QueryContext", Method, 9}, + {"(*Conn).QueryRowContext", Method, 9}, + {"(*Conn).Raw", Method, 13}, + {"(*DB).Begin", Method, 0}, + {"(*DB).BeginTx", Method, 8}, + {"(*DB).Close", Method, 0}, + {"(*DB).Conn", Method, 9}, + {"(*DB).Driver", Method, 0}, + {"(*DB).Exec", Method, 0}, + {"(*DB).ExecContext", Method, 8}, + {"(*DB).Ping", Method, 1}, + {"(*DB).PingContext", Method, 8}, + {"(*DB).Prepare", Method, 0}, + {"(*DB).PrepareContext", Method, 8}, + {"(*DB).Query", Method, 0}, + {"(*DB).QueryContext", Method, 8}, + {"(*DB).QueryRow", Method, 0}, + {"(*DB).QueryRowContext", Method, 8}, + {"(*DB).SetConnMaxIdleTime", Method, 15}, + {"(*DB).SetConnMaxLifetime", Method, 6}, + {"(*DB).SetMaxIdleConns", Method, 1}, + {"(*DB).SetMaxOpenConns", Method, 2}, + {"(*DB).Stats", Method, 5}, + {"(*Null).Scan", Method, 22}, + {"(*NullBool).Scan", Method, 0}, + {"(*NullByte).Scan", Method, 17}, + {"(*NullFloat64).Scan", Method, 0}, + {"(*NullInt16).Scan", Method, 17}, + {"(*NullInt32).Scan", Method, 13}, + {"(*NullInt64).Scan", Method, 0}, + {"(*NullString).Scan", Method, 0}, + {"(*NullTime).Scan", Method, 13}, + {"(*Row).Err", Method, 15}, + {"(*Row).Scan", Method, 0}, + {"(*Rows).Close", Method, 0}, + {"(*Rows).ColumnTypes", Method, 8}, + {"(*Rows).Columns", Method, 0}, + {"(*Rows).Err", Method, 0}, + {"(*Rows).Next", Method, 0}, + {"(*Rows).NextResultSet", Method, 8}, + {"(*Rows).Scan", Method, 0}, + {"(*Stmt).Close", Method, 0}, + {"(*Stmt).Exec", Method, 0}, + {"(*Stmt).ExecContext", Method, 8}, + {"(*Stmt).Query", Method, 0}, + {"(*Stmt).QueryContext", Method, 8}, + {"(*Stmt).QueryRow", Method, 0}, + {"(*Stmt).QueryRowContext", Method, 8}, + {"(*Tx).Commit", Method, 0}, + {"(*Tx).Exec", Method, 0}, + {"(*Tx).ExecContext", Method, 8}, + {"(*Tx).Prepare", Method, 0}, + {"(*Tx).PrepareContext", Method, 8}, + {"(*Tx).Query", Method, 0}, + {"(*Tx).QueryContext", Method, 8}, + {"(*Tx).QueryRow", Method, 0}, + {"(*Tx).QueryRowContext", Method, 8}, + {"(*Tx).Rollback", Method, 0}, + {"(*Tx).Stmt", Method, 0}, + {"(*Tx).StmtContext", Method, 8}, + {"(IsolationLevel).String", Method, 11}, + {"(Null).Value", Method, 22}, + {"(NullBool).Value", Method, 0}, + {"(NullByte).Value", Method, 17}, + {"(NullFloat64).Value", Method, 0}, + {"(NullInt16).Value", Method, 17}, + {"(NullInt32).Value", Method, 13}, + {"(NullInt64).Value", Method, 0}, + {"(NullString).Value", Method, 0}, + {"(NullTime).Value", Method, 13}, + {"ColumnType", Type, 8}, + {"Conn", Type, 9}, + {"DB", Type, 0}, + {"DBStats", Type, 5}, + {"DBStats.Idle", Field, 11}, + {"DBStats.InUse", Field, 11}, + {"DBStats.MaxIdleClosed", Field, 11}, + {"DBStats.MaxIdleTimeClosed", Field, 15}, + {"DBStats.MaxLifetimeClosed", Field, 11}, + {"DBStats.MaxOpenConnections", Field, 11}, + {"DBStats.OpenConnections", Field, 5}, + {"DBStats.WaitCount", Field, 11}, + {"DBStats.WaitDuration", Field, 11}, + {"Drivers", Func, 4}, + {"ErrConnDone", Var, 9}, + {"ErrNoRows", Var, 0}, + {"ErrTxDone", Var, 0}, + {"IsolationLevel", Type, 8}, + {"LevelDefault", Const, 8}, + {"LevelLinearizable", Const, 8}, + {"LevelReadCommitted", Const, 8}, + {"LevelReadUncommitted", Const, 8}, + {"LevelRepeatableRead", Const, 8}, + {"LevelSerializable", Const, 8}, + {"LevelSnapshot", Const, 8}, + {"LevelWriteCommitted", Const, 8}, + {"Named", Func, 8}, + {"NamedArg", Type, 8}, + {"NamedArg.Name", Field, 8}, + {"NamedArg.Value", Field, 8}, + {"Null", Type, 22}, + {"Null.V", Field, 22}, + {"Null.Valid", Field, 22}, + {"NullBool", Type, 0}, + {"NullBool.Bool", Field, 0}, + {"NullBool.Valid", Field, 0}, + {"NullByte", Type, 17}, + {"NullByte.Byte", Field, 17}, + {"NullByte.Valid", Field, 17}, + {"NullFloat64", Type, 0}, + {"NullFloat64.Float64", Field, 0}, + {"NullFloat64.Valid", Field, 0}, + {"NullInt16", Type, 17}, + {"NullInt16.Int16", Field, 17}, + {"NullInt16.Valid", Field, 17}, + {"NullInt32", Type, 13}, + {"NullInt32.Int32", Field, 13}, + {"NullInt32.Valid", Field, 13}, + {"NullInt64", Type, 0}, + {"NullInt64.Int64", Field, 0}, + {"NullInt64.Valid", Field, 0}, + {"NullString", Type, 0}, + {"NullString.String", Field, 0}, + {"NullString.Valid", Field, 0}, + {"NullTime", Type, 13}, + {"NullTime.Time", Field, 13}, + {"NullTime.Valid", Field, 13}, + {"Open", Func, 0}, + {"OpenDB", Func, 10}, + {"Out", Type, 9}, + {"Out.Dest", Field, 9}, + {"Out.In", Field, 9}, + {"RawBytes", Type, 0}, + {"Register", Func, 0}, + {"Result", Type, 0}, + {"Row", Type, 0}, + {"Rows", Type, 0}, + {"Scanner", Type, 0}, + {"Stmt", Type, 0}, + {"Tx", Type, 0}, + {"TxOptions", Type, 8}, + {"TxOptions.Isolation", Field, 8}, + {"TxOptions.ReadOnly", Field, 8}, + }, + "database/sql/driver": { + {"(NotNull).ConvertValue", Method, 0}, + {"(Null).ConvertValue", Method, 0}, + {"(RowsAffected).LastInsertId", Method, 0}, + {"(RowsAffected).RowsAffected", Method, 0}, + {"Bool", Var, 0}, + {"ColumnConverter", Type, 0}, + {"Conn", Type, 0}, + {"ConnBeginTx", Type, 8}, + {"ConnPrepareContext", Type, 8}, + {"Connector", Type, 10}, + {"DefaultParameterConverter", Var, 0}, + {"Driver", Type, 0}, + {"DriverContext", Type, 10}, + {"ErrBadConn", Var, 0}, + {"ErrRemoveArgument", Var, 9}, + {"ErrSkip", Var, 0}, + {"Execer", Type, 0}, + {"ExecerContext", Type, 8}, + {"Int32", Var, 0}, + {"IsScanValue", Func, 0}, + {"IsValue", Func, 0}, + {"IsolationLevel", Type, 8}, + {"NamedValue", Type, 8}, + {"NamedValue.Name", Field, 8}, + {"NamedValue.Ordinal", Field, 8}, + {"NamedValue.Value", Field, 8}, + {"NamedValueChecker", Type, 9}, + {"NotNull", Type, 0}, + {"NotNull.Converter", Field, 0}, + {"Null", Type, 0}, + {"Null.Converter", Field, 0}, + {"Pinger", Type, 8}, + {"Queryer", Type, 1}, + {"QueryerContext", Type, 8}, + {"Result", Type, 0}, + {"ResultNoRows", Var, 0}, + {"Rows", Type, 0}, + {"RowsAffected", Type, 0}, + {"RowsColumnTypeDatabaseTypeName", Type, 8}, + {"RowsColumnTypeLength", Type, 8}, + {"RowsColumnTypeNullable", Type, 8}, + {"RowsColumnTypePrecisionScale", Type, 8}, + {"RowsColumnTypeScanType", Type, 8}, + {"RowsNextResultSet", Type, 8}, + {"SessionResetter", Type, 10}, + {"Stmt", Type, 0}, + {"StmtExecContext", Type, 8}, + {"StmtQueryContext", Type, 8}, + {"String", Var, 0}, + {"Tx", Type, 0}, + {"TxOptions", Type, 8}, + {"TxOptions.Isolation", Field, 8}, + {"TxOptions.ReadOnly", Field, 8}, + {"Validator", Type, 15}, + {"Value", Type, 0}, + {"ValueConverter", Type, 0}, + {"Valuer", Type, 0}, + }, + "debug/buildinfo": { + {"BuildInfo", Type, 18}, + {"Read", Func, 18}, + {"ReadFile", Func, 18}, + }, + "debug/dwarf": { + {"(*AddrType).Basic", Method, 0}, + {"(*AddrType).Common", Method, 0}, + {"(*AddrType).Size", Method, 0}, + {"(*AddrType).String", Method, 0}, + {"(*ArrayType).Common", Method, 0}, + {"(*ArrayType).Size", Method, 0}, + {"(*ArrayType).String", Method, 0}, + {"(*BasicType).Basic", Method, 0}, + {"(*BasicType).Common", Method, 0}, + {"(*BasicType).Size", Method, 0}, + {"(*BasicType).String", Method, 0}, + {"(*BoolType).Basic", Method, 0}, + {"(*BoolType).Common", Method, 0}, + {"(*BoolType).Size", Method, 0}, + {"(*BoolType).String", Method, 0}, + {"(*CharType).Basic", Method, 0}, + {"(*CharType).Common", Method, 0}, + {"(*CharType).Size", Method, 0}, + {"(*CharType).String", Method, 0}, + {"(*CommonType).Common", Method, 0}, + {"(*CommonType).Size", Method, 0}, + {"(*ComplexType).Basic", Method, 0}, + {"(*ComplexType).Common", Method, 0}, + {"(*ComplexType).Size", Method, 0}, + {"(*ComplexType).String", Method, 0}, + {"(*Data).AddSection", Method, 14}, + {"(*Data).AddTypes", Method, 3}, + {"(*Data).LineReader", Method, 5}, + {"(*Data).Ranges", Method, 7}, + {"(*Data).Reader", Method, 0}, + {"(*Data).Type", Method, 0}, + {"(*DotDotDotType).Common", Method, 0}, + {"(*DotDotDotType).Size", Method, 0}, + {"(*DotDotDotType).String", Method, 0}, + {"(*Entry).AttrField", Method, 5}, + {"(*Entry).Val", Method, 0}, + {"(*EnumType).Common", Method, 0}, + {"(*EnumType).Size", Method, 0}, + {"(*EnumType).String", Method, 0}, + {"(*FloatType).Basic", Method, 0}, + {"(*FloatType).Common", Method, 0}, + {"(*FloatType).Size", Method, 0}, + {"(*FloatType).String", Method, 0}, + {"(*FuncType).Common", Method, 0}, + {"(*FuncType).Size", Method, 0}, + {"(*FuncType).String", Method, 0}, + {"(*IntType).Basic", Method, 0}, + {"(*IntType).Common", Method, 0}, + {"(*IntType).Size", Method, 0}, + {"(*IntType).String", Method, 0}, + {"(*LineReader).Files", Method, 14}, + {"(*LineReader).Next", Method, 5}, + {"(*LineReader).Reset", Method, 5}, + {"(*LineReader).Seek", Method, 5}, + {"(*LineReader).SeekPC", Method, 5}, + {"(*LineReader).Tell", Method, 5}, + {"(*PtrType).Common", Method, 0}, + {"(*PtrType).Size", Method, 0}, + {"(*PtrType).String", Method, 0}, + {"(*QualType).Common", Method, 0}, + {"(*QualType).Size", Method, 0}, + {"(*QualType).String", Method, 0}, + {"(*Reader).AddressSize", Method, 5}, + {"(*Reader).ByteOrder", Method, 14}, + {"(*Reader).Next", Method, 0}, + {"(*Reader).Seek", Method, 0}, + {"(*Reader).SeekPC", Method, 7}, + {"(*Reader).SkipChildren", Method, 0}, + {"(*StructType).Common", Method, 0}, + {"(*StructType).Defn", Method, 0}, + {"(*StructType).Size", Method, 0}, + {"(*StructType).String", Method, 0}, + {"(*TypedefType).Common", Method, 0}, + {"(*TypedefType).Size", Method, 0}, + {"(*TypedefType).String", Method, 0}, + {"(*UcharType).Basic", Method, 0}, + {"(*UcharType).Common", Method, 0}, + {"(*UcharType).Size", Method, 0}, + {"(*UcharType).String", Method, 0}, + {"(*UintType).Basic", Method, 0}, + {"(*UintType).Common", Method, 0}, + {"(*UintType).Size", Method, 0}, + {"(*UintType).String", Method, 0}, + {"(*UnspecifiedType).Basic", Method, 4}, + {"(*UnspecifiedType).Common", Method, 4}, + {"(*UnspecifiedType).Size", Method, 4}, + {"(*UnspecifiedType).String", Method, 4}, + {"(*UnsupportedType).Common", Method, 13}, + {"(*UnsupportedType).Size", Method, 13}, + {"(*UnsupportedType).String", Method, 13}, + {"(*VoidType).Common", Method, 0}, + {"(*VoidType).Size", Method, 0}, + {"(*VoidType).String", Method, 0}, + {"(Attr).GoString", Method, 0}, + {"(Attr).String", Method, 0}, + {"(Class).GoString", Method, 5}, + {"(Class).String", Method, 5}, + {"(DecodeError).Error", Method, 0}, + {"(Tag).GoString", Method, 0}, + {"(Tag).String", Method, 0}, + {"AddrType", Type, 0}, + {"AddrType.BasicType", Field, 0}, + {"ArrayType", Type, 0}, + {"ArrayType.CommonType", Field, 0}, + {"ArrayType.Count", Field, 0}, + {"ArrayType.StrideBitSize", Field, 0}, + {"ArrayType.Type", Field, 0}, + {"Attr", Type, 0}, + {"AttrAbstractOrigin", Const, 0}, + {"AttrAccessibility", Const, 0}, + {"AttrAddrBase", Const, 14}, + {"AttrAddrClass", Const, 0}, + {"AttrAlignment", Const, 14}, + {"AttrAllocated", Const, 0}, + {"AttrArtificial", Const, 0}, + {"AttrAssociated", Const, 0}, + {"AttrBaseTypes", Const, 0}, + {"AttrBinaryScale", Const, 14}, + {"AttrBitOffset", Const, 0}, + {"AttrBitSize", Const, 0}, + {"AttrByteSize", Const, 0}, + {"AttrCallAllCalls", Const, 14}, + {"AttrCallAllSourceCalls", Const, 14}, + {"AttrCallAllTailCalls", Const, 14}, + {"AttrCallColumn", Const, 0}, + {"AttrCallDataLocation", Const, 14}, + {"AttrCallDataValue", Const, 14}, + {"AttrCallFile", Const, 0}, + {"AttrCallLine", Const, 0}, + {"AttrCallOrigin", Const, 14}, + {"AttrCallPC", Const, 14}, + {"AttrCallParameter", Const, 14}, + {"AttrCallReturnPC", Const, 14}, + {"AttrCallTailCall", Const, 14}, + {"AttrCallTarget", Const, 14}, + {"AttrCallTargetClobbered", Const, 14}, + {"AttrCallValue", Const, 14}, + {"AttrCalling", Const, 0}, + {"AttrCommonRef", Const, 0}, + {"AttrCompDir", Const, 0}, + {"AttrConstExpr", Const, 14}, + {"AttrConstValue", Const, 0}, + {"AttrContainingType", Const, 0}, + {"AttrCount", Const, 0}, + {"AttrDataBitOffset", Const, 14}, + {"AttrDataLocation", Const, 0}, + {"AttrDataMemberLoc", Const, 0}, + {"AttrDecimalScale", Const, 14}, + {"AttrDecimalSign", Const, 14}, + {"AttrDeclColumn", Const, 0}, + {"AttrDeclFile", Const, 0}, + {"AttrDeclLine", Const, 0}, + {"AttrDeclaration", Const, 0}, + {"AttrDefaultValue", Const, 0}, + {"AttrDefaulted", Const, 14}, + {"AttrDeleted", Const, 14}, + {"AttrDescription", Const, 0}, + {"AttrDigitCount", Const, 14}, + {"AttrDiscr", Const, 0}, + {"AttrDiscrList", Const, 0}, + {"AttrDiscrValue", Const, 0}, + {"AttrDwoName", Const, 14}, + {"AttrElemental", Const, 14}, + {"AttrEncoding", Const, 0}, + {"AttrEndianity", Const, 14}, + {"AttrEntrypc", Const, 0}, + {"AttrEnumClass", Const, 14}, + {"AttrExplicit", Const, 14}, + {"AttrExportSymbols", Const, 14}, + {"AttrExtension", Const, 0}, + {"AttrExternal", Const, 0}, + {"AttrFrameBase", Const, 0}, + {"AttrFriend", Const, 0}, + {"AttrHighpc", Const, 0}, + {"AttrIdentifierCase", Const, 0}, + {"AttrImport", Const, 0}, + {"AttrInline", Const, 0}, + {"AttrIsOptional", Const, 0}, + {"AttrLanguage", Const, 0}, + {"AttrLinkageName", Const, 14}, + {"AttrLocation", Const, 0}, + {"AttrLoclistsBase", Const, 14}, + {"AttrLowerBound", Const, 0}, + {"AttrLowpc", Const, 0}, + {"AttrMacroInfo", Const, 0}, + {"AttrMacros", Const, 14}, + {"AttrMainSubprogram", Const, 14}, + {"AttrMutable", Const, 14}, + {"AttrName", Const, 0}, + {"AttrNamelistItem", Const, 0}, + {"AttrNoreturn", Const, 14}, + {"AttrObjectPointer", Const, 14}, + {"AttrOrdering", Const, 0}, + {"AttrPictureString", Const, 14}, + {"AttrPriority", Const, 0}, + {"AttrProducer", Const, 0}, + {"AttrPrototyped", Const, 0}, + {"AttrPure", Const, 14}, + {"AttrRanges", Const, 0}, + {"AttrRank", Const, 14}, + {"AttrRecursive", Const, 14}, + {"AttrReference", Const, 14}, + {"AttrReturnAddr", Const, 0}, + {"AttrRnglistsBase", Const, 14}, + {"AttrRvalueReference", Const, 14}, + {"AttrSegment", Const, 0}, + {"AttrSibling", Const, 0}, + {"AttrSignature", Const, 14}, + {"AttrSmall", Const, 14}, + {"AttrSpecification", Const, 0}, + {"AttrStartScope", Const, 0}, + {"AttrStaticLink", Const, 0}, + {"AttrStmtList", Const, 0}, + {"AttrStrOffsetsBase", Const, 14}, + {"AttrStride", Const, 0}, + {"AttrStrideSize", Const, 0}, + {"AttrStringLength", Const, 0}, + {"AttrStringLengthBitSize", Const, 14}, + {"AttrStringLengthByteSize", Const, 14}, + {"AttrThreadsScaled", Const, 14}, + {"AttrTrampoline", Const, 0}, + {"AttrType", Const, 0}, + {"AttrUpperBound", Const, 0}, + {"AttrUseLocation", Const, 0}, + {"AttrUseUTF8", Const, 0}, + {"AttrVarParam", Const, 0}, + {"AttrVirtuality", Const, 0}, + {"AttrVisibility", Const, 0}, + {"AttrVtableElemLoc", Const, 0}, + {"BasicType", Type, 0}, + {"BasicType.BitOffset", Field, 0}, + {"BasicType.BitSize", Field, 0}, + {"BasicType.CommonType", Field, 0}, + {"BasicType.DataBitOffset", Field, 18}, + {"BoolType", Type, 0}, + {"BoolType.BasicType", Field, 0}, + {"CharType", Type, 0}, + {"CharType.BasicType", Field, 0}, + {"Class", Type, 5}, + {"ClassAddrPtr", Const, 14}, + {"ClassAddress", Const, 5}, + {"ClassBlock", Const, 5}, + {"ClassConstant", Const, 5}, + {"ClassExprLoc", Const, 5}, + {"ClassFlag", Const, 5}, + {"ClassLinePtr", Const, 5}, + {"ClassLocList", Const, 14}, + {"ClassLocListPtr", Const, 5}, + {"ClassMacPtr", Const, 5}, + {"ClassRangeListPtr", Const, 5}, + {"ClassReference", Const, 5}, + {"ClassReferenceAlt", Const, 5}, + {"ClassReferenceSig", Const, 5}, + {"ClassRngList", Const, 14}, + {"ClassRngListsPtr", Const, 14}, + {"ClassStrOffsetsPtr", Const, 14}, + {"ClassString", Const, 5}, + {"ClassStringAlt", Const, 5}, + {"ClassUnknown", Const, 6}, + {"CommonType", Type, 0}, + {"CommonType.ByteSize", Field, 0}, + {"CommonType.Name", Field, 0}, + {"ComplexType", Type, 0}, + {"ComplexType.BasicType", Field, 0}, + {"Data", Type, 0}, + {"DecodeError", Type, 0}, + {"DecodeError.Err", Field, 0}, + {"DecodeError.Name", Field, 0}, + {"DecodeError.Offset", Field, 0}, + {"DotDotDotType", Type, 0}, + {"DotDotDotType.CommonType", Field, 0}, + {"Entry", Type, 0}, + {"Entry.Children", Field, 0}, + {"Entry.Field", Field, 0}, + {"Entry.Offset", Field, 0}, + {"Entry.Tag", Field, 0}, + {"EnumType", Type, 0}, + {"EnumType.CommonType", Field, 0}, + {"EnumType.EnumName", Field, 0}, + {"EnumType.Val", Field, 0}, + {"EnumValue", Type, 0}, + {"EnumValue.Name", Field, 0}, + {"EnumValue.Val", Field, 0}, + {"ErrUnknownPC", Var, 5}, + {"Field", Type, 0}, + {"Field.Attr", Field, 0}, + {"Field.Class", Field, 5}, + {"Field.Val", Field, 0}, + {"FloatType", Type, 0}, + {"FloatType.BasicType", Field, 0}, + {"FuncType", Type, 0}, + {"FuncType.CommonType", Field, 0}, + {"FuncType.ParamType", Field, 0}, + {"FuncType.ReturnType", Field, 0}, + {"IntType", Type, 0}, + {"IntType.BasicType", Field, 0}, + {"LineEntry", Type, 5}, + {"LineEntry.Address", Field, 5}, + {"LineEntry.BasicBlock", Field, 5}, + {"LineEntry.Column", Field, 5}, + {"LineEntry.Discriminator", Field, 5}, + {"LineEntry.EndSequence", Field, 5}, + {"LineEntry.EpilogueBegin", Field, 5}, + {"LineEntry.File", Field, 5}, + {"LineEntry.ISA", Field, 5}, + {"LineEntry.IsStmt", Field, 5}, + {"LineEntry.Line", Field, 5}, + {"LineEntry.OpIndex", Field, 5}, + {"LineEntry.PrologueEnd", Field, 5}, + {"LineFile", Type, 5}, + {"LineFile.Length", Field, 5}, + {"LineFile.Mtime", Field, 5}, + {"LineFile.Name", Field, 5}, + {"LineReader", Type, 5}, + {"LineReaderPos", Type, 5}, + {"New", Func, 0}, + {"Offset", Type, 0}, + {"PtrType", Type, 0}, + {"PtrType.CommonType", Field, 0}, + {"PtrType.Type", Field, 0}, + {"QualType", Type, 0}, + {"QualType.CommonType", Field, 0}, + {"QualType.Qual", Field, 0}, + {"QualType.Type", Field, 0}, + {"Reader", Type, 0}, + {"StructField", Type, 0}, + {"StructField.BitOffset", Field, 0}, + {"StructField.BitSize", Field, 0}, + {"StructField.ByteOffset", Field, 0}, + {"StructField.ByteSize", Field, 0}, + {"StructField.DataBitOffset", Field, 18}, + {"StructField.Name", Field, 0}, + {"StructField.Type", Field, 0}, + {"StructType", Type, 0}, + {"StructType.CommonType", Field, 0}, + {"StructType.Field", Field, 0}, + {"StructType.Incomplete", Field, 0}, + {"StructType.Kind", Field, 0}, + {"StructType.StructName", Field, 0}, + {"Tag", Type, 0}, + {"TagAccessDeclaration", Const, 0}, + {"TagArrayType", Const, 0}, + {"TagAtomicType", Const, 14}, + {"TagBaseType", Const, 0}, + {"TagCallSite", Const, 14}, + {"TagCallSiteParameter", Const, 14}, + {"TagCatchDwarfBlock", Const, 0}, + {"TagClassType", Const, 0}, + {"TagCoarrayType", Const, 14}, + {"TagCommonDwarfBlock", Const, 0}, + {"TagCommonInclusion", Const, 0}, + {"TagCompileUnit", Const, 0}, + {"TagCondition", Const, 3}, + {"TagConstType", Const, 0}, + {"TagConstant", Const, 0}, + {"TagDwarfProcedure", Const, 0}, + {"TagDynamicType", Const, 14}, + {"TagEntryPoint", Const, 0}, + {"TagEnumerationType", Const, 0}, + {"TagEnumerator", Const, 0}, + {"TagFileType", Const, 0}, + {"TagFormalParameter", Const, 0}, + {"TagFriend", Const, 0}, + {"TagGenericSubrange", Const, 14}, + {"TagImmutableType", Const, 14}, + {"TagImportedDeclaration", Const, 0}, + {"TagImportedModule", Const, 0}, + {"TagImportedUnit", Const, 0}, + {"TagInheritance", Const, 0}, + {"TagInlinedSubroutine", Const, 0}, + {"TagInterfaceType", Const, 0}, + {"TagLabel", Const, 0}, + {"TagLexDwarfBlock", Const, 0}, + {"TagMember", Const, 0}, + {"TagModule", Const, 0}, + {"TagMutableType", Const, 0}, + {"TagNamelist", Const, 0}, + {"TagNamelistItem", Const, 0}, + {"TagNamespace", Const, 0}, + {"TagPackedType", Const, 0}, + {"TagPartialUnit", Const, 0}, + {"TagPointerType", Const, 0}, + {"TagPtrToMemberType", Const, 0}, + {"TagReferenceType", Const, 0}, + {"TagRestrictType", Const, 0}, + {"TagRvalueReferenceType", Const, 3}, + {"TagSetType", Const, 0}, + {"TagSharedType", Const, 3}, + {"TagSkeletonUnit", Const, 14}, + {"TagStringType", Const, 0}, + {"TagStructType", Const, 0}, + {"TagSubprogram", Const, 0}, + {"TagSubrangeType", Const, 0}, + {"TagSubroutineType", Const, 0}, + {"TagTemplateAlias", Const, 3}, + {"TagTemplateTypeParameter", Const, 0}, + {"TagTemplateValueParameter", Const, 0}, + {"TagThrownType", Const, 0}, + {"TagTryDwarfBlock", Const, 0}, + {"TagTypeUnit", Const, 3}, + {"TagTypedef", Const, 0}, + {"TagUnionType", Const, 0}, + {"TagUnspecifiedParameters", Const, 0}, + {"TagUnspecifiedType", Const, 0}, + {"TagVariable", Const, 0}, + {"TagVariant", Const, 0}, + {"TagVariantPart", Const, 0}, + {"TagVolatileType", Const, 0}, + {"TagWithStmt", Const, 0}, + {"Type", Type, 0}, + {"TypedefType", Type, 0}, + {"TypedefType.CommonType", Field, 0}, + {"TypedefType.Type", Field, 0}, + {"UcharType", Type, 0}, + {"UcharType.BasicType", Field, 0}, + {"UintType", Type, 0}, + {"UintType.BasicType", Field, 0}, + {"UnspecifiedType", Type, 4}, + {"UnspecifiedType.BasicType", Field, 4}, + {"UnsupportedType", Type, 13}, + {"UnsupportedType.CommonType", Field, 13}, + {"UnsupportedType.Tag", Field, 13}, + {"VoidType", Type, 0}, + {"VoidType.CommonType", Field, 0}, + }, + "debug/elf": { + {"(*File).Close", Method, 0}, + {"(*File).DWARF", Method, 0}, + {"(*File).DynString", Method, 1}, + {"(*File).DynValue", Method, 21}, + {"(*File).DynamicSymbols", Method, 4}, + {"(*File).ImportedLibraries", Method, 0}, + {"(*File).ImportedSymbols", Method, 0}, + {"(*File).Section", Method, 0}, + {"(*File).SectionByType", Method, 0}, + {"(*File).Symbols", Method, 0}, + {"(*FormatError).Error", Method, 0}, + {"(*Prog).Open", Method, 0}, + {"(*Section).Data", Method, 0}, + {"(*Section).Open", Method, 0}, + {"(Class).GoString", Method, 0}, + {"(Class).String", Method, 0}, + {"(CompressionType).GoString", Method, 6}, + {"(CompressionType).String", Method, 6}, + {"(Data).GoString", Method, 0}, + {"(Data).String", Method, 0}, + {"(DynFlag).GoString", Method, 0}, + {"(DynFlag).String", Method, 0}, + {"(DynFlag1).GoString", Method, 21}, + {"(DynFlag1).String", Method, 21}, + {"(DynTag).GoString", Method, 0}, + {"(DynTag).String", Method, 0}, + {"(Machine).GoString", Method, 0}, + {"(Machine).String", Method, 0}, + {"(NType).GoString", Method, 0}, + {"(NType).String", Method, 0}, + {"(OSABI).GoString", Method, 0}, + {"(OSABI).String", Method, 0}, + {"(Prog).ReadAt", Method, 0}, + {"(ProgFlag).GoString", Method, 0}, + {"(ProgFlag).String", Method, 0}, + {"(ProgType).GoString", Method, 0}, + {"(ProgType).String", Method, 0}, + {"(R_386).GoString", Method, 0}, + {"(R_386).String", Method, 0}, + {"(R_390).GoString", Method, 7}, + {"(R_390).String", Method, 7}, + {"(R_AARCH64).GoString", Method, 4}, + {"(R_AARCH64).String", Method, 4}, + {"(R_ALPHA).GoString", Method, 0}, + {"(R_ALPHA).String", Method, 0}, + {"(R_ARM).GoString", Method, 0}, + {"(R_ARM).String", Method, 0}, + {"(R_LARCH).GoString", Method, 19}, + {"(R_LARCH).String", Method, 19}, + {"(R_MIPS).GoString", Method, 6}, + {"(R_MIPS).String", Method, 6}, + {"(R_PPC).GoString", Method, 0}, + {"(R_PPC).String", Method, 0}, + {"(R_PPC64).GoString", Method, 5}, + {"(R_PPC64).String", Method, 5}, + {"(R_RISCV).GoString", Method, 11}, + {"(R_RISCV).String", Method, 11}, + {"(R_SPARC).GoString", Method, 0}, + {"(R_SPARC).String", Method, 0}, + {"(R_X86_64).GoString", Method, 0}, + {"(R_X86_64).String", Method, 0}, + {"(Section).ReadAt", Method, 0}, + {"(SectionFlag).GoString", Method, 0}, + {"(SectionFlag).String", Method, 0}, + {"(SectionIndex).GoString", Method, 0}, + {"(SectionIndex).String", Method, 0}, + {"(SectionType).GoString", Method, 0}, + {"(SectionType).String", Method, 0}, + {"(SymBind).GoString", Method, 0}, + {"(SymBind).String", Method, 0}, + {"(SymType).GoString", Method, 0}, + {"(SymType).String", Method, 0}, + {"(SymVis).GoString", Method, 0}, + {"(SymVis).String", Method, 0}, + {"(Type).GoString", Method, 0}, + {"(Type).String", Method, 0}, + {"(Version).GoString", Method, 0}, + {"(Version).String", Method, 0}, + {"ARM_MAGIC_TRAMP_NUMBER", Const, 0}, + {"COMPRESS_HIOS", Const, 6}, + {"COMPRESS_HIPROC", Const, 6}, + {"COMPRESS_LOOS", Const, 6}, + {"COMPRESS_LOPROC", Const, 6}, + {"COMPRESS_ZLIB", Const, 6}, + {"COMPRESS_ZSTD", Const, 21}, + {"Chdr32", Type, 6}, + {"Chdr32.Addralign", Field, 6}, + {"Chdr32.Size", Field, 6}, + {"Chdr32.Type", Field, 6}, + {"Chdr64", Type, 6}, + {"Chdr64.Addralign", Field, 6}, + {"Chdr64.Size", Field, 6}, + {"Chdr64.Type", Field, 6}, + {"Class", Type, 0}, + {"CompressionType", Type, 6}, + {"DF_1_CONFALT", Const, 21}, + {"DF_1_DIRECT", Const, 21}, + {"DF_1_DISPRELDNE", Const, 21}, + {"DF_1_DISPRELPND", Const, 21}, + {"DF_1_EDITED", Const, 21}, + {"DF_1_ENDFILTEE", Const, 21}, + {"DF_1_GLOBAL", Const, 21}, + {"DF_1_GLOBAUDIT", Const, 21}, + {"DF_1_GROUP", Const, 21}, + {"DF_1_IGNMULDEF", Const, 21}, + {"DF_1_INITFIRST", Const, 21}, + {"DF_1_INTERPOSE", Const, 21}, + {"DF_1_KMOD", Const, 21}, + {"DF_1_LOADFLTR", Const, 21}, + {"DF_1_NOCOMMON", Const, 21}, + {"DF_1_NODEFLIB", Const, 21}, + {"DF_1_NODELETE", Const, 21}, + {"DF_1_NODIRECT", Const, 21}, + {"DF_1_NODUMP", Const, 21}, + {"DF_1_NOHDR", Const, 21}, + {"DF_1_NOKSYMS", Const, 21}, + {"DF_1_NOOPEN", Const, 21}, + {"DF_1_NORELOC", Const, 21}, + {"DF_1_NOW", Const, 21}, + {"DF_1_ORIGIN", Const, 21}, + {"DF_1_PIE", Const, 21}, + {"DF_1_SINGLETON", Const, 21}, + {"DF_1_STUB", Const, 21}, + {"DF_1_SYMINTPOSE", Const, 21}, + {"DF_1_TRANS", Const, 21}, + {"DF_1_WEAKFILTER", Const, 21}, + {"DF_BIND_NOW", Const, 0}, + {"DF_ORIGIN", Const, 0}, + {"DF_STATIC_TLS", Const, 0}, + {"DF_SYMBOLIC", Const, 0}, + {"DF_TEXTREL", Const, 0}, + {"DT_ADDRRNGHI", Const, 16}, + {"DT_ADDRRNGLO", Const, 16}, + {"DT_AUDIT", Const, 16}, + {"DT_AUXILIARY", Const, 16}, + {"DT_BIND_NOW", Const, 0}, + {"DT_CHECKSUM", Const, 16}, + {"DT_CONFIG", Const, 16}, + {"DT_DEBUG", Const, 0}, + {"DT_DEPAUDIT", Const, 16}, + {"DT_ENCODING", Const, 0}, + {"DT_FEATURE", Const, 16}, + {"DT_FILTER", Const, 16}, + {"DT_FINI", Const, 0}, + {"DT_FINI_ARRAY", Const, 0}, + {"DT_FINI_ARRAYSZ", Const, 0}, + {"DT_FLAGS", Const, 0}, + {"DT_FLAGS_1", Const, 16}, + {"DT_GNU_CONFLICT", Const, 16}, + {"DT_GNU_CONFLICTSZ", Const, 16}, + {"DT_GNU_HASH", Const, 16}, + {"DT_GNU_LIBLIST", Const, 16}, + {"DT_GNU_LIBLISTSZ", Const, 16}, + {"DT_GNU_PRELINKED", Const, 16}, + {"DT_HASH", Const, 0}, + {"DT_HIOS", Const, 0}, + {"DT_HIPROC", Const, 0}, + {"DT_INIT", Const, 0}, + {"DT_INIT_ARRAY", Const, 0}, + {"DT_INIT_ARRAYSZ", Const, 0}, + {"DT_JMPREL", Const, 0}, + {"DT_LOOS", Const, 0}, + {"DT_LOPROC", Const, 0}, + {"DT_MIPS_AUX_DYNAMIC", Const, 16}, + {"DT_MIPS_BASE_ADDRESS", Const, 16}, + {"DT_MIPS_COMPACT_SIZE", Const, 16}, + {"DT_MIPS_CONFLICT", Const, 16}, + {"DT_MIPS_CONFLICTNO", Const, 16}, + {"DT_MIPS_CXX_FLAGS", Const, 16}, + {"DT_MIPS_DELTA_CLASS", Const, 16}, + {"DT_MIPS_DELTA_CLASSSYM", Const, 16}, + {"DT_MIPS_DELTA_CLASSSYM_NO", Const, 16}, + {"DT_MIPS_DELTA_CLASS_NO", Const, 16}, + {"DT_MIPS_DELTA_INSTANCE", Const, 16}, + {"DT_MIPS_DELTA_INSTANCE_NO", Const, 16}, + {"DT_MIPS_DELTA_RELOC", Const, 16}, + {"DT_MIPS_DELTA_RELOC_NO", Const, 16}, + {"DT_MIPS_DELTA_SYM", Const, 16}, + {"DT_MIPS_DELTA_SYM_NO", Const, 16}, + {"DT_MIPS_DYNSTR_ALIGN", Const, 16}, + {"DT_MIPS_FLAGS", Const, 16}, + {"DT_MIPS_GOTSYM", Const, 16}, + {"DT_MIPS_GP_VALUE", Const, 16}, + {"DT_MIPS_HIDDEN_GOTIDX", Const, 16}, + {"DT_MIPS_HIPAGENO", Const, 16}, + {"DT_MIPS_ICHECKSUM", Const, 16}, + {"DT_MIPS_INTERFACE", Const, 16}, + {"DT_MIPS_INTERFACE_SIZE", Const, 16}, + {"DT_MIPS_IVERSION", Const, 16}, + {"DT_MIPS_LIBLIST", Const, 16}, + {"DT_MIPS_LIBLISTNO", Const, 16}, + {"DT_MIPS_LOCALPAGE_GOTIDX", Const, 16}, + {"DT_MIPS_LOCAL_GOTIDX", Const, 16}, + {"DT_MIPS_LOCAL_GOTNO", Const, 16}, + {"DT_MIPS_MSYM", Const, 16}, + {"DT_MIPS_OPTIONS", Const, 16}, + {"DT_MIPS_PERF_SUFFIX", Const, 16}, + {"DT_MIPS_PIXIE_INIT", Const, 16}, + {"DT_MIPS_PLTGOT", Const, 16}, + {"DT_MIPS_PROTECTED_GOTIDX", Const, 16}, + {"DT_MIPS_RLD_MAP", Const, 16}, + {"DT_MIPS_RLD_MAP_REL", Const, 16}, + {"DT_MIPS_RLD_TEXT_RESOLVE_ADDR", Const, 16}, + {"DT_MIPS_RLD_VERSION", Const, 16}, + {"DT_MIPS_RWPLT", Const, 16}, + {"DT_MIPS_SYMBOL_LIB", Const, 16}, + {"DT_MIPS_SYMTABNO", Const, 16}, + {"DT_MIPS_TIME_STAMP", Const, 16}, + {"DT_MIPS_UNREFEXTNO", Const, 16}, + {"DT_MOVEENT", Const, 16}, + {"DT_MOVESZ", Const, 16}, + {"DT_MOVETAB", Const, 16}, + {"DT_NEEDED", Const, 0}, + {"DT_NULL", Const, 0}, + {"DT_PLTGOT", Const, 0}, + {"DT_PLTPAD", Const, 16}, + {"DT_PLTPADSZ", Const, 16}, + {"DT_PLTREL", Const, 0}, + {"DT_PLTRELSZ", Const, 0}, + {"DT_POSFLAG_1", Const, 16}, + {"DT_PPC64_GLINK", Const, 16}, + {"DT_PPC64_OPD", Const, 16}, + {"DT_PPC64_OPDSZ", Const, 16}, + {"DT_PPC64_OPT", Const, 16}, + {"DT_PPC_GOT", Const, 16}, + {"DT_PPC_OPT", Const, 16}, + {"DT_PREINIT_ARRAY", Const, 0}, + {"DT_PREINIT_ARRAYSZ", Const, 0}, + {"DT_REL", Const, 0}, + {"DT_RELA", Const, 0}, + {"DT_RELACOUNT", Const, 16}, + {"DT_RELAENT", Const, 0}, + {"DT_RELASZ", Const, 0}, + {"DT_RELCOUNT", Const, 16}, + {"DT_RELENT", Const, 0}, + {"DT_RELSZ", Const, 0}, + {"DT_RPATH", Const, 0}, + {"DT_RUNPATH", Const, 0}, + {"DT_SONAME", Const, 0}, + {"DT_SPARC_REGISTER", Const, 16}, + {"DT_STRSZ", Const, 0}, + {"DT_STRTAB", Const, 0}, + {"DT_SYMBOLIC", Const, 0}, + {"DT_SYMENT", Const, 0}, + {"DT_SYMINENT", Const, 16}, + {"DT_SYMINFO", Const, 16}, + {"DT_SYMINSZ", Const, 16}, + {"DT_SYMTAB", Const, 0}, + {"DT_SYMTAB_SHNDX", Const, 16}, + {"DT_TEXTREL", Const, 0}, + {"DT_TLSDESC_GOT", Const, 16}, + {"DT_TLSDESC_PLT", Const, 16}, + {"DT_USED", Const, 16}, + {"DT_VALRNGHI", Const, 16}, + {"DT_VALRNGLO", Const, 16}, + {"DT_VERDEF", Const, 16}, + {"DT_VERDEFNUM", Const, 16}, + {"DT_VERNEED", Const, 0}, + {"DT_VERNEEDNUM", Const, 0}, + {"DT_VERSYM", Const, 0}, + {"Data", Type, 0}, + {"Dyn32", Type, 0}, + {"Dyn32.Tag", Field, 0}, + {"Dyn32.Val", Field, 0}, + {"Dyn64", Type, 0}, + {"Dyn64.Tag", Field, 0}, + {"Dyn64.Val", Field, 0}, + {"DynFlag", Type, 0}, + {"DynFlag1", Type, 21}, + {"DynTag", Type, 0}, + {"EI_ABIVERSION", Const, 0}, + {"EI_CLASS", Const, 0}, + {"EI_DATA", Const, 0}, + {"EI_NIDENT", Const, 0}, + {"EI_OSABI", Const, 0}, + {"EI_PAD", Const, 0}, + {"EI_VERSION", Const, 0}, + {"ELFCLASS32", Const, 0}, + {"ELFCLASS64", Const, 0}, + {"ELFCLASSNONE", Const, 0}, + {"ELFDATA2LSB", Const, 0}, + {"ELFDATA2MSB", Const, 0}, + {"ELFDATANONE", Const, 0}, + {"ELFMAG", Const, 0}, + {"ELFOSABI_86OPEN", Const, 0}, + {"ELFOSABI_AIX", Const, 0}, + {"ELFOSABI_ARM", Const, 0}, + {"ELFOSABI_AROS", Const, 11}, + {"ELFOSABI_CLOUDABI", Const, 11}, + {"ELFOSABI_FENIXOS", Const, 11}, + {"ELFOSABI_FREEBSD", Const, 0}, + {"ELFOSABI_HPUX", Const, 0}, + {"ELFOSABI_HURD", Const, 0}, + {"ELFOSABI_IRIX", Const, 0}, + {"ELFOSABI_LINUX", Const, 0}, + {"ELFOSABI_MODESTO", Const, 0}, + {"ELFOSABI_NETBSD", Const, 0}, + {"ELFOSABI_NONE", Const, 0}, + {"ELFOSABI_NSK", Const, 0}, + {"ELFOSABI_OPENBSD", Const, 0}, + {"ELFOSABI_OPENVMS", Const, 0}, + {"ELFOSABI_SOLARIS", Const, 0}, + {"ELFOSABI_STANDALONE", Const, 0}, + {"ELFOSABI_TRU64", Const, 0}, + {"EM_386", Const, 0}, + {"EM_486", Const, 0}, + {"EM_56800EX", Const, 11}, + {"EM_68HC05", Const, 11}, + {"EM_68HC08", Const, 11}, + {"EM_68HC11", Const, 11}, + {"EM_68HC12", Const, 0}, + {"EM_68HC16", Const, 11}, + {"EM_68K", Const, 0}, + {"EM_78KOR", Const, 11}, + {"EM_8051", Const, 11}, + {"EM_860", Const, 0}, + {"EM_88K", Const, 0}, + {"EM_960", Const, 0}, + {"EM_AARCH64", Const, 4}, + {"EM_ALPHA", Const, 0}, + {"EM_ALPHA_STD", Const, 0}, + {"EM_ALTERA_NIOS2", Const, 11}, + {"EM_AMDGPU", Const, 11}, + {"EM_ARC", Const, 0}, + {"EM_ARCA", Const, 11}, + {"EM_ARC_COMPACT", Const, 11}, + {"EM_ARC_COMPACT2", Const, 11}, + {"EM_ARM", Const, 0}, + {"EM_AVR", Const, 11}, + {"EM_AVR32", Const, 11}, + {"EM_BA1", Const, 11}, + {"EM_BA2", Const, 11}, + {"EM_BLACKFIN", Const, 11}, + {"EM_BPF", Const, 11}, + {"EM_C166", Const, 11}, + {"EM_CDP", Const, 11}, + {"EM_CE", Const, 11}, + {"EM_CLOUDSHIELD", Const, 11}, + {"EM_COGE", Const, 11}, + {"EM_COLDFIRE", Const, 0}, + {"EM_COOL", Const, 11}, + {"EM_COREA_1ST", Const, 11}, + {"EM_COREA_2ND", Const, 11}, + {"EM_CR", Const, 11}, + {"EM_CR16", Const, 11}, + {"EM_CRAYNV2", Const, 11}, + {"EM_CRIS", Const, 11}, + {"EM_CRX", Const, 11}, + {"EM_CSR_KALIMBA", Const, 11}, + {"EM_CUDA", Const, 11}, + {"EM_CYPRESS_M8C", Const, 11}, + {"EM_D10V", Const, 11}, + {"EM_D30V", Const, 11}, + {"EM_DSP24", Const, 11}, + {"EM_DSPIC30F", Const, 11}, + {"EM_DXP", Const, 11}, + {"EM_ECOG1", Const, 11}, + {"EM_ECOG16", Const, 11}, + {"EM_ECOG1X", Const, 11}, + {"EM_ECOG2", Const, 11}, + {"EM_ETPU", Const, 11}, + {"EM_EXCESS", Const, 11}, + {"EM_F2MC16", Const, 11}, + {"EM_FIREPATH", Const, 11}, + {"EM_FR20", Const, 0}, + {"EM_FR30", Const, 11}, + {"EM_FT32", Const, 11}, + {"EM_FX66", Const, 11}, + {"EM_H8S", Const, 0}, + {"EM_H8_300", Const, 0}, + {"EM_H8_300H", Const, 0}, + {"EM_H8_500", Const, 0}, + {"EM_HUANY", Const, 11}, + {"EM_IA_64", Const, 0}, + {"EM_INTEL205", Const, 11}, + {"EM_INTEL206", Const, 11}, + {"EM_INTEL207", Const, 11}, + {"EM_INTEL208", Const, 11}, + {"EM_INTEL209", Const, 11}, + {"EM_IP2K", Const, 11}, + {"EM_JAVELIN", Const, 11}, + {"EM_K10M", Const, 11}, + {"EM_KM32", Const, 11}, + {"EM_KMX16", Const, 11}, + {"EM_KMX32", Const, 11}, + {"EM_KMX8", Const, 11}, + {"EM_KVARC", Const, 11}, + {"EM_L10M", Const, 11}, + {"EM_LANAI", Const, 11}, + {"EM_LATTICEMICO32", Const, 11}, + {"EM_LOONGARCH", Const, 19}, + {"EM_M16C", Const, 11}, + {"EM_M32", Const, 0}, + {"EM_M32C", Const, 11}, + {"EM_M32R", Const, 11}, + {"EM_MANIK", Const, 11}, + {"EM_MAX", Const, 11}, + {"EM_MAXQ30", Const, 11}, + {"EM_MCHP_PIC", Const, 11}, + {"EM_MCST_ELBRUS", Const, 11}, + {"EM_ME16", Const, 0}, + {"EM_METAG", Const, 11}, + {"EM_MICROBLAZE", Const, 11}, + {"EM_MIPS", Const, 0}, + {"EM_MIPS_RS3_LE", Const, 0}, + {"EM_MIPS_RS4_BE", Const, 0}, + {"EM_MIPS_X", Const, 0}, + {"EM_MMA", Const, 0}, + {"EM_MMDSP_PLUS", Const, 11}, + {"EM_MMIX", Const, 11}, + {"EM_MN10200", Const, 11}, + {"EM_MN10300", Const, 11}, + {"EM_MOXIE", Const, 11}, + {"EM_MSP430", Const, 11}, + {"EM_NCPU", Const, 0}, + {"EM_NDR1", Const, 0}, + {"EM_NDS32", Const, 11}, + {"EM_NONE", Const, 0}, + {"EM_NORC", Const, 11}, + {"EM_NS32K", Const, 11}, + {"EM_OPEN8", Const, 11}, + {"EM_OPENRISC", Const, 11}, + {"EM_PARISC", Const, 0}, + {"EM_PCP", Const, 0}, + {"EM_PDP10", Const, 11}, + {"EM_PDP11", Const, 11}, + {"EM_PDSP", Const, 11}, + {"EM_PJ", Const, 11}, + {"EM_PPC", Const, 0}, + {"EM_PPC64", Const, 0}, + {"EM_PRISM", Const, 11}, + {"EM_QDSP6", Const, 11}, + {"EM_R32C", Const, 11}, + {"EM_RCE", Const, 0}, + {"EM_RH32", Const, 0}, + {"EM_RISCV", Const, 11}, + {"EM_RL78", Const, 11}, + {"EM_RS08", Const, 11}, + {"EM_RX", Const, 11}, + {"EM_S370", Const, 0}, + {"EM_S390", Const, 0}, + {"EM_SCORE7", Const, 11}, + {"EM_SEP", Const, 11}, + {"EM_SE_C17", Const, 11}, + {"EM_SE_C33", Const, 11}, + {"EM_SH", Const, 0}, + {"EM_SHARC", Const, 11}, + {"EM_SLE9X", Const, 11}, + {"EM_SNP1K", Const, 11}, + {"EM_SPARC", Const, 0}, + {"EM_SPARC32PLUS", Const, 0}, + {"EM_SPARCV9", Const, 0}, + {"EM_ST100", Const, 0}, + {"EM_ST19", Const, 11}, + {"EM_ST200", Const, 11}, + {"EM_ST7", Const, 11}, + {"EM_ST9PLUS", Const, 11}, + {"EM_STARCORE", Const, 0}, + {"EM_STM8", Const, 11}, + {"EM_STXP7X", Const, 11}, + {"EM_SVX", Const, 11}, + {"EM_TILE64", Const, 11}, + {"EM_TILEGX", Const, 11}, + {"EM_TILEPRO", Const, 11}, + {"EM_TINYJ", Const, 0}, + {"EM_TI_ARP32", Const, 11}, + {"EM_TI_C2000", Const, 11}, + {"EM_TI_C5500", Const, 11}, + {"EM_TI_C6000", Const, 11}, + {"EM_TI_PRU", Const, 11}, + {"EM_TMM_GPP", Const, 11}, + {"EM_TPC", Const, 11}, + {"EM_TRICORE", Const, 0}, + {"EM_TRIMEDIA", Const, 11}, + {"EM_TSK3000", Const, 11}, + {"EM_UNICORE", Const, 11}, + {"EM_V800", Const, 0}, + {"EM_V850", Const, 11}, + {"EM_VAX", Const, 11}, + {"EM_VIDEOCORE", Const, 11}, + {"EM_VIDEOCORE3", Const, 11}, + {"EM_VIDEOCORE5", Const, 11}, + {"EM_VISIUM", Const, 11}, + {"EM_VPP500", Const, 0}, + {"EM_X86_64", Const, 0}, + {"EM_XCORE", Const, 11}, + {"EM_XGATE", Const, 11}, + {"EM_XIMO16", Const, 11}, + {"EM_XTENSA", Const, 11}, + {"EM_Z80", Const, 11}, + {"EM_ZSP", Const, 11}, + {"ET_CORE", Const, 0}, + {"ET_DYN", Const, 0}, + {"ET_EXEC", Const, 0}, + {"ET_HIOS", Const, 0}, + {"ET_HIPROC", Const, 0}, + {"ET_LOOS", Const, 0}, + {"ET_LOPROC", Const, 0}, + {"ET_NONE", Const, 0}, + {"ET_REL", Const, 0}, + {"EV_CURRENT", Const, 0}, + {"EV_NONE", Const, 0}, + {"ErrNoSymbols", Var, 4}, + {"File", Type, 0}, + {"File.FileHeader", Field, 0}, + {"File.Progs", Field, 0}, + {"File.Sections", Field, 0}, + {"FileHeader", Type, 0}, + {"FileHeader.ABIVersion", Field, 0}, + {"FileHeader.ByteOrder", Field, 0}, + {"FileHeader.Class", Field, 0}, + {"FileHeader.Data", Field, 0}, + {"FileHeader.Entry", Field, 1}, + {"FileHeader.Machine", Field, 0}, + {"FileHeader.OSABI", Field, 0}, + {"FileHeader.Type", Field, 0}, + {"FileHeader.Version", Field, 0}, + {"FormatError", Type, 0}, + {"Header32", Type, 0}, + {"Header32.Ehsize", Field, 0}, + {"Header32.Entry", Field, 0}, + {"Header32.Flags", Field, 0}, + {"Header32.Ident", Field, 0}, + {"Header32.Machine", Field, 0}, + {"Header32.Phentsize", Field, 0}, + {"Header32.Phnum", Field, 0}, + {"Header32.Phoff", Field, 0}, + {"Header32.Shentsize", Field, 0}, + {"Header32.Shnum", Field, 0}, + {"Header32.Shoff", Field, 0}, + {"Header32.Shstrndx", Field, 0}, + {"Header32.Type", Field, 0}, + {"Header32.Version", Field, 0}, + {"Header64", Type, 0}, + {"Header64.Ehsize", Field, 0}, + {"Header64.Entry", Field, 0}, + {"Header64.Flags", Field, 0}, + {"Header64.Ident", Field, 0}, + {"Header64.Machine", Field, 0}, + {"Header64.Phentsize", Field, 0}, + {"Header64.Phnum", Field, 0}, + {"Header64.Phoff", Field, 0}, + {"Header64.Shentsize", Field, 0}, + {"Header64.Shnum", Field, 0}, + {"Header64.Shoff", Field, 0}, + {"Header64.Shstrndx", Field, 0}, + {"Header64.Type", Field, 0}, + {"Header64.Version", Field, 0}, + {"ImportedSymbol", Type, 0}, + {"ImportedSymbol.Library", Field, 0}, + {"ImportedSymbol.Name", Field, 0}, + {"ImportedSymbol.Version", Field, 0}, + {"Machine", Type, 0}, + {"NT_FPREGSET", Const, 0}, + {"NT_PRPSINFO", Const, 0}, + {"NT_PRSTATUS", Const, 0}, + {"NType", Type, 0}, + {"NewFile", Func, 0}, + {"OSABI", Type, 0}, + {"Open", Func, 0}, + {"PF_MASKOS", Const, 0}, + {"PF_MASKPROC", Const, 0}, + {"PF_R", Const, 0}, + {"PF_W", Const, 0}, + {"PF_X", Const, 0}, + {"PT_AARCH64_ARCHEXT", Const, 16}, + {"PT_AARCH64_UNWIND", Const, 16}, + {"PT_ARM_ARCHEXT", Const, 16}, + {"PT_ARM_EXIDX", Const, 16}, + {"PT_DYNAMIC", Const, 0}, + {"PT_GNU_EH_FRAME", Const, 16}, + {"PT_GNU_MBIND_HI", Const, 16}, + {"PT_GNU_MBIND_LO", Const, 16}, + {"PT_GNU_PROPERTY", Const, 16}, + {"PT_GNU_RELRO", Const, 16}, + {"PT_GNU_STACK", Const, 16}, + {"PT_HIOS", Const, 0}, + {"PT_HIPROC", Const, 0}, + {"PT_INTERP", Const, 0}, + {"PT_LOAD", Const, 0}, + {"PT_LOOS", Const, 0}, + {"PT_LOPROC", Const, 0}, + {"PT_MIPS_ABIFLAGS", Const, 16}, + {"PT_MIPS_OPTIONS", Const, 16}, + {"PT_MIPS_REGINFO", Const, 16}, + {"PT_MIPS_RTPROC", Const, 16}, + {"PT_NOTE", Const, 0}, + {"PT_NULL", Const, 0}, + {"PT_OPENBSD_BOOTDATA", Const, 16}, + {"PT_OPENBSD_NOBTCFI", Const, 23}, + {"PT_OPENBSD_RANDOMIZE", Const, 16}, + {"PT_OPENBSD_WXNEEDED", Const, 16}, + {"PT_PAX_FLAGS", Const, 16}, + {"PT_PHDR", Const, 0}, + {"PT_S390_PGSTE", Const, 16}, + {"PT_SHLIB", Const, 0}, + {"PT_SUNWSTACK", Const, 16}, + {"PT_SUNW_EH_FRAME", Const, 16}, + {"PT_TLS", Const, 0}, + {"Prog", Type, 0}, + {"Prog.ProgHeader", Field, 0}, + {"Prog.ReaderAt", Field, 0}, + {"Prog32", Type, 0}, + {"Prog32.Align", Field, 0}, + {"Prog32.Filesz", Field, 0}, + {"Prog32.Flags", Field, 0}, + {"Prog32.Memsz", Field, 0}, + {"Prog32.Off", Field, 0}, + {"Prog32.Paddr", Field, 0}, + {"Prog32.Type", Field, 0}, + {"Prog32.Vaddr", Field, 0}, + {"Prog64", Type, 0}, + {"Prog64.Align", Field, 0}, + {"Prog64.Filesz", Field, 0}, + {"Prog64.Flags", Field, 0}, + {"Prog64.Memsz", Field, 0}, + {"Prog64.Off", Field, 0}, + {"Prog64.Paddr", Field, 0}, + {"Prog64.Type", Field, 0}, + {"Prog64.Vaddr", Field, 0}, + {"ProgFlag", Type, 0}, + {"ProgHeader", Type, 0}, + {"ProgHeader.Align", Field, 0}, + {"ProgHeader.Filesz", Field, 0}, + {"ProgHeader.Flags", Field, 0}, + {"ProgHeader.Memsz", Field, 0}, + {"ProgHeader.Off", Field, 0}, + {"ProgHeader.Paddr", Field, 0}, + {"ProgHeader.Type", Field, 0}, + {"ProgHeader.Vaddr", Field, 0}, + {"ProgType", Type, 0}, + {"R_386", Type, 0}, + {"R_386_16", Const, 10}, + {"R_386_32", Const, 0}, + {"R_386_32PLT", Const, 10}, + {"R_386_8", Const, 10}, + {"R_386_COPY", Const, 0}, + {"R_386_GLOB_DAT", Const, 0}, + {"R_386_GOT32", Const, 0}, + {"R_386_GOT32X", Const, 10}, + {"R_386_GOTOFF", Const, 0}, + {"R_386_GOTPC", Const, 0}, + {"R_386_IRELATIVE", Const, 10}, + {"R_386_JMP_SLOT", Const, 0}, + {"R_386_NONE", Const, 0}, + {"R_386_PC16", Const, 10}, + {"R_386_PC32", Const, 0}, + {"R_386_PC8", Const, 10}, + {"R_386_PLT32", Const, 0}, + {"R_386_RELATIVE", Const, 0}, + {"R_386_SIZE32", Const, 10}, + {"R_386_TLS_DESC", Const, 10}, + {"R_386_TLS_DESC_CALL", Const, 10}, + {"R_386_TLS_DTPMOD32", Const, 0}, + {"R_386_TLS_DTPOFF32", Const, 0}, + {"R_386_TLS_GD", Const, 0}, + {"R_386_TLS_GD_32", Const, 0}, + {"R_386_TLS_GD_CALL", Const, 0}, + {"R_386_TLS_GD_POP", Const, 0}, + {"R_386_TLS_GD_PUSH", Const, 0}, + {"R_386_TLS_GOTDESC", Const, 10}, + {"R_386_TLS_GOTIE", Const, 0}, + {"R_386_TLS_IE", Const, 0}, + {"R_386_TLS_IE_32", Const, 0}, + {"R_386_TLS_LDM", Const, 0}, + {"R_386_TLS_LDM_32", Const, 0}, + {"R_386_TLS_LDM_CALL", Const, 0}, + {"R_386_TLS_LDM_POP", Const, 0}, + {"R_386_TLS_LDM_PUSH", Const, 0}, + {"R_386_TLS_LDO_32", Const, 0}, + {"R_386_TLS_LE", Const, 0}, + {"R_386_TLS_LE_32", Const, 0}, + {"R_386_TLS_TPOFF", Const, 0}, + {"R_386_TLS_TPOFF32", Const, 0}, + {"R_390", Type, 7}, + {"R_390_12", Const, 7}, + {"R_390_16", Const, 7}, + {"R_390_20", Const, 7}, + {"R_390_32", Const, 7}, + {"R_390_64", Const, 7}, + {"R_390_8", Const, 7}, + {"R_390_COPY", Const, 7}, + {"R_390_GLOB_DAT", Const, 7}, + {"R_390_GOT12", Const, 7}, + {"R_390_GOT16", Const, 7}, + {"R_390_GOT20", Const, 7}, + {"R_390_GOT32", Const, 7}, + {"R_390_GOT64", Const, 7}, + {"R_390_GOTENT", Const, 7}, + {"R_390_GOTOFF", Const, 7}, + {"R_390_GOTOFF16", Const, 7}, + {"R_390_GOTOFF64", Const, 7}, + {"R_390_GOTPC", Const, 7}, + {"R_390_GOTPCDBL", Const, 7}, + {"R_390_GOTPLT12", Const, 7}, + {"R_390_GOTPLT16", Const, 7}, + {"R_390_GOTPLT20", Const, 7}, + {"R_390_GOTPLT32", Const, 7}, + {"R_390_GOTPLT64", Const, 7}, + {"R_390_GOTPLTENT", Const, 7}, + {"R_390_GOTPLTOFF16", Const, 7}, + {"R_390_GOTPLTOFF32", Const, 7}, + {"R_390_GOTPLTOFF64", Const, 7}, + {"R_390_JMP_SLOT", Const, 7}, + {"R_390_NONE", Const, 7}, + {"R_390_PC16", Const, 7}, + {"R_390_PC16DBL", Const, 7}, + {"R_390_PC32", Const, 7}, + {"R_390_PC32DBL", Const, 7}, + {"R_390_PC64", Const, 7}, + {"R_390_PLT16DBL", Const, 7}, + {"R_390_PLT32", Const, 7}, + {"R_390_PLT32DBL", Const, 7}, + {"R_390_PLT64", Const, 7}, + {"R_390_RELATIVE", Const, 7}, + {"R_390_TLS_DTPMOD", Const, 7}, + {"R_390_TLS_DTPOFF", Const, 7}, + {"R_390_TLS_GD32", Const, 7}, + {"R_390_TLS_GD64", Const, 7}, + {"R_390_TLS_GDCALL", Const, 7}, + {"R_390_TLS_GOTIE12", Const, 7}, + {"R_390_TLS_GOTIE20", Const, 7}, + {"R_390_TLS_GOTIE32", Const, 7}, + {"R_390_TLS_GOTIE64", Const, 7}, + {"R_390_TLS_IE32", Const, 7}, + {"R_390_TLS_IE64", Const, 7}, + {"R_390_TLS_IEENT", Const, 7}, + {"R_390_TLS_LDCALL", Const, 7}, + {"R_390_TLS_LDM32", Const, 7}, + {"R_390_TLS_LDM64", Const, 7}, + {"R_390_TLS_LDO32", Const, 7}, + {"R_390_TLS_LDO64", Const, 7}, + {"R_390_TLS_LE32", Const, 7}, + {"R_390_TLS_LE64", Const, 7}, + {"R_390_TLS_LOAD", Const, 7}, + {"R_390_TLS_TPOFF", Const, 7}, + {"R_AARCH64", Type, 4}, + {"R_AARCH64_ABS16", Const, 4}, + {"R_AARCH64_ABS32", Const, 4}, + {"R_AARCH64_ABS64", Const, 4}, + {"R_AARCH64_ADD_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_ADR_GOT_PAGE", Const, 4}, + {"R_AARCH64_ADR_PREL_LO21", Const, 4}, + {"R_AARCH64_ADR_PREL_PG_HI21", Const, 4}, + {"R_AARCH64_ADR_PREL_PG_HI21_NC", Const, 4}, + {"R_AARCH64_CALL26", Const, 4}, + {"R_AARCH64_CONDBR19", Const, 4}, + {"R_AARCH64_COPY", Const, 4}, + {"R_AARCH64_GLOB_DAT", Const, 4}, + {"R_AARCH64_GOT_LD_PREL19", Const, 4}, + {"R_AARCH64_IRELATIVE", Const, 4}, + {"R_AARCH64_JUMP26", Const, 4}, + {"R_AARCH64_JUMP_SLOT", Const, 4}, + {"R_AARCH64_LD64_GOTOFF_LO15", Const, 10}, + {"R_AARCH64_LD64_GOTPAGE_LO15", Const, 10}, + {"R_AARCH64_LD64_GOT_LO12_NC", Const, 4}, + {"R_AARCH64_LDST128_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_LDST16_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_LDST32_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_LDST64_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_LDST8_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_LD_PREL_LO19", Const, 4}, + {"R_AARCH64_MOVW_SABS_G0", Const, 4}, + {"R_AARCH64_MOVW_SABS_G1", Const, 4}, + {"R_AARCH64_MOVW_SABS_G2", Const, 4}, + {"R_AARCH64_MOVW_UABS_G0", Const, 4}, + {"R_AARCH64_MOVW_UABS_G0_NC", Const, 4}, + {"R_AARCH64_MOVW_UABS_G1", Const, 4}, + {"R_AARCH64_MOVW_UABS_G1_NC", Const, 4}, + {"R_AARCH64_MOVW_UABS_G2", Const, 4}, + {"R_AARCH64_MOVW_UABS_G2_NC", Const, 4}, + {"R_AARCH64_MOVW_UABS_G3", Const, 4}, + {"R_AARCH64_NONE", Const, 4}, + {"R_AARCH64_NULL", Const, 4}, + {"R_AARCH64_P32_ABS16", Const, 4}, + {"R_AARCH64_P32_ABS32", Const, 4}, + {"R_AARCH64_P32_ADD_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_P32_ADR_GOT_PAGE", Const, 4}, + {"R_AARCH64_P32_ADR_PREL_LO21", Const, 4}, + {"R_AARCH64_P32_ADR_PREL_PG_HI21", Const, 4}, + {"R_AARCH64_P32_CALL26", Const, 4}, + {"R_AARCH64_P32_CONDBR19", Const, 4}, + {"R_AARCH64_P32_COPY", Const, 4}, + {"R_AARCH64_P32_GLOB_DAT", Const, 4}, + {"R_AARCH64_P32_GOT_LD_PREL19", Const, 4}, + {"R_AARCH64_P32_IRELATIVE", Const, 4}, + {"R_AARCH64_P32_JUMP26", Const, 4}, + {"R_AARCH64_P32_JUMP_SLOT", Const, 4}, + {"R_AARCH64_P32_LD32_GOT_LO12_NC", Const, 4}, + {"R_AARCH64_P32_LDST128_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_P32_LDST16_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_P32_LDST32_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_P32_LDST64_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_P32_LDST8_ABS_LO12_NC", Const, 4}, + {"R_AARCH64_P32_LD_PREL_LO19", Const, 4}, + {"R_AARCH64_P32_MOVW_SABS_G0", Const, 4}, + {"R_AARCH64_P32_MOVW_UABS_G0", Const, 4}, + {"R_AARCH64_P32_MOVW_UABS_G0_NC", Const, 4}, + {"R_AARCH64_P32_MOVW_UABS_G1", Const, 4}, + {"R_AARCH64_P32_PREL16", Const, 4}, + {"R_AARCH64_P32_PREL32", Const, 4}, + {"R_AARCH64_P32_RELATIVE", Const, 4}, + {"R_AARCH64_P32_TLSDESC", Const, 4}, + {"R_AARCH64_P32_TLSDESC_ADD_LO12_NC", Const, 4}, + {"R_AARCH64_P32_TLSDESC_ADR_PAGE21", Const, 4}, + {"R_AARCH64_P32_TLSDESC_ADR_PREL21", Const, 4}, + {"R_AARCH64_P32_TLSDESC_CALL", Const, 4}, + {"R_AARCH64_P32_TLSDESC_LD32_LO12_NC", Const, 4}, + {"R_AARCH64_P32_TLSDESC_LD_PREL19", Const, 4}, + {"R_AARCH64_P32_TLSGD_ADD_LO12_NC", Const, 4}, + {"R_AARCH64_P32_TLSGD_ADR_PAGE21", Const, 4}, + {"R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21", Const, 4}, + {"R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC", Const, 4}, + {"R_AARCH64_P32_TLSIE_LD_GOTTPREL_PREL19", Const, 4}, + {"R_AARCH64_P32_TLSLE_ADD_TPREL_HI12", Const, 4}, + {"R_AARCH64_P32_TLSLE_ADD_TPREL_LO12", Const, 4}, + {"R_AARCH64_P32_TLSLE_ADD_TPREL_LO12_NC", Const, 4}, + {"R_AARCH64_P32_TLSLE_MOVW_TPREL_G0", Const, 4}, + {"R_AARCH64_P32_TLSLE_MOVW_TPREL_G0_NC", Const, 4}, + {"R_AARCH64_P32_TLSLE_MOVW_TPREL_G1", Const, 4}, + {"R_AARCH64_P32_TLS_DTPMOD", Const, 4}, + {"R_AARCH64_P32_TLS_DTPREL", Const, 4}, + {"R_AARCH64_P32_TLS_TPREL", Const, 4}, + {"R_AARCH64_P32_TSTBR14", Const, 4}, + {"R_AARCH64_PREL16", Const, 4}, + {"R_AARCH64_PREL32", Const, 4}, + {"R_AARCH64_PREL64", Const, 4}, + {"R_AARCH64_RELATIVE", Const, 4}, + {"R_AARCH64_TLSDESC", Const, 4}, + {"R_AARCH64_TLSDESC_ADD", Const, 4}, + {"R_AARCH64_TLSDESC_ADD_LO12_NC", Const, 4}, + {"R_AARCH64_TLSDESC_ADR_PAGE21", Const, 4}, + {"R_AARCH64_TLSDESC_ADR_PREL21", Const, 4}, + {"R_AARCH64_TLSDESC_CALL", Const, 4}, + {"R_AARCH64_TLSDESC_LD64_LO12_NC", Const, 4}, + {"R_AARCH64_TLSDESC_LDR", Const, 4}, + {"R_AARCH64_TLSDESC_LD_PREL19", Const, 4}, + {"R_AARCH64_TLSDESC_OFF_G0_NC", Const, 4}, + {"R_AARCH64_TLSDESC_OFF_G1", Const, 4}, + {"R_AARCH64_TLSGD_ADD_LO12_NC", Const, 4}, + {"R_AARCH64_TLSGD_ADR_PAGE21", Const, 4}, + {"R_AARCH64_TLSGD_ADR_PREL21", Const, 10}, + {"R_AARCH64_TLSGD_MOVW_G0_NC", Const, 10}, + {"R_AARCH64_TLSGD_MOVW_G1", Const, 10}, + {"R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21", Const, 4}, + {"R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC", Const, 4}, + {"R_AARCH64_TLSIE_LD_GOTTPREL_PREL19", Const, 4}, + {"R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC", Const, 4}, + {"R_AARCH64_TLSIE_MOVW_GOTTPREL_G1", Const, 4}, + {"R_AARCH64_TLSLD_ADR_PAGE21", Const, 10}, + {"R_AARCH64_TLSLD_ADR_PREL21", Const, 10}, + {"R_AARCH64_TLSLD_LDST128_DTPREL_LO12", Const, 10}, + {"R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC", Const, 10}, + {"R_AARCH64_TLSLE_ADD_TPREL_HI12", Const, 4}, + {"R_AARCH64_TLSLE_ADD_TPREL_LO12", Const, 4}, + {"R_AARCH64_TLSLE_ADD_TPREL_LO12_NC", Const, 4}, + {"R_AARCH64_TLSLE_LDST128_TPREL_LO12", Const, 10}, + {"R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC", Const, 10}, + {"R_AARCH64_TLSLE_MOVW_TPREL_G0", Const, 4}, + {"R_AARCH64_TLSLE_MOVW_TPREL_G0_NC", Const, 4}, + {"R_AARCH64_TLSLE_MOVW_TPREL_G1", Const, 4}, + {"R_AARCH64_TLSLE_MOVW_TPREL_G1_NC", Const, 4}, + {"R_AARCH64_TLSLE_MOVW_TPREL_G2", Const, 4}, + {"R_AARCH64_TLS_DTPMOD64", Const, 4}, + {"R_AARCH64_TLS_DTPREL64", Const, 4}, + {"R_AARCH64_TLS_TPREL64", Const, 4}, + {"R_AARCH64_TSTBR14", Const, 4}, + {"R_ALPHA", Type, 0}, + {"R_ALPHA_BRADDR", Const, 0}, + {"R_ALPHA_COPY", Const, 0}, + {"R_ALPHA_GLOB_DAT", Const, 0}, + {"R_ALPHA_GPDISP", Const, 0}, + {"R_ALPHA_GPREL32", Const, 0}, + {"R_ALPHA_GPRELHIGH", Const, 0}, + {"R_ALPHA_GPRELLOW", Const, 0}, + {"R_ALPHA_GPVALUE", Const, 0}, + {"R_ALPHA_HINT", Const, 0}, + {"R_ALPHA_IMMED_BR_HI32", Const, 0}, + {"R_ALPHA_IMMED_GP_16", Const, 0}, + {"R_ALPHA_IMMED_GP_HI32", Const, 0}, + {"R_ALPHA_IMMED_LO32", Const, 0}, + {"R_ALPHA_IMMED_SCN_HI32", Const, 0}, + {"R_ALPHA_JMP_SLOT", Const, 0}, + {"R_ALPHA_LITERAL", Const, 0}, + {"R_ALPHA_LITUSE", Const, 0}, + {"R_ALPHA_NONE", Const, 0}, + {"R_ALPHA_OP_PRSHIFT", Const, 0}, + {"R_ALPHA_OP_PSUB", Const, 0}, + {"R_ALPHA_OP_PUSH", Const, 0}, + {"R_ALPHA_OP_STORE", Const, 0}, + {"R_ALPHA_REFLONG", Const, 0}, + {"R_ALPHA_REFQUAD", Const, 0}, + {"R_ALPHA_RELATIVE", Const, 0}, + {"R_ALPHA_SREL16", Const, 0}, + {"R_ALPHA_SREL32", Const, 0}, + {"R_ALPHA_SREL64", Const, 0}, + {"R_ARM", Type, 0}, + {"R_ARM_ABS12", Const, 0}, + {"R_ARM_ABS16", Const, 0}, + {"R_ARM_ABS32", Const, 0}, + {"R_ARM_ABS32_NOI", Const, 10}, + {"R_ARM_ABS8", Const, 0}, + {"R_ARM_ALU_PCREL_15_8", Const, 10}, + {"R_ARM_ALU_PCREL_23_15", Const, 10}, + {"R_ARM_ALU_PCREL_7_0", Const, 10}, + {"R_ARM_ALU_PC_G0", Const, 10}, + {"R_ARM_ALU_PC_G0_NC", Const, 10}, + {"R_ARM_ALU_PC_G1", Const, 10}, + {"R_ARM_ALU_PC_G1_NC", Const, 10}, + {"R_ARM_ALU_PC_G2", Const, 10}, + {"R_ARM_ALU_SBREL_19_12_NC", Const, 10}, + {"R_ARM_ALU_SBREL_27_20_CK", Const, 10}, + {"R_ARM_ALU_SB_G0", Const, 10}, + {"R_ARM_ALU_SB_G0_NC", Const, 10}, + {"R_ARM_ALU_SB_G1", Const, 10}, + {"R_ARM_ALU_SB_G1_NC", Const, 10}, + {"R_ARM_ALU_SB_G2", Const, 10}, + {"R_ARM_AMP_VCALL9", Const, 0}, + {"R_ARM_BASE_ABS", Const, 10}, + {"R_ARM_CALL", Const, 10}, + {"R_ARM_COPY", Const, 0}, + {"R_ARM_GLOB_DAT", Const, 0}, + {"R_ARM_GNU_VTENTRY", Const, 0}, + {"R_ARM_GNU_VTINHERIT", Const, 0}, + {"R_ARM_GOT32", Const, 0}, + {"R_ARM_GOTOFF", Const, 0}, + {"R_ARM_GOTOFF12", Const, 10}, + {"R_ARM_GOTPC", Const, 0}, + {"R_ARM_GOTRELAX", Const, 10}, + {"R_ARM_GOT_ABS", Const, 10}, + {"R_ARM_GOT_BREL12", Const, 10}, + {"R_ARM_GOT_PREL", Const, 10}, + {"R_ARM_IRELATIVE", Const, 10}, + {"R_ARM_JUMP24", Const, 10}, + {"R_ARM_JUMP_SLOT", Const, 0}, + {"R_ARM_LDC_PC_G0", Const, 10}, + {"R_ARM_LDC_PC_G1", Const, 10}, + {"R_ARM_LDC_PC_G2", Const, 10}, + {"R_ARM_LDC_SB_G0", Const, 10}, + {"R_ARM_LDC_SB_G1", Const, 10}, + {"R_ARM_LDC_SB_G2", Const, 10}, + {"R_ARM_LDRS_PC_G0", Const, 10}, + {"R_ARM_LDRS_PC_G1", Const, 10}, + {"R_ARM_LDRS_PC_G2", Const, 10}, + {"R_ARM_LDRS_SB_G0", Const, 10}, + {"R_ARM_LDRS_SB_G1", Const, 10}, + {"R_ARM_LDRS_SB_G2", Const, 10}, + {"R_ARM_LDR_PC_G1", Const, 10}, + {"R_ARM_LDR_PC_G2", Const, 10}, + {"R_ARM_LDR_SBREL_11_10_NC", Const, 10}, + {"R_ARM_LDR_SB_G0", Const, 10}, + {"R_ARM_LDR_SB_G1", Const, 10}, + {"R_ARM_LDR_SB_G2", Const, 10}, + {"R_ARM_ME_TOO", Const, 10}, + {"R_ARM_MOVT_ABS", Const, 10}, + {"R_ARM_MOVT_BREL", Const, 10}, + {"R_ARM_MOVT_PREL", Const, 10}, + {"R_ARM_MOVW_ABS_NC", Const, 10}, + {"R_ARM_MOVW_BREL", Const, 10}, + {"R_ARM_MOVW_BREL_NC", Const, 10}, + {"R_ARM_MOVW_PREL_NC", Const, 10}, + {"R_ARM_NONE", Const, 0}, + {"R_ARM_PC13", Const, 0}, + {"R_ARM_PC24", Const, 0}, + {"R_ARM_PLT32", Const, 0}, + {"R_ARM_PLT32_ABS", Const, 10}, + {"R_ARM_PREL31", Const, 10}, + {"R_ARM_PRIVATE_0", Const, 10}, + {"R_ARM_PRIVATE_1", Const, 10}, + {"R_ARM_PRIVATE_10", Const, 10}, + {"R_ARM_PRIVATE_11", Const, 10}, + {"R_ARM_PRIVATE_12", Const, 10}, + {"R_ARM_PRIVATE_13", Const, 10}, + {"R_ARM_PRIVATE_14", Const, 10}, + {"R_ARM_PRIVATE_15", Const, 10}, + {"R_ARM_PRIVATE_2", Const, 10}, + {"R_ARM_PRIVATE_3", Const, 10}, + {"R_ARM_PRIVATE_4", Const, 10}, + {"R_ARM_PRIVATE_5", Const, 10}, + {"R_ARM_PRIVATE_6", Const, 10}, + {"R_ARM_PRIVATE_7", Const, 10}, + {"R_ARM_PRIVATE_8", Const, 10}, + {"R_ARM_PRIVATE_9", Const, 10}, + {"R_ARM_RABS32", Const, 0}, + {"R_ARM_RBASE", Const, 0}, + {"R_ARM_REL32", Const, 0}, + {"R_ARM_REL32_NOI", Const, 10}, + {"R_ARM_RELATIVE", Const, 0}, + {"R_ARM_RPC24", Const, 0}, + {"R_ARM_RREL32", Const, 0}, + {"R_ARM_RSBREL32", Const, 0}, + {"R_ARM_RXPC25", Const, 10}, + {"R_ARM_SBREL31", Const, 10}, + {"R_ARM_SBREL32", Const, 0}, + {"R_ARM_SWI24", Const, 0}, + {"R_ARM_TARGET1", Const, 10}, + {"R_ARM_TARGET2", Const, 10}, + {"R_ARM_THM_ABS5", Const, 0}, + {"R_ARM_THM_ALU_ABS_G0_NC", Const, 10}, + {"R_ARM_THM_ALU_ABS_G1_NC", Const, 10}, + {"R_ARM_THM_ALU_ABS_G2_NC", Const, 10}, + {"R_ARM_THM_ALU_ABS_G3", Const, 10}, + {"R_ARM_THM_ALU_PREL_11_0", Const, 10}, + {"R_ARM_THM_GOT_BREL12", Const, 10}, + {"R_ARM_THM_JUMP11", Const, 10}, + {"R_ARM_THM_JUMP19", Const, 10}, + {"R_ARM_THM_JUMP24", Const, 10}, + {"R_ARM_THM_JUMP6", Const, 10}, + {"R_ARM_THM_JUMP8", Const, 10}, + {"R_ARM_THM_MOVT_ABS", Const, 10}, + {"R_ARM_THM_MOVT_BREL", Const, 10}, + {"R_ARM_THM_MOVT_PREL", Const, 10}, + {"R_ARM_THM_MOVW_ABS_NC", Const, 10}, + {"R_ARM_THM_MOVW_BREL", Const, 10}, + {"R_ARM_THM_MOVW_BREL_NC", Const, 10}, + {"R_ARM_THM_MOVW_PREL_NC", Const, 10}, + {"R_ARM_THM_PC12", Const, 10}, + {"R_ARM_THM_PC22", Const, 0}, + {"R_ARM_THM_PC8", Const, 0}, + {"R_ARM_THM_RPC22", Const, 0}, + {"R_ARM_THM_SWI8", Const, 0}, + {"R_ARM_THM_TLS_CALL", Const, 10}, + {"R_ARM_THM_TLS_DESCSEQ16", Const, 10}, + {"R_ARM_THM_TLS_DESCSEQ32", Const, 10}, + {"R_ARM_THM_XPC22", Const, 0}, + {"R_ARM_TLS_CALL", Const, 10}, + {"R_ARM_TLS_DESCSEQ", Const, 10}, + {"R_ARM_TLS_DTPMOD32", Const, 10}, + {"R_ARM_TLS_DTPOFF32", Const, 10}, + {"R_ARM_TLS_GD32", Const, 10}, + {"R_ARM_TLS_GOTDESC", Const, 10}, + {"R_ARM_TLS_IE12GP", Const, 10}, + {"R_ARM_TLS_IE32", Const, 10}, + {"R_ARM_TLS_LDM32", Const, 10}, + {"R_ARM_TLS_LDO12", Const, 10}, + {"R_ARM_TLS_LDO32", Const, 10}, + {"R_ARM_TLS_LE12", Const, 10}, + {"R_ARM_TLS_LE32", Const, 10}, + {"R_ARM_TLS_TPOFF32", Const, 10}, + {"R_ARM_V4BX", Const, 10}, + {"R_ARM_XPC25", Const, 0}, + {"R_INFO", Func, 0}, + {"R_INFO32", Func, 0}, + {"R_LARCH", Type, 19}, + {"R_LARCH_32", Const, 19}, + {"R_LARCH_32_PCREL", Const, 20}, + {"R_LARCH_64", Const, 19}, + {"R_LARCH_64_PCREL", Const, 22}, + {"R_LARCH_ABS64_HI12", Const, 20}, + {"R_LARCH_ABS64_LO20", Const, 20}, + {"R_LARCH_ABS_HI20", Const, 20}, + {"R_LARCH_ABS_LO12", Const, 20}, + {"R_LARCH_ADD16", Const, 19}, + {"R_LARCH_ADD24", Const, 19}, + {"R_LARCH_ADD32", Const, 19}, + {"R_LARCH_ADD6", Const, 22}, + {"R_LARCH_ADD64", Const, 19}, + {"R_LARCH_ADD8", Const, 19}, + {"R_LARCH_ADD_ULEB128", Const, 22}, + {"R_LARCH_ALIGN", Const, 22}, + {"R_LARCH_B16", Const, 20}, + {"R_LARCH_B21", Const, 20}, + {"R_LARCH_B26", Const, 20}, + {"R_LARCH_CFA", Const, 22}, + {"R_LARCH_COPY", Const, 19}, + {"R_LARCH_DELETE", Const, 22}, + {"R_LARCH_GNU_VTENTRY", Const, 20}, + {"R_LARCH_GNU_VTINHERIT", Const, 20}, + {"R_LARCH_GOT64_HI12", Const, 20}, + {"R_LARCH_GOT64_LO20", Const, 20}, + {"R_LARCH_GOT64_PC_HI12", Const, 20}, + {"R_LARCH_GOT64_PC_LO20", Const, 20}, + {"R_LARCH_GOT_HI20", Const, 20}, + {"R_LARCH_GOT_LO12", Const, 20}, + {"R_LARCH_GOT_PC_HI20", Const, 20}, + {"R_LARCH_GOT_PC_LO12", Const, 20}, + {"R_LARCH_IRELATIVE", Const, 19}, + {"R_LARCH_JUMP_SLOT", Const, 19}, + {"R_LARCH_MARK_LA", Const, 19}, + {"R_LARCH_MARK_PCREL", Const, 19}, + {"R_LARCH_NONE", Const, 19}, + {"R_LARCH_PCALA64_HI12", Const, 20}, + {"R_LARCH_PCALA64_LO20", Const, 20}, + {"R_LARCH_PCALA_HI20", Const, 20}, + {"R_LARCH_PCALA_LO12", Const, 20}, + {"R_LARCH_PCREL20_S2", Const, 22}, + {"R_LARCH_RELATIVE", Const, 19}, + {"R_LARCH_RELAX", Const, 20}, + {"R_LARCH_SOP_ADD", Const, 19}, + {"R_LARCH_SOP_AND", Const, 19}, + {"R_LARCH_SOP_ASSERT", Const, 19}, + {"R_LARCH_SOP_IF_ELSE", Const, 19}, + {"R_LARCH_SOP_NOT", Const, 19}, + {"R_LARCH_SOP_POP_32_S_0_10_10_16_S2", Const, 19}, + {"R_LARCH_SOP_POP_32_S_0_5_10_16_S2", Const, 19}, + {"R_LARCH_SOP_POP_32_S_10_12", Const, 19}, + {"R_LARCH_SOP_POP_32_S_10_16", Const, 19}, + {"R_LARCH_SOP_POP_32_S_10_16_S2", Const, 19}, + {"R_LARCH_SOP_POP_32_S_10_5", Const, 19}, + {"R_LARCH_SOP_POP_32_S_5_20", Const, 19}, + {"R_LARCH_SOP_POP_32_U", Const, 19}, + {"R_LARCH_SOP_POP_32_U_10_12", Const, 19}, + {"R_LARCH_SOP_PUSH_ABSOLUTE", Const, 19}, + {"R_LARCH_SOP_PUSH_DUP", Const, 19}, + {"R_LARCH_SOP_PUSH_GPREL", Const, 19}, + {"R_LARCH_SOP_PUSH_PCREL", Const, 19}, + {"R_LARCH_SOP_PUSH_PLT_PCREL", Const, 19}, + {"R_LARCH_SOP_PUSH_TLS_GD", Const, 19}, + {"R_LARCH_SOP_PUSH_TLS_GOT", Const, 19}, + {"R_LARCH_SOP_PUSH_TLS_TPREL", Const, 19}, + {"R_LARCH_SOP_SL", Const, 19}, + {"R_LARCH_SOP_SR", Const, 19}, + {"R_LARCH_SOP_SUB", Const, 19}, + {"R_LARCH_SUB16", Const, 19}, + {"R_LARCH_SUB24", Const, 19}, + {"R_LARCH_SUB32", Const, 19}, + {"R_LARCH_SUB6", Const, 22}, + {"R_LARCH_SUB64", Const, 19}, + {"R_LARCH_SUB8", Const, 19}, + {"R_LARCH_SUB_ULEB128", Const, 22}, + {"R_LARCH_TLS_DTPMOD32", Const, 19}, + {"R_LARCH_TLS_DTPMOD64", Const, 19}, + {"R_LARCH_TLS_DTPREL32", Const, 19}, + {"R_LARCH_TLS_DTPREL64", Const, 19}, + {"R_LARCH_TLS_GD_HI20", Const, 20}, + {"R_LARCH_TLS_GD_PC_HI20", Const, 20}, + {"R_LARCH_TLS_IE64_HI12", Const, 20}, + {"R_LARCH_TLS_IE64_LO20", Const, 20}, + {"R_LARCH_TLS_IE64_PC_HI12", Const, 20}, + {"R_LARCH_TLS_IE64_PC_LO20", Const, 20}, + {"R_LARCH_TLS_IE_HI20", Const, 20}, + {"R_LARCH_TLS_IE_LO12", Const, 20}, + {"R_LARCH_TLS_IE_PC_HI20", Const, 20}, + {"R_LARCH_TLS_IE_PC_LO12", Const, 20}, + {"R_LARCH_TLS_LD_HI20", Const, 20}, + {"R_LARCH_TLS_LD_PC_HI20", Const, 20}, + {"R_LARCH_TLS_LE64_HI12", Const, 20}, + {"R_LARCH_TLS_LE64_LO20", Const, 20}, + {"R_LARCH_TLS_LE_HI20", Const, 20}, + {"R_LARCH_TLS_LE_LO12", Const, 20}, + {"R_LARCH_TLS_TPREL32", Const, 19}, + {"R_LARCH_TLS_TPREL64", Const, 19}, + {"R_MIPS", Type, 6}, + {"R_MIPS_16", Const, 6}, + {"R_MIPS_26", Const, 6}, + {"R_MIPS_32", Const, 6}, + {"R_MIPS_64", Const, 6}, + {"R_MIPS_ADD_IMMEDIATE", Const, 6}, + {"R_MIPS_CALL16", Const, 6}, + {"R_MIPS_CALL_HI16", Const, 6}, + {"R_MIPS_CALL_LO16", Const, 6}, + {"R_MIPS_DELETE", Const, 6}, + {"R_MIPS_GOT16", Const, 6}, + {"R_MIPS_GOT_DISP", Const, 6}, + {"R_MIPS_GOT_HI16", Const, 6}, + {"R_MIPS_GOT_LO16", Const, 6}, + {"R_MIPS_GOT_OFST", Const, 6}, + {"R_MIPS_GOT_PAGE", Const, 6}, + {"R_MIPS_GPREL16", Const, 6}, + {"R_MIPS_GPREL32", Const, 6}, + {"R_MIPS_HI16", Const, 6}, + {"R_MIPS_HIGHER", Const, 6}, + {"R_MIPS_HIGHEST", Const, 6}, + {"R_MIPS_INSERT_A", Const, 6}, + {"R_MIPS_INSERT_B", Const, 6}, + {"R_MIPS_JALR", Const, 6}, + {"R_MIPS_LITERAL", Const, 6}, + {"R_MIPS_LO16", Const, 6}, + {"R_MIPS_NONE", Const, 6}, + {"R_MIPS_PC16", Const, 6}, + {"R_MIPS_PC32", Const, 22}, + {"R_MIPS_PJUMP", Const, 6}, + {"R_MIPS_REL16", Const, 6}, + {"R_MIPS_REL32", Const, 6}, + {"R_MIPS_RELGOT", Const, 6}, + {"R_MIPS_SCN_DISP", Const, 6}, + {"R_MIPS_SHIFT5", Const, 6}, + {"R_MIPS_SHIFT6", Const, 6}, + {"R_MIPS_SUB", Const, 6}, + {"R_MIPS_TLS_DTPMOD32", Const, 6}, + {"R_MIPS_TLS_DTPMOD64", Const, 6}, + {"R_MIPS_TLS_DTPREL32", Const, 6}, + {"R_MIPS_TLS_DTPREL64", Const, 6}, + {"R_MIPS_TLS_DTPREL_HI16", Const, 6}, + {"R_MIPS_TLS_DTPREL_LO16", Const, 6}, + {"R_MIPS_TLS_GD", Const, 6}, + {"R_MIPS_TLS_GOTTPREL", Const, 6}, + {"R_MIPS_TLS_LDM", Const, 6}, + {"R_MIPS_TLS_TPREL32", Const, 6}, + {"R_MIPS_TLS_TPREL64", Const, 6}, + {"R_MIPS_TLS_TPREL_HI16", Const, 6}, + {"R_MIPS_TLS_TPREL_LO16", Const, 6}, + {"R_PPC", Type, 0}, + {"R_PPC64", Type, 5}, + {"R_PPC64_ADDR14", Const, 5}, + {"R_PPC64_ADDR14_BRNTAKEN", Const, 5}, + {"R_PPC64_ADDR14_BRTAKEN", Const, 5}, + {"R_PPC64_ADDR16", Const, 5}, + {"R_PPC64_ADDR16_DS", Const, 5}, + {"R_PPC64_ADDR16_HA", Const, 5}, + {"R_PPC64_ADDR16_HI", Const, 5}, + {"R_PPC64_ADDR16_HIGH", Const, 10}, + {"R_PPC64_ADDR16_HIGHA", Const, 10}, + {"R_PPC64_ADDR16_HIGHER", Const, 5}, + {"R_PPC64_ADDR16_HIGHER34", Const, 20}, + {"R_PPC64_ADDR16_HIGHERA", Const, 5}, + {"R_PPC64_ADDR16_HIGHERA34", Const, 20}, + {"R_PPC64_ADDR16_HIGHEST", Const, 5}, + {"R_PPC64_ADDR16_HIGHEST34", Const, 20}, + {"R_PPC64_ADDR16_HIGHESTA", Const, 5}, + {"R_PPC64_ADDR16_HIGHESTA34", Const, 20}, + {"R_PPC64_ADDR16_LO", Const, 5}, + {"R_PPC64_ADDR16_LO_DS", Const, 5}, + {"R_PPC64_ADDR24", Const, 5}, + {"R_PPC64_ADDR32", Const, 5}, + {"R_PPC64_ADDR64", Const, 5}, + {"R_PPC64_ADDR64_LOCAL", Const, 10}, + {"R_PPC64_COPY", Const, 20}, + {"R_PPC64_D28", Const, 20}, + {"R_PPC64_D34", Const, 20}, + {"R_PPC64_D34_HA30", Const, 20}, + {"R_PPC64_D34_HI30", Const, 20}, + {"R_PPC64_D34_LO", Const, 20}, + {"R_PPC64_DTPMOD64", Const, 5}, + {"R_PPC64_DTPREL16", Const, 5}, + {"R_PPC64_DTPREL16_DS", Const, 5}, + {"R_PPC64_DTPREL16_HA", Const, 5}, + {"R_PPC64_DTPREL16_HI", Const, 5}, + {"R_PPC64_DTPREL16_HIGH", Const, 10}, + {"R_PPC64_DTPREL16_HIGHA", Const, 10}, + {"R_PPC64_DTPREL16_HIGHER", Const, 5}, + {"R_PPC64_DTPREL16_HIGHERA", Const, 5}, + {"R_PPC64_DTPREL16_HIGHEST", Const, 5}, + {"R_PPC64_DTPREL16_HIGHESTA", Const, 5}, + {"R_PPC64_DTPREL16_LO", Const, 5}, + {"R_PPC64_DTPREL16_LO_DS", Const, 5}, + {"R_PPC64_DTPREL34", Const, 20}, + {"R_PPC64_DTPREL64", Const, 5}, + {"R_PPC64_ENTRY", Const, 10}, + {"R_PPC64_GLOB_DAT", Const, 20}, + {"R_PPC64_GNU_VTENTRY", Const, 20}, + {"R_PPC64_GNU_VTINHERIT", Const, 20}, + {"R_PPC64_GOT16", Const, 5}, + {"R_PPC64_GOT16_DS", Const, 5}, + {"R_PPC64_GOT16_HA", Const, 5}, + {"R_PPC64_GOT16_HI", Const, 5}, + {"R_PPC64_GOT16_LO", Const, 5}, + {"R_PPC64_GOT16_LO_DS", Const, 5}, + {"R_PPC64_GOT_DTPREL16_DS", Const, 5}, + {"R_PPC64_GOT_DTPREL16_HA", Const, 5}, + {"R_PPC64_GOT_DTPREL16_HI", Const, 5}, + {"R_PPC64_GOT_DTPREL16_LO_DS", Const, 5}, + {"R_PPC64_GOT_DTPREL_PCREL34", Const, 20}, + {"R_PPC64_GOT_PCREL34", Const, 20}, + {"R_PPC64_GOT_TLSGD16", Const, 5}, + {"R_PPC64_GOT_TLSGD16_HA", Const, 5}, + {"R_PPC64_GOT_TLSGD16_HI", Const, 5}, + {"R_PPC64_GOT_TLSGD16_LO", Const, 5}, + {"R_PPC64_GOT_TLSGD_PCREL34", Const, 20}, + {"R_PPC64_GOT_TLSLD16", Const, 5}, + {"R_PPC64_GOT_TLSLD16_HA", Const, 5}, + {"R_PPC64_GOT_TLSLD16_HI", Const, 5}, + {"R_PPC64_GOT_TLSLD16_LO", Const, 5}, + {"R_PPC64_GOT_TLSLD_PCREL34", Const, 20}, + {"R_PPC64_GOT_TPREL16_DS", Const, 5}, + {"R_PPC64_GOT_TPREL16_HA", Const, 5}, + {"R_PPC64_GOT_TPREL16_HI", Const, 5}, + {"R_PPC64_GOT_TPREL16_LO_DS", Const, 5}, + {"R_PPC64_GOT_TPREL_PCREL34", Const, 20}, + {"R_PPC64_IRELATIVE", Const, 10}, + {"R_PPC64_JMP_IREL", Const, 10}, + {"R_PPC64_JMP_SLOT", Const, 5}, + {"R_PPC64_NONE", Const, 5}, + {"R_PPC64_PCREL28", Const, 20}, + {"R_PPC64_PCREL34", Const, 20}, + {"R_PPC64_PCREL_OPT", Const, 20}, + {"R_PPC64_PLT16_HA", Const, 20}, + {"R_PPC64_PLT16_HI", Const, 20}, + {"R_PPC64_PLT16_LO", Const, 20}, + {"R_PPC64_PLT16_LO_DS", Const, 10}, + {"R_PPC64_PLT32", Const, 20}, + {"R_PPC64_PLT64", Const, 20}, + {"R_PPC64_PLTCALL", Const, 20}, + {"R_PPC64_PLTCALL_NOTOC", Const, 20}, + {"R_PPC64_PLTGOT16", Const, 10}, + {"R_PPC64_PLTGOT16_DS", Const, 10}, + {"R_PPC64_PLTGOT16_HA", Const, 10}, + {"R_PPC64_PLTGOT16_HI", Const, 10}, + {"R_PPC64_PLTGOT16_LO", Const, 10}, + {"R_PPC64_PLTGOT_LO_DS", Const, 10}, + {"R_PPC64_PLTREL32", Const, 20}, + {"R_PPC64_PLTREL64", Const, 20}, + {"R_PPC64_PLTSEQ", Const, 20}, + {"R_PPC64_PLTSEQ_NOTOC", Const, 20}, + {"R_PPC64_PLT_PCREL34", Const, 20}, + {"R_PPC64_PLT_PCREL34_NOTOC", Const, 20}, + {"R_PPC64_REL14", Const, 5}, + {"R_PPC64_REL14_BRNTAKEN", Const, 5}, + {"R_PPC64_REL14_BRTAKEN", Const, 5}, + {"R_PPC64_REL16", Const, 5}, + {"R_PPC64_REL16DX_HA", Const, 10}, + {"R_PPC64_REL16_HA", Const, 5}, + {"R_PPC64_REL16_HI", Const, 5}, + {"R_PPC64_REL16_HIGH", Const, 20}, + {"R_PPC64_REL16_HIGHA", Const, 20}, + {"R_PPC64_REL16_HIGHER", Const, 20}, + {"R_PPC64_REL16_HIGHER34", Const, 20}, + {"R_PPC64_REL16_HIGHERA", Const, 20}, + {"R_PPC64_REL16_HIGHERA34", Const, 20}, + {"R_PPC64_REL16_HIGHEST", Const, 20}, + {"R_PPC64_REL16_HIGHEST34", Const, 20}, + {"R_PPC64_REL16_HIGHESTA", Const, 20}, + {"R_PPC64_REL16_HIGHESTA34", Const, 20}, + {"R_PPC64_REL16_LO", Const, 5}, + {"R_PPC64_REL24", Const, 5}, + {"R_PPC64_REL24_NOTOC", Const, 10}, + {"R_PPC64_REL24_P9NOTOC", Const, 21}, + {"R_PPC64_REL30", Const, 20}, + {"R_PPC64_REL32", Const, 5}, + {"R_PPC64_REL64", Const, 5}, + {"R_PPC64_RELATIVE", Const, 18}, + {"R_PPC64_SECTOFF", Const, 20}, + {"R_PPC64_SECTOFF_DS", Const, 10}, + {"R_PPC64_SECTOFF_HA", Const, 20}, + {"R_PPC64_SECTOFF_HI", Const, 20}, + {"R_PPC64_SECTOFF_LO", Const, 20}, + {"R_PPC64_SECTOFF_LO_DS", Const, 10}, + {"R_PPC64_TLS", Const, 5}, + {"R_PPC64_TLSGD", Const, 5}, + {"R_PPC64_TLSLD", Const, 5}, + {"R_PPC64_TOC", Const, 5}, + {"R_PPC64_TOC16", Const, 5}, + {"R_PPC64_TOC16_DS", Const, 5}, + {"R_PPC64_TOC16_HA", Const, 5}, + {"R_PPC64_TOC16_HI", Const, 5}, + {"R_PPC64_TOC16_LO", Const, 5}, + {"R_PPC64_TOC16_LO_DS", Const, 5}, + {"R_PPC64_TOCSAVE", Const, 10}, + {"R_PPC64_TPREL16", Const, 5}, + {"R_PPC64_TPREL16_DS", Const, 5}, + {"R_PPC64_TPREL16_HA", Const, 5}, + {"R_PPC64_TPREL16_HI", Const, 5}, + {"R_PPC64_TPREL16_HIGH", Const, 10}, + {"R_PPC64_TPREL16_HIGHA", Const, 10}, + {"R_PPC64_TPREL16_HIGHER", Const, 5}, + {"R_PPC64_TPREL16_HIGHERA", Const, 5}, + {"R_PPC64_TPREL16_HIGHEST", Const, 5}, + {"R_PPC64_TPREL16_HIGHESTA", Const, 5}, + {"R_PPC64_TPREL16_LO", Const, 5}, + {"R_PPC64_TPREL16_LO_DS", Const, 5}, + {"R_PPC64_TPREL34", Const, 20}, + {"R_PPC64_TPREL64", Const, 5}, + {"R_PPC64_UADDR16", Const, 20}, + {"R_PPC64_UADDR32", Const, 20}, + {"R_PPC64_UADDR64", Const, 20}, + {"R_PPC_ADDR14", Const, 0}, + {"R_PPC_ADDR14_BRNTAKEN", Const, 0}, + {"R_PPC_ADDR14_BRTAKEN", Const, 0}, + {"R_PPC_ADDR16", Const, 0}, + {"R_PPC_ADDR16_HA", Const, 0}, + {"R_PPC_ADDR16_HI", Const, 0}, + {"R_PPC_ADDR16_LO", Const, 0}, + {"R_PPC_ADDR24", Const, 0}, + {"R_PPC_ADDR32", Const, 0}, + {"R_PPC_COPY", Const, 0}, + {"R_PPC_DTPMOD32", Const, 0}, + {"R_PPC_DTPREL16", Const, 0}, + {"R_PPC_DTPREL16_HA", Const, 0}, + {"R_PPC_DTPREL16_HI", Const, 0}, + {"R_PPC_DTPREL16_LO", Const, 0}, + {"R_PPC_DTPREL32", Const, 0}, + {"R_PPC_EMB_BIT_FLD", Const, 0}, + {"R_PPC_EMB_MRKREF", Const, 0}, + {"R_PPC_EMB_NADDR16", Const, 0}, + {"R_PPC_EMB_NADDR16_HA", Const, 0}, + {"R_PPC_EMB_NADDR16_HI", Const, 0}, + {"R_PPC_EMB_NADDR16_LO", Const, 0}, + {"R_PPC_EMB_NADDR32", Const, 0}, + {"R_PPC_EMB_RELSDA", Const, 0}, + {"R_PPC_EMB_RELSEC16", Const, 0}, + {"R_PPC_EMB_RELST_HA", Const, 0}, + {"R_PPC_EMB_RELST_HI", Const, 0}, + {"R_PPC_EMB_RELST_LO", Const, 0}, + {"R_PPC_EMB_SDA21", Const, 0}, + {"R_PPC_EMB_SDA2I16", Const, 0}, + {"R_PPC_EMB_SDA2REL", Const, 0}, + {"R_PPC_EMB_SDAI16", Const, 0}, + {"R_PPC_GLOB_DAT", Const, 0}, + {"R_PPC_GOT16", Const, 0}, + {"R_PPC_GOT16_HA", Const, 0}, + {"R_PPC_GOT16_HI", Const, 0}, + {"R_PPC_GOT16_LO", Const, 0}, + {"R_PPC_GOT_TLSGD16", Const, 0}, + {"R_PPC_GOT_TLSGD16_HA", Const, 0}, + {"R_PPC_GOT_TLSGD16_HI", Const, 0}, + {"R_PPC_GOT_TLSGD16_LO", Const, 0}, + {"R_PPC_GOT_TLSLD16", Const, 0}, + {"R_PPC_GOT_TLSLD16_HA", Const, 0}, + {"R_PPC_GOT_TLSLD16_HI", Const, 0}, + {"R_PPC_GOT_TLSLD16_LO", Const, 0}, + {"R_PPC_GOT_TPREL16", Const, 0}, + {"R_PPC_GOT_TPREL16_HA", Const, 0}, + {"R_PPC_GOT_TPREL16_HI", Const, 0}, + {"R_PPC_GOT_TPREL16_LO", Const, 0}, + {"R_PPC_JMP_SLOT", Const, 0}, + {"R_PPC_LOCAL24PC", Const, 0}, + {"R_PPC_NONE", Const, 0}, + {"R_PPC_PLT16_HA", Const, 0}, + {"R_PPC_PLT16_HI", Const, 0}, + {"R_PPC_PLT16_LO", Const, 0}, + {"R_PPC_PLT32", Const, 0}, + {"R_PPC_PLTREL24", Const, 0}, + {"R_PPC_PLTREL32", Const, 0}, + {"R_PPC_REL14", Const, 0}, + {"R_PPC_REL14_BRNTAKEN", Const, 0}, + {"R_PPC_REL14_BRTAKEN", Const, 0}, + {"R_PPC_REL24", Const, 0}, + {"R_PPC_REL32", Const, 0}, + {"R_PPC_RELATIVE", Const, 0}, + {"R_PPC_SDAREL16", Const, 0}, + {"R_PPC_SECTOFF", Const, 0}, + {"R_PPC_SECTOFF_HA", Const, 0}, + {"R_PPC_SECTOFF_HI", Const, 0}, + {"R_PPC_SECTOFF_LO", Const, 0}, + {"R_PPC_TLS", Const, 0}, + {"R_PPC_TPREL16", Const, 0}, + {"R_PPC_TPREL16_HA", Const, 0}, + {"R_PPC_TPREL16_HI", Const, 0}, + {"R_PPC_TPREL16_LO", Const, 0}, + {"R_PPC_TPREL32", Const, 0}, + {"R_PPC_UADDR16", Const, 0}, + {"R_PPC_UADDR32", Const, 0}, + {"R_RISCV", Type, 11}, + {"R_RISCV_32", Const, 11}, + {"R_RISCV_32_PCREL", Const, 12}, + {"R_RISCV_64", Const, 11}, + {"R_RISCV_ADD16", Const, 11}, + {"R_RISCV_ADD32", Const, 11}, + {"R_RISCV_ADD64", Const, 11}, + {"R_RISCV_ADD8", Const, 11}, + {"R_RISCV_ALIGN", Const, 11}, + {"R_RISCV_BRANCH", Const, 11}, + {"R_RISCV_CALL", Const, 11}, + {"R_RISCV_CALL_PLT", Const, 11}, + {"R_RISCV_COPY", Const, 11}, + {"R_RISCV_GNU_VTENTRY", Const, 11}, + {"R_RISCV_GNU_VTINHERIT", Const, 11}, + {"R_RISCV_GOT_HI20", Const, 11}, + {"R_RISCV_GPREL_I", Const, 11}, + {"R_RISCV_GPREL_S", Const, 11}, + {"R_RISCV_HI20", Const, 11}, + {"R_RISCV_JAL", Const, 11}, + {"R_RISCV_JUMP_SLOT", Const, 11}, + {"R_RISCV_LO12_I", Const, 11}, + {"R_RISCV_LO12_S", Const, 11}, + {"R_RISCV_NONE", Const, 11}, + {"R_RISCV_PCREL_HI20", Const, 11}, + {"R_RISCV_PCREL_LO12_I", Const, 11}, + {"R_RISCV_PCREL_LO12_S", Const, 11}, + {"R_RISCV_RELATIVE", Const, 11}, + {"R_RISCV_RELAX", Const, 11}, + {"R_RISCV_RVC_BRANCH", Const, 11}, + {"R_RISCV_RVC_JUMP", Const, 11}, + {"R_RISCV_RVC_LUI", Const, 11}, + {"R_RISCV_SET16", Const, 11}, + {"R_RISCV_SET32", Const, 11}, + {"R_RISCV_SET6", Const, 11}, + {"R_RISCV_SET8", Const, 11}, + {"R_RISCV_SUB16", Const, 11}, + {"R_RISCV_SUB32", Const, 11}, + {"R_RISCV_SUB6", Const, 11}, + {"R_RISCV_SUB64", Const, 11}, + {"R_RISCV_SUB8", Const, 11}, + {"R_RISCV_TLS_DTPMOD32", Const, 11}, + {"R_RISCV_TLS_DTPMOD64", Const, 11}, + {"R_RISCV_TLS_DTPREL32", Const, 11}, + {"R_RISCV_TLS_DTPREL64", Const, 11}, + {"R_RISCV_TLS_GD_HI20", Const, 11}, + {"R_RISCV_TLS_GOT_HI20", Const, 11}, + {"R_RISCV_TLS_TPREL32", Const, 11}, + {"R_RISCV_TLS_TPREL64", Const, 11}, + {"R_RISCV_TPREL_ADD", Const, 11}, + {"R_RISCV_TPREL_HI20", Const, 11}, + {"R_RISCV_TPREL_I", Const, 11}, + {"R_RISCV_TPREL_LO12_I", Const, 11}, + {"R_RISCV_TPREL_LO12_S", Const, 11}, + {"R_RISCV_TPREL_S", Const, 11}, + {"R_SPARC", Type, 0}, + {"R_SPARC_10", Const, 0}, + {"R_SPARC_11", Const, 0}, + {"R_SPARC_13", Const, 0}, + {"R_SPARC_16", Const, 0}, + {"R_SPARC_22", Const, 0}, + {"R_SPARC_32", Const, 0}, + {"R_SPARC_5", Const, 0}, + {"R_SPARC_6", Const, 0}, + {"R_SPARC_64", Const, 0}, + {"R_SPARC_7", Const, 0}, + {"R_SPARC_8", Const, 0}, + {"R_SPARC_COPY", Const, 0}, + {"R_SPARC_DISP16", Const, 0}, + {"R_SPARC_DISP32", Const, 0}, + {"R_SPARC_DISP64", Const, 0}, + {"R_SPARC_DISP8", Const, 0}, + {"R_SPARC_GLOB_DAT", Const, 0}, + {"R_SPARC_GLOB_JMP", Const, 0}, + {"R_SPARC_GOT10", Const, 0}, + {"R_SPARC_GOT13", Const, 0}, + {"R_SPARC_GOT22", Const, 0}, + {"R_SPARC_H44", Const, 0}, + {"R_SPARC_HH22", Const, 0}, + {"R_SPARC_HI22", Const, 0}, + {"R_SPARC_HIPLT22", Const, 0}, + {"R_SPARC_HIX22", Const, 0}, + {"R_SPARC_HM10", Const, 0}, + {"R_SPARC_JMP_SLOT", Const, 0}, + {"R_SPARC_L44", Const, 0}, + {"R_SPARC_LM22", Const, 0}, + {"R_SPARC_LO10", Const, 0}, + {"R_SPARC_LOPLT10", Const, 0}, + {"R_SPARC_LOX10", Const, 0}, + {"R_SPARC_M44", Const, 0}, + {"R_SPARC_NONE", Const, 0}, + {"R_SPARC_OLO10", Const, 0}, + {"R_SPARC_PC10", Const, 0}, + {"R_SPARC_PC22", Const, 0}, + {"R_SPARC_PCPLT10", Const, 0}, + {"R_SPARC_PCPLT22", Const, 0}, + {"R_SPARC_PCPLT32", Const, 0}, + {"R_SPARC_PC_HH22", Const, 0}, + {"R_SPARC_PC_HM10", Const, 0}, + {"R_SPARC_PC_LM22", Const, 0}, + {"R_SPARC_PLT32", Const, 0}, + {"R_SPARC_PLT64", Const, 0}, + {"R_SPARC_REGISTER", Const, 0}, + {"R_SPARC_RELATIVE", Const, 0}, + {"R_SPARC_UA16", Const, 0}, + {"R_SPARC_UA32", Const, 0}, + {"R_SPARC_UA64", Const, 0}, + {"R_SPARC_WDISP16", Const, 0}, + {"R_SPARC_WDISP19", Const, 0}, + {"R_SPARC_WDISP22", Const, 0}, + {"R_SPARC_WDISP30", Const, 0}, + {"R_SPARC_WPLT30", Const, 0}, + {"R_SYM32", Func, 0}, + {"R_SYM64", Func, 0}, + {"R_TYPE32", Func, 0}, + {"R_TYPE64", Func, 0}, + {"R_X86_64", Type, 0}, + {"R_X86_64_16", Const, 0}, + {"R_X86_64_32", Const, 0}, + {"R_X86_64_32S", Const, 0}, + {"R_X86_64_64", Const, 0}, + {"R_X86_64_8", Const, 0}, + {"R_X86_64_COPY", Const, 0}, + {"R_X86_64_DTPMOD64", Const, 0}, + {"R_X86_64_DTPOFF32", Const, 0}, + {"R_X86_64_DTPOFF64", Const, 0}, + {"R_X86_64_GLOB_DAT", Const, 0}, + {"R_X86_64_GOT32", Const, 0}, + {"R_X86_64_GOT64", Const, 10}, + {"R_X86_64_GOTOFF64", Const, 10}, + {"R_X86_64_GOTPC32", Const, 10}, + {"R_X86_64_GOTPC32_TLSDESC", Const, 10}, + {"R_X86_64_GOTPC64", Const, 10}, + {"R_X86_64_GOTPCREL", Const, 0}, + {"R_X86_64_GOTPCREL64", Const, 10}, + {"R_X86_64_GOTPCRELX", Const, 10}, + {"R_X86_64_GOTPLT64", Const, 10}, + {"R_X86_64_GOTTPOFF", Const, 0}, + {"R_X86_64_IRELATIVE", Const, 10}, + {"R_X86_64_JMP_SLOT", Const, 0}, + {"R_X86_64_NONE", Const, 0}, + {"R_X86_64_PC16", Const, 0}, + {"R_X86_64_PC32", Const, 0}, + {"R_X86_64_PC32_BND", Const, 10}, + {"R_X86_64_PC64", Const, 10}, + {"R_X86_64_PC8", Const, 0}, + {"R_X86_64_PLT32", Const, 0}, + {"R_X86_64_PLT32_BND", Const, 10}, + {"R_X86_64_PLTOFF64", Const, 10}, + {"R_X86_64_RELATIVE", Const, 0}, + {"R_X86_64_RELATIVE64", Const, 10}, + {"R_X86_64_REX_GOTPCRELX", Const, 10}, + {"R_X86_64_SIZE32", Const, 10}, + {"R_X86_64_SIZE64", Const, 10}, + {"R_X86_64_TLSDESC", Const, 10}, + {"R_X86_64_TLSDESC_CALL", Const, 10}, + {"R_X86_64_TLSGD", Const, 0}, + {"R_X86_64_TLSLD", Const, 0}, + {"R_X86_64_TPOFF32", Const, 0}, + {"R_X86_64_TPOFF64", Const, 0}, + {"Rel32", Type, 0}, + {"Rel32.Info", Field, 0}, + {"Rel32.Off", Field, 0}, + {"Rel64", Type, 0}, + {"Rel64.Info", Field, 0}, + {"Rel64.Off", Field, 0}, + {"Rela32", Type, 0}, + {"Rela32.Addend", Field, 0}, + {"Rela32.Info", Field, 0}, + {"Rela32.Off", Field, 0}, + {"Rela64", Type, 0}, + {"Rela64.Addend", Field, 0}, + {"Rela64.Info", Field, 0}, + {"Rela64.Off", Field, 0}, + {"SHF_ALLOC", Const, 0}, + {"SHF_COMPRESSED", Const, 6}, + {"SHF_EXECINSTR", Const, 0}, + {"SHF_GROUP", Const, 0}, + {"SHF_INFO_LINK", Const, 0}, + {"SHF_LINK_ORDER", Const, 0}, + {"SHF_MASKOS", Const, 0}, + {"SHF_MASKPROC", Const, 0}, + {"SHF_MERGE", Const, 0}, + {"SHF_OS_NONCONFORMING", Const, 0}, + {"SHF_STRINGS", Const, 0}, + {"SHF_TLS", Const, 0}, + {"SHF_WRITE", Const, 0}, + {"SHN_ABS", Const, 0}, + {"SHN_COMMON", Const, 0}, + {"SHN_HIOS", Const, 0}, + {"SHN_HIPROC", Const, 0}, + {"SHN_HIRESERVE", Const, 0}, + {"SHN_LOOS", Const, 0}, + {"SHN_LOPROC", Const, 0}, + {"SHN_LORESERVE", Const, 0}, + {"SHN_UNDEF", Const, 0}, + {"SHN_XINDEX", Const, 0}, + {"SHT_DYNAMIC", Const, 0}, + {"SHT_DYNSYM", Const, 0}, + {"SHT_FINI_ARRAY", Const, 0}, + {"SHT_GNU_ATTRIBUTES", Const, 0}, + {"SHT_GNU_HASH", Const, 0}, + {"SHT_GNU_LIBLIST", Const, 0}, + {"SHT_GNU_VERDEF", Const, 0}, + {"SHT_GNU_VERNEED", Const, 0}, + {"SHT_GNU_VERSYM", Const, 0}, + {"SHT_GROUP", Const, 0}, + {"SHT_HASH", Const, 0}, + {"SHT_HIOS", Const, 0}, + {"SHT_HIPROC", Const, 0}, + {"SHT_HIUSER", Const, 0}, + {"SHT_INIT_ARRAY", Const, 0}, + {"SHT_LOOS", Const, 0}, + {"SHT_LOPROC", Const, 0}, + {"SHT_LOUSER", Const, 0}, + {"SHT_MIPS_ABIFLAGS", Const, 17}, + {"SHT_NOBITS", Const, 0}, + {"SHT_NOTE", Const, 0}, + {"SHT_NULL", Const, 0}, + {"SHT_PREINIT_ARRAY", Const, 0}, + {"SHT_PROGBITS", Const, 0}, + {"SHT_REL", Const, 0}, + {"SHT_RELA", Const, 0}, + {"SHT_SHLIB", Const, 0}, + {"SHT_STRTAB", Const, 0}, + {"SHT_SYMTAB", Const, 0}, + {"SHT_SYMTAB_SHNDX", Const, 0}, + {"STB_GLOBAL", Const, 0}, + {"STB_HIOS", Const, 0}, + {"STB_HIPROC", Const, 0}, + {"STB_LOCAL", Const, 0}, + {"STB_LOOS", Const, 0}, + {"STB_LOPROC", Const, 0}, + {"STB_WEAK", Const, 0}, + {"STT_COMMON", Const, 0}, + {"STT_FILE", Const, 0}, + {"STT_FUNC", Const, 0}, + {"STT_GNU_IFUNC", Const, 23}, + {"STT_HIOS", Const, 0}, + {"STT_HIPROC", Const, 0}, + {"STT_LOOS", Const, 0}, + {"STT_LOPROC", Const, 0}, + {"STT_NOTYPE", Const, 0}, + {"STT_OBJECT", Const, 0}, + {"STT_RELC", Const, 23}, + {"STT_SECTION", Const, 0}, + {"STT_SRELC", Const, 23}, + {"STT_TLS", Const, 0}, + {"STV_DEFAULT", Const, 0}, + {"STV_HIDDEN", Const, 0}, + {"STV_INTERNAL", Const, 0}, + {"STV_PROTECTED", Const, 0}, + {"ST_BIND", Func, 0}, + {"ST_INFO", Func, 0}, + {"ST_TYPE", Func, 0}, + {"ST_VISIBILITY", Func, 0}, + {"Section", Type, 0}, + {"Section.ReaderAt", Field, 0}, + {"Section.SectionHeader", Field, 0}, + {"Section32", Type, 0}, + {"Section32.Addr", Field, 0}, + {"Section32.Addralign", Field, 0}, + {"Section32.Entsize", Field, 0}, + {"Section32.Flags", Field, 0}, + {"Section32.Info", Field, 0}, + {"Section32.Link", Field, 0}, + {"Section32.Name", Field, 0}, + {"Section32.Off", Field, 0}, + {"Section32.Size", Field, 0}, + {"Section32.Type", Field, 0}, + {"Section64", Type, 0}, + {"Section64.Addr", Field, 0}, + {"Section64.Addralign", Field, 0}, + {"Section64.Entsize", Field, 0}, + {"Section64.Flags", Field, 0}, + {"Section64.Info", Field, 0}, + {"Section64.Link", Field, 0}, + {"Section64.Name", Field, 0}, + {"Section64.Off", Field, 0}, + {"Section64.Size", Field, 0}, + {"Section64.Type", Field, 0}, + {"SectionFlag", Type, 0}, + {"SectionHeader", Type, 0}, + {"SectionHeader.Addr", Field, 0}, + {"SectionHeader.Addralign", Field, 0}, + {"SectionHeader.Entsize", Field, 0}, + {"SectionHeader.FileSize", Field, 6}, + {"SectionHeader.Flags", Field, 0}, + {"SectionHeader.Info", Field, 0}, + {"SectionHeader.Link", Field, 0}, + {"SectionHeader.Name", Field, 0}, + {"SectionHeader.Offset", Field, 0}, + {"SectionHeader.Size", Field, 0}, + {"SectionHeader.Type", Field, 0}, + {"SectionIndex", Type, 0}, + {"SectionType", Type, 0}, + {"Sym32", Type, 0}, + {"Sym32.Info", Field, 0}, + {"Sym32.Name", Field, 0}, + {"Sym32.Other", Field, 0}, + {"Sym32.Shndx", Field, 0}, + {"Sym32.Size", Field, 0}, + {"Sym32.Value", Field, 0}, + {"Sym32Size", Const, 0}, + {"Sym64", Type, 0}, + {"Sym64.Info", Field, 0}, + {"Sym64.Name", Field, 0}, + {"Sym64.Other", Field, 0}, + {"Sym64.Shndx", Field, 0}, + {"Sym64.Size", Field, 0}, + {"Sym64.Value", Field, 0}, + {"Sym64Size", Const, 0}, + {"SymBind", Type, 0}, + {"SymType", Type, 0}, + {"SymVis", Type, 0}, + {"Symbol", Type, 0}, + {"Symbol.Info", Field, 0}, + {"Symbol.Library", Field, 13}, + {"Symbol.Name", Field, 0}, + {"Symbol.Other", Field, 0}, + {"Symbol.Section", Field, 0}, + {"Symbol.Size", Field, 0}, + {"Symbol.Value", Field, 0}, + {"Symbol.Version", Field, 13}, + {"Type", Type, 0}, + {"Version", Type, 0}, + }, + "debug/gosym": { + {"(*DecodingError).Error", Method, 0}, + {"(*LineTable).LineToPC", Method, 0}, + {"(*LineTable).PCToLine", Method, 0}, + {"(*Sym).BaseName", Method, 0}, + {"(*Sym).PackageName", Method, 0}, + {"(*Sym).ReceiverName", Method, 0}, + {"(*Sym).Static", Method, 0}, + {"(*Table).LineToPC", Method, 0}, + {"(*Table).LookupFunc", Method, 0}, + {"(*Table).LookupSym", Method, 0}, + {"(*Table).PCToFunc", Method, 0}, + {"(*Table).PCToLine", Method, 0}, + {"(*Table).SymByAddr", Method, 0}, + {"(*UnknownLineError).Error", Method, 0}, + {"(Func).BaseName", Method, 0}, + {"(Func).PackageName", Method, 0}, + {"(Func).ReceiverName", Method, 0}, + {"(Func).Static", Method, 0}, + {"(UnknownFileError).Error", Method, 0}, + {"DecodingError", Type, 0}, + {"Func", Type, 0}, + {"Func.End", Field, 0}, + {"Func.Entry", Field, 0}, + {"Func.FrameSize", Field, 0}, + {"Func.LineTable", Field, 0}, + {"Func.Locals", Field, 0}, + {"Func.Obj", Field, 0}, + {"Func.Params", Field, 0}, + {"Func.Sym", Field, 0}, + {"LineTable", Type, 0}, + {"LineTable.Data", Field, 0}, + {"LineTable.Line", Field, 0}, + {"LineTable.PC", Field, 0}, + {"NewLineTable", Func, 0}, + {"NewTable", Func, 0}, + {"Obj", Type, 0}, + {"Obj.Funcs", Field, 0}, + {"Obj.Paths", Field, 0}, + {"Sym", Type, 0}, + {"Sym.Func", Field, 0}, + {"Sym.GoType", Field, 0}, + {"Sym.Name", Field, 0}, + {"Sym.Type", Field, 0}, + {"Sym.Value", Field, 0}, + {"Table", Type, 0}, + {"Table.Files", Field, 0}, + {"Table.Funcs", Field, 0}, + {"Table.Objs", Field, 0}, + {"Table.Syms", Field, 0}, + {"UnknownFileError", Type, 0}, + {"UnknownLineError", Type, 0}, + {"UnknownLineError.File", Field, 0}, + {"UnknownLineError.Line", Field, 0}, + }, + "debug/macho": { + {"(*FatFile).Close", Method, 3}, + {"(*File).Close", Method, 0}, + {"(*File).DWARF", Method, 0}, + {"(*File).ImportedLibraries", Method, 0}, + {"(*File).ImportedSymbols", Method, 0}, + {"(*File).Section", Method, 0}, + {"(*File).Segment", Method, 0}, + {"(*FormatError).Error", Method, 0}, + {"(*Section).Data", Method, 0}, + {"(*Section).Open", Method, 0}, + {"(*Segment).Data", Method, 0}, + {"(*Segment).Open", Method, 0}, + {"(Cpu).GoString", Method, 0}, + {"(Cpu).String", Method, 0}, + {"(Dylib).Raw", Method, 0}, + {"(Dysymtab).Raw", Method, 0}, + {"(FatArch).Close", Method, 3}, + {"(FatArch).DWARF", Method, 3}, + {"(FatArch).ImportedLibraries", Method, 3}, + {"(FatArch).ImportedSymbols", Method, 3}, + {"(FatArch).Section", Method, 3}, + {"(FatArch).Segment", Method, 3}, + {"(LoadBytes).Raw", Method, 0}, + {"(LoadCmd).GoString", Method, 0}, + {"(LoadCmd).String", Method, 0}, + {"(RelocTypeARM).GoString", Method, 10}, + {"(RelocTypeARM).String", Method, 10}, + {"(RelocTypeARM64).GoString", Method, 10}, + {"(RelocTypeARM64).String", Method, 10}, + {"(RelocTypeGeneric).GoString", Method, 10}, + {"(RelocTypeGeneric).String", Method, 10}, + {"(RelocTypeX86_64).GoString", Method, 10}, + {"(RelocTypeX86_64).String", Method, 10}, + {"(Rpath).Raw", Method, 10}, + {"(Section).ReadAt", Method, 0}, + {"(Segment).Raw", Method, 0}, + {"(Segment).ReadAt", Method, 0}, + {"(Symtab).Raw", Method, 0}, + {"(Type).GoString", Method, 10}, + {"(Type).String", Method, 10}, + {"ARM64_RELOC_ADDEND", Const, 10}, + {"ARM64_RELOC_BRANCH26", Const, 10}, + {"ARM64_RELOC_GOT_LOAD_PAGE21", Const, 10}, + {"ARM64_RELOC_GOT_LOAD_PAGEOFF12", Const, 10}, + {"ARM64_RELOC_PAGE21", Const, 10}, + {"ARM64_RELOC_PAGEOFF12", Const, 10}, + {"ARM64_RELOC_POINTER_TO_GOT", Const, 10}, + {"ARM64_RELOC_SUBTRACTOR", Const, 10}, + {"ARM64_RELOC_TLVP_LOAD_PAGE21", Const, 10}, + {"ARM64_RELOC_TLVP_LOAD_PAGEOFF12", Const, 10}, + {"ARM64_RELOC_UNSIGNED", Const, 10}, + {"ARM_RELOC_BR24", Const, 10}, + {"ARM_RELOC_HALF", Const, 10}, + {"ARM_RELOC_HALF_SECTDIFF", Const, 10}, + {"ARM_RELOC_LOCAL_SECTDIFF", Const, 10}, + {"ARM_RELOC_PAIR", Const, 10}, + {"ARM_RELOC_PB_LA_PTR", Const, 10}, + {"ARM_RELOC_SECTDIFF", Const, 10}, + {"ARM_RELOC_VANILLA", Const, 10}, + {"ARM_THUMB_32BIT_BRANCH", Const, 10}, + {"ARM_THUMB_RELOC_BR22", Const, 10}, + {"Cpu", Type, 0}, + {"Cpu386", Const, 0}, + {"CpuAmd64", Const, 0}, + {"CpuArm", Const, 3}, + {"CpuArm64", Const, 11}, + {"CpuPpc", Const, 3}, + {"CpuPpc64", Const, 3}, + {"Dylib", Type, 0}, + {"Dylib.CompatVersion", Field, 0}, + {"Dylib.CurrentVersion", Field, 0}, + {"Dylib.LoadBytes", Field, 0}, + {"Dylib.Name", Field, 0}, + {"Dylib.Time", Field, 0}, + {"DylibCmd", Type, 0}, + {"DylibCmd.Cmd", Field, 0}, + {"DylibCmd.CompatVersion", Field, 0}, + {"DylibCmd.CurrentVersion", Field, 0}, + {"DylibCmd.Len", Field, 0}, + {"DylibCmd.Name", Field, 0}, + {"DylibCmd.Time", Field, 0}, + {"Dysymtab", Type, 0}, + {"Dysymtab.DysymtabCmd", Field, 0}, + {"Dysymtab.IndirectSyms", Field, 0}, + {"Dysymtab.LoadBytes", Field, 0}, + {"DysymtabCmd", Type, 0}, + {"DysymtabCmd.Cmd", Field, 0}, + {"DysymtabCmd.Extrefsymoff", Field, 0}, + {"DysymtabCmd.Extreloff", Field, 0}, + {"DysymtabCmd.Iextdefsym", Field, 0}, + {"DysymtabCmd.Ilocalsym", Field, 0}, + {"DysymtabCmd.Indirectsymoff", Field, 0}, + {"DysymtabCmd.Iundefsym", Field, 0}, + {"DysymtabCmd.Len", Field, 0}, + {"DysymtabCmd.Locreloff", Field, 0}, + {"DysymtabCmd.Modtaboff", Field, 0}, + {"DysymtabCmd.Nextdefsym", Field, 0}, + {"DysymtabCmd.Nextrefsyms", Field, 0}, + {"DysymtabCmd.Nextrel", Field, 0}, + {"DysymtabCmd.Nindirectsyms", Field, 0}, + {"DysymtabCmd.Nlocalsym", Field, 0}, + {"DysymtabCmd.Nlocrel", Field, 0}, + {"DysymtabCmd.Nmodtab", Field, 0}, + {"DysymtabCmd.Ntoc", Field, 0}, + {"DysymtabCmd.Nundefsym", Field, 0}, + {"DysymtabCmd.Tocoffset", Field, 0}, + {"ErrNotFat", Var, 3}, + {"FatArch", Type, 3}, + {"FatArch.FatArchHeader", Field, 3}, + {"FatArch.File", Field, 3}, + {"FatArchHeader", Type, 3}, + {"FatArchHeader.Align", Field, 3}, + {"FatArchHeader.Cpu", Field, 3}, + {"FatArchHeader.Offset", Field, 3}, + {"FatArchHeader.Size", Field, 3}, + {"FatArchHeader.SubCpu", Field, 3}, + {"FatFile", Type, 3}, + {"FatFile.Arches", Field, 3}, + {"FatFile.Magic", Field, 3}, + {"File", Type, 0}, + {"File.ByteOrder", Field, 0}, + {"File.Dysymtab", Field, 0}, + {"File.FileHeader", Field, 0}, + {"File.Loads", Field, 0}, + {"File.Sections", Field, 0}, + {"File.Symtab", Field, 0}, + {"FileHeader", Type, 0}, + {"FileHeader.Cmdsz", Field, 0}, + {"FileHeader.Cpu", Field, 0}, + {"FileHeader.Flags", Field, 0}, + {"FileHeader.Magic", Field, 0}, + {"FileHeader.Ncmd", Field, 0}, + {"FileHeader.SubCpu", Field, 0}, + {"FileHeader.Type", Field, 0}, + {"FlagAllModsBound", Const, 10}, + {"FlagAllowStackExecution", Const, 10}, + {"FlagAppExtensionSafe", Const, 10}, + {"FlagBindAtLoad", Const, 10}, + {"FlagBindsToWeak", Const, 10}, + {"FlagCanonical", Const, 10}, + {"FlagDeadStrippableDylib", Const, 10}, + {"FlagDyldLink", Const, 10}, + {"FlagForceFlat", Const, 10}, + {"FlagHasTLVDescriptors", Const, 10}, + {"FlagIncrLink", Const, 10}, + {"FlagLazyInit", Const, 10}, + {"FlagNoFixPrebinding", Const, 10}, + {"FlagNoHeapExecution", Const, 10}, + {"FlagNoMultiDefs", Const, 10}, + {"FlagNoReexportedDylibs", Const, 10}, + {"FlagNoUndefs", Const, 10}, + {"FlagPIE", Const, 10}, + {"FlagPrebindable", Const, 10}, + {"FlagPrebound", Const, 10}, + {"FlagRootSafe", Const, 10}, + {"FlagSetuidSafe", Const, 10}, + {"FlagSplitSegs", Const, 10}, + {"FlagSubsectionsViaSymbols", Const, 10}, + {"FlagTwoLevel", Const, 10}, + {"FlagWeakDefines", Const, 10}, + {"FormatError", Type, 0}, + {"GENERIC_RELOC_LOCAL_SECTDIFF", Const, 10}, + {"GENERIC_RELOC_PAIR", Const, 10}, + {"GENERIC_RELOC_PB_LA_PTR", Const, 10}, + {"GENERIC_RELOC_SECTDIFF", Const, 10}, + {"GENERIC_RELOC_TLV", Const, 10}, + {"GENERIC_RELOC_VANILLA", Const, 10}, + {"Load", Type, 0}, + {"LoadBytes", Type, 0}, + {"LoadCmd", Type, 0}, + {"LoadCmdDylib", Const, 0}, + {"LoadCmdDylinker", Const, 0}, + {"LoadCmdDysymtab", Const, 0}, + {"LoadCmdRpath", Const, 10}, + {"LoadCmdSegment", Const, 0}, + {"LoadCmdSegment64", Const, 0}, + {"LoadCmdSymtab", Const, 0}, + {"LoadCmdThread", Const, 0}, + {"LoadCmdUnixThread", Const, 0}, + {"Magic32", Const, 0}, + {"Magic64", Const, 0}, + {"MagicFat", Const, 3}, + {"NewFatFile", Func, 3}, + {"NewFile", Func, 0}, + {"Nlist32", Type, 0}, + {"Nlist32.Desc", Field, 0}, + {"Nlist32.Name", Field, 0}, + {"Nlist32.Sect", Field, 0}, + {"Nlist32.Type", Field, 0}, + {"Nlist32.Value", Field, 0}, + {"Nlist64", Type, 0}, + {"Nlist64.Desc", Field, 0}, + {"Nlist64.Name", Field, 0}, + {"Nlist64.Sect", Field, 0}, + {"Nlist64.Type", Field, 0}, + {"Nlist64.Value", Field, 0}, + {"Open", Func, 0}, + {"OpenFat", Func, 3}, + {"Regs386", Type, 0}, + {"Regs386.AX", Field, 0}, + {"Regs386.BP", Field, 0}, + {"Regs386.BX", Field, 0}, + {"Regs386.CS", Field, 0}, + {"Regs386.CX", Field, 0}, + {"Regs386.DI", Field, 0}, + {"Regs386.DS", Field, 0}, + {"Regs386.DX", Field, 0}, + {"Regs386.ES", Field, 0}, + {"Regs386.FLAGS", Field, 0}, + {"Regs386.FS", Field, 0}, + {"Regs386.GS", Field, 0}, + {"Regs386.IP", Field, 0}, + {"Regs386.SI", Field, 0}, + {"Regs386.SP", Field, 0}, + {"Regs386.SS", Field, 0}, + {"RegsAMD64", Type, 0}, + {"RegsAMD64.AX", Field, 0}, + {"RegsAMD64.BP", Field, 0}, + {"RegsAMD64.BX", Field, 0}, + {"RegsAMD64.CS", Field, 0}, + {"RegsAMD64.CX", Field, 0}, + {"RegsAMD64.DI", Field, 0}, + {"RegsAMD64.DX", Field, 0}, + {"RegsAMD64.FLAGS", Field, 0}, + {"RegsAMD64.FS", Field, 0}, + {"RegsAMD64.GS", Field, 0}, + {"RegsAMD64.IP", Field, 0}, + {"RegsAMD64.R10", Field, 0}, + {"RegsAMD64.R11", Field, 0}, + {"RegsAMD64.R12", Field, 0}, + {"RegsAMD64.R13", Field, 0}, + {"RegsAMD64.R14", Field, 0}, + {"RegsAMD64.R15", Field, 0}, + {"RegsAMD64.R8", Field, 0}, + {"RegsAMD64.R9", Field, 0}, + {"RegsAMD64.SI", Field, 0}, + {"RegsAMD64.SP", Field, 0}, + {"Reloc", Type, 10}, + {"Reloc.Addr", Field, 10}, + {"Reloc.Extern", Field, 10}, + {"Reloc.Len", Field, 10}, + {"Reloc.Pcrel", Field, 10}, + {"Reloc.Scattered", Field, 10}, + {"Reloc.Type", Field, 10}, + {"Reloc.Value", Field, 10}, + {"RelocTypeARM", Type, 10}, + {"RelocTypeARM64", Type, 10}, + {"RelocTypeGeneric", Type, 10}, + {"RelocTypeX86_64", Type, 10}, + {"Rpath", Type, 10}, + {"Rpath.LoadBytes", Field, 10}, + {"Rpath.Path", Field, 10}, + {"RpathCmd", Type, 10}, + {"RpathCmd.Cmd", Field, 10}, + {"RpathCmd.Len", Field, 10}, + {"RpathCmd.Path", Field, 10}, + {"Section", Type, 0}, + {"Section.ReaderAt", Field, 0}, + {"Section.Relocs", Field, 10}, + {"Section.SectionHeader", Field, 0}, + {"Section32", Type, 0}, + {"Section32.Addr", Field, 0}, + {"Section32.Align", Field, 0}, + {"Section32.Flags", Field, 0}, + {"Section32.Name", Field, 0}, + {"Section32.Nreloc", Field, 0}, + {"Section32.Offset", Field, 0}, + {"Section32.Reloff", Field, 0}, + {"Section32.Reserve1", Field, 0}, + {"Section32.Reserve2", Field, 0}, + {"Section32.Seg", Field, 0}, + {"Section32.Size", Field, 0}, + {"Section64", Type, 0}, + {"Section64.Addr", Field, 0}, + {"Section64.Align", Field, 0}, + {"Section64.Flags", Field, 0}, + {"Section64.Name", Field, 0}, + {"Section64.Nreloc", Field, 0}, + {"Section64.Offset", Field, 0}, + {"Section64.Reloff", Field, 0}, + {"Section64.Reserve1", Field, 0}, + {"Section64.Reserve2", Field, 0}, + {"Section64.Reserve3", Field, 0}, + {"Section64.Seg", Field, 0}, + {"Section64.Size", Field, 0}, + {"SectionHeader", Type, 0}, + {"SectionHeader.Addr", Field, 0}, + {"SectionHeader.Align", Field, 0}, + {"SectionHeader.Flags", Field, 0}, + {"SectionHeader.Name", Field, 0}, + {"SectionHeader.Nreloc", Field, 0}, + {"SectionHeader.Offset", Field, 0}, + {"SectionHeader.Reloff", Field, 0}, + {"SectionHeader.Seg", Field, 0}, + {"SectionHeader.Size", Field, 0}, + {"Segment", Type, 0}, + {"Segment.LoadBytes", Field, 0}, + {"Segment.ReaderAt", Field, 0}, + {"Segment.SegmentHeader", Field, 0}, + {"Segment32", Type, 0}, + {"Segment32.Addr", Field, 0}, + {"Segment32.Cmd", Field, 0}, + {"Segment32.Filesz", Field, 0}, + {"Segment32.Flag", Field, 0}, + {"Segment32.Len", Field, 0}, + {"Segment32.Maxprot", Field, 0}, + {"Segment32.Memsz", Field, 0}, + {"Segment32.Name", Field, 0}, + {"Segment32.Nsect", Field, 0}, + {"Segment32.Offset", Field, 0}, + {"Segment32.Prot", Field, 0}, + {"Segment64", Type, 0}, + {"Segment64.Addr", Field, 0}, + {"Segment64.Cmd", Field, 0}, + {"Segment64.Filesz", Field, 0}, + {"Segment64.Flag", Field, 0}, + {"Segment64.Len", Field, 0}, + {"Segment64.Maxprot", Field, 0}, + {"Segment64.Memsz", Field, 0}, + {"Segment64.Name", Field, 0}, + {"Segment64.Nsect", Field, 0}, + {"Segment64.Offset", Field, 0}, + {"Segment64.Prot", Field, 0}, + {"SegmentHeader", Type, 0}, + {"SegmentHeader.Addr", Field, 0}, + {"SegmentHeader.Cmd", Field, 0}, + {"SegmentHeader.Filesz", Field, 0}, + {"SegmentHeader.Flag", Field, 0}, + {"SegmentHeader.Len", Field, 0}, + {"SegmentHeader.Maxprot", Field, 0}, + {"SegmentHeader.Memsz", Field, 0}, + {"SegmentHeader.Name", Field, 0}, + {"SegmentHeader.Nsect", Field, 0}, + {"SegmentHeader.Offset", Field, 0}, + {"SegmentHeader.Prot", Field, 0}, + {"Symbol", Type, 0}, + {"Symbol.Desc", Field, 0}, + {"Symbol.Name", Field, 0}, + {"Symbol.Sect", Field, 0}, + {"Symbol.Type", Field, 0}, + {"Symbol.Value", Field, 0}, + {"Symtab", Type, 0}, + {"Symtab.LoadBytes", Field, 0}, + {"Symtab.Syms", Field, 0}, + {"Symtab.SymtabCmd", Field, 0}, + {"SymtabCmd", Type, 0}, + {"SymtabCmd.Cmd", Field, 0}, + {"SymtabCmd.Len", Field, 0}, + {"SymtabCmd.Nsyms", Field, 0}, + {"SymtabCmd.Stroff", Field, 0}, + {"SymtabCmd.Strsize", Field, 0}, + {"SymtabCmd.Symoff", Field, 0}, + {"Thread", Type, 0}, + {"Thread.Cmd", Field, 0}, + {"Thread.Data", Field, 0}, + {"Thread.Len", Field, 0}, + {"Thread.Type", Field, 0}, + {"Type", Type, 0}, + {"TypeBundle", Const, 3}, + {"TypeDylib", Const, 3}, + {"TypeExec", Const, 0}, + {"TypeObj", Const, 0}, + {"X86_64_RELOC_BRANCH", Const, 10}, + {"X86_64_RELOC_GOT", Const, 10}, + {"X86_64_RELOC_GOT_LOAD", Const, 10}, + {"X86_64_RELOC_SIGNED", Const, 10}, + {"X86_64_RELOC_SIGNED_1", Const, 10}, + {"X86_64_RELOC_SIGNED_2", Const, 10}, + {"X86_64_RELOC_SIGNED_4", Const, 10}, + {"X86_64_RELOC_SUBTRACTOR", Const, 10}, + {"X86_64_RELOC_TLV", Const, 10}, + {"X86_64_RELOC_UNSIGNED", Const, 10}, + }, + "debug/pe": { + {"(*COFFSymbol).FullName", Method, 8}, + {"(*File).COFFSymbolReadSectionDefAux", Method, 19}, + {"(*File).Close", Method, 0}, + {"(*File).DWARF", Method, 0}, + {"(*File).ImportedLibraries", Method, 0}, + {"(*File).ImportedSymbols", Method, 0}, + {"(*File).Section", Method, 0}, + {"(*FormatError).Error", Method, 0}, + {"(*Section).Data", Method, 0}, + {"(*Section).Open", Method, 0}, + {"(Section).ReadAt", Method, 0}, + {"(StringTable).String", Method, 8}, + {"COFFSymbol", Type, 1}, + {"COFFSymbol.Name", Field, 1}, + {"COFFSymbol.NumberOfAuxSymbols", Field, 1}, + {"COFFSymbol.SectionNumber", Field, 1}, + {"COFFSymbol.StorageClass", Field, 1}, + {"COFFSymbol.Type", Field, 1}, + {"COFFSymbol.Value", Field, 1}, + {"COFFSymbolAuxFormat5", Type, 19}, + {"COFFSymbolAuxFormat5.Checksum", Field, 19}, + {"COFFSymbolAuxFormat5.NumLineNumbers", Field, 19}, + {"COFFSymbolAuxFormat5.NumRelocs", Field, 19}, + {"COFFSymbolAuxFormat5.SecNum", Field, 19}, + {"COFFSymbolAuxFormat5.Selection", Field, 19}, + {"COFFSymbolAuxFormat5.Size", Field, 19}, + {"COFFSymbolSize", Const, 1}, + {"DataDirectory", Type, 3}, + {"DataDirectory.Size", Field, 3}, + {"DataDirectory.VirtualAddress", Field, 3}, + {"File", Type, 0}, + {"File.COFFSymbols", Field, 8}, + {"File.FileHeader", Field, 0}, + {"File.OptionalHeader", Field, 3}, + {"File.Sections", Field, 0}, + {"File.StringTable", Field, 8}, + {"File.Symbols", Field, 1}, + {"FileHeader", Type, 0}, + {"FileHeader.Characteristics", Field, 0}, + {"FileHeader.Machine", Field, 0}, + {"FileHeader.NumberOfSections", Field, 0}, + {"FileHeader.NumberOfSymbols", Field, 0}, + {"FileHeader.PointerToSymbolTable", Field, 0}, + {"FileHeader.SizeOfOptionalHeader", Field, 0}, + {"FileHeader.TimeDateStamp", Field, 0}, + {"FormatError", Type, 0}, + {"IMAGE_COMDAT_SELECT_ANY", Const, 19}, + {"IMAGE_COMDAT_SELECT_ASSOCIATIVE", Const, 19}, + {"IMAGE_COMDAT_SELECT_EXACT_MATCH", Const, 19}, + {"IMAGE_COMDAT_SELECT_LARGEST", Const, 19}, + {"IMAGE_COMDAT_SELECT_NODUPLICATES", Const, 19}, + {"IMAGE_COMDAT_SELECT_SAME_SIZE", Const, 19}, + {"IMAGE_DIRECTORY_ENTRY_ARCHITECTURE", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_BASERELOC", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_DEBUG", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_EXCEPTION", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_EXPORT", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_GLOBALPTR", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_IAT", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_IMPORT", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_RESOURCE", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_SECURITY", Const, 11}, + {"IMAGE_DIRECTORY_ENTRY_TLS", Const, 11}, + {"IMAGE_DLLCHARACTERISTICS_APPCONTAINER", Const, 15}, + {"IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE", Const, 15}, + {"IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY", Const, 15}, + {"IMAGE_DLLCHARACTERISTICS_GUARD_CF", Const, 15}, + {"IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA", Const, 15}, + {"IMAGE_DLLCHARACTERISTICS_NO_BIND", Const, 15}, + {"IMAGE_DLLCHARACTERISTICS_NO_ISOLATION", Const, 15}, + {"IMAGE_DLLCHARACTERISTICS_NO_SEH", Const, 15}, + {"IMAGE_DLLCHARACTERISTICS_NX_COMPAT", Const, 15}, + {"IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE", Const, 15}, + {"IMAGE_DLLCHARACTERISTICS_WDM_DRIVER", Const, 15}, + {"IMAGE_FILE_32BIT_MACHINE", Const, 15}, + {"IMAGE_FILE_AGGRESIVE_WS_TRIM", Const, 15}, + {"IMAGE_FILE_BYTES_REVERSED_HI", Const, 15}, + {"IMAGE_FILE_BYTES_REVERSED_LO", Const, 15}, + {"IMAGE_FILE_DEBUG_STRIPPED", Const, 15}, + {"IMAGE_FILE_DLL", Const, 15}, + {"IMAGE_FILE_EXECUTABLE_IMAGE", Const, 15}, + {"IMAGE_FILE_LARGE_ADDRESS_AWARE", Const, 15}, + {"IMAGE_FILE_LINE_NUMS_STRIPPED", Const, 15}, + {"IMAGE_FILE_LOCAL_SYMS_STRIPPED", Const, 15}, + {"IMAGE_FILE_MACHINE_AM33", Const, 0}, + {"IMAGE_FILE_MACHINE_AMD64", Const, 0}, + {"IMAGE_FILE_MACHINE_ARM", Const, 0}, + {"IMAGE_FILE_MACHINE_ARM64", Const, 11}, + {"IMAGE_FILE_MACHINE_ARMNT", Const, 12}, + {"IMAGE_FILE_MACHINE_EBC", Const, 0}, + {"IMAGE_FILE_MACHINE_I386", Const, 0}, + {"IMAGE_FILE_MACHINE_IA64", Const, 0}, + {"IMAGE_FILE_MACHINE_LOONGARCH32", Const, 19}, + {"IMAGE_FILE_MACHINE_LOONGARCH64", Const, 19}, + {"IMAGE_FILE_MACHINE_M32R", Const, 0}, + {"IMAGE_FILE_MACHINE_MIPS16", Const, 0}, + {"IMAGE_FILE_MACHINE_MIPSFPU", Const, 0}, + {"IMAGE_FILE_MACHINE_MIPSFPU16", Const, 0}, + {"IMAGE_FILE_MACHINE_POWERPC", Const, 0}, + {"IMAGE_FILE_MACHINE_POWERPCFP", Const, 0}, + {"IMAGE_FILE_MACHINE_R4000", Const, 0}, + {"IMAGE_FILE_MACHINE_RISCV128", Const, 20}, + {"IMAGE_FILE_MACHINE_RISCV32", Const, 20}, + {"IMAGE_FILE_MACHINE_RISCV64", Const, 20}, + {"IMAGE_FILE_MACHINE_SH3", Const, 0}, + {"IMAGE_FILE_MACHINE_SH3DSP", Const, 0}, + {"IMAGE_FILE_MACHINE_SH4", Const, 0}, + {"IMAGE_FILE_MACHINE_SH5", Const, 0}, + {"IMAGE_FILE_MACHINE_THUMB", Const, 0}, + {"IMAGE_FILE_MACHINE_UNKNOWN", Const, 0}, + {"IMAGE_FILE_MACHINE_WCEMIPSV2", Const, 0}, + {"IMAGE_FILE_NET_RUN_FROM_SWAP", Const, 15}, + {"IMAGE_FILE_RELOCS_STRIPPED", Const, 15}, + {"IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP", Const, 15}, + {"IMAGE_FILE_SYSTEM", Const, 15}, + {"IMAGE_FILE_UP_SYSTEM_ONLY", Const, 15}, + {"IMAGE_SCN_CNT_CODE", Const, 19}, + {"IMAGE_SCN_CNT_INITIALIZED_DATA", Const, 19}, + {"IMAGE_SCN_CNT_UNINITIALIZED_DATA", Const, 19}, + {"IMAGE_SCN_LNK_COMDAT", Const, 19}, + {"IMAGE_SCN_MEM_DISCARDABLE", Const, 19}, + {"IMAGE_SCN_MEM_EXECUTE", Const, 19}, + {"IMAGE_SCN_MEM_READ", Const, 19}, + {"IMAGE_SCN_MEM_WRITE", Const, 19}, + {"IMAGE_SUBSYSTEM_EFI_APPLICATION", Const, 15}, + {"IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER", Const, 15}, + {"IMAGE_SUBSYSTEM_EFI_ROM", Const, 15}, + {"IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER", Const, 15}, + {"IMAGE_SUBSYSTEM_NATIVE", Const, 15}, + {"IMAGE_SUBSYSTEM_NATIVE_WINDOWS", Const, 15}, + {"IMAGE_SUBSYSTEM_OS2_CUI", Const, 15}, + {"IMAGE_SUBSYSTEM_POSIX_CUI", Const, 15}, + {"IMAGE_SUBSYSTEM_UNKNOWN", Const, 15}, + {"IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION", Const, 15}, + {"IMAGE_SUBSYSTEM_WINDOWS_CE_GUI", Const, 15}, + {"IMAGE_SUBSYSTEM_WINDOWS_CUI", Const, 15}, + {"IMAGE_SUBSYSTEM_WINDOWS_GUI", Const, 15}, + {"IMAGE_SUBSYSTEM_XBOX", Const, 15}, + {"ImportDirectory", Type, 0}, + {"ImportDirectory.FirstThunk", Field, 0}, + {"ImportDirectory.ForwarderChain", Field, 0}, + {"ImportDirectory.Name", Field, 0}, + {"ImportDirectory.OriginalFirstThunk", Field, 0}, + {"ImportDirectory.TimeDateStamp", Field, 0}, + {"NewFile", Func, 0}, + {"Open", Func, 0}, + {"OptionalHeader32", Type, 3}, + {"OptionalHeader32.AddressOfEntryPoint", Field, 3}, + {"OptionalHeader32.BaseOfCode", Field, 3}, + {"OptionalHeader32.BaseOfData", Field, 3}, + {"OptionalHeader32.CheckSum", Field, 3}, + {"OptionalHeader32.DataDirectory", Field, 3}, + {"OptionalHeader32.DllCharacteristics", Field, 3}, + {"OptionalHeader32.FileAlignment", Field, 3}, + {"OptionalHeader32.ImageBase", Field, 3}, + {"OptionalHeader32.LoaderFlags", Field, 3}, + {"OptionalHeader32.Magic", Field, 3}, + {"OptionalHeader32.MajorImageVersion", Field, 3}, + {"OptionalHeader32.MajorLinkerVersion", Field, 3}, + {"OptionalHeader32.MajorOperatingSystemVersion", Field, 3}, + {"OptionalHeader32.MajorSubsystemVersion", Field, 3}, + {"OptionalHeader32.MinorImageVersion", Field, 3}, + {"OptionalHeader32.MinorLinkerVersion", Field, 3}, + {"OptionalHeader32.MinorOperatingSystemVersion", Field, 3}, + {"OptionalHeader32.MinorSubsystemVersion", Field, 3}, + {"OptionalHeader32.NumberOfRvaAndSizes", Field, 3}, + {"OptionalHeader32.SectionAlignment", Field, 3}, + {"OptionalHeader32.SizeOfCode", Field, 3}, + {"OptionalHeader32.SizeOfHeaders", Field, 3}, + {"OptionalHeader32.SizeOfHeapCommit", Field, 3}, + {"OptionalHeader32.SizeOfHeapReserve", Field, 3}, + {"OptionalHeader32.SizeOfImage", Field, 3}, + {"OptionalHeader32.SizeOfInitializedData", Field, 3}, + {"OptionalHeader32.SizeOfStackCommit", Field, 3}, + {"OptionalHeader32.SizeOfStackReserve", Field, 3}, + {"OptionalHeader32.SizeOfUninitializedData", Field, 3}, + {"OptionalHeader32.Subsystem", Field, 3}, + {"OptionalHeader32.Win32VersionValue", Field, 3}, + {"OptionalHeader64", Type, 3}, + {"OptionalHeader64.AddressOfEntryPoint", Field, 3}, + {"OptionalHeader64.BaseOfCode", Field, 3}, + {"OptionalHeader64.CheckSum", Field, 3}, + {"OptionalHeader64.DataDirectory", Field, 3}, + {"OptionalHeader64.DllCharacteristics", Field, 3}, + {"OptionalHeader64.FileAlignment", Field, 3}, + {"OptionalHeader64.ImageBase", Field, 3}, + {"OptionalHeader64.LoaderFlags", Field, 3}, + {"OptionalHeader64.Magic", Field, 3}, + {"OptionalHeader64.MajorImageVersion", Field, 3}, + {"OptionalHeader64.MajorLinkerVersion", Field, 3}, + {"OptionalHeader64.MajorOperatingSystemVersion", Field, 3}, + {"OptionalHeader64.MajorSubsystemVersion", Field, 3}, + {"OptionalHeader64.MinorImageVersion", Field, 3}, + {"OptionalHeader64.MinorLinkerVersion", Field, 3}, + {"OptionalHeader64.MinorOperatingSystemVersion", Field, 3}, + {"OptionalHeader64.MinorSubsystemVersion", Field, 3}, + {"OptionalHeader64.NumberOfRvaAndSizes", Field, 3}, + {"OptionalHeader64.SectionAlignment", Field, 3}, + {"OptionalHeader64.SizeOfCode", Field, 3}, + {"OptionalHeader64.SizeOfHeaders", Field, 3}, + {"OptionalHeader64.SizeOfHeapCommit", Field, 3}, + {"OptionalHeader64.SizeOfHeapReserve", Field, 3}, + {"OptionalHeader64.SizeOfImage", Field, 3}, + {"OptionalHeader64.SizeOfInitializedData", Field, 3}, + {"OptionalHeader64.SizeOfStackCommit", Field, 3}, + {"OptionalHeader64.SizeOfStackReserve", Field, 3}, + {"OptionalHeader64.SizeOfUninitializedData", Field, 3}, + {"OptionalHeader64.Subsystem", Field, 3}, + {"OptionalHeader64.Win32VersionValue", Field, 3}, + {"Reloc", Type, 8}, + {"Reloc.SymbolTableIndex", Field, 8}, + {"Reloc.Type", Field, 8}, + {"Reloc.VirtualAddress", Field, 8}, + {"Section", Type, 0}, + {"Section.ReaderAt", Field, 0}, + {"Section.Relocs", Field, 8}, + {"Section.SectionHeader", Field, 0}, + {"SectionHeader", Type, 0}, + {"SectionHeader.Characteristics", Field, 0}, + {"SectionHeader.Name", Field, 0}, + {"SectionHeader.NumberOfLineNumbers", Field, 0}, + {"SectionHeader.NumberOfRelocations", Field, 0}, + {"SectionHeader.Offset", Field, 0}, + {"SectionHeader.PointerToLineNumbers", Field, 0}, + {"SectionHeader.PointerToRelocations", Field, 0}, + {"SectionHeader.Size", Field, 0}, + {"SectionHeader.VirtualAddress", Field, 0}, + {"SectionHeader.VirtualSize", Field, 0}, + {"SectionHeader32", Type, 0}, + {"SectionHeader32.Characteristics", Field, 0}, + {"SectionHeader32.Name", Field, 0}, + {"SectionHeader32.NumberOfLineNumbers", Field, 0}, + {"SectionHeader32.NumberOfRelocations", Field, 0}, + {"SectionHeader32.PointerToLineNumbers", Field, 0}, + {"SectionHeader32.PointerToRawData", Field, 0}, + {"SectionHeader32.PointerToRelocations", Field, 0}, + {"SectionHeader32.SizeOfRawData", Field, 0}, + {"SectionHeader32.VirtualAddress", Field, 0}, + {"SectionHeader32.VirtualSize", Field, 0}, + {"StringTable", Type, 8}, + {"Symbol", Type, 1}, + {"Symbol.Name", Field, 1}, + {"Symbol.SectionNumber", Field, 1}, + {"Symbol.StorageClass", Field, 1}, + {"Symbol.Type", Field, 1}, + {"Symbol.Value", Field, 1}, + }, + "debug/plan9obj": { + {"(*File).Close", Method, 3}, + {"(*File).Section", Method, 3}, + {"(*File).Symbols", Method, 3}, + {"(*Section).Data", Method, 3}, + {"(*Section).Open", Method, 3}, + {"(Section).ReadAt", Method, 3}, + {"ErrNoSymbols", Var, 18}, + {"File", Type, 3}, + {"File.FileHeader", Field, 3}, + {"File.Sections", Field, 3}, + {"FileHeader", Type, 3}, + {"FileHeader.Bss", Field, 3}, + {"FileHeader.Entry", Field, 3}, + {"FileHeader.HdrSize", Field, 4}, + {"FileHeader.LoadAddress", Field, 4}, + {"FileHeader.Magic", Field, 3}, + {"FileHeader.PtrSize", Field, 3}, + {"Magic386", Const, 3}, + {"Magic64", Const, 3}, + {"MagicAMD64", Const, 3}, + {"MagicARM", Const, 3}, + {"NewFile", Func, 3}, + {"Open", Func, 3}, + {"Section", Type, 3}, + {"Section.ReaderAt", Field, 3}, + {"Section.SectionHeader", Field, 3}, + {"SectionHeader", Type, 3}, + {"SectionHeader.Name", Field, 3}, + {"SectionHeader.Offset", Field, 3}, + {"SectionHeader.Size", Field, 3}, + {"Sym", Type, 3}, + {"Sym.Name", Field, 3}, + {"Sym.Type", Field, 3}, + {"Sym.Value", Field, 3}, + }, + "embed": { + {"(FS).Open", Method, 16}, + {"(FS).ReadDir", Method, 16}, + {"(FS).ReadFile", Method, 16}, + {"FS", Type, 16}, + }, + "encoding": { + {"BinaryMarshaler", Type, 2}, + {"BinaryUnmarshaler", Type, 2}, + {"TextMarshaler", Type, 2}, + {"TextUnmarshaler", Type, 2}, + }, + "encoding/ascii85": { + {"(CorruptInputError).Error", Method, 0}, + {"CorruptInputError", Type, 0}, + {"Decode", Func, 0}, + {"Encode", Func, 0}, + {"MaxEncodedLen", Func, 0}, + {"NewDecoder", Func, 0}, + {"NewEncoder", Func, 0}, + }, + "encoding/asn1": { + {"(BitString).At", Method, 0}, + {"(BitString).RightAlign", Method, 0}, + {"(ObjectIdentifier).Equal", Method, 0}, + {"(ObjectIdentifier).String", Method, 3}, + {"(StructuralError).Error", Method, 0}, + {"(SyntaxError).Error", Method, 0}, + {"BitString", Type, 0}, + {"BitString.BitLength", Field, 0}, + {"BitString.Bytes", Field, 0}, + {"ClassApplication", Const, 6}, + {"ClassContextSpecific", Const, 6}, + {"ClassPrivate", Const, 6}, + {"ClassUniversal", Const, 6}, + {"Enumerated", Type, 0}, + {"Flag", Type, 0}, + {"Marshal", Func, 0}, + {"MarshalWithParams", Func, 10}, + {"NullBytes", Var, 9}, + {"NullRawValue", Var, 9}, + {"ObjectIdentifier", Type, 0}, + {"RawContent", Type, 0}, + {"RawValue", Type, 0}, + {"RawValue.Bytes", Field, 0}, + {"RawValue.Class", Field, 0}, + {"RawValue.FullBytes", Field, 0}, + {"RawValue.IsCompound", Field, 0}, + {"RawValue.Tag", Field, 0}, + {"StructuralError", Type, 0}, + {"StructuralError.Msg", Field, 0}, + {"SyntaxError", Type, 0}, + {"SyntaxError.Msg", Field, 0}, + {"TagBMPString", Const, 14}, + {"TagBitString", Const, 6}, + {"TagBoolean", Const, 6}, + {"TagEnum", Const, 6}, + {"TagGeneralString", Const, 6}, + {"TagGeneralizedTime", Const, 6}, + {"TagIA5String", Const, 6}, + {"TagInteger", Const, 6}, + {"TagNull", Const, 9}, + {"TagNumericString", Const, 10}, + {"TagOID", Const, 6}, + {"TagOctetString", Const, 6}, + {"TagPrintableString", Const, 6}, + {"TagSequence", Const, 6}, + {"TagSet", Const, 6}, + {"TagT61String", Const, 6}, + {"TagUTCTime", Const, 6}, + {"TagUTF8String", Const, 6}, + {"Unmarshal", Func, 0}, + {"UnmarshalWithParams", Func, 0}, + }, + "encoding/base32": { + {"(*Encoding).AppendDecode", Method, 22}, + {"(*Encoding).AppendEncode", Method, 22}, + {"(*Encoding).Decode", Method, 0}, + {"(*Encoding).DecodeString", Method, 0}, + {"(*Encoding).DecodedLen", Method, 0}, + {"(*Encoding).Encode", Method, 0}, + {"(*Encoding).EncodeToString", Method, 0}, + {"(*Encoding).EncodedLen", Method, 0}, + {"(CorruptInputError).Error", Method, 0}, + {"(Encoding).WithPadding", Method, 9}, + {"CorruptInputError", Type, 0}, + {"Encoding", Type, 0}, + {"HexEncoding", Var, 0}, + {"NewDecoder", Func, 0}, + {"NewEncoder", Func, 0}, + {"NewEncoding", Func, 0}, + {"NoPadding", Const, 9}, + {"StdEncoding", Var, 0}, + {"StdPadding", Const, 9}, + }, + "encoding/base64": { + {"(*Encoding).AppendDecode", Method, 22}, + {"(*Encoding).AppendEncode", Method, 22}, + {"(*Encoding).Decode", Method, 0}, + {"(*Encoding).DecodeString", Method, 0}, + {"(*Encoding).DecodedLen", Method, 0}, + {"(*Encoding).Encode", Method, 0}, + {"(*Encoding).EncodeToString", Method, 0}, + {"(*Encoding).EncodedLen", Method, 0}, + {"(CorruptInputError).Error", Method, 0}, + {"(Encoding).Strict", Method, 8}, + {"(Encoding).WithPadding", Method, 5}, + {"CorruptInputError", Type, 0}, + {"Encoding", Type, 0}, + {"NewDecoder", Func, 0}, + {"NewEncoder", Func, 0}, + {"NewEncoding", Func, 0}, + {"NoPadding", Const, 5}, + {"RawStdEncoding", Var, 5}, + {"RawURLEncoding", Var, 5}, + {"StdEncoding", Var, 0}, + {"StdPadding", Const, 5}, + {"URLEncoding", Var, 0}, + }, + "encoding/binary": { + {"Append", Func, 23}, + {"AppendByteOrder", Type, 19}, + {"AppendUvarint", Func, 19}, + {"AppendVarint", Func, 19}, + {"BigEndian", Var, 0}, + {"ByteOrder", Type, 0}, + {"Decode", Func, 23}, + {"Encode", Func, 23}, + {"LittleEndian", Var, 0}, + {"MaxVarintLen16", Const, 0}, + {"MaxVarintLen32", Const, 0}, + {"MaxVarintLen64", Const, 0}, + {"NativeEndian", Var, 21}, + {"PutUvarint", Func, 0}, + {"PutVarint", Func, 0}, + {"Read", Func, 0}, + {"ReadUvarint", Func, 0}, + {"ReadVarint", Func, 0}, + {"Size", Func, 0}, + {"Uvarint", Func, 0}, + {"Varint", Func, 0}, + {"Write", Func, 0}, + }, + "encoding/csv": { + {"(*ParseError).Error", Method, 0}, + {"(*ParseError).Unwrap", Method, 13}, + {"(*Reader).FieldPos", Method, 17}, + {"(*Reader).InputOffset", Method, 19}, + {"(*Reader).Read", Method, 0}, + {"(*Reader).ReadAll", Method, 0}, + {"(*Writer).Error", Method, 1}, + {"(*Writer).Flush", Method, 0}, + {"(*Writer).Write", Method, 0}, + {"(*Writer).WriteAll", Method, 0}, + {"ErrBareQuote", Var, 0}, + {"ErrFieldCount", Var, 0}, + {"ErrQuote", Var, 0}, + {"ErrTrailingComma", Var, 0}, + {"NewReader", Func, 0}, + {"NewWriter", Func, 0}, + {"ParseError", Type, 0}, + {"ParseError.Column", Field, 0}, + {"ParseError.Err", Field, 0}, + {"ParseError.Line", Field, 0}, + {"ParseError.StartLine", Field, 10}, + {"Reader", Type, 0}, + {"Reader.Comma", Field, 0}, + {"Reader.Comment", Field, 0}, + {"Reader.FieldsPerRecord", Field, 0}, + {"Reader.LazyQuotes", Field, 0}, + {"Reader.ReuseRecord", Field, 9}, + {"Reader.TrailingComma", Field, 0}, + {"Reader.TrimLeadingSpace", Field, 0}, + {"Writer", Type, 0}, + {"Writer.Comma", Field, 0}, + {"Writer.UseCRLF", Field, 0}, + }, + "encoding/gob": { + {"(*Decoder).Decode", Method, 0}, + {"(*Decoder).DecodeValue", Method, 0}, + {"(*Encoder).Encode", Method, 0}, + {"(*Encoder).EncodeValue", Method, 0}, + {"CommonType", Type, 0}, + {"CommonType.Id", Field, 0}, + {"CommonType.Name", Field, 0}, + {"Decoder", Type, 0}, + {"Encoder", Type, 0}, + {"GobDecoder", Type, 0}, + {"GobEncoder", Type, 0}, + {"NewDecoder", Func, 0}, + {"NewEncoder", Func, 0}, + {"Register", Func, 0}, + {"RegisterName", Func, 0}, + }, + "encoding/hex": { + {"(InvalidByteError).Error", Method, 0}, + {"AppendDecode", Func, 22}, + {"AppendEncode", Func, 22}, + {"Decode", Func, 0}, + {"DecodeString", Func, 0}, + {"DecodedLen", Func, 0}, + {"Dump", Func, 0}, + {"Dumper", Func, 0}, + {"Encode", Func, 0}, + {"EncodeToString", Func, 0}, + {"EncodedLen", Func, 0}, + {"ErrLength", Var, 0}, + {"InvalidByteError", Type, 0}, + {"NewDecoder", Func, 10}, + {"NewEncoder", Func, 10}, + }, + "encoding/json": { + {"(*Decoder).Buffered", Method, 1}, + {"(*Decoder).Decode", Method, 0}, + {"(*Decoder).DisallowUnknownFields", Method, 10}, + {"(*Decoder).InputOffset", Method, 14}, + {"(*Decoder).More", Method, 5}, + {"(*Decoder).Token", Method, 5}, + {"(*Decoder).UseNumber", Method, 1}, + {"(*Encoder).Encode", Method, 0}, + {"(*Encoder).SetEscapeHTML", Method, 7}, + {"(*Encoder).SetIndent", Method, 7}, + {"(*InvalidUTF8Error).Error", Method, 0}, + {"(*InvalidUnmarshalError).Error", Method, 0}, + {"(*MarshalerError).Error", Method, 0}, + {"(*MarshalerError).Unwrap", Method, 13}, + {"(*RawMessage).MarshalJSON", Method, 0}, + {"(*RawMessage).UnmarshalJSON", Method, 0}, + {"(*SyntaxError).Error", Method, 0}, + {"(*UnmarshalFieldError).Error", Method, 0}, + {"(*UnmarshalTypeError).Error", Method, 0}, + {"(*UnsupportedTypeError).Error", Method, 0}, + {"(*UnsupportedValueError).Error", Method, 0}, + {"(Delim).String", Method, 5}, + {"(Number).Float64", Method, 1}, + {"(Number).Int64", Method, 1}, + {"(Number).String", Method, 1}, + {"(RawMessage).MarshalJSON", Method, 8}, + {"Compact", Func, 0}, + {"Decoder", Type, 0}, + {"Delim", Type, 5}, + {"Encoder", Type, 0}, + {"HTMLEscape", Func, 0}, + {"Indent", Func, 0}, + {"InvalidUTF8Error", Type, 0}, + {"InvalidUTF8Error.S", Field, 0}, + {"InvalidUnmarshalError", Type, 0}, + {"InvalidUnmarshalError.Type", Field, 0}, + {"Marshal", Func, 0}, + {"MarshalIndent", Func, 0}, + {"Marshaler", Type, 0}, + {"MarshalerError", Type, 0}, + {"MarshalerError.Err", Field, 0}, + {"MarshalerError.Type", Field, 0}, + {"NewDecoder", Func, 0}, + {"NewEncoder", Func, 0}, + {"Number", Type, 1}, + {"RawMessage", Type, 0}, + {"SyntaxError", Type, 0}, + {"SyntaxError.Offset", Field, 0}, + {"Token", Type, 5}, + {"Unmarshal", Func, 0}, + {"UnmarshalFieldError", Type, 0}, + {"UnmarshalFieldError.Field", Field, 0}, + {"UnmarshalFieldError.Key", Field, 0}, + {"UnmarshalFieldError.Type", Field, 0}, + {"UnmarshalTypeError", Type, 0}, + {"UnmarshalTypeError.Field", Field, 8}, + {"UnmarshalTypeError.Offset", Field, 5}, + {"UnmarshalTypeError.Struct", Field, 8}, + {"UnmarshalTypeError.Type", Field, 0}, + {"UnmarshalTypeError.Value", Field, 0}, + {"Unmarshaler", Type, 0}, + {"UnsupportedTypeError", Type, 0}, + {"UnsupportedTypeError.Type", Field, 0}, + {"UnsupportedValueError", Type, 0}, + {"UnsupportedValueError.Str", Field, 0}, + {"UnsupportedValueError.Value", Field, 0}, + {"Valid", Func, 9}, + }, + "encoding/pem": { + {"Block", Type, 0}, + {"Block.Bytes", Field, 0}, + {"Block.Headers", Field, 0}, + {"Block.Type", Field, 0}, + {"Decode", Func, 0}, + {"Encode", Func, 0}, + {"EncodeToMemory", Func, 0}, + }, + "encoding/xml": { + {"(*Decoder).Decode", Method, 0}, + {"(*Decoder).DecodeElement", Method, 0}, + {"(*Decoder).InputOffset", Method, 4}, + {"(*Decoder).InputPos", Method, 19}, + {"(*Decoder).RawToken", Method, 0}, + {"(*Decoder).Skip", Method, 0}, + {"(*Decoder).Token", Method, 0}, + {"(*Encoder).Close", Method, 20}, + {"(*Encoder).Encode", Method, 0}, + {"(*Encoder).EncodeElement", Method, 2}, + {"(*Encoder).EncodeToken", Method, 2}, + {"(*Encoder).Flush", Method, 2}, + {"(*Encoder).Indent", Method, 1}, + {"(*SyntaxError).Error", Method, 0}, + {"(*TagPathError).Error", Method, 0}, + {"(*UnsupportedTypeError).Error", Method, 0}, + {"(CharData).Copy", Method, 0}, + {"(Comment).Copy", Method, 0}, + {"(Directive).Copy", Method, 0}, + {"(ProcInst).Copy", Method, 0}, + {"(StartElement).Copy", Method, 0}, + {"(StartElement).End", Method, 2}, + {"(UnmarshalError).Error", Method, 0}, + {"Attr", Type, 0}, + {"Attr.Name", Field, 0}, + {"Attr.Value", Field, 0}, + {"CharData", Type, 0}, + {"Comment", Type, 0}, + {"CopyToken", Func, 0}, + {"Decoder", Type, 0}, + {"Decoder.AutoClose", Field, 0}, + {"Decoder.CharsetReader", Field, 0}, + {"Decoder.DefaultSpace", Field, 1}, + {"Decoder.Entity", Field, 0}, + {"Decoder.Strict", Field, 0}, + {"Directive", Type, 0}, + {"Encoder", Type, 0}, + {"EndElement", Type, 0}, + {"EndElement.Name", Field, 0}, + {"Escape", Func, 0}, + {"EscapeText", Func, 1}, + {"HTMLAutoClose", Var, 0}, + {"HTMLEntity", Var, 0}, + {"Header", Const, 0}, + {"Marshal", Func, 0}, + {"MarshalIndent", Func, 0}, + {"Marshaler", Type, 2}, + {"MarshalerAttr", Type, 2}, + {"Name", Type, 0}, + {"Name.Local", Field, 0}, + {"Name.Space", Field, 0}, + {"NewDecoder", Func, 0}, + {"NewEncoder", Func, 0}, + {"NewTokenDecoder", Func, 10}, + {"ProcInst", Type, 0}, + {"ProcInst.Inst", Field, 0}, + {"ProcInst.Target", Field, 0}, + {"StartElement", Type, 0}, + {"StartElement.Attr", Field, 0}, + {"StartElement.Name", Field, 0}, + {"SyntaxError", Type, 0}, + {"SyntaxError.Line", Field, 0}, + {"SyntaxError.Msg", Field, 0}, + {"TagPathError", Type, 0}, + {"TagPathError.Field1", Field, 0}, + {"TagPathError.Field2", Field, 0}, + {"TagPathError.Struct", Field, 0}, + {"TagPathError.Tag1", Field, 0}, + {"TagPathError.Tag2", Field, 0}, + {"Token", Type, 0}, + {"TokenReader", Type, 10}, + {"Unmarshal", Func, 0}, + {"UnmarshalError", Type, 0}, + {"Unmarshaler", Type, 2}, + {"UnmarshalerAttr", Type, 2}, + {"UnsupportedTypeError", Type, 0}, + {"UnsupportedTypeError.Type", Field, 0}, + }, + "errors": { + {"As", Func, 13}, + {"ErrUnsupported", Var, 21}, + {"Is", Func, 13}, + {"Join", Func, 20}, + {"New", Func, 0}, + {"Unwrap", Func, 13}, + }, + "expvar": { + {"(*Float).Add", Method, 0}, + {"(*Float).Set", Method, 0}, + {"(*Float).String", Method, 0}, + {"(*Float).Value", Method, 8}, + {"(*Int).Add", Method, 0}, + {"(*Int).Set", Method, 0}, + {"(*Int).String", Method, 0}, + {"(*Int).Value", Method, 8}, + {"(*Map).Add", Method, 0}, + {"(*Map).AddFloat", Method, 0}, + {"(*Map).Delete", Method, 12}, + {"(*Map).Do", Method, 0}, + {"(*Map).Get", Method, 0}, + {"(*Map).Init", Method, 0}, + {"(*Map).Set", Method, 0}, + {"(*Map).String", Method, 0}, + {"(*String).Set", Method, 0}, + {"(*String).String", Method, 0}, + {"(*String).Value", Method, 8}, + {"(Func).String", Method, 0}, + {"(Func).Value", Method, 8}, + {"Do", Func, 0}, + {"Float", Type, 0}, + {"Func", Type, 0}, + {"Get", Func, 0}, + {"Handler", Func, 8}, + {"Int", Type, 0}, + {"KeyValue", Type, 0}, + {"KeyValue.Key", Field, 0}, + {"KeyValue.Value", Field, 0}, + {"Map", Type, 0}, + {"NewFloat", Func, 0}, + {"NewInt", Func, 0}, + {"NewMap", Func, 0}, + {"NewString", Func, 0}, + {"Publish", Func, 0}, + {"String", Type, 0}, + {"Var", Type, 0}, + }, + "flag": { + {"(*FlagSet).Arg", Method, 0}, + {"(*FlagSet).Args", Method, 0}, + {"(*FlagSet).Bool", Method, 0}, + {"(*FlagSet).BoolFunc", Method, 21}, + {"(*FlagSet).BoolVar", Method, 0}, + {"(*FlagSet).Duration", Method, 0}, + {"(*FlagSet).DurationVar", Method, 0}, + {"(*FlagSet).ErrorHandling", Method, 10}, + {"(*FlagSet).Float64", Method, 0}, + {"(*FlagSet).Float64Var", Method, 0}, + {"(*FlagSet).Func", Method, 16}, + {"(*FlagSet).Init", Method, 0}, + {"(*FlagSet).Int", Method, 0}, + {"(*FlagSet).Int64", Method, 0}, + {"(*FlagSet).Int64Var", Method, 0}, + {"(*FlagSet).IntVar", Method, 0}, + {"(*FlagSet).Lookup", Method, 0}, + {"(*FlagSet).NArg", Method, 0}, + {"(*FlagSet).NFlag", Method, 0}, + {"(*FlagSet).Name", Method, 10}, + {"(*FlagSet).Output", Method, 10}, + {"(*FlagSet).Parse", Method, 0}, + {"(*FlagSet).Parsed", Method, 0}, + {"(*FlagSet).PrintDefaults", Method, 0}, + {"(*FlagSet).Set", Method, 0}, + {"(*FlagSet).SetOutput", Method, 0}, + {"(*FlagSet).String", Method, 0}, + {"(*FlagSet).StringVar", Method, 0}, + {"(*FlagSet).TextVar", Method, 19}, + {"(*FlagSet).Uint", Method, 0}, + {"(*FlagSet).Uint64", Method, 0}, + {"(*FlagSet).Uint64Var", Method, 0}, + {"(*FlagSet).UintVar", Method, 0}, + {"(*FlagSet).Var", Method, 0}, + {"(*FlagSet).Visit", Method, 0}, + {"(*FlagSet).VisitAll", Method, 0}, + {"Arg", Func, 0}, + {"Args", Func, 0}, + {"Bool", Func, 0}, + {"BoolFunc", Func, 21}, + {"BoolVar", Func, 0}, + {"CommandLine", Var, 2}, + {"ContinueOnError", Const, 0}, + {"Duration", Func, 0}, + {"DurationVar", Func, 0}, + {"ErrHelp", Var, 0}, + {"ErrorHandling", Type, 0}, + {"ExitOnError", Const, 0}, + {"Flag", Type, 0}, + {"Flag.DefValue", Field, 0}, + {"Flag.Name", Field, 0}, + {"Flag.Usage", Field, 0}, + {"Flag.Value", Field, 0}, + {"FlagSet", Type, 0}, + {"FlagSet.Usage", Field, 0}, + {"Float64", Func, 0}, + {"Float64Var", Func, 0}, + {"Func", Func, 16}, + {"Getter", Type, 2}, + {"Int", Func, 0}, + {"Int64", Func, 0}, + {"Int64Var", Func, 0}, + {"IntVar", Func, 0}, + {"Lookup", Func, 0}, + {"NArg", Func, 0}, + {"NFlag", Func, 0}, + {"NewFlagSet", Func, 0}, + {"PanicOnError", Const, 0}, + {"Parse", Func, 0}, + {"Parsed", Func, 0}, + {"PrintDefaults", Func, 0}, + {"Set", Func, 0}, + {"String", Func, 0}, + {"StringVar", Func, 0}, + {"TextVar", Func, 19}, + {"Uint", Func, 0}, + {"Uint64", Func, 0}, + {"Uint64Var", Func, 0}, + {"UintVar", Func, 0}, + {"UnquoteUsage", Func, 5}, + {"Usage", Var, 0}, + {"Value", Type, 0}, + {"Var", Func, 0}, + {"Visit", Func, 0}, + {"VisitAll", Func, 0}, + }, + "fmt": { + {"Append", Func, 19}, + {"Appendf", Func, 19}, + {"Appendln", Func, 19}, + {"Errorf", Func, 0}, + {"FormatString", Func, 20}, + {"Formatter", Type, 0}, + {"Fprint", Func, 0}, + {"Fprintf", Func, 0}, + {"Fprintln", Func, 0}, + {"Fscan", Func, 0}, + {"Fscanf", Func, 0}, + {"Fscanln", Func, 0}, + {"GoStringer", Type, 0}, + {"Print", Func, 0}, + {"Printf", Func, 0}, + {"Println", Func, 0}, + {"Scan", Func, 0}, + {"ScanState", Type, 0}, + {"Scanf", Func, 0}, + {"Scanln", Func, 0}, + {"Scanner", Type, 0}, + {"Sprint", Func, 0}, + {"Sprintf", Func, 0}, + {"Sprintln", Func, 0}, + {"Sscan", Func, 0}, + {"Sscanf", Func, 0}, + {"Sscanln", Func, 0}, + {"State", Type, 0}, + {"Stringer", Type, 0}, + }, + "go/ast": { + {"(*ArrayType).End", Method, 0}, + {"(*ArrayType).Pos", Method, 0}, + {"(*AssignStmt).End", Method, 0}, + {"(*AssignStmt).Pos", Method, 0}, + {"(*BadDecl).End", Method, 0}, + {"(*BadDecl).Pos", Method, 0}, + {"(*BadExpr).End", Method, 0}, + {"(*BadExpr).Pos", Method, 0}, + {"(*BadStmt).End", Method, 0}, + {"(*BadStmt).Pos", Method, 0}, + {"(*BasicLit).End", Method, 0}, + {"(*BasicLit).Pos", Method, 0}, + {"(*BinaryExpr).End", Method, 0}, + {"(*BinaryExpr).Pos", Method, 0}, + {"(*BlockStmt).End", Method, 0}, + {"(*BlockStmt).Pos", Method, 0}, + {"(*BranchStmt).End", Method, 0}, + {"(*BranchStmt).Pos", Method, 0}, + {"(*CallExpr).End", Method, 0}, + {"(*CallExpr).Pos", Method, 0}, + {"(*CaseClause).End", Method, 0}, + {"(*CaseClause).Pos", Method, 0}, + {"(*ChanType).End", Method, 0}, + {"(*ChanType).Pos", Method, 0}, + {"(*CommClause).End", Method, 0}, + {"(*CommClause).Pos", Method, 0}, + {"(*Comment).End", Method, 0}, + {"(*Comment).Pos", Method, 0}, + {"(*CommentGroup).End", Method, 0}, + {"(*CommentGroup).Pos", Method, 0}, + {"(*CommentGroup).Text", Method, 0}, + {"(*CompositeLit).End", Method, 0}, + {"(*CompositeLit).Pos", Method, 0}, + {"(*DeclStmt).End", Method, 0}, + {"(*DeclStmt).Pos", Method, 0}, + {"(*DeferStmt).End", Method, 0}, + {"(*DeferStmt).Pos", Method, 0}, + {"(*Ellipsis).End", Method, 0}, + {"(*Ellipsis).Pos", Method, 0}, + {"(*EmptyStmt).End", Method, 0}, + {"(*EmptyStmt).Pos", Method, 0}, + {"(*ExprStmt).End", Method, 0}, + {"(*ExprStmt).Pos", Method, 0}, + {"(*Field).End", Method, 0}, + {"(*Field).Pos", Method, 0}, + {"(*FieldList).End", Method, 0}, + {"(*FieldList).NumFields", Method, 0}, + {"(*FieldList).Pos", Method, 0}, + {"(*File).End", Method, 0}, + {"(*File).Pos", Method, 0}, + {"(*ForStmt).End", Method, 0}, + {"(*ForStmt).Pos", Method, 0}, + {"(*FuncDecl).End", Method, 0}, + {"(*FuncDecl).Pos", Method, 0}, + {"(*FuncLit).End", Method, 0}, + {"(*FuncLit).Pos", Method, 0}, + {"(*FuncType).End", Method, 0}, + {"(*FuncType).Pos", Method, 0}, + {"(*GenDecl).End", Method, 0}, + {"(*GenDecl).Pos", Method, 0}, + {"(*GoStmt).End", Method, 0}, + {"(*GoStmt).Pos", Method, 0}, + {"(*Ident).End", Method, 0}, + {"(*Ident).IsExported", Method, 0}, + {"(*Ident).Pos", Method, 0}, + {"(*Ident).String", Method, 0}, + {"(*IfStmt).End", Method, 0}, + {"(*IfStmt).Pos", Method, 0}, + {"(*ImportSpec).End", Method, 0}, + {"(*ImportSpec).Pos", Method, 0}, + {"(*IncDecStmt).End", Method, 0}, + {"(*IncDecStmt).Pos", Method, 0}, + {"(*IndexExpr).End", Method, 0}, + {"(*IndexExpr).Pos", Method, 0}, + {"(*IndexListExpr).End", Method, 18}, + {"(*IndexListExpr).Pos", Method, 18}, + {"(*InterfaceType).End", Method, 0}, + {"(*InterfaceType).Pos", Method, 0}, + {"(*KeyValueExpr).End", Method, 0}, + {"(*KeyValueExpr).Pos", Method, 0}, + {"(*LabeledStmt).End", Method, 0}, + {"(*LabeledStmt).Pos", Method, 0}, + {"(*MapType).End", Method, 0}, + {"(*MapType).Pos", Method, 0}, + {"(*Object).Pos", Method, 0}, + {"(*Package).End", Method, 0}, + {"(*Package).Pos", Method, 0}, + {"(*ParenExpr).End", Method, 0}, + {"(*ParenExpr).Pos", Method, 0}, + {"(*RangeStmt).End", Method, 0}, + {"(*RangeStmt).Pos", Method, 0}, + {"(*ReturnStmt).End", Method, 0}, + {"(*ReturnStmt).Pos", Method, 0}, + {"(*Scope).Insert", Method, 0}, + {"(*Scope).Lookup", Method, 0}, + {"(*Scope).String", Method, 0}, + {"(*SelectStmt).End", Method, 0}, + {"(*SelectStmt).Pos", Method, 0}, + {"(*SelectorExpr).End", Method, 0}, + {"(*SelectorExpr).Pos", Method, 0}, + {"(*SendStmt).End", Method, 0}, + {"(*SendStmt).Pos", Method, 0}, + {"(*SliceExpr).End", Method, 0}, + {"(*SliceExpr).Pos", Method, 0}, + {"(*StarExpr).End", Method, 0}, + {"(*StarExpr).Pos", Method, 0}, + {"(*StructType).End", Method, 0}, + {"(*StructType).Pos", Method, 0}, + {"(*SwitchStmt).End", Method, 0}, + {"(*SwitchStmt).Pos", Method, 0}, + {"(*TypeAssertExpr).End", Method, 0}, + {"(*TypeAssertExpr).Pos", Method, 0}, + {"(*TypeSpec).End", Method, 0}, + {"(*TypeSpec).Pos", Method, 0}, + {"(*TypeSwitchStmt).End", Method, 0}, + {"(*TypeSwitchStmt).Pos", Method, 0}, + {"(*UnaryExpr).End", Method, 0}, + {"(*UnaryExpr).Pos", Method, 0}, + {"(*ValueSpec).End", Method, 0}, + {"(*ValueSpec).Pos", Method, 0}, + {"(CommentMap).Comments", Method, 1}, + {"(CommentMap).Filter", Method, 1}, + {"(CommentMap).String", Method, 1}, + {"(CommentMap).Update", Method, 1}, + {"(ObjKind).String", Method, 0}, + {"ArrayType", Type, 0}, + {"ArrayType.Elt", Field, 0}, + {"ArrayType.Lbrack", Field, 0}, + {"ArrayType.Len", Field, 0}, + {"AssignStmt", Type, 0}, + {"AssignStmt.Lhs", Field, 0}, + {"AssignStmt.Rhs", Field, 0}, + {"AssignStmt.Tok", Field, 0}, + {"AssignStmt.TokPos", Field, 0}, + {"Bad", Const, 0}, + {"BadDecl", Type, 0}, + {"BadDecl.From", Field, 0}, + {"BadDecl.To", Field, 0}, + {"BadExpr", Type, 0}, + {"BadExpr.From", Field, 0}, + {"BadExpr.To", Field, 0}, + {"BadStmt", Type, 0}, + {"BadStmt.From", Field, 0}, + {"BadStmt.To", Field, 0}, + {"BasicLit", Type, 0}, + {"BasicLit.Kind", Field, 0}, + {"BasicLit.Value", Field, 0}, + {"BasicLit.ValuePos", Field, 0}, + {"BinaryExpr", Type, 0}, + {"BinaryExpr.Op", Field, 0}, + {"BinaryExpr.OpPos", Field, 0}, + {"BinaryExpr.X", Field, 0}, + {"BinaryExpr.Y", Field, 0}, + {"BlockStmt", Type, 0}, + {"BlockStmt.Lbrace", Field, 0}, + {"BlockStmt.List", Field, 0}, + {"BlockStmt.Rbrace", Field, 0}, + {"BranchStmt", Type, 0}, + {"BranchStmt.Label", Field, 0}, + {"BranchStmt.Tok", Field, 0}, + {"BranchStmt.TokPos", Field, 0}, + {"CallExpr", Type, 0}, + {"CallExpr.Args", Field, 0}, + {"CallExpr.Ellipsis", Field, 0}, + {"CallExpr.Fun", Field, 0}, + {"CallExpr.Lparen", Field, 0}, + {"CallExpr.Rparen", Field, 0}, + {"CaseClause", Type, 0}, + {"CaseClause.Body", Field, 0}, + {"CaseClause.Case", Field, 0}, + {"CaseClause.Colon", Field, 0}, + {"CaseClause.List", Field, 0}, + {"ChanDir", Type, 0}, + {"ChanType", Type, 0}, + {"ChanType.Arrow", Field, 1}, + {"ChanType.Begin", Field, 0}, + {"ChanType.Dir", Field, 0}, + {"ChanType.Value", Field, 0}, + {"CommClause", Type, 0}, + {"CommClause.Body", Field, 0}, + {"CommClause.Case", Field, 0}, + {"CommClause.Colon", Field, 0}, + {"CommClause.Comm", Field, 0}, + {"Comment", Type, 0}, + {"Comment.Slash", Field, 0}, + {"Comment.Text", Field, 0}, + {"CommentGroup", Type, 0}, + {"CommentGroup.List", Field, 0}, + {"CommentMap", Type, 1}, + {"CompositeLit", Type, 0}, + {"CompositeLit.Elts", Field, 0}, + {"CompositeLit.Incomplete", Field, 11}, + {"CompositeLit.Lbrace", Field, 0}, + {"CompositeLit.Rbrace", Field, 0}, + {"CompositeLit.Type", Field, 0}, + {"Con", Const, 0}, + {"Decl", Type, 0}, + {"DeclStmt", Type, 0}, + {"DeclStmt.Decl", Field, 0}, + {"DeferStmt", Type, 0}, + {"DeferStmt.Call", Field, 0}, + {"DeferStmt.Defer", Field, 0}, + {"Ellipsis", Type, 0}, + {"Ellipsis.Ellipsis", Field, 0}, + {"Ellipsis.Elt", Field, 0}, + {"EmptyStmt", Type, 0}, + {"EmptyStmt.Implicit", Field, 5}, + {"EmptyStmt.Semicolon", Field, 0}, + {"Expr", Type, 0}, + {"ExprStmt", Type, 0}, + {"ExprStmt.X", Field, 0}, + {"Field", Type, 0}, + {"Field.Comment", Field, 0}, + {"Field.Doc", Field, 0}, + {"Field.Names", Field, 0}, + {"Field.Tag", Field, 0}, + {"Field.Type", Field, 0}, + {"FieldFilter", Type, 0}, + {"FieldList", Type, 0}, + {"FieldList.Closing", Field, 0}, + {"FieldList.List", Field, 0}, + {"FieldList.Opening", Field, 0}, + {"File", Type, 0}, + {"File.Comments", Field, 0}, + {"File.Decls", Field, 0}, + {"File.Doc", Field, 0}, + {"File.FileEnd", Field, 20}, + {"File.FileStart", Field, 20}, + {"File.GoVersion", Field, 21}, + {"File.Imports", Field, 0}, + {"File.Name", Field, 0}, + {"File.Package", Field, 0}, + {"File.Scope", Field, 0}, + {"File.Unresolved", Field, 0}, + {"FileExports", Func, 0}, + {"Filter", Type, 0}, + {"FilterDecl", Func, 0}, + {"FilterFile", Func, 0}, + {"FilterFuncDuplicates", Const, 0}, + {"FilterImportDuplicates", Const, 0}, + {"FilterPackage", Func, 0}, + {"FilterUnassociatedComments", Const, 0}, + {"ForStmt", Type, 0}, + {"ForStmt.Body", Field, 0}, + {"ForStmt.Cond", Field, 0}, + {"ForStmt.For", Field, 0}, + {"ForStmt.Init", Field, 0}, + {"ForStmt.Post", Field, 0}, + {"Fprint", Func, 0}, + {"Fun", Const, 0}, + {"FuncDecl", Type, 0}, + {"FuncDecl.Body", Field, 0}, + {"FuncDecl.Doc", Field, 0}, + {"FuncDecl.Name", Field, 0}, + {"FuncDecl.Recv", Field, 0}, + {"FuncDecl.Type", Field, 0}, + {"FuncLit", Type, 0}, + {"FuncLit.Body", Field, 0}, + {"FuncLit.Type", Field, 0}, + {"FuncType", Type, 0}, + {"FuncType.Func", Field, 0}, + {"FuncType.Params", Field, 0}, + {"FuncType.Results", Field, 0}, + {"FuncType.TypeParams", Field, 18}, + {"GenDecl", Type, 0}, + {"GenDecl.Doc", Field, 0}, + {"GenDecl.Lparen", Field, 0}, + {"GenDecl.Rparen", Field, 0}, + {"GenDecl.Specs", Field, 0}, + {"GenDecl.Tok", Field, 0}, + {"GenDecl.TokPos", Field, 0}, + {"GoStmt", Type, 0}, + {"GoStmt.Call", Field, 0}, + {"GoStmt.Go", Field, 0}, + {"Ident", Type, 0}, + {"Ident.Name", Field, 0}, + {"Ident.NamePos", Field, 0}, + {"Ident.Obj", Field, 0}, + {"IfStmt", Type, 0}, + {"IfStmt.Body", Field, 0}, + {"IfStmt.Cond", Field, 0}, + {"IfStmt.Else", Field, 0}, + {"IfStmt.If", Field, 0}, + {"IfStmt.Init", Field, 0}, + {"ImportSpec", Type, 0}, + {"ImportSpec.Comment", Field, 0}, + {"ImportSpec.Doc", Field, 0}, + {"ImportSpec.EndPos", Field, 0}, + {"ImportSpec.Name", Field, 0}, + {"ImportSpec.Path", Field, 0}, + {"Importer", Type, 0}, + {"IncDecStmt", Type, 0}, + {"IncDecStmt.Tok", Field, 0}, + {"IncDecStmt.TokPos", Field, 0}, + {"IncDecStmt.X", Field, 0}, + {"IndexExpr", Type, 0}, + {"IndexExpr.Index", Field, 0}, + {"IndexExpr.Lbrack", Field, 0}, + {"IndexExpr.Rbrack", Field, 0}, + {"IndexExpr.X", Field, 0}, + {"IndexListExpr", Type, 18}, + {"IndexListExpr.Indices", Field, 18}, + {"IndexListExpr.Lbrack", Field, 18}, + {"IndexListExpr.Rbrack", Field, 18}, + {"IndexListExpr.X", Field, 18}, + {"Inspect", Func, 0}, + {"InterfaceType", Type, 0}, + {"InterfaceType.Incomplete", Field, 0}, + {"InterfaceType.Interface", Field, 0}, + {"InterfaceType.Methods", Field, 0}, + {"IsExported", Func, 0}, + {"IsGenerated", Func, 21}, + {"KeyValueExpr", Type, 0}, + {"KeyValueExpr.Colon", Field, 0}, + {"KeyValueExpr.Key", Field, 0}, + {"KeyValueExpr.Value", Field, 0}, + {"LabeledStmt", Type, 0}, + {"LabeledStmt.Colon", Field, 0}, + {"LabeledStmt.Label", Field, 0}, + {"LabeledStmt.Stmt", Field, 0}, + {"Lbl", Const, 0}, + {"MapType", Type, 0}, + {"MapType.Key", Field, 0}, + {"MapType.Map", Field, 0}, + {"MapType.Value", Field, 0}, + {"MergeMode", Type, 0}, + {"MergePackageFiles", Func, 0}, + {"NewCommentMap", Func, 1}, + {"NewIdent", Func, 0}, + {"NewObj", Func, 0}, + {"NewPackage", Func, 0}, + {"NewScope", Func, 0}, + {"Node", Type, 0}, + {"NotNilFilter", Func, 0}, + {"ObjKind", Type, 0}, + {"Object", Type, 0}, + {"Object.Data", Field, 0}, + {"Object.Decl", Field, 0}, + {"Object.Kind", Field, 0}, + {"Object.Name", Field, 0}, + {"Object.Type", Field, 0}, + {"Package", Type, 0}, + {"Package.Files", Field, 0}, + {"Package.Imports", Field, 0}, + {"Package.Name", Field, 0}, + {"Package.Scope", Field, 0}, + {"PackageExports", Func, 0}, + {"ParenExpr", Type, 0}, + {"ParenExpr.Lparen", Field, 0}, + {"ParenExpr.Rparen", Field, 0}, + {"ParenExpr.X", Field, 0}, + {"Pkg", Const, 0}, + {"Preorder", Func, 23}, + {"Print", Func, 0}, + {"RECV", Const, 0}, + {"RangeStmt", Type, 0}, + {"RangeStmt.Body", Field, 0}, + {"RangeStmt.For", Field, 0}, + {"RangeStmt.Key", Field, 0}, + {"RangeStmt.Range", Field, 20}, + {"RangeStmt.Tok", Field, 0}, + {"RangeStmt.TokPos", Field, 0}, + {"RangeStmt.Value", Field, 0}, + {"RangeStmt.X", Field, 0}, + {"ReturnStmt", Type, 0}, + {"ReturnStmt.Results", Field, 0}, + {"ReturnStmt.Return", Field, 0}, + {"SEND", Const, 0}, + {"Scope", Type, 0}, + {"Scope.Objects", Field, 0}, + {"Scope.Outer", Field, 0}, + {"SelectStmt", Type, 0}, + {"SelectStmt.Body", Field, 0}, + {"SelectStmt.Select", Field, 0}, + {"SelectorExpr", Type, 0}, + {"SelectorExpr.Sel", Field, 0}, + {"SelectorExpr.X", Field, 0}, + {"SendStmt", Type, 0}, + {"SendStmt.Arrow", Field, 0}, + {"SendStmt.Chan", Field, 0}, + {"SendStmt.Value", Field, 0}, + {"SliceExpr", Type, 0}, + {"SliceExpr.High", Field, 0}, + {"SliceExpr.Lbrack", Field, 0}, + {"SliceExpr.Low", Field, 0}, + {"SliceExpr.Max", Field, 2}, + {"SliceExpr.Rbrack", Field, 0}, + {"SliceExpr.Slice3", Field, 2}, + {"SliceExpr.X", Field, 0}, + {"SortImports", Func, 0}, + {"Spec", Type, 0}, + {"StarExpr", Type, 0}, + {"StarExpr.Star", Field, 0}, + {"StarExpr.X", Field, 0}, + {"Stmt", Type, 0}, + {"StructType", Type, 0}, + {"StructType.Fields", Field, 0}, + {"StructType.Incomplete", Field, 0}, + {"StructType.Struct", Field, 0}, + {"SwitchStmt", Type, 0}, + {"SwitchStmt.Body", Field, 0}, + {"SwitchStmt.Init", Field, 0}, + {"SwitchStmt.Switch", Field, 0}, + {"SwitchStmt.Tag", Field, 0}, + {"Typ", Const, 0}, + {"TypeAssertExpr", Type, 0}, + {"TypeAssertExpr.Lparen", Field, 2}, + {"TypeAssertExpr.Rparen", Field, 2}, + {"TypeAssertExpr.Type", Field, 0}, + {"TypeAssertExpr.X", Field, 0}, + {"TypeSpec", Type, 0}, + {"TypeSpec.Assign", Field, 9}, + {"TypeSpec.Comment", Field, 0}, + {"TypeSpec.Doc", Field, 0}, + {"TypeSpec.Name", Field, 0}, + {"TypeSpec.Type", Field, 0}, + {"TypeSpec.TypeParams", Field, 18}, + {"TypeSwitchStmt", Type, 0}, + {"TypeSwitchStmt.Assign", Field, 0}, + {"TypeSwitchStmt.Body", Field, 0}, + {"TypeSwitchStmt.Init", Field, 0}, + {"TypeSwitchStmt.Switch", Field, 0}, + {"UnaryExpr", Type, 0}, + {"UnaryExpr.Op", Field, 0}, + {"UnaryExpr.OpPos", Field, 0}, + {"UnaryExpr.X", Field, 0}, + {"Unparen", Func, 22}, + {"ValueSpec", Type, 0}, + {"ValueSpec.Comment", Field, 0}, + {"ValueSpec.Doc", Field, 0}, + {"ValueSpec.Names", Field, 0}, + {"ValueSpec.Type", Field, 0}, + {"ValueSpec.Values", Field, 0}, + {"Var", Const, 0}, + {"Visitor", Type, 0}, + {"Walk", Func, 0}, + }, + "go/build": { + {"(*Context).Import", Method, 0}, + {"(*Context).ImportDir", Method, 0}, + {"(*Context).MatchFile", Method, 2}, + {"(*Context).SrcDirs", Method, 0}, + {"(*MultiplePackageError).Error", Method, 4}, + {"(*NoGoError).Error", Method, 0}, + {"(*Package).IsCommand", Method, 0}, + {"AllowBinary", Const, 0}, + {"ArchChar", Func, 0}, + {"Context", Type, 0}, + {"Context.BuildTags", Field, 0}, + {"Context.CgoEnabled", Field, 0}, + {"Context.Compiler", Field, 0}, + {"Context.Dir", Field, 14}, + {"Context.GOARCH", Field, 0}, + {"Context.GOOS", Field, 0}, + {"Context.GOPATH", Field, 0}, + {"Context.GOROOT", Field, 0}, + {"Context.HasSubdir", Field, 0}, + {"Context.InstallSuffix", Field, 1}, + {"Context.IsAbsPath", Field, 0}, + {"Context.IsDir", Field, 0}, + {"Context.JoinPath", Field, 0}, + {"Context.OpenFile", Field, 0}, + {"Context.ReadDir", Field, 0}, + {"Context.ReleaseTags", Field, 1}, + {"Context.SplitPathList", Field, 0}, + {"Context.ToolTags", Field, 17}, + {"Context.UseAllFiles", Field, 0}, + {"Default", Var, 0}, + {"Directive", Type, 21}, + {"Directive.Pos", Field, 21}, + {"Directive.Text", Field, 21}, + {"FindOnly", Const, 0}, + {"IgnoreVendor", Const, 6}, + {"Import", Func, 0}, + {"ImportComment", Const, 4}, + {"ImportDir", Func, 0}, + {"ImportMode", Type, 0}, + {"IsLocalImport", Func, 0}, + {"MultiplePackageError", Type, 4}, + {"MultiplePackageError.Dir", Field, 4}, + {"MultiplePackageError.Files", Field, 4}, + {"MultiplePackageError.Packages", Field, 4}, + {"NoGoError", Type, 0}, + {"NoGoError.Dir", Field, 0}, + {"Package", Type, 0}, + {"Package.AllTags", Field, 2}, + {"Package.BinDir", Field, 0}, + {"Package.BinaryOnly", Field, 7}, + {"Package.CFiles", Field, 0}, + {"Package.CXXFiles", Field, 2}, + {"Package.CgoCFLAGS", Field, 0}, + {"Package.CgoCPPFLAGS", Field, 2}, + {"Package.CgoCXXFLAGS", Field, 2}, + {"Package.CgoFFLAGS", Field, 7}, + {"Package.CgoFiles", Field, 0}, + {"Package.CgoLDFLAGS", Field, 0}, + {"Package.CgoPkgConfig", Field, 0}, + {"Package.ConflictDir", Field, 2}, + {"Package.Dir", Field, 0}, + {"Package.Directives", Field, 21}, + {"Package.Doc", Field, 0}, + {"Package.EmbedPatternPos", Field, 16}, + {"Package.EmbedPatterns", Field, 16}, + {"Package.FFiles", Field, 7}, + {"Package.GoFiles", Field, 0}, + {"Package.Goroot", Field, 0}, + {"Package.HFiles", Field, 0}, + {"Package.IgnoredGoFiles", Field, 1}, + {"Package.IgnoredOtherFiles", Field, 16}, + {"Package.ImportComment", Field, 4}, + {"Package.ImportPath", Field, 0}, + {"Package.ImportPos", Field, 0}, + {"Package.Imports", Field, 0}, + {"Package.InvalidGoFiles", Field, 6}, + {"Package.MFiles", Field, 3}, + {"Package.Name", Field, 0}, + {"Package.PkgObj", Field, 0}, + {"Package.PkgRoot", Field, 0}, + {"Package.PkgTargetRoot", Field, 5}, + {"Package.Root", Field, 0}, + {"Package.SFiles", Field, 0}, + {"Package.SrcRoot", Field, 0}, + {"Package.SwigCXXFiles", Field, 1}, + {"Package.SwigFiles", Field, 1}, + {"Package.SysoFiles", Field, 0}, + {"Package.TestDirectives", Field, 21}, + {"Package.TestEmbedPatternPos", Field, 16}, + {"Package.TestEmbedPatterns", Field, 16}, + {"Package.TestGoFiles", Field, 0}, + {"Package.TestImportPos", Field, 0}, + {"Package.TestImports", Field, 0}, + {"Package.XTestDirectives", Field, 21}, + {"Package.XTestEmbedPatternPos", Field, 16}, + {"Package.XTestEmbedPatterns", Field, 16}, + {"Package.XTestGoFiles", Field, 0}, + {"Package.XTestImportPos", Field, 0}, + {"Package.XTestImports", Field, 0}, + {"ToolDir", Var, 0}, + }, + "go/build/constraint": { + {"(*AndExpr).Eval", Method, 16}, + {"(*AndExpr).String", Method, 16}, + {"(*NotExpr).Eval", Method, 16}, + {"(*NotExpr).String", Method, 16}, + {"(*OrExpr).Eval", Method, 16}, + {"(*OrExpr).String", Method, 16}, + {"(*SyntaxError).Error", Method, 16}, + {"(*TagExpr).Eval", Method, 16}, + {"(*TagExpr).String", Method, 16}, + {"AndExpr", Type, 16}, + {"AndExpr.X", Field, 16}, + {"AndExpr.Y", Field, 16}, + {"Expr", Type, 16}, + {"GoVersion", Func, 21}, + {"IsGoBuild", Func, 16}, + {"IsPlusBuild", Func, 16}, + {"NotExpr", Type, 16}, + {"NotExpr.X", Field, 16}, + {"OrExpr", Type, 16}, + {"OrExpr.X", Field, 16}, + {"OrExpr.Y", Field, 16}, + {"Parse", Func, 16}, + {"PlusBuildLines", Func, 16}, + {"SyntaxError", Type, 16}, + {"SyntaxError.Err", Field, 16}, + {"SyntaxError.Offset", Field, 16}, + {"TagExpr", Type, 16}, + {"TagExpr.Tag", Field, 16}, + }, + "go/constant": { + {"(Kind).String", Method, 18}, + {"BinaryOp", Func, 5}, + {"BitLen", Func, 5}, + {"Bool", Const, 5}, + {"BoolVal", Func, 5}, + {"Bytes", Func, 5}, + {"Compare", Func, 5}, + {"Complex", Const, 5}, + {"Denom", Func, 5}, + {"Float", Const, 5}, + {"Float32Val", Func, 5}, + {"Float64Val", Func, 5}, + {"Imag", Func, 5}, + {"Int", Const, 5}, + {"Int64Val", Func, 5}, + {"Kind", Type, 5}, + {"Make", Func, 13}, + {"MakeBool", Func, 5}, + {"MakeFloat64", Func, 5}, + {"MakeFromBytes", Func, 5}, + {"MakeFromLiteral", Func, 5}, + {"MakeImag", Func, 5}, + {"MakeInt64", Func, 5}, + {"MakeString", Func, 5}, + {"MakeUint64", Func, 5}, + {"MakeUnknown", Func, 5}, + {"Num", Func, 5}, + {"Real", Func, 5}, + {"Shift", Func, 5}, + {"Sign", Func, 5}, + {"String", Const, 5}, + {"StringVal", Func, 5}, + {"ToComplex", Func, 6}, + {"ToFloat", Func, 6}, + {"ToInt", Func, 6}, + {"Uint64Val", Func, 5}, + {"UnaryOp", Func, 5}, + {"Unknown", Const, 5}, + {"Val", Func, 13}, + {"Value", Type, 5}, + }, + "go/doc": { + {"(*Package).Filter", Method, 0}, + {"(*Package).HTML", Method, 19}, + {"(*Package).Markdown", Method, 19}, + {"(*Package).Parser", Method, 19}, + {"(*Package).Printer", Method, 19}, + {"(*Package).Synopsis", Method, 19}, + {"(*Package).Text", Method, 19}, + {"AllDecls", Const, 0}, + {"AllMethods", Const, 0}, + {"Example", Type, 0}, + {"Example.Code", Field, 0}, + {"Example.Comments", Field, 0}, + {"Example.Doc", Field, 0}, + {"Example.EmptyOutput", Field, 1}, + {"Example.Name", Field, 0}, + {"Example.Order", Field, 1}, + {"Example.Output", Field, 0}, + {"Example.Play", Field, 1}, + {"Example.Suffix", Field, 14}, + {"Example.Unordered", Field, 7}, + {"Examples", Func, 0}, + {"Filter", Type, 0}, + {"Func", Type, 0}, + {"Func.Decl", Field, 0}, + {"Func.Doc", Field, 0}, + {"Func.Examples", Field, 14}, + {"Func.Level", Field, 0}, + {"Func.Name", Field, 0}, + {"Func.Orig", Field, 0}, + {"Func.Recv", Field, 0}, + {"IllegalPrefixes", Var, 1}, + {"IsPredeclared", Func, 8}, + {"Mode", Type, 0}, + {"New", Func, 0}, + {"NewFromFiles", Func, 14}, + {"Note", Type, 1}, + {"Note.Body", Field, 1}, + {"Note.End", Field, 1}, + {"Note.Pos", Field, 1}, + {"Note.UID", Field, 1}, + {"Package", Type, 0}, + {"Package.Bugs", Field, 0}, + {"Package.Consts", Field, 0}, + {"Package.Doc", Field, 0}, + {"Package.Examples", Field, 14}, + {"Package.Filenames", Field, 0}, + {"Package.Funcs", Field, 0}, + {"Package.ImportPath", Field, 0}, + {"Package.Imports", Field, 0}, + {"Package.Name", Field, 0}, + {"Package.Notes", Field, 1}, + {"Package.Types", Field, 0}, + {"Package.Vars", Field, 0}, + {"PreserveAST", Const, 12}, + {"Synopsis", Func, 0}, + {"ToHTML", Func, 0}, + {"ToText", Func, 0}, + {"Type", Type, 0}, + {"Type.Consts", Field, 0}, + {"Type.Decl", Field, 0}, + {"Type.Doc", Field, 0}, + {"Type.Examples", Field, 14}, + {"Type.Funcs", Field, 0}, + {"Type.Methods", Field, 0}, + {"Type.Name", Field, 0}, + {"Type.Vars", Field, 0}, + {"Value", Type, 0}, + {"Value.Decl", Field, 0}, + {"Value.Doc", Field, 0}, + {"Value.Names", Field, 0}, + }, + "go/doc/comment": { + {"(*DocLink).DefaultURL", Method, 19}, + {"(*Heading).DefaultID", Method, 19}, + {"(*List).BlankBefore", Method, 19}, + {"(*List).BlankBetween", Method, 19}, + {"(*Parser).Parse", Method, 19}, + {"(*Printer).Comment", Method, 19}, + {"(*Printer).HTML", Method, 19}, + {"(*Printer).Markdown", Method, 19}, + {"(*Printer).Text", Method, 19}, + {"Block", Type, 19}, + {"Code", Type, 19}, + {"Code.Text", Field, 19}, + {"DefaultLookupPackage", Func, 19}, + {"Doc", Type, 19}, + {"Doc.Content", Field, 19}, + {"Doc.Links", Field, 19}, + {"DocLink", Type, 19}, + {"DocLink.ImportPath", Field, 19}, + {"DocLink.Name", Field, 19}, + {"DocLink.Recv", Field, 19}, + {"DocLink.Text", Field, 19}, + {"Heading", Type, 19}, + {"Heading.Text", Field, 19}, + {"Italic", Type, 19}, + {"Link", Type, 19}, + {"Link.Auto", Field, 19}, + {"Link.Text", Field, 19}, + {"Link.URL", Field, 19}, + {"LinkDef", Type, 19}, + {"LinkDef.Text", Field, 19}, + {"LinkDef.URL", Field, 19}, + {"LinkDef.Used", Field, 19}, + {"List", Type, 19}, + {"List.ForceBlankBefore", Field, 19}, + {"List.ForceBlankBetween", Field, 19}, + {"List.Items", Field, 19}, + {"ListItem", Type, 19}, + {"ListItem.Content", Field, 19}, + {"ListItem.Number", Field, 19}, + {"Paragraph", Type, 19}, + {"Paragraph.Text", Field, 19}, + {"Parser", Type, 19}, + {"Parser.LookupPackage", Field, 19}, + {"Parser.LookupSym", Field, 19}, + {"Parser.Words", Field, 19}, + {"Plain", Type, 19}, + {"Printer", Type, 19}, + {"Printer.DocLinkBaseURL", Field, 19}, + {"Printer.DocLinkURL", Field, 19}, + {"Printer.HeadingID", Field, 19}, + {"Printer.HeadingLevel", Field, 19}, + {"Printer.TextCodePrefix", Field, 19}, + {"Printer.TextPrefix", Field, 19}, + {"Printer.TextWidth", Field, 19}, + {"Text", Type, 19}, + }, + "go/format": { + {"Node", Func, 1}, + {"Source", Func, 1}, + }, + "go/importer": { + {"Default", Func, 5}, + {"For", Func, 5}, + {"ForCompiler", Func, 12}, + {"Lookup", Type, 5}, + }, + "go/parser": { + {"AllErrors", Const, 1}, + {"DeclarationErrors", Const, 0}, + {"ImportsOnly", Const, 0}, + {"Mode", Type, 0}, + {"PackageClauseOnly", Const, 0}, + {"ParseComments", Const, 0}, + {"ParseDir", Func, 0}, + {"ParseExpr", Func, 0}, + {"ParseExprFrom", Func, 5}, + {"ParseFile", Func, 0}, + {"SkipObjectResolution", Const, 17}, + {"SpuriousErrors", Const, 0}, + {"Trace", Const, 0}, + }, + "go/printer": { + {"(*Config).Fprint", Method, 0}, + {"CommentedNode", Type, 0}, + {"CommentedNode.Comments", Field, 0}, + {"CommentedNode.Node", Field, 0}, + {"Config", Type, 0}, + {"Config.Indent", Field, 1}, + {"Config.Mode", Field, 0}, + {"Config.Tabwidth", Field, 0}, + {"Fprint", Func, 0}, + {"Mode", Type, 0}, + {"RawFormat", Const, 0}, + {"SourcePos", Const, 0}, + {"TabIndent", Const, 0}, + {"UseSpaces", Const, 0}, + }, + "go/scanner": { + {"(*ErrorList).Add", Method, 0}, + {"(*ErrorList).RemoveMultiples", Method, 0}, + {"(*ErrorList).Reset", Method, 0}, + {"(*Scanner).Init", Method, 0}, + {"(*Scanner).Scan", Method, 0}, + {"(Error).Error", Method, 0}, + {"(ErrorList).Err", Method, 0}, + {"(ErrorList).Error", Method, 0}, + {"(ErrorList).Len", Method, 0}, + {"(ErrorList).Less", Method, 0}, + {"(ErrorList).Sort", Method, 0}, + {"(ErrorList).Swap", Method, 0}, + {"Error", Type, 0}, + {"Error.Msg", Field, 0}, + {"Error.Pos", Field, 0}, + {"ErrorHandler", Type, 0}, + {"ErrorList", Type, 0}, + {"Mode", Type, 0}, + {"PrintError", Func, 0}, + {"ScanComments", Const, 0}, + {"Scanner", Type, 0}, + {"Scanner.ErrorCount", Field, 0}, + }, + "go/token": { + {"(*File).AddLine", Method, 0}, + {"(*File).AddLineColumnInfo", Method, 11}, + {"(*File).AddLineInfo", Method, 0}, + {"(*File).Base", Method, 0}, + {"(*File).Line", Method, 0}, + {"(*File).LineCount", Method, 0}, + {"(*File).LineStart", Method, 12}, + {"(*File).Lines", Method, 21}, + {"(*File).MergeLine", Method, 2}, + {"(*File).Name", Method, 0}, + {"(*File).Offset", Method, 0}, + {"(*File).Pos", Method, 0}, + {"(*File).Position", Method, 0}, + {"(*File).PositionFor", Method, 4}, + {"(*File).SetLines", Method, 0}, + {"(*File).SetLinesForContent", Method, 0}, + {"(*File).Size", Method, 0}, + {"(*FileSet).AddFile", Method, 0}, + {"(*FileSet).Base", Method, 0}, + {"(*FileSet).File", Method, 0}, + {"(*FileSet).Iterate", Method, 0}, + {"(*FileSet).Position", Method, 0}, + {"(*FileSet).PositionFor", Method, 4}, + {"(*FileSet).Read", Method, 0}, + {"(*FileSet).RemoveFile", Method, 20}, + {"(*FileSet).Write", Method, 0}, + {"(*Position).IsValid", Method, 0}, + {"(Pos).IsValid", Method, 0}, + {"(Position).String", Method, 0}, + {"(Token).IsKeyword", Method, 0}, + {"(Token).IsLiteral", Method, 0}, + {"(Token).IsOperator", Method, 0}, + {"(Token).Precedence", Method, 0}, + {"(Token).String", Method, 0}, + {"ADD", Const, 0}, + {"ADD_ASSIGN", Const, 0}, + {"AND", Const, 0}, + {"AND_ASSIGN", Const, 0}, + {"AND_NOT", Const, 0}, + {"AND_NOT_ASSIGN", Const, 0}, + {"ARROW", Const, 0}, + {"ASSIGN", Const, 0}, + {"BREAK", Const, 0}, + {"CASE", Const, 0}, + {"CHAN", Const, 0}, + {"CHAR", Const, 0}, + {"COLON", Const, 0}, + {"COMMA", Const, 0}, + {"COMMENT", Const, 0}, + {"CONST", Const, 0}, + {"CONTINUE", Const, 0}, + {"DEC", Const, 0}, + {"DEFAULT", Const, 0}, + {"DEFER", Const, 0}, + {"DEFINE", Const, 0}, + {"ELLIPSIS", Const, 0}, + {"ELSE", Const, 0}, + {"EOF", Const, 0}, + {"EQL", Const, 0}, + {"FALLTHROUGH", Const, 0}, + {"FLOAT", Const, 0}, + {"FOR", Const, 0}, + {"FUNC", Const, 0}, + {"File", Type, 0}, + {"FileSet", Type, 0}, + {"GEQ", Const, 0}, + {"GO", Const, 0}, + {"GOTO", Const, 0}, + {"GTR", Const, 0}, + {"HighestPrec", Const, 0}, + {"IDENT", Const, 0}, + {"IF", Const, 0}, + {"ILLEGAL", Const, 0}, + {"IMAG", Const, 0}, + {"IMPORT", Const, 0}, + {"INC", Const, 0}, + {"INT", Const, 0}, + {"INTERFACE", Const, 0}, + {"IsExported", Func, 13}, + {"IsIdentifier", Func, 13}, + {"IsKeyword", Func, 13}, + {"LAND", Const, 0}, + {"LBRACE", Const, 0}, + {"LBRACK", Const, 0}, + {"LEQ", Const, 0}, + {"LOR", Const, 0}, + {"LPAREN", Const, 0}, + {"LSS", Const, 0}, + {"Lookup", Func, 0}, + {"LowestPrec", Const, 0}, + {"MAP", Const, 0}, + {"MUL", Const, 0}, + {"MUL_ASSIGN", Const, 0}, + {"NEQ", Const, 0}, + {"NOT", Const, 0}, + {"NewFileSet", Func, 0}, + {"NoPos", Const, 0}, + {"OR", Const, 0}, + {"OR_ASSIGN", Const, 0}, + {"PACKAGE", Const, 0}, + {"PERIOD", Const, 0}, + {"Pos", Type, 0}, + {"Position", Type, 0}, + {"Position.Column", Field, 0}, + {"Position.Filename", Field, 0}, + {"Position.Line", Field, 0}, + {"Position.Offset", Field, 0}, + {"QUO", Const, 0}, + {"QUO_ASSIGN", Const, 0}, + {"RANGE", Const, 0}, + {"RBRACE", Const, 0}, + {"RBRACK", Const, 0}, + {"REM", Const, 0}, + {"REM_ASSIGN", Const, 0}, + {"RETURN", Const, 0}, + {"RPAREN", Const, 0}, + {"SELECT", Const, 0}, + {"SEMICOLON", Const, 0}, + {"SHL", Const, 0}, + {"SHL_ASSIGN", Const, 0}, + {"SHR", Const, 0}, + {"SHR_ASSIGN", Const, 0}, + {"STRING", Const, 0}, + {"STRUCT", Const, 0}, + {"SUB", Const, 0}, + {"SUB_ASSIGN", Const, 0}, + {"SWITCH", Const, 0}, + {"TILDE", Const, 18}, + {"TYPE", Const, 0}, + {"Token", Type, 0}, + {"UnaryPrec", Const, 0}, + {"VAR", Const, 0}, + {"XOR", Const, 0}, + {"XOR_ASSIGN", Const, 0}, + }, + "go/types": { + {"(*Alias).Obj", Method, 22}, + {"(*Alias).Origin", Method, 23}, + {"(*Alias).Rhs", Method, 23}, + {"(*Alias).SetTypeParams", Method, 23}, + {"(*Alias).String", Method, 22}, + {"(*Alias).TypeArgs", Method, 23}, + {"(*Alias).TypeParams", Method, 23}, + {"(*Alias).Underlying", Method, 22}, + {"(*ArgumentError).Error", Method, 18}, + {"(*ArgumentError).Unwrap", Method, 18}, + {"(*Array).Elem", Method, 5}, + {"(*Array).Len", Method, 5}, + {"(*Array).String", Method, 5}, + {"(*Array).Underlying", Method, 5}, + {"(*Basic).Info", Method, 5}, + {"(*Basic).Kind", Method, 5}, + {"(*Basic).Name", Method, 5}, + {"(*Basic).String", Method, 5}, + {"(*Basic).Underlying", Method, 5}, + {"(*Builtin).Exported", Method, 5}, + {"(*Builtin).Id", Method, 5}, + {"(*Builtin).Name", Method, 5}, + {"(*Builtin).Parent", Method, 5}, + {"(*Builtin).Pkg", Method, 5}, + {"(*Builtin).Pos", Method, 5}, + {"(*Builtin).String", Method, 5}, + {"(*Builtin).Type", Method, 5}, + {"(*Chan).Dir", Method, 5}, + {"(*Chan).Elem", Method, 5}, + {"(*Chan).String", Method, 5}, + {"(*Chan).Underlying", Method, 5}, + {"(*Checker).Files", Method, 5}, + {"(*Config).Check", Method, 5}, + {"(*Const).Exported", Method, 5}, + {"(*Const).Id", Method, 5}, + {"(*Const).Name", Method, 5}, + {"(*Const).Parent", Method, 5}, + {"(*Const).Pkg", Method, 5}, + {"(*Const).Pos", Method, 5}, + {"(*Const).String", Method, 5}, + {"(*Const).Type", Method, 5}, + {"(*Const).Val", Method, 5}, + {"(*Func).Exported", Method, 5}, + {"(*Func).FullName", Method, 5}, + {"(*Func).Id", Method, 5}, + {"(*Func).Name", Method, 5}, + {"(*Func).Origin", Method, 19}, + {"(*Func).Parent", Method, 5}, + {"(*Func).Pkg", Method, 5}, + {"(*Func).Pos", Method, 5}, + {"(*Func).Scope", Method, 5}, + {"(*Func).Signature", Method, 23}, + {"(*Func).String", Method, 5}, + {"(*Func).Type", Method, 5}, + {"(*Info).ObjectOf", Method, 5}, + {"(*Info).PkgNameOf", Method, 22}, + {"(*Info).TypeOf", Method, 5}, + {"(*Initializer).String", Method, 5}, + {"(*Interface).Complete", Method, 5}, + {"(*Interface).Embedded", Method, 5}, + {"(*Interface).EmbeddedType", Method, 11}, + {"(*Interface).Empty", Method, 5}, + {"(*Interface).ExplicitMethod", Method, 5}, + {"(*Interface).IsComparable", Method, 18}, + {"(*Interface).IsImplicit", Method, 18}, + {"(*Interface).IsMethodSet", Method, 18}, + {"(*Interface).MarkImplicit", Method, 18}, + {"(*Interface).Method", Method, 5}, + {"(*Interface).NumEmbeddeds", Method, 5}, + {"(*Interface).NumExplicitMethods", Method, 5}, + {"(*Interface).NumMethods", Method, 5}, + {"(*Interface).String", Method, 5}, + {"(*Interface).Underlying", Method, 5}, + {"(*Label).Exported", Method, 5}, + {"(*Label).Id", Method, 5}, + {"(*Label).Name", Method, 5}, + {"(*Label).Parent", Method, 5}, + {"(*Label).Pkg", Method, 5}, + {"(*Label).Pos", Method, 5}, + {"(*Label).String", Method, 5}, + {"(*Label).Type", Method, 5}, + {"(*Map).Elem", Method, 5}, + {"(*Map).Key", Method, 5}, + {"(*Map).String", Method, 5}, + {"(*Map).Underlying", Method, 5}, + {"(*MethodSet).At", Method, 5}, + {"(*MethodSet).Len", Method, 5}, + {"(*MethodSet).Lookup", Method, 5}, + {"(*MethodSet).String", Method, 5}, + {"(*Named).AddMethod", Method, 5}, + {"(*Named).Method", Method, 5}, + {"(*Named).NumMethods", Method, 5}, + {"(*Named).Obj", Method, 5}, + {"(*Named).Origin", Method, 18}, + {"(*Named).SetTypeParams", Method, 18}, + {"(*Named).SetUnderlying", Method, 5}, + {"(*Named).String", Method, 5}, + {"(*Named).TypeArgs", Method, 18}, + {"(*Named).TypeParams", Method, 18}, + {"(*Named).Underlying", Method, 5}, + {"(*Nil).Exported", Method, 5}, + {"(*Nil).Id", Method, 5}, + {"(*Nil).Name", Method, 5}, + {"(*Nil).Parent", Method, 5}, + {"(*Nil).Pkg", Method, 5}, + {"(*Nil).Pos", Method, 5}, + {"(*Nil).String", Method, 5}, + {"(*Nil).Type", Method, 5}, + {"(*Package).Complete", Method, 5}, + {"(*Package).GoVersion", Method, 21}, + {"(*Package).Imports", Method, 5}, + {"(*Package).MarkComplete", Method, 5}, + {"(*Package).Name", Method, 5}, + {"(*Package).Path", Method, 5}, + {"(*Package).Scope", Method, 5}, + {"(*Package).SetImports", Method, 5}, + {"(*Package).SetName", Method, 6}, + {"(*Package).String", Method, 5}, + {"(*PkgName).Exported", Method, 5}, + {"(*PkgName).Id", Method, 5}, + {"(*PkgName).Imported", Method, 5}, + {"(*PkgName).Name", Method, 5}, + {"(*PkgName).Parent", Method, 5}, + {"(*PkgName).Pkg", Method, 5}, + {"(*PkgName).Pos", Method, 5}, + {"(*PkgName).String", Method, 5}, + {"(*PkgName).Type", Method, 5}, + {"(*Pointer).Elem", Method, 5}, + {"(*Pointer).String", Method, 5}, + {"(*Pointer).Underlying", Method, 5}, + {"(*Scope).Child", Method, 5}, + {"(*Scope).Contains", Method, 5}, + {"(*Scope).End", Method, 5}, + {"(*Scope).Innermost", Method, 5}, + {"(*Scope).Insert", Method, 5}, + {"(*Scope).Len", Method, 5}, + {"(*Scope).Lookup", Method, 5}, + {"(*Scope).LookupParent", Method, 5}, + {"(*Scope).Names", Method, 5}, + {"(*Scope).NumChildren", Method, 5}, + {"(*Scope).Parent", Method, 5}, + {"(*Scope).Pos", Method, 5}, + {"(*Scope).String", Method, 5}, + {"(*Scope).WriteTo", Method, 5}, + {"(*Selection).Index", Method, 5}, + {"(*Selection).Indirect", Method, 5}, + {"(*Selection).Kind", Method, 5}, + {"(*Selection).Obj", Method, 5}, + {"(*Selection).Recv", Method, 5}, + {"(*Selection).String", Method, 5}, + {"(*Selection).Type", Method, 5}, + {"(*Signature).Params", Method, 5}, + {"(*Signature).Recv", Method, 5}, + {"(*Signature).RecvTypeParams", Method, 18}, + {"(*Signature).Results", Method, 5}, + {"(*Signature).String", Method, 5}, + {"(*Signature).TypeParams", Method, 18}, + {"(*Signature).Underlying", Method, 5}, + {"(*Signature).Variadic", Method, 5}, + {"(*Slice).Elem", Method, 5}, + {"(*Slice).String", Method, 5}, + {"(*Slice).Underlying", Method, 5}, + {"(*StdSizes).Alignof", Method, 5}, + {"(*StdSizes).Offsetsof", Method, 5}, + {"(*StdSizes).Sizeof", Method, 5}, + {"(*Struct).Field", Method, 5}, + {"(*Struct).NumFields", Method, 5}, + {"(*Struct).String", Method, 5}, + {"(*Struct).Tag", Method, 5}, + {"(*Struct).Underlying", Method, 5}, + {"(*Term).String", Method, 18}, + {"(*Term).Tilde", Method, 18}, + {"(*Term).Type", Method, 18}, + {"(*Tuple).At", Method, 5}, + {"(*Tuple).Len", Method, 5}, + {"(*Tuple).String", Method, 5}, + {"(*Tuple).Underlying", Method, 5}, + {"(*TypeList).At", Method, 18}, + {"(*TypeList).Len", Method, 18}, + {"(*TypeName).Exported", Method, 5}, + {"(*TypeName).Id", Method, 5}, + {"(*TypeName).IsAlias", Method, 9}, + {"(*TypeName).Name", Method, 5}, + {"(*TypeName).Parent", Method, 5}, + {"(*TypeName).Pkg", Method, 5}, + {"(*TypeName).Pos", Method, 5}, + {"(*TypeName).String", Method, 5}, + {"(*TypeName).Type", Method, 5}, + {"(*TypeParam).Constraint", Method, 18}, + {"(*TypeParam).Index", Method, 18}, + {"(*TypeParam).Obj", Method, 18}, + {"(*TypeParam).SetConstraint", Method, 18}, + {"(*TypeParam).String", Method, 18}, + {"(*TypeParam).Underlying", Method, 18}, + {"(*TypeParamList).At", Method, 18}, + {"(*TypeParamList).Len", Method, 18}, + {"(*Union).Len", Method, 18}, + {"(*Union).String", Method, 18}, + {"(*Union).Term", Method, 18}, + {"(*Union).Underlying", Method, 18}, + {"(*Var).Anonymous", Method, 5}, + {"(*Var).Embedded", Method, 11}, + {"(*Var).Exported", Method, 5}, + {"(*Var).Id", Method, 5}, + {"(*Var).IsField", Method, 5}, + {"(*Var).Name", Method, 5}, + {"(*Var).Origin", Method, 19}, + {"(*Var).Parent", Method, 5}, + {"(*Var).Pkg", Method, 5}, + {"(*Var).Pos", Method, 5}, + {"(*Var).String", Method, 5}, + {"(*Var).Type", Method, 5}, + {"(Checker).ObjectOf", Method, 5}, + {"(Checker).PkgNameOf", Method, 22}, + {"(Checker).TypeOf", Method, 5}, + {"(Error).Error", Method, 5}, + {"(TypeAndValue).Addressable", Method, 5}, + {"(TypeAndValue).Assignable", Method, 5}, + {"(TypeAndValue).HasOk", Method, 5}, + {"(TypeAndValue).IsBuiltin", Method, 5}, + {"(TypeAndValue).IsNil", Method, 5}, + {"(TypeAndValue).IsType", Method, 5}, + {"(TypeAndValue).IsValue", Method, 5}, + {"(TypeAndValue).IsVoid", Method, 5}, + {"Alias", Type, 22}, + {"ArgumentError", Type, 18}, + {"ArgumentError.Err", Field, 18}, + {"ArgumentError.Index", Field, 18}, + {"Array", Type, 5}, + {"AssertableTo", Func, 5}, + {"AssignableTo", Func, 5}, + {"Basic", Type, 5}, + {"BasicInfo", Type, 5}, + {"BasicKind", Type, 5}, + {"Bool", Const, 5}, + {"Builtin", Type, 5}, + {"Byte", Const, 5}, + {"Chan", Type, 5}, + {"ChanDir", Type, 5}, + {"CheckExpr", Func, 13}, + {"Checker", Type, 5}, + {"Checker.Info", Field, 5}, + {"Comparable", Func, 5}, + {"Complex128", Const, 5}, + {"Complex64", Const, 5}, + {"Config", Type, 5}, + {"Config.Context", Field, 18}, + {"Config.DisableUnusedImportCheck", Field, 5}, + {"Config.Error", Field, 5}, + {"Config.FakeImportC", Field, 5}, + {"Config.GoVersion", Field, 18}, + {"Config.IgnoreFuncBodies", Field, 5}, + {"Config.Importer", Field, 5}, + {"Config.Sizes", Field, 5}, + {"Const", Type, 5}, + {"Context", Type, 18}, + {"ConvertibleTo", Func, 5}, + {"DefPredeclaredTestFuncs", Func, 5}, + {"Default", Func, 8}, + {"Error", Type, 5}, + {"Error.Fset", Field, 5}, + {"Error.Msg", Field, 5}, + {"Error.Pos", Field, 5}, + {"Error.Soft", Field, 5}, + {"Eval", Func, 5}, + {"ExprString", Func, 5}, + {"FieldVal", Const, 5}, + {"Float32", Const, 5}, + {"Float64", Const, 5}, + {"Func", Type, 5}, + {"Id", Func, 5}, + {"Identical", Func, 5}, + {"IdenticalIgnoreTags", Func, 8}, + {"Implements", Func, 5}, + {"ImportMode", Type, 6}, + {"Importer", Type, 5}, + {"ImporterFrom", Type, 6}, + {"Info", Type, 5}, + {"Info.Defs", Field, 5}, + {"Info.FileVersions", Field, 22}, + {"Info.Implicits", Field, 5}, + {"Info.InitOrder", Field, 5}, + {"Info.Instances", Field, 18}, + {"Info.Scopes", Field, 5}, + {"Info.Selections", Field, 5}, + {"Info.Types", Field, 5}, + {"Info.Uses", Field, 5}, + {"Initializer", Type, 5}, + {"Initializer.Lhs", Field, 5}, + {"Initializer.Rhs", Field, 5}, + {"Instance", Type, 18}, + {"Instance.Type", Field, 18}, + {"Instance.TypeArgs", Field, 18}, + {"Instantiate", Func, 18}, + {"Int", Const, 5}, + {"Int16", Const, 5}, + {"Int32", Const, 5}, + {"Int64", Const, 5}, + {"Int8", Const, 5}, + {"Interface", Type, 5}, + {"Invalid", Const, 5}, + {"IsBoolean", Const, 5}, + {"IsComplex", Const, 5}, + {"IsConstType", Const, 5}, + {"IsFloat", Const, 5}, + {"IsInteger", Const, 5}, + {"IsInterface", Func, 5}, + {"IsNumeric", Const, 5}, + {"IsOrdered", Const, 5}, + {"IsString", Const, 5}, + {"IsUnsigned", Const, 5}, + {"IsUntyped", Const, 5}, + {"Label", Type, 5}, + {"LookupFieldOrMethod", Func, 5}, + {"Map", Type, 5}, + {"MethodExpr", Const, 5}, + {"MethodSet", Type, 5}, + {"MethodVal", Const, 5}, + {"MissingMethod", Func, 5}, + {"Named", Type, 5}, + {"NewAlias", Func, 22}, + {"NewArray", Func, 5}, + {"NewChan", Func, 5}, + {"NewChecker", Func, 5}, + {"NewConst", Func, 5}, + {"NewContext", Func, 18}, + {"NewField", Func, 5}, + {"NewFunc", Func, 5}, + {"NewInterface", Func, 5}, + {"NewInterfaceType", Func, 11}, + {"NewLabel", Func, 5}, + {"NewMap", Func, 5}, + {"NewMethodSet", Func, 5}, + {"NewNamed", Func, 5}, + {"NewPackage", Func, 5}, + {"NewParam", Func, 5}, + {"NewPkgName", Func, 5}, + {"NewPointer", Func, 5}, + {"NewScope", Func, 5}, + {"NewSignature", Func, 5}, + {"NewSignatureType", Func, 18}, + {"NewSlice", Func, 5}, + {"NewStruct", Func, 5}, + {"NewTerm", Func, 18}, + {"NewTuple", Func, 5}, + {"NewTypeName", Func, 5}, + {"NewTypeParam", Func, 18}, + {"NewUnion", Func, 18}, + {"NewVar", Func, 5}, + {"Nil", Type, 5}, + {"Object", Type, 5}, + {"ObjectString", Func, 5}, + {"Package", Type, 5}, + {"PkgName", Type, 5}, + {"Pointer", Type, 5}, + {"Qualifier", Type, 5}, + {"RecvOnly", Const, 5}, + {"RelativeTo", Func, 5}, + {"Rune", Const, 5}, + {"Satisfies", Func, 20}, + {"Scope", Type, 5}, + {"Selection", Type, 5}, + {"SelectionKind", Type, 5}, + {"SelectionString", Func, 5}, + {"SendOnly", Const, 5}, + {"SendRecv", Const, 5}, + {"Signature", Type, 5}, + {"Sizes", Type, 5}, + {"SizesFor", Func, 9}, + {"Slice", Type, 5}, + {"StdSizes", Type, 5}, + {"StdSizes.MaxAlign", Field, 5}, + {"StdSizes.WordSize", Field, 5}, + {"String", Const, 5}, + {"Struct", Type, 5}, + {"Term", Type, 18}, + {"Tuple", Type, 5}, + {"Typ", Var, 5}, + {"Type", Type, 5}, + {"TypeAndValue", Type, 5}, + {"TypeAndValue.Type", Field, 5}, + {"TypeAndValue.Value", Field, 5}, + {"TypeList", Type, 18}, + {"TypeName", Type, 5}, + {"TypeParam", Type, 18}, + {"TypeParamList", Type, 18}, + {"TypeString", Func, 5}, + {"Uint", Const, 5}, + {"Uint16", Const, 5}, + {"Uint32", Const, 5}, + {"Uint64", Const, 5}, + {"Uint8", Const, 5}, + {"Uintptr", Const, 5}, + {"Unalias", Func, 22}, + {"Union", Type, 18}, + {"Universe", Var, 5}, + {"Unsafe", Var, 5}, + {"UnsafePointer", Const, 5}, + {"UntypedBool", Const, 5}, + {"UntypedComplex", Const, 5}, + {"UntypedFloat", Const, 5}, + {"UntypedInt", Const, 5}, + {"UntypedNil", Const, 5}, + {"UntypedRune", Const, 5}, + {"UntypedString", Const, 5}, + {"Var", Type, 5}, + {"WriteExpr", Func, 5}, + {"WriteSignature", Func, 5}, + {"WriteType", Func, 5}, + }, + "go/version": { + {"Compare", Func, 22}, + {"IsValid", Func, 22}, + {"Lang", Func, 22}, + }, + "hash": { + {"Hash", Type, 0}, + {"Hash32", Type, 0}, + {"Hash64", Type, 0}, + }, + "hash/adler32": { + {"Checksum", Func, 0}, + {"New", Func, 0}, + {"Size", Const, 0}, + }, + "hash/crc32": { + {"Castagnoli", Const, 0}, + {"Checksum", Func, 0}, + {"ChecksumIEEE", Func, 0}, + {"IEEE", Const, 0}, + {"IEEETable", Var, 0}, + {"Koopman", Const, 0}, + {"MakeTable", Func, 0}, + {"New", Func, 0}, + {"NewIEEE", Func, 0}, + {"Size", Const, 0}, + {"Table", Type, 0}, + {"Update", Func, 0}, + }, + "hash/crc64": { + {"Checksum", Func, 0}, + {"ECMA", Const, 0}, + {"ISO", Const, 0}, + {"MakeTable", Func, 0}, + {"New", Func, 0}, + {"Size", Const, 0}, + {"Table", Type, 0}, + {"Update", Func, 0}, + }, + "hash/fnv": { + {"New128", Func, 9}, + {"New128a", Func, 9}, + {"New32", Func, 0}, + {"New32a", Func, 0}, + {"New64", Func, 0}, + {"New64a", Func, 0}, + }, + "hash/maphash": { + {"(*Hash).BlockSize", Method, 14}, + {"(*Hash).Reset", Method, 14}, + {"(*Hash).Seed", Method, 14}, + {"(*Hash).SetSeed", Method, 14}, + {"(*Hash).Size", Method, 14}, + {"(*Hash).Sum", Method, 14}, + {"(*Hash).Sum64", Method, 14}, + {"(*Hash).Write", Method, 14}, + {"(*Hash).WriteByte", Method, 14}, + {"(*Hash).WriteString", Method, 14}, + {"Bytes", Func, 19}, + {"Hash", Type, 14}, + {"MakeSeed", Func, 14}, + {"Seed", Type, 14}, + {"String", Func, 19}, + }, + "html": { + {"EscapeString", Func, 0}, + {"UnescapeString", Func, 0}, + }, + "html/template": { + {"(*Error).Error", Method, 0}, + {"(*Template).AddParseTree", Method, 0}, + {"(*Template).Clone", Method, 0}, + {"(*Template).DefinedTemplates", Method, 6}, + {"(*Template).Delims", Method, 0}, + {"(*Template).Execute", Method, 0}, + {"(*Template).ExecuteTemplate", Method, 0}, + {"(*Template).Funcs", Method, 0}, + {"(*Template).Lookup", Method, 0}, + {"(*Template).Name", Method, 0}, + {"(*Template).New", Method, 0}, + {"(*Template).Option", Method, 5}, + {"(*Template).Parse", Method, 0}, + {"(*Template).ParseFS", Method, 16}, + {"(*Template).ParseFiles", Method, 0}, + {"(*Template).ParseGlob", Method, 0}, + {"(*Template).Templates", Method, 0}, + {"CSS", Type, 0}, + {"ErrAmbigContext", Const, 0}, + {"ErrBadHTML", Const, 0}, + {"ErrBranchEnd", Const, 0}, + {"ErrEndContext", Const, 0}, + {"ErrJSTemplate", Const, 21}, + {"ErrNoSuchTemplate", Const, 0}, + {"ErrOutputContext", Const, 0}, + {"ErrPartialCharset", Const, 0}, + {"ErrPartialEscape", Const, 0}, + {"ErrPredefinedEscaper", Const, 9}, + {"ErrRangeLoopReentry", Const, 0}, + {"ErrSlashAmbig", Const, 0}, + {"Error", Type, 0}, + {"Error.Description", Field, 0}, + {"Error.ErrorCode", Field, 0}, + {"Error.Line", Field, 0}, + {"Error.Name", Field, 0}, + {"Error.Node", Field, 4}, + {"ErrorCode", Type, 0}, + {"FuncMap", Type, 0}, + {"HTML", Type, 0}, + {"HTMLAttr", Type, 0}, + {"HTMLEscape", Func, 0}, + {"HTMLEscapeString", Func, 0}, + {"HTMLEscaper", Func, 0}, + {"IsTrue", Func, 6}, + {"JS", Type, 0}, + {"JSEscape", Func, 0}, + {"JSEscapeString", Func, 0}, + {"JSEscaper", Func, 0}, + {"JSStr", Type, 0}, + {"Must", Func, 0}, + {"New", Func, 0}, + {"OK", Const, 0}, + {"ParseFS", Func, 16}, + {"ParseFiles", Func, 0}, + {"ParseGlob", Func, 0}, + {"Srcset", Type, 10}, + {"Template", Type, 0}, + {"Template.Tree", Field, 2}, + {"URL", Type, 0}, + {"URLQueryEscaper", Func, 0}, + }, + "image": { + {"(*Alpha).AlphaAt", Method, 4}, + {"(*Alpha).At", Method, 0}, + {"(*Alpha).Bounds", Method, 0}, + {"(*Alpha).ColorModel", Method, 0}, + {"(*Alpha).Opaque", Method, 0}, + {"(*Alpha).PixOffset", Method, 0}, + {"(*Alpha).RGBA64At", Method, 17}, + {"(*Alpha).Set", Method, 0}, + {"(*Alpha).SetAlpha", Method, 0}, + {"(*Alpha).SetRGBA64", Method, 17}, + {"(*Alpha).SubImage", Method, 0}, + {"(*Alpha16).Alpha16At", Method, 4}, + {"(*Alpha16).At", Method, 0}, + {"(*Alpha16).Bounds", Method, 0}, + {"(*Alpha16).ColorModel", Method, 0}, + {"(*Alpha16).Opaque", Method, 0}, + {"(*Alpha16).PixOffset", Method, 0}, + {"(*Alpha16).RGBA64At", Method, 17}, + {"(*Alpha16).Set", Method, 0}, + {"(*Alpha16).SetAlpha16", Method, 0}, + {"(*Alpha16).SetRGBA64", Method, 17}, + {"(*Alpha16).SubImage", Method, 0}, + {"(*CMYK).At", Method, 5}, + {"(*CMYK).Bounds", Method, 5}, + {"(*CMYK).CMYKAt", Method, 5}, + {"(*CMYK).ColorModel", Method, 5}, + {"(*CMYK).Opaque", Method, 5}, + {"(*CMYK).PixOffset", Method, 5}, + {"(*CMYK).RGBA64At", Method, 17}, + {"(*CMYK).Set", Method, 5}, + {"(*CMYK).SetCMYK", Method, 5}, + {"(*CMYK).SetRGBA64", Method, 17}, + {"(*CMYK).SubImage", Method, 5}, + {"(*Gray).At", Method, 0}, + {"(*Gray).Bounds", Method, 0}, + {"(*Gray).ColorModel", Method, 0}, + {"(*Gray).GrayAt", Method, 4}, + {"(*Gray).Opaque", Method, 0}, + {"(*Gray).PixOffset", Method, 0}, + {"(*Gray).RGBA64At", Method, 17}, + {"(*Gray).Set", Method, 0}, + {"(*Gray).SetGray", Method, 0}, + {"(*Gray).SetRGBA64", Method, 17}, + {"(*Gray).SubImage", Method, 0}, + {"(*Gray16).At", Method, 0}, + {"(*Gray16).Bounds", Method, 0}, + {"(*Gray16).ColorModel", Method, 0}, + {"(*Gray16).Gray16At", Method, 4}, + {"(*Gray16).Opaque", Method, 0}, + {"(*Gray16).PixOffset", Method, 0}, + {"(*Gray16).RGBA64At", Method, 17}, + {"(*Gray16).Set", Method, 0}, + {"(*Gray16).SetGray16", Method, 0}, + {"(*Gray16).SetRGBA64", Method, 17}, + {"(*Gray16).SubImage", Method, 0}, + {"(*NRGBA).At", Method, 0}, + {"(*NRGBA).Bounds", Method, 0}, + {"(*NRGBA).ColorModel", Method, 0}, + {"(*NRGBA).NRGBAAt", Method, 4}, + {"(*NRGBA).Opaque", Method, 0}, + {"(*NRGBA).PixOffset", Method, 0}, + {"(*NRGBA).RGBA64At", Method, 17}, + {"(*NRGBA).Set", Method, 0}, + {"(*NRGBA).SetNRGBA", Method, 0}, + {"(*NRGBA).SetRGBA64", Method, 17}, + {"(*NRGBA).SubImage", Method, 0}, + {"(*NRGBA64).At", Method, 0}, + {"(*NRGBA64).Bounds", Method, 0}, + {"(*NRGBA64).ColorModel", Method, 0}, + {"(*NRGBA64).NRGBA64At", Method, 4}, + {"(*NRGBA64).Opaque", Method, 0}, + {"(*NRGBA64).PixOffset", Method, 0}, + {"(*NRGBA64).RGBA64At", Method, 17}, + {"(*NRGBA64).Set", Method, 0}, + {"(*NRGBA64).SetNRGBA64", Method, 0}, + {"(*NRGBA64).SetRGBA64", Method, 17}, + {"(*NRGBA64).SubImage", Method, 0}, + {"(*NYCbCrA).AOffset", Method, 6}, + {"(*NYCbCrA).At", Method, 6}, + {"(*NYCbCrA).Bounds", Method, 6}, + {"(*NYCbCrA).COffset", Method, 6}, + {"(*NYCbCrA).ColorModel", Method, 6}, + {"(*NYCbCrA).NYCbCrAAt", Method, 6}, + {"(*NYCbCrA).Opaque", Method, 6}, + {"(*NYCbCrA).RGBA64At", Method, 17}, + {"(*NYCbCrA).SubImage", Method, 6}, + {"(*NYCbCrA).YCbCrAt", Method, 6}, + {"(*NYCbCrA).YOffset", Method, 6}, + {"(*Paletted).At", Method, 0}, + {"(*Paletted).Bounds", Method, 0}, + {"(*Paletted).ColorIndexAt", Method, 0}, + {"(*Paletted).ColorModel", Method, 0}, + {"(*Paletted).Opaque", Method, 0}, + {"(*Paletted).PixOffset", Method, 0}, + {"(*Paletted).RGBA64At", Method, 17}, + {"(*Paletted).Set", Method, 0}, + {"(*Paletted).SetColorIndex", Method, 0}, + {"(*Paletted).SetRGBA64", Method, 17}, + {"(*Paletted).SubImage", Method, 0}, + {"(*RGBA).At", Method, 0}, + {"(*RGBA).Bounds", Method, 0}, + {"(*RGBA).ColorModel", Method, 0}, + {"(*RGBA).Opaque", Method, 0}, + {"(*RGBA).PixOffset", Method, 0}, + {"(*RGBA).RGBA64At", Method, 17}, + {"(*RGBA).RGBAAt", Method, 4}, + {"(*RGBA).Set", Method, 0}, + {"(*RGBA).SetRGBA", Method, 0}, + {"(*RGBA).SetRGBA64", Method, 17}, + {"(*RGBA).SubImage", Method, 0}, + {"(*RGBA64).At", Method, 0}, + {"(*RGBA64).Bounds", Method, 0}, + {"(*RGBA64).ColorModel", Method, 0}, + {"(*RGBA64).Opaque", Method, 0}, + {"(*RGBA64).PixOffset", Method, 0}, + {"(*RGBA64).RGBA64At", Method, 4}, + {"(*RGBA64).Set", Method, 0}, + {"(*RGBA64).SetRGBA64", Method, 0}, + {"(*RGBA64).SubImage", Method, 0}, + {"(*Uniform).At", Method, 0}, + {"(*Uniform).Bounds", Method, 0}, + {"(*Uniform).ColorModel", Method, 0}, + {"(*Uniform).Convert", Method, 0}, + {"(*Uniform).Opaque", Method, 0}, + {"(*Uniform).RGBA", Method, 0}, + {"(*Uniform).RGBA64At", Method, 17}, + {"(*YCbCr).At", Method, 0}, + {"(*YCbCr).Bounds", Method, 0}, + {"(*YCbCr).COffset", Method, 0}, + {"(*YCbCr).ColorModel", Method, 0}, + {"(*YCbCr).Opaque", Method, 0}, + {"(*YCbCr).RGBA64At", Method, 17}, + {"(*YCbCr).SubImage", Method, 0}, + {"(*YCbCr).YCbCrAt", Method, 4}, + {"(*YCbCr).YOffset", Method, 0}, + {"(Point).Add", Method, 0}, + {"(Point).Div", Method, 0}, + {"(Point).Eq", Method, 0}, + {"(Point).In", Method, 0}, + {"(Point).Mod", Method, 0}, + {"(Point).Mul", Method, 0}, + {"(Point).String", Method, 0}, + {"(Point).Sub", Method, 0}, + {"(Rectangle).Add", Method, 0}, + {"(Rectangle).At", Method, 5}, + {"(Rectangle).Bounds", Method, 5}, + {"(Rectangle).Canon", Method, 0}, + {"(Rectangle).ColorModel", Method, 5}, + {"(Rectangle).Dx", Method, 0}, + {"(Rectangle).Dy", Method, 0}, + {"(Rectangle).Empty", Method, 0}, + {"(Rectangle).Eq", Method, 0}, + {"(Rectangle).In", Method, 0}, + {"(Rectangle).Inset", Method, 0}, + {"(Rectangle).Intersect", Method, 0}, + {"(Rectangle).Overlaps", Method, 0}, + {"(Rectangle).RGBA64At", Method, 17}, + {"(Rectangle).Size", Method, 0}, + {"(Rectangle).String", Method, 0}, + {"(Rectangle).Sub", Method, 0}, + {"(Rectangle).Union", Method, 0}, + {"(YCbCrSubsampleRatio).String", Method, 0}, + {"Alpha", Type, 0}, + {"Alpha.Pix", Field, 0}, + {"Alpha.Rect", Field, 0}, + {"Alpha.Stride", Field, 0}, + {"Alpha16", Type, 0}, + {"Alpha16.Pix", Field, 0}, + {"Alpha16.Rect", Field, 0}, + {"Alpha16.Stride", Field, 0}, + {"Black", Var, 0}, + {"CMYK", Type, 5}, + {"CMYK.Pix", Field, 5}, + {"CMYK.Rect", Field, 5}, + {"CMYK.Stride", Field, 5}, + {"Config", Type, 0}, + {"Config.ColorModel", Field, 0}, + {"Config.Height", Field, 0}, + {"Config.Width", Field, 0}, + {"Decode", Func, 0}, + {"DecodeConfig", Func, 0}, + {"ErrFormat", Var, 0}, + {"Gray", Type, 0}, + {"Gray.Pix", Field, 0}, + {"Gray.Rect", Field, 0}, + {"Gray.Stride", Field, 0}, + {"Gray16", Type, 0}, + {"Gray16.Pix", Field, 0}, + {"Gray16.Rect", Field, 0}, + {"Gray16.Stride", Field, 0}, + {"Image", Type, 0}, + {"NRGBA", Type, 0}, + {"NRGBA.Pix", Field, 0}, + {"NRGBA.Rect", Field, 0}, + {"NRGBA.Stride", Field, 0}, + {"NRGBA64", Type, 0}, + {"NRGBA64.Pix", Field, 0}, + {"NRGBA64.Rect", Field, 0}, + {"NRGBA64.Stride", Field, 0}, + {"NYCbCrA", Type, 6}, + {"NYCbCrA.A", Field, 6}, + {"NYCbCrA.AStride", Field, 6}, + {"NYCbCrA.YCbCr", Field, 6}, + {"NewAlpha", Func, 0}, + {"NewAlpha16", Func, 0}, + {"NewCMYK", Func, 5}, + {"NewGray", Func, 0}, + {"NewGray16", Func, 0}, + {"NewNRGBA", Func, 0}, + {"NewNRGBA64", Func, 0}, + {"NewNYCbCrA", Func, 6}, + {"NewPaletted", Func, 0}, + {"NewRGBA", Func, 0}, + {"NewRGBA64", Func, 0}, + {"NewUniform", Func, 0}, + {"NewYCbCr", Func, 0}, + {"Opaque", Var, 0}, + {"Paletted", Type, 0}, + {"Paletted.Palette", Field, 0}, + {"Paletted.Pix", Field, 0}, + {"Paletted.Rect", Field, 0}, + {"Paletted.Stride", Field, 0}, + {"PalettedImage", Type, 0}, + {"Point", Type, 0}, + {"Point.X", Field, 0}, + {"Point.Y", Field, 0}, + {"Pt", Func, 0}, + {"RGBA", Type, 0}, + {"RGBA.Pix", Field, 0}, + {"RGBA.Rect", Field, 0}, + {"RGBA.Stride", Field, 0}, + {"RGBA64", Type, 0}, + {"RGBA64.Pix", Field, 0}, + {"RGBA64.Rect", Field, 0}, + {"RGBA64.Stride", Field, 0}, + {"RGBA64Image", Type, 17}, + {"Rect", Func, 0}, + {"Rectangle", Type, 0}, + {"Rectangle.Max", Field, 0}, + {"Rectangle.Min", Field, 0}, + {"RegisterFormat", Func, 0}, + {"Transparent", Var, 0}, + {"Uniform", Type, 0}, + {"Uniform.C", Field, 0}, + {"White", Var, 0}, + {"YCbCr", Type, 0}, + {"YCbCr.CStride", Field, 0}, + {"YCbCr.Cb", Field, 0}, + {"YCbCr.Cr", Field, 0}, + {"YCbCr.Rect", Field, 0}, + {"YCbCr.SubsampleRatio", Field, 0}, + {"YCbCr.Y", Field, 0}, + {"YCbCr.YStride", Field, 0}, + {"YCbCrSubsampleRatio", Type, 0}, + {"YCbCrSubsampleRatio410", Const, 5}, + {"YCbCrSubsampleRatio411", Const, 5}, + {"YCbCrSubsampleRatio420", Const, 0}, + {"YCbCrSubsampleRatio422", Const, 0}, + {"YCbCrSubsampleRatio440", Const, 1}, + {"YCbCrSubsampleRatio444", Const, 0}, + {"ZP", Var, 0}, + {"ZR", Var, 0}, + }, + "image/color": { + {"(Alpha).RGBA", Method, 0}, + {"(Alpha16).RGBA", Method, 0}, + {"(CMYK).RGBA", Method, 5}, + {"(Gray).RGBA", Method, 0}, + {"(Gray16).RGBA", Method, 0}, + {"(NRGBA).RGBA", Method, 0}, + {"(NRGBA64).RGBA", Method, 0}, + {"(NYCbCrA).RGBA", Method, 6}, + {"(Palette).Convert", Method, 0}, + {"(Palette).Index", Method, 0}, + {"(RGBA).RGBA", Method, 0}, + {"(RGBA64).RGBA", Method, 0}, + {"(YCbCr).RGBA", Method, 0}, + {"Alpha", Type, 0}, + {"Alpha.A", Field, 0}, + {"Alpha16", Type, 0}, + {"Alpha16.A", Field, 0}, + {"Alpha16Model", Var, 0}, + {"AlphaModel", Var, 0}, + {"Black", Var, 0}, + {"CMYK", Type, 5}, + {"CMYK.C", Field, 5}, + {"CMYK.K", Field, 5}, + {"CMYK.M", Field, 5}, + {"CMYK.Y", Field, 5}, + {"CMYKModel", Var, 5}, + {"CMYKToRGB", Func, 5}, + {"Color", Type, 0}, + {"Gray", Type, 0}, + {"Gray.Y", Field, 0}, + {"Gray16", Type, 0}, + {"Gray16.Y", Field, 0}, + {"Gray16Model", Var, 0}, + {"GrayModel", Var, 0}, + {"Model", Type, 0}, + {"ModelFunc", Func, 0}, + {"NRGBA", Type, 0}, + {"NRGBA.A", Field, 0}, + {"NRGBA.B", Field, 0}, + {"NRGBA.G", Field, 0}, + {"NRGBA.R", Field, 0}, + {"NRGBA64", Type, 0}, + {"NRGBA64.A", Field, 0}, + {"NRGBA64.B", Field, 0}, + {"NRGBA64.G", Field, 0}, + {"NRGBA64.R", Field, 0}, + {"NRGBA64Model", Var, 0}, + {"NRGBAModel", Var, 0}, + {"NYCbCrA", Type, 6}, + {"NYCbCrA.A", Field, 6}, + {"NYCbCrA.YCbCr", Field, 6}, + {"NYCbCrAModel", Var, 6}, + {"Opaque", Var, 0}, + {"Palette", Type, 0}, + {"RGBA", Type, 0}, + {"RGBA.A", Field, 0}, + {"RGBA.B", Field, 0}, + {"RGBA.G", Field, 0}, + {"RGBA.R", Field, 0}, + {"RGBA64", Type, 0}, + {"RGBA64.A", Field, 0}, + {"RGBA64.B", Field, 0}, + {"RGBA64.G", Field, 0}, + {"RGBA64.R", Field, 0}, + {"RGBA64Model", Var, 0}, + {"RGBAModel", Var, 0}, + {"RGBToCMYK", Func, 5}, + {"RGBToYCbCr", Func, 0}, + {"Transparent", Var, 0}, + {"White", Var, 0}, + {"YCbCr", Type, 0}, + {"YCbCr.Cb", Field, 0}, + {"YCbCr.Cr", Field, 0}, + {"YCbCr.Y", Field, 0}, + {"YCbCrModel", Var, 0}, + {"YCbCrToRGB", Func, 0}, + }, + "image/color/palette": { + {"Plan9", Var, 2}, + {"WebSafe", Var, 2}, + }, + "image/draw": { + {"(Op).Draw", Method, 2}, + {"Draw", Func, 0}, + {"DrawMask", Func, 0}, + {"Drawer", Type, 2}, + {"FloydSteinberg", Var, 2}, + {"Image", Type, 0}, + {"Op", Type, 0}, + {"Over", Const, 0}, + {"Quantizer", Type, 2}, + {"RGBA64Image", Type, 17}, + {"Src", Const, 0}, + }, + "image/gif": { + {"Decode", Func, 0}, + {"DecodeAll", Func, 0}, + {"DecodeConfig", Func, 0}, + {"DisposalBackground", Const, 5}, + {"DisposalNone", Const, 5}, + {"DisposalPrevious", Const, 5}, + {"Encode", Func, 2}, + {"EncodeAll", Func, 2}, + {"GIF", Type, 0}, + {"GIF.BackgroundIndex", Field, 5}, + {"GIF.Config", Field, 5}, + {"GIF.Delay", Field, 0}, + {"GIF.Disposal", Field, 5}, + {"GIF.Image", Field, 0}, + {"GIF.LoopCount", Field, 0}, + {"Options", Type, 2}, + {"Options.Drawer", Field, 2}, + {"Options.NumColors", Field, 2}, + {"Options.Quantizer", Field, 2}, + }, + "image/jpeg": { + {"(FormatError).Error", Method, 0}, + {"(UnsupportedError).Error", Method, 0}, + {"Decode", Func, 0}, + {"DecodeConfig", Func, 0}, + {"DefaultQuality", Const, 0}, + {"Encode", Func, 0}, + {"FormatError", Type, 0}, + {"Options", Type, 0}, + {"Options.Quality", Field, 0}, + {"Reader", Type, 0}, + {"UnsupportedError", Type, 0}, + }, + "image/png": { + {"(*Encoder).Encode", Method, 4}, + {"(FormatError).Error", Method, 0}, + {"(UnsupportedError).Error", Method, 0}, + {"BestCompression", Const, 4}, + {"BestSpeed", Const, 4}, + {"CompressionLevel", Type, 4}, + {"Decode", Func, 0}, + {"DecodeConfig", Func, 0}, + {"DefaultCompression", Const, 4}, + {"Encode", Func, 0}, + {"Encoder", Type, 4}, + {"Encoder.BufferPool", Field, 9}, + {"Encoder.CompressionLevel", Field, 4}, + {"EncoderBuffer", Type, 9}, + {"EncoderBufferPool", Type, 9}, + {"FormatError", Type, 0}, + {"NoCompression", Const, 4}, + {"UnsupportedError", Type, 0}, + }, + "index/suffixarray": { + {"(*Index).Bytes", Method, 0}, + {"(*Index).FindAllIndex", Method, 0}, + {"(*Index).Lookup", Method, 0}, + {"(*Index).Read", Method, 0}, + {"(*Index).Write", Method, 0}, + {"Index", Type, 0}, + {"New", Func, 0}, + }, + "io": { + {"(*LimitedReader).Read", Method, 0}, + {"(*OffsetWriter).Seek", Method, 20}, + {"(*OffsetWriter).Write", Method, 20}, + {"(*OffsetWriter).WriteAt", Method, 20}, + {"(*PipeReader).Close", Method, 0}, + {"(*PipeReader).CloseWithError", Method, 0}, + {"(*PipeReader).Read", Method, 0}, + {"(*PipeWriter).Close", Method, 0}, + {"(*PipeWriter).CloseWithError", Method, 0}, + {"(*PipeWriter).Write", Method, 0}, + {"(*SectionReader).Outer", Method, 22}, + {"(*SectionReader).Read", Method, 0}, + {"(*SectionReader).ReadAt", Method, 0}, + {"(*SectionReader).Seek", Method, 0}, + {"(*SectionReader).Size", Method, 0}, + {"ByteReader", Type, 0}, + {"ByteScanner", Type, 0}, + {"ByteWriter", Type, 1}, + {"Closer", Type, 0}, + {"Copy", Func, 0}, + {"CopyBuffer", Func, 5}, + {"CopyN", Func, 0}, + {"Discard", Var, 16}, + {"EOF", Var, 0}, + {"ErrClosedPipe", Var, 0}, + {"ErrNoProgress", Var, 1}, + {"ErrShortBuffer", Var, 0}, + {"ErrShortWrite", Var, 0}, + {"ErrUnexpectedEOF", Var, 0}, + {"LimitReader", Func, 0}, + {"LimitedReader", Type, 0}, + {"LimitedReader.N", Field, 0}, + {"LimitedReader.R", Field, 0}, + {"MultiReader", Func, 0}, + {"MultiWriter", Func, 0}, + {"NewOffsetWriter", Func, 20}, + {"NewSectionReader", Func, 0}, + {"NopCloser", Func, 16}, + {"OffsetWriter", Type, 20}, + {"Pipe", Func, 0}, + {"PipeReader", Type, 0}, + {"PipeWriter", Type, 0}, + {"ReadAll", Func, 16}, + {"ReadAtLeast", Func, 0}, + {"ReadCloser", Type, 0}, + {"ReadFull", Func, 0}, + {"ReadSeekCloser", Type, 16}, + {"ReadSeeker", Type, 0}, + {"ReadWriteCloser", Type, 0}, + {"ReadWriteSeeker", Type, 0}, + {"ReadWriter", Type, 0}, + {"Reader", Type, 0}, + {"ReaderAt", Type, 0}, + {"ReaderFrom", Type, 0}, + {"RuneReader", Type, 0}, + {"RuneScanner", Type, 0}, + {"SectionReader", Type, 0}, + {"SeekCurrent", Const, 7}, + {"SeekEnd", Const, 7}, + {"SeekStart", Const, 7}, + {"Seeker", Type, 0}, + {"StringWriter", Type, 12}, + {"TeeReader", Func, 0}, + {"WriteCloser", Type, 0}, + {"WriteSeeker", Type, 0}, + {"WriteString", Func, 0}, + {"Writer", Type, 0}, + {"WriterAt", Type, 0}, + {"WriterTo", Type, 0}, + }, + "io/fs": { + {"(*PathError).Error", Method, 16}, + {"(*PathError).Timeout", Method, 16}, + {"(*PathError).Unwrap", Method, 16}, + {"(FileMode).IsDir", Method, 16}, + {"(FileMode).IsRegular", Method, 16}, + {"(FileMode).Perm", Method, 16}, + {"(FileMode).String", Method, 16}, + {"(FileMode).Type", Method, 16}, + {"DirEntry", Type, 16}, + {"ErrClosed", Var, 16}, + {"ErrExist", Var, 16}, + {"ErrInvalid", Var, 16}, + {"ErrNotExist", Var, 16}, + {"ErrPermission", Var, 16}, + {"FS", Type, 16}, + {"File", Type, 16}, + {"FileInfo", Type, 16}, + {"FileInfoToDirEntry", Func, 17}, + {"FileMode", Type, 16}, + {"FormatDirEntry", Func, 21}, + {"FormatFileInfo", Func, 21}, + {"Glob", Func, 16}, + {"GlobFS", Type, 16}, + {"ModeAppend", Const, 16}, + {"ModeCharDevice", Const, 16}, + {"ModeDevice", Const, 16}, + {"ModeDir", Const, 16}, + {"ModeExclusive", Const, 16}, + {"ModeIrregular", Const, 16}, + {"ModeNamedPipe", Const, 16}, + {"ModePerm", Const, 16}, + {"ModeSetgid", Const, 16}, + {"ModeSetuid", Const, 16}, + {"ModeSocket", Const, 16}, + {"ModeSticky", Const, 16}, + {"ModeSymlink", Const, 16}, + {"ModeTemporary", Const, 16}, + {"ModeType", Const, 16}, + {"PathError", Type, 16}, + {"PathError.Err", Field, 16}, + {"PathError.Op", Field, 16}, + {"PathError.Path", Field, 16}, + {"ReadDir", Func, 16}, + {"ReadDirFS", Type, 16}, + {"ReadDirFile", Type, 16}, + {"ReadFile", Func, 16}, + {"ReadFileFS", Type, 16}, + {"SkipAll", Var, 20}, + {"SkipDir", Var, 16}, + {"Stat", Func, 16}, + {"StatFS", Type, 16}, + {"Sub", Func, 16}, + {"SubFS", Type, 16}, + {"ValidPath", Func, 16}, + {"WalkDir", Func, 16}, + {"WalkDirFunc", Type, 16}, + }, + "io/ioutil": { + {"Discard", Var, 0}, + {"NopCloser", Func, 0}, + {"ReadAll", Func, 0}, + {"ReadDir", Func, 0}, + {"ReadFile", Func, 0}, + {"TempDir", Func, 0}, + {"TempFile", Func, 0}, + {"WriteFile", Func, 0}, + }, + "iter": { + {"Pull", Func, 23}, + {"Pull2", Func, 23}, + {"Seq", Type, 23}, + {"Seq2", Type, 23}, + }, + "log": { + {"(*Logger).Fatal", Method, 0}, + {"(*Logger).Fatalf", Method, 0}, + {"(*Logger).Fatalln", Method, 0}, + {"(*Logger).Flags", Method, 0}, + {"(*Logger).Output", Method, 0}, + {"(*Logger).Panic", Method, 0}, + {"(*Logger).Panicf", Method, 0}, + {"(*Logger).Panicln", Method, 0}, + {"(*Logger).Prefix", Method, 0}, + {"(*Logger).Print", Method, 0}, + {"(*Logger).Printf", Method, 0}, + {"(*Logger).Println", Method, 0}, + {"(*Logger).SetFlags", Method, 0}, + {"(*Logger).SetOutput", Method, 5}, + {"(*Logger).SetPrefix", Method, 0}, + {"(*Logger).Writer", Method, 12}, + {"Default", Func, 16}, + {"Fatal", Func, 0}, + {"Fatalf", Func, 0}, + {"Fatalln", Func, 0}, + {"Flags", Func, 0}, + {"LUTC", Const, 5}, + {"Ldate", Const, 0}, + {"Llongfile", Const, 0}, + {"Lmicroseconds", Const, 0}, + {"Lmsgprefix", Const, 14}, + {"Logger", Type, 0}, + {"Lshortfile", Const, 0}, + {"LstdFlags", Const, 0}, + {"Ltime", Const, 0}, + {"New", Func, 0}, + {"Output", Func, 5}, + {"Panic", Func, 0}, + {"Panicf", Func, 0}, + {"Panicln", Func, 0}, + {"Prefix", Func, 0}, + {"Print", Func, 0}, + {"Printf", Func, 0}, + {"Println", Func, 0}, + {"SetFlags", Func, 0}, + {"SetOutput", Func, 0}, + {"SetPrefix", Func, 0}, + {"Writer", Func, 13}, + }, + "log/slog": { + {"(*JSONHandler).Enabled", Method, 21}, + {"(*JSONHandler).Handle", Method, 21}, + {"(*JSONHandler).WithAttrs", Method, 21}, + {"(*JSONHandler).WithGroup", Method, 21}, + {"(*Level).UnmarshalJSON", Method, 21}, + {"(*Level).UnmarshalText", Method, 21}, + {"(*LevelVar).Level", Method, 21}, + {"(*LevelVar).MarshalText", Method, 21}, + {"(*LevelVar).Set", Method, 21}, + {"(*LevelVar).String", Method, 21}, + {"(*LevelVar).UnmarshalText", Method, 21}, + {"(*Logger).Debug", Method, 21}, + {"(*Logger).DebugContext", Method, 21}, + {"(*Logger).Enabled", Method, 21}, + {"(*Logger).Error", Method, 21}, + {"(*Logger).ErrorContext", Method, 21}, + {"(*Logger).Handler", Method, 21}, + {"(*Logger).Info", Method, 21}, + {"(*Logger).InfoContext", Method, 21}, + {"(*Logger).Log", Method, 21}, + {"(*Logger).LogAttrs", Method, 21}, + {"(*Logger).Warn", Method, 21}, + {"(*Logger).WarnContext", Method, 21}, + {"(*Logger).With", Method, 21}, + {"(*Logger).WithGroup", Method, 21}, + {"(*Record).Add", Method, 21}, + {"(*Record).AddAttrs", Method, 21}, + {"(*TextHandler).Enabled", Method, 21}, + {"(*TextHandler).Handle", Method, 21}, + {"(*TextHandler).WithAttrs", Method, 21}, + {"(*TextHandler).WithGroup", Method, 21}, + {"(Attr).Equal", Method, 21}, + {"(Attr).String", Method, 21}, + {"(Kind).String", Method, 21}, + {"(Level).Level", Method, 21}, + {"(Level).MarshalJSON", Method, 21}, + {"(Level).MarshalText", Method, 21}, + {"(Level).String", Method, 21}, + {"(Record).Attrs", Method, 21}, + {"(Record).Clone", Method, 21}, + {"(Record).NumAttrs", Method, 21}, + {"(Value).Any", Method, 21}, + {"(Value).Bool", Method, 21}, + {"(Value).Duration", Method, 21}, + {"(Value).Equal", Method, 21}, + {"(Value).Float64", Method, 21}, + {"(Value).Group", Method, 21}, + {"(Value).Int64", Method, 21}, + {"(Value).Kind", Method, 21}, + {"(Value).LogValuer", Method, 21}, + {"(Value).Resolve", Method, 21}, + {"(Value).String", Method, 21}, + {"(Value).Time", Method, 21}, + {"(Value).Uint64", Method, 21}, + {"Any", Func, 21}, + {"AnyValue", Func, 21}, + {"Attr", Type, 21}, + {"Attr.Key", Field, 21}, + {"Attr.Value", Field, 21}, + {"Bool", Func, 21}, + {"BoolValue", Func, 21}, + {"Debug", Func, 21}, + {"DebugContext", Func, 21}, + {"Default", Func, 21}, + {"Duration", Func, 21}, + {"DurationValue", Func, 21}, + {"Error", Func, 21}, + {"ErrorContext", Func, 21}, + {"Float64", Func, 21}, + {"Float64Value", Func, 21}, + {"Group", Func, 21}, + {"GroupValue", Func, 21}, + {"Handler", Type, 21}, + {"HandlerOptions", Type, 21}, + {"HandlerOptions.AddSource", Field, 21}, + {"HandlerOptions.Level", Field, 21}, + {"HandlerOptions.ReplaceAttr", Field, 21}, + {"Info", Func, 21}, + {"InfoContext", Func, 21}, + {"Int", Func, 21}, + {"Int64", Func, 21}, + {"Int64Value", Func, 21}, + {"IntValue", Func, 21}, + {"JSONHandler", Type, 21}, + {"Kind", Type, 21}, + {"KindAny", Const, 21}, + {"KindBool", Const, 21}, + {"KindDuration", Const, 21}, + {"KindFloat64", Const, 21}, + {"KindGroup", Const, 21}, + {"KindInt64", Const, 21}, + {"KindLogValuer", Const, 21}, + {"KindString", Const, 21}, + {"KindTime", Const, 21}, + {"KindUint64", Const, 21}, + {"Level", Type, 21}, + {"LevelDebug", Const, 21}, + {"LevelError", Const, 21}, + {"LevelInfo", Const, 21}, + {"LevelKey", Const, 21}, + {"LevelVar", Type, 21}, + {"LevelWarn", Const, 21}, + {"Leveler", Type, 21}, + {"Log", Func, 21}, + {"LogAttrs", Func, 21}, + {"LogValuer", Type, 21}, + {"Logger", Type, 21}, + {"MessageKey", Const, 21}, + {"New", Func, 21}, + {"NewJSONHandler", Func, 21}, + {"NewLogLogger", Func, 21}, + {"NewRecord", Func, 21}, + {"NewTextHandler", Func, 21}, + {"Record", Type, 21}, + {"Record.Level", Field, 21}, + {"Record.Message", Field, 21}, + {"Record.PC", Field, 21}, + {"Record.Time", Field, 21}, + {"SetDefault", Func, 21}, + {"SetLogLoggerLevel", Func, 22}, + {"Source", Type, 21}, + {"Source.File", Field, 21}, + {"Source.Function", Field, 21}, + {"Source.Line", Field, 21}, + {"SourceKey", Const, 21}, + {"String", Func, 21}, + {"StringValue", Func, 21}, + {"TextHandler", Type, 21}, + {"Time", Func, 21}, + {"TimeKey", Const, 21}, + {"TimeValue", Func, 21}, + {"Uint64", Func, 21}, + {"Uint64Value", Func, 21}, + {"Value", Type, 21}, + {"Warn", Func, 21}, + {"WarnContext", Func, 21}, + {"With", Func, 21}, + }, + "log/syslog": { + {"(*Writer).Alert", Method, 0}, + {"(*Writer).Close", Method, 0}, + {"(*Writer).Crit", Method, 0}, + {"(*Writer).Debug", Method, 0}, + {"(*Writer).Emerg", Method, 0}, + {"(*Writer).Err", Method, 0}, + {"(*Writer).Info", Method, 0}, + {"(*Writer).Notice", Method, 0}, + {"(*Writer).Warning", Method, 0}, + {"(*Writer).Write", Method, 0}, + {"Dial", Func, 0}, + {"LOG_ALERT", Const, 0}, + {"LOG_AUTH", Const, 1}, + {"LOG_AUTHPRIV", Const, 1}, + {"LOG_CRIT", Const, 0}, + {"LOG_CRON", Const, 1}, + {"LOG_DAEMON", Const, 1}, + {"LOG_DEBUG", Const, 0}, + {"LOG_EMERG", Const, 0}, + {"LOG_ERR", Const, 0}, + {"LOG_FTP", Const, 1}, + {"LOG_INFO", Const, 0}, + {"LOG_KERN", Const, 1}, + {"LOG_LOCAL0", Const, 1}, + {"LOG_LOCAL1", Const, 1}, + {"LOG_LOCAL2", Const, 1}, + {"LOG_LOCAL3", Const, 1}, + {"LOG_LOCAL4", Const, 1}, + {"LOG_LOCAL5", Const, 1}, + {"LOG_LOCAL6", Const, 1}, + {"LOG_LOCAL7", Const, 1}, + {"LOG_LPR", Const, 1}, + {"LOG_MAIL", Const, 1}, + {"LOG_NEWS", Const, 1}, + {"LOG_NOTICE", Const, 0}, + {"LOG_SYSLOG", Const, 1}, + {"LOG_USER", Const, 1}, + {"LOG_UUCP", Const, 1}, + {"LOG_WARNING", Const, 0}, + {"New", Func, 0}, + {"NewLogger", Func, 0}, + {"Priority", Type, 0}, + {"Writer", Type, 0}, + }, + "maps": { + {"All", Func, 23}, + {"Clone", Func, 21}, + {"Collect", Func, 23}, + {"Copy", Func, 21}, + {"DeleteFunc", Func, 21}, + {"Equal", Func, 21}, + {"EqualFunc", Func, 21}, + {"Insert", Func, 23}, + {"Keys", Func, 23}, + {"Values", Func, 23}, + }, + "math": { + {"Abs", Func, 0}, + {"Acos", Func, 0}, + {"Acosh", Func, 0}, + {"Asin", Func, 0}, + {"Asinh", Func, 0}, + {"Atan", Func, 0}, + {"Atan2", Func, 0}, + {"Atanh", Func, 0}, + {"Cbrt", Func, 0}, + {"Ceil", Func, 0}, + {"Copysign", Func, 0}, + {"Cos", Func, 0}, + {"Cosh", Func, 0}, + {"Dim", Func, 0}, + {"E", Const, 0}, + {"Erf", Func, 0}, + {"Erfc", Func, 0}, + {"Erfcinv", Func, 10}, + {"Erfinv", Func, 10}, + {"Exp", Func, 0}, + {"Exp2", Func, 0}, + {"Expm1", Func, 0}, + {"FMA", Func, 14}, + {"Float32bits", Func, 0}, + {"Float32frombits", Func, 0}, + {"Float64bits", Func, 0}, + {"Float64frombits", Func, 0}, + {"Floor", Func, 0}, + {"Frexp", Func, 0}, + {"Gamma", Func, 0}, + {"Hypot", Func, 0}, + {"Ilogb", Func, 0}, + {"Inf", Func, 0}, + {"IsInf", Func, 0}, + {"IsNaN", Func, 0}, + {"J0", Func, 0}, + {"J1", Func, 0}, + {"Jn", Func, 0}, + {"Ldexp", Func, 0}, + {"Lgamma", Func, 0}, + {"Ln10", Const, 0}, + {"Ln2", Const, 0}, + {"Log", Func, 0}, + {"Log10", Func, 0}, + {"Log10E", Const, 0}, + {"Log1p", Func, 0}, + {"Log2", Func, 0}, + {"Log2E", Const, 0}, + {"Logb", Func, 0}, + {"Max", Func, 0}, + {"MaxFloat32", Const, 0}, + {"MaxFloat64", Const, 0}, + {"MaxInt", Const, 17}, + {"MaxInt16", Const, 0}, + {"MaxInt32", Const, 0}, + {"MaxInt64", Const, 0}, + {"MaxInt8", Const, 0}, + {"MaxUint", Const, 17}, + {"MaxUint16", Const, 0}, + {"MaxUint32", Const, 0}, + {"MaxUint64", Const, 0}, + {"MaxUint8", Const, 0}, + {"Min", Func, 0}, + {"MinInt", Const, 17}, + {"MinInt16", Const, 0}, + {"MinInt32", Const, 0}, + {"MinInt64", Const, 0}, + {"MinInt8", Const, 0}, + {"Mod", Func, 0}, + {"Modf", Func, 0}, + {"NaN", Func, 0}, + {"Nextafter", Func, 0}, + {"Nextafter32", Func, 4}, + {"Phi", Const, 0}, + {"Pi", Const, 0}, + {"Pow", Func, 0}, + {"Pow10", Func, 0}, + {"Remainder", Func, 0}, + {"Round", Func, 10}, + {"RoundToEven", Func, 10}, + {"Signbit", Func, 0}, + {"Sin", Func, 0}, + {"Sincos", Func, 0}, + {"Sinh", Func, 0}, + {"SmallestNonzeroFloat32", Const, 0}, + {"SmallestNonzeroFloat64", Const, 0}, + {"Sqrt", Func, 0}, + {"Sqrt2", Const, 0}, + {"SqrtE", Const, 0}, + {"SqrtPhi", Const, 0}, + {"SqrtPi", Const, 0}, + {"Tan", Func, 0}, + {"Tanh", Func, 0}, + {"Trunc", Func, 0}, + {"Y0", Func, 0}, + {"Y1", Func, 0}, + {"Yn", Func, 0}, + }, + "math/big": { + {"(*Float).Abs", Method, 5}, + {"(*Float).Acc", Method, 5}, + {"(*Float).Add", Method, 5}, + {"(*Float).Append", Method, 5}, + {"(*Float).Cmp", Method, 5}, + {"(*Float).Copy", Method, 5}, + {"(*Float).Float32", Method, 5}, + {"(*Float).Float64", Method, 5}, + {"(*Float).Format", Method, 5}, + {"(*Float).GobDecode", Method, 7}, + {"(*Float).GobEncode", Method, 7}, + {"(*Float).Int", Method, 5}, + {"(*Float).Int64", Method, 5}, + {"(*Float).IsInf", Method, 5}, + {"(*Float).IsInt", Method, 5}, + {"(*Float).MantExp", Method, 5}, + {"(*Float).MarshalText", Method, 6}, + {"(*Float).MinPrec", Method, 5}, + {"(*Float).Mode", Method, 5}, + {"(*Float).Mul", Method, 5}, + {"(*Float).Neg", Method, 5}, + {"(*Float).Parse", Method, 5}, + {"(*Float).Prec", Method, 5}, + {"(*Float).Quo", Method, 5}, + {"(*Float).Rat", Method, 5}, + {"(*Float).Scan", Method, 8}, + {"(*Float).Set", Method, 5}, + {"(*Float).SetFloat64", Method, 5}, + {"(*Float).SetInf", Method, 5}, + {"(*Float).SetInt", Method, 5}, + {"(*Float).SetInt64", Method, 5}, + {"(*Float).SetMantExp", Method, 5}, + {"(*Float).SetMode", Method, 5}, + {"(*Float).SetPrec", Method, 5}, + {"(*Float).SetRat", Method, 5}, + {"(*Float).SetString", Method, 5}, + {"(*Float).SetUint64", Method, 5}, + {"(*Float).Sign", Method, 5}, + {"(*Float).Signbit", Method, 5}, + {"(*Float).Sqrt", Method, 10}, + {"(*Float).String", Method, 5}, + {"(*Float).Sub", Method, 5}, + {"(*Float).Text", Method, 5}, + {"(*Float).Uint64", Method, 5}, + {"(*Float).UnmarshalText", Method, 6}, + {"(*Int).Abs", Method, 0}, + {"(*Int).Add", Method, 0}, + {"(*Int).And", Method, 0}, + {"(*Int).AndNot", Method, 0}, + {"(*Int).Append", Method, 6}, + {"(*Int).Binomial", Method, 0}, + {"(*Int).Bit", Method, 0}, + {"(*Int).BitLen", Method, 0}, + {"(*Int).Bits", Method, 0}, + {"(*Int).Bytes", Method, 0}, + {"(*Int).Cmp", Method, 0}, + {"(*Int).CmpAbs", Method, 10}, + {"(*Int).Div", Method, 0}, + {"(*Int).DivMod", Method, 0}, + {"(*Int).Exp", Method, 0}, + {"(*Int).FillBytes", Method, 15}, + {"(*Int).Float64", Method, 21}, + {"(*Int).Format", Method, 0}, + {"(*Int).GCD", Method, 0}, + {"(*Int).GobDecode", Method, 0}, + {"(*Int).GobEncode", Method, 0}, + {"(*Int).Int64", Method, 0}, + {"(*Int).IsInt64", Method, 9}, + {"(*Int).IsUint64", Method, 9}, + {"(*Int).Lsh", Method, 0}, + {"(*Int).MarshalJSON", Method, 1}, + {"(*Int).MarshalText", Method, 3}, + {"(*Int).Mod", Method, 0}, + {"(*Int).ModInverse", Method, 0}, + {"(*Int).ModSqrt", Method, 5}, + {"(*Int).Mul", Method, 0}, + {"(*Int).MulRange", Method, 0}, + {"(*Int).Neg", Method, 0}, + {"(*Int).Not", Method, 0}, + {"(*Int).Or", Method, 0}, + {"(*Int).ProbablyPrime", Method, 0}, + {"(*Int).Quo", Method, 0}, + {"(*Int).QuoRem", Method, 0}, + {"(*Int).Rand", Method, 0}, + {"(*Int).Rem", Method, 0}, + {"(*Int).Rsh", Method, 0}, + {"(*Int).Scan", Method, 0}, + {"(*Int).Set", Method, 0}, + {"(*Int).SetBit", Method, 0}, + {"(*Int).SetBits", Method, 0}, + {"(*Int).SetBytes", Method, 0}, + {"(*Int).SetInt64", Method, 0}, + {"(*Int).SetString", Method, 0}, + {"(*Int).SetUint64", Method, 1}, + {"(*Int).Sign", Method, 0}, + {"(*Int).Sqrt", Method, 8}, + {"(*Int).String", Method, 0}, + {"(*Int).Sub", Method, 0}, + {"(*Int).Text", Method, 6}, + {"(*Int).TrailingZeroBits", Method, 13}, + {"(*Int).Uint64", Method, 1}, + {"(*Int).UnmarshalJSON", Method, 1}, + {"(*Int).UnmarshalText", Method, 3}, + {"(*Int).Xor", Method, 0}, + {"(*Rat).Abs", Method, 0}, + {"(*Rat).Add", Method, 0}, + {"(*Rat).Cmp", Method, 0}, + {"(*Rat).Denom", Method, 0}, + {"(*Rat).Float32", Method, 4}, + {"(*Rat).Float64", Method, 1}, + {"(*Rat).FloatPrec", Method, 22}, + {"(*Rat).FloatString", Method, 0}, + {"(*Rat).GobDecode", Method, 0}, + {"(*Rat).GobEncode", Method, 0}, + {"(*Rat).Inv", Method, 0}, + {"(*Rat).IsInt", Method, 0}, + {"(*Rat).MarshalText", Method, 3}, + {"(*Rat).Mul", Method, 0}, + {"(*Rat).Neg", Method, 0}, + {"(*Rat).Num", Method, 0}, + {"(*Rat).Quo", Method, 0}, + {"(*Rat).RatString", Method, 0}, + {"(*Rat).Scan", Method, 0}, + {"(*Rat).Set", Method, 0}, + {"(*Rat).SetFloat64", Method, 1}, + {"(*Rat).SetFrac", Method, 0}, + {"(*Rat).SetFrac64", Method, 0}, + {"(*Rat).SetInt", Method, 0}, + {"(*Rat).SetInt64", Method, 0}, + {"(*Rat).SetString", Method, 0}, + {"(*Rat).SetUint64", Method, 13}, + {"(*Rat).Sign", Method, 0}, + {"(*Rat).String", Method, 0}, + {"(*Rat).Sub", Method, 0}, + {"(*Rat).UnmarshalText", Method, 3}, + {"(Accuracy).String", Method, 5}, + {"(ErrNaN).Error", Method, 5}, + {"(RoundingMode).String", Method, 5}, + {"Above", Const, 5}, + {"Accuracy", Type, 5}, + {"AwayFromZero", Const, 5}, + {"Below", Const, 5}, + {"ErrNaN", Type, 5}, + {"Exact", Const, 5}, + {"Float", Type, 5}, + {"Int", Type, 0}, + {"Jacobi", Func, 5}, + {"MaxBase", Const, 0}, + {"MaxExp", Const, 5}, + {"MaxPrec", Const, 5}, + {"MinExp", Const, 5}, + {"NewFloat", Func, 5}, + {"NewInt", Func, 0}, + {"NewRat", Func, 0}, + {"ParseFloat", Func, 5}, + {"Rat", Type, 0}, + {"RoundingMode", Type, 5}, + {"ToNearestAway", Const, 5}, + {"ToNearestEven", Const, 5}, + {"ToNegativeInf", Const, 5}, + {"ToPositiveInf", Const, 5}, + {"ToZero", Const, 5}, + {"Word", Type, 0}, + }, + "math/bits": { + {"Add", Func, 12}, + {"Add32", Func, 12}, + {"Add64", Func, 12}, + {"Div", Func, 12}, + {"Div32", Func, 12}, + {"Div64", Func, 12}, + {"LeadingZeros", Func, 9}, + {"LeadingZeros16", Func, 9}, + {"LeadingZeros32", Func, 9}, + {"LeadingZeros64", Func, 9}, + {"LeadingZeros8", Func, 9}, + {"Len", Func, 9}, + {"Len16", Func, 9}, + {"Len32", Func, 9}, + {"Len64", Func, 9}, + {"Len8", Func, 9}, + {"Mul", Func, 12}, + {"Mul32", Func, 12}, + {"Mul64", Func, 12}, + {"OnesCount", Func, 9}, + {"OnesCount16", Func, 9}, + {"OnesCount32", Func, 9}, + {"OnesCount64", Func, 9}, + {"OnesCount8", Func, 9}, + {"Rem", Func, 14}, + {"Rem32", Func, 14}, + {"Rem64", Func, 14}, + {"Reverse", Func, 9}, + {"Reverse16", Func, 9}, + {"Reverse32", Func, 9}, + {"Reverse64", Func, 9}, + {"Reverse8", Func, 9}, + {"ReverseBytes", Func, 9}, + {"ReverseBytes16", Func, 9}, + {"ReverseBytes32", Func, 9}, + {"ReverseBytes64", Func, 9}, + {"RotateLeft", Func, 9}, + {"RotateLeft16", Func, 9}, + {"RotateLeft32", Func, 9}, + {"RotateLeft64", Func, 9}, + {"RotateLeft8", Func, 9}, + {"Sub", Func, 12}, + {"Sub32", Func, 12}, + {"Sub64", Func, 12}, + {"TrailingZeros", Func, 9}, + {"TrailingZeros16", Func, 9}, + {"TrailingZeros32", Func, 9}, + {"TrailingZeros64", Func, 9}, + {"TrailingZeros8", Func, 9}, + {"UintSize", Const, 9}, + }, + "math/cmplx": { + {"Abs", Func, 0}, + {"Acos", Func, 0}, + {"Acosh", Func, 0}, + {"Asin", Func, 0}, + {"Asinh", Func, 0}, + {"Atan", Func, 0}, + {"Atanh", Func, 0}, + {"Conj", Func, 0}, + {"Cos", Func, 0}, + {"Cosh", Func, 0}, + {"Cot", Func, 0}, + {"Exp", Func, 0}, + {"Inf", Func, 0}, + {"IsInf", Func, 0}, + {"IsNaN", Func, 0}, + {"Log", Func, 0}, + {"Log10", Func, 0}, + {"NaN", Func, 0}, + {"Phase", Func, 0}, + {"Polar", Func, 0}, + {"Pow", Func, 0}, + {"Rect", Func, 0}, + {"Sin", Func, 0}, + {"Sinh", Func, 0}, + {"Sqrt", Func, 0}, + {"Tan", Func, 0}, + {"Tanh", Func, 0}, + }, + "math/rand": { + {"(*Rand).ExpFloat64", Method, 0}, + {"(*Rand).Float32", Method, 0}, + {"(*Rand).Float64", Method, 0}, + {"(*Rand).Int", Method, 0}, + {"(*Rand).Int31", Method, 0}, + {"(*Rand).Int31n", Method, 0}, + {"(*Rand).Int63", Method, 0}, + {"(*Rand).Int63n", Method, 0}, + {"(*Rand).Intn", Method, 0}, + {"(*Rand).NormFloat64", Method, 0}, + {"(*Rand).Perm", Method, 0}, + {"(*Rand).Read", Method, 6}, + {"(*Rand).Seed", Method, 0}, + {"(*Rand).Shuffle", Method, 10}, + {"(*Rand).Uint32", Method, 0}, + {"(*Rand).Uint64", Method, 8}, + {"(*Zipf).Uint64", Method, 0}, + {"ExpFloat64", Func, 0}, + {"Float32", Func, 0}, + {"Float64", Func, 0}, + {"Int", Func, 0}, + {"Int31", Func, 0}, + {"Int31n", Func, 0}, + {"Int63", Func, 0}, + {"Int63n", Func, 0}, + {"Intn", Func, 0}, + {"New", Func, 0}, + {"NewSource", Func, 0}, + {"NewZipf", Func, 0}, + {"NormFloat64", Func, 0}, + {"Perm", Func, 0}, + {"Rand", Type, 0}, + {"Read", Func, 6}, + {"Seed", Func, 0}, + {"Shuffle", Func, 10}, + {"Source", Type, 0}, + {"Source64", Type, 8}, + {"Uint32", Func, 0}, + {"Uint64", Func, 8}, + {"Zipf", Type, 0}, + }, + "math/rand/v2": { + {"(*ChaCha8).MarshalBinary", Method, 22}, + {"(*ChaCha8).Read", Method, 23}, + {"(*ChaCha8).Seed", Method, 22}, + {"(*ChaCha8).Uint64", Method, 22}, + {"(*ChaCha8).UnmarshalBinary", Method, 22}, + {"(*PCG).MarshalBinary", Method, 22}, + {"(*PCG).Seed", Method, 22}, + {"(*PCG).Uint64", Method, 22}, + {"(*PCG).UnmarshalBinary", Method, 22}, + {"(*Rand).ExpFloat64", Method, 22}, + {"(*Rand).Float32", Method, 22}, + {"(*Rand).Float64", Method, 22}, + {"(*Rand).Int", Method, 22}, + {"(*Rand).Int32", Method, 22}, + {"(*Rand).Int32N", Method, 22}, + {"(*Rand).Int64", Method, 22}, + {"(*Rand).Int64N", Method, 22}, + {"(*Rand).IntN", Method, 22}, + {"(*Rand).NormFloat64", Method, 22}, + {"(*Rand).Perm", Method, 22}, + {"(*Rand).Shuffle", Method, 22}, + {"(*Rand).Uint", Method, 23}, + {"(*Rand).Uint32", Method, 22}, + {"(*Rand).Uint32N", Method, 22}, + {"(*Rand).Uint64", Method, 22}, + {"(*Rand).Uint64N", Method, 22}, + {"(*Rand).UintN", Method, 22}, + {"(*Zipf).Uint64", Method, 22}, + {"ChaCha8", Type, 22}, + {"ExpFloat64", Func, 22}, + {"Float32", Func, 22}, + {"Float64", Func, 22}, + {"Int", Func, 22}, + {"Int32", Func, 22}, + {"Int32N", Func, 22}, + {"Int64", Func, 22}, + {"Int64N", Func, 22}, + {"IntN", Func, 22}, + {"N", Func, 22}, + {"New", Func, 22}, + {"NewChaCha8", Func, 22}, + {"NewPCG", Func, 22}, + {"NewZipf", Func, 22}, + {"NormFloat64", Func, 22}, + {"PCG", Type, 22}, + {"Perm", Func, 22}, + {"Rand", Type, 22}, + {"Shuffle", Func, 22}, + {"Source", Type, 22}, + {"Uint", Func, 23}, + {"Uint32", Func, 22}, + {"Uint32N", Func, 22}, + {"Uint64", Func, 22}, + {"Uint64N", Func, 22}, + {"UintN", Func, 22}, + {"Zipf", Type, 22}, + }, + "mime": { + {"(*WordDecoder).Decode", Method, 5}, + {"(*WordDecoder).DecodeHeader", Method, 5}, + {"(WordEncoder).Encode", Method, 5}, + {"AddExtensionType", Func, 0}, + {"BEncoding", Const, 5}, + {"ErrInvalidMediaParameter", Var, 9}, + {"ExtensionsByType", Func, 5}, + {"FormatMediaType", Func, 0}, + {"ParseMediaType", Func, 0}, + {"QEncoding", Const, 5}, + {"TypeByExtension", Func, 0}, + {"WordDecoder", Type, 5}, + {"WordDecoder.CharsetReader", Field, 5}, + {"WordEncoder", Type, 5}, + }, + "mime/multipart": { + {"(*FileHeader).Open", Method, 0}, + {"(*Form).RemoveAll", Method, 0}, + {"(*Part).Close", Method, 0}, + {"(*Part).FileName", Method, 0}, + {"(*Part).FormName", Method, 0}, + {"(*Part).Read", Method, 0}, + {"(*Reader).NextPart", Method, 0}, + {"(*Reader).NextRawPart", Method, 14}, + {"(*Reader).ReadForm", Method, 0}, + {"(*Writer).Boundary", Method, 0}, + {"(*Writer).Close", Method, 0}, + {"(*Writer).CreateFormField", Method, 0}, + {"(*Writer).CreateFormFile", Method, 0}, + {"(*Writer).CreatePart", Method, 0}, + {"(*Writer).FormDataContentType", Method, 0}, + {"(*Writer).SetBoundary", Method, 1}, + {"(*Writer).WriteField", Method, 0}, + {"ErrMessageTooLarge", Var, 9}, + {"File", Type, 0}, + {"FileHeader", Type, 0}, + {"FileHeader.Filename", Field, 0}, + {"FileHeader.Header", Field, 0}, + {"FileHeader.Size", Field, 9}, + {"Form", Type, 0}, + {"Form.File", Field, 0}, + {"Form.Value", Field, 0}, + {"NewReader", Func, 0}, + {"NewWriter", Func, 0}, + {"Part", Type, 0}, + {"Part.Header", Field, 0}, + {"Reader", Type, 0}, + {"Writer", Type, 0}, + }, + "mime/quotedprintable": { + {"(*Reader).Read", Method, 5}, + {"(*Writer).Close", Method, 5}, + {"(*Writer).Write", Method, 5}, + {"NewReader", Func, 5}, + {"NewWriter", Func, 5}, + {"Reader", Type, 5}, + {"Writer", Type, 5}, + {"Writer.Binary", Field, 5}, + }, + "net": { + {"(*AddrError).Error", Method, 0}, + {"(*AddrError).Temporary", Method, 0}, + {"(*AddrError).Timeout", Method, 0}, + {"(*Buffers).Read", Method, 8}, + {"(*Buffers).WriteTo", Method, 8}, + {"(*DNSConfigError).Error", Method, 0}, + {"(*DNSConfigError).Temporary", Method, 0}, + {"(*DNSConfigError).Timeout", Method, 0}, + {"(*DNSConfigError).Unwrap", Method, 13}, + {"(*DNSError).Error", Method, 0}, + {"(*DNSError).Temporary", Method, 0}, + {"(*DNSError).Timeout", Method, 0}, + {"(*DNSError).Unwrap", Method, 23}, + {"(*Dialer).Dial", Method, 1}, + {"(*Dialer).DialContext", Method, 7}, + {"(*Dialer).MultipathTCP", Method, 21}, + {"(*Dialer).SetMultipathTCP", Method, 21}, + {"(*IP).UnmarshalText", Method, 2}, + {"(*IPAddr).Network", Method, 0}, + {"(*IPAddr).String", Method, 0}, + {"(*IPConn).Close", Method, 0}, + {"(*IPConn).File", Method, 0}, + {"(*IPConn).LocalAddr", Method, 0}, + {"(*IPConn).Read", Method, 0}, + {"(*IPConn).ReadFrom", Method, 0}, + {"(*IPConn).ReadFromIP", Method, 0}, + {"(*IPConn).ReadMsgIP", Method, 1}, + {"(*IPConn).RemoteAddr", Method, 0}, + {"(*IPConn).SetDeadline", Method, 0}, + {"(*IPConn).SetReadBuffer", Method, 0}, + {"(*IPConn).SetReadDeadline", Method, 0}, + {"(*IPConn).SetWriteBuffer", Method, 0}, + {"(*IPConn).SetWriteDeadline", Method, 0}, + {"(*IPConn).SyscallConn", Method, 9}, + {"(*IPConn).Write", Method, 0}, + {"(*IPConn).WriteMsgIP", Method, 1}, + {"(*IPConn).WriteTo", Method, 0}, + {"(*IPConn).WriteToIP", Method, 0}, + {"(*IPNet).Contains", Method, 0}, + {"(*IPNet).Network", Method, 0}, + {"(*IPNet).String", Method, 0}, + {"(*Interface).Addrs", Method, 0}, + {"(*Interface).MulticastAddrs", Method, 0}, + {"(*ListenConfig).Listen", Method, 11}, + {"(*ListenConfig).ListenPacket", Method, 11}, + {"(*ListenConfig).MultipathTCP", Method, 21}, + {"(*ListenConfig).SetMultipathTCP", Method, 21}, + {"(*OpError).Error", Method, 0}, + {"(*OpError).Temporary", Method, 0}, + {"(*OpError).Timeout", Method, 0}, + {"(*OpError).Unwrap", Method, 13}, + {"(*ParseError).Error", Method, 0}, + {"(*ParseError).Temporary", Method, 17}, + {"(*ParseError).Timeout", Method, 17}, + {"(*Resolver).LookupAddr", Method, 8}, + {"(*Resolver).LookupCNAME", Method, 8}, + {"(*Resolver).LookupHost", Method, 8}, + {"(*Resolver).LookupIP", Method, 15}, + {"(*Resolver).LookupIPAddr", Method, 8}, + {"(*Resolver).LookupMX", Method, 8}, + {"(*Resolver).LookupNS", Method, 8}, + {"(*Resolver).LookupNetIP", Method, 18}, + {"(*Resolver).LookupPort", Method, 8}, + {"(*Resolver).LookupSRV", Method, 8}, + {"(*Resolver).LookupTXT", Method, 8}, + {"(*TCPAddr).AddrPort", Method, 18}, + {"(*TCPAddr).Network", Method, 0}, + {"(*TCPAddr).String", Method, 0}, + {"(*TCPConn).Close", Method, 0}, + {"(*TCPConn).CloseRead", Method, 0}, + {"(*TCPConn).CloseWrite", Method, 0}, + {"(*TCPConn).File", Method, 0}, + {"(*TCPConn).LocalAddr", Method, 0}, + {"(*TCPConn).MultipathTCP", Method, 21}, + {"(*TCPConn).Read", Method, 0}, + {"(*TCPConn).ReadFrom", Method, 0}, + {"(*TCPConn).RemoteAddr", Method, 0}, + {"(*TCPConn).SetDeadline", Method, 0}, + {"(*TCPConn).SetKeepAlive", Method, 0}, + {"(*TCPConn).SetKeepAliveConfig", Method, 23}, + {"(*TCPConn).SetKeepAlivePeriod", Method, 2}, + {"(*TCPConn).SetLinger", Method, 0}, + {"(*TCPConn).SetNoDelay", Method, 0}, + {"(*TCPConn).SetReadBuffer", Method, 0}, + {"(*TCPConn).SetReadDeadline", Method, 0}, + {"(*TCPConn).SetWriteBuffer", Method, 0}, + {"(*TCPConn).SetWriteDeadline", Method, 0}, + {"(*TCPConn).SyscallConn", Method, 9}, + {"(*TCPConn).Write", Method, 0}, + {"(*TCPConn).WriteTo", Method, 22}, + {"(*TCPListener).Accept", Method, 0}, + {"(*TCPListener).AcceptTCP", Method, 0}, + {"(*TCPListener).Addr", Method, 0}, + {"(*TCPListener).Close", Method, 0}, + {"(*TCPListener).File", Method, 0}, + {"(*TCPListener).SetDeadline", Method, 0}, + {"(*TCPListener).SyscallConn", Method, 10}, + {"(*UDPAddr).AddrPort", Method, 18}, + {"(*UDPAddr).Network", Method, 0}, + {"(*UDPAddr).String", Method, 0}, + {"(*UDPConn).Close", Method, 0}, + {"(*UDPConn).File", Method, 0}, + {"(*UDPConn).LocalAddr", Method, 0}, + {"(*UDPConn).Read", Method, 0}, + {"(*UDPConn).ReadFrom", Method, 0}, + {"(*UDPConn).ReadFromUDP", Method, 0}, + {"(*UDPConn).ReadFromUDPAddrPort", Method, 18}, + {"(*UDPConn).ReadMsgUDP", Method, 1}, + {"(*UDPConn).ReadMsgUDPAddrPort", Method, 18}, + {"(*UDPConn).RemoteAddr", Method, 0}, + {"(*UDPConn).SetDeadline", Method, 0}, + {"(*UDPConn).SetReadBuffer", Method, 0}, + {"(*UDPConn).SetReadDeadline", Method, 0}, + {"(*UDPConn).SetWriteBuffer", Method, 0}, + {"(*UDPConn).SetWriteDeadline", Method, 0}, + {"(*UDPConn).SyscallConn", Method, 9}, + {"(*UDPConn).Write", Method, 0}, + {"(*UDPConn).WriteMsgUDP", Method, 1}, + {"(*UDPConn).WriteMsgUDPAddrPort", Method, 18}, + {"(*UDPConn).WriteTo", Method, 0}, + {"(*UDPConn).WriteToUDP", Method, 0}, + {"(*UDPConn).WriteToUDPAddrPort", Method, 18}, + {"(*UnixAddr).Network", Method, 0}, + {"(*UnixAddr).String", Method, 0}, + {"(*UnixConn).Close", Method, 0}, + {"(*UnixConn).CloseRead", Method, 1}, + {"(*UnixConn).CloseWrite", Method, 1}, + {"(*UnixConn).File", Method, 0}, + {"(*UnixConn).LocalAddr", Method, 0}, + {"(*UnixConn).Read", Method, 0}, + {"(*UnixConn).ReadFrom", Method, 0}, + {"(*UnixConn).ReadFromUnix", Method, 0}, + {"(*UnixConn).ReadMsgUnix", Method, 0}, + {"(*UnixConn).RemoteAddr", Method, 0}, + {"(*UnixConn).SetDeadline", Method, 0}, + {"(*UnixConn).SetReadBuffer", Method, 0}, + {"(*UnixConn).SetReadDeadline", Method, 0}, + {"(*UnixConn).SetWriteBuffer", Method, 0}, + {"(*UnixConn).SetWriteDeadline", Method, 0}, + {"(*UnixConn).SyscallConn", Method, 9}, + {"(*UnixConn).Write", Method, 0}, + {"(*UnixConn).WriteMsgUnix", Method, 0}, + {"(*UnixConn).WriteTo", Method, 0}, + {"(*UnixConn).WriteToUnix", Method, 0}, + {"(*UnixListener).Accept", Method, 0}, + {"(*UnixListener).AcceptUnix", Method, 0}, + {"(*UnixListener).Addr", Method, 0}, + {"(*UnixListener).Close", Method, 0}, + {"(*UnixListener).File", Method, 0}, + {"(*UnixListener).SetDeadline", Method, 0}, + {"(*UnixListener).SetUnlinkOnClose", Method, 8}, + {"(*UnixListener).SyscallConn", Method, 10}, + {"(Flags).String", Method, 0}, + {"(HardwareAddr).String", Method, 0}, + {"(IP).DefaultMask", Method, 0}, + {"(IP).Equal", Method, 0}, + {"(IP).IsGlobalUnicast", Method, 0}, + {"(IP).IsInterfaceLocalMulticast", Method, 0}, + {"(IP).IsLinkLocalMulticast", Method, 0}, + {"(IP).IsLinkLocalUnicast", Method, 0}, + {"(IP).IsLoopback", Method, 0}, + {"(IP).IsMulticast", Method, 0}, + {"(IP).IsPrivate", Method, 17}, + {"(IP).IsUnspecified", Method, 0}, + {"(IP).MarshalText", Method, 2}, + {"(IP).Mask", Method, 0}, + {"(IP).String", Method, 0}, + {"(IP).To16", Method, 0}, + {"(IP).To4", Method, 0}, + {"(IPMask).Size", Method, 0}, + {"(IPMask).String", Method, 0}, + {"(InvalidAddrError).Error", Method, 0}, + {"(InvalidAddrError).Temporary", Method, 0}, + {"(InvalidAddrError).Timeout", Method, 0}, + {"(UnknownNetworkError).Error", Method, 0}, + {"(UnknownNetworkError).Temporary", Method, 0}, + {"(UnknownNetworkError).Timeout", Method, 0}, + {"Addr", Type, 0}, + {"AddrError", Type, 0}, + {"AddrError.Addr", Field, 0}, + {"AddrError.Err", Field, 0}, + {"Buffers", Type, 8}, + {"CIDRMask", Func, 0}, + {"Conn", Type, 0}, + {"DNSConfigError", Type, 0}, + {"DNSConfigError.Err", Field, 0}, + {"DNSError", Type, 0}, + {"DNSError.Err", Field, 0}, + {"DNSError.IsNotFound", Field, 13}, + {"DNSError.IsTemporary", Field, 6}, + {"DNSError.IsTimeout", Field, 0}, + {"DNSError.Name", Field, 0}, + {"DNSError.Server", Field, 0}, + {"DNSError.UnwrapErr", Field, 23}, + {"DefaultResolver", Var, 8}, + {"Dial", Func, 0}, + {"DialIP", Func, 0}, + {"DialTCP", Func, 0}, + {"DialTimeout", Func, 0}, + {"DialUDP", Func, 0}, + {"DialUnix", Func, 0}, + {"Dialer", Type, 1}, + {"Dialer.Cancel", Field, 6}, + {"Dialer.Control", Field, 11}, + {"Dialer.ControlContext", Field, 20}, + {"Dialer.Deadline", Field, 1}, + {"Dialer.DualStack", Field, 2}, + {"Dialer.FallbackDelay", Field, 5}, + {"Dialer.KeepAlive", Field, 3}, + {"Dialer.KeepAliveConfig", Field, 23}, + {"Dialer.LocalAddr", Field, 1}, + {"Dialer.Resolver", Field, 8}, + {"Dialer.Timeout", Field, 1}, + {"ErrClosed", Var, 16}, + {"ErrWriteToConnected", Var, 0}, + {"Error", Type, 0}, + {"FileConn", Func, 0}, + {"FileListener", Func, 0}, + {"FilePacketConn", Func, 0}, + {"FlagBroadcast", Const, 0}, + {"FlagLoopback", Const, 0}, + {"FlagMulticast", Const, 0}, + {"FlagPointToPoint", Const, 0}, + {"FlagRunning", Const, 20}, + {"FlagUp", Const, 0}, + {"Flags", Type, 0}, + {"HardwareAddr", Type, 0}, + {"IP", Type, 0}, + {"IPAddr", Type, 0}, + {"IPAddr.IP", Field, 0}, + {"IPAddr.Zone", Field, 1}, + {"IPConn", Type, 0}, + {"IPMask", Type, 0}, + {"IPNet", Type, 0}, + {"IPNet.IP", Field, 0}, + {"IPNet.Mask", Field, 0}, + {"IPv4", Func, 0}, + {"IPv4Mask", Func, 0}, + {"IPv4allrouter", Var, 0}, + {"IPv4allsys", Var, 0}, + {"IPv4bcast", Var, 0}, + {"IPv4len", Const, 0}, + {"IPv4zero", Var, 0}, + {"IPv6interfacelocalallnodes", Var, 0}, + {"IPv6len", Const, 0}, + {"IPv6linklocalallnodes", Var, 0}, + {"IPv6linklocalallrouters", Var, 0}, + {"IPv6loopback", Var, 0}, + {"IPv6unspecified", Var, 0}, + {"IPv6zero", Var, 0}, + {"Interface", Type, 0}, + {"Interface.Flags", Field, 0}, + {"Interface.HardwareAddr", Field, 0}, + {"Interface.Index", Field, 0}, + {"Interface.MTU", Field, 0}, + {"Interface.Name", Field, 0}, + {"InterfaceAddrs", Func, 0}, + {"InterfaceByIndex", Func, 0}, + {"InterfaceByName", Func, 0}, + {"Interfaces", Func, 0}, + {"InvalidAddrError", Type, 0}, + {"JoinHostPort", Func, 0}, + {"KeepAliveConfig", Type, 23}, + {"KeepAliveConfig.Count", Field, 23}, + {"KeepAliveConfig.Enable", Field, 23}, + {"KeepAliveConfig.Idle", Field, 23}, + {"KeepAliveConfig.Interval", Field, 23}, + {"Listen", Func, 0}, + {"ListenConfig", Type, 11}, + {"ListenConfig.Control", Field, 11}, + {"ListenConfig.KeepAlive", Field, 13}, + {"ListenConfig.KeepAliveConfig", Field, 23}, + {"ListenIP", Func, 0}, + {"ListenMulticastUDP", Func, 0}, + {"ListenPacket", Func, 0}, + {"ListenTCP", Func, 0}, + {"ListenUDP", Func, 0}, + {"ListenUnix", Func, 0}, + {"ListenUnixgram", Func, 0}, + {"Listener", Type, 0}, + {"LookupAddr", Func, 0}, + {"LookupCNAME", Func, 0}, + {"LookupHost", Func, 0}, + {"LookupIP", Func, 0}, + {"LookupMX", Func, 0}, + {"LookupNS", Func, 1}, + {"LookupPort", Func, 0}, + {"LookupSRV", Func, 0}, + {"LookupTXT", Func, 0}, + {"MX", Type, 0}, + {"MX.Host", Field, 0}, + {"MX.Pref", Field, 0}, + {"NS", Type, 1}, + {"NS.Host", Field, 1}, + {"OpError", Type, 0}, + {"OpError.Addr", Field, 0}, + {"OpError.Err", Field, 0}, + {"OpError.Net", Field, 0}, + {"OpError.Op", Field, 0}, + {"OpError.Source", Field, 5}, + {"PacketConn", Type, 0}, + {"ParseCIDR", Func, 0}, + {"ParseError", Type, 0}, + {"ParseError.Text", Field, 0}, + {"ParseError.Type", Field, 0}, + {"ParseIP", Func, 0}, + {"ParseMAC", Func, 0}, + {"Pipe", Func, 0}, + {"ResolveIPAddr", Func, 0}, + {"ResolveTCPAddr", Func, 0}, + {"ResolveUDPAddr", Func, 0}, + {"ResolveUnixAddr", Func, 0}, + {"Resolver", Type, 8}, + {"Resolver.Dial", Field, 9}, + {"Resolver.PreferGo", Field, 8}, + {"Resolver.StrictErrors", Field, 9}, + {"SRV", Type, 0}, + {"SRV.Port", Field, 0}, + {"SRV.Priority", Field, 0}, + {"SRV.Target", Field, 0}, + {"SRV.Weight", Field, 0}, + {"SplitHostPort", Func, 0}, + {"TCPAddr", Type, 0}, + {"TCPAddr.IP", Field, 0}, + {"TCPAddr.Port", Field, 0}, + {"TCPAddr.Zone", Field, 1}, + {"TCPAddrFromAddrPort", Func, 18}, + {"TCPConn", Type, 0}, + {"TCPListener", Type, 0}, + {"UDPAddr", Type, 0}, + {"UDPAddr.IP", Field, 0}, + {"UDPAddr.Port", Field, 0}, + {"UDPAddr.Zone", Field, 1}, + {"UDPAddrFromAddrPort", Func, 18}, + {"UDPConn", Type, 0}, + {"UnixAddr", Type, 0}, + {"UnixAddr.Name", Field, 0}, + {"UnixAddr.Net", Field, 0}, + {"UnixConn", Type, 0}, + {"UnixListener", Type, 0}, + {"UnknownNetworkError", Type, 0}, + }, + "net/http": { + {"(*Client).CloseIdleConnections", Method, 12}, + {"(*Client).Do", Method, 0}, + {"(*Client).Get", Method, 0}, + {"(*Client).Head", Method, 0}, + {"(*Client).Post", Method, 0}, + {"(*Client).PostForm", Method, 0}, + {"(*Cookie).String", Method, 0}, + {"(*Cookie).Valid", Method, 18}, + {"(*MaxBytesError).Error", Method, 19}, + {"(*ProtocolError).Error", Method, 0}, + {"(*ProtocolError).Is", Method, 21}, + {"(*Request).AddCookie", Method, 0}, + {"(*Request).BasicAuth", Method, 4}, + {"(*Request).Clone", Method, 13}, + {"(*Request).Context", Method, 7}, + {"(*Request).Cookie", Method, 0}, + {"(*Request).Cookies", Method, 0}, + {"(*Request).CookiesNamed", Method, 23}, + {"(*Request).FormFile", Method, 0}, + {"(*Request).FormValue", Method, 0}, + {"(*Request).MultipartReader", Method, 0}, + {"(*Request).ParseForm", Method, 0}, + {"(*Request).ParseMultipartForm", Method, 0}, + {"(*Request).PathValue", Method, 22}, + {"(*Request).PostFormValue", Method, 1}, + {"(*Request).ProtoAtLeast", Method, 0}, + {"(*Request).Referer", Method, 0}, + {"(*Request).SetBasicAuth", Method, 0}, + {"(*Request).SetPathValue", Method, 22}, + {"(*Request).UserAgent", Method, 0}, + {"(*Request).WithContext", Method, 7}, + {"(*Request).Write", Method, 0}, + {"(*Request).WriteProxy", Method, 0}, + {"(*Response).Cookies", Method, 0}, + {"(*Response).Location", Method, 0}, + {"(*Response).ProtoAtLeast", Method, 0}, + {"(*Response).Write", Method, 0}, + {"(*ResponseController).EnableFullDuplex", Method, 21}, + {"(*ResponseController).Flush", Method, 20}, + {"(*ResponseController).Hijack", Method, 20}, + {"(*ResponseController).SetReadDeadline", Method, 20}, + {"(*ResponseController).SetWriteDeadline", Method, 20}, + {"(*ServeMux).Handle", Method, 0}, + {"(*ServeMux).HandleFunc", Method, 0}, + {"(*ServeMux).Handler", Method, 1}, + {"(*ServeMux).ServeHTTP", Method, 0}, + {"(*Server).Close", Method, 8}, + {"(*Server).ListenAndServe", Method, 0}, + {"(*Server).ListenAndServeTLS", Method, 0}, + {"(*Server).RegisterOnShutdown", Method, 9}, + {"(*Server).Serve", Method, 0}, + {"(*Server).ServeTLS", Method, 9}, + {"(*Server).SetKeepAlivesEnabled", Method, 3}, + {"(*Server).Shutdown", Method, 8}, + {"(*Transport).CancelRequest", Method, 1}, + {"(*Transport).Clone", Method, 13}, + {"(*Transport).CloseIdleConnections", Method, 0}, + {"(*Transport).RegisterProtocol", Method, 0}, + {"(*Transport).RoundTrip", Method, 0}, + {"(ConnState).String", Method, 3}, + {"(Dir).Open", Method, 0}, + {"(HandlerFunc).ServeHTTP", Method, 0}, + {"(Header).Add", Method, 0}, + {"(Header).Clone", Method, 13}, + {"(Header).Del", Method, 0}, + {"(Header).Get", Method, 0}, + {"(Header).Set", Method, 0}, + {"(Header).Values", Method, 14}, + {"(Header).Write", Method, 0}, + {"(Header).WriteSubset", Method, 0}, + {"AllowQuerySemicolons", Func, 17}, + {"CanonicalHeaderKey", Func, 0}, + {"Client", Type, 0}, + {"Client.CheckRedirect", Field, 0}, + {"Client.Jar", Field, 0}, + {"Client.Timeout", Field, 3}, + {"Client.Transport", Field, 0}, + {"CloseNotifier", Type, 1}, + {"ConnState", Type, 3}, + {"Cookie", Type, 0}, + {"Cookie.Domain", Field, 0}, + {"Cookie.Expires", Field, 0}, + {"Cookie.HttpOnly", Field, 0}, + {"Cookie.MaxAge", Field, 0}, + {"Cookie.Name", Field, 0}, + {"Cookie.Partitioned", Field, 23}, + {"Cookie.Path", Field, 0}, + {"Cookie.Quoted", Field, 23}, + {"Cookie.Raw", Field, 0}, + {"Cookie.RawExpires", Field, 0}, + {"Cookie.SameSite", Field, 11}, + {"Cookie.Secure", Field, 0}, + {"Cookie.Unparsed", Field, 0}, + {"Cookie.Value", Field, 0}, + {"CookieJar", Type, 0}, + {"DefaultClient", Var, 0}, + {"DefaultMaxHeaderBytes", Const, 0}, + {"DefaultMaxIdleConnsPerHost", Const, 0}, + {"DefaultServeMux", Var, 0}, + {"DefaultTransport", Var, 0}, + {"DetectContentType", Func, 0}, + {"Dir", Type, 0}, + {"ErrAbortHandler", Var, 8}, + {"ErrBodyNotAllowed", Var, 0}, + {"ErrBodyReadAfterClose", Var, 0}, + {"ErrContentLength", Var, 0}, + {"ErrHandlerTimeout", Var, 0}, + {"ErrHeaderTooLong", Var, 0}, + {"ErrHijacked", Var, 0}, + {"ErrLineTooLong", Var, 0}, + {"ErrMissingBoundary", Var, 0}, + {"ErrMissingContentLength", Var, 0}, + {"ErrMissingFile", Var, 0}, + {"ErrNoCookie", Var, 0}, + {"ErrNoLocation", Var, 0}, + {"ErrNotMultipart", Var, 0}, + {"ErrNotSupported", Var, 0}, + {"ErrSchemeMismatch", Var, 21}, + {"ErrServerClosed", Var, 8}, + {"ErrShortBody", Var, 0}, + {"ErrSkipAltProtocol", Var, 6}, + {"ErrUnexpectedTrailer", Var, 0}, + {"ErrUseLastResponse", Var, 7}, + {"ErrWriteAfterFlush", Var, 0}, + {"Error", Func, 0}, + {"FS", Func, 16}, + {"File", Type, 0}, + {"FileServer", Func, 0}, + {"FileServerFS", Func, 22}, + {"FileSystem", Type, 0}, + {"Flusher", Type, 0}, + {"Get", Func, 0}, + {"Handle", Func, 0}, + {"HandleFunc", Func, 0}, + {"Handler", Type, 0}, + {"HandlerFunc", Type, 0}, + {"Head", Func, 0}, + {"Header", Type, 0}, + {"Hijacker", Type, 0}, + {"ListenAndServe", Func, 0}, + {"ListenAndServeTLS", Func, 0}, + {"LocalAddrContextKey", Var, 7}, + {"MaxBytesError", Type, 19}, + {"MaxBytesError.Limit", Field, 19}, + {"MaxBytesHandler", Func, 18}, + {"MaxBytesReader", Func, 0}, + {"MethodConnect", Const, 6}, + {"MethodDelete", Const, 6}, + {"MethodGet", Const, 6}, + {"MethodHead", Const, 6}, + {"MethodOptions", Const, 6}, + {"MethodPatch", Const, 6}, + {"MethodPost", Const, 6}, + {"MethodPut", Const, 6}, + {"MethodTrace", Const, 6}, + {"NewFileTransport", Func, 0}, + {"NewFileTransportFS", Func, 22}, + {"NewRequest", Func, 0}, + {"NewRequestWithContext", Func, 13}, + {"NewResponseController", Func, 20}, + {"NewServeMux", Func, 0}, + {"NoBody", Var, 8}, + {"NotFound", Func, 0}, + {"NotFoundHandler", Func, 0}, + {"ParseCookie", Func, 23}, + {"ParseHTTPVersion", Func, 0}, + {"ParseSetCookie", Func, 23}, + {"ParseTime", Func, 1}, + {"Post", Func, 0}, + {"PostForm", Func, 0}, + {"ProtocolError", Type, 0}, + {"ProtocolError.ErrorString", Field, 0}, + {"ProxyFromEnvironment", Func, 0}, + {"ProxyURL", Func, 0}, + {"PushOptions", Type, 8}, + {"PushOptions.Header", Field, 8}, + {"PushOptions.Method", Field, 8}, + {"Pusher", Type, 8}, + {"ReadRequest", Func, 0}, + {"ReadResponse", Func, 0}, + {"Redirect", Func, 0}, + {"RedirectHandler", Func, 0}, + {"Request", Type, 0}, + {"Request.Body", Field, 0}, + {"Request.Cancel", Field, 5}, + {"Request.Close", Field, 0}, + {"Request.ContentLength", Field, 0}, + {"Request.Form", Field, 0}, + {"Request.GetBody", Field, 8}, + {"Request.Header", Field, 0}, + {"Request.Host", Field, 0}, + {"Request.Method", Field, 0}, + {"Request.MultipartForm", Field, 0}, + {"Request.Pattern", Field, 23}, + {"Request.PostForm", Field, 1}, + {"Request.Proto", Field, 0}, + {"Request.ProtoMajor", Field, 0}, + {"Request.ProtoMinor", Field, 0}, + {"Request.RemoteAddr", Field, 0}, + {"Request.RequestURI", Field, 0}, + {"Request.Response", Field, 7}, + {"Request.TLS", Field, 0}, + {"Request.Trailer", Field, 0}, + {"Request.TransferEncoding", Field, 0}, + {"Request.URL", Field, 0}, + {"Response", Type, 0}, + {"Response.Body", Field, 0}, + {"Response.Close", Field, 0}, + {"Response.ContentLength", Field, 0}, + {"Response.Header", Field, 0}, + {"Response.Proto", Field, 0}, + {"Response.ProtoMajor", Field, 0}, + {"Response.ProtoMinor", Field, 0}, + {"Response.Request", Field, 0}, + {"Response.Status", Field, 0}, + {"Response.StatusCode", Field, 0}, + {"Response.TLS", Field, 3}, + {"Response.Trailer", Field, 0}, + {"Response.TransferEncoding", Field, 0}, + {"Response.Uncompressed", Field, 7}, + {"ResponseController", Type, 20}, + {"ResponseWriter", Type, 0}, + {"RoundTripper", Type, 0}, + {"SameSite", Type, 11}, + {"SameSiteDefaultMode", Const, 11}, + {"SameSiteLaxMode", Const, 11}, + {"SameSiteNoneMode", Const, 13}, + {"SameSiteStrictMode", Const, 11}, + {"Serve", Func, 0}, + {"ServeContent", Func, 0}, + {"ServeFile", Func, 0}, + {"ServeFileFS", Func, 22}, + {"ServeMux", Type, 0}, + {"ServeTLS", Func, 9}, + {"Server", Type, 0}, + {"Server.Addr", Field, 0}, + {"Server.BaseContext", Field, 13}, + {"Server.ConnContext", Field, 13}, + {"Server.ConnState", Field, 3}, + {"Server.DisableGeneralOptionsHandler", Field, 20}, + {"Server.ErrorLog", Field, 3}, + {"Server.Handler", Field, 0}, + {"Server.IdleTimeout", Field, 8}, + {"Server.MaxHeaderBytes", Field, 0}, + {"Server.ReadHeaderTimeout", Field, 8}, + {"Server.ReadTimeout", Field, 0}, + {"Server.TLSConfig", Field, 0}, + {"Server.TLSNextProto", Field, 1}, + {"Server.WriteTimeout", Field, 0}, + {"ServerContextKey", Var, 7}, + {"SetCookie", Func, 0}, + {"StateActive", Const, 3}, + {"StateClosed", Const, 3}, + {"StateHijacked", Const, 3}, + {"StateIdle", Const, 3}, + {"StateNew", Const, 3}, + {"StatusAccepted", Const, 0}, + {"StatusAlreadyReported", Const, 7}, + {"StatusBadGateway", Const, 0}, + {"StatusBadRequest", Const, 0}, + {"StatusConflict", Const, 0}, + {"StatusContinue", Const, 0}, + {"StatusCreated", Const, 0}, + {"StatusEarlyHints", Const, 13}, + {"StatusExpectationFailed", Const, 0}, + {"StatusFailedDependency", Const, 7}, + {"StatusForbidden", Const, 0}, + {"StatusFound", Const, 0}, + {"StatusGatewayTimeout", Const, 0}, + {"StatusGone", Const, 0}, + {"StatusHTTPVersionNotSupported", Const, 0}, + {"StatusIMUsed", Const, 7}, + {"StatusInsufficientStorage", Const, 7}, + {"StatusInternalServerError", Const, 0}, + {"StatusLengthRequired", Const, 0}, + {"StatusLocked", Const, 7}, + {"StatusLoopDetected", Const, 7}, + {"StatusMethodNotAllowed", Const, 0}, + {"StatusMisdirectedRequest", Const, 11}, + {"StatusMovedPermanently", Const, 0}, + {"StatusMultiStatus", Const, 7}, + {"StatusMultipleChoices", Const, 0}, + {"StatusNetworkAuthenticationRequired", Const, 6}, + {"StatusNoContent", Const, 0}, + {"StatusNonAuthoritativeInfo", Const, 0}, + {"StatusNotAcceptable", Const, 0}, + {"StatusNotExtended", Const, 7}, + {"StatusNotFound", Const, 0}, + {"StatusNotImplemented", Const, 0}, + {"StatusNotModified", Const, 0}, + {"StatusOK", Const, 0}, + {"StatusPartialContent", Const, 0}, + {"StatusPaymentRequired", Const, 0}, + {"StatusPermanentRedirect", Const, 7}, + {"StatusPreconditionFailed", Const, 0}, + {"StatusPreconditionRequired", Const, 6}, + {"StatusProcessing", Const, 7}, + {"StatusProxyAuthRequired", Const, 0}, + {"StatusRequestEntityTooLarge", Const, 0}, + {"StatusRequestHeaderFieldsTooLarge", Const, 6}, + {"StatusRequestTimeout", Const, 0}, + {"StatusRequestURITooLong", Const, 0}, + {"StatusRequestedRangeNotSatisfiable", Const, 0}, + {"StatusResetContent", Const, 0}, + {"StatusSeeOther", Const, 0}, + {"StatusServiceUnavailable", Const, 0}, + {"StatusSwitchingProtocols", Const, 0}, + {"StatusTeapot", Const, 0}, + {"StatusTemporaryRedirect", Const, 0}, + {"StatusText", Func, 0}, + {"StatusTooEarly", Const, 12}, + {"StatusTooManyRequests", Const, 6}, + {"StatusUnauthorized", Const, 0}, + {"StatusUnavailableForLegalReasons", Const, 6}, + {"StatusUnprocessableEntity", Const, 7}, + {"StatusUnsupportedMediaType", Const, 0}, + {"StatusUpgradeRequired", Const, 7}, + {"StatusUseProxy", Const, 0}, + {"StatusVariantAlsoNegotiates", Const, 7}, + {"StripPrefix", Func, 0}, + {"TimeFormat", Const, 0}, + {"TimeoutHandler", Func, 0}, + {"TrailerPrefix", Const, 8}, + {"Transport", Type, 0}, + {"Transport.Dial", Field, 0}, + {"Transport.DialContext", Field, 7}, + {"Transport.DialTLS", Field, 4}, + {"Transport.DialTLSContext", Field, 14}, + {"Transport.DisableCompression", Field, 0}, + {"Transport.DisableKeepAlives", Field, 0}, + {"Transport.ExpectContinueTimeout", Field, 6}, + {"Transport.ForceAttemptHTTP2", Field, 13}, + {"Transport.GetProxyConnectHeader", Field, 16}, + {"Transport.IdleConnTimeout", Field, 7}, + {"Transport.MaxConnsPerHost", Field, 11}, + {"Transport.MaxIdleConns", Field, 7}, + {"Transport.MaxIdleConnsPerHost", Field, 0}, + {"Transport.MaxResponseHeaderBytes", Field, 7}, + {"Transport.OnProxyConnectResponse", Field, 20}, + {"Transport.Proxy", Field, 0}, + {"Transport.ProxyConnectHeader", Field, 8}, + {"Transport.ReadBufferSize", Field, 13}, + {"Transport.ResponseHeaderTimeout", Field, 1}, + {"Transport.TLSClientConfig", Field, 0}, + {"Transport.TLSHandshakeTimeout", Field, 3}, + {"Transport.TLSNextProto", Field, 6}, + {"Transport.WriteBufferSize", Field, 13}, + }, + "net/http/cgi": { + {"(*Handler).ServeHTTP", Method, 0}, + {"Handler", Type, 0}, + {"Handler.Args", Field, 0}, + {"Handler.Dir", Field, 0}, + {"Handler.Env", Field, 0}, + {"Handler.InheritEnv", Field, 0}, + {"Handler.Logger", Field, 0}, + {"Handler.Path", Field, 0}, + {"Handler.PathLocationHandler", Field, 0}, + {"Handler.Root", Field, 0}, + {"Handler.Stderr", Field, 7}, + {"Request", Func, 0}, + {"RequestFromMap", Func, 0}, + {"Serve", Func, 0}, + }, + "net/http/cookiejar": { + {"(*Jar).Cookies", Method, 1}, + {"(*Jar).SetCookies", Method, 1}, + {"Jar", Type, 1}, + {"New", Func, 1}, + {"Options", Type, 1}, + {"Options.PublicSuffixList", Field, 1}, + {"PublicSuffixList", Type, 1}, + }, + "net/http/fcgi": { + {"ErrConnClosed", Var, 5}, + {"ErrRequestAborted", Var, 5}, + {"ProcessEnv", Func, 9}, + {"Serve", Func, 0}, + }, + "net/http/httptest": { + {"(*ResponseRecorder).Flush", Method, 0}, + {"(*ResponseRecorder).Header", Method, 0}, + {"(*ResponseRecorder).Result", Method, 7}, + {"(*ResponseRecorder).Write", Method, 0}, + {"(*ResponseRecorder).WriteHeader", Method, 0}, + {"(*ResponseRecorder).WriteString", Method, 6}, + {"(*Server).Certificate", Method, 9}, + {"(*Server).Client", Method, 9}, + {"(*Server).Close", Method, 0}, + {"(*Server).CloseClientConnections", Method, 0}, + {"(*Server).Start", Method, 0}, + {"(*Server).StartTLS", Method, 0}, + {"DefaultRemoteAddr", Const, 0}, + {"NewRecorder", Func, 0}, + {"NewRequest", Func, 7}, + {"NewRequestWithContext", Func, 23}, + {"NewServer", Func, 0}, + {"NewTLSServer", Func, 0}, + {"NewUnstartedServer", Func, 0}, + {"ResponseRecorder", Type, 0}, + {"ResponseRecorder.Body", Field, 0}, + {"ResponseRecorder.Code", Field, 0}, + {"ResponseRecorder.Flushed", Field, 0}, + {"ResponseRecorder.HeaderMap", Field, 0}, + {"Server", Type, 0}, + {"Server.Config", Field, 0}, + {"Server.EnableHTTP2", Field, 14}, + {"Server.Listener", Field, 0}, + {"Server.TLS", Field, 0}, + {"Server.URL", Field, 0}, + }, + "net/http/httptrace": { + {"ClientTrace", Type, 7}, + {"ClientTrace.ConnectDone", Field, 7}, + {"ClientTrace.ConnectStart", Field, 7}, + {"ClientTrace.DNSDone", Field, 7}, + {"ClientTrace.DNSStart", Field, 7}, + {"ClientTrace.GetConn", Field, 7}, + {"ClientTrace.Got100Continue", Field, 7}, + {"ClientTrace.Got1xxResponse", Field, 11}, + {"ClientTrace.GotConn", Field, 7}, + {"ClientTrace.GotFirstResponseByte", Field, 7}, + {"ClientTrace.PutIdleConn", Field, 7}, + {"ClientTrace.TLSHandshakeDone", Field, 8}, + {"ClientTrace.TLSHandshakeStart", Field, 8}, + {"ClientTrace.Wait100Continue", Field, 7}, + {"ClientTrace.WroteHeaderField", Field, 11}, + {"ClientTrace.WroteHeaders", Field, 7}, + {"ClientTrace.WroteRequest", Field, 7}, + {"ContextClientTrace", Func, 7}, + {"DNSDoneInfo", Type, 7}, + {"DNSDoneInfo.Addrs", Field, 7}, + {"DNSDoneInfo.Coalesced", Field, 7}, + {"DNSDoneInfo.Err", Field, 7}, + {"DNSStartInfo", Type, 7}, + {"DNSStartInfo.Host", Field, 7}, + {"GotConnInfo", Type, 7}, + {"GotConnInfo.Conn", Field, 7}, + {"GotConnInfo.IdleTime", Field, 7}, + {"GotConnInfo.Reused", Field, 7}, + {"GotConnInfo.WasIdle", Field, 7}, + {"WithClientTrace", Func, 7}, + {"WroteRequestInfo", Type, 7}, + {"WroteRequestInfo.Err", Field, 7}, + }, + "net/http/httputil": { + {"(*ClientConn).Close", Method, 0}, + {"(*ClientConn).Do", Method, 0}, + {"(*ClientConn).Hijack", Method, 0}, + {"(*ClientConn).Pending", Method, 0}, + {"(*ClientConn).Read", Method, 0}, + {"(*ClientConn).Write", Method, 0}, + {"(*ProxyRequest).SetURL", Method, 20}, + {"(*ProxyRequest).SetXForwarded", Method, 20}, + {"(*ReverseProxy).ServeHTTP", Method, 0}, + {"(*ServerConn).Close", Method, 0}, + {"(*ServerConn).Hijack", Method, 0}, + {"(*ServerConn).Pending", Method, 0}, + {"(*ServerConn).Read", Method, 0}, + {"(*ServerConn).Write", Method, 0}, + {"BufferPool", Type, 6}, + {"ClientConn", Type, 0}, + {"DumpRequest", Func, 0}, + {"DumpRequestOut", Func, 0}, + {"DumpResponse", Func, 0}, + {"ErrClosed", Var, 0}, + {"ErrLineTooLong", Var, 0}, + {"ErrPersistEOF", Var, 0}, + {"ErrPipeline", Var, 0}, + {"NewChunkedReader", Func, 0}, + {"NewChunkedWriter", Func, 0}, + {"NewClientConn", Func, 0}, + {"NewProxyClientConn", Func, 0}, + {"NewServerConn", Func, 0}, + {"NewSingleHostReverseProxy", Func, 0}, + {"ProxyRequest", Type, 20}, + {"ProxyRequest.In", Field, 20}, + {"ProxyRequest.Out", Field, 20}, + {"ReverseProxy", Type, 0}, + {"ReverseProxy.BufferPool", Field, 6}, + {"ReverseProxy.Director", Field, 0}, + {"ReverseProxy.ErrorHandler", Field, 11}, + {"ReverseProxy.ErrorLog", Field, 4}, + {"ReverseProxy.FlushInterval", Field, 0}, + {"ReverseProxy.ModifyResponse", Field, 8}, + {"ReverseProxy.Rewrite", Field, 20}, + {"ReverseProxy.Transport", Field, 0}, + {"ServerConn", Type, 0}, + }, + "net/http/pprof": { + {"Cmdline", Func, 0}, + {"Handler", Func, 0}, + {"Index", Func, 0}, + {"Profile", Func, 0}, + {"Symbol", Func, 0}, + {"Trace", Func, 5}, + }, + "net/mail": { + {"(*Address).String", Method, 0}, + {"(*AddressParser).Parse", Method, 5}, + {"(*AddressParser).ParseList", Method, 5}, + {"(Header).AddressList", Method, 0}, + {"(Header).Date", Method, 0}, + {"(Header).Get", Method, 0}, + {"Address", Type, 0}, + {"Address.Address", Field, 0}, + {"Address.Name", Field, 0}, + {"AddressParser", Type, 5}, + {"AddressParser.WordDecoder", Field, 5}, + {"ErrHeaderNotPresent", Var, 0}, + {"Header", Type, 0}, + {"Message", Type, 0}, + {"Message.Body", Field, 0}, + {"Message.Header", Field, 0}, + {"ParseAddress", Func, 1}, + {"ParseAddressList", Func, 1}, + {"ParseDate", Func, 8}, + {"ReadMessage", Func, 0}, + }, + "net/netip": { + {"(*Addr).UnmarshalBinary", Method, 18}, + {"(*Addr).UnmarshalText", Method, 18}, + {"(*AddrPort).UnmarshalBinary", Method, 18}, + {"(*AddrPort).UnmarshalText", Method, 18}, + {"(*Prefix).UnmarshalBinary", Method, 18}, + {"(*Prefix).UnmarshalText", Method, 18}, + {"(Addr).AppendTo", Method, 18}, + {"(Addr).As16", Method, 18}, + {"(Addr).As4", Method, 18}, + {"(Addr).AsSlice", Method, 18}, + {"(Addr).BitLen", Method, 18}, + {"(Addr).Compare", Method, 18}, + {"(Addr).Is4", Method, 18}, + {"(Addr).Is4In6", Method, 18}, + {"(Addr).Is6", Method, 18}, + {"(Addr).IsGlobalUnicast", Method, 18}, + {"(Addr).IsInterfaceLocalMulticast", Method, 18}, + {"(Addr).IsLinkLocalMulticast", Method, 18}, + {"(Addr).IsLinkLocalUnicast", Method, 18}, + {"(Addr).IsLoopback", Method, 18}, + {"(Addr).IsMulticast", Method, 18}, + {"(Addr).IsPrivate", Method, 18}, + {"(Addr).IsUnspecified", Method, 18}, + {"(Addr).IsValid", Method, 18}, + {"(Addr).Less", Method, 18}, + {"(Addr).MarshalBinary", Method, 18}, + {"(Addr).MarshalText", Method, 18}, + {"(Addr).Next", Method, 18}, + {"(Addr).Prefix", Method, 18}, + {"(Addr).Prev", Method, 18}, + {"(Addr).String", Method, 18}, + {"(Addr).StringExpanded", Method, 18}, + {"(Addr).Unmap", Method, 18}, + {"(Addr).WithZone", Method, 18}, + {"(Addr).Zone", Method, 18}, + {"(AddrPort).Addr", Method, 18}, + {"(AddrPort).AppendTo", Method, 18}, + {"(AddrPort).Compare", Method, 22}, + {"(AddrPort).IsValid", Method, 18}, + {"(AddrPort).MarshalBinary", Method, 18}, + {"(AddrPort).MarshalText", Method, 18}, + {"(AddrPort).Port", Method, 18}, + {"(AddrPort).String", Method, 18}, + {"(Prefix).Addr", Method, 18}, + {"(Prefix).AppendTo", Method, 18}, + {"(Prefix).Bits", Method, 18}, + {"(Prefix).Contains", Method, 18}, + {"(Prefix).IsSingleIP", Method, 18}, + {"(Prefix).IsValid", Method, 18}, + {"(Prefix).MarshalBinary", Method, 18}, + {"(Prefix).MarshalText", Method, 18}, + {"(Prefix).Masked", Method, 18}, + {"(Prefix).Overlaps", Method, 18}, + {"(Prefix).String", Method, 18}, + {"Addr", Type, 18}, + {"AddrFrom16", Func, 18}, + {"AddrFrom4", Func, 18}, + {"AddrFromSlice", Func, 18}, + {"AddrPort", Type, 18}, + {"AddrPortFrom", Func, 18}, + {"IPv4Unspecified", Func, 18}, + {"IPv6LinkLocalAllNodes", Func, 18}, + {"IPv6LinkLocalAllRouters", Func, 20}, + {"IPv6Loopback", Func, 20}, + {"IPv6Unspecified", Func, 18}, + {"MustParseAddr", Func, 18}, + {"MustParseAddrPort", Func, 18}, + {"MustParsePrefix", Func, 18}, + {"ParseAddr", Func, 18}, + {"ParseAddrPort", Func, 18}, + {"ParsePrefix", Func, 18}, + {"Prefix", Type, 18}, + {"PrefixFrom", Func, 18}, + }, + "net/rpc": { + {"(*Client).Call", Method, 0}, + {"(*Client).Close", Method, 0}, + {"(*Client).Go", Method, 0}, + {"(*Server).Accept", Method, 0}, + {"(*Server).HandleHTTP", Method, 0}, + {"(*Server).Register", Method, 0}, + {"(*Server).RegisterName", Method, 0}, + {"(*Server).ServeCodec", Method, 0}, + {"(*Server).ServeConn", Method, 0}, + {"(*Server).ServeHTTP", Method, 0}, + {"(*Server).ServeRequest", Method, 0}, + {"(ServerError).Error", Method, 0}, + {"Accept", Func, 0}, + {"Call", Type, 0}, + {"Call.Args", Field, 0}, + {"Call.Done", Field, 0}, + {"Call.Error", Field, 0}, + {"Call.Reply", Field, 0}, + {"Call.ServiceMethod", Field, 0}, + {"Client", Type, 0}, + {"ClientCodec", Type, 0}, + {"DefaultDebugPath", Const, 0}, + {"DefaultRPCPath", Const, 0}, + {"DefaultServer", Var, 0}, + {"Dial", Func, 0}, + {"DialHTTP", Func, 0}, + {"DialHTTPPath", Func, 0}, + {"ErrShutdown", Var, 0}, + {"HandleHTTP", Func, 0}, + {"NewClient", Func, 0}, + {"NewClientWithCodec", Func, 0}, + {"NewServer", Func, 0}, + {"Register", Func, 0}, + {"RegisterName", Func, 0}, + {"Request", Type, 0}, + {"Request.Seq", Field, 0}, + {"Request.ServiceMethod", Field, 0}, + {"Response", Type, 0}, + {"Response.Error", Field, 0}, + {"Response.Seq", Field, 0}, + {"Response.ServiceMethod", Field, 0}, + {"ServeCodec", Func, 0}, + {"ServeConn", Func, 0}, + {"ServeRequest", Func, 0}, + {"Server", Type, 0}, + {"ServerCodec", Type, 0}, + {"ServerError", Type, 0}, + }, + "net/rpc/jsonrpc": { + {"Dial", Func, 0}, + {"NewClient", Func, 0}, + {"NewClientCodec", Func, 0}, + {"NewServerCodec", Func, 0}, + {"ServeConn", Func, 0}, + }, + "net/smtp": { + {"(*Client).Auth", Method, 0}, + {"(*Client).Close", Method, 2}, + {"(*Client).Data", Method, 0}, + {"(*Client).Extension", Method, 0}, + {"(*Client).Hello", Method, 1}, + {"(*Client).Mail", Method, 0}, + {"(*Client).Noop", Method, 10}, + {"(*Client).Quit", Method, 0}, + {"(*Client).Rcpt", Method, 0}, + {"(*Client).Reset", Method, 0}, + {"(*Client).StartTLS", Method, 0}, + {"(*Client).TLSConnectionState", Method, 5}, + {"(*Client).Verify", Method, 0}, + {"Auth", Type, 0}, + {"CRAMMD5Auth", Func, 0}, + {"Client", Type, 0}, + {"Client.Text", Field, 0}, + {"Dial", Func, 0}, + {"NewClient", Func, 0}, + {"PlainAuth", Func, 0}, + {"SendMail", Func, 0}, + {"ServerInfo", Type, 0}, + {"ServerInfo.Auth", Field, 0}, + {"ServerInfo.Name", Field, 0}, + {"ServerInfo.TLS", Field, 0}, + }, + "net/textproto": { + {"(*Conn).Close", Method, 0}, + {"(*Conn).Cmd", Method, 0}, + {"(*Conn).DotReader", Method, 0}, + {"(*Conn).DotWriter", Method, 0}, + {"(*Conn).EndRequest", Method, 0}, + {"(*Conn).EndResponse", Method, 0}, + {"(*Conn).Next", Method, 0}, + {"(*Conn).PrintfLine", Method, 0}, + {"(*Conn).ReadCodeLine", Method, 0}, + {"(*Conn).ReadContinuedLine", Method, 0}, + {"(*Conn).ReadContinuedLineBytes", Method, 0}, + {"(*Conn).ReadDotBytes", Method, 0}, + {"(*Conn).ReadDotLines", Method, 0}, + {"(*Conn).ReadLine", Method, 0}, + {"(*Conn).ReadLineBytes", Method, 0}, + {"(*Conn).ReadMIMEHeader", Method, 0}, + {"(*Conn).ReadResponse", Method, 0}, + {"(*Conn).StartRequest", Method, 0}, + {"(*Conn).StartResponse", Method, 0}, + {"(*Error).Error", Method, 0}, + {"(*Pipeline).EndRequest", Method, 0}, + {"(*Pipeline).EndResponse", Method, 0}, + {"(*Pipeline).Next", Method, 0}, + {"(*Pipeline).StartRequest", Method, 0}, + {"(*Pipeline).StartResponse", Method, 0}, + {"(*Reader).DotReader", Method, 0}, + {"(*Reader).ReadCodeLine", Method, 0}, + {"(*Reader).ReadContinuedLine", Method, 0}, + {"(*Reader).ReadContinuedLineBytes", Method, 0}, + {"(*Reader).ReadDotBytes", Method, 0}, + {"(*Reader).ReadDotLines", Method, 0}, + {"(*Reader).ReadLine", Method, 0}, + {"(*Reader).ReadLineBytes", Method, 0}, + {"(*Reader).ReadMIMEHeader", Method, 0}, + {"(*Reader).ReadResponse", Method, 0}, + {"(*Writer).DotWriter", Method, 0}, + {"(*Writer).PrintfLine", Method, 0}, + {"(MIMEHeader).Add", Method, 0}, + {"(MIMEHeader).Del", Method, 0}, + {"(MIMEHeader).Get", Method, 0}, + {"(MIMEHeader).Set", Method, 0}, + {"(MIMEHeader).Values", Method, 14}, + {"(ProtocolError).Error", Method, 0}, + {"CanonicalMIMEHeaderKey", Func, 0}, + {"Conn", Type, 0}, + {"Conn.Pipeline", Field, 0}, + {"Conn.Reader", Field, 0}, + {"Conn.Writer", Field, 0}, + {"Dial", Func, 0}, + {"Error", Type, 0}, + {"Error.Code", Field, 0}, + {"Error.Msg", Field, 0}, + {"MIMEHeader", Type, 0}, + {"NewConn", Func, 0}, + {"NewReader", Func, 0}, + {"NewWriter", Func, 0}, + {"Pipeline", Type, 0}, + {"ProtocolError", Type, 0}, + {"Reader", Type, 0}, + {"Reader.R", Field, 0}, + {"TrimBytes", Func, 1}, + {"TrimString", Func, 1}, + {"Writer", Type, 0}, + {"Writer.W", Field, 0}, + }, + "net/url": { + {"(*Error).Error", Method, 0}, + {"(*Error).Temporary", Method, 6}, + {"(*Error).Timeout", Method, 6}, + {"(*Error).Unwrap", Method, 13}, + {"(*URL).EscapedFragment", Method, 15}, + {"(*URL).EscapedPath", Method, 5}, + {"(*URL).Hostname", Method, 8}, + {"(*URL).IsAbs", Method, 0}, + {"(*URL).JoinPath", Method, 19}, + {"(*URL).MarshalBinary", Method, 8}, + {"(*URL).Parse", Method, 0}, + {"(*URL).Port", Method, 8}, + {"(*URL).Query", Method, 0}, + {"(*URL).Redacted", Method, 15}, + {"(*URL).RequestURI", Method, 0}, + {"(*URL).ResolveReference", Method, 0}, + {"(*URL).String", Method, 0}, + {"(*URL).UnmarshalBinary", Method, 8}, + {"(*Userinfo).Password", Method, 0}, + {"(*Userinfo).String", Method, 0}, + {"(*Userinfo).Username", Method, 0}, + {"(EscapeError).Error", Method, 0}, + {"(InvalidHostError).Error", Method, 6}, + {"(Values).Add", Method, 0}, + {"(Values).Del", Method, 0}, + {"(Values).Encode", Method, 0}, + {"(Values).Get", Method, 0}, + {"(Values).Has", Method, 17}, + {"(Values).Set", Method, 0}, + {"Error", Type, 0}, + {"Error.Err", Field, 0}, + {"Error.Op", Field, 0}, + {"Error.URL", Field, 0}, + {"EscapeError", Type, 0}, + {"InvalidHostError", Type, 6}, + {"JoinPath", Func, 19}, + {"Parse", Func, 0}, + {"ParseQuery", Func, 0}, + {"ParseRequestURI", Func, 0}, + {"PathEscape", Func, 8}, + {"PathUnescape", Func, 8}, + {"QueryEscape", Func, 0}, + {"QueryUnescape", Func, 0}, + {"URL", Type, 0}, + {"URL.ForceQuery", Field, 7}, + {"URL.Fragment", Field, 0}, + {"URL.Host", Field, 0}, + {"URL.OmitHost", Field, 19}, + {"URL.Opaque", Field, 0}, + {"URL.Path", Field, 0}, + {"URL.RawFragment", Field, 15}, + {"URL.RawPath", Field, 5}, + {"URL.RawQuery", Field, 0}, + {"URL.Scheme", Field, 0}, + {"URL.User", Field, 0}, + {"User", Func, 0}, + {"UserPassword", Func, 0}, + {"Userinfo", Type, 0}, + {"Values", Type, 0}, + }, + "os": { + {"(*File).Chdir", Method, 0}, + {"(*File).Chmod", Method, 0}, + {"(*File).Chown", Method, 0}, + {"(*File).Close", Method, 0}, + {"(*File).Fd", Method, 0}, + {"(*File).Name", Method, 0}, + {"(*File).Read", Method, 0}, + {"(*File).ReadAt", Method, 0}, + {"(*File).ReadDir", Method, 16}, + {"(*File).ReadFrom", Method, 15}, + {"(*File).Readdir", Method, 0}, + {"(*File).Readdirnames", Method, 0}, + {"(*File).Seek", Method, 0}, + {"(*File).SetDeadline", Method, 10}, + {"(*File).SetReadDeadline", Method, 10}, + {"(*File).SetWriteDeadline", Method, 10}, + {"(*File).Stat", Method, 0}, + {"(*File).Sync", Method, 0}, + {"(*File).SyscallConn", Method, 12}, + {"(*File).Truncate", Method, 0}, + {"(*File).Write", Method, 0}, + {"(*File).WriteAt", Method, 0}, + {"(*File).WriteString", Method, 0}, + {"(*File).WriteTo", Method, 22}, + {"(*LinkError).Error", Method, 0}, + {"(*LinkError).Unwrap", Method, 13}, + {"(*PathError).Error", Method, 0}, + {"(*PathError).Timeout", Method, 10}, + {"(*PathError).Unwrap", Method, 13}, + {"(*Process).Kill", Method, 0}, + {"(*Process).Release", Method, 0}, + {"(*Process).Signal", Method, 0}, + {"(*Process).Wait", Method, 0}, + {"(*ProcessState).ExitCode", Method, 12}, + {"(*ProcessState).Exited", Method, 0}, + {"(*ProcessState).Pid", Method, 0}, + {"(*ProcessState).String", Method, 0}, + {"(*ProcessState).Success", Method, 0}, + {"(*ProcessState).Sys", Method, 0}, + {"(*ProcessState).SysUsage", Method, 0}, + {"(*ProcessState).SystemTime", Method, 0}, + {"(*ProcessState).UserTime", Method, 0}, + {"(*SyscallError).Error", Method, 0}, + {"(*SyscallError).Timeout", Method, 10}, + {"(*SyscallError).Unwrap", Method, 13}, + {"(FileMode).IsDir", Method, 0}, + {"(FileMode).IsRegular", Method, 1}, + {"(FileMode).Perm", Method, 0}, + {"(FileMode).String", Method, 0}, + {"Args", Var, 0}, + {"Chdir", Func, 0}, + {"Chmod", Func, 0}, + {"Chown", Func, 0}, + {"Chtimes", Func, 0}, + {"Clearenv", Func, 0}, + {"CopyFS", Func, 23}, + {"Create", Func, 0}, + {"CreateTemp", Func, 16}, + {"DevNull", Const, 0}, + {"DirEntry", Type, 16}, + {"DirFS", Func, 16}, + {"Environ", Func, 0}, + {"ErrClosed", Var, 8}, + {"ErrDeadlineExceeded", Var, 15}, + {"ErrExist", Var, 0}, + {"ErrInvalid", Var, 0}, + {"ErrNoDeadline", Var, 10}, + {"ErrNotExist", Var, 0}, + {"ErrPermission", Var, 0}, + {"ErrProcessDone", Var, 16}, + {"Executable", Func, 8}, + {"Exit", Func, 0}, + {"Expand", Func, 0}, + {"ExpandEnv", Func, 0}, + {"File", Type, 0}, + {"FileInfo", Type, 0}, + {"FileMode", Type, 0}, + {"FindProcess", Func, 0}, + {"Getegid", Func, 0}, + {"Getenv", Func, 0}, + {"Geteuid", Func, 0}, + {"Getgid", Func, 0}, + {"Getgroups", Func, 0}, + {"Getpagesize", Func, 0}, + {"Getpid", Func, 0}, + {"Getppid", Func, 0}, + {"Getuid", Func, 0}, + {"Getwd", Func, 0}, + {"Hostname", Func, 0}, + {"Interrupt", Var, 0}, + {"IsExist", Func, 0}, + {"IsNotExist", Func, 0}, + {"IsPathSeparator", Func, 0}, + {"IsPermission", Func, 0}, + {"IsTimeout", Func, 10}, + {"Kill", Var, 0}, + {"Lchown", Func, 0}, + {"Link", Func, 0}, + {"LinkError", Type, 0}, + {"LinkError.Err", Field, 0}, + {"LinkError.New", Field, 0}, + {"LinkError.Old", Field, 0}, + {"LinkError.Op", Field, 0}, + {"LookupEnv", Func, 5}, + {"Lstat", Func, 0}, + {"Mkdir", Func, 0}, + {"MkdirAll", Func, 0}, + {"MkdirTemp", Func, 16}, + {"ModeAppend", Const, 0}, + {"ModeCharDevice", Const, 0}, + {"ModeDevice", Const, 0}, + {"ModeDir", Const, 0}, + {"ModeExclusive", Const, 0}, + {"ModeIrregular", Const, 11}, + {"ModeNamedPipe", Const, 0}, + {"ModePerm", Const, 0}, + {"ModeSetgid", Const, 0}, + {"ModeSetuid", Const, 0}, + {"ModeSocket", Const, 0}, + {"ModeSticky", Const, 0}, + {"ModeSymlink", Const, 0}, + {"ModeTemporary", Const, 0}, + {"ModeType", Const, 0}, + {"NewFile", Func, 0}, + {"NewSyscallError", Func, 0}, + {"O_APPEND", Const, 0}, + {"O_CREATE", Const, 0}, + {"O_EXCL", Const, 0}, + {"O_RDONLY", Const, 0}, + {"O_RDWR", Const, 0}, + {"O_SYNC", Const, 0}, + {"O_TRUNC", Const, 0}, + {"O_WRONLY", Const, 0}, + {"Open", Func, 0}, + {"OpenFile", Func, 0}, + {"PathError", Type, 0}, + {"PathError.Err", Field, 0}, + {"PathError.Op", Field, 0}, + {"PathError.Path", Field, 0}, + {"PathListSeparator", Const, 0}, + {"PathSeparator", Const, 0}, + {"Pipe", Func, 0}, + {"ProcAttr", Type, 0}, + {"ProcAttr.Dir", Field, 0}, + {"ProcAttr.Env", Field, 0}, + {"ProcAttr.Files", Field, 0}, + {"ProcAttr.Sys", Field, 0}, + {"Process", Type, 0}, + {"Process.Pid", Field, 0}, + {"ProcessState", Type, 0}, + {"ReadDir", Func, 16}, + {"ReadFile", Func, 16}, + {"Readlink", Func, 0}, + {"Remove", Func, 0}, + {"RemoveAll", Func, 0}, + {"Rename", Func, 0}, + {"SEEK_CUR", Const, 0}, + {"SEEK_END", Const, 0}, + {"SEEK_SET", Const, 0}, + {"SameFile", Func, 0}, + {"Setenv", Func, 0}, + {"Signal", Type, 0}, + {"StartProcess", Func, 0}, + {"Stat", Func, 0}, + {"Stderr", Var, 0}, + {"Stdin", Var, 0}, + {"Stdout", Var, 0}, + {"Symlink", Func, 0}, + {"SyscallError", Type, 0}, + {"SyscallError.Err", Field, 0}, + {"SyscallError.Syscall", Field, 0}, + {"TempDir", Func, 0}, + {"Truncate", Func, 0}, + {"Unsetenv", Func, 4}, + {"UserCacheDir", Func, 11}, + {"UserConfigDir", Func, 13}, + {"UserHomeDir", Func, 12}, + {"WriteFile", Func, 16}, + }, + "os/exec": { + {"(*Cmd).CombinedOutput", Method, 0}, + {"(*Cmd).Environ", Method, 19}, + {"(*Cmd).Output", Method, 0}, + {"(*Cmd).Run", Method, 0}, + {"(*Cmd).Start", Method, 0}, + {"(*Cmd).StderrPipe", Method, 0}, + {"(*Cmd).StdinPipe", Method, 0}, + {"(*Cmd).StdoutPipe", Method, 0}, + {"(*Cmd).String", Method, 13}, + {"(*Cmd).Wait", Method, 0}, + {"(*Error).Error", Method, 0}, + {"(*Error).Unwrap", Method, 13}, + {"(*ExitError).Error", Method, 0}, + {"(ExitError).ExitCode", Method, 12}, + {"(ExitError).Exited", Method, 0}, + {"(ExitError).Pid", Method, 0}, + {"(ExitError).String", Method, 0}, + {"(ExitError).Success", Method, 0}, + {"(ExitError).Sys", Method, 0}, + {"(ExitError).SysUsage", Method, 0}, + {"(ExitError).SystemTime", Method, 0}, + {"(ExitError).UserTime", Method, 0}, + {"Cmd", Type, 0}, + {"Cmd.Args", Field, 0}, + {"Cmd.Cancel", Field, 20}, + {"Cmd.Dir", Field, 0}, + {"Cmd.Env", Field, 0}, + {"Cmd.Err", Field, 19}, + {"Cmd.ExtraFiles", Field, 0}, + {"Cmd.Path", Field, 0}, + {"Cmd.Process", Field, 0}, + {"Cmd.ProcessState", Field, 0}, + {"Cmd.Stderr", Field, 0}, + {"Cmd.Stdin", Field, 0}, + {"Cmd.Stdout", Field, 0}, + {"Cmd.SysProcAttr", Field, 0}, + {"Cmd.WaitDelay", Field, 20}, + {"Command", Func, 0}, + {"CommandContext", Func, 7}, + {"ErrDot", Var, 19}, + {"ErrNotFound", Var, 0}, + {"ErrWaitDelay", Var, 20}, + {"Error", Type, 0}, + {"Error.Err", Field, 0}, + {"Error.Name", Field, 0}, + {"ExitError", Type, 0}, + {"ExitError.ProcessState", Field, 0}, + {"ExitError.Stderr", Field, 6}, + {"LookPath", Func, 0}, + }, + "os/signal": { + {"Ignore", Func, 5}, + {"Ignored", Func, 11}, + {"Notify", Func, 0}, + {"NotifyContext", Func, 16}, + {"Reset", Func, 5}, + {"Stop", Func, 1}, + }, + "os/user": { + {"(*User).GroupIds", Method, 7}, + {"(UnknownGroupError).Error", Method, 7}, + {"(UnknownGroupIdError).Error", Method, 7}, + {"(UnknownUserError).Error", Method, 0}, + {"(UnknownUserIdError).Error", Method, 0}, + {"Current", Func, 0}, + {"Group", Type, 7}, + {"Group.Gid", Field, 7}, + {"Group.Name", Field, 7}, + {"Lookup", Func, 0}, + {"LookupGroup", Func, 7}, + {"LookupGroupId", Func, 7}, + {"LookupId", Func, 0}, + {"UnknownGroupError", Type, 7}, + {"UnknownGroupIdError", Type, 7}, + {"UnknownUserError", Type, 0}, + {"UnknownUserIdError", Type, 0}, + {"User", Type, 0}, + {"User.Gid", Field, 0}, + {"User.HomeDir", Field, 0}, + {"User.Name", Field, 0}, + {"User.Uid", Field, 0}, + {"User.Username", Field, 0}, + }, + "path": { + {"Base", Func, 0}, + {"Clean", Func, 0}, + {"Dir", Func, 0}, + {"ErrBadPattern", Var, 0}, + {"Ext", Func, 0}, + {"IsAbs", Func, 0}, + {"Join", Func, 0}, + {"Match", Func, 0}, + {"Split", Func, 0}, + }, + "path/filepath": { + {"Abs", Func, 0}, + {"Base", Func, 0}, + {"Clean", Func, 0}, + {"Dir", Func, 0}, + {"ErrBadPattern", Var, 0}, + {"EvalSymlinks", Func, 0}, + {"Ext", Func, 0}, + {"FromSlash", Func, 0}, + {"Glob", Func, 0}, + {"HasPrefix", Func, 0}, + {"IsAbs", Func, 0}, + {"IsLocal", Func, 20}, + {"Join", Func, 0}, + {"ListSeparator", Const, 0}, + {"Localize", Func, 23}, + {"Match", Func, 0}, + {"Rel", Func, 0}, + {"Separator", Const, 0}, + {"SkipAll", Var, 20}, + {"SkipDir", Var, 0}, + {"Split", Func, 0}, + {"SplitList", Func, 0}, + {"ToSlash", Func, 0}, + {"VolumeName", Func, 0}, + {"Walk", Func, 0}, + {"WalkDir", Func, 16}, + {"WalkFunc", Type, 0}, + }, + "plugin": { + {"(*Plugin).Lookup", Method, 8}, + {"Open", Func, 8}, + {"Plugin", Type, 8}, + {"Symbol", Type, 8}, + }, + "reflect": { + {"(*MapIter).Key", Method, 12}, + {"(*MapIter).Next", Method, 12}, + {"(*MapIter).Reset", Method, 18}, + {"(*MapIter).Value", Method, 12}, + {"(*ValueError).Error", Method, 0}, + {"(ChanDir).String", Method, 0}, + {"(Kind).String", Method, 0}, + {"(Method).IsExported", Method, 17}, + {"(StructField).IsExported", Method, 17}, + {"(StructTag).Get", Method, 0}, + {"(StructTag).Lookup", Method, 7}, + {"(Value).Addr", Method, 0}, + {"(Value).Bool", Method, 0}, + {"(Value).Bytes", Method, 0}, + {"(Value).Call", Method, 0}, + {"(Value).CallSlice", Method, 0}, + {"(Value).CanAddr", Method, 0}, + {"(Value).CanComplex", Method, 18}, + {"(Value).CanConvert", Method, 17}, + {"(Value).CanFloat", Method, 18}, + {"(Value).CanInt", Method, 18}, + {"(Value).CanInterface", Method, 0}, + {"(Value).CanSet", Method, 0}, + {"(Value).CanUint", Method, 18}, + {"(Value).Cap", Method, 0}, + {"(Value).Clear", Method, 21}, + {"(Value).Close", Method, 0}, + {"(Value).Comparable", Method, 20}, + {"(Value).Complex", Method, 0}, + {"(Value).Convert", Method, 1}, + {"(Value).Elem", Method, 0}, + {"(Value).Equal", Method, 20}, + {"(Value).Field", Method, 0}, + {"(Value).FieldByIndex", Method, 0}, + {"(Value).FieldByIndexErr", Method, 18}, + {"(Value).FieldByName", Method, 0}, + {"(Value).FieldByNameFunc", Method, 0}, + {"(Value).Float", Method, 0}, + {"(Value).Grow", Method, 20}, + {"(Value).Index", Method, 0}, + {"(Value).Int", Method, 0}, + {"(Value).Interface", Method, 0}, + {"(Value).InterfaceData", Method, 0}, + {"(Value).IsNil", Method, 0}, + {"(Value).IsValid", Method, 0}, + {"(Value).IsZero", Method, 13}, + {"(Value).Kind", Method, 0}, + {"(Value).Len", Method, 0}, + {"(Value).MapIndex", Method, 0}, + {"(Value).MapKeys", Method, 0}, + {"(Value).MapRange", Method, 12}, + {"(Value).Method", Method, 0}, + {"(Value).MethodByName", Method, 0}, + {"(Value).NumField", Method, 0}, + {"(Value).NumMethod", Method, 0}, + {"(Value).OverflowComplex", Method, 0}, + {"(Value).OverflowFloat", Method, 0}, + {"(Value).OverflowInt", Method, 0}, + {"(Value).OverflowUint", Method, 0}, + {"(Value).Pointer", Method, 0}, + {"(Value).Recv", Method, 0}, + {"(Value).Send", Method, 0}, + {"(Value).Seq", Method, 23}, + {"(Value).Seq2", Method, 23}, + {"(Value).Set", Method, 0}, + {"(Value).SetBool", Method, 0}, + {"(Value).SetBytes", Method, 0}, + {"(Value).SetCap", Method, 2}, + {"(Value).SetComplex", Method, 0}, + {"(Value).SetFloat", Method, 0}, + {"(Value).SetInt", Method, 0}, + {"(Value).SetIterKey", Method, 18}, + {"(Value).SetIterValue", Method, 18}, + {"(Value).SetLen", Method, 0}, + {"(Value).SetMapIndex", Method, 0}, + {"(Value).SetPointer", Method, 0}, + {"(Value).SetString", Method, 0}, + {"(Value).SetUint", Method, 0}, + {"(Value).SetZero", Method, 20}, + {"(Value).Slice", Method, 0}, + {"(Value).Slice3", Method, 2}, + {"(Value).String", Method, 0}, + {"(Value).TryRecv", Method, 0}, + {"(Value).TrySend", Method, 0}, + {"(Value).Type", Method, 0}, + {"(Value).Uint", Method, 0}, + {"(Value).UnsafeAddr", Method, 0}, + {"(Value).UnsafePointer", Method, 18}, + {"Append", Func, 0}, + {"AppendSlice", Func, 0}, + {"Array", Const, 0}, + {"ArrayOf", Func, 5}, + {"Bool", Const, 0}, + {"BothDir", Const, 0}, + {"Chan", Const, 0}, + {"ChanDir", Type, 0}, + {"ChanOf", Func, 1}, + {"Complex128", Const, 0}, + {"Complex64", Const, 0}, + {"Copy", Func, 0}, + {"DeepEqual", Func, 0}, + {"Float32", Const, 0}, + {"Float64", Const, 0}, + {"Func", Const, 0}, + {"FuncOf", Func, 5}, + {"Indirect", Func, 0}, + {"Int", Const, 0}, + {"Int16", Const, 0}, + {"Int32", Const, 0}, + {"Int64", Const, 0}, + {"Int8", Const, 0}, + {"Interface", Const, 0}, + {"Invalid", Const, 0}, + {"Kind", Type, 0}, + {"MakeChan", Func, 0}, + {"MakeFunc", Func, 1}, + {"MakeMap", Func, 0}, + {"MakeMapWithSize", Func, 9}, + {"MakeSlice", Func, 0}, + {"Map", Const, 0}, + {"MapIter", Type, 12}, + {"MapOf", Func, 1}, + {"Method", Type, 0}, + {"Method.Func", Field, 0}, + {"Method.Index", Field, 0}, + {"Method.Name", Field, 0}, + {"Method.PkgPath", Field, 0}, + {"Method.Type", Field, 0}, + {"New", Func, 0}, + {"NewAt", Func, 0}, + {"Pointer", Const, 18}, + {"PointerTo", Func, 18}, + {"Ptr", Const, 0}, + {"PtrTo", Func, 0}, + {"RecvDir", Const, 0}, + {"Select", Func, 1}, + {"SelectCase", Type, 1}, + {"SelectCase.Chan", Field, 1}, + {"SelectCase.Dir", Field, 1}, + {"SelectCase.Send", Field, 1}, + {"SelectDefault", Const, 1}, + {"SelectDir", Type, 1}, + {"SelectRecv", Const, 1}, + {"SelectSend", Const, 1}, + {"SendDir", Const, 0}, + {"Slice", Const, 0}, + {"SliceAt", Func, 23}, + {"SliceHeader", Type, 0}, + {"SliceHeader.Cap", Field, 0}, + {"SliceHeader.Data", Field, 0}, + {"SliceHeader.Len", Field, 0}, + {"SliceOf", Func, 1}, + {"String", Const, 0}, + {"StringHeader", Type, 0}, + {"StringHeader.Data", Field, 0}, + {"StringHeader.Len", Field, 0}, + {"Struct", Const, 0}, + {"StructField", Type, 0}, + {"StructField.Anonymous", Field, 0}, + {"StructField.Index", Field, 0}, + {"StructField.Name", Field, 0}, + {"StructField.Offset", Field, 0}, + {"StructField.PkgPath", Field, 0}, + {"StructField.Tag", Field, 0}, + {"StructField.Type", Field, 0}, + {"StructOf", Func, 7}, + {"StructTag", Type, 0}, + {"Swapper", Func, 8}, + {"Type", Type, 0}, + {"TypeFor", Func, 22}, + {"TypeOf", Func, 0}, + {"Uint", Const, 0}, + {"Uint16", Const, 0}, + {"Uint32", Const, 0}, + {"Uint64", Const, 0}, + {"Uint8", Const, 0}, + {"Uintptr", Const, 0}, + {"UnsafePointer", Const, 0}, + {"Value", Type, 0}, + {"ValueError", Type, 0}, + {"ValueError.Kind", Field, 0}, + {"ValueError.Method", Field, 0}, + {"ValueOf", Func, 0}, + {"VisibleFields", Func, 17}, + {"Zero", Func, 0}, + }, + "regexp": { + {"(*Regexp).Copy", Method, 6}, + {"(*Regexp).Expand", Method, 0}, + {"(*Regexp).ExpandString", Method, 0}, + {"(*Regexp).Find", Method, 0}, + {"(*Regexp).FindAll", Method, 0}, + {"(*Regexp).FindAllIndex", Method, 0}, + {"(*Regexp).FindAllString", Method, 0}, + {"(*Regexp).FindAllStringIndex", Method, 0}, + {"(*Regexp).FindAllStringSubmatch", Method, 0}, + {"(*Regexp).FindAllStringSubmatchIndex", Method, 0}, + {"(*Regexp).FindAllSubmatch", Method, 0}, + {"(*Regexp).FindAllSubmatchIndex", Method, 0}, + {"(*Regexp).FindIndex", Method, 0}, + {"(*Regexp).FindReaderIndex", Method, 0}, + {"(*Regexp).FindReaderSubmatchIndex", Method, 0}, + {"(*Regexp).FindString", Method, 0}, + {"(*Regexp).FindStringIndex", Method, 0}, + {"(*Regexp).FindStringSubmatch", Method, 0}, + {"(*Regexp).FindStringSubmatchIndex", Method, 0}, + {"(*Regexp).FindSubmatch", Method, 0}, + {"(*Regexp).FindSubmatchIndex", Method, 0}, + {"(*Regexp).LiteralPrefix", Method, 0}, + {"(*Regexp).Longest", Method, 1}, + {"(*Regexp).MarshalText", Method, 21}, + {"(*Regexp).Match", Method, 0}, + {"(*Regexp).MatchReader", Method, 0}, + {"(*Regexp).MatchString", Method, 0}, + {"(*Regexp).NumSubexp", Method, 0}, + {"(*Regexp).ReplaceAll", Method, 0}, + {"(*Regexp).ReplaceAllFunc", Method, 0}, + {"(*Regexp).ReplaceAllLiteral", Method, 0}, + {"(*Regexp).ReplaceAllLiteralString", Method, 0}, + {"(*Regexp).ReplaceAllString", Method, 0}, + {"(*Regexp).ReplaceAllStringFunc", Method, 0}, + {"(*Regexp).Split", Method, 1}, + {"(*Regexp).String", Method, 0}, + {"(*Regexp).SubexpIndex", Method, 15}, + {"(*Regexp).SubexpNames", Method, 0}, + {"(*Regexp).UnmarshalText", Method, 21}, + {"Compile", Func, 0}, + {"CompilePOSIX", Func, 0}, + {"Match", Func, 0}, + {"MatchReader", Func, 0}, + {"MatchString", Func, 0}, + {"MustCompile", Func, 0}, + {"MustCompilePOSIX", Func, 0}, + {"QuoteMeta", Func, 0}, + {"Regexp", Type, 0}, + }, + "regexp/syntax": { + {"(*Error).Error", Method, 0}, + {"(*Inst).MatchEmptyWidth", Method, 0}, + {"(*Inst).MatchRune", Method, 0}, + {"(*Inst).MatchRunePos", Method, 3}, + {"(*Inst).String", Method, 0}, + {"(*Prog).Prefix", Method, 0}, + {"(*Prog).StartCond", Method, 0}, + {"(*Prog).String", Method, 0}, + {"(*Regexp).CapNames", Method, 0}, + {"(*Regexp).Equal", Method, 0}, + {"(*Regexp).MaxCap", Method, 0}, + {"(*Regexp).Simplify", Method, 0}, + {"(*Regexp).String", Method, 0}, + {"(ErrorCode).String", Method, 0}, + {"(InstOp).String", Method, 3}, + {"(Op).String", Method, 11}, + {"ClassNL", Const, 0}, + {"Compile", Func, 0}, + {"DotNL", Const, 0}, + {"EmptyBeginLine", Const, 0}, + {"EmptyBeginText", Const, 0}, + {"EmptyEndLine", Const, 0}, + {"EmptyEndText", Const, 0}, + {"EmptyNoWordBoundary", Const, 0}, + {"EmptyOp", Type, 0}, + {"EmptyOpContext", Func, 0}, + {"EmptyWordBoundary", Const, 0}, + {"ErrInternalError", Const, 0}, + {"ErrInvalidCharClass", Const, 0}, + {"ErrInvalidCharRange", Const, 0}, + {"ErrInvalidEscape", Const, 0}, + {"ErrInvalidNamedCapture", Const, 0}, + {"ErrInvalidPerlOp", Const, 0}, + {"ErrInvalidRepeatOp", Const, 0}, + {"ErrInvalidRepeatSize", Const, 0}, + {"ErrInvalidUTF8", Const, 0}, + {"ErrLarge", Const, 20}, + {"ErrMissingBracket", Const, 0}, + {"ErrMissingParen", Const, 0}, + {"ErrMissingRepeatArgument", Const, 0}, + {"ErrNestingDepth", Const, 19}, + {"ErrTrailingBackslash", Const, 0}, + {"ErrUnexpectedParen", Const, 1}, + {"Error", Type, 0}, + {"Error.Code", Field, 0}, + {"Error.Expr", Field, 0}, + {"ErrorCode", Type, 0}, + {"Flags", Type, 0}, + {"FoldCase", Const, 0}, + {"Inst", Type, 0}, + {"Inst.Arg", Field, 0}, + {"Inst.Op", Field, 0}, + {"Inst.Out", Field, 0}, + {"Inst.Rune", Field, 0}, + {"InstAlt", Const, 0}, + {"InstAltMatch", Const, 0}, + {"InstCapture", Const, 0}, + {"InstEmptyWidth", Const, 0}, + {"InstFail", Const, 0}, + {"InstMatch", Const, 0}, + {"InstNop", Const, 0}, + {"InstOp", Type, 0}, + {"InstRune", Const, 0}, + {"InstRune1", Const, 0}, + {"InstRuneAny", Const, 0}, + {"InstRuneAnyNotNL", Const, 0}, + {"IsWordChar", Func, 0}, + {"Literal", Const, 0}, + {"MatchNL", Const, 0}, + {"NonGreedy", Const, 0}, + {"OneLine", Const, 0}, + {"Op", Type, 0}, + {"OpAlternate", Const, 0}, + {"OpAnyChar", Const, 0}, + {"OpAnyCharNotNL", Const, 0}, + {"OpBeginLine", Const, 0}, + {"OpBeginText", Const, 0}, + {"OpCapture", Const, 0}, + {"OpCharClass", Const, 0}, + {"OpConcat", Const, 0}, + {"OpEmptyMatch", Const, 0}, + {"OpEndLine", Const, 0}, + {"OpEndText", Const, 0}, + {"OpLiteral", Const, 0}, + {"OpNoMatch", Const, 0}, + {"OpNoWordBoundary", Const, 0}, + {"OpPlus", Const, 0}, + {"OpQuest", Const, 0}, + {"OpRepeat", Const, 0}, + {"OpStar", Const, 0}, + {"OpWordBoundary", Const, 0}, + {"POSIX", Const, 0}, + {"Parse", Func, 0}, + {"Perl", Const, 0}, + {"PerlX", Const, 0}, + {"Prog", Type, 0}, + {"Prog.Inst", Field, 0}, + {"Prog.NumCap", Field, 0}, + {"Prog.Start", Field, 0}, + {"Regexp", Type, 0}, + {"Regexp.Cap", Field, 0}, + {"Regexp.Flags", Field, 0}, + {"Regexp.Max", Field, 0}, + {"Regexp.Min", Field, 0}, + {"Regexp.Name", Field, 0}, + {"Regexp.Op", Field, 0}, + {"Regexp.Rune", Field, 0}, + {"Regexp.Rune0", Field, 0}, + {"Regexp.Sub", Field, 0}, + {"Regexp.Sub0", Field, 0}, + {"Simple", Const, 0}, + {"UnicodeGroups", Const, 0}, + {"WasDollar", Const, 0}, + }, + "runtime": { + {"(*BlockProfileRecord).Stack", Method, 1}, + {"(*Frames).Next", Method, 7}, + {"(*Func).Entry", Method, 0}, + {"(*Func).FileLine", Method, 0}, + {"(*Func).Name", Method, 0}, + {"(*MemProfileRecord).InUseBytes", Method, 0}, + {"(*MemProfileRecord).InUseObjects", Method, 0}, + {"(*MemProfileRecord).Stack", Method, 0}, + {"(*PanicNilError).Error", Method, 21}, + {"(*PanicNilError).RuntimeError", Method, 21}, + {"(*Pinner).Pin", Method, 21}, + {"(*Pinner).Unpin", Method, 21}, + {"(*StackRecord).Stack", Method, 0}, + {"(*TypeAssertionError).Error", Method, 0}, + {"(*TypeAssertionError).RuntimeError", Method, 0}, + {"BlockProfile", Func, 1}, + {"BlockProfileRecord", Type, 1}, + {"BlockProfileRecord.Count", Field, 1}, + {"BlockProfileRecord.Cycles", Field, 1}, + {"BlockProfileRecord.StackRecord", Field, 1}, + {"Breakpoint", Func, 0}, + {"CPUProfile", Func, 0}, + {"Caller", Func, 0}, + {"Callers", Func, 0}, + {"CallersFrames", Func, 7}, + {"Compiler", Const, 0}, + {"Error", Type, 0}, + {"Frame", Type, 7}, + {"Frame.Entry", Field, 7}, + {"Frame.File", Field, 7}, + {"Frame.Func", Field, 7}, + {"Frame.Function", Field, 7}, + {"Frame.Line", Field, 7}, + {"Frame.PC", Field, 7}, + {"Frames", Type, 7}, + {"Func", Type, 0}, + {"FuncForPC", Func, 0}, + {"GC", Func, 0}, + {"GOARCH", Const, 0}, + {"GOMAXPROCS", Func, 0}, + {"GOOS", Const, 0}, + {"GOROOT", Func, 0}, + {"Goexit", Func, 0}, + {"GoroutineProfile", Func, 0}, + {"Gosched", Func, 0}, + {"KeepAlive", Func, 7}, + {"LockOSThread", Func, 0}, + {"MemProfile", Func, 0}, + {"MemProfileRate", Var, 0}, + {"MemProfileRecord", Type, 0}, + {"MemProfileRecord.AllocBytes", Field, 0}, + {"MemProfileRecord.AllocObjects", Field, 0}, + {"MemProfileRecord.FreeBytes", Field, 0}, + {"MemProfileRecord.FreeObjects", Field, 0}, + {"MemProfileRecord.Stack0", Field, 0}, + {"MemStats", Type, 0}, + {"MemStats.Alloc", Field, 0}, + {"MemStats.BuckHashSys", Field, 0}, + {"MemStats.BySize", Field, 0}, + {"MemStats.DebugGC", Field, 0}, + {"MemStats.EnableGC", Field, 0}, + {"MemStats.Frees", Field, 0}, + {"MemStats.GCCPUFraction", Field, 5}, + {"MemStats.GCSys", Field, 2}, + {"MemStats.HeapAlloc", Field, 0}, + {"MemStats.HeapIdle", Field, 0}, + {"MemStats.HeapInuse", Field, 0}, + {"MemStats.HeapObjects", Field, 0}, + {"MemStats.HeapReleased", Field, 0}, + {"MemStats.HeapSys", Field, 0}, + {"MemStats.LastGC", Field, 0}, + {"MemStats.Lookups", Field, 0}, + {"MemStats.MCacheInuse", Field, 0}, + {"MemStats.MCacheSys", Field, 0}, + {"MemStats.MSpanInuse", Field, 0}, + {"MemStats.MSpanSys", Field, 0}, + {"MemStats.Mallocs", Field, 0}, + {"MemStats.NextGC", Field, 0}, + {"MemStats.NumForcedGC", Field, 8}, + {"MemStats.NumGC", Field, 0}, + {"MemStats.OtherSys", Field, 2}, + {"MemStats.PauseEnd", Field, 4}, + {"MemStats.PauseNs", Field, 0}, + {"MemStats.PauseTotalNs", Field, 0}, + {"MemStats.StackInuse", Field, 0}, + {"MemStats.StackSys", Field, 0}, + {"MemStats.Sys", Field, 0}, + {"MemStats.TotalAlloc", Field, 0}, + {"MutexProfile", Func, 8}, + {"NumCPU", Func, 0}, + {"NumCgoCall", Func, 0}, + {"NumGoroutine", Func, 0}, + {"PanicNilError", Type, 21}, + {"Pinner", Type, 21}, + {"ReadMemStats", Func, 0}, + {"ReadTrace", Func, 5}, + {"SetBlockProfileRate", Func, 1}, + {"SetCPUProfileRate", Func, 0}, + {"SetCgoTraceback", Func, 7}, + {"SetFinalizer", Func, 0}, + {"SetMutexProfileFraction", Func, 8}, + {"Stack", Func, 0}, + {"StackRecord", Type, 0}, + {"StackRecord.Stack0", Field, 0}, + {"StartTrace", Func, 5}, + {"StopTrace", Func, 5}, + {"ThreadCreateProfile", Func, 0}, + {"TypeAssertionError", Type, 0}, + {"UnlockOSThread", Func, 0}, + {"Version", Func, 0}, + }, + "runtime/cgo": { + {"(Handle).Delete", Method, 17}, + {"(Handle).Value", Method, 17}, + {"Handle", Type, 17}, + {"Incomplete", Type, 20}, + {"NewHandle", Func, 17}, + }, + "runtime/coverage": { + {"ClearCounters", Func, 20}, + {"WriteCounters", Func, 20}, + {"WriteCountersDir", Func, 20}, + {"WriteMeta", Func, 20}, + {"WriteMetaDir", Func, 20}, + }, + "runtime/debug": { + {"(*BuildInfo).String", Method, 18}, + {"BuildInfo", Type, 12}, + {"BuildInfo.Deps", Field, 12}, + {"BuildInfo.GoVersion", Field, 18}, + {"BuildInfo.Main", Field, 12}, + {"BuildInfo.Path", Field, 12}, + {"BuildInfo.Settings", Field, 18}, + {"BuildSetting", Type, 18}, + {"BuildSetting.Key", Field, 18}, + {"BuildSetting.Value", Field, 18}, + {"CrashOptions", Type, 23}, + {"FreeOSMemory", Func, 1}, + {"GCStats", Type, 1}, + {"GCStats.LastGC", Field, 1}, + {"GCStats.NumGC", Field, 1}, + {"GCStats.Pause", Field, 1}, + {"GCStats.PauseEnd", Field, 4}, + {"GCStats.PauseQuantiles", Field, 1}, + {"GCStats.PauseTotal", Field, 1}, + {"Module", Type, 12}, + {"Module.Path", Field, 12}, + {"Module.Replace", Field, 12}, + {"Module.Sum", Field, 12}, + {"Module.Version", Field, 12}, + {"ParseBuildInfo", Func, 18}, + {"PrintStack", Func, 0}, + {"ReadBuildInfo", Func, 12}, + {"ReadGCStats", Func, 1}, + {"SetCrashOutput", Func, 23}, + {"SetGCPercent", Func, 1}, + {"SetMaxStack", Func, 2}, + {"SetMaxThreads", Func, 2}, + {"SetMemoryLimit", Func, 19}, + {"SetPanicOnFault", Func, 3}, + {"SetTraceback", Func, 6}, + {"Stack", Func, 0}, + {"WriteHeapDump", Func, 3}, + }, + "runtime/metrics": { + {"(Value).Float64", Method, 16}, + {"(Value).Float64Histogram", Method, 16}, + {"(Value).Kind", Method, 16}, + {"(Value).Uint64", Method, 16}, + {"All", Func, 16}, + {"Description", Type, 16}, + {"Description.Cumulative", Field, 16}, + {"Description.Description", Field, 16}, + {"Description.Kind", Field, 16}, + {"Description.Name", Field, 16}, + {"Float64Histogram", Type, 16}, + {"Float64Histogram.Buckets", Field, 16}, + {"Float64Histogram.Counts", Field, 16}, + {"KindBad", Const, 16}, + {"KindFloat64", Const, 16}, + {"KindFloat64Histogram", Const, 16}, + {"KindUint64", Const, 16}, + {"Read", Func, 16}, + {"Sample", Type, 16}, + {"Sample.Name", Field, 16}, + {"Sample.Value", Field, 16}, + {"Value", Type, 16}, + {"ValueKind", Type, 16}, + }, + "runtime/pprof": { + {"(*Profile).Add", Method, 0}, + {"(*Profile).Count", Method, 0}, + {"(*Profile).Name", Method, 0}, + {"(*Profile).Remove", Method, 0}, + {"(*Profile).WriteTo", Method, 0}, + {"Do", Func, 9}, + {"ForLabels", Func, 9}, + {"Label", Func, 9}, + {"LabelSet", Type, 9}, + {"Labels", Func, 9}, + {"Lookup", Func, 0}, + {"NewProfile", Func, 0}, + {"Profile", Type, 0}, + {"Profiles", Func, 0}, + {"SetGoroutineLabels", Func, 9}, + {"StartCPUProfile", Func, 0}, + {"StopCPUProfile", Func, 0}, + {"WithLabels", Func, 9}, + {"WriteHeapProfile", Func, 0}, + }, + "runtime/trace": { + {"(*Region).End", Method, 11}, + {"(*Task).End", Method, 11}, + {"IsEnabled", Func, 11}, + {"Log", Func, 11}, + {"Logf", Func, 11}, + {"NewTask", Func, 11}, + {"Region", Type, 11}, + {"Start", Func, 5}, + {"StartRegion", Func, 11}, + {"Stop", Func, 5}, + {"Task", Type, 11}, + {"WithRegion", Func, 11}, + }, + "slices": { + {"All", Func, 23}, + {"AppendSeq", Func, 23}, + {"Backward", Func, 23}, + {"BinarySearch", Func, 21}, + {"BinarySearchFunc", Func, 21}, + {"Chunk", Func, 23}, + {"Clip", Func, 21}, + {"Clone", Func, 21}, + {"Collect", Func, 23}, + {"Compact", Func, 21}, + {"CompactFunc", Func, 21}, + {"Compare", Func, 21}, + {"CompareFunc", Func, 21}, + {"Concat", Func, 22}, + {"Contains", Func, 21}, + {"ContainsFunc", Func, 21}, + {"Delete", Func, 21}, + {"DeleteFunc", Func, 21}, + {"Equal", Func, 21}, + {"EqualFunc", Func, 21}, + {"Grow", Func, 21}, + {"Index", Func, 21}, + {"IndexFunc", Func, 21}, + {"Insert", Func, 21}, + {"IsSorted", Func, 21}, + {"IsSortedFunc", Func, 21}, + {"Max", Func, 21}, + {"MaxFunc", Func, 21}, + {"Min", Func, 21}, + {"MinFunc", Func, 21}, + {"Repeat", Func, 23}, + {"Replace", Func, 21}, + {"Reverse", Func, 21}, + {"Sort", Func, 21}, + {"SortFunc", Func, 21}, + {"SortStableFunc", Func, 21}, + {"Sorted", Func, 23}, + {"SortedFunc", Func, 23}, + {"SortedStableFunc", Func, 23}, + {"Values", Func, 23}, + }, + "sort": { + {"(Float64Slice).Len", Method, 0}, + {"(Float64Slice).Less", Method, 0}, + {"(Float64Slice).Search", Method, 0}, + {"(Float64Slice).Sort", Method, 0}, + {"(Float64Slice).Swap", Method, 0}, + {"(IntSlice).Len", Method, 0}, + {"(IntSlice).Less", Method, 0}, + {"(IntSlice).Search", Method, 0}, + {"(IntSlice).Sort", Method, 0}, + {"(IntSlice).Swap", Method, 0}, + {"(StringSlice).Len", Method, 0}, + {"(StringSlice).Less", Method, 0}, + {"(StringSlice).Search", Method, 0}, + {"(StringSlice).Sort", Method, 0}, + {"(StringSlice).Swap", Method, 0}, + {"Find", Func, 19}, + {"Float64Slice", Type, 0}, + {"Float64s", Func, 0}, + {"Float64sAreSorted", Func, 0}, + {"IntSlice", Type, 0}, + {"Interface", Type, 0}, + {"Ints", Func, 0}, + {"IntsAreSorted", Func, 0}, + {"IsSorted", Func, 0}, + {"Reverse", Func, 1}, + {"Search", Func, 0}, + {"SearchFloat64s", Func, 0}, + {"SearchInts", Func, 0}, + {"SearchStrings", Func, 0}, + {"Slice", Func, 8}, + {"SliceIsSorted", Func, 8}, + {"SliceStable", Func, 8}, + {"Sort", Func, 0}, + {"Stable", Func, 2}, + {"StringSlice", Type, 0}, + {"Strings", Func, 0}, + {"StringsAreSorted", Func, 0}, + }, + "strconv": { + {"(*NumError).Error", Method, 0}, + {"(*NumError).Unwrap", Method, 14}, + {"AppendBool", Func, 0}, + {"AppendFloat", Func, 0}, + {"AppendInt", Func, 0}, + {"AppendQuote", Func, 0}, + {"AppendQuoteRune", Func, 0}, + {"AppendQuoteRuneToASCII", Func, 0}, + {"AppendQuoteRuneToGraphic", Func, 6}, + {"AppendQuoteToASCII", Func, 0}, + {"AppendQuoteToGraphic", Func, 6}, + {"AppendUint", Func, 0}, + {"Atoi", Func, 0}, + {"CanBackquote", Func, 0}, + {"ErrRange", Var, 0}, + {"ErrSyntax", Var, 0}, + {"FormatBool", Func, 0}, + {"FormatComplex", Func, 15}, + {"FormatFloat", Func, 0}, + {"FormatInt", Func, 0}, + {"FormatUint", Func, 0}, + {"IntSize", Const, 0}, + {"IsGraphic", Func, 6}, + {"IsPrint", Func, 0}, + {"Itoa", Func, 0}, + {"NumError", Type, 0}, + {"NumError.Err", Field, 0}, + {"NumError.Func", Field, 0}, + {"NumError.Num", Field, 0}, + {"ParseBool", Func, 0}, + {"ParseComplex", Func, 15}, + {"ParseFloat", Func, 0}, + {"ParseInt", Func, 0}, + {"ParseUint", Func, 0}, + {"Quote", Func, 0}, + {"QuoteRune", Func, 0}, + {"QuoteRuneToASCII", Func, 0}, + {"QuoteRuneToGraphic", Func, 6}, + {"QuoteToASCII", Func, 0}, + {"QuoteToGraphic", Func, 6}, + {"QuotedPrefix", Func, 17}, + {"Unquote", Func, 0}, + {"UnquoteChar", Func, 0}, + }, + "strings": { + {"(*Builder).Cap", Method, 12}, + {"(*Builder).Grow", Method, 10}, + {"(*Builder).Len", Method, 10}, + {"(*Builder).Reset", Method, 10}, + {"(*Builder).String", Method, 10}, + {"(*Builder).Write", Method, 10}, + {"(*Builder).WriteByte", Method, 10}, + {"(*Builder).WriteRune", Method, 10}, + {"(*Builder).WriteString", Method, 10}, + {"(*Reader).Len", Method, 0}, + {"(*Reader).Read", Method, 0}, + {"(*Reader).ReadAt", Method, 0}, + {"(*Reader).ReadByte", Method, 0}, + {"(*Reader).ReadRune", Method, 0}, + {"(*Reader).Reset", Method, 7}, + {"(*Reader).Seek", Method, 0}, + {"(*Reader).Size", Method, 5}, + {"(*Reader).UnreadByte", Method, 0}, + {"(*Reader).UnreadRune", Method, 0}, + {"(*Reader).WriteTo", Method, 1}, + {"(*Replacer).Replace", Method, 0}, + {"(*Replacer).WriteString", Method, 0}, + {"Builder", Type, 10}, + {"Clone", Func, 18}, + {"Compare", Func, 5}, + {"Contains", Func, 0}, + {"ContainsAny", Func, 0}, + {"ContainsFunc", Func, 21}, + {"ContainsRune", Func, 0}, + {"Count", Func, 0}, + {"Cut", Func, 18}, + {"CutPrefix", Func, 20}, + {"CutSuffix", Func, 20}, + {"EqualFold", Func, 0}, + {"Fields", Func, 0}, + {"FieldsFunc", Func, 0}, + {"HasPrefix", Func, 0}, + {"HasSuffix", Func, 0}, + {"Index", Func, 0}, + {"IndexAny", Func, 0}, + {"IndexByte", Func, 2}, + {"IndexFunc", Func, 0}, + {"IndexRune", Func, 0}, + {"Join", Func, 0}, + {"LastIndex", Func, 0}, + {"LastIndexAny", Func, 0}, + {"LastIndexByte", Func, 5}, + {"LastIndexFunc", Func, 0}, + {"Map", Func, 0}, + {"NewReader", Func, 0}, + {"NewReplacer", Func, 0}, + {"Reader", Type, 0}, + {"Repeat", Func, 0}, + {"Replace", Func, 0}, + {"ReplaceAll", Func, 12}, + {"Replacer", Type, 0}, + {"Split", Func, 0}, + {"SplitAfter", Func, 0}, + {"SplitAfterN", Func, 0}, + {"SplitN", Func, 0}, + {"Title", Func, 0}, + {"ToLower", Func, 0}, + {"ToLowerSpecial", Func, 0}, + {"ToTitle", Func, 0}, + {"ToTitleSpecial", Func, 0}, + {"ToUpper", Func, 0}, + {"ToUpperSpecial", Func, 0}, + {"ToValidUTF8", Func, 13}, + {"Trim", Func, 0}, + {"TrimFunc", Func, 0}, + {"TrimLeft", Func, 0}, + {"TrimLeftFunc", Func, 0}, + {"TrimPrefix", Func, 1}, + {"TrimRight", Func, 0}, + {"TrimRightFunc", Func, 0}, + {"TrimSpace", Func, 0}, + {"TrimSuffix", Func, 1}, + }, + "structs": { + {"HostLayout", Type, 23}, + }, + "sync": { + {"(*Cond).Broadcast", Method, 0}, + {"(*Cond).Signal", Method, 0}, + {"(*Cond).Wait", Method, 0}, + {"(*Map).Clear", Method, 23}, + {"(*Map).CompareAndDelete", Method, 20}, + {"(*Map).CompareAndSwap", Method, 20}, + {"(*Map).Delete", Method, 9}, + {"(*Map).Load", Method, 9}, + {"(*Map).LoadAndDelete", Method, 15}, + {"(*Map).LoadOrStore", Method, 9}, + {"(*Map).Range", Method, 9}, + {"(*Map).Store", Method, 9}, + {"(*Map).Swap", Method, 20}, + {"(*Mutex).Lock", Method, 0}, + {"(*Mutex).TryLock", Method, 18}, + {"(*Mutex).Unlock", Method, 0}, + {"(*Once).Do", Method, 0}, + {"(*Pool).Get", Method, 3}, + {"(*Pool).Put", Method, 3}, + {"(*RWMutex).Lock", Method, 0}, + {"(*RWMutex).RLock", Method, 0}, + {"(*RWMutex).RLocker", Method, 0}, + {"(*RWMutex).RUnlock", Method, 0}, + {"(*RWMutex).TryLock", Method, 18}, + {"(*RWMutex).TryRLock", Method, 18}, + {"(*RWMutex).Unlock", Method, 0}, + {"(*WaitGroup).Add", Method, 0}, + {"(*WaitGroup).Done", Method, 0}, + {"(*WaitGroup).Wait", Method, 0}, + {"Cond", Type, 0}, + {"Cond.L", Field, 0}, + {"Locker", Type, 0}, + {"Map", Type, 9}, + {"Mutex", Type, 0}, + {"NewCond", Func, 0}, + {"Once", Type, 0}, + {"OnceFunc", Func, 21}, + {"OnceValue", Func, 21}, + {"OnceValues", Func, 21}, + {"Pool", Type, 3}, + {"Pool.New", Field, 3}, + {"RWMutex", Type, 0}, + {"WaitGroup", Type, 0}, + }, + "sync/atomic": { + {"(*Bool).CompareAndSwap", Method, 19}, + {"(*Bool).Load", Method, 19}, + {"(*Bool).Store", Method, 19}, + {"(*Bool).Swap", Method, 19}, + {"(*Int32).Add", Method, 19}, + {"(*Int32).And", Method, 23}, + {"(*Int32).CompareAndSwap", Method, 19}, + {"(*Int32).Load", Method, 19}, + {"(*Int32).Or", Method, 23}, + {"(*Int32).Store", Method, 19}, + {"(*Int32).Swap", Method, 19}, + {"(*Int64).Add", Method, 19}, + {"(*Int64).And", Method, 23}, + {"(*Int64).CompareAndSwap", Method, 19}, + {"(*Int64).Load", Method, 19}, + {"(*Int64).Or", Method, 23}, + {"(*Int64).Store", Method, 19}, + {"(*Int64).Swap", Method, 19}, + {"(*Pointer).CompareAndSwap", Method, 19}, + {"(*Pointer).Load", Method, 19}, + {"(*Pointer).Store", Method, 19}, + {"(*Pointer).Swap", Method, 19}, + {"(*Uint32).Add", Method, 19}, + {"(*Uint32).And", Method, 23}, + {"(*Uint32).CompareAndSwap", Method, 19}, + {"(*Uint32).Load", Method, 19}, + {"(*Uint32).Or", Method, 23}, + {"(*Uint32).Store", Method, 19}, + {"(*Uint32).Swap", Method, 19}, + {"(*Uint64).Add", Method, 19}, + {"(*Uint64).And", Method, 23}, + {"(*Uint64).CompareAndSwap", Method, 19}, + {"(*Uint64).Load", Method, 19}, + {"(*Uint64).Or", Method, 23}, + {"(*Uint64).Store", Method, 19}, + {"(*Uint64).Swap", Method, 19}, + {"(*Uintptr).Add", Method, 19}, + {"(*Uintptr).And", Method, 23}, + {"(*Uintptr).CompareAndSwap", Method, 19}, + {"(*Uintptr).Load", Method, 19}, + {"(*Uintptr).Or", Method, 23}, + {"(*Uintptr).Store", Method, 19}, + {"(*Uintptr).Swap", Method, 19}, + {"(*Value).CompareAndSwap", Method, 17}, + {"(*Value).Load", Method, 4}, + {"(*Value).Store", Method, 4}, + {"(*Value).Swap", Method, 17}, + {"AddInt32", Func, 0}, + {"AddInt64", Func, 0}, + {"AddUint32", Func, 0}, + {"AddUint64", Func, 0}, + {"AddUintptr", Func, 0}, + {"AndInt32", Func, 23}, + {"AndInt64", Func, 23}, + {"AndUint32", Func, 23}, + {"AndUint64", Func, 23}, + {"AndUintptr", Func, 23}, + {"Bool", Type, 19}, + {"CompareAndSwapInt32", Func, 0}, + {"CompareAndSwapInt64", Func, 0}, + {"CompareAndSwapPointer", Func, 0}, + {"CompareAndSwapUint32", Func, 0}, + {"CompareAndSwapUint64", Func, 0}, + {"CompareAndSwapUintptr", Func, 0}, + {"Int32", Type, 19}, + {"Int64", Type, 19}, + {"LoadInt32", Func, 0}, + {"LoadInt64", Func, 0}, + {"LoadPointer", Func, 0}, + {"LoadUint32", Func, 0}, + {"LoadUint64", Func, 0}, + {"LoadUintptr", Func, 0}, + {"OrInt32", Func, 23}, + {"OrInt64", Func, 23}, + {"OrUint32", Func, 23}, + {"OrUint64", Func, 23}, + {"OrUintptr", Func, 23}, + {"Pointer", Type, 19}, + {"StoreInt32", Func, 0}, + {"StoreInt64", Func, 0}, + {"StorePointer", Func, 0}, + {"StoreUint32", Func, 0}, + {"StoreUint64", Func, 0}, + {"StoreUintptr", Func, 0}, + {"SwapInt32", Func, 2}, + {"SwapInt64", Func, 2}, + {"SwapPointer", Func, 2}, + {"SwapUint32", Func, 2}, + {"SwapUint64", Func, 2}, + {"SwapUintptr", Func, 2}, + {"Uint32", Type, 19}, + {"Uint64", Type, 19}, + {"Uintptr", Type, 19}, + {"Value", Type, 4}, + }, + "syscall": { + {"(*Cmsghdr).SetLen", Method, 0}, + {"(*DLL).FindProc", Method, 0}, + {"(*DLL).MustFindProc", Method, 0}, + {"(*DLL).Release", Method, 0}, + {"(*DLLError).Error", Method, 0}, + {"(*DLLError).Unwrap", Method, 16}, + {"(*Filetime).Nanoseconds", Method, 0}, + {"(*Iovec).SetLen", Method, 0}, + {"(*LazyDLL).Handle", Method, 0}, + {"(*LazyDLL).Load", Method, 0}, + {"(*LazyDLL).NewProc", Method, 0}, + {"(*LazyProc).Addr", Method, 0}, + {"(*LazyProc).Call", Method, 0}, + {"(*LazyProc).Find", Method, 0}, + {"(*Msghdr).SetControllen", Method, 0}, + {"(*Proc).Addr", Method, 0}, + {"(*Proc).Call", Method, 0}, + {"(*PtraceRegs).PC", Method, 0}, + {"(*PtraceRegs).SetPC", Method, 0}, + {"(*RawSockaddrAny).Sockaddr", Method, 0}, + {"(*SID).Copy", Method, 0}, + {"(*SID).Len", Method, 0}, + {"(*SID).LookupAccount", Method, 0}, + {"(*SID).String", Method, 0}, + {"(*Timespec).Nano", Method, 0}, + {"(*Timespec).Unix", Method, 0}, + {"(*Timeval).Nano", Method, 0}, + {"(*Timeval).Nanoseconds", Method, 0}, + {"(*Timeval).Unix", Method, 0}, + {"(Errno).Error", Method, 0}, + {"(Errno).Is", Method, 13}, + {"(Errno).Temporary", Method, 0}, + {"(Errno).Timeout", Method, 0}, + {"(Signal).Signal", Method, 0}, + {"(Signal).String", Method, 0}, + {"(Token).Close", Method, 0}, + {"(Token).GetTokenPrimaryGroup", Method, 0}, + {"(Token).GetTokenUser", Method, 0}, + {"(Token).GetUserProfileDirectory", Method, 0}, + {"(WaitStatus).Continued", Method, 0}, + {"(WaitStatus).CoreDump", Method, 0}, + {"(WaitStatus).ExitStatus", Method, 0}, + {"(WaitStatus).Exited", Method, 0}, + {"(WaitStatus).Signal", Method, 0}, + {"(WaitStatus).Signaled", Method, 0}, + {"(WaitStatus).StopSignal", Method, 0}, + {"(WaitStatus).Stopped", Method, 0}, + {"(WaitStatus).TrapCause", Method, 0}, + {"AF_ALG", Const, 0}, + {"AF_APPLETALK", Const, 0}, + {"AF_ARP", Const, 0}, + {"AF_ASH", Const, 0}, + {"AF_ATM", Const, 0}, + {"AF_ATMPVC", Const, 0}, + {"AF_ATMSVC", Const, 0}, + {"AF_AX25", Const, 0}, + {"AF_BLUETOOTH", Const, 0}, + {"AF_BRIDGE", Const, 0}, + {"AF_CAIF", Const, 0}, + {"AF_CAN", Const, 0}, + {"AF_CCITT", Const, 0}, + {"AF_CHAOS", Const, 0}, + {"AF_CNT", Const, 0}, + {"AF_COIP", Const, 0}, + {"AF_DATAKIT", Const, 0}, + {"AF_DECnet", Const, 0}, + {"AF_DLI", Const, 0}, + {"AF_E164", Const, 0}, + {"AF_ECMA", Const, 0}, + {"AF_ECONET", Const, 0}, + {"AF_ENCAP", Const, 1}, + {"AF_FILE", Const, 0}, + {"AF_HYLINK", Const, 0}, + {"AF_IEEE80211", Const, 0}, + {"AF_IEEE802154", Const, 0}, + {"AF_IMPLINK", Const, 0}, + {"AF_INET", Const, 0}, + {"AF_INET6", Const, 0}, + {"AF_INET6_SDP", Const, 3}, + {"AF_INET_SDP", Const, 3}, + {"AF_IPX", Const, 0}, + {"AF_IRDA", Const, 0}, + {"AF_ISDN", Const, 0}, + {"AF_ISO", Const, 0}, + {"AF_IUCV", Const, 0}, + {"AF_KEY", Const, 0}, + {"AF_LAT", Const, 0}, + {"AF_LINK", Const, 0}, + {"AF_LLC", Const, 0}, + {"AF_LOCAL", Const, 0}, + {"AF_MAX", Const, 0}, + {"AF_MPLS", Const, 1}, + {"AF_NATM", Const, 0}, + {"AF_NDRV", Const, 0}, + {"AF_NETBEUI", Const, 0}, + {"AF_NETBIOS", Const, 0}, + {"AF_NETGRAPH", Const, 0}, + {"AF_NETLINK", Const, 0}, + {"AF_NETROM", Const, 0}, + {"AF_NS", Const, 0}, + {"AF_OROUTE", Const, 1}, + {"AF_OSI", Const, 0}, + {"AF_PACKET", Const, 0}, + {"AF_PHONET", Const, 0}, + {"AF_PPP", Const, 0}, + {"AF_PPPOX", Const, 0}, + {"AF_PUP", Const, 0}, + {"AF_RDS", Const, 0}, + {"AF_RESERVED_36", Const, 0}, + {"AF_ROSE", Const, 0}, + {"AF_ROUTE", Const, 0}, + {"AF_RXRPC", Const, 0}, + {"AF_SCLUSTER", Const, 0}, + {"AF_SECURITY", Const, 0}, + {"AF_SIP", Const, 0}, + {"AF_SLOW", Const, 0}, + {"AF_SNA", Const, 0}, + {"AF_SYSTEM", Const, 0}, + {"AF_TIPC", Const, 0}, + {"AF_UNIX", Const, 0}, + {"AF_UNSPEC", Const, 0}, + {"AF_UTUN", Const, 16}, + {"AF_VENDOR00", Const, 0}, + {"AF_VENDOR01", Const, 0}, + {"AF_VENDOR02", Const, 0}, + {"AF_VENDOR03", Const, 0}, + {"AF_VENDOR04", Const, 0}, + {"AF_VENDOR05", Const, 0}, + {"AF_VENDOR06", Const, 0}, + {"AF_VENDOR07", Const, 0}, + {"AF_VENDOR08", Const, 0}, + {"AF_VENDOR09", Const, 0}, + {"AF_VENDOR10", Const, 0}, + {"AF_VENDOR11", Const, 0}, + {"AF_VENDOR12", Const, 0}, + {"AF_VENDOR13", Const, 0}, + {"AF_VENDOR14", Const, 0}, + {"AF_VENDOR15", Const, 0}, + {"AF_VENDOR16", Const, 0}, + {"AF_VENDOR17", Const, 0}, + {"AF_VENDOR18", Const, 0}, + {"AF_VENDOR19", Const, 0}, + {"AF_VENDOR20", Const, 0}, + {"AF_VENDOR21", Const, 0}, + {"AF_VENDOR22", Const, 0}, + {"AF_VENDOR23", Const, 0}, + {"AF_VENDOR24", Const, 0}, + {"AF_VENDOR25", Const, 0}, + {"AF_VENDOR26", Const, 0}, + {"AF_VENDOR27", Const, 0}, + {"AF_VENDOR28", Const, 0}, + {"AF_VENDOR29", Const, 0}, + {"AF_VENDOR30", Const, 0}, + {"AF_VENDOR31", Const, 0}, + {"AF_VENDOR32", Const, 0}, + {"AF_VENDOR33", Const, 0}, + {"AF_VENDOR34", Const, 0}, + {"AF_VENDOR35", Const, 0}, + {"AF_VENDOR36", Const, 0}, + {"AF_VENDOR37", Const, 0}, + {"AF_VENDOR38", Const, 0}, + {"AF_VENDOR39", Const, 0}, + {"AF_VENDOR40", Const, 0}, + {"AF_VENDOR41", Const, 0}, + {"AF_VENDOR42", Const, 0}, + {"AF_VENDOR43", Const, 0}, + {"AF_VENDOR44", Const, 0}, + {"AF_VENDOR45", Const, 0}, + {"AF_VENDOR46", Const, 0}, + {"AF_VENDOR47", Const, 0}, + {"AF_WANPIPE", Const, 0}, + {"AF_X25", Const, 0}, + {"AI_CANONNAME", Const, 1}, + {"AI_NUMERICHOST", Const, 1}, + {"AI_PASSIVE", Const, 1}, + {"APPLICATION_ERROR", Const, 0}, + {"ARPHRD_ADAPT", Const, 0}, + {"ARPHRD_APPLETLK", Const, 0}, + {"ARPHRD_ARCNET", Const, 0}, + {"ARPHRD_ASH", Const, 0}, + {"ARPHRD_ATM", Const, 0}, + {"ARPHRD_AX25", Const, 0}, + {"ARPHRD_BIF", Const, 0}, + {"ARPHRD_CHAOS", Const, 0}, + {"ARPHRD_CISCO", Const, 0}, + {"ARPHRD_CSLIP", Const, 0}, + {"ARPHRD_CSLIP6", Const, 0}, + {"ARPHRD_DDCMP", Const, 0}, + {"ARPHRD_DLCI", Const, 0}, + {"ARPHRD_ECONET", Const, 0}, + {"ARPHRD_EETHER", Const, 0}, + {"ARPHRD_ETHER", Const, 0}, + {"ARPHRD_EUI64", Const, 0}, + {"ARPHRD_FCAL", Const, 0}, + {"ARPHRD_FCFABRIC", Const, 0}, + {"ARPHRD_FCPL", Const, 0}, + {"ARPHRD_FCPP", Const, 0}, + {"ARPHRD_FDDI", Const, 0}, + {"ARPHRD_FRAD", Const, 0}, + {"ARPHRD_FRELAY", Const, 1}, + {"ARPHRD_HDLC", Const, 0}, + {"ARPHRD_HIPPI", Const, 0}, + {"ARPHRD_HWX25", Const, 0}, + {"ARPHRD_IEEE1394", Const, 0}, + {"ARPHRD_IEEE802", Const, 0}, + {"ARPHRD_IEEE80211", Const, 0}, + {"ARPHRD_IEEE80211_PRISM", Const, 0}, + {"ARPHRD_IEEE80211_RADIOTAP", Const, 0}, + {"ARPHRD_IEEE802154", Const, 0}, + {"ARPHRD_IEEE802154_PHY", Const, 0}, + {"ARPHRD_IEEE802_TR", Const, 0}, + {"ARPHRD_INFINIBAND", Const, 0}, + {"ARPHRD_IPDDP", Const, 0}, + {"ARPHRD_IPGRE", Const, 0}, + {"ARPHRD_IRDA", Const, 0}, + {"ARPHRD_LAPB", Const, 0}, + {"ARPHRD_LOCALTLK", Const, 0}, + {"ARPHRD_LOOPBACK", Const, 0}, + {"ARPHRD_METRICOM", Const, 0}, + {"ARPHRD_NETROM", Const, 0}, + {"ARPHRD_NONE", Const, 0}, + {"ARPHRD_PIMREG", Const, 0}, + {"ARPHRD_PPP", Const, 0}, + {"ARPHRD_PRONET", Const, 0}, + {"ARPHRD_RAWHDLC", Const, 0}, + {"ARPHRD_ROSE", Const, 0}, + {"ARPHRD_RSRVD", Const, 0}, + {"ARPHRD_SIT", Const, 0}, + {"ARPHRD_SKIP", Const, 0}, + {"ARPHRD_SLIP", Const, 0}, + {"ARPHRD_SLIP6", Const, 0}, + {"ARPHRD_STRIP", Const, 1}, + {"ARPHRD_TUNNEL", Const, 0}, + {"ARPHRD_TUNNEL6", Const, 0}, + {"ARPHRD_VOID", Const, 0}, + {"ARPHRD_X25", Const, 0}, + {"AUTHTYPE_CLIENT", Const, 0}, + {"AUTHTYPE_SERVER", Const, 0}, + {"Accept", Func, 0}, + {"Accept4", Func, 1}, + {"AcceptEx", Func, 0}, + {"Access", Func, 0}, + {"Acct", Func, 0}, + {"AddrinfoW", Type, 1}, + {"AddrinfoW.Addr", Field, 1}, + {"AddrinfoW.Addrlen", Field, 1}, + {"AddrinfoW.Canonname", Field, 1}, + {"AddrinfoW.Family", Field, 1}, + {"AddrinfoW.Flags", Field, 1}, + {"AddrinfoW.Next", Field, 1}, + {"AddrinfoW.Protocol", Field, 1}, + {"AddrinfoW.Socktype", Field, 1}, + {"Adjtime", Func, 0}, + {"Adjtimex", Func, 0}, + {"AllThreadsSyscall", Func, 16}, + {"AllThreadsSyscall6", Func, 16}, + {"AttachLsf", Func, 0}, + {"B0", Const, 0}, + {"B1000000", Const, 0}, + {"B110", Const, 0}, + {"B115200", Const, 0}, + {"B1152000", Const, 0}, + {"B1200", Const, 0}, + {"B134", Const, 0}, + {"B14400", Const, 1}, + {"B150", Const, 0}, + {"B1500000", Const, 0}, + {"B1800", Const, 0}, + {"B19200", Const, 0}, + {"B200", Const, 0}, + {"B2000000", Const, 0}, + {"B230400", Const, 0}, + {"B2400", Const, 0}, + {"B2500000", Const, 0}, + {"B28800", Const, 1}, + {"B300", Const, 0}, + {"B3000000", Const, 0}, + {"B3500000", Const, 0}, + {"B38400", Const, 0}, + {"B4000000", Const, 0}, + {"B460800", Const, 0}, + {"B4800", Const, 0}, + {"B50", Const, 0}, + {"B500000", Const, 0}, + {"B57600", Const, 0}, + {"B576000", Const, 0}, + {"B600", Const, 0}, + {"B7200", Const, 1}, + {"B75", Const, 0}, + {"B76800", Const, 1}, + {"B921600", Const, 0}, + {"B9600", Const, 0}, + {"BASE_PROTOCOL", Const, 2}, + {"BIOCFEEDBACK", Const, 0}, + {"BIOCFLUSH", Const, 0}, + {"BIOCGBLEN", Const, 0}, + {"BIOCGDIRECTION", Const, 0}, + {"BIOCGDIRFILT", Const, 1}, + {"BIOCGDLT", Const, 0}, + {"BIOCGDLTLIST", Const, 0}, + {"BIOCGETBUFMODE", Const, 0}, + {"BIOCGETIF", Const, 0}, + {"BIOCGETZMAX", Const, 0}, + {"BIOCGFEEDBACK", Const, 1}, + {"BIOCGFILDROP", Const, 1}, + {"BIOCGHDRCMPLT", Const, 0}, + {"BIOCGRSIG", Const, 0}, + {"BIOCGRTIMEOUT", Const, 0}, + {"BIOCGSEESENT", Const, 0}, + {"BIOCGSTATS", Const, 0}, + {"BIOCGSTATSOLD", Const, 1}, + {"BIOCGTSTAMP", Const, 1}, + {"BIOCIMMEDIATE", Const, 0}, + {"BIOCLOCK", Const, 0}, + {"BIOCPROMISC", Const, 0}, + {"BIOCROTZBUF", Const, 0}, + {"BIOCSBLEN", Const, 0}, + {"BIOCSDIRECTION", Const, 0}, + {"BIOCSDIRFILT", Const, 1}, + {"BIOCSDLT", Const, 0}, + {"BIOCSETBUFMODE", Const, 0}, + {"BIOCSETF", Const, 0}, + {"BIOCSETFNR", Const, 0}, + {"BIOCSETIF", Const, 0}, + {"BIOCSETWF", Const, 0}, + {"BIOCSETZBUF", Const, 0}, + {"BIOCSFEEDBACK", Const, 1}, + {"BIOCSFILDROP", Const, 1}, + {"BIOCSHDRCMPLT", Const, 0}, + {"BIOCSRSIG", Const, 0}, + {"BIOCSRTIMEOUT", Const, 0}, + {"BIOCSSEESENT", Const, 0}, + {"BIOCSTCPF", Const, 1}, + {"BIOCSTSTAMP", Const, 1}, + {"BIOCSUDPF", Const, 1}, + {"BIOCVERSION", Const, 0}, + {"BPF_A", Const, 0}, + {"BPF_ABS", Const, 0}, + {"BPF_ADD", Const, 0}, + {"BPF_ALIGNMENT", Const, 0}, + {"BPF_ALIGNMENT32", Const, 1}, + {"BPF_ALU", Const, 0}, + {"BPF_AND", Const, 0}, + {"BPF_B", Const, 0}, + {"BPF_BUFMODE_BUFFER", Const, 0}, + {"BPF_BUFMODE_ZBUF", Const, 0}, + {"BPF_DFLTBUFSIZE", Const, 1}, + {"BPF_DIRECTION_IN", Const, 1}, + {"BPF_DIRECTION_OUT", Const, 1}, + {"BPF_DIV", Const, 0}, + {"BPF_H", Const, 0}, + {"BPF_IMM", Const, 0}, + {"BPF_IND", Const, 0}, + {"BPF_JA", Const, 0}, + {"BPF_JEQ", Const, 0}, + {"BPF_JGE", Const, 0}, + {"BPF_JGT", Const, 0}, + {"BPF_JMP", Const, 0}, + {"BPF_JSET", Const, 0}, + {"BPF_K", Const, 0}, + {"BPF_LD", Const, 0}, + {"BPF_LDX", Const, 0}, + {"BPF_LEN", Const, 0}, + {"BPF_LSH", Const, 0}, + {"BPF_MAJOR_VERSION", Const, 0}, + {"BPF_MAXBUFSIZE", Const, 0}, + {"BPF_MAXINSNS", Const, 0}, + {"BPF_MEM", Const, 0}, + {"BPF_MEMWORDS", Const, 0}, + {"BPF_MINBUFSIZE", Const, 0}, + {"BPF_MINOR_VERSION", Const, 0}, + {"BPF_MISC", Const, 0}, + {"BPF_MSH", Const, 0}, + {"BPF_MUL", Const, 0}, + {"BPF_NEG", Const, 0}, + {"BPF_OR", Const, 0}, + {"BPF_RELEASE", Const, 0}, + {"BPF_RET", Const, 0}, + {"BPF_RSH", Const, 0}, + {"BPF_ST", Const, 0}, + {"BPF_STX", Const, 0}, + {"BPF_SUB", Const, 0}, + {"BPF_TAX", Const, 0}, + {"BPF_TXA", Const, 0}, + {"BPF_T_BINTIME", Const, 1}, + {"BPF_T_BINTIME_FAST", Const, 1}, + {"BPF_T_BINTIME_MONOTONIC", Const, 1}, + {"BPF_T_BINTIME_MONOTONIC_FAST", Const, 1}, + {"BPF_T_FAST", Const, 1}, + {"BPF_T_FLAG_MASK", Const, 1}, + {"BPF_T_FORMAT_MASK", Const, 1}, + {"BPF_T_MICROTIME", Const, 1}, + {"BPF_T_MICROTIME_FAST", Const, 1}, + {"BPF_T_MICROTIME_MONOTONIC", Const, 1}, + {"BPF_T_MICROTIME_MONOTONIC_FAST", Const, 1}, + {"BPF_T_MONOTONIC", Const, 1}, + {"BPF_T_MONOTONIC_FAST", Const, 1}, + {"BPF_T_NANOTIME", Const, 1}, + {"BPF_T_NANOTIME_FAST", Const, 1}, + {"BPF_T_NANOTIME_MONOTONIC", Const, 1}, + {"BPF_T_NANOTIME_MONOTONIC_FAST", Const, 1}, + {"BPF_T_NONE", Const, 1}, + {"BPF_T_NORMAL", Const, 1}, + {"BPF_W", Const, 0}, + {"BPF_X", Const, 0}, + {"BRKINT", Const, 0}, + {"Bind", Func, 0}, + {"BindToDevice", Func, 0}, + {"BpfBuflen", Func, 0}, + {"BpfDatalink", Func, 0}, + {"BpfHdr", Type, 0}, + {"BpfHdr.Caplen", Field, 0}, + {"BpfHdr.Datalen", Field, 0}, + {"BpfHdr.Hdrlen", Field, 0}, + {"BpfHdr.Pad_cgo_0", Field, 0}, + {"BpfHdr.Tstamp", Field, 0}, + {"BpfHeadercmpl", Func, 0}, + {"BpfInsn", Type, 0}, + {"BpfInsn.Code", Field, 0}, + {"BpfInsn.Jf", Field, 0}, + {"BpfInsn.Jt", Field, 0}, + {"BpfInsn.K", Field, 0}, + {"BpfInterface", Func, 0}, + {"BpfJump", Func, 0}, + {"BpfProgram", Type, 0}, + {"BpfProgram.Insns", Field, 0}, + {"BpfProgram.Len", Field, 0}, + {"BpfProgram.Pad_cgo_0", Field, 0}, + {"BpfStat", Type, 0}, + {"BpfStat.Capt", Field, 2}, + {"BpfStat.Drop", Field, 0}, + {"BpfStat.Padding", Field, 2}, + {"BpfStat.Recv", Field, 0}, + {"BpfStats", Func, 0}, + {"BpfStmt", Func, 0}, + {"BpfTimeout", Func, 0}, + {"BpfTimeval", Type, 2}, + {"BpfTimeval.Sec", Field, 2}, + {"BpfTimeval.Usec", Field, 2}, + {"BpfVersion", Type, 0}, + {"BpfVersion.Major", Field, 0}, + {"BpfVersion.Minor", Field, 0}, + {"BpfZbuf", Type, 0}, + {"BpfZbuf.Bufa", Field, 0}, + {"BpfZbuf.Bufb", Field, 0}, + {"BpfZbuf.Buflen", Field, 0}, + {"BpfZbufHeader", Type, 0}, + {"BpfZbufHeader.Kernel_gen", Field, 0}, + {"BpfZbufHeader.Kernel_len", Field, 0}, + {"BpfZbufHeader.User_gen", Field, 0}, + {"BpfZbufHeader.X_bzh_pad", Field, 0}, + {"ByHandleFileInformation", Type, 0}, + {"ByHandleFileInformation.CreationTime", Field, 0}, + {"ByHandleFileInformation.FileAttributes", Field, 0}, + {"ByHandleFileInformation.FileIndexHigh", Field, 0}, + {"ByHandleFileInformation.FileIndexLow", Field, 0}, + {"ByHandleFileInformation.FileSizeHigh", Field, 0}, + {"ByHandleFileInformation.FileSizeLow", Field, 0}, + {"ByHandleFileInformation.LastAccessTime", Field, 0}, + {"ByHandleFileInformation.LastWriteTime", Field, 0}, + {"ByHandleFileInformation.NumberOfLinks", Field, 0}, + {"ByHandleFileInformation.VolumeSerialNumber", Field, 0}, + {"BytePtrFromString", Func, 1}, + {"ByteSliceFromString", Func, 1}, + {"CCR0_FLUSH", Const, 1}, + {"CERT_CHAIN_POLICY_AUTHENTICODE", Const, 0}, + {"CERT_CHAIN_POLICY_AUTHENTICODE_TS", Const, 0}, + {"CERT_CHAIN_POLICY_BASE", Const, 0}, + {"CERT_CHAIN_POLICY_BASIC_CONSTRAINTS", Const, 0}, + {"CERT_CHAIN_POLICY_EV", Const, 0}, + {"CERT_CHAIN_POLICY_MICROSOFT_ROOT", Const, 0}, + {"CERT_CHAIN_POLICY_NT_AUTH", Const, 0}, + {"CERT_CHAIN_POLICY_SSL", Const, 0}, + {"CERT_E_CN_NO_MATCH", Const, 0}, + {"CERT_E_EXPIRED", Const, 0}, + {"CERT_E_PURPOSE", Const, 0}, + {"CERT_E_ROLE", Const, 0}, + {"CERT_E_UNTRUSTEDROOT", Const, 0}, + {"CERT_STORE_ADD_ALWAYS", Const, 0}, + {"CERT_STORE_DEFER_CLOSE_UNTIL_LAST_FREE_FLAG", Const, 0}, + {"CERT_STORE_PROV_MEMORY", Const, 0}, + {"CERT_TRUST_HAS_EXCLUDED_NAME_CONSTRAINT", Const, 0}, + {"CERT_TRUST_HAS_NOT_DEFINED_NAME_CONSTRAINT", Const, 0}, + {"CERT_TRUST_HAS_NOT_PERMITTED_NAME_CONSTRAINT", Const, 0}, + {"CERT_TRUST_HAS_NOT_SUPPORTED_CRITICAL_EXT", Const, 0}, + {"CERT_TRUST_HAS_NOT_SUPPORTED_NAME_CONSTRAINT", Const, 0}, + {"CERT_TRUST_INVALID_BASIC_CONSTRAINTS", Const, 0}, + {"CERT_TRUST_INVALID_EXTENSION", Const, 0}, + {"CERT_TRUST_INVALID_NAME_CONSTRAINTS", Const, 0}, + {"CERT_TRUST_INVALID_POLICY_CONSTRAINTS", Const, 0}, + {"CERT_TRUST_IS_CYCLIC", Const, 0}, + {"CERT_TRUST_IS_EXPLICIT_DISTRUST", Const, 0}, + {"CERT_TRUST_IS_NOT_SIGNATURE_VALID", Const, 0}, + {"CERT_TRUST_IS_NOT_TIME_VALID", Const, 0}, + {"CERT_TRUST_IS_NOT_VALID_FOR_USAGE", Const, 0}, + {"CERT_TRUST_IS_OFFLINE_REVOCATION", Const, 0}, + {"CERT_TRUST_IS_REVOKED", Const, 0}, + {"CERT_TRUST_IS_UNTRUSTED_ROOT", Const, 0}, + {"CERT_TRUST_NO_ERROR", Const, 0}, + {"CERT_TRUST_NO_ISSUANCE_CHAIN_POLICY", Const, 0}, + {"CERT_TRUST_REVOCATION_STATUS_UNKNOWN", Const, 0}, + {"CFLUSH", Const, 1}, + {"CLOCAL", Const, 0}, + {"CLONE_CHILD_CLEARTID", Const, 2}, + {"CLONE_CHILD_SETTID", Const, 2}, + {"CLONE_CLEAR_SIGHAND", Const, 20}, + {"CLONE_CSIGNAL", Const, 3}, + {"CLONE_DETACHED", Const, 2}, + {"CLONE_FILES", Const, 2}, + {"CLONE_FS", Const, 2}, + {"CLONE_INTO_CGROUP", Const, 20}, + {"CLONE_IO", Const, 2}, + {"CLONE_NEWCGROUP", Const, 20}, + {"CLONE_NEWIPC", Const, 2}, + {"CLONE_NEWNET", Const, 2}, + {"CLONE_NEWNS", Const, 2}, + {"CLONE_NEWPID", Const, 2}, + {"CLONE_NEWTIME", Const, 20}, + {"CLONE_NEWUSER", Const, 2}, + {"CLONE_NEWUTS", Const, 2}, + {"CLONE_PARENT", Const, 2}, + {"CLONE_PARENT_SETTID", Const, 2}, + {"CLONE_PID", Const, 3}, + {"CLONE_PIDFD", Const, 20}, + {"CLONE_PTRACE", Const, 2}, + {"CLONE_SETTLS", Const, 2}, + {"CLONE_SIGHAND", Const, 2}, + {"CLONE_SYSVSEM", Const, 2}, + {"CLONE_THREAD", Const, 2}, + {"CLONE_UNTRACED", Const, 2}, + {"CLONE_VFORK", Const, 2}, + {"CLONE_VM", Const, 2}, + {"CPUID_CFLUSH", Const, 1}, + {"CREAD", Const, 0}, + {"CREATE_ALWAYS", Const, 0}, + {"CREATE_NEW", Const, 0}, + {"CREATE_NEW_PROCESS_GROUP", Const, 1}, + {"CREATE_UNICODE_ENVIRONMENT", Const, 0}, + {"CRYPT_DEFAULT_CONTAINER_OPTIONAL", Const, 0}, + {"CRYPT_DELETEKEYSET", Const, 0}, + {"CRYPT_MACHINE_KEYSET", Const, 0}, + {"CRYPT_NEWKEYSET", Const, 0}, + {"CRYPT_SILENT", Const, 0}, + {"CRYPT_VERIFYCONTEXT", Const, 0}, + {"CS5", Const, 0}, + {"CS6", Const, 0}, + {"CS7", Const, 0}, + {"CS8", Const, 0}, + {"CSIZE", Const, 0}, + {"CSTART", Const, 1}, + {"CSTATUS", Const, 1}, + {"CSTOP", Const, 1}, + {"CSTOPB", Const, 0}, + {"CSUSP", Const, 1}, + {"CTL_MAXNAME", Const, 0}, + {"CTL_NET", Const, 0}, + {"CTL_QUERY", Const, 1}, + {"CTRL_BREAK_EVENT", Const, 1}, + {"CTRL_CLOSE_EVENT", Const, 14}, + {"CTRL_C_EVENT", Const, 1}, + {"CTRL_LOGOFF_EVENT", Const, 14}, + {"CTRL_SHUTDOWN_EVENT", Const, 14}, + {"CancelIo", Func, 0}, + {"CancelIoEx", Func, 1}, + {"CertAddCertificateContextToStore", Func, 0}, + {"CertChainContext", Type, 0}, + {"CertChainContext.ChainCount", Field, 0}, + {"CertChainContext.Chains", Field, 0}, + {"CertChainContext.HasRevocationFreshnessTime", Field, 0}, + {"CertChainContext.LowerQualityChainCount", Field, 0}, + {"CertChainContext.LowerQualityChains", Field, 0}, + {"CertChainContext.RevocationFreshnessTime", Field, 0}, + {"CertChainContext.Size", Field, 0}, + {"CertChainContext.TrustStatus", Field, 0}, + {"CertChainElement", Type, 0}, + {"CertChainElement.ApplicationUsage", Field, 0}, + {"CertChainElement.CertContext", Field, 0}, + {"CertChainElement.ExtendedErrorInfo", Field, 0}, + {"CertChainElement.IssuanceUsage", Field, 0}, + {"CertChainElement.RevocationInfo", Field, 0}, + {"CertChainElement.Size", Field, 0}, + {"CertChainElement.TrustStatus", Field, 0}, + {"CertChainPara", Type, 0}, + {"CertChainPara.CacheResync", Field, 0}, + {"CertChainPara.CheckRevocationFreshnessTime", Field, 0}, + {"CertChainPara.RequestedUsage", Field, 0}, + {"CertChainPara.RequstedIssuancePolicy", Field, 0}, + {"CertChainPara.RevocationFreshnessTime", Field, 0}, + {"CertChainPara.Size", Field, 0}, + {"CertChainPara.URLRetrievalTimeout", Field, 0}, + {"CertChainPolicyPara", Type, 0}, + {"CertChainPolicyPara.ExtraPolicyPara", Field, 0}, + {"CertChainPolicyPara.Flags", Field, 0}, + {"CertChainPolicyPara.Size", Field, 0}, + {"CertChainPolicyStatus", Type, 0}, + {"CertChainPolicyStatus.ChainIndex", Field, 0}, + {"CertChainPolicyStatus.ElementIndex", Field, 0}, + {"CertChainPolicyStatus.Error", Field, 0}, + {"CertChainPolicyStatus.ExtraPolicyStatus", Field, 0}, + {"CertChainPolicyStatus.Size", Field, 0}, + {"CertCloseStore", Func, 0}, + {"CertContext", Type, 0}, + {"CertContext.CertInfo", Field, 0}, + {"CertContext.EncodedCert", Field, 0}, + {"CertContext.EncodingType", Field, 0}, + {"CertContext.Length", Field, 0}, + {"CertContext.Store", Field, 0}, + {"CertCreateCertificateContext", Func, 0}, + {"CertEnhKeyUsage", Type, 0}, + {"CertEnhKeyUsage.Length", Field, 0}, + {"CertEnhKeyUsage.UsageIdentifiers", Field, 0}, + {"CertEnumCertificatesInStore", Func, 0}, + {"CertFreeCertificateChain", Func, 0}, + {"CertFreeCertificateContext", Func, 0}, + {"CertGetCertificateChain", Func, 0}, + {"CertInfo", Type, 11}, + {"CertOpenStore", Func, 0}, + {"CertOpenSystemStore", Func, 0}, + {"CertRevocationCrlInfo", Type, 11}, + {"CertRevocationInfo", Type, 0}, + {"CertRevocationInfo.CrlInfo", Field, 0}, + {"CertRevocationInfo.FreshnessTime", Field, 0}, + {"CertRevocationInfo.HasFreshnessTime", Field, 0}, + {"CertRevocationInfo.OidSpecificInfo", Field, 0}, + {"CertRevocationInfo.RevocationOid", Field, 0}, + {"CertRevocationInfo.RevocationResult", Field, 0}, + {"CertRevocationInfo.Size", Field, 0}, + {"CertSimpleChain", Type, 0}, + {"CertSimpleChain.Elements", Field, 0}, + {"CertSimpleChain.HasRevocationFreshnessTime", Field, 0}, + {"CertSimpleChain.NumElements", Field, 0}, + {"CertSimpleChain.RevocationFreshnessTime", Field, 0}, + {"CertSimpleChain.Size", Field, 0}, + {"CertSimpleChain.TrustListInfo", Field, 0}, + {"CertSimpleChain.TrustStatus", Field, 0}, + {"CertTrustListInfo", Type, 11}, + {"CertTrustStatus", Type, 0}, + {"CertTrustStatus.ErrorStatus", Field, 0}, + {"CertTrustStatus.InfoStatus", Field, 0}, + {"CertUsageMatch", Type, 0}, + {"CertUsageMatch.Type", Field, 0}, + {"CertUsageMatch.Usage", Field, 0}, + {"CertVerifyCertificateChainPolicy", Func, 0}, + {"Chdir", Func, 0}, + {"CheckBpfVersion", Func, 0}, + {"Chflags", Func, 0}, + {"Chmod", Func, 0}, + {"Chown", Func, 0}, + {"Chroot", Func, 0}, + {"Clearenv", Func, 0}, + {"Close", Func, 0}, + {"CloseHandle", Func, 0}, + {"CloseOnExec", Func, 0}, + {"Closesocket", Func, 0}, + {"CmsgLen", Func, 0}, + {"CmsgSpace", Func, 0}, + {"Cmsghdr", Type, 0}, + {"Cmsghdr.Len", Field, 0}, + {"Cmsghdr.Level", Field, 0}, + {"Cmsghdr.Type", Field, 0}, + {"Cmsghdr.X__cmsg_data", Field, 0}, + {"CommandLineToArgv", Func, 0}, + {"ComputerName", Func, 0}, + {"Conn", Type, 9}, + {"Connect", Func, 0}, + {"ConnectEx", Func, 1}, + {"ConvertSidToStringSid", Func, 0}, + {"ConvertStringSidToSid", Func, 0}, + {"CopySid", Func, 0}, + {"Creat", Func, 0}, + {"CreateDirectory", Func, 0}, + {"CreateFile", Func, 0}, + {"CreateFileMapping", Func, 0}, + {"CreateHardLink", Func, 4}, + {"CreateIoCompletionPort", Func, 0}, + {"CreatePipe", Func, 0}, + {"CreateProcess", Func, 0}, + {"CreateProcessAsUser", Func, 10}, + {"CreateSymbolicLink", Func, 4}, + {"CreateToolhelp32Snapshot", Func, 4}, + {"Credential", Type, 0}, + {"Credential.Gid", Field, 0}, + {"Credential.Groups", Field, 0}, + {"Credential.NoSetGroups", Field, 9}, + {"Credential.Uid", Field, 0}, + {"CryptAcquireContext", Func, 0}, + {"CryptGenRandom", Func, 0}, + {"CryptReleaseContext", Func, 0}, + {"DIOCBSFLUSH", Const, 1}, + {"DIOCOSFPFLUSH", Const, 1}, + {"DLL", Type, 0}, + {"DLL.Handle", Field, 0}, + {"DLL.Name", Field, 0}, + {"DLLError", Type, 0}, + {"DLLError.Err", Field, 0}, + {"DLLError.Msg", Field, 0}, + {"DLLError.ObjName", Field, 0}, + {"DLT_A429", Const, 0}, + {"DLT_A653_ICM", Const, 0}, + {"DLT_AIRONET_HEADER", Const, 0}, + {"DLT_AOS", Const, 1}, + {"DLT_APPLE_IP_OVER_IEEE1394", Const, 0}, + {"DLT_ARCNET", Const, 0}, + {"DLT_ARCNET_LINUX", Const, 0}, + {"DLT_ATM_CLIP", Const, 0}, + {"DLT_ATM_RFC1483", Const, 0}, + {"DLT_AURORA", Const, 0}, + {"DLT_AX25", Const, 0}, + {"DLT_AX25_KISS", Const, 0}, + {"DLT_BACNET_MS_TP", Const, 0}, + {"DLT_BLUETOOTH_HCI_H4", Const, 0}, + {"DLT_BLUETOOTH_HCI_H4_WITH_PHDR", Const, 0}, + {"DLT_CAN20B", Const, 0}, + {"DLT_CAN_SOCKETCAN", Const, 1}, + {"DLT_CHAOS", Const, 0}, + {"DLT_CHDLC", Const, 0}, + {"DLT_CISCO_IOS", Const, 0}, + {"DLT_C_HDLC", Const, 0}, + {"DLT_C_HDLC_WITH_DIR", Const, 0}, + {"DLT_DBUS", Const, 1}, + {"DLT_DECT", Const, 1}, + {"DLT_DOCSIS", Const, 0}, + {"DLT_DVB_CI", Const, 1}, + {"DLT_ECONET", Const, 0}, + {"DLT_EN10MB", Const, 0}, + {"DLT_EN3MB", Const, 0}, + {"DLT_ENC", Const, 0}, + {"DLT_ERF", Const, 0}, + {"DLT_ERF_ETH", Const, 0}, + {"DLT_ERF_POS", Const, 0}, + {"DLT_FC_2", Const, 1}, + {"DLT_FC_2_WITH_FRAME_DELIMS", Const, 1}, + {"DLT_FDDI", Const, 0}, + {"DLT_FLEXRAY", Const, 0}, + {"DLT_FRELAY", Const, 0}, + {"DLT_FRELAY_WITH_DIR", Const, 0}, + {"DLT_GCOM_SERIAL", Const, 0}, + {"DLT_GCOM_T1E1", Const, 0}, + {"DLT_GPF_F", Const, 0}, + {"DLT_GPF_T", Const, 0}, + {"DLT_GPRS_LLC", Const, 0}, + {"DLT_GSMTAP_ABIS", Const, 1}, + {"DLT_GSMTAP_UM", Const, 1}, + {"DLT_HDLC", Const, 1}, + {"DLT_HHDLC", Const, 0}, + {"DLT_HIPPI", Const, 1}, + {"DLT_IBM_SN", Const, 0}, + {"DLT_IBM_SP", Const, 0}, + {"DLT_IEEE802", Const, 0}, + {"DLT_IEEE802_11", Const, 0}, + {"DLT_IEEE802_11_RADIO", Const, 0}, + {"DLT_IEEE802_11_RADIO_AVS", Const, 0}, + {"DLT_IEEE802_15_4", Const, 0}, + {"DLT_IEEE802_15_4_LINUX", Const, 0}, + {"DLT_IEEE802_15_4_NOFCS", Const, 1}, + {"DLT_IEEE802_15_4_NONASK_PHY", Const, 0}, + {"DLT_IEEE802_16_MAC_CPS", Const, 0}, + {"DLT_IEEE802_16_MAC_CPS_RADIO", Const, 0}, + {"DLT_IPFILTER", Const, 0}, + {"DLT_IPMB", Const, 0}, + {"DLT_IPMB_LINUX", Const, 0}, + {"DLT_IPNET", Const, 1}, + {"DLT_IPOIB", Const, 1}, + {"DLT_IPV4", Const, 1}, + {"DLT_IPV6", Const, 1}, + {"DLT_IP_OVER_FC", Const, 0}, + {"DLT_JUNIPER_ATM1", Const, 0}, + {"DLT_JUNIPER_ATM2", Const, 0}, + {"DLT_JUNIPER_ATM_CEMIC", Const, 1}, + {"DLT_JUNIPER_CHDLC", Const, 0}, + {"DLT_JUNIPER_ES", Const, 0}, + {"DLT_JUNIPER_ETHER", Const, 0}, + {"DLT_JUNIPER_FIBRECHANNEL", Const, 1}, + {"DLT_JUNIPER_FRELAY", Const, 0}, + {"DLT_JUNIPER_GGSN", Const, 0}, + {"DLT_JUNIPER_ISM", Const, 0}, + {"DLT_JUNIPER_MFR", Const, 0}, + {"DLT_JUNIPER_MLFR", Const, 0}, + {"DLT_JUNIPER_MLPPP", Const, 0}, + {"DLT_JUNIPER_MONITOR", Const, 0}, + {"DLT_JUNIPER_PIC_PEER", Const, 0}, + {"DLT_JUNIPER_PPP", Const, 0}, + {"DLT_JUNIPER_PPPOE", Const, 0}, + {"DLT_JUNIPER_PPPOE_ATM", Const, 0}, + {"DLT_JUNIPER_SERVICES", Const, 0}, + {"DLT_JUNIPER_SRX_E2E", Const, 1}, + {"DLT_JUNIPER_ST", Const, 0}, + {"DLT_JUNIPER_VP", Const, 0}, + {"DLT_JUNIPER_VS", Const, 1}, + {"DLT_LAPB_WITH_DIR", Const, 0}, + {"DLT_LAPD", Const, 0}, + {"DLT_LIN", Const, 0}, + {"DLT_LINUX_EVDEV", Const, 1}, + {"DLT_LINUX_IRDA", Const, 0}, + {"DLT_LINUX_LAPD", Const, 0}, + {"DLT_LINUX_PPP_WITHDIRECTION", Const, 0}, + {"DLT_LINUX_SLL", Const, 0}, + {"DLT_LOOP", Const, 0}, + {"DLT_LTALK", Const, 0}, + {"DLT_MATCHING_MAX", Const, 1}, + {"DLT_MATCHING_MIN", Const, 1}, + {"DLT_MFR", Const, 0}, + {"DLT_MOST", Const, 0}, + {"DLT_MPEG_2_TS", Const, 1}, + {"DLT_MPLS", Const, 1}, + {"DLT_MTP2", Const, 0}, + {"DLT_MTP2_WITH_PHDR", Const, 0}, + {"DLT_MTP3", Const, 0}, + {"DLT_MUX27010", Const, 1}, + {"DLT_NETANALYZER", Const, 1}, + {"DLT_NETANALYZER_TRANSPARENT", Const, 1}, + {"DLT_NFC_LLCP", Const, 1}, + {"DLT_NFLOG", Const, 1}, + {"DLT_NG40", Const, 1}, + {"DLT_NULL", Const, 0}, + {"DLT_PCI_EXP", Const, 0}, + {"DLT_PFLOG", Const, 0}, + {"DLT_PFSYNC", Const, 0}, + {"DLT_PPI", Const, 0}, + {"DLT_PPP", Const, 0}, + {"DLT_PPP_BSDOS", Const, 0}, + {"DLT_PPP_ETHER", Const, 0}, + {"DLT_PPP_PPPD", Const, 0}, + {"DLT_PPP_SERIAL", Const, 0}, + {"DLT_PPP_WITH_DIR", Const, 0}, + {"DLT_PPP_WITH_DIRECTION", Const, 0}, + {"DLT_PRISM_HEADER", Const, 0}, + {"DLT_PRONET", Const, 0}, + {"DLT_RAIF1", Const, 0}, + {"DLT_RAW", Const, 0}, + {"DLT_RAWAF_MASK", Const, 1}, + {"DLT_RIO", Const, 0}, + {"DLT_SCCP", Const, 0}, + {"DLT_SITA", Const, 0}, + {"DLT_SLIP", Const, 0}, + {"DLT_SLIP_BSDOS", Const, 0}, + {"DLT_STANAG_5066_D_PDU", Const, 1}, + {"DLT_SUNATM", Const, 0}, + {"DLT_SYMANTEC_FIREWALL", Const, 0}, + {"DLT_TZSP", Const, 0}, + {"DLT_USB", Const, 0}, + {"DLT_USB_LINUX", Const, 0}, + {"DLT_USB_LINUX_MMAPPED", Const, 1}, + {"DLT_USER0", Const, 0}, + {"DLT_USER1", Const, 0}, + {"DLT_USER10", Const, 0}, + {"DLT_USER11", Const, 0}, + {"DLT_USER12", Const, 0}, + {"DLT_USER13", Const, 0}, + {"DLT_USER14", Const, 0}, + {"DLT_USER15", Const, 0}, + {"DLT_USER2", Const, 0}, + {"DLT_USER3", Const, 0}, + {"DLT_USER4", Const, 0}, + {"DLT_USER5", Const, 0}, + {"DLT_USER6", Const, 0}, + {"DLT_USER7", Const, 0}, + {"DLT_USER8", Const, 0}, + {"DLT_USER9", Const, 0}, + {"DLT_WIHART", Const, 1}, + {"DLT_X2E_SERIAL", Const, 0}, + {"DLT_X2E_XORAYA", Const, 0}, + {"DNSMXData", Type, 0}, + {"DNSMXData.NameExchange", Field, 0}, + {"DNSMXData.Pad", Field, 0}, + {"DNSMXData.Preference", Field, 0}, + {"DNSPTRData", Type, 0}, + {"DNSPTRData.Host", Field, 0}, + {"DNSRecord", Type, 0}, + {"DNSRecord.Data", Field, 0}, + {"DNSRecord.Dw", Field, 0}, + {"DNSRecord.Length", Field, 0}, + {"DNSRecord.Name", Field, 0}, + {"DNSRecord.Next", Field, 0}, + {"DNSRecord.Reserved", Field, 0}, + {"DNSRecord.Ttl", Field, 0}, + {"DNSRecord.Type", Field, 0}, + {"DNSSRVData", Type, 0}, + {"DNSSRVData.Pad", Field, 0}, + {"DNSSRVData.Port", Field, 0}, + {"DNSSRVData.Priority", Field, 0}, + {"DNSSRVData.Target", Field, 0}, + {"DNSSRVData.Weight", Field, 0}, + {"DNSTXTData", Type, 0}, + {"DNSTXTData.StringArray", Field, 0}, + {"DNSTXTData.StringCount", Field, 0}, + {"DNS_INFO_NO_RECORDS", Const, 4}, + {"DNS_TYPE_A", Const, 0}, + {"DNS_TYPE_A6", Const, 0}, + {"DNS_TYPE_AAAA", Const, 0}, + {"DNS_TYPE_ADDRS", Const, 0}, + {"DNS_TYPE_AFSDB", Const, 0}, + {"DNS_TYPE_ALL", Const, 0}, + {"DNS_TYPE_ANY", Const, 0}, + {"DNS_TYPE_ATMA", Const, 0}, + {"DNS_TYPE_AXFR", Const, 0}, + {"DNS_TYPE_CERT", Const, 0}, + {"DNS_TYPE_CNAME", Const, 0}, + {"DNS_TYPE_DHCID", Const, 0}, + {"DNS_TYPE_DNAME", Const, 0}, + {"DNS_TYPE_DNSKEY", Const, 0}, + {"DNS_TYPE_DS", Const, 0}, + {"DNS_TYPE_EID", Const, 0}, + {"DNS_TYPE_GID", Const, 0}, + {"DNS_TYPE_GPOS", Const, 0}, + {"DNS_TYPE_HINFO", Const, 0}, + {"DNS_TYPE_ISDN", Const, 0}, + {"DNS_TYPE_IXFR", Const, 0}, + {"DNS_TYPE_KEY", Const, 0}, + {"DNS_TYPE_KX", Const, 0}, + {"DNS_TYPE_LOC", Const, 0}, + {"DNS_TYPE_MAILA", Const, 0}, + {"DNS_TYPE_MAILB", Const, 0}, + {"DNS_TYPE_MB", Const, 0}, + {"DNS_TYPE_MD", Const, 0}, + {"DNS_TYPE_MF", Const, 0}, + {"DNS_TYPE_MG", Const, 0}, + {"DNS_TYPE_MINFO", Const, 0}, + {"DNS_TYPE_MR", Const, 0}, + {"DNS_TYPE_MX", Const, 0}, + {"DNS_TYPE_NAPTR", Const, 0}, + {"DNS_TYPE_NBSTAT", Const, 0}, + {"DNS_TYPE_NIMLOC", Const, 0}, + {"DNS_TYPE_NS", Const, 0}, + {"DNS_TYPE_NSAP", Const, 0}, + {"DNS_TYPE_NSAPPTR", Const, 0}, + {"DNS_TYPE_NSEC", Const, 0}, + {"DNS_TYPE_NULL", Const, 0}, + {"DNS_TYPE_NXT", Const, 0}, + {"DNS_TYPE_OPT", Const, 0}, + {"DNS_TYPE_PTR", Const, 0}, + {"DNS_TYPE_PX", Const, 0}, + {"DNS_TYPE_RP", Const, 0}, + {"DNS_TYPE_RRSIG", Const, 0}, + {"DNS_TYPE_RT", Const, 0}, + {"DNS_TYPE_SIG", Const, 0}, + {"DNS_TYPE_SINK", Const, 0}, + {"DNS_TYPE_SOA", Const, 0}, + {"DNS_TYPE_SRV", Const, 0}, + {"DNS_TYPE_TEXT", Const, 0}, + {"DNS_TYPE_TKEY", Const, 0}, + {"DNS_TYPE_TSIG", Const, 0}, + {"DNS_TYPE_UID", Const, 0}, + {"DNS_TYPE_UINFO", Const, 0}, + {"DNS_TYPE_UNSPEC", Const, 0}, + {"DNS_TYPE_WINS", Const, 0}, + {"DNS_TYPE_WINSR", Const, 0}, + {"DNS_TYPE_WKS", Const, 0}, + {"DNS_TYPE_X25", Const, 0}, + {"DT_BLK", Const, 0}, + {"DT_CHR", Const, 0}, + {"DT_DIR", Const, 0}, + {"DT_FIFO", Const, 0}, + {"DT_LNK", Const, 0}, + {"DT_REG", Const, 0}, + {"DT_SOCK", Const, 0}, + {"DT_UNKNOWN", Const, 0}, + {"DT_WHT", Const, 0}, + {"DUPLICATE_CLOSE_SOURCE", Const, 0}, + {"DUPLICATE_SAME_ACCESS", Const, 0}, + {"DeleteFile", Func, 0}, + {"DetachLsf", Func, 0}, + {"DeviceIoControl", Func, 4}, + {"Dirent", Type, 0}, + {"Dirent.Fileno", Field, 0}, + {"Dirent.Ino", Field, 0}, + {"Dirent.Name", Field, 0}, + {"Dirent.Namlen", Field, 0}, + {"Dirent.Off", Field, 0}, + {"Dirent.Pad0", Field, 12}, + {"Dirent.Pad1", Field, 12}, + {"Dirent.Pad_cgo_0", Field, 0}, + {"Dirent.Reclen", Field, 0}, + {"Dirent.Seekoff", Field, 0}, + {"Dirent.Type", Field, 0}, + {"Dirent.X__d_padding", Field, 3}, + {"DnsNameCompare", Func, 4}, + {"DnsQuery", Func, 0}, + {"DnsRecordListFree", Func, 0}, + {"DnsSectionAdditional", Const, 4}, + {"DnsSectionAnswer", Const, 4}, + {"DnsSectionAuthority", Const, 4}, + {"DnsSectionQuestion", Const, 4}, + {"Dup", Func, 0}, + {"Dup2", Func, 0}, + {"Dup3", Func, 2}, + {"DuplicateHandle", Func, 0}, + {"E2BIG", Const, 0}, + {"EACCES", Const, 0}, + {"EADDRINUSE", Const, 0}, + {"EADDRNOTAVAIL", Const, 0}, + {"EADV", Const, 0}, + {"EAFNOSUPPORT", Const, 0}, + {"EAGAIN", Const, 0}, + {"EALREADY", Const, 0}, + {"EAUTH", Const, 0}, + {"EBADARCH", Const, 0}, + {"EBADE", Const, 0}, + {"EBADEXEC", Const, 0}, + {"EBADF", Const, 0}, + {"EBADFD", Const, 0}, + {"EBADMACHO", Const, 0}, + {"EBADMSG", Const, 0}, + {"EBADR", Const, 0}, + {"EBADRPC", Const, 0}, + {"EBADRQC", Const, 0}, + {"EBADSLT", Const, 0}, + {"EBFONT", Const, 0}, + {"EBUSY", Const, 0}, + {"ECANCELED", Const, 0}, + {"ECAPMODE", Const, 1}, + {"ECHILD", Const, 0}, + {"ECHO", Const, 0}, + {"ECHOCTL", Const, 0}, + {"ECHOE", Const, 0}, + {"ECHOK", Const, 0}, + {"ECHOKE", Const, 0}, + {"ECHONL", Const, 0}, + {"ECHOPRT", Const, 0}, + {"ECHRNG", Const, 0}, + {"ECOMM", Const, 0}, + {"ECONNABORTED", Const, 0}, + {"ECONNREFUSED", Const, 0}, + {"ECONNRESET", Const, 0}, + {"EDEADLK", Const, 0}, + {"EDEADLOCK", Const, 0}, + {"EDESTADDRREQ", Const, 0}, + {"EDEVERR", Const, 0}, + {"EDOM", Const, 0}, + {"EDOOFUS", Const, 0}, + {"EDOTDOT", Const, 0}, + {"EDQUOT", Const, 0}, + {"EEXIST", Const, 0}, + {"EFAULT", Const, 0}, + {"EFBIG", Const, 0}, + {"EFER_LMA", Const, 1}, + {"EFER_LME", Const, 1}, + {"EFER_NXE", Const, 1}, + {"EFER_SCE", Const, 1}, + {"EFTYPE", Const, 0}, + {"EHOSTDOWN", Const, 0}, + {"EHOSTUNREACH", Const, 0}, + {"EHWPOISON", Const, 0}, + {"EIDRM", Const, 0}, + {"EILSEQ", Const, 0}, + {"EINPROGRESS", Const, 0}, + {"EINTR", Const, 0}, + {"EINVAL", Const, 0}, + {"EIO", Const, 0}, + {"EIPSEC", Const, 1}, + {"EISCONN", Const, 0}, + {"EISDIR", Const, 0}, + {"EISNAM", Const, 0}, + {"EKEYEXPIRED", Const, 0}, + {"EKEYREJECTED", Const, 0}, + {"EKEYREVOKED", Const, 0}, + {"EL2HLT", Const, 0}, + {"EL2NSYNC", Const, 0}, + {"EL3HLT", Const, 0}, + {"EL3RST", Const, 0}, + {"ELAST", Const, 0}, + {"ELF_NGREG", Const, 0}, + {"ELF_PRARGSZ", Const, 0}, + {"ELIBACC", Const, 0}, + {"ELIBBAD", Const, 0}, + {"ELIBEXEC", Const, 0}, + {"ELIBMAX", Const, 0}, + {"ELIBSCN", Const, 0}, + {"ELNRNG", Const, 0}, + {"ELOOP", Const, 0}, + {"EMEDIUMTYPE", Const, 0}, + {"EMFILE", Const, 0}, + {"EMLINK", Const, 0}, + {"EMSGSIZE", Const, 0}, + {"EMT_TAGOVF", Const, 1}, + {"EMULTIHOP", Const, 0}, + {"EMUL_ENABLED", Const, 1}, + {"EMUL_LINUX", Const, 1}, + {"EMUL_LINUX32", Const, 1}, + {"EMUL_MAXID", Const, 1}, + {"EMUL_NATIVE", Const, 1}, + {"ENAMETOOLONG", Const, 0}, + {"ENAVAIL", Const, 0}, + {"ENDRUNDISC", Const, 1}, + {"ENEEDAUTH", Const, 0}, + {"ENETDOWN", Const, 0}, + {"ENETRESET", Const, 0}, + {"ENETUNREACH", Const, 0}, + {"ENFILE", Const, 0}, + {"ENOANO", Const, 0}, + {"ENOATTR", Const, 0}, + {"ENOBUFS", Const, 0}, + {"ENOCSI", Const, 0}, + {"ENODATA", Const, 0}, + {"ENODEV", Const, 0}, + {"ENOENT", Const, 0}, + {"ENOEXEC", Const, 0}, + {"ENOKEY", Const, 0}, + {"ENOLCK", Const, 0}, + {"ENOLINK", Const, 0}, + {"ENOMEDIUM", Const, 0}, + {"ENOMEM", Const, 0}, + {"ENOMSG", Const, 0}, + {"ENONET", Const, 0}, + {"ENOPKG", Const, 0}, + {"ENOPOLICY", Const, 0}, + {"ENOPROTOOPT", Const, 0}, + {"ENOSPC", Const, 0}, + {"ENOSR", Const, 0}, + {"ENOSTR", Const, 0}, + {"ENOSYS", Const, 0}, + {"ENOTBLK", Const, 0}, + {"ENOTCAPABLE", Const, 0}, + {"ENOTCONN", Const, 0}, + {"ENOTDIR", Const, 0}, + {"ENOTEMPTY", Const, 0}, + {"ENOTNAM", Const, 0}, + {"ENOTRECOVERABLE", Const, 0}, + {"ENOTSOCK", Const, 0}, + {"ENOTSUP", Const, 0}, + {"ENOTTY", Const, 0}, + {"ENOTUNIQ", Const, 0}, + {"ENXIO", Const, 0}, + {"EN_SW_CTL_INF", Const, 1}, + {"EN_SW_CTL_PREC", Const, 1}, + {"EN_SW_CTL_ROUND", Const, 1}, + {"EN_SW_DATACHAIN", Const, 1}, + {"EN_SW_DENORM", Const, 1}, + {"EN_SW_INVOP", Const, 1}, + {"EN_SW_OVERFLOW", Const, 1}, + {"EN_SW_PRECLOSS", Const, 1}, + {"EN_SW_UNDERFLOW", Const, 1}, + {"EN_SW_ZERODIV", Const, 1}, + {"EOPNOTSUPP", Const, 0}, + {"EOVERFLOW", Const, 0}, + {"EOWNERDEAD", Const, 0}, + {"EPERM", Const, 0}, + {"EPFNOSUPPORT", Const, 0}, + {"EPIPE", Const, 0}, + {"EPOLLERR", Const, 0}, + {"EPOLLET", Const, 0}, + {"EPOLLHUP", Const, 0}, + {"EPOLLIN", Const, 0}, + {"EPOLLMSG", Const, 0}, + {"EPOLLONESHOT", Const, 0}, + {"EPOLLOUT", Const, 0}, + {"EPOLLPRI", Const, 0}, + {"EPOLLRDBAND", Const, 0}, + {"EPOLLRDHUP", Const, 0}, + {"EPOLLRDNORM", Const, 0}, + {"EPOLLWRBAND", Const, 0}, + {"EPOLLWRNORM", Const, 0}, + {"EPOLL_CLOEXEC", Const, 0}, + {"EPOLL_CTL_ADD", Const, 0}, + {"EPOLL_CTL_DEL", Const, 0}, + {"EPOLL_CTL_MOD", Const, 0}, + {"EPOLL_NONBLOCK", Const, 0}, + {"EPROCLIM", Const, 0}, + {"EPROCUNAVAIL", Const, 0}, + {"EPROGMISMATCH", Const, 0}, + {"EPROGUNAVAIL", Const, 0}, + {"EPROTO", Const, 0}, + {"EPROTONOSUPPORT", Const, 0}, + {"EPROTOTYPE", Const, 0}, + {"EPWROFF", Const, 0}, + {"EQFULL", Const, 16}, + {"ERANGE", Const, 0}, + {"EREMCHG", Const, 0}, + {"EREMOTE", Const, 0}, + {"EREMOTEIO", Const, 0}, + {"ERESTART", Const, 0}, + {"ERFKILL", Const, 0}, + {"EROFS", Const, 0}, + {"ERPCMISMATCH", Const, 0}, + {"ERROR_ACCESS_DENIED", Const, 0}, + {"ERROR_ALREADY_EXISTS", Const, 0}, + {"ERROR_BROKEN_PIPE", Const, 0}, + {"ERROR_BUFFER_OVERFLOW", Const, 0}, + {"ERROR_DIR_NOT_EMPTY", Const, 8}, + {"ERROR_ENVVAR_NOT_FOUND", Const, 0}, + {"ERROR_FILE_EXISTS", Const, 0}, + {"ERROR_FILE_NOT_FOUND", Const, 0}, + {"ERROR_HANDLE_EOF", Const, 2}, + {"ERROR_INSUFFICIENT_BUFFER", Const, 0}, + {"ERROR_IO_PENDING", Const, 0}, + {"ERROR_MOD_NOT_FOUND", Const, 0}, + {"ERROR_MORE_DATA", Const, 3}, + {"ERROR_NETNAME_DELETED", Const, 3}, + {"ERROR_NOT_FOUND", Const, 1}, + {"ERROR_NO_MORE_FILES", Const, 0}, + {"ERROR_OPERATION_ABORTED", Const, 0}, + {"ERROR_PATH_NOT_FOUND", Const, 0}, + {"ERROR_PRIVILEGE_NOT_HELD", Const, 4}, + {"ERROR_PROC_NOT_FOUND", Const, 0}, + {"ESHLIBVERS", Const, 0}, + {"ESHUTDOWN", Const, 0}, + {"ESOCKTNOSUPPORT", Const, 0}, + {"ESPIPE", Const, 0}, + {"ESRCH", Const, 0}, + {"ESRMNT", Const, 0}, + {"ESTALE", Const, 0}, + {"ESTRPIPE", Const, 0}, + {"ETHERCAP_JUMBO_MTU", Const, 1}, + {"ETHERCAP_VLAN_HWTAGGING", Const, 1}, + {"ETHERCAP_VLAN_MTU", Const, 1}, + {"ETHERMIN", Const, 1}, + {"ETHERMTU", Const, 1}, + {"ETHERMTU_JUMBO", Const, 1}, + {"ETHERTYPE_8023", Const, 1}, + {"ETHERTYPE_AARP", Const, 1}, + {"ETHERTYPE_ACCTON", Const, 1}, + {"ETHERTYPE_AEONIC", Const, 1}, + {"ETHERTYPE_ALPHA", Const, 1}, + {"ETHERTYPE_AMBER", Const, 1}, + {"ETHERTYPE_AMOEBA", Const, 1}, + {"ETHERTYPE_AOE", Const, 1}, + {"ETHERTYPE_APOLLO", Const, 1}, + {"ETHERTYPE_APOLLODOMAIN", Const, 1}, + {"ETHERTYPE_APPLETALK", Const, 1}, + {"ETHERTYPE_APPLITEK", Const, 1}, + {"ETHERTYPE_ARGONAUT", Const, 1}, + {"ETHERTYPE_ARP", Const, 1}, + {"ETHERTYPE_AT", Const, 1}, + {"ETHERTYPE_ATALK", Const, 1}, + {"ETHERTYPE_ATOMIC", Const, 1}, + {"ETHERTYPE_ATT", Const, 1}, + {"ETHERTYPE_ATTSTANFORD", Const, 1}, + {"ETHERTYPE_AUTOPHON", Const, 1}, + {"ETHERTYPE_AXIS", Const, 1}, + {"ETHERTYPE_BCLOOP", Const, 1}, + {"ETHERTYPE_BOFL", Const, 1}, + {"ETHERTYPE_CABLETRON", Const, 1}, + {"ETHERTYPE_CHAOS", Const, 1}, + {"ETHERTYPE_COMDESIGN", Const, 1}, + {"ETHERTYPE_COMPUGRAPHIC", Const, 1}, + {"ETHERTYPE_COUNTERPOINT", Const, 1}, + {"ETHERTYPE_CRONUS", Const, 1}, + {"ETHERTYPE_CRONUSVLN", Const, 1}, + {"ETHERTYPE_DCA", Const, 1}, + {"ETHERTYPE_DDE", Const, 1}, + {"ETHERTYPE_DEBNI", Const, 1}, + {"ETHERTYPE_DECAM", Const, 1}, + {"ETHERTYPE_DECCUST", Const, 1}, + {"ETHERTYPE_DECDIAG", Const, 1}, + {"ETHERTYPE_DECDNS", Const, 1}, + {"ETHERTYPE_DECDTS", Const, 1}, + {"ETHERTYPE_DECEXPER", Const, 1}, + {"ETHERTYPE_DECLAST", Const, 1}, + {"ETHERTYPE_DECLTM", Const, 1}, + {"ETHERTYPE_DECMUMPS", Const, 1}, + {"ETHERTYPE_DECNETBIOS", Const, 1}, + {"ETHERTYPE_DELTACON", Const, 1}, + {"ETHERTYPE_DIDDLE", Const, 1}, + {"ETHERTYPE_DLOG1", Const, 1}, + {"ETHERTYPE_DLOG2", Const, 1}, + {"ETHERTYPE_DN", Const, 1}, + {"ETHERTYPE_DOGFIGHT", Const, 1}, + {"ETHERTYPE_DSMD", Const, 1}, + {"ETHERTYPE_ECMA", Const, 1}, + {"ETHERTYPE_ENCRYPT", Const, 1}, + {"ETHERTYPE_ES", Const, 1}, + {"ETHERTYPE_EXCELAN", Const, 1}, + {"ETHERTYPE_EXPERDATA", Const, 1}, + {"ETHERTYPE_FLIP", Const, 1}, + {"ETHERTYPE_FLOWCONTROL", Const, 1}, + {"ETHERTYPE_FRARP", Const, 1}, + {"ETHERTYPE_GENDYN", Const, 1}, + {"ETHERTYPE_HAYES", Const, 1}, + {"ETHERTYPE_HIPPI_FP", Const, 1}, + {"ETHERTYPE_HITACHI", Const, 1}, + {"ETHERTYPE_HP", Const, 1}, + {"ETHERTYPE_IEEEPUP", Const, 1}, + {"ETHERTYPE_IEEEPUPAT", Const, 1}, + {"ETHERTYPE_IMLBL", Const, 1}, + {"ETHERTYPE_IMLBLDIAG", Const, 1}, + {"ETHERTYPE_IP", Const, 1}, + {"ETHERTYPE_IPAS", Const, 1}, + {"ETHERTYPE_IPV6", Const, 1}, + {"ETHERTYPE_IPX", Const, 1}, + {"ETHERTYPE_IPXNEW", Const, 1}, + {"ETHERTYPE_KALPANA", Const, 1}, + {"ETHERTYPE_LANBRIDGE", Const, 1}, + {"ETHERTYPE_LANPROBE", Const, 1}, + {"ETHERTYPE_LAT", Const, 1}, + {"ETHERTYPE_LBACK", Const, 1}, + {"ETHERTYPE_LITTLE", Const, 1}, + {"ETHERTYPE_LLDP", Const, 1}, + {"ETHERTYPE_LOGICRAFT", Const, 1}, + {"ETHERTYPE_LOOPBACK", Const, 1}, + {"ETHERTYPE_MATRA", Const, 1}, + {"ETHERTYPE_MAX", Const, 1}, + {"ETHERTYPE_MERIT", Const, 1}, + {"ETHERTYPE_MICP", Const, 1}, + {"ETHERTYPE_MOPDL", Const, 1}, + {"ETHERTYPE_MOPRC", Const, 1}, + {"ETHERTYPE_MOTOROLA", Const, 1}, + {"ETHERTYPE_MPLS", Const, 1}, + {"ETHERTYPE_MPLS_MCAST", Const, 1}, + {"ETHERTYPE_MUMPS", Const, 1}, + {"ETHERTYPE_NBPCC", Const, 1}, + {"ETHERTYPE_NBPCLAIM", Const, 1}, + {"ETHERTYPE_NBPCLREQ", Const, 1}, + {"ETHERTYPE_NBPCLRSP", Const, 1}, + {"ETHERTYPE_NBPCREQ", Const, 1}, + {"ETHERTYPE_NBPCRSP", Const, 1}, + {"ETHERTYPE_NBPDG", Const, 1}, + {"ETHERTYPE_NBPDGB", Const, 1}, + {"ETHERTYPE_NBPDLTE", Const, 1}, + {"ETHERTYPE_NBPRAR", Const, 1}, + {"ETHERTYPE_NBPRAS", Const, 1}, + {"ETHERTYPE_NBPRST", Const, 1}, + {"ETHERTYPE_NBPSCD", Const, 1}, + {"ETHERTYPE_NBPVCD", Const, 1}, + {"ETHERTYPE_NBS", Const, 1}, + {"ETHERTYPE_NCD", Const, 1}, + {"ETHERTYPE_NESTAR", Const, 1}, + {"ETHERTYPE_NETBEUI", Const, 1}, + {"ETHERTYPE_NOVELL", Const, 1}, + {"ETHERTYPE_NS", Const, 1}, + {"ETHERTYPE_NSAT", Const, 1}, + {"ETHERTYPE_NSCOMPAT", Const, 1}, + {"ETHERTYPE_NTRAILER", Const, 1}, + {"ETHERTYPE_OS9", Const, 1}, + {"ETHERTYPE_OS9NET", Const, 1}, + {"ETHERTYPE_PACER", Const, 1}, + {"ETHERTYPE_PAE", Const, 1}, + {"ETHERTYPE_PCS", Const, 1}, + {"ETHERTYPE_PLANNING", Const, 1}, + {"ETHERTYPE_PPP", Const, 1}, + {"ETHERTYPE_PPPOE", Const, 1}, + {"ETHERTYPE_PPPOEDISC", Const, 1}, + {"ETHERTYPE_PRIMENTS", Const, 1}, + {"ETHERTYPE_PUP", Const, 1}, + {"ETHERTYPE_PUPAT", Const, 1}, + {"ETHERTYPE_QINQ", Const, 1}, + {"ETHERTYPE_RACAL", Const, 1}, + {"ETHERTYPE_RATIONAL", Const, 1}, + {"ETHERTYPE_RAWFR", Const, 1}, + {"ETHERTYPE_RCL", Const, 1}, + {"ETHERTYPE_RDP", Const, 1}, + {"ETHERTYPE_RETIX", Const, 1}, + {"ETHERTYPE_REVARP", Const, 1}, + {"ETHERTYPE_SCA", Const, 1}, + {"ETHERTYPE_SECTRA", Const, 1}, + {"ETHERTYPE_SECUREDATA", Const, 1}, + {"ETHERTYPE_SGITW", Const, 1}, + {"ETHERTYPE_SG_BOUNCE", Const, 1}, + {"ETHERTYPE_SG_DIAG", Const, 1}, + {"ETHERTYPE_SG_NETGAMES", Const, 1}, + {"ETHERTYPE_SG_RESV", Const, 1}, + {"ETHERTYPE_SIMNET", Const, 1}, + {"ETHERTYPE_SLOW", Const, 1}, + {"ETHERTYPE_SLOWPROTOCOLS", Const, 1}, + {"ETHERTYPE_SNA", Const, 1}, + {"ETHERTYPE_SNMP", Const, 1}, + {"ETHERTYPE_SONIX", Const, 1}, + {"ETHERTYPE_SPIDER", Const, 1}, + {"ETHERTYPE_SPRITE", Const, 1}, + {"ETHERTYPE_STP", Const, 1}, + {"ETHERTYPE_TALARIS", Const, 1}, + {"ETHERTYPE_TALARISMC", Const, 1}, + {"ETHERTYPE_TCPCOMP", Const, 1}, + {"ETHERTYPE_TCPSM", Const, 1}, + {"ETHERTYPE_TEC", Const, 1}, + {"ETHERTYPE_TIGAN", Const, 1}, + {"ETHERTYPE_TRAIL", Const, 1}, + {"ETHERTYPE_TRANSETHER", Const, 1}, + {"ETHERTYPE_TYMSHARE", Const, 1}, + {"ETHERTYPE_UBBST", Const, 1}, + {"ETHERTYPE_UBDEBUG", Const, 1}, + {"ETHERTYPE_UBDIAGLOOP", Const, 1}, + {"ETHERTYPE_UBDL", Const, 1}, + {"ETHERTYPE_UBNIU", Const, 1}, + {"ETHERTYPE_UBNMC", Const, 1}, + {"ETHERTYPE_VALID", Const, 1}, + {"ETHERTYPE_VARIAN", Const, 1}, + {"ETHERTYPE_VAXELN", Const, 1}, + {"ETHERTYPE_VEECO", Const, 1}, + {"ETHERTYPE_VEXP", Const, 1}, + {"ETHERTYPE_VGLAB", Const, 1}, + {"ETHERTYPE_VINES", Const, 1}, + {"ETHERTYPE_VINESECHO", Const, 1}, + {"ETHERTYPE_VINESLOOP", Const, 1}, + {"ETHERTYPE_VITAL", Const, 1}, + {"ETHERTYPE_VLAN", Const, 1}, + {"ETHERTYPE_VLTLMAN", Const, 1}, + {"ETHERTYPE_VPROD", Const, 1}, + {"ETHERTYPE_VURESERVED", Const, 1}, + {"ETHERTYPE_WATERLOO", Const, 1}, + {"ETHERTYPE_WELLFLEET", Const, 1}, + {"ETHERTYPE_X25", Const, 1}, + {"ETHERTYPE_X75", Const, 1}, + {"ETHERTYPE_XNSSM", Const, 1}, + {"ETHERTYPE_XTP", Const, 1}, + {"ETHER_ADDR_LEN", Const, 1}, + {"ETHER_ALIGN", Const, 1}, + {"ETHER_CRC_LEN", Const, 1}, + {"ETHER_CRC_POLY_BE", Const, 1}, + {"ETHER_CRC_POLY_LE", Const, 1}, + {"ETHER_HDR_LEN", Const, 1}, + {"ETHER_MAX_DIX_LEN", Const, 1}, + {"ETHER_MAX_LEN", Const, 1}, + {"ETHER_MAX_LEN_JUMBO", Const, 1}, + {"ETHER_MIN_LEN", Const, 1}, + {"ETHER_PPPOE_ENCAP_LEN", Const, 1}, + {"ETHER_TYPE_LEN", Const, 1}, + {"ETHER_VLAN_ENCAP_LEN", Const, 1}, + {"ETH_P_1588", Const, 0}, + {"ETH_P_8021Q", Const, 0}, + {"ETH_P_802_2", Const, 0}, + {"ETH_P_802_3", Const, 0}, + {"ETH_P_AARP", Const, 0}, + {"ETH_P_ALL", Const, 0}, + {"ETH_P_AOE", Const, 0}, + {"ETH_P_ARCNET", Const, 0}, + {"ETH_P_ARP", Const, 0}, + {"ETH_P_ATALK", Const, 0}, + {"ETH_P_ATMFATE", Const, 0}, + {"ETH_P_ATMMPOA", Const, 0}, + {"ETH_P_AX25", Const, 0}, + {"ETH_P_BPQ", Const, 0}, + {"ETH_P_CAIF", Const, 0}, + {"ETH_P_CAN", Const, 0}, + {"ETH_P_CONTROL", Const, 0}, + {"ETH_P_CUST", Const, 0}, + {"ETH_P_DDCMP", Const, 0}, + {"ETH_P_DEC", Const, 0}, + {"ETH_P_DIAG", Const, 0}, + {"ETH_P_DNA_DL", Const, 0}, + {"ETH_P_DNA_RC", Const, 0}, + {"ETH_P_DNA_RT", Const, 0}, + {"ETH_P_DSA", Const, 0}, + {"ETH_P_ECONET", Const, 0}, + {"ETH_P_EDSA", Const, 0}, + {"ETH_P_FCOE", Const, 0}, + {"ETH_P_FIP", Const, 0}, + {"ETH_P_HDLC", Const, 0}, + {"ETH_P_IEEE802154", Const, 0}, + {"ETH_P_IEEEPUP", Const, 0}, + {"ETH_P_IEEEPUPAT", Const, 0}, + {"ETH_P_IP", Const, 0}, + {"ETH_P_IPV6", Const, 0}, + {"ETH_P_IPX", Const, 0}, + {"ETH_P_IRDA", Const, 0}, + {"ETH_P_LAT", Const, 0}, + {"ETH_P_LINK_CTL", Const, 0}, + {"ETH_P_LOCALTALK", Const, 0}, + {"ETH_P_LOOP", Const, 0}, + {"ETH_P_MOBITEX", Const, 0}, + {"ETH_P_MPLS_MC", Const, 0}, + {"ETH_P_MPLS_UC", Const, 0}, + {"ETH_P_PAE", Const, 0}, + {"ETH_P_PAUSE", Const, 0}, + {"ETH_P_PHONET", Const, 0}, + {"ETH_P_PPPTALK", Const, 0}, + {"ETH_P_PPP_DISC", Const, 0}, + {"ETH_P_PPP_MP", Const, 0}, + {"ETH_P_PPP_SES", Const, 0}, + {"ETH_P_PUP", Const, 0}, + {"ETH_P_PUPAT", Const, 0}, + {"ETH_P_RARP", Const, 0}, + {"ETH_P_SCA", Const, 0}, + {"ETH_P_SLOW", Const, 0}, + {"ETH_P_SNAP", Const, 0}, + {"ETH_P_TEB", Const, 0}, + {"ETH_P_TIPC", Const, 0}, + {"ETH_P_TRAILER", Const, 0}, + {"ETH_P_TR_802_2", Const, 0}, + {"ETH_P_WAN_PPP", Const, 0}, + {"ETH_P_WCCP", Const, 0}, + {"ETH_P_X25", Const, 0}, + {"ETIME", Const, 0}, + {"ETIMEDOUT", Const, 0}, + {"ETOOMANYREFS", Const, 0}, + {"ETXTBSY", Const, 0}, + {"EUCLEAN", Const, 0}, + {"EUNATCH", Const, 0}, + {"EUSERS", Const, 0}, + {"EVFILT_AIO", Const, 0}, + {"EVFILT_FS", Const, 0}, + {"EVFILT_LIO", Const, 0}, + {"EVFILT_MACHPORT", Const, 0}, + {"EVFILT_PROC", Const, 0}, + {"EVFILT_READ", Const, 0}, + {"EVFILT_SIGNAL", Const, 0}, + {"EVFILT_SYSCOUNT", Const, 0}, + {"EVFILT_THREADMARKER", Const, 0}, + {"EVFILT_TIMER", Const, 0}, + {"EVFILT_USER", Const, 0}, + {"EVFILT_VM", Const, 0}, + {"EVFILT_VNODE", Const, 0}, + {"EVFILT_WRITE", Const, 0}, + {"EV_ADD", Const, 0}, + {"EV_CLEAR", Const, 0}, + {"EV_DELETE", Const, 0}, + {"EV_DISABLE", Const, 0}, + {"EV_DISPATCH", Const, 0}, + {"EV_DROP", Const, 3}, + {"EV_ENABLE", Const, 0}, + {"EV_EOF", Const, 0}, + {"EV_ERROR", Const, 0}, + {"EV_FLAG0", Const, 0}, + {"EV_FLAG1", Const, 0}, + {"EV_ONESHOT", Const, 0}, + {"EV_OOBAND", Const, 0}, + {"EV_POLL", Const, 0}, + {"EV_RECEIPT", Const, 0}, + {"EV_SYSFLAGS", Const, 0}, + {"EWINDOWS", Const, 0}, + {"EWOULDBLOCK", Const, 0}, + {"EXDEV", Const, 0}, + {"EXFULL", Const, 0}, + {"EXTA", Const, 0}, + {"EXTB", Const, 0}, + {"EXTPROC", Const, 0}, + {"Environ", Func, 0}, + {"EpollCreate", Func, 0}, + {"EpollCreate1", Func, 0}, + {"EpollCtl", Func, 0}, + {"EpollEvent", Type, 0}, + {"EpollEvent.Events", Field, 0}, + {"EpollEvent.Fd", Field, 0}, + {"EpollEvent.Pad", Field, 0}, + {"EpollEvent.PadFd", Field, 0}, + {"EpollWait", Func, 0}, + {"Errno", Type, 0}, + {"EscapeArg", Func, 0}, + {"Exchangedata", Func, 0}, + {"Exec", Func, 0}, + {"Exit", Func, 0}, + {"ExitProcess", Func, 0}, + {"FD_CLOEXEC", Const, 0}, + {"FD_SETSIZE", Const, 0}, + {"FILE_ACTION_ADDED", Const, 0}, + {"FILE_ACTION_MODIFIED", Const, 0}, + {"FILE_ACTION_REMOVED", Const, 0}, + {"FILE_ACTION_RENAMED_NEW_NAME", Const, 0}, + {"FILE_ACTION_RENAMED_OLD_NAME", Const, 0}, + {"FILE_APPEND_DATA", Const, 0}, + {"FILE_ATTRIBUTE_ARCHIVE", Const, 0}, + {"FILE_ATTRIBUTE_DIRECTORY", Const, 0}, + {"FILE_ATTRIBUTE_HIDDEN", Const, 0}, + {"FILE_ATTRIBUTE_NORMAL", Const, 0}, + {"FILE_ATTRIBUTE_READONLY", Const, 0}, + {"FILE_ATTRIBUTE_REPARSE_POINT", Const, 4}, + {"FILE_ATTRIBUTE_SYSTEM", Const, 0}, + {"FILE_BEGIN", Const, 0}, + {"FILE_CURRENT", Const, 0}, + {"FILE_END", Const, 0}, + {"FILE_FLAG_BACKUP_SEMANTICS", Const, 0}, + {"FILE_FLAG_OPEN_REPARSE_POINT", Const, 4}, + {"FILE_FLAG_OVERLAPPED", Const, 0}, + {"FILE_LIST_DIRECTORY", Const, 0}, + {"FILE_MAP_COPY", Const, 0}, + {"FILE_MAP_EXECUTE", Const, 0}, + {"FILE_MAP_READ", Const, 0}, + {"FILE_MAP_WRITE", Const, 0}, + {"FILE_NOTIFY_CHANGE_ATTRIBUTES", Const, 0}, + {"FILE_NOTIFY_CHANGE_CREATION", Const, 0}, + {"FILE_NOTIFY_CHANGE_DIR_NAME", Const, 0}, + {"FILE_NOTIFY_CHANGE_FILE_NAME", Const, 0}, + {"FILE_NOTIFY_CHANGE_LAST_ACCESS", Const, 0}, + {"FILE_NOTIFY_CHANGE_LAST_WRITE", Const, 0}, + {"FILE_NOTIFY_CHANGE_SIZE", Const, 0}, + {"FILE_SHARE_DELETE", Const, 0}, + {"FILE_SHARE_READ", Const, 0}, + {"FILE_SHARE_WRITE", Const, 0}, + {"FILE_SKIP_COMPLETION_PORT_ON_SUCCESS", Const, 2}, + {"FILE_SKIP_SET_EVENT_ON_HANDLE", Const, 2}, + {"FILE_TYPE_CHAR", Const, 0}, + {"FILE_TYPE_DISK", Const, 0}, + {"FILE_TYPE_PIPE", Const, 0}, + {"FILE_TYPE_REMOTE", Const, 0}, + {"FILE_TYPE_UNKNOWN", Const, 0}, + {"FILE_WRITE_ATTRIBUTES", Const, 0}, + {"FLUSHO", Const, 0}, + {"FORMAT_MESSAGE_ALLOCATE_BUFFER", Const, 0}, + {"FORMAT_MESSAGE_ARGUMENT_ARRAY", Const, 0}, + {"FORMAT_MESSAGE_FROM_HMODULE", Const, 0}, + {"FORMAT_MESSAGE_FROM_STRING", Const, 0}, + {"FORMAT_MESSAGE_FROM_SYSTEM", Const, 0}, + {"FORMAT_MESSAGE_IGNORE_INSERTS", Const, 0}, + {"FORMAT_MESSAGE_MAX_WIDTH_MASK", Const, 0}, + {"FSCTL_GET_REPARSE_POINT", Const, 4}, + {"F_ADDFILESIGS", Const, 0}, + {"F_ADDSIGS", Const, 0}, + {"F_ALLOCATEALL", Const, 0}, + {"F_ALLOCATECONTIG", Const, 0}, + {"F_CANCEL", Const, 0}, + {"F_CHKCLEAN", Const, 0}, + {"F_CLOSEM", Const, 1}, + {"F_DUP2FD", Const, 0}, + {"F_DUP2FD_CLOEXEC", Const, 1}, + {"F_DUPFD", Const, 0}, + {"F_DUPFD_CLOEXEC", Const, 0}, + {"F_EXLCK", Const, 0}, + {"F_FINDSIGS", Const, 16}, + {"F_FLUSH_DATA", Const, 0}, + {"F_FREEZE_FS", Const, 0}, + {"F_FSCTL", Const, 1}, + {"F_FSDIRMASK", Const, 1}, + {"F_FSIN", Const, 1}, + {"F_FSINOUT", Const, 1}, + {"F_FSOUT", Const, 1}, + {"F_FSPRIV", Const, 1}, + {"F_FSVOID", Const, 1}, + {"F_FULLFSYNC", Const, 0}, + {"F_GETCODEDIR", Const, 16}, + {"F_GETFD", Const, 0}, + {"F_GETFL", Const, 0}, + {"F_GETLEASE", Const, 0}, + {"F_GETLK", Const, 0}, + {"F_GETLK64", Const, 0}, + {"F_GETLKPID", Const, 0}, + {"F_GETNOSIGPIPE", Const, 0}, + {"F_GETOWN", Const, 0}, + {"F_GETOWN_EX", Const, 0}, + {"F_GETPATH", Const, 0}, + {"F_GETPATH_MTMINFO", Const, 0}, + {"F_GETPIPE_SZ", Const, 0}, + {"F_GETPROTECTIONCLASS", Const, 0}, + {"F_GETPROTECTIONLEVEL", Const, 16}, + {"F_GETSIG", Const, 0}, + {"F_GLOBAL_NOCACHE", Const, 0}, + {"F_LOCK", Const, 0}, + {"F_LOG2PHYS", Const, 0}, + {"F_LOG2PHYS_EXT", Const, 0}, + {"F_MARKDEPENDENCY", Const, 0}, + {"F_MAXFD", Const, 1}, + {"F_NOCACHE", Const, 0}, + {"F_NODIRECT", Const, 0}, + {"F_NOTIFY", Const, 0}, + {"F_OGETLK", Const, 0}, + {"F_OK", Const, 0}, + {"F_OSETLK", Const, 0}, + {"F_OSETLKW", Const, 0}, + {"F_PARAM_MASK", Const, 1}, + {"F_PARAM_MAX", Const, 1}, + {"F_PATHPKG_CHECK", Const, 0}, + {"F_PEOFPOSMODE", Const, 0}, + {"F_PREALLOCATE", Const, 0}, + {"F_RDADVISE", Const, 0}, + {"F_RDAHEAD", Const, 0}, + {"F_RDLCK", Const, 0}, + {"F_READAHEAD", Const, 0}, + {"F_READBOOTSTRAP", Const, 0}, + {"F_SETBACKINGSTORE", Const, 0}, + {"F_SETFD", Const, 0}, + {"F_SETFL", Const, 0}, + {"F_SETLEASE", Const, 0}, + {"F_SETLK", Const, 0}, + {"F_SETLK64", Const, 0}, + {"F_SETLKW", Const, 0}, + {"F_SETLKW64", Const, 0}, + {"F_SETLKWTIMEOUT", Const, 16}, + {"F_SETLK_REMOTE", Const, 0}, + {"F_SETNOSIGPIPE", Const, 0}, + {"F_SETOWN", Const, 0}, + {"F_SETOWN_EX", Const, 0}, + {"F_SETPIPE_SZ", Const, 0}, + {"F_SETPROTECTIONCLASS", Const, 0}, + {"F_SETSIG", Const, 0}, + {"F_SETSIZE", Const, 0}, + {"F_SHLCK", Const, 0}, + {"F_SINGLE_WRITER", Const, 16}, + {"F_TEST", Const, 0}, + {"F_THAW_FS", Const, 0}, + {"F_TLOCK", Const, 0}, + {"F_TRANSCODEKEY", Const, 16}, + {"F_ULOCK", Const, 0}, + {"F_UNLCK", Const, 0}, + {"F_UNLCKSYS", Const, 0}, + {"F_VOLPOSMODE", Const, 0}, + {"F_WRITEBOOTSTRAP", Const, 0}, + {"F_WRLCK", Const, 0}, + {"Faccessat", Func, 0}, + {"Fallocate", Func, 0}, + {"Fbootstraptransfer_t", Type, 0}, + {"Fbootstraptransfer_t.Buffer", Field, 0}, + {"Fbootstraptransfer_t.Length", Field, 0}, + {"Fbootstraptransfer_t.Offset", Field, 0}, + {"Fchdir", Func, 0}, + {"Fchflags", Func, 0}, + {"Fchmod", Func, 0}, + {"Fchmodat", Func, 0}, + {"Fchown", Func, 0}, + {"Fchownat", Func, 0}, + {"FcntlFlock", Func, 3}, + {"FdSet", Type, 0}, + {"FdSet.Bits", Field, 0}, + {"FdSet.X__fds_bits", Field, 0}, + {"Fdatasync", Func, 0}, + {"FileNotifyInformation", Type, 0}, + {"FileNotifyInformation.Action", Field, 0}, + {"FileNotifyInformation.FileName", Field, 0}, + {"FileNotifyInformation.FileNameLength", Field, 0}, + {"FileNotifyInformation.NextEntryOffset", Field, 0}, + {"Filetime", Type, 0}, + {"Filetime.HighDateTime", Field, 0}, + {"Filetime.LowDateTime", Field, 0}, + {"FindClose", Func, 0}, + {"FindFirstFile", Func, 0}, + {"FindNextFile", Func, 0}, + {"Flock", Func, 0}, + {"Flock_t", Type, 0}, + {"Flock_t.Len", Field, 0}, + {"Flock_t.Pad_cgo_0", Field, 0}, + {"Flock_t.Pad_cgo_1", Field, 3}, + {"Flock_t.Pid", Field, 0}, + {"Flock_t.Start", Field, 0}, + {"Flock_t.Sysid", Field, 0}, + {"Flock_t.Type", Field, 0}, + {"Flock_t.Whence", Field, 0}, + {"FlushBpf", Func, 0}, + {"FlushFileBuffers", Func, 0}, + {"FlushViewOfFile", Func, 0}, + {"ForkExec", Func, 0}, + {"ForkLock", Var, 0}, + {"FormatMessage", Func, 0}, + {"Fpathconf", Func, 0}, + {"FreeAddrInfoW", Func, 1}, + {"FreeEnvironmentStrings", Func, 0}, + {"FreeLibrary", Func, 0}, + {"Fsid", Type, 0}, + {"Fsid.Val", Field, 0}, + {"Fsid.X__fsid_val", Field, 2}, + {"Fsid.X__val", Field, 0}, + {"Fstat", Func, 0}, + {"Fstatat", Func, 12}, + {"Fstatfs", Func, 0}, + {"Fstore_t", Type, 0}, + {"Fstore_t.Bytesalloc", Field, 0}, + {"Fstore_t.Flags", Field, 0}, + {"Fstore_t.Length", Field, 0}, + {"Fstore_t.Offset", Field, 0}, + {"Fstore_t.Posmode", Field, 0}, + {"Fsync", Func, 0}, + {"Ftruncate", Func, 0}, + {"FullPath", Func, 4}, + {"Futimes", Func, 0}, + {"Futimesat", Func, 0}, + {"GENERIC_ALL", Const, 0}, + {"GENERIC_EXECUTE", Const, 0}, + {"GENERIC_READ", Const, 0}, + {"GENERIC_WRITE", Const, 0}, + {"GUID", Type, 1}, + {"GUID.Data1", Field, 1}, + {"GUID.Data2", Field, 1}, + {"GUID.Data3", Field, 1}, + {"GUID.Data4", Field, 1}, + {"GetAcceptExSockaddrs", Func, 0}, + {"GetAdaptersInfo", Func, 0}, + {"GetAddrInfoW", Func, 1}, + {"GetCommandLine", Func, 0}, + {"GetComputerName", Func, 0}, + {"GetConsoleMode", Func, 1}, + {"GetCurrentDirectory", Func, 0}, + {"GetCurrentProcess", Func, 0}, + {"GetEnvironmentStrings", Func, 0}, + {"GetEnvironmentVariable", Func, 0}, + {"GetExitCodeProcess", Func, 0}, + {"GetFileAttributes", Func, 0}, + {"GetFileAttributesEx", Func, 0}, + {"GetFileExInfoStandard", Const, 0}, + {"GetFileExMaxInfoLevel", Const, 0}, + {"GetFileInformationByHandle", Func, 0}, + {"GetFileType", Func, 0}, + {"GetFullPathName", Func, 0}, + {"GetHostByName", Func, 0}, + {"GetIfEntry", Func, 0}, + {"GetLastError", Func, 0}, + {"GetLengthSid", Func, 0}, + {"GetLongPathName", Func, 0}, + {"GetProcAddress", Func, 0}, + {"GetProcessTimes", Func, 0}, + {"GetProtoByName", Func, 0}, + {"GetQueuedCompletionStatus", Func, 0}, + {"GetServByName", Func, 0}, + {"GetShortPathName", Func, 0}, + {"GetStartupInfo", Func, 0}, + {"GetStdHandle", Func, 0}, + {"GetSystemTimeAsFileTime", Func, 0}, + {"GetTempPath", Func, 0}, + {"GetTimeZoneInformation", Func, 0}, + {"GetTokenInformation", Func, 0}, + {"GetUserNameEx", Func, 0}, + {"GetUserProfileDirectory", Func, 0}, + {"GetVersion", Func, 0}, + {"Getcwd", Func, 0}, + {"Getdents", Func, 0}, + {"Getdirentries", Func, 0}, + {"Getdtablesize", Func, 0}, + {"Getegid", Func, 0}, + {"Getenv", Func, 0}, + {"Geteuid", Func, 0}, + {"Getfsstat", Func, 0}, + {"Getgid", Func, 0}, + {"Getgroups", Func, 0}, + {"Getpagesize", Func, 0}, + {"Getpeername", Func, 0}, + {"Getpgid", Func, 0}, + {"Getpgrp", Func, 0}, + {"Getpid", Func, 0}, + {"Getppid", Func, 0}, + {"Getpriority", Func, 0}, + {"Getrlimit", Func, 0}, + {"Getrusage", Func, 0}, + {"Getsid", Func, 0}, + {"Getsockname", Func, 0}, + {"Getsockopt", Func, 1}, + {"GetsockoptByte", Func, 0}, + {"GetsockoptICMPv6Filter", Func, 2}, + {"GetsockoptIPMreq", Func, 0}, + {"GetsockoptIPMreqn", Func, 0}, + {"GetsockoptIPv6MTUInfo", Func, 2}, + {"GetsockoptIPv6Mreq", Func, 0}, + {"GetsockoptInet4Addr", Func, 0}, + {"GetsockoptInt", Func, 0}, + {"GetsockoptUcred", Func, 1}, + {"Gettid", Func, 0}, + {"Gettimeofday", Func, 0}, + {"Getuid", Func, 0}, + {"Getwd", Func, 0}, + {"Getxattr", Func, 1}, + {"HANDLE_FLAG_INHERIT", Const, 0}, + {"HKEY_CLASSES_ROOT", Const, 0}, + {"HKEY_CURRENT_CONFIG", Const, 0}, + {"HKEY_CURRENT_USER", Const, 0}, + {"HKEY_DYN_DATA", Const, 0}, + {"HKEY_LOCAL_MACHINE", Const, 0}, + {"HKEY_PERFORMANCE_DATA", Const, 0}, + {"HKEY_USERS", Const, 0}, + {"HUPCL", Const, 0}, + {"Handle", Type, 0}, + {"Hostent", Type, 0}, + {"Hostent.AddrList", Field, 0}, + {"Hostent.AddrType", Field, 0}, + {"Hostent.Aliases", Field, 0}, + {"Hostent.Length", Field, 0}, + {"Hostent.Name", Field, 0}, + {"ICANON", Const, 0}, + {"ICMP6_FILTER", Const, 2}, + {"ICMPV6_FILTER", Const, 2}, + {"ICMPv6Filter", Type, 2}, + {"ICMPv6Filter.Data", Field, 2}, + {"ICMPv6Filter.Filt", Field, 2}, + {"ICRNL", Const, 0}, + {"IEXTEN", Const, 0}, + {"IFAN_ARRIVAL", Const, 1}, + {"IFAN_DEPARTURE", Const, 1}, + {"IFA_ADDRESS", Const, 0}, + {"IFA_ANYCAST", Const, 0}, + {"IFA_BROADCAST", Const, 0}, + {"IFA_CACHEINFO", Const, 0}, + {"IFA_F_DADFAILED", Const, 0}, + {"IFA_F_DEPRECATED", Const, 0}, + {"IFA_F_HOMEADDRESS", Const, 0}, + {"IFA_F_NODAD", Const, 0}, + {"IFA_F_OPTIMISTIC", Const, 0}, + {"IFA_F_PERMANENT", Const, 0}, + {"IFA_F_SECONDARY", Const, 0}, + {"IFA_F_TEMPORARY", Const, 0}, + {"IFA_F_TENTATIVE", Const, 0}, + {"IFA_LABEL", Const, 0}, + {"IFA_LOCAL", Const, 0}, + {"IFA_MAX", Const, 0}, + {"IFA_MULTICAST", Const, 0}, + {"IFA_ROUTE", Const, 1}, + {"IFA_UNSPEC", Const, 0}, + {"IFF_ALLMULTI", Const, 0}, + {"IFF_ALTPHYS", Const, 0}, + {"IFF_AUTOMEDIA", Const, 0}, + {"IFF_BROADCAST", Const, 0}, + {"IFF_CANTCHANGE", Const, 0}, + {"IFF_CANTCONFIG", Const, 1}, + {"IFF_DEBUG", Const, 0}, + {"IFF_DRV_OACTIVE", Const, 0}, + {"IFF_DRV_RUNNING", Const, 0}, + {"IFF_DYING", Const, 0}, + {"IFF_DYNAMIC", Const, 0}, + {"IFF_LINK0", Const, 0}, + {"IFF_LINK1", Const, 0}, + {"IFF_LINK2", Const, 0}, + {"IFF_LOOPBACK", Const, 0}, + {"IFF_MASTER", Const, 0}, + {"IFF_MONITOR", Const, 0}, + {"IFF_MULTICAST", Const, 0}, + {"IFF_NOARP", Const, 0}, + {"IFF_NOTRAILERS", Const, 0}, + {"IFF_NO_PI", Const, 0}, + {"IFF_OACTIVE", Const, 0}, + {"IFF_ONE_QUEUE", Const, 0}, + {"IFF_POINTOPOINT", Const, 0}, + {"IFF_POINTTOPOINT", Const, 0}, + {"IFF_PORTSEL", Const, 0}, + {"IFF_PPROMISC", Const, 0}, + {"IFF_PROMISC", Const, 0}, + {"IFF_RENAMING", Const, 0}, + {"IFF_RUNNING", Const, 0}, + {"IFF_SIMPLEX", Const, 0}, + {"IFF_SLAVE", Const, 0}, + {"IFF_SMART", Const, 0}, + {"IFF_STATICARP", Const, 0}, + {"IFF_TAP", Const, 0}, + {"IFF_TUN", Const, 0}, + {"IFF_TUN_EXCL", Const, 0}, + {"IFF_UP", Const, 0}, + {"IFF_VNET_HDR", Const, 0}, + {"IFLA_ADDRESS", Const, 0}, + {"IFLA_BROADCAST", Const, 0}, + {"IFLA_COST", Const, 0}, + {"IFLA_IFALIAS", Const, 0}, + {"IFLA_IFNAME", Const, 0}, + {"IFLA_LINK", Const, 0}, + {"IFLA_LINKINFO", Const, 0}, + {"IFLA_LINKMODE", Const, 0}, + {"IFLA_MAP", Const, 0}, + {"IFLA_MASTER", Const, 0}, + {"IFLA_MAX", Const, 0}, + {"IFLA_MTU", Const, 0}, + {"IFLA_NET_NS_PID", Const, 0}, + {"IFLA_OPERSTATE", Const, 0}, + {"IFLA_PRIORITY", Const, 0}, + {"IFLA_PROTINFO", Const, 0}, + {"IFLA_QDISC", Const, 0}, + {"IFLA_STATS", Const, 0}, + {"IFLA_TXQLEN", Const, 0}, + {"IFLA_UNSPEC", Const, 0}, + {"IFLA_WEIGHT", Const, 0}, + {"IFLA_WIRELESS", Const, 0}, + {"IFNAMSIZ", Const, 0}, + {"IFT_1822", Const, 0}, + {"IFT_A12MPPSWITCH", Const, 0}, + {"IFT_AAL2", Const, 0}, + {"IFT_AAL5", Const, 0}, + {"IFT_ADSL", Const, 0}, + {"IFT_AFLANE8023", Const, 0}, + {"IFT_AFLANE8025", Const, 0}, + {"IFT_ARAP", Const, 0}, + {"IFT_ARCNET", Const, 0}, + {"IFT_ARCNETPLUS", Const, 0}, + {"IFT_ASYNC", Const, 0}, + {"IFT_ATM", Const, 0}, + {"IFT_ATMDXI", Const, 0}, + {"IFT_ATMFUNI", Const, 0}, + {"IFT_ATMIMA", Const, 0}, + {"IFT_ATMLOGICAL", Const, 0}, + {"IFT_ATMRADIO", Const, 0}, + {"IFT_ATMSUBINTERFACE", Const, 0}, + {"IFT_ATMVCIENDPT", Const, 0}, + {"IFT_ATMVIRTUAL", Const, 0}, + {"IFT_BGPPOLICYACCOUNTING", Const, 0}, + {"IFT_BLUETOOTH", Const, 1}, + {"IFT_BRIDGE", Const, 0}, + {"IFT_BSC", Const, 0}, + {"IFT_CARP", Const, 0}, + {"IFT_CCTEMUL", Const, 0}, + {"IFT_CELLULAR", Const, 0}, + {"IFT_CEPT", Const, 0}, + {"IFT_CES", Const, 0}, + {"IFT_CHANNEL", Const, 0}, + {"IFT_CNR", Const, 0}, + {"IFT_COFFEE", Const, 0}, + {"IFT_COMPOSITELINK", Const, 0}, + {"IFT_DCN", Const, 0}, + {"IFT_DIGITALPOWERLINE", Const, 0}, + {"IFT_DIGITALWRAPPEROVERHEADCHANNEL", Const, 0}, + {"IFT_DLSW", Const, 0}, + {"IFT_DOCSCABLEDOWNSTREAM", Const, 0}, + {"IFT_DOCSCABLEMACLAYER", Const, 0}, + {"IFT_DOCSCABLEUPSTREAM", Const, 0}, + {"IFT_DOCSCABLEUPSTREAMCHANNEL", Const, 1}, + {"IFT_DS0", Const, 0}, + {"IFT_DS0BUNDLE", Const, 0}, + {"IFT_DS1FDL", Const, 0}, + {"IFT_DS3", Const, 0}, + {"IFT_DTM", Const, 0}, + {"IFT_DUMMY", Const, 1}, + {"IFT_DVBASILN", Const, 0}, + {"IFT_DVBASIOUT", Const, 0}, + {"IFT_DVBRCCDOWNSTREAM", Const, 0}, + {"IFT_DVBRCCMACLAYER", Const, 0}, + {"IFT_DVBRCCUPSTREAM", Const, 0}, + {"IFT_ECONET", Const, 1}, + {"IFT_ENC", Const, 0}, + {"IFT_EON", Const, 0}, + {"IFT_EPLRS", Const, 0}, + {"IFT_ESCON", Const, 0}, + {"IFT_ETHER", Const, 0}, + {"IFT_FAITH", Const, 0}, + {"IFT_FAST", Const, 0}, + {"IFT_FASTETHER", Const, 0}, + {"IFT_FASTETHERFX", Const, 0}, + {"IFT_FDDI", Const, 0}, + {"IFT_FIBRECHANNEL", Const, 0}, + {"IFT_FRAMERELAYINTERCONNECT", Const, 0}, + {"IFT_FRAMERELAYMPI", Const, 0}, + {"IFT_FRDLCIENDPT", Const, 0}, + {"IFT_FRELAY", Const, 0}, + {"IFT_FRELAYDCE", Const, 0}, + {"IFT_FRF16MFRBUNDLE", Const, 0}, + {"IFT_FRFORWARD", Const, 0}, + {"IFT_G703AT2MB", Const, 0}, + {"IFT_G703AT64K", Const, 0}, + {"IFT_GIF", Const, 0}, + {"IFT_GIGABITETHERNET", Const, 0}, + {"IFT_GR303IDT", Const, 0}, + {"IFT_GR303RDT", Const, 0}, + {"IFT_H323GATEKEEPER", Const, 0}, + {"IFT_H323PROXY", Const, 0}, + {"IFT_HDH1822", Const, 0}, + {"IFT_HDLC", Const, 0}, + {"IFT_HDSL2", Const, 0}, + {"IFT_HIPERLAN2", Const, 0}, + {"IFT_HIPPI", Const, 0}, + {"IFT_HIPPIINTERFACE", Const, 0}, + {"IFT_HOSTPAD", Const, 0}, + {"IFT_HSSI", Const, 0}, + {"IFT_HY", Const, 0}, + {"IFT_IBM370PARCHAN", Const, 0}, + {"IFT_IDSL", Const, 0}, + {"IFT_IEEE1394", Const, 0}, + {"IFT_IEEE80211", Const, 0}, + {"IFT_IEEE80212", Const, 0}, + {"IFT_IEEE8023ADLAG", Const, 0}, + {"IFT_IFGSN", Const, 0}, + {"IFT_IMT", Const, 0}, + {"IFT_INFINIBAND", Const, 1}, + {"IFT_INTERLEAVE", Const, 0}, + {"IFT_IP", Const, 0}, + {"IFT_IPFORWARD", Const, 0}, + {"IFT_IPOVERATM", Const, 0}, + {"IFT_IPOVERCDLC", Const, 0}, + {"IFT_IPOVERCLAW", Const, 0}, + {"IFT_IPSWITCH", Const, 0}, + {"IFT_IPXIP", Const, 0}, + {"IFT_ISDN", Const, 0}, + {"IFT_ISDNBASIC", Const, 0}, + {"IFT_ISDNPRIMARY", Const, 0}, + {"IFT_ISDNS", Const, 0}, + {"IFT_ISDNU", Const, 0}, + {"IFT_ISO88022LLC", Const, 0}, + {"IFT_ISO88023", Const, 0}, + {"IFT_ISO88024", Const, 0}, + {"IFT_ISO88025", Const, 0}, + {"IFT_ISO88025CRFPINT", Const, 0}, + {"IFT_ISO88025DTR", Const, 0}, + {"IFT_ISO88025FIBER", Const, 0}, + {"IFT_ISO88026", Const, 0}, + {"IFT_ISUP", Const, 0}, + {"IFT_L2VLAN", Const, 0}, + {"IFT_L3IPVLAN", Const, 0}, + {"IFT_L3IPXVLAN", Const, 0}, + {"IFT_LAPB", Const, 0}, + {"IFT_LAPD", Const, 0}, + {"IFT_LAPF", Const, 0}, + {"IFT_LINEGROUP", Const, 1}, + {"IFT_LOCALTALK", Const, 0}, + {"IFT_LOOP", Const, 0}, + {"IFT_MEDIAMAILOVERIP", Const, 0}, + {"IFT_MFSIGLINK", Const, 0}, + {"IFT_MIOX25", Const, 0}, + {"IFT_MODEM", Const, 0}, + {"IFT_MPC", Const, 0}, + {"IFT_MPLS", Const, 0}, + {"IFT_MPLSTUNNEL", Const, 0}, + {"IFT_MSDSL", Const, 0}, + {"IFT_MVL", Const, 0}, + {"IFT_MYRINET", Const, 0}, + {"IFT_NFAS", Const, 0}, + {"IFT_NSIP", Const, 0}, + {"IFT_OPTICALCHANNEL", Const, 0}, + {"IFT_OPTICALTRANSPORT", Const, 0}, + {"IFT_OTHER", Const, 0}, + {"IFT_P10", Const, 0}, + {"IFT_P80", Const, 0}, + {"IFT_PARA", Const, 0}, + {"IFT_PDP", Const, 0}, + {"IFT_PFLOG", Const, 0}, + {"IFT_PFLOW", Const, 1}, + {"IFT_PFSYNC", Const, 0}, + {"IFT_PLC", Const, 0}, + {"IFT_PON155", Const, 1}, + {"IFT_PON622", Const, 1}, + {"IFT_POS", Const, 0}, + {"IFT_PPP", Const, 0}, + {"IFT_PPPMULTILINKBUNDLE", Const, 0}, + {"IFT_PROPATM", Const, 1}, + {"IFT_PROPBWAP2MP", Const, 0}, + {"IFT_PROPCNLS", Const, 0}, + {"IFT_PROPDOCSWIRELESSDOWNSTREAM", Const, 0}, + {"IFT_PROPDOCSWIRELESSMACLAYER", Const, 0}, + {"IFT_PROPDOCSWIRELESSUPSTREAM", Const, 0}, + {"IFT_PROPMUX", Const, 0}, + {"IFT_PROPVIRTUAL", Const, 0}, + {"IFT_PROPWIRELESSP2P", Const, 0}, + {"IFT_PTPSERIAL", Const, 0}, + {"IFT_PVC", Const, 0}, + {"IFT_Q2931", Const, 1}, + {"IFT_QLLC", Const, 0}, + {"IFT_RADIOMAC", Const, 0}, + {"IFT_RADSL", Const, 0}, + {"IFT_REACHDSL", Const, 0}, + {"IFT_RFC1483", Const, 0}, + {"IFT_RS232", Const, 0}, + {"IFT_RSRB", Const, 0}, + {"IFT_SDLC", Const, 0}, + {"IFT_SDSL", Const, 0}, + {"IFT_SHDSL", Const, 0}, + {"IFT_SIP", Const, 0}, + {"IFT_SIPSIG", Const, 1}, + {"IFT_SIPTG", Const, 1}, + {"IFT_SLIP", Const, 0}, + {"IFT_SMDSDXI", Const, 0}, + {"IFT_SMDSICIP", Const, 0}, + {"IFT_SONET", Const, 0}, + {"IFT_SONETOVERHEADCHANNEL", Const, 0}, + {"IFT_SONETPATH", Const, 0}, + {"IFT_SONETVT", Const, 0}, + {"IFT_SRP", Const, 0}, + {"IFT_SS7SIGLINK", Const, 0}, + {"IFT_STACKTOSTACK", Const, 0}, + {"IFT_STARLAN", Const, 0}, + {"IFT_STF", Const, 0}, + {"IFT_T1", Const, 0}, + {"IFT_TDLC", Const, 0}, + {"IFT_TELINK", Const, 1}, + {"IFT_TERMPAD", Const, 0}, + {"IFT_TR008", Const, 0}, + {"IFT_TRANSPHDLC", Const, 0}, + {"IFT_TUNNEL", Const, 0}, + {"IFT_ULTRA", Const, 0}, + {"IFT_USB", Const, 0}, + {"IFT_V11", Const, 0}, + {"IFT_V35", Const, 0}, + {"IFT_V36", Const, 0}, + {"IFT_V37", Const, 0}, + {"IFT_VDSL", Const, 0}, + {"IFT_VIRTUALIPADDRESS", Const, 0}, + {"IFT_VIRTUALTG", Const, 1}, + {"IFT_VOICEDID", Const, 1}, + {"IFT_VOICEEM", Const, 0}, + {"IFT_VOICEEMFGD", Const, 1}, + {"IFT_VOICEENCAP", Const, 0}, + {"IFT_VOICEFGDEANA", Const, 1}, + {"IFT_VOICEFXO", Const, 0}, + {"IFT_VOICEFXS", Const, 0}, + {"IFT_VOICEOVERATM", Const, 0}, + {"IFT_VOICEOVERCABLE", Const, 1}, + {"IFT_VOICEOVERFRAMERELAY", Const, 0}, + {"IFT_VOICEOVERIP", Const, 0}, + {"IFT_X213", Const, 0}, + {"IFT_X25", Const, 0}, + {"IFT_X25DDN", Const, 0}, + {"IFT_X25HUNTGROUP", Const, 0}, + {"IFT_X25MLP", Const, 0}, + {"IFT_X25PLE", Const, 0}, + {"IFT_XETHER", Const, 0}, + {"IGNBRK", Const, 0}, + {"IGNCR", Const, 0}, + {"IGNORE", Const, 0}, + {"IGNPAR", Const, 0}, + {"IMAXBEL", Const, 0}, + {"INFINITE", Const, 0}, + {"INLCR", Const, 0}, + {"INPCK", Const, 0}, + {"INVALID_FILE_ATTRIBUTES", Const, 0}, + {"IN_ACCESS", Const, 0}, + {"IN_ALL_EVENTS", Const, 0}, + {"IN_ATTRIB", Const, 0}, + {"IN_CLASSA_HOST", Const, 0}, + {"IN_CLASSA_MAX", Const, 0}, + {"IN_CLASSA_NET", Const, 0}, + {"IN_CLASSA_NSHIFT", Const, 0}, + {"IN_CLASSB_HOST", Const, 0}, + {"IN_CLASSB_MAX", Const, 0}, + {"IN_CLASSB_NET", Const, 0}, + {"IN_CLASSB_NSHIFT", Const, 0}, + {"IN_CLASSC_HOST", Const, 0}, + {"IN_CLASSC_NET", Const, 0}, + {"IN_CLASSC_NSHIFT", Const, 0}, + {"IN_CLASSD_HOST", Const, 0}, + {"IN_CLASSD_NET", Const, 0}, + {"IN_CLASSD_NSHIFT", Const, 0}, + {"IN_CLOEXEC", Const, 0}, + {"IN_CLOSE", Const, 0}, + {"IN_CLOSE_NOWRITE", Const, 0}, + {"IN_CLOSE_WRITE", Const, 0}, + {"IN_CREATE", Const, 0}, + {"IN_DELETE", Const, 0}, + {"IN_DELETE_SELF", Const, 0}, + {"IN_DONT_FOLLOW", Const, 0}, + {"IN_EXCL_UNLINK", Const, 0}, + {"IN_IGNORED", Const, 0}, + {"IN_ISDIR", Const, 0}, + {"IN_LINKLOCALNETNUM", Const, 0}, + {"IN_LOOPBACKNET", Const, 0}, + {"IN_MASK_ADD", Const, 0}, + {"IN_MODIFY", Const, 0}, + {"IN_MOVE", Const, 0}, + {"IN_MOVED_FROM", Const, 0}, + {"IN_MOVED_TO", Const, 0}, + {"IN_MOVE_SELF", Const, 0}, + {"IN_NONBLOCK", Const, 0}, + {"IN_ONESHOT", Const, 0}, + {"IN_ONLYDIR", Const, 0}, + {"IN_OPEN", Const, 0}, + {"IN_Q_OVERFLOW", Const, 0}, + {"IN_RFC3021_HOST", Const, 1}, + {"IN_RFC3021_MASK", Const, 1}, + {"IN_RFC3021_NET", Const, 1}, + {"IN_RFC3021_NSHIFT", Const, 1}, + {"IN_UNMOUNT", Const, 0}, + {"IOC_IN", Const, 1}, + {"IOC_INOUT", Const, 1}, + {"IOC_OUT", Const, 1}, + {"IOC_VENDOR", Const, 3}, + {"IOC_WS2", Const, 1}, + {"IO_REPARSE_TAG_SYMLINK", Const, 4}, + {"IPMreq", Type, 0}, + {"IPMreq.Interface", Field, 0}, + {"IPMreq.Multiaddr", Field, 0}, + {"IPMreqn", Type, 0}, + {"IPMreqn.Address", Field, 0}, + {"IPMreqn.Ifindex", Field, 0}, + {"IPMreqn.Multiaddr", Field, 0}, + {"IPPROTO_3PC", Const, 0}, + {"IPPROTO_ADFS", Const, 0}, + {"IPPROTO_AH", Const, 0}, + {"IPPROTO_AHIP", Const, 0}, + {"IPPROTO_APES", Const, 0}, + {"IPPROTO_ARGUS", Const, 0}, + {"IPPROTO_AX25", Const, 0}, + {"IPPROTO_BHA", Const, 0}, + {"IPPROTO_BLT", Const, 0}, + {"IPPROTO_BRSATMON", Const, 0}, + {"IPPROTO_CARP", Const, 0}, + {"IPPROTO_CFTP", Const, 0}, + {"IPPROTO_CHAOS", Const, 0}, + {"IPPROTO_CMTP", Const, 0}, + {"IPPROTO_COMP", Const, 0}, + {"IPPROTO_CPHB", Const, 0}, + {"IPPROTO_CPNX", Const, 0}, + {"IPPROTO_DCCP", Const, 0}, + {"IPPROTO_DDP", Const, 0}, + {"IPPROTO_DGP", Const, 0}, + {"IPPROTO_DIVERT", Const, 0}, + {"IPPROTO_DIVERT_INIT", Const, 3}, + {"IPPROTO_DIVERT_RESP", Const, 3}, + {"IPPROTO_DONE", Const, 0}, + {"IPPROTO_DSTOPTS", Const, 0}, + {"IPPROTO_EGP", Const, 0}, + {"IPPROTO_EMCON", Const, 0}, + {"IPPROTO_ENCAP", Const, 0}, + {"IPPROTO_EON", Const, 0}, + {"IPPROTO_ESP", Const, 0}, + {"IPPROTO_ETHERIP", Const, 0}, + {"IPPROTO_FRAGMENT", Const, 0}, + {"IPPROTO_GGP", Const, 0}, + {"IPPROTO_GMTP", Const, 0}, + {"IPPROTO_GRE", Const, 0}, + {"IPPROTO_HELLO", Const, 0}, + {"IPPROTO_HMP", Const, 0}, + {"IPPROTO_HOPOPTS", Const, 0}, + {"IPPROTO_ICMP", Const, 0}, + {"IPPROTO_ICMPV6", Const, 0}, + {"IPPROTO_IDP", Const, 0}, + {"IPPROTO_IDPR", Const, 0}, + {"IPPROTO_IDRP", Const, 0}, + {"IPPROTO_IGMP", Const, 0}, + {"IPPROTO_IGP", Const, 0}, + {"IPPROTO_IGRP", Const, 0}, + {"IPPROTO_IL", Const, 0}, + {"IPPROTO_INLSP", Const, 0}, + {"IPPROTO_INP", Const, 0}, + {"IPPROTO_IP", Const, 0}, + {"IPPROTO_IPCOMP", Const, 0}, + {"IPPROTO_IPCV", Const, 0}, + {"IPPROTO_IPEIP", Const, 0}, + {"IPPROTO_IPIP", Const, 0}, + {"IPPROTO_IPPC", Const, 0}, + {"IPPROTO_IPV4", Const, 0}, + {"IPPROTO_IPV6", Const, 0}, + {"IPPROTO_IPV6_ICMP", Const, 1}, + {"IPPROTO_IRTP", Const, 0}, + {"IPPROTO_KRYPTOLAN", Const, 0}, + {"IPPROTO_LARP", Const, 0}, + {"IPPROTO_LEAF1", Const, 0}, + {"IPPROTO_LEAF2", Const, 0}, + {"IPPROTO_MAX", Const, 0}, + {"IPPROTO_MAXID", Const, 0}, + {"IPPROTO_MEAS", Const, 0}, + {"IPPROTO_MH", Const, 1}, + {"IPPROTO_MHRP", Const, 0}, + {"IPPROTO_MICP", Const, 0}, + {"IPPROTO_MOBILE", Const, 0}, + {"IPPROTO_MPLS", Const, 1}, + {"IPPROTO_MTP", Const, 0}, + {"IPPROTO_MUX", Const, 0}, + {"IPPROTO_ND", Const, 0}, + {"IPPROTO_NHRP", Const, 0}, + {"IPPROTO_NONE", Const, 0}, + {"IPPROTO_NSP", Const, 0}, + {"IPPROTO_NVPII", Const, 0}, + {"IPPROTO_OLD_DIVERT", Const, 0}, + {"IPPROTO_OSPFIGP", Const, 0}, + {"IPPROTO_PFSYNC", Const, 0}, + {"IPPROTO_PGM", Const, 0}, + {"IPPROTO_PIGP", Const, 0}, + {"IPPROTO_PIM", Const, 0}, + {"IPPROTO_PRM", Const, 0}, + {"IPPROTO_PUP", Const, 0}, + {"IPPROTO_PVP", Const, 0}, + {"IPPROTO_RAW", Const, 0}, + {"IPPROTO_RCCMON", Const, 0}, + {"IPPROTO_RDP", Const, 0}, + {"IPPROTO_ROUTING", Const, 0}, + {"IPPROTO_RSVP", Const, 0}, + {"IPPROTO_RVD", Const, 0}, + {"IPPROTO_SATEXPAK", Const, 0}, + {"IPPROTO_SATMON", Const, 0}, + {"IPPROTO_SCCSP", Const, 0}, + {"IPPROTO_SCTP", Const, 0}, + {"IPPROTO_SDRP", Const, 0}, + {"IPPROTO_SEND", Const, 1}, + {"IPPROTO_SEP", Const, 0}, + {"IPPROTO_SKIP", Const, 0}, + {"IPPROTO_SPACER", Const, 0}, + {"IPPROTO_SRPC", Const, 0}, + {"IPPROTO_ST", Const, 0}, + {"IPPROTO_SVMTP", Const, 0}, + {"IPPROTO_SWIPE", Const, 0}, + {"IPPROTO_TCF", Const, 0}, + {"IPPROTO_TCP", Const, 0}, + {"IPPROTO_TLSP", Const, 0}, + {"IPPROTO_TP", Const, 0}, + {"IPPROTO_TPXX", Const, 0}, + {"IPPROTO_TRUNK1", Const, 0}, + {"IPPROTO_TRUNK2", Const, 0}, + {"IPPROTO_TTP", Const, 0}, + {"IPPROTO_UDP", Const, 0}, + {"IPPROTO_UDPLITE", Const, 0}, + {"IPPROTO_VINES", Const, 0}, + {"IPPROTO_VISA", Const, 0}, + {"IPPROTO_VMTP", Const, 0}, + {"IPPROTO_VRRP", Const, 1}, + {"IPPROTO_WBEXPAK", Const, 0}, + {"IPPROTO_WBMON", Const, 0}, + {"IPPROTO_WSN", Const, 0}, + {"IPPROTO_XNET", Const, 0}, + {"IPPROTO_XTP", Const, 0}, + {"IPV6_2292DSTOPTS", Const, 0}, + {"IPV6_2292HOPLIMIT", Const, 0}, + {"IPV6_2292HOPOPTS", Const, 0}, + {"IPV6_2292NEXTHOP", Const, 0}, + {"IPV6_2292PKTINFO", Const, 0}, + {"IPV6_2292PKTOPTIONS", Const, 0}, + {"IPV6_2292RTHDR", Const, 0}, + {"IPV6_ADDRFORM", Const, 0}, + {"IPV6_ADD_MEMBERSHIP", Const, 0}, + {"IPV6_AUTHHDR", Const, 0}, + {"IPV6_AUTH_LEVEL", Const, 1}, + {"IPV6_AUTOFLOWLABEL", Const, 0}, + {"IPV6_BINDANY", Const, 0}, + {"IPV6_BINDV6ONLY", Const, 0}, + {"IPV6_BOUND_IF", Const, 0}, + {"IPV6_CHECKSUM", Const, 0}, + {"IPV6_DEFAULT_MULTICAST_HOPS", Const, 0}, + {"IPV6_DEFAULT_MULTICAST_LOOP", Const, 0}, + {"IPV6_DEFHLIM", Const, 0}, + {"IPV6_DONTFRAG", Const, 0}, + {"IPV6_DROP_MEMBERSHIP", Const, 0}, + {"IPV6_DSTOPTS", Const, 0}, + {"IPV6_ESP_NETWORK_LEVEL", Const, 1}, + {"IPV6_ESP_TRANS_LEVEL", Const, 1}, + {"IPV6_FAITH", Const, 0}, + {"IPV6_FLOWINFO_MASK", Const, 0}, + {"IPV6_FLOWLABEL_MASK", Const, 0}, + {"IPV6_FRAGTTL", Const, 0}, + {"IPV6_FW_ADD", Const, 0}, + {"IPV6_FW_DEL", Const, 0}, + {"IPV6_FW_FLUSH", Const, 0}, + {"IPV6_FW_GET", Const, 0}, + {"IPV6_FW_ZERO", Const, 0}, + {"IPV6_HLIMDEC", Const, 0}, + {"IPV6_HOPLIMIT", Const, 0}, + {"IPV6_HOPOPTS", Const, 0}, + {"IPV6_IPCOMP_LEVEL", Const, 1}, + {"IPV6_IPSEC_POLICY", Const, 0}, + {"IPV6_JOIN_ANYCAST", Const, 0}, + {"IPV6_JOIN_GROUP", Const, 0}, + {"IPV6_LEAVE_ANYCAST", Const, 0}, + {"IPV6_LEAVE_GROUP", Const, 0}, + {"IPV6_MAXHLIM", Const, 0}, + {"IPV6_MAXOPTHDR", Const, 0}, + {"IPV6_MAXPACKET", Const, 0}, + {"IPV6_MAX_GROUP_SRC_FILTER", Const, 0}, + {"IPV6_MAX_MEMBERSHIPS", Const, 0}, + {"IPV6_MAX_SOCK_SRC_FILTER", Const, 0}, + {"IPV6_MIN_MEMBERSHIPS", Const, 0}, + {"IPV6_MMTU", Const, 0}, + {"IPV6_MSFILTER", Const, 0}, + {"IPV6_MTU", Const, 0}, + {"IPV6_MTU_DISCOVER", Const, 0}, + {"IPV6_MULTICAST_HOPS", Const, 0}, + {"IPV6_MULTICAST_IF", Const, 0}, + {"IPV6_MULTICAST_LOOP", Const, 0}, + {"IPV6_NEXTHOP", Const, 0}, + {"IPV6_OPTIONS", Const, 1}, + {"IPV6_PATHMTU", Const, 0}, + {"IPV6_PIPEX", Const, 1}, + {"IPV6_PKTINFO", Const, 0}, + {"IPV6_PMTUDISC_DO", Const, 0}, + {"IPV6_PMTUDISC_DONT", Const, 0}, + {"IPV6_PMTUDISC_PROBE", Const, 0}, + {"IPV6_PMTUDISC_WANT", Const, 0}, + {"IPV6_PORTRANGE", Const, 0}, + {"IPV6_PORTRANGE_DEFAULT", Const, 0}, + {"IPV6_PORTRANGE_HIGH", Const, 0}, + {"IPV6_PORTRANGE_LOW", Const, 0}, + {"IPV6_PREFER_TEMPADDR", Const, 0}, + {"IPV6_RECVDSTOPTS", Const, 0}, + {"IPV6_RECVDSTPORT", Const, 3}, + {"IPV6_RECVERR", Const, 0}, + {"IPV6_RECVHOPLIMIT", Const, 0}, + {"IPV6_RECVHOPOPTS", Const, 0}, + {"IPV6_RECVPATHMTU", Const, 0}, + {"IPV6_RECVPKTINFO", Const, 0}, + {"IPV6_RECVRTHDR", Const, 0}, + {"IPV6_RECVTCLASS", Const, 0}, + {"IPV6_ROUTER_ALERT", Const, 0}, + {"IPV6_RTABLE", Const, 1}, + {"IPV6_RTHDR", Const, 0}, + {"IPV6_RTHDRDSTOPTS", Const, 0}, + {"IPV6_RTHDR_LOOSE", Const, 0}, + {"IPV6_RTHDR_STRICT", Const, 0}, + {"IPV6_RTHDR_TYPE_0", Const, 0}, + {"IPV6_RXDSTOPTS", Const, 0}, + {"IPV6_RXHOPOPTS", Const, 0}, + {"IPV6_SOCKOPT_RESERVED1", Const, 0}, + {"IPV6_TCLASS", Const, 0}, + {"IPV6_UNICAST_HOPS", Const, 0}, + {"IPV6_USE_MIN_MTU", Const, 0}, + {"IPV6_V6ONLY", Const, 0}, + {"IPV6_VERSION", Const, 0}, + {"IPV6_VERSION_MASK", Const, 0}, + {"IPV6_XFRM_POLICY", Const, 0}, + {"IP_ADD_MEMBERSHIP", Const, 0}, + {"IP_ADD_SOURCE_MEMBERSHIP", Const, 0}, + {"IP_AUTH_LEVEL", Const, 1}, + {"IP_BINDANY", Const, 0}, + {"IP_BLOCK_SOURCE", Const, 0}, + {"IP_BOUND_IF", Const, 0}, + {"IP_DEFAULT_MULTICAST_LOOP", Const, 0}, + {"IP_DEFAULT_MULTICAST_TTL", Const, 0}, + {"IP_DF", Const, 0}, + {"IP_DIVERTFL", Const, 3}, + {"IP_DONTFRAG", Const, 0}, + {"IP_DROP_MEMBERSHIP", Const, 0}, + {"IP_DROP_SOURCE_MEMBERSHIP", Const, 0}, + {"IP_DUMMYNET3", Const, 0}, + {"IP_DUMMYNET_CONFIGURE", Const, 0}, + {"IP_DUMMYNET_DEL", Const, 0}, + {"IP_DUMMYNET_FLUSH", Const, 0}, + {"IP_DUMMYNET_GET", Const, 0}, + {"IP_EF", Const, 1}, + {"IP_ERRORMTU", Const, 1}, + {"IP_ESP_NETWORK_LEVEL", Const, 1}, + {"IP_ESP_TRANS_LEVEL", Const, 1}, + {"IP_FAITH", Const, 0}, + {"IP_FREEBIND", Const, 0}, + {"IP_FW3", Const, 0}, + {"IP_FW_ADD", Const, 0}, + {"IP_FW_DEL", Const, 0}, + {"IP_FW_FLUSH", Const, 0}, + {"IP_FW_GET", Const, 0}, + {"IP_FW_NAT_CFG", Const, 0}, + {"IP_FW_NAT_DEL", Const, 0}, + {"IP_FW_NAT_GET_CONFIG", Const, 0}, + {"IP_FW_NAT_GET_LOG", Const, 0}, + {"IP_FW_RESETLOG", Const, 0}, + {"IP_FW_TABLE_ADD", Const, 0}, + {"IP_FW_TABLE_DEL", Const, 0}, + {"IP_FW_TABLE_FLUSH", Const, 0}, + {"IP_FW_TABLE_GETSIZE", Const, 0}, + {"IP_FW_TABLE_LIST", Const, 0}, + {"IP_FW_ZERO", Const, 0}, + {"IP_HDRINCL", Const, 0}, + {"IP_IPCOMP_LEVEL", Const, 1}, + {"IP_IPSECFLOWINFO", Const, 1}, + {"IP_IPSEC_LOCAL_AUTH", Const, 1}, + {"IP_IPSEC_LOCAL_CRED", Const, 1}, + {"IP_IPSEC_LOCAL_ID", Const, 1}, + {"IP_IPSEC_POLICY", Const, 0}, + {"IP_IPSEC_REMOTE_AUTH", Const, 1}, + {"IP_IPSEC_REMOTE_CRED", Const, 1}, + {"IP_IPSEC_REMOTE_ID", Const, 1}, + {"IP_MAXPACKET", Const, 0}, + {"IP_MAX_GROUP_SRC_FILTER", Const, 0}, + {"IP_MAX_MEMBERSHIPS", Const, 0}, + {"IP_MAX_SOCK_MUTE_FILTER", Const, 0}, + {"IP_MAX_SOCK_SRC_FILTER", Const, 0}, + {"IP_MAX_SOURCE_FILTER", Const, 0}, + {"IP_MF", Const, 0}, + {"IP_MINFRAGSIZE", Const, 1}, + {"IP_MINTTL", Const, 0}, + {"IP_MIN_MEMBERSHIPS", Const, 0}, + {"IP_MSFILTER", Const, 0}, + {"IP_MSS", Const, 0}, + {"IP_MTU", Const, 0}, + {"IP_MTU_DISCOVER", Const, 0}, + {"IP_MULTICAST_IF", Const, 0}, + {"IP_MULTICAST_IFINDEX", Const, 0}, + {"IP_MULTICAST_LOOP", Const, 0}, + {"IP_MULTICAST_TTL", Const, 0}, + {"IP_MULTICAST_VIF", Const, 0}, + {"IP_NAT__XXX", Const, 0}, + {"IP_OFFMASK", Const, 0}, + {"IP_OLD_FW_ADD", Const, 0}, + {"IP_OLD_FW_DEL", Const, 0}, + {"IP_OLD_FW_FLUSH", Const, 0}, + {"IP_OLD_FW_GET", Const, 0}, + {"IP_OLD_FW_RESETLOG", Const, 0}, + {"IP_OLD_FW_ZERO", Const, 0}, + {"IP_ONESBCAST", Const, 0}, + {"IP_OPTIONS", Const, 0}, + {"IP_ORIGDSTADDR", Const, 0}, + {"IP_PASSSEC", Const, 0}, + {"IP_PIPEX", Const, 1}, + {"IP_PKTINFO", Const, 0}, + {"IP_PKTOPTIONS", Const, 0}, + {"IP_PMTUDISC", Const, 0}, + {"IP_PMTUDISC_DO", Const, 0}, + {"IP_PMTUDISC_DONT", Const, 0}, + {"IP_PMTUDISC_PROBE", Const, 0}, + {"IP_PMTUDISC_WANT", Const, 0}, + {"IP_PORTRANGE", Const, 0}, + {"IP_PORTRANGE_DEFAULT", Const, 0}, + {"IP_PORTRANGE_HIGH", Const, 0}, + {"IP_PORTRANGE_LOW", Const, 0}, + {"IP_RECVDSTADDR", Const, 0}, + {"IP_RECVDSTPORT", Const, 1}, + {"IP_RECVERR", Const, 0}, + {"IP_RECVIF", Const, 0}, + {"IP_RECVOPTS", Const, 0}, + {"IP_RECVORIGDSTADDR", Const, 0}, + {"IP_RECVPKTINFO", Const, 0}, + {"IP_RECVRETOPTS", Const, 0}, + {"IP_RECVRTABLE", Const, 1}, + {"IP_RECVTOS", Const, 0}, + {"IP_RECVTTL", Const, 0}, + {"IP_RETOPTS", Const, 0}, + {"IP_RF", Const, 0}, + {"IP_ROUTER_ALERT", Const, 0}, + {"IP_RSVP_OFF", Const, 0}, + {"IP_RSVP_ON", Const, 0}, + {"IP_RSVP_VIF_OFF", Const, 0}, + {"IP_RSVP_VIF_ON", Const, 0}, + {"IP_RTABLE", Const, 1}, + {"IP_SENDSRCADDR", Const, 0}, + {"IP_STRIPHDR", Const, 0}, + {"IP_TOS", Const, 0}, + {"IP_TRAFFIC_MGT_BACKGROUND", Const, 0}, + {"IP_TRANSPARENT", Const, 0}, + {"IP_TTL", Const, 0}, + {"IP_UNBLOCK_SOURCE", Const, 0}, + {"IP_XFRM_POLICY", Const, 0}, + {"IPv6MTUInfo", Type, 2}, + {"IPv6MTUInfo.Addr", Field, 2}, + {"IPv6MTUInfo.Mtu", Field, 2}, + {"IPv6Mreq", Type, 0}, + {"IPv6Mreq.Interface", Field, 0}, + {"IPv6Mreq.Multiaddr", Field, 0}, + {"ISIG", Const, 0}, + {"ISTRIP", Const, 0}, + {"IUCLC", Const, 0}, + {"IUTF8", Const, 0}, + {"IXANY", Const, 0}, + {"IXOFF", Const, 0}, + {"IXON", Const, 0}, + {"IfAddrmsg", Type, 0}, + {"IfAddrmsg.Family", Field, 0}, + {"IfAddrmsg.Flags", Field, 0}, + {"IfAddrmsg.Index", Field, 0}, + {"IfAddrmsg.Prefixlen", Field, 0}, + {"IfAddrmsg.Scope", Field, 0}, + {"IfAnnounceMsghdr", Type, 1}, + {"IfAnnounceMsghdr.Hdrlen", Field, 2}, + {"IfAnnounceMsghdr.Index", Field, 1}, + {"IfAnnounceMsghdr.Msglen", Field, 1}, + {"IfAnnounceMsghdr.Name", Field, 1}, + {"IfAnnounceMsghdr.Type", Field, 1}, + {"IfAnnounceMsghdr.Version", Field, 1}, + {"IfAnnounceMsghdr.What", Field, 1}, + {"IfData", Type, 0}, + {"IfData.Addrlen", Field, 0}, + {"IfData.Baudrate", Field, 0}, + {"IfData.Capabilities", Field, 2}, + {"IfData.Collisions", Field, 0}, + {"IfData.Datalen", Field, 0}, + {"IfData.Epoch", Field, 0}, + {"IfData.Hdrlen", Field, 0}, + {"IfData.Hwassist", Field, 0}, + {"IfData.Ibytes", Field, 0}, + {"IfData.Ierrors", Field, 0}, + {"IfData.Imcasts", Field, 0}, + {"IfData.Ipackets", Field, 0}, + {"IfData.Iqdrops", Field, 0}, + {"IfData.Lastchange", Field, 0}, + {"IfData.Link_state", Field, 0}, + {"IfData.Mclpool", Field, 2}, + {"IfData.Metric", Field, 0}, + {"IfData.Mtu", Field, 0}, + {"IfData.Noproto", Field, 0}, + {"IfData.Obytes", Field, 0}, + {"IfData.Oerrors", Field, 0}, + {"IfData.Omcasts", Field, 0}, + {"IfData.Opackets", Field, 0}, + {"IfData.Pad", Field, 2}, + {"IfData.Pad_cgo_0", Field, 2}, + {"IfData.Pad_cgo_1", Field, 2}, + {"IfData.Physical", Field, 0}, + {"IfData.Recvquota", Field, 0}, + {"IfData.Recvtiming", Field, 0}, + {"IfData.Reserved1", Field, 0}, + {"IfData.Reserved2", Field, 0}, + {"IfData.Spare_char1", Field, 0}, + {"IfData.Spare_char2", Field, 0}, + {"IfData.Type", Field, 0}, + {"IfData.Typelen", Field, 0}, + {"IfData.Unused1", Field, 0}, + {"IfData.Unused2", Field, 0}, + {"IfData.Xmitquota", Field, 0}, + {"IfData.Xmittiming", Field, 0}, + {"IfInfomsg", Type, 0}, + {"IfInfomsg.Change", Field, 0}, + {"IfInfomsg.Family", Field, 0}, + {"IfInfomsg.Flags", Field, 0}, + {"IfInfomsg.Index", Field, 0}, + {"IfInfomsg.Type", Field, 0}, + {"IfInfomsg.X__ifi_pad", Field, 0}, + {"IfMsghdr", Type, 0}, + {"IfMsghdr.Addrs", Field, 0}, + {"IfMsghdr.Data", Field, 0}, + {"IfMsghdr.Flags", Field, 0}, + {"IfMsghdr.Hdrlen", Field, 2}, + {"IfMsghdr.Index", Field, 0}, + {"IfMsghdr.Msglen", Field, 0}, + {"IfMsghdr.Pad1", Field, 2}, + {"IfMsghdr.Pad2", Field, 2}, + {"IfMsghdr.Pad_cgo_0", Field, 0}, + {"IfMsghdr.Pad_cgo_1", Field, 2}, + {"IfMsghdr.Tableid", Field, 2}, + {"IfMsghdr.Type", Field, 0}, + {"IfMsghdr.Version", Field, 0}, + {"IfMsghdr.Xflags", Field, 2}, + {"IfaMsghdr", Type, 0}, + {"IfaMsghdr.Addrs", Field, 0}, + {"IfaMsghdr.Flags", Field, 0}, + {"IfaMsghdr.Hdrlen", Field, 2}, + {"IfaMsghdr.Index", Field, 0}, + {"IfaMsghdr.Metric", Field, 0}, + {"IfaMsghdr.Msglen", Field, 0}, + {"IfaMsghdr.Pad1", Field, 2}, + {"IfaMsghdr.Pad2", Field, 2}, + {"IfaMsghdr.Pad_cgo_0", Field, 0}, + {"IfaMsghdr.Tableid", Field, 2}, + {"IfaMsghdr.Type", Field, 0}, + {"IfaMsghdr.Version", Field, 0}, + {"IfmaMsghdr", Type, 0}, + {"IfmaMsghdr.Addrs", Field, 0}, + {"IfmaMsghdr.Flags", Field, 0}, + {"IfmaMsghdr.Index", Field, 0}, + {"IfmaMsghdr.Msglen", Field, 0}, + {"IfmaMsghdr.Pad_cgo_0", Field, 0}, + {"IfmaMsghdr.Type", Field, 0}, + {"IfmaMsghdr.Version", Field, 0}, + {"IfmaMsghdr2", Type, 0}, + {"IfmaMsghdr2.Addrs", Field, 0}, + {"IfmaMsghdr2.Flags", Field, 0}, + {"IfmaMsghdr2.Index", Field, 0}, + {"IfmaMsghdr2.Msglen", Field, 0}, + {"IfmaMsghdr2.Pad_cgo_0", Field, 0}, + {"IfmaMsghdr2.Refcount", Field, 0}, + {"IfmaMsghdr2.Type", Field, 0}, + {"IfmaMsghdr2.Version", Field, 0}, + {"ImplementsGetwd", Const, 0}, + {"Inet4Pktinfo", Type, 0}, + {"Inet4Pktinfo.Addr", Field, 0}, + {"Inet4Pktinfo.Ifindex", Field, 0}, + {"Inet4Pktinfo.Spec_dst", Field, 0}, + {"Inet6Pktinfo", Type, 0}, + {"Inet6Pktinfo.Addr", Field, 0}, + {"Inet6Pktinfo.Ifindex", Field, 0}, + {"InotifyAddWatch", Func, 0}, + {"InotifyEvent", Type, 0}, + {"InotifyEvent.Cookie", Field, 0}, + {"InotifyEvent.Len", Field, 0}, + {"InotifyEvent.Mask", Field, 0}, + {"InotifyEvent.Name", Field, 0}, + {"InotifyEvent.Wd", Field, 0}, + {"InotifyInit", Func, 0}, + {"InotifyInit1", Func, 0}, + {"InotifyRmWatch", Func, 0}, + {"InterfaceAddrMessage", Type, 0}, + {"InterfaceAddrMessage.Data", Field, 0}, + {"InterfaceAddrMessage.Header", Field, 0}, + {"InterfaceAnnounceMessage", Type, 1}, + {"InterfaceAnnounceMessage.Header", Field, 1}, + {"InterfaceInfo", Type, 0}, + {"InterfaceInfo.Address", Field, 0}, + {"InterfaceInfo.BroadcastAddress", Field, 0}, + {"InterfaceInfo.Flags", Field, 0}, + {"InterfaceInfo.Netmask", Field, 0}, + {"InterfaceMessage", Type, 0}, + {"InterfaceMessage.Data", Field, 0}, + {"InterfaceMessage.Header", Field, 0}, + {"InterfaceMulticastAddrMessage", Type, 0}, + {"InterfaceMulticastAddrMessage.Data", Field, 0}, + {"InterfaceMulticastAddrMessage.Header", Field, 0}, + {"InvalidHandle", Const, 0}, + {"Ioperm", Func, 0}, + {"Iopl", Func, 0}, + {"Iovec", Type, 0}, + {"Iovec.Base", Field, 0}, + {"Iovec.Len", Field, 0}, + {"IpAdapterInfo", Type, 0}, + {"IpAdapterInfo.AdapterName", Field, 0}, + {"IpAdapterInfo.Address", Field, 0}, + {"IpAdapterInfo.AddressLength", Field, 0}, + {"IpAdapterInfo.ComboIndex", Field, 0}, + {"IpAdapterInfo.CurrentIpAddress", Field, 0}, + {"IpAdapterInfo.Description", Field, 0}, + {"IpAdapterInfo.DhcpEnabled", Field, 0}, + {"IpAdapterInfo.DhcpServer", Field, 0}, + {"IpAdapterInfo.GatewayList", Field, 0}, + {"IpAdapterInfo.HaveWins", Field, 0}, + {"IpAdapterInfo.Index", Field, 0}, + {"IpAdapterInfo.IpAddressList", Field, 0}, + {"IpAdapterInfo.LeaseExpires", Field, 0}, + {"IpAdapterInfo.LeaseObtained", Field, 0}, + {"IpAdapterInfo.Next", Field, 0}, + {"IpAdapterInfo.PrimaryWinsServer", Field, 0}, + {"IpAdapterInfo.SecondaryWinsServer", Field, 0}, + {"IpAdapterInfo.Type", Field, 0}, + {"IpAddrString", Type, 0}, + {"IpAddrString.Context", Field, 0}, + {"IpAddrString.IpAddress", Field, 0}, + {"IpAddrString.IpMask", Field, 0}, + {"IpAddrString.Next", Field, 0}, + {"IpAddressString", Type, 0}, + {"IpAddressString.String", Field, 0}, + {"IpMaskString", Type, 0}, + {"IpMaskString.String", Field, 2}, + {"Issetugid", Func, 0}, + {"KEY_ALL_ACCESS", Const, 0}, + {"KEY_CREATE_LINK", Const, 0}, + {"KEY_CREATE_SUB_KEY", Const, 0}, + {"KEY_ENUMERATE_SUB_KEYS", Const, 0}, + {"KEY_EXECUTE", Const, 0}, + {"KEY_NOTIFY", Const, 0}, + {"KEY_QUERY_VALUE", Const, 0}, + {"KEY_READ", Const, 0}, + {"KEY_SET_VALUE", Const, 0}, + {"KEY_WOW64_32KEY", Const, 0}, + {"KEY_WOW64_64KEY", Const, 0}, + {"KEY_WRITE", Const, 0}, + {"Kevent", Func, 0}, + {"Kevent_t", Type, 0}, + {"Kevent_t.Data", Field, 0}, + {"Kevent_t.Fflags", Field, 0}, + {"Kevent_t.Filter", Field, 0}, + {"Kevent_t.Flags", Field, 0}, + {"Kevent_t.Ident", Field, 0}, + {"Kevent_t.Pad_cgo_0", Field, 2}, + {"Kevent_t.Udata", Field, 0}, + {"Kill", Func, 0}, + {"Klogctl", Func, 0}, + {"Kqueue", Func, 0}, + {"LANG_ENGLISH", Const, 0}, + {"LAYERED_PROTOCOL", Const, 2}, + {"LCNT_OVERLOAD_FLUSH", Const, 1}, + {"LINUX_REBOOT_CMD_CAD_OFF", Const, 0}, + {"LINUX_REBOOT_CMD_CAD_ON", Const, 0}, + {"LINUX_REBOOT_CMD_HALT", Const, 0}, + {"LINUX_REBOOT_CMD_KEXEC", Const, 0}, + {"LINUX_REBOOT_CMD_POWER_OFF", Const, 0}, + {"LINUX_REBOOT_CMD_RESTART", Const, 0}, + {"LINUX_REBOOT_CMD_RESTART2", Const, 0}, + {"LINUX_REBOOT_CMD_SW_SUSPEND", Const, 0}, + {"LINUX_REBOOT_MAGIC1", Const, 0}, + {"LINUX_REBOOT_MAGIC2", Const, 0}, + {"LOCK_EX", Const, 0}, + {"LOCK_NB", Const, 0}, + {"LOCK_SH", Const, 0}, + {"LOCK_UN", Const, 0}, + {"LazyDLL", Type, 0}, + {"LazyDLL.Name", Field, 0}, + {"LazyProc", Type, 0}, + {"LazyProc.Name", Field, 0}, + {"Lchown", Func, 0}, + {"Linger", Type, 0}, + {"Linger.Linger", Field, 0}, + {"Linger.Onoff", Field, 0}, + {"Link", Func, 0}, + {"Listen", Func, 0}, + {"Listxattr", Func, 1}, + {"LoadCancelIoEx", Func, 1}, + {"LoadConnectEx", Func, 1}, + {"LoadCreateSymbolicLink", Func, 4}, + {"LoadDLL", Func, 0}, + {"LoadGetAddrInfo", Func, 1}, + {"LoadLibrary", Func, 0}, + {"LoadSetFileCompletionNotificationModes", Func, 2}, + {"LocalFree", Func, 0}, + {"Log2phys_t", Type, 0}, + {"Log2phys_t.Contigbytes", Field, 0}, + {"Log2phys_t.Devoffset", Field, 0}, + {"Log2phys_t.Flags", Field, 0}, + {"LookupAccountName", Func, 0}, + {"LookupAccountSid", Func, 0}, + {"LookupSID", Func, 0}, + {"LsfJump", Func, 0}, + {"LsfSocket", Func, 0}, + {"LsfStmt", Func, 0}, + {"Lstat", Func, 0}, + {"MADV_AUTOSYNC", Const, 1}, + {"MADV_CAN_REUSE", Const, 0}, + {"MADV_CORE", Const, 1}, + {"MADV_DOFORK", Const, 0}, + {"MADV_DONTFORK", Const, 0}, + {"MADV_DONTNEED", Const, 0}, + {"MADV_FREE", Const, 0}, + {"MADV_FREE_REUSABLE", Const, 0}, + {"MADV_FREE_REUSE", Const, 0}, + {"MADV_HUGEPAGE", Const, 0}, + {"MADV_HWPOISON", Const, 0}, + {"MADV_MERGEABLE", Const, 0}, + {"MADV_NOCORE", Const, 1}, + {"MADV_NOHUGEPAGE", Const, 0}, + {"MADV_NORMAL", Const, 0}, + {"MADV_NOSYNC", Const, 1}, + {"MADV_PROTECT", Const, 1}, + {"MADV_RANDOM", Const, 0}, + {"MADV_REMOVE", Const, 0}, + {"MADV_SEQUENTIAL", Const, 0}, + {"MADV_SPACEAVAIL", Const, 3}, + {"MADV_UNMERGEABLE", Const, 0}, + {"MADV_WILLNEED", Const, 0}, + {"MADV_ZERO_WIRED_PAGES", Const, 0}, + {"MAP_32BIT", Const, 0}, + {"MAP_ALIGNED_SUPER", Const, 3}, + {"MAP_ALIGNMENT_16MB", Const, 3}, + {"MAP_ALIGNMENT_1TB", Const, 3}, + {"MAP_ALIGNMENT_256TB", Const, 3}, + {"MAP_ALIGNMENT_4GB", Const, 3}, + {"MAP_ALIGNMENT_64KB", Const, 3}, + {"MAP_ALIGNMENT_64PB", Const, 3}, + {"MAP_ALIGNMENT_MASK", Const, 3}, + {"MAP_ALIGNMENT_SHIFT", Const, 3}, + {"MAP_ANON", Const, 0}, + {"MAP_ANONYMOUS", Const, 0}, + {"MAP_COPY", Const, 0}, + {"MAP_DENYWRITE", Const, 0}, + {"MAP_EXECUTABLE", Const, 0}, + {"MAP_FILE", Const, 0}, + {"MAP_FIXED", Const, 0}, + {"MAP_FLAGMASK", Const, 3}, + {"MAP_GROWSDOWN", Const, 0}, + {"MAP_HASSEMAPHORE", Const, 0}, + {"MAP_HUGETLB", Const, 0}, + {"MAP_INHERIT", Const, 3}, + {"MAP_INHERIT_COPY", Const, 3}, + {"MAP_INHERIT_DEFAULT", Const, 3}, + {"MAP_INHERIT_DONATE_COPY", Const, 3}, + {"MAP_INHERIT_NONE", Const, 3}, + {"MAP_INHERIT_SHARE", Const, 3}, + {"MAP_JIT", Const, 0}, + {"MAP_LOCKED", Const, 0}, + {"MAP_NOCACHE", Const, 0}, + {"MAP_NOCORE", Const, 1}, + {"MAP_NOEXTEND", Const, 0}, + {"MAP_NONBLOCK", Const, 0}, + {"MAP_NORESERVE", Const, 0}, + {"MAP_NOSYNC", Const, 1}, + {"MAP_POPULATE", Const, 0}, + {"MAP_PREFAULT_READ", Const, 1}, + {"MAP_PRIVATE", Const, 0}, + {"MAP_RENAME", Const, 0}, + {"MAP_RESERVED0080", Const, 0}, + {"MAP_RESERVED0100", Const, 1}, + {"MAP_SHARED", Const, 0}, + {"MAP_STACK", Const, 0}, + {"MAP_TRYFIXED", Const, 3}, + {"MAP_TYPE", Const, 0}, + {"MAP_WIRED", Const, 3}, + {"MAXIMUM_REPARSE_DATA_BUFFER_SIZE", Const, 4}, + {"MAXLEN_IFDESCR", Const, 0}, + {"MAXLEN_PHYSADDR", Const, 0}, + {"MAX_ADAPTER_ADDRESS_LENGTH", Const, 0}, + {"MAX_ADAPTER_DESCRIPTION_LENGTH", Const, 0}, + {"MAX_ADAPTER_NAME_LENGTH", Const, 0}, + {"MAX_COMPUTERNAME_LENGTH", Const, 0}, + {"MAX_INTERFACE_NAME_LEN", Const, 0}, + {"MAX_LONG_PATH", Const, 0}, + {"MAX_PATH", Const, 0}, + {"MAX_PROTOCOL_CHAIN", Const, 2}, + {"MCL_CURRENT", Const, 0}, + {"MCL_FUTURE", Const, 0}, + {"MNT_DETACH", Const, 0}, + {"MNT_EXPIRE", Const, 0}, + {"MNT_FORCE", Const, 0}, + {"MSG_BCAST", Const, 1}, + {"MSG_CMSG_CLOEXEC", Const, 0}, + {"MSG_COMPAT", Const, 0}, + {"MSG_CONFIRM", Const, 0}, + {"MSG_CONTROLMBUF", Const, 1}, + {"MSG_CTRUNC", Const, 0}, + {"MSG_DONTROUTE", Const, 0}, + {"MSG_DONTWAIT", Const, 0}, + {"MSG_EOF", Const, 0}, + {"MSG_EOR", Const, 0}, + {"MSG_ERRQUEUE", Const, 0}, + {"MSG_FASTOPEN", Const, 1}, + {"MSG_FIN", Const, 0}, + {"MSG_FLUSH", Const, 0}, + {"MSG_HAVEMORE", Const, 0}, + {"MSG_HOLD", Const, 0}, + {"MSG_IOVUSRSPACE", Const, 1}, + {"MSG_LENUSRSPACE", Const, 1}, + {"MSG_MCAST", Const, 1}, + {"MSG_MORE", Const, 0}, + {"MSG_NAMEMBUF", Const, 1}, + {"MSG_NBIO", Const, 0}, + {"MSG_NEEDSA", Const, 0}, + {"MSG_NOSIGNAL", Const, 0}, + {"MSG_NOTIFICATION", Const, 0}, + {"MSG_OOB", Const, 0}, + {"MSG_PEEK", Const, 0}, + {"MSG_PROXY", Const, 0}, + {"MSG_RCVMORE", Const, 0}, + {"MSG_RST", Const, 0}, + {"MSG_SEND", Const, 0}, + {"MSG_SYN", Const, 0}, + {"MSG_TRUNC", Const, 0}, + {"MSG_TRYHARD", Const, 0}, + {"MSG_USERFLAGS", Const, 1}, + {"MSG_WAITALL", Const, 0}, + {"MSG_WAITFORONE", Const, 0}, + {"MSG_WAITSTREAM", Const, 0}, + {"MS_ACTIVE", Const, 0}, + {"MS_ASYNC", Const, 0}, + {"MS_BIND", Const, 0}, + {"MS_DEACTIVATE", Const, 0}, + {"MS_DIRSYNC", Const, 0}, + {"MS_INVALIDATE", Const, 0}, + {"MS_I_VERSION", Const, 0}, + {"MS_KERNMOUNT", Const, 0}, + {"MS_KILLPAGES", Const, 0}, + {"MS_MANDLOCK", Const, 0}, + {"MS_MGC_MSK", Const, 0}, + {"MS_MGC_VAL", Const, 0}, + {"MS_MOVE", Const, 0}, + {"MS_NOATIME", Const, 0}, + {"MS_NODEV", Const, 0}, + {"MS_NODIRATIME", Const, 0}, + {"MS_NOEXEC", Const, 0}, + {"MS_NOSUID", Const, 0}, + {"MS_NOUSER", Const, 0}, + {"MS_POSIXACL", Const, 0}, + {"MS_PRIVATE", Const, 0}, + {"MS_RDONLY", Const, 0}, + {"MS_REC", Const, 0}, + {"MS_RELATIME", Const, 0}, + {"MS_REMOUNT", Const, 0}, + {"MS_RMT_MASK", Const, 0}, + {"MS_SHARED", Const, 0}, + {"MS_SILENT", Const, 0}, + {"MS_SLAVE", Const, 0}, + {"MS_STRICTATIME", Const, 0}, + {"MS_SYNC", Const, 0}, + {"MS_SYNCHRONOUS", Const, 0}, + {"MS_UNBINDABLE", Const, 0}, + {"Madvise", Func, 0}, + {"MapViewOfFile", Func, 0}, + {"MaxTokenInfoClass", Const, 0}, + {"Mclpool", Type, 2}, + {"Mclpool.Alive", Field, 2}, + {"Mclpool.Cwm", Field, 2}, + {"Mclpool.Grown", Field, 2}, + {"Mclpool.Hwm", Field, 2}, + {"Mclpool.Lwm", Field, 2}, + {"MibIfRow", Type, 0}, + {"MibIfRow.AdminStatus", Field, 0}, + {"MibIfRow.Descr", Field, 0}, + {"MibIfRow.DescrLen", Field, 0}, + {"MibIfRow.InDiscards", Field, 0}, + {"MibIfRow.InErrors", Field, 0}, + {"MibIfRow.InNUcastPkts", Field, 0}, + {"MibIfRow.InOctets", Field, 0}, + {"MibIfRow.InUcastPkts", Field, 0}, + {"MibIfRow.InUnknownProtos", Field, 0}, + {"MibIfRow.Index", Field, 0}, + {"MibIfRow.LastChange", Field, 0}, + {"MibIfRow.Mtu", Field, 0}, + {"MibIfRow.Name", Field, 0}, + {"MibIfRow.OperStatus", Field, 0}, + {"MibIfRow.OutDiscards", Field, 0}, + {"MibIfRow.OutErrors", Field, 0}, + {"MibIfRow.OutNUcastPkts", Field, 0}, + {"MibIfRow.OutOctets", Field, 0}, + {"MibIfRow.OutQLen", Field, 0}, + {"MibIfRow.OutUcastPkts", Field, 0}, + {"MibIfRow.PhysAddr", Field, 0}, + {"MibIfRow.PhysAddrLen", Field, 0}, + {"MibIfRow.Speed", Field, 0}, + {"MibIfRow.Type", Field, 0}, + {"Mkdir", Func, 0}, + {"Mkdirat", Func, 0}, + {"Mkfifo", Func, 0}, + {"Mknod", Func, 0}, + {"Mknodat", Func, 0}, + {"Mlock", Func, 0}, + {"Mlockall", Func, 0}, + {"Mmap", Func, 0}, + {"Mount", Func, 0}, + {"MoveFile", Func, 0}, + {"Mprotect", Func, 0}, + {"Msghdr", Type, 0}, + {"Msghdr.Control", Field, 0}, + {"Msghdr.Controllen", Field, 0}, + {"Msghdr.Flags", Field, 0}, + {"Msghdr.Iov", Field, 0}, + {"Msghdr.Iovlen", Field, 0}, + {"Msghdr.Name", Field, 0}, + {"Msghdr.Namelen", Field, 0}, + {"Msghdr.Pad_cgo_0", Field, 0}, + {"Msghdr.Pad_cgo_1", Field, 0}, + {"Munlock", Func, 0}, + {"Munlockall", Func, 0}, + {"Munmap", Func, 0}, + {"MustLoadDLL", Func, 0}, + {"NAME_MAX", Const, 0}, + {"NETLINK_ADD_MEMBERSHIP", Const, 0}, + {"NETLINK_AUDIT", Const, 0}, + {"NETLINK_BROADCAST_ERROR", Const, 0}, + {"NETLINK_CONNECTOR", Const, 0}, + {"NETLINK_DNRTMSG", Const, 0}, + {"NETLINK_DROP_MEMBERSHIP", Const, 0}, + {"NETLINK_ECRYPTFS", Const, 0}, + {"NETLINK_FIB_LOOKUP", Const, 0}, + {"NETLINK_FIREWALL", Const, 0}, + {"NETLINK_GENERIC", Const, 0}, + {"NETLINK_INET_DIAG", Const, 0}, + {"NETLINK_IP6_FW", Const, 0}, + {"NETLINK_ISCSI", Const, 0}, + {"NETLINK_KOBJECT_UEVENT", Const, 0}, + {"NETLINK_NETFILTER", Const, 0}, + {"NETLINK_NFLOG", Const, 0}, + {"NETLINK_NO_ENOBUFS", Const, 0}, + {"NETLINK_PKTINFO", Const, 0}, + {"NETLINK_RDMA", Const, 0}, + {"NETLINK_ROUTE", Const, 0}, + {"NETLINK_SCSITRANSPORT", Const, 0}, + {"NETLINK_SELINUX", Const, 0}, + {"NETLINK_UNUSED", Const, 0}, + {"NETLINK_USERSOCK", Const, 0}, + {"NETLINK_XFRM", Const, 0}, + {"NET_RT_DUMP", Const, 0}, + {"NET_RT_DUMP2", Const, 0}, + {"NET_RT_FLAGS", Const, 0}, + {"NET_RT_IFLIST", Const, 0}, + {"NET_RT_IFLIST2", Const, 0}, + {"NET_RT_IFLISTL", Const, 1}, + {"NET_RT_IFMALIST", Const, 0}, + {"NET_RT_MAXID", Const, 0}, + {"NET_RT_OIFLIST", Const, 1}, + {"NET_RT_OOIFLIST", Const, 1}, + {"NET_RT_STAT", Const, 0}, + {"NET_RT_STATS", Const, 1}, + {"NET_RT_TABLE", Const, 1}, + {"NET_RT_TRASH", Const, 0}, + {"NLA_ALIGNTO", Const, 0}, + {"NLA_F_NESTED", Const, 0}, + {"NLA_F_NET_BYTEORDER", Const, 0}, + {"NLA_HDRLEN", Const, 0}, + {"NLMSG_ALIGNTO", Const, 0}, + {"NLMSG_DONE", Const, 0}, + {"NLMSG_ERROR", Const, 0}, + {"NLMSG_HDRLEN", Const, 0}, + {"NLMSG_MIN_TYPE", Const, 0}, + {"NLMSG_NOOP", Const, 0}, + {"NLMSG_OVERRUN", Const, 0}, + {"NLM_F_ACK", Const, 0}, + {"NLM_F_APPEND", Const, 0}, + {"NLM_F_ATOMIC", Const, 0}, + {"NLM_F_CREATE", Const, 0}, + {"NLM_F_DUMP", Const, 0}, + {"NLM_F_ECHO", Const, 0}, + {"NLM_F_EXCL", Const, 0}, + {"NLM_F_MATCH", Const, 0}, + {"NLM_F_MULTI", Const, 0}, + {"NLM_F_REPLACE", Const, 0}, + {"NLM_F_REQUEST", Const, 0}, + {"NLM_F_ROOT", Const, 0}, + {"NOFLSH", Const, 0}, + {"NOTE_ABSOLUTE", Const, 0}, + {"NOTE_ATTRIB", Const, 0}, + {"NOTE_BACKGROUND", Const, 16}, + {"NOTE_CHILD", Const, 0}, + {"NOTE_CRITICAL", Const, 16}, + {"NOTE_DELETE", Const, 0}, + {"NOTE_EOF", Const, 1}, + {"NOTE_EXEC", Const, 0}, + {"NOTE_EXIT", Const, 0}, + {"NOTE_EXITSTATUS", Const, 0}, + {"NOTE_EXIT_CSERROR", Const, 16}, + {"NOTE_EXIT_DECRYPTFAIL", Const, 16}, + {"NOTE_EXIT_DETAIL", Const, 16}, + {"NOTE_EXIT_DETAIL_MASK", Const, 16}, + {"NOTE_EXIT_MEMORY", Const, 16}, + {"NOTE_EXIT_REPARENTED", Const, 16}, + {"NOTE_EXTEND", Const, 0}, + {"NOTE_FFAND", Const, 0}, + {"NOTE_FFCOPY", Const, 0}, + {"NOTE_FFCTRLMASK", Const, 0}, + {"NOTE_FFLAGSMASK", Const, 0}, + {"NOTE_FFNOP", Const, 0}, + {"NOTE_FFOR", Const, 0}, + {"NOTE_FORK", Const, 0}, + {"NOTE_LEEWAY", Const, 16}, + {"NOTE_LINK", Const, 0}, + {"NOTE_LOWAT", Const, 0}, + {"NOTE_NONE", Const, 0}, + {"NOTE_NSECONDS", Const, 0}, + {"NOTE_PCTRLMASK", Const, 0}, + {"NOTE_PDATAMASK", Const, 0}, + {"NOTE_REAP", Const, 0}, + {"NOTE_RENAME", Const, 0}, + {"NOTE_RESOURCEEND", Const, 0}, + {"NOTE_REVOKE", Const, 0}, + {"NOTE_SECONDS", Const, 0}, + {"NOTE_SIGNAL", Const, 0}, + {"NOTE_TRACK", Const, 0}, + {"NOTE_TRACKERR", Const, 0}, + {"NOTE_TRIGGER", Const, 0}, + {"NOTE_TRUNCATE", Const, 1}, + {"NOTE_USECONDS", Const, 0}, + {"NOTE_VM_ERROR", Const, 0}, + {"NOTE_VM_PRESSURE", Const, 0}, + {"NOTE_VM_PRESSURE_SUDDEN_TERMINATE", Const, 0}, + {"NOTE_VM_PRESSURE_TERMINATE", Const, 0}, + {"NOTE_WRITE", Const, 0}, + {"NameCanonical", Const, 0}, + {"NameCanonicalEx", Const, 0}, + {"NameDisplay", Const, 0}, + {"NameDnsDomain", Const, 0}, + {"NameFullyQualifiedDN", Const, 0}, + {"NameSamCompatible", Const, 0}, + {"NameServicePrincipal", Const, 0}, + {"NameUniqueId", Const, 0}, + {"NameUnknown", Const, 0}, + {"NameUserPrincipal", Const, 0}, + {"Nanosleep", Func, 0}, + {"NetApiBufferFree", Func, 0}, + {"NetGetJoinInformation", Func, 2}, + {"NetSetupDomainName", Const, 2}, + {"NetSetupUnjoined", Const, 2}, + {"NetSetupUnknownStatus", Const, 2}, + {"NetSetupWorkgroupName", Const, 2}, + {"NetUserGetInfo", Func, 0}, + {"NetlinkMessage", Type, 0}, + {"NetlinkMessage.Data", Field, 0}, + {"NetlinkMessage.Header", Field, 0}, + {"NetlinkRIB", Func, 0}, + {"NetlinkRouteAttr", Type, 0}, + {"NetlinkRouteAttr.Attr", Field, 0}, + {"NetlinkRouteAttr.Value", Field, 0}, + {"NetlinkRouteRequest", Type, 0}, + {"NetlinkRouteRequest.Data", Field, 0}, + {"NetlinkRouteRequest.Header", Field, 0}, + {"NewCallback", Func, 0}, + {"NewCallbackCDecl", Func, 3}, + {"NewLazyDLL", Func, 0}, + {"NlAttr", Type, 0}, + {"NlAttr.Len", Field, 0}, + {"NlAttr.Type", Field, 0}, + {"NlMsgerr", Type, 0}, + {"NlMsgerr.Error", Field, 0}, + {"NlMsgerr.Msg", Field, 0}, + {"NlMsghdr", Type, 0}, + {"NlMsghdr.Flags", Field, 0}, + {"NlMsghdr.Len", Field, 0}, + {"NlMsghdr.Pid", Field, 0}, + {"NlMsghdr.Seq", Field, 0}, + {"NlMsghdr.Type", Field, 0}, + {"NsecToFiletime", Func, 0}, + {"NsecToTimespec", Func, 0}, + {"NsecToTimeval", Func, 0}, + {"Ntohs", Func, 0}, + {"OCRNL", Const, 0}, + {"OFDEL", Const, 0}, + {"OFILL", Const, 0}, + {"OFIOGETBMAP", Const, 1}, + {"OID_PKIX_KP_SERVER_AUTH", Var, 0}, + {"OID_SERVER_GATED_CRYPTO", Var, 0}, + {"OID_SGC_NETSCAPE", Var, 0}, + {"OLCUC", Const, 0}, + {"ONLCR", Const, 0}, + {"ONLRET", Const, 0}, + {"ONOCR", Const, 0}, + {"ONOEOT", Const, 1}, + {"OPEN_ALWAYS", Const, 0}, + {"OPEN_EXISTING", Const, 0}, + {"OPOST", Const, 0}, + {"O_ACCMODE", Const, 0}, + {"O_ALERT", Const, 0}, + {"O_ALT_IO", Const, 1}, + {"O_APPEND", Const, 0}, + {"O_ASYNC", Const, 0}, + {"O_CLOEXEC", Const, 0}, + {"O_CREAT", Const, 0}, + {"O_DIRECT", Const, 0}, + {"O_DIRECTORY", Const, 0}, + {"O_DP_GETRAWENCRYPTED", Const, 16}, + {"O_DSYNC", Const, 0}, + {"O_EVTONLY", Const, 0}, + {"O_EXCL", Const, 0}, + {"O_EXEC", Const, 0}, + {"O_EXLOCK", Const, 0}, + {"O_FSYNC", Const, 0}, + {"O_LARGEFILE", Const, 0}, + {"O_NDELAY", Const, 0}, + {"O_NOATIME", Const, 0}, + {"O_NOCTTY", Const, 0}, + {"O_NOFOLLOW", Const, 0}, + {"O_NONBLOCK", Const, 0}, + {"O_NOSIGPIPE", Const, 1}, + {"O_POPUP", Const, 0}, + {"O_RDONLY", Const, 0}, + {"O_RDWR", Const, 0}, + {"O_RSYNC", Const, 0}, + {"O_SHLOCK", Const, 0}, + {"O_SYMLINK", Const, 0}, + {"O_SYNC", Const, 0}, + {"O_TRUNC", Const, 0}, + {"O_TTY_INIT", Const, 0}, + {"O_WRONLY", Const, 0}, + {"Open", Func, 0}, + {"OpenCurrentProcessToken", Func, 0}, + {"OpenProcess", Func, 0}, + {"OpenProcessToken", Func, 0}, + {"Openat", Func, 0}, + {"Overlapped", Type, 0}, + {"Overlapped.HEvent", Field, 0}, + {"Overlapped.Internal", Field, 0}, + {"Overlapped.InternalHigh", Field, 0}, + {"Overlapped.Offset", Field, 0}, + {"Overlapped.OffsetHigh", Field, 0}, + {"PACKET_ADD_MEMBERSHIP", Const, 0}, + {"PACKET_BROADCAST", Const, 0}, + {"PACKET_DROP_MEMBERSHIP", Const, 0}, + {"PACKET_FASTROUTE", Const, 0}, + {"PACKET_HOST", Const, 0}, + {"PACKET_LOOPBACK", Const, 0}, + {"PACKET_MR_ALLMULTI", Const, 0}, + {"PACKET_MR_MULTICAST", Const, 0}, + {"PACKET_MR_PROMISC", Const, 0}, + {"PACKET_MULTICAST", Const, 0}, + {"PACKET_OTHERHOST", Const, 0}, + {"PACKET_OUTGOING", Const, 0}, + {"PACKET_RECV_OUTPUT", Const, 0}, + {"PACKET_RX_RING", Const, 0}, + {"PACKET_STATISTICS", Const, 0}, + {"PAGE_EXECUTE_READ", Const, 0}, + {"PAGE_EXECUTE_READWRITE", Const, 0}, + {"PAGE_EXECUTE_WRITECOPY", Const, 0}, + {"PAGE_READONLY", Const, 0}, + {"PAGE_READWRITE", Const, 0}, + {"PAGE_WRITECOPY", Const, 0}, + {"PARENB", Const, 0}, + {"PARMRK", Const, 0}, + {"PARODD", Const, 0}, + {"PENDIN", Const, 0}, + {"PFL_HIDDEN", Const, 2}, + {"PFL_MATCHES_PROTOCOL_ZERO", Const, 2}, + {"PFL_MULTIPLE_PROTO_ENTRIES", Const, 2}, + {"PFL_NETWORKDIRECT_PROVIDER", Const, 2}, + {"PFL_RECOMMENDED_PROTO_ENTRY", Const, 2}, + {"PF_FLUSH", Const, 1}, + {"PKCS_7_ASN_ENCODING", Const, 0}, + {"PMC5_PIPELINE_FLUSH", Const, 1}, + {"PRIO_PGRP", Const, 2}, + {"PRIO_PROCESS", Const, 2}, + {"PRIO_USER", Const, 2}, + {"PRI_IOFLUSH", Const, 1}, + {"PROCESS_QUERY_INFORMATION", Const, 0}, + {"PROCESS_TERMINATE", Const, 2}, + {"PROT_EXEC", Const, 0}, + {"PROT_GROWSDOWN", Const, 0}, + {"PROT_GROWSUP", Const, 0}, + {"PROT_NONE", Const, 0}, + {"PROT_READ", Const, 0}, + {"PROT_WRITE", Const, 0}, + {"PROV_DH_SCHANNEL", Const, 0}, + {"PROV_DSS", Const, 0}, + {"PROV_DSS_DH", Const, 0}, + {"PROV_EC_ECDSA_FULL", Const, 0}, + {"PROV_EC_ECDSA_SIG", Const, 0}, + {"PROV_EC_ECNRA_FULL", Const, 0}, + {"PROV_EC_ECNRA_SIG", Const, 0}, + {"PROV_FORTEZZA", Const, 0}, + {"PROV_INTEL_SEC", Const, 0}, + {"PROV_MS_EXCHANGE", Const, 0}, + {"PROV_REPLACE_OWF", Const, 0}, + {"PROV_RNG", Const, 0}, + {"PROV_RSA_AES", Const, 0}, + {"PROV_RSA_FULL", Const, 0}, + {"PROV_RSA_SCHANNEL", Const, 0}, + {"PROV_RSA_SIG", Const, 0}, + {"PROV_SPYRUS_LYNKS", Const, 0}, + {"PROV_SSL", Const, 0}, + {"PR_CAPBSET_DROP", Const, 0}, + {"PR_CAPBSET_READ", Const, 0}, + {"PR_CLEAR_SECCOMP_FILTER", Const, 0}, + {"PR_ENDIAN_BIG", Const, 0}, + {"PR_ENDIAN_LITTLE", Const, 0}, + {"PR_ENDIAN_PPC_LITTLE", Const, 0}, + {"PR_FPEMU_NOPRINT", Const, 0}, + {"PR_FPEMU_SIGFPE", Const, 0}, + {"PR_FP_EXC_ASYNC", Const, 0}, + {"PR_FP_EXC_DISABLED", Const, 0}, + {"PR_FP_EXC_DIV", Const, 0}, + {"PR_FP_EXC_INV", Const, 0}, + {"PR_FP_EXC_NONRECOV", Const, 0}, + {"PR_FP_EXC_OVF", Const, 0}, + {"PR_FP_EXC_PRECISE", Const, 0}, + {"PR_FP_EXC_RES", Const, 0}, + {"PR_FP_EXC_SW_ENABLE", Const, 0}, + {"PR_FP_EXC_UND", Const, 0}, + {"PR_GET_DUMPABLE", Const, 0}, + {"PR_GET_ENDIAN", Const, 0}, + {"PR_GET_FPEMU", Const, 0}, + {"PR_GET_FPEXC", Const, 0}, + {"PR_GET_KEEPCAPS", Const, 0}, + {"PR_GET_NAME", Const, 0}, + {"PR_GET_PDEATHSIG", Const, 0}, + {"PR_GET_SECCOMP", Const, 0}, + {"PR_GET_SECCOMP_FILTER", Const, 0}, + {"PR_GET_SECUREBITS", Const, 0}, + {"PR_GET_TIMERSLACK", Const, 0}, + {"PR_GET_TIMING", Const, 0}, + {"PR_GET_TSC", Const, 0}, + {"PR_GET_UNALIGN", Const, 0}, + {"PR_MCE_KILL", Const, 0}, + {"PR_MCE_KILL_CLEAR", Const, 0}, + {"PR_MCE_KILL_DEFAULT", Const, 0}, + {"PR_MCE_KILL_EARLY", Const, 0}, + {"PR_MCE_KILL_GET", Const, 0}, + {"PR_MCE_KILL_LATE", Const, 0}, + {"PR_MCE_KILL_SET", Const, 0}, + {"PR_SECCOMP_FILTER_EVENT", Const, 0}, + {"PR_SECCOMP_FILTER_SYSCALL", Const, 0}, + {"PR_SET_DUMPABLE", Const, 0}, + {"PR_SET_ENDIAN", Const, 0}, + {"PR_SET_FPEMU", Const, 0}, + {"PR_SET_FPEXC", Const, 0}, + {"PR_SET_KEEPCAPS", Const, 0}, + {"PR_SET_NAME", Const, 0}, + {"PR_SET_PDEATHSIG", Const, 0}, + {"PR_SET_PTRACER", Const, 0}, + {"PR_SET_SECCOMP", Const, 0}, + {"PR_SET_SECCOMP_FILTER", Const, 0}, + {"PR_SET_SECUREBITS", Const, 0}, + {"PR_SET_TIMERSLACK", Const, 0}, + {"PR_SET_TIMING", Const, 0}, + {"PR_SET_TSC", Const, 0}, + {"PR_SET_UNALIGN", Const, 0}, + {"PR_TASK_PERF_EVENTS_DISABLE", Const, 0}, + {"PR_TASK_PERF_EVENTS_ENABLE", Const, 0}, + {"PR_TIMING_STATISTICAL", Const, 0}, + {"PR_TIMING_TIMESTAMP", Const, 0}, + {"PR_TSC_ENABLE", Const, 0}, + {"PR_TSC_SIGSEGV", Const, 0}, + {"PR_UNALIGN_NOPRINT", Const, 0}, + {"PR_UNALIGN_SIGBUS", Const, 0}, + {"PTRACE_ARCH_PRCTL", Const, 0}, + {"PTRACE_ATTACH", Const, 0}, + {"PTRACE_CONT", Const, 0}, + {"PTRACE_DETACH", Const, 0}, + {"PTRACE_EVENT_CLONE", Const, 0}, + {"PTRACE_EVENT_EXEC", Const, 0}, + {"PTRACE_EVENT_EXIT", Const, 0}, + {"PTRACE_EVENT_FORK", Const, 0}, + {"PTRACE_EVENT_VFORK", Const, 0}, + {"PTRACE_EVENT_VFORK_DONE", Const, 0}, + {"PTRACE_GETCRUNCHREGS", Const, 0}, + {"PTRACE_GETEVENTMSG", Const, 0}, + {"PTRACE_GETFPREGS", Const, 0}, + {"PTRACE_GETFPXREGS", Const, 0}, + {"PTRACE_GETHBPREGS", Const, 0}, + {"PTRACE_GETREGS", Const, 0}, + {"PTRACE_GETREGSET", Const, 0}, + {"PTRACE_GETSIGINFO", Const, 0}, + {"PTRACE_GETVFPREGS", Const, 0}, + {"PTRACE_GETWMMXREGS", Const, 0}, + {"PTRACE_GET_THREAD_AREA", Const, 0}, + {"PTRACE_KILL", Const, 0}, + {"PTRACE_OLDSETOPTIONS", Const, 0}, + {"PTRACE_O_MASK", Const, 0}, + {"PTRACE_O_TRACECLONE", Const, 0}, + {"PTRACE_O_TRACEEXEC", Const, 0}, + {"PTRACE_O_TRACEEXIT", Const, 0}, + {"PTRACE_O_TRACEFORK", Const, 0}, + {"PTRACE_O_TRACESYSGOOD", Const, 0}, + {"PTRACE_O_TRACEVFORK", Const, 0}, + {"PTRACE_O_TRACEVFORKDONE", Const, 0}, + {"PTRACE_PEEKDATA", Const, 0}, + {"PTRACE_PEEKTEXT", Const, 0}, + {"PTRACE_PEEKUSR", Const, 0}, + {"PTRACE_POKEDATA", Const, 0}, + {"PTRACE_POKETEXT", Const, 0}, + {"PTRACE_POKEUSR", Const, 0}, + {"PTRACE_SETCRUNCHREGS", Const, 0}, + {"PTRACE_SETFPREGS", Const, 0}, + {"PTRACE_SETFPXREGS", Const, 0}, + {"PTRACE_SETHBPREGS", Const, 0}, + {"PTRACE_SETOPTIONS", Const, 0}, + {"PTRACE_SETREGS", Const, 0}, + {"PTRACE_SETREGSET", Const, 0}, + {"PTRACE_SETSIGINFO", Const, 0}, + {"PTRACE_SETVFPREGS", Const, 0}, + {"PTRACE_SETWMMXREGS", Const, 0}, + {"PTRACE_SET_SYSCALL", Const, 0}, + {"PTRACE_SET_THREAD_AREA", Const, 0}, + {"PTRACE_SINGLEBLOCK", Const, 0}, + {"PTRACE_SINGLESTEP", Const, 0}, + {"PTRACE_SYSCALL", Const, 0}, + {"PTRACE_SYSEMU", Const, 0}, + {"PTRACE_SYSEMU_SINGLESTEP", Const, 0}, + {"PTRACE_TRACEME", Const, 0}, + {"PT_ATTACH", Const, 0}, + {"PT_ATTACHEXC", Const, 0}, + {"PT_CONTINUE", Const, 0}, + {"PT_DATA_ADDR", Const, 0}, + {"PT_DENY_ATTACH", Const, 0}, + {"PT_DETACH", Const, 0}, + {"PT_FIRSTMACH", Const, 0}, + {"PT_FORCEQUOTA", Const, 0}, + {"PT_KILL", Const, 0}, + {"PT_MASK", Const, 1}, + {"PT_READ_D", Const, 0}, + {"PT_READ_I", Const, 0}, + {"PT_READ_U", Const, 0}, + {"PT_SIGEXC", Const, 0}, + {"PT_STEP", Const, 0}, + {"PT_TEXT_ADDR", Const, 0}, + {"PT_TEXT_END_ADDR", Const, 0}, + {"PT_THUPDATE", Const, 0}, + {"PT_TRACE_ME", Const, 0}, + {"PT_WRITE_D", Const, 0}, + {"PT_WRITE_I", Const, 0}, + {"PT_WRITE_U", Const, 0}, + {"ParseDirent", Func, 0}, + {"ParseNetlinkMessage", Func, 0}, + {"ParseNetlinkRouteAttr", Func, 0}, + {"ParseRoutingMessage", Func, 0}, + {"ParseRoutingSockaddr", Func, 0}, + {"ParseSocketControlMessage", Func, 0}, + {"ParseUnixCredentials", Func, 0}, + {"ParseUnixRights", Func, 0}, + {"PathMax", Const, 0}, + {"Pathconf", Func, 0}, + {"Pause", Func, 0}, + {"Pipe", Func, 0}, + {"Pipe2", Func, 1}, + {"PivotRoot", Func, 0}, + {"Pointer", Type, 11}, + {"PostQueuedCompletionStatus", Func, 0}, + {"Pread", Func, 0}, + {"Proc", Type, 0}, + {"Proc.Dll", Field, 0}, + {"Proc.Name", Field, 0}, + {"ProcAttr", Type, 0}, + {"ProcAttr.Dir", Field, 0}, + {"ProcAttr.Env", Field, 0}, + {"ProcAttr.Files", Field, 0}, + {"ProcAttr.Sys", Field, 0}, + {"Process32First", Func, 4}, + {"Process32Next", Func, 4}, + {"ProcessEntry32", Type, 4}, + {"ProcessEntry32.DefaultHeapID", Field, 4}, + {"ProcessEntry32.ExeFile", Field, 4}, + {"ProcessEntry32.Flags", Field, 4}, + {"ProcessEntry32.ModuleID", Field, 4}, + {"ProcessEntry32.ParentProcessID", Field, 4}, + {"ProcessEntry32.PriClassBase", Field, 4}, + {"ProcessEntry32.ProcessID", Field, 4}, + {"ProcessEntry32.Size", Field, 4}, + {"ProcessEntry32.Threads", Field, 4}, + {"ProcessEntry32.Usage", Field, 4}, + {"ProcessInformation", Type, 0}, + {"ProcessInformation.Process", Field, 0}, + {"ProcessInformation.ProcessId", Field, 0}, + {"ProcessInformation.Thread", Field, 0}, + {"ProcessInformation.ThreadId", Field, 0}, + {"Protoent", Type, 0}, + {"Protoent.Aliases", Field, 0}, + {"Protoent.Name", Field, 0}, + {"Protoent.Proto", Field, 0}, + {"PtraceAttach", Func, 0}, + {"PtraceCont", Func, 0}, + {"PtraceDetach", Func, 0}, + {"PtraceGetEventMsg", Func, 0}, + {"PtraceGetRegs", Func, 0}, + {"PtracePeekData", Func, 0}, + {"PtracePeekText", Func, 0}, + {"PtracePokeData", Func, 0}, + {"PtracePokeText", Func, 0}, + {"PtraceRegs", Type, 0}, + {"PtraceRegs.Cs", Field, 0}, + {"PtraceRegs.Ds", Field, 0}, + {"PtraceRegs.Eax", Field, 0}, + {"PtraceRegs.Ebp", Field, 0}, + {"PtraceRegs.Ebx", Field, 0}, + {"PtraceRegs.Ecx", Field, 0}, + {"PtraceRegs.Edi", Field, 0}, + {"PtraceRegs.Edx", Field, 0}, + {"PtraceRegs.Eflags", Field, 0}, + {"PtraceRegs.Eip", Field, 0}, + {"PtraceRegs.Es", Field, 0}, + {"PtraceRegs.Esi", Field, 0}, + {"PtraceRegs.Esp", Field, 0}, + {"PtraceRegs.Fs", Field, 0}, + {"PtraceRegs.Fs_base", Field, 0}, + {"PtraceRegs.Gs", Field, 0}, + {"PtraceRegs.Gs_base", Field, 0}, + {"PtraceRegs.Orig_eax", Field, 0}, + {"PtraceRegs.Orig_rax", Field, 0}, + {"PtraceRegs.R10", Field, 0}, + {"PtraceRegs.R11", Field, 0}, + {"PtraceRegs.R12", Field, 0}, + {"PtraceRegs.R13", Field, 0}, + {"PtraceRegs.R14", Field, 0}, + {"PtraceRegs.R15", Field, 0}, + {"PtraceRegs.R8", Field, 0}, + {"PtraceRegs.R9", Field, 0}, + {"PtraceRegs.Rax", Field, 0}, + {"PtraceRegs.Rbp", Field, 0}, + {"PtraceRegs.Rbx", Field, 0}, + {"PtraceRegs.Rcx", Field, 0}, + {"PtraceRegs.Rdi", Field, 0}, + {"PtraceRegs.Rdx", Field, 0}, + {"PtraceRegs.Rip", Field, 0}, + {"PtraceRegs.Rsi", Field, 0}, + {"PtraceRegs.Rsp", Field, 0}, + {"PtraceRegs.Ss", Field, 0}, + {"PtraceRegs.Uregs", Field, 0}, + {"PtraceRegs.Xcs", Field, 0}, + {"PtraceRegs.Xds", Field, 0}, + {"PtraceRegs.Xes", Field, 0}, + {"PtraceRegs.Xfs", Field, 0}, + {"PtraceRegs.Xgs", Field, 0}, + {"PtraceRegs.Xss", Field, 0}, + {"PtraceSetOptions", Func, 0}, + {"PtraceSetRegs", Func, 0}, + {"PtraceSingleStep", Func, 0}, + {"PtraceSyscall", Func, 1}, + {"Pwrite", Func, 0}, + {"REG_BINARY", Const, 0}, + {"REG_DWORD", Const, 0}, + {"REG_DWORD_BIG_ENDIAN", Const, 0}, + {"REG_DWORD_LITTLE_ENDIAN", Const, 0}, + {"REG_EXPAND_SZ", Const, 0}, + {"REG_FULL_RESOURCE_DESCRIPTOR", Const, 0}, + {"REG_LINK", Const, 0}, + {"REG_MULTI_SZ", Const, 0}, + {"REG_NONE", Const, 0}, + {"REG_QWORD", Const, 0}, + {"REG_QWORD_LITTLE_ENDIAN", Const, 0}, + {"REG_RESOURCE_LIST", Const, 0}, + {"REG_RESOURCE_REQUIREMENTS_LIST", Const, 0}, + {"REG_SZ", Const, 0}, + {"RLIMIT_AS", Const, 0}, + {"RLIMIT_CORE", Const, 0}, + {"RLIMIT_CPU", Const, 0}, + {"RLIMIT_CPU_USAGE_MONITOR", Const, 16}, + {"RLIMIT_DATA", Const, 0}, + {"RLIMIT_FSIZE", Const, 0}, + {"RLIMIT_NOFILE", Const, 0}, + {"RLIMIT_STACK", Const, 0}, + {"RLIM_INFINITY", Const, 0}, + {"RTAX_ADVMSS", Const, 0}, + {"RTAX_AUTHOR", Const, 0}, + {"RTAX_BRD", Const, 0}, + {"RTAX_CWND", Const, 0}, + {"RTAX_DST", Const, 0}, + {"RTAX_FEATURES", Const, 0}, + {"RTAX_FEATURE_ALLFRAG", Const, 0}, + {"RTAX_FEATURE_ECN", Const, 0}, + {"RTAX_FEATURE_SACK", Const, 0}, + {"RTAX_FEATURE_TIMESTAMP", Const, 0}, + {"RTAX_GATEWAY", Const, 0}, + {"RTAX_GENMASK", Const, 0}, + {"RTAX_HOPLIMIT", Const, 0}, + {"RTAX_IFA", Const, 0}, + {"RTAX_IFP", Const, 0}, + {"RTAX_INITCWND", Const, 0}, + {"RTAX_INITRWND", Const, 0}, + {"RTAX_LABEL", Const, 1}, + {"RTAX_LOCK", Const, 0}, + {"RTAX_MAX", Const, 0}, + {"RTAX_MTU", Const, 0}, + {"RTAX_NETMASK", Const, 0}, + {"RTAX_REORDERING", Const, 0}, + {"RTAX_RTO_MIN", Const, 0}, + {"RTAX_RTT", Const, 0}, + {"RTAX_RTTVAR", Const, 0}, + {"RTAX_SRC", Const, 1}, + {"RTAX_SRCMASK", Const, 1}, + {"RTAX_SSTHRESH", Const, 0}, + {"RTAX_TAG", Const, 1}, + {"RTAX_UNSPEC", Const, 0}, + {"RTAX_WINDOW", Const, 0}, + {"RTA_ALIGNTO", Const, 0}, + {"RTA_AUTHOR", Const, 0}, + {"RTA_BRD", Const, 0}, + {"RTA_CACHEINFO", Const, 0}, + {"RTA_DST", Const, 0}, + {"RTA_FLOW", Const, 0}, + {"RTA_GATEWAY", Const, 0}, + {"RTA_GENMASK", Const, 0}, + {"RTA_IFA", Const, 0}, + {"RTA_IFP", Const, 0}, + {"RTA_IIF", Const, 0}, + {"RTA_LABEL", Const, 1}, + {"RTA_MAX", Const, 0}, + {"RTA_METRICS", Const, 0}, + {"RTA_MULTIPATH", Const, 0}, + {"RTA_NETMASK", Const, 0}, + {"RTA_OIF", Const, 0}, + {"RTA_PREFSRC", Const, 0}, + {"RTA_PRIORITY", Const, 0}, + {"RTA_SRC", Const, 0}, + {"RTA_SRCMASK", Const, 1}, + {"RTA_TABLE", Const, 0}, + {"RTA_TAG", Const, 1}, + {"RTA_UNSPEC", Const, 0}, + {"RTCF_DIRECTSRC", Const, 0}, + {"RTCF_DOREDIRECT", Const, 0}, + {"RTCF_LOG", Const, 0}, + {"RTCF_MASQ", Const, 0}, + {"RTCF_NAT", Const, 0}, + {"RTCF_VALVE", Const, 0}, + {"RTF_ADDRCLASSMASK", Const, 0}, + {"RTF_ADDRCONF", Const, 0}, + {"RTF_ALLONLINK", Const, 0}, + {"RTF_ANNOUNCE", Const, 1}, + {"RTF_BLACKHOLE", Const, 0}, + {"RTF_BROADCAST", Const, 0}, + {"RTF_CACHE", Const, 0}, + {"RTF_CLONED", Const, 1}, + {"RTF_CLONING", Const, 0}, + {"RTF_CONDEMNED", Const, 0}, + {"RTF_DEFAULT", Const, 0}, + {"RTF_DELCLONE", Const, 0}, + {"RTF_DONE", Const, 0}, + {"RTF_DYNAMIC", Const, 0}, + {"RTF_FLOW", Const, 0}, + {"RTF_FMASK", Const, 0}, + {"RTF_GATEWAY", Const, 0}, + {"RTF_GWFLAG_COMPAT", Const, 3}, + {"RTF_HOST", Const, 0}, + {"RTF_IFREF", Const, 0}, + {"RTF_IFSCOPE", Const, 0}, + {"RTF_INTERFACE", Const, 0}, + {"RTF_IRTT", Const, 0}, + {"RTF_LINKRT", Const, 0}, + {"RTF_LLDATA", Const, 0}, + {"RTF_LLINFO", Const, 0}, + {"RTF_LOCAL", Const, 0}, + {"RTF_MASK", Const, 1}, + {"RTF_MODIFIED", Const, 0}, + {"RTF_MPATH", Const, 1}, + {"RTF_MPLS", Const, 1}, + {"RTF_MSS", Const, 0}, + {"RTF_MTU", Const, 0}, + {"RTF_MULTICAST", Const, 0}, + {"RTF_NAT", Const, 0}, + {"RTF_NOFORWARD", Const, 0}, + {"RTF_NONEXTHOP", Const, 0}, + {"RTF_NOPMTUDISC", Const, 0}, + {"RTF_PERMANENT_ARP", Const, 1}, + {"RTF_PINNED", Const, 0}, + {"RTF_POLICY", Const, 0}, + {"RTF_PRCLONING", Const, 0}, + {"RTF_PROTO1", Const, 0}, + {"RTF_PROTO2", Const, 0}, + {"RTF_PROTO3", Const, 0}, + {"RTF_PROXY", Const, 16}, + {"RTF_REINSTATE", Const, 0}, + {"RTF_REJECT", Const, 0}, + {"RTF_RNH_LOCKED", Const, 0}, + {"RTF_ROUTER", Const, 16}, + {"RTF_SOURCE", Const, 1}, + {"RTF_SRC", Const, 1}, + {"RTF_STATIC", Const, 0}, + {"RTF_STICKY", Const, 0}, + {"RTF_THROW", Const, 0}, + {"RTF_TUNNEL", Const, 1}, + {"RTF_UP", Const, 0}, + {"RTF_USETRAILERS", Const, 1}, + {"RTF_WASCLONED", Const, 0}, + {"RTF_WINDOW", Const, 0}, + {"RTF_XRESOLVE", Const, 0}, + {"RTM_ADD", Const, 0}, + {"RTM_BASE", Const, 0}, + {"RTM_CHANGE", Const, 0}, + {"RTM_CHGADDR", Const, 1}, + {"RTM_DELACTION", Const, 0}, + {"RTM_DELADDR", Const, 0}, + {"RTM_DELADDRLABEL", Const, 0}, + {"RTM_DELETE", Const, 0}, + {"RTM_DELLINK", Const, 0}, + {"RTM_DELMADDR", Const, 0}, + {"RTM_DELNEIGH", Const, 0}, + {"RTM_DELQDISC", Const, 0}, + {"RTM_DELROUTE", Const, 0}, + {"RTM_DELRULE", Const, 0}, + {"RTM_DELTCLASS", Const, 0}, + {"RTM_DELTFILTER", Const, 0}, + {"RTM_DESYNC", Const, 1}, + {"RTM_F_CLONED", Const, 0}, + {"RTM_F_EQUALIZE", Const, 0}, + {"RTM_F_NOTIFY", Const, 0}, + {"RTM_F_PREFIX", Const, 0}, + {"RTM_GET", Const, 0}, + {"RTM_GET2", Const, 0}, + {"RTM_GETACTION", Const, 0}, + {"RTM_GETADDR", Const, 0}, + {"RTM_GETADDRLABEL", Const, 0}, + {"RTM_GETANYCAST", Const, 0}, + {"RTM_GETDCB", Const, 0}, + {"RTM_GETLINK", Const, 0}, + {"RTM_GETMULTICAST", Const, 0}, + {"RTM_GETNEIGH", Const, 0}, + {"RTM_GETNEIGHTBL", Const, 0}, + {"RTM_GETQDISC", Const, 0}, + {"RTM_GETROUTE", Const, 0}, + {"RTM_GETRULE", Const, 0}, + {"RTM_GETTCLASS", Const, 0}, + {"RTM_GETTFILTER", Const, 0}, + {"RTM_IEEE80211", Const, 0}, + {"RTM_IFANNOUNCE", Const, 0}, + {"RTM_IFINFO", Const, 0}, + {"RTM_IFINFO2", Const, 0}, + {"RTM_LLINFO_UPD", Const, 1}, + {"RTM_LOCK", Const, 0}, + {"RTM_LOSING", Const, 0}, + {"RTM_MAX", Const, 0}, + {"RTM_MAXSIZE", Const, 1}, + {"RTM_MISS", Const, 0}, + {"RTM_NEWACTION", Const, 0}, + {"RTM_NEWADDR", Const, 0}, + {"RTM_NEWADDRLABEL", Const, 0}, + {"RTM_NEWLINK", Const, 0}, + {"RTM_NEWMADDR", Const, 0}, + {"RTM_NEWMADDR2", Const, 0}, + {"RTM_NEWNDUSEROPT", Const, 0}, + {"RTM_NEWNEIGH", Const, 0}, + {"RTM_NEWNEIGHTBL", Const, 0}, + {"RTM_NEWPREFIX", Const, 0}, + {"RTM_NEWQDISC", Const, 0}, + {"RTM_NEWROUTE", Const, 0}, + {"RTM_NEWRULE", Const, 0}, + {"RTM_NEWTCLASS", Const, 0}, + {"RTM_NEWTFILTER", Const, 0}, + {"RTM_NR_FAMILIES", Const, 0}, + {"RTM_NR_MSGTYPES", Const, 0}, + {"RTM_OIFINFO", Const, 1}, + {"RTM_OLDADD", Const, 0}, + {"RTM_OLDDEL", Const, 0}, + {"RTM_OOIFINFO", Const, 1}, + {"RTM_REDIRECT", Const, 0}, + {"RTM_RESOLVE", Const, 0}, + {"RTM_RTTUNIT", Const, 0}, + {"RTM_SETDCB", Const, 0}, + {"RTM_SETGATE", Const, 1}, + {"RTM_SETLINK", Const, 0}, + {"RTM_SETNEIGHTBL", Const, 0}, + {"RTM_VERSION", Const, 0}, + {"RTNH_ALIGNTO", Const, 0}, + {"RTNH_F_DEAD", Const, 0}, + {"RTNH_F_ONLINK", Const, 0}, + {"RTNH_F_PERVASIVE", Const, 0}, + {"RTNLGRP_IPV4_IFADDR", Const, 1}, + {"RTNLGRP_IPV4_MROUTE", Const, 1}, + {"RTNLGRP_IPV4_ROUTE", Const, 1}, + {"RTNLGRP_IPV4_RULE", Const, 1}, + {"RTNLGRP_IPV6_IFADDR", Const, 1}, + {"RTNLGRP_IPV6_IFINFO", Const, 1}, + {"RTNLGRP_IPV6_MROUTE", Const, 1}, + {"RTNLGRP_IPV6_PREFIX", Const, 1}, + {"RTNLGRP_IPV6_ROUTE", Const, 1}, + {"RTNLGRP_IPV6_RULE", Const, 1}, + {"RTNLGRP_LINK", Const, 1}, + {"RTNLGRP_ND_USEROPT", Const, 1}, + {"RTNLGRP_NEIGH", Const, 1}, + {"RTNLGRP_NONE", Const, 1}, + {"RTNLGRP_NOTIFY", Const, 1}, + {"RTNLGRP_TC", Const, 1}, + {"RTN_ANYCAST", Const, 0}, + {"RTN_BLACKHOLE", Const, 0}, + {"RTN_BROADCAST", Const, 0}, + {"RTN_LOCAL", Const, 0}, + {"RTN_MAX", Const, 0}, + {"RTN_MULTICAST", Const, 0}, + {"RTN_NAT", Const, 0}, + {"RTN_PROHIBIT", Const, 0}, + {"RTN_THROW", Const, 0}, + {"RTN_UNICAST", Const, 0}, + {"RTN_UNREACHABLE", Const, 0}, + {"RTN_UNSPEC", Const, 0}, + {"RTN_XRESOLVE", Const, 0}, + {"RTPROT_BIRD", Const, 0}, + {"RTPROT_BOOT", Const, 0}, + {"RTPROT_DHCP", Const, 0}, + {"RTPROT_DNROUTED", Const, 0}, + {"RTPROT_GATED", Const, 0}, + {"RTPROT_KERNEL", Const, 0}, + {"RTPROT_MRT", Const, 0}, + {"RTPROT_NTK", Const, 0}, + {"RTPROT_RA", Const, 0}, + {"RTPROT_REDIRECT", Const, 0}, + {"RTPROT_STATIC", Const, 0}, + {"RTPROT_UNSPEC", Const, 0}, + {"RTPROT_XORP", Const, 0}, + {"RTPROT_ZEBRA", Const, 0}, + {"RTV_EXPIRE", Const, 0}, + {"RTV_HOPCOUNT", Const, 0}, + {"RTV_MTU", Const, 0}, + {"RTV_RPIPE", Const, 0}, + {"RTV_RTT", Const, 0}, + {"RTV_RTTVAR", Const, 0}, + {"RTV_SPIPE", Const, 0}, + {"RTV_SSTHRESH", Const, 0}, + {"RTV_WEIGHT", Const, 0}, + {"RT_CACHING_CONTEXT", Const, 1}, + {"RT_CLASS_DEFAULT", Const, 0}, + {"RT_CLASS_LOCAL", Const, 0}, + {"RT_CLASS_MAIN", Const, 0}, + {"RT_CLASS_MAX", Const, 0}, + {"RT_CLASS_UNSPEC", Const, 0}, + {"RT_DEFAULT_FIB", Const, 1}, + {"RT_NORTREF", Const, 1}, + {"RT_SCOPE_HOST", Const, 0}, + {"RT_SCOPE_LINK", Const, 0}, + {"RT_SCOPE_NOWHERE", Const, 0}, + {"RT_SCOPE_SITE", Const, 0}, + {"RT_SCOPE_UNIVERSE", Const, 0}, + {"RT_TABLEID_MAX", Const, 1}, + {"RT_TABLE_COMPAT", Const, 0}, + {"RT_TABLE_DEFAULT", Const, 0}, + {"RT_TABLE_LOCAL", Const, 0}, + {"RT_TABLE_MAIN", Const, 0}, + {"RT_TABLE_MAX", Const, 0}, + {"RT_TABLE_UNSPEC", Const, 0}, + {"RUSAGE_CHILDREN", Const, 0}, + {"RUSAGE_SELF", Const, 0}, + {"RUSAGE_THREAD", Const, 0}, + {"Radvisory_t", Type, 0}, + {"Radvisory_t.Count", Field, 0}, + {"Radvisory_t.Offset", Field, 0}, + {"Radvisory_t.Pad_cgo_0", Field, 0}, + {"RawConn", Type, 9}, + {"RawSockaddr", Type, 0}, + {"RawSockaddr.Data", Field, 0}, + {"RawSockaddr.Family", Field, 0}, + {"RawSockaddr.Len", Field, 0}, + {"RawSockaddrAny", Type, 0}, + {"RawSockaddrAny.Addr", Field, 0}, + {"RawSockaddrAny.Pad", Field, 0}, + {"RawSockaddrDatalink", Type, 0}, + {"RawSockaddrDatalink.Alen", Field, 0}, + {"RawSockaddrDatalink.Data", Field, 0}, + {"RawSockaddrDatalink.Family", Field, 0}, + {"RawSockaddrDatalink.Index", Field, 0}, + {"RawSockaddrDatalink.Len", Field, 0}, + {"RawSockaddrDatalink.Nlen", Field, 0}, + {"RawSockaddrDatalink.Pad_cgo_0", Field, 2}, + {"RawSockaddrDatalink.Slen", Field, 0}, + {"RawSockaddrDatalink.Type", Field, 0}, + {"RawSockaddrInet4", Type, 0}, + {"RawSockaddrInet4.Addr", Field, 0}, + {"RawSockaddrInet4.Family", Field, 0}, + {"RawSockaddrInet4.Len", Field, 0}, + {"RawSockaddrInet4.Port", Field, 0}, + {"RawSockaddrInet4.Zero", Field, 0}, + {"RawSockaddrInet6", Type, 0}, + {"RawSockaddrInet6.Addr", Field, 0}, + {"RawSockaddrInet6.Family", Field, 0}, + {"RawSockaddrInet6.Flowinfo", Field, 0}, + {"RawSockaddrInet6.Len", Field, 0}, + {"RawSockaddrInet6.Port", Field, 0}, + {"RawSockaddrInet6.Scope_id", Field, 0}, + {"RawSockaddrLinklayer", Type, 0}, + {"RawSockaddrLinklayer.Addr", Field, 0}, + {"RawSockaddrLinklayer.Family", Field, 0}, + {"RawSockaddrLinklayer.Halen", Field, 0}, + {"RawSockaddrLinklayer.Hatype", Field, 0}, + {"RawSockaddrLinklayer.Ifindex", Field, 0}, + {"RawSockaddrLinklayer.Pkttype", Field, 0}, + {"RawSockaddrLinklayer.Protocol", Field, 0}, + {"RawSockaddrNetlink", Type, 0}, + {"RawSockaddrNetlink.Family", Field, 0}, + {"RawSockaddrNetlink.Groups", Field, 0}, + {"RawSockaddrNetlink.Pad", Field, 0}, + {"RawSockaddrNetlink.Pid", Field, 0}, + {"RawSockaddrUnix", Type, 0}, + {"RawSockaddrUnix.Family", Field, 0}, + {"RawSockaddrUnix.Len", Field, 0}, + {"RawSockaddrUnix.Pad_cgo_0", Field, 2}, + {"RawSockaddrUnix.Path", Field, 0}, + {"RawSyscall", Func, 0}, + {"RawSyscall6", Func, 0}, + {"Read", Func, 0}, + {"ReadConsole", Func, 1}, + {"ReadDirectoryChanges", Func, 0}, + {"ReadDirent", Func, 0}, + {"ReadFile", Func, 0}, + {"Readlink", Func, 0}, + {"Reboot", Func, 0}, + {"Recvfrom", Func, 0}, + {"Recvmsg", Func, 0}, + {"RegCloseKey", Func, 0}, + {"RegEnumKeyEx", Func, 0}, + {"RegOpenKeyEx", Func, 0}, + {"RegQueryInfoKey", Func, 0}, + {"RegQueryValueEx", Func, 0}, + {"RemoveDirectory", Func, 0}, + {"Removexattr", Func, 1}, + {"Rename", Func, 0}, + {"Renameat", Func, 0}, + {"Revoke", Func, 0}, + {"Rlimit", Type, 0}, + {"Rlimit.Cur", Field, 0}, + {"Rlimit.Max", Field, 0}, + {"Rmdir", Func, 0}, + {"RouteMessage", Type, 0}, + {"RouteMessage.Data", Field, 0}, + {"RouteMessage.Header", Field, 0}, + {"RouteRIB", Func, 0}, + {"RoutingMessage", Type, 0}, + {"RtAttr", Type, 0}, + {"RtAttr.Len", Field, 0}, + {"RtAttr.Type", Field, 0}, + {"RtGenmsg", Type, 0}, + {"RtGenmsg.Family", Field, 0}, + {"RtMetrics", Type, 0}, + {"RtMetrics.Expire", Field, 0}, + {"RtMetrics.Filler", Field, 0}, + {"RtMetrics.Hopcount", Field, 0}, + {"RtMetrics.Locks", Field, 0}, + {"RtMetrics.Mtu", Field, 0}, + {"RtMetrics.Pad", Field, 3}, + {"RtMetrics.Pksent", Field, 0}, + {"RtMetrics.Recvpipe", Field, 0}, + {"RtMetrics.Refcnt", Field, 2}, + {"RtMetrics.Rtt", Field, 0}, + {"RtMetrics.Rttvar", Field, 0}, + {"RtMetrics.Sendpipe", Field, 0}, + {"RtMetrics.Ssthresh", Field, 0}, + {"RtMetrics.Weight", Field, 0}, + {"RtMsg", Type, 0}, + {"RtMsg.Dst_len", Field, 0}, + {"RtMsg.Family", Field, 0}, + {"RtMsg.Flags", Field, 0}, + {"RtMsg.Protocol", Field, 0}, + {"RtMsg.Scope", Field, 0}, + {"RtMsg.Src_len", Field, 0}, + {"RtMsg.Table", Field, 0}, + {"RtMsg.Tos", Field, 0}, + {"RtMsg.Type", Field, 0}, + {"RtMsghdr", Type, 0}, + {"RtMsghdr.Addrs", Field, 0}, + {"RtMsghdr.Errno", Field, 0}, + {"RtMsghdr.Flags", Field, 0}, + {"RtMsghdr.Fmask", Field, 0}, + {"RtMsghdr.Hdrlen", Field, 2}, + {"RtMsghdr.Index", Field, 0}, + {"RtMsghdr.Inits", Field, 0}, + {"RtMsghdr.Mpls", Field, 2}, + {"RtMsghdr.Msglen", Field, 0}, + {"RtMsghdr.Pad_cgo_0", Field, 0}, + {"RtMsghdr.Pad_cgo_1", Field, 2}, + {"RtMsghdr.Pid", Field, 0}, + {"RtMsghdr.Priority", Field, 2}, + {"RtMsghdr.Rmx", Field, 0}, + {"RtMsghdr.Seq", Field, 0}, + {"RtMsghdr.Tableid", Field, 2}, + {"RtMsghdr.Type", Field, 0}, + {"RtMsghdr.Use", Field, 0}, + {"RtMsghdr.Version", Field, 0}, + {"RtNexthop", Type, 0}, + {"RtNexthop.Flags", Field, 0}, + {"RtNexthop.Hops", Field, 0}, + {"RtNexthop.Ifindex", Field, 0}, + {"RtNexthop.Len", Field, 0}, + {"Rusage", Type, 0}, + {"Rusage.CreationTime", Field, 0}, + {"Rusage.ExitTime", Field, 0}, + {"Rusage.Idrss", Field, 0}, + {"Rusage.Inblock", Field, 0}, + {"Rusage.Isrss", Field, 0}, + {"Rusage.Ixrss", Field, 0}, + {"Rusage.KernelTime", Field, 0}, + {"Rusage.Majflt", Field, 0}, + {"Rusage.Maxrss", Field, 0}, + {"Rusage.Minflt", Field, 0}, + {"Rusage.Msgrcv", Field, 0}, + {"Rusage.Msgsnd", Field, 0}, + {"Rusage.Nivcsw", Field, 0}, + {"Rusage.Nsignals", Field, 0}, + {"Rusage.Nswap", Field, 0}, + {"Rusage.Nvcsw", Field, 0}, + {"Rusage.Oublock", Field, 0}, + {"Rusage.Stime", Field, 0}, + {"Rusage.UserTime", Field, 0}, + {"Rusage.Utime", Field, 0}, + {"SCM_BINTIME", Const, 0}, + {"SCM_CREDENTIALS", Const, 0}, + {"SCM_CREDS", Const, 0}, + {"SCM_RIGHTS", Const, 0}, + {"SCM_TIMESTAMP", Const, 0}, + {"SCM_TIMESTAMPING", Const, 0}, + {"SCM_TIMESTAMPNS", Const, 0}, + {"SCM_TIMESTAMP_MONOTONIC", Const, 0}, + {"SHUT_RD", Const, 0}, + {"SHUT_RDWR", Const, 0}, + {"SHUT_WR", Const, 0}, + {"SID", Type, 0}, + {"SIDAndAttributes", Type, 0}, + {"SIDAndAttributes.Attributes", Field, 0}, + {"SIDAndAttributes.Sid", Field, 0}, + {"SIGABRT", Const, 0}, + {"SIGALRM", Const, 0}, + {"SIGBUS", Const, 0}, + {"SIGCHLD", Const, 0}, + {"SIGCLD", Const, 0}, + {"SIGCONT", Const, 0}, + {"SIGEMT", Const, 0}, + {"SIGFPE", Const, 0}, + {"SIGHUP", Const, 0}, + {"SIGILL", Const, 0}, + {"SIGINFO", Const, 0}, + {"SIGINT", Const, 0}, + {"SIGIO", Const, 0}, + {"SIGIOT", Const, 0}, + {"SIGKILL", Const, 0}, + {"SIGLIBRT", Const, 1}, + {"SIGLWP", Const, 0}, + {"SIGPIPE", Const, 0}, + {"SIGPOLL", Const, 0}, + {"SIGPROF", Const, 0}, + {"SIGPWR", Const, 0}, + {"SIGQUIT", Const, 0}, + {"SIGSEGV", Const, 0}, + {"SIGSTKFLT", Const, 0}, + {"SIGSTOP", Const, 0}, + {"SIGSYS", Const, 0}, + {"SIGTERM", Const, 0}, + {"SIGTHR", Const, 0}, + {"SIGTRAP", Const, 0}, + {"SIGTSTP", Const, 0}, + {"SIGTTIN", Const, 0}, + {"SIGTTOU", Const, 0}, + {"SIGUNUSED", Const, 0}, + {"SIGURG", Const, 0}, + {"SIGUSR1", Const, 0}, + {"SIGUSR2", Const, 0}, + {"SIGVTALRM", Const, 0}, + {"SIGWINCH", Const, 0}, + {"SIGXCPU", Const, 0}, + {"SIGXFSZ", Const, 0}, + {"SIOCADDDLCI", Const, 0}, + {"SIOCADDMULTI", Const, 0}, + {"SIOCADDRT", Const, 0}, + {"SIOCAIFADDR", Const, 0}, + {"SIOCAIFGROUP", Const, 0}, + {"SIOCALIFADDR", Const, 0}, + {"SIOCARPIPLL", Const, 0}, + {"SIOCATMARK", Const, 0}, + {"SIOCAUTOADDR", Const, 0}, + {"SIOCAUTONETMASK", Const, 0}, + {"SIOCBRDGADD", Const, 1}, + {"SIOCBRDGADDS", Const, 1}, + {"SIOCBRDGARL", Const, 1}, + {"SIOCBRDGDADDR", Const, 1}, + {"SIOCBRDGDEL", Const, 1}, + {"SIOCBRDGDELS", Const, 1}, + {"SIOCBRDGFLUSH", Const, 1}, + {"SIOCBRDGFRL", Const, 1}, + {"SIOCBRDGGCACHE", Const, 1}, + {"SIOCBRDGGFD", Const, 1}, + {"SIOCBRDGGHT", Const, 1}, + {"SIOCBRDGGIFFLGS", Const, 1}, + {"SIOCBRDGGMA", Const, 1}, + {"SIOCBRDGGPARAM", Const, 1}, + {"SIOCBRDGGPRI", Const, 1}, + {"SIOCBRDGGRL", Const, 1}, + {"SIOCBRDGGSIFS", Const, 1}, + {"SIOCBRDGGTO", Const, 1}, + {"SIOCBRDGIFS", Const, 1}, + {"SIOCBRDGRTS", Const, 1}, + {"SIOCBRDGSADDR", Const, 1}, + {"SIOCBRDGSCACHE", Const, 1}, + {"SIOCBRDGSFD", Const, 1}, + {"SIOCBRDGSHT", Const, 1}, + {"SIOCBRDGSIFCOST", Const, 1}, + {"SIOCBRDGSIFFLGS", Const, 1}, + {"SIOCBRDGSIFPRIO", Const, 1}, + {"SIOCBRDGSMA", Const, 1}, + {"SIOCBRDGSPRI", Const, 1}, + {"SIOCBRDGSPROTO", Const, 1}, + {"SIOCBRDGSTO", Const, 1}, + {"SIOCBRDGSTXHC", Const, 1}, + {"SIOCDARP", Const, 0}, + {"SIOCDELDLCI", Const, 0}, + {"SIOCDELMULTI", Const, 0}, + {"SIOCDELRT", Const, 0}, + {"SIOCDEVPRIVATE", Const, 0}, + {"SIOCDIFADDR", Const, 0}, + {"SIOCDIFGROUP", Const, 0}, + {"SIOCDIFPHYADDR", Const, 0}, + {"SIOCDLIFADDR", Const, 0}, + {"SIOCDRARP", Const, 0}, + {"SIOCGARP", Const, 0}, + {"SIOCGDRVSPEC", Const, 0}, + {"SIOCGETKALIVE", Const, 1}, + {"SIOCGETLABEL", Const, 1}, + {"SIOCGETPFLOW", Const, 1}, + {"SIOCGETPFSYNC", Const, 1}, + {"SIOCGETSGCNT", Const, 0}, + {"SIOCGETVIFCNT", Const, 0}, + {"SIOCGETVLAN", Const, 0}, + {"SIOCGHIWAT", Const, 0}, + {"SIOCGIFADDR", Const, 0}, + {"SIOCGIFADDRPREF", Const, 1}, + {"SIOCGIFALIAS", Const, 1}, + {"SIOCGIFALTMTU", Const, 0}, + {"SIOCGIFASYNCMAP", Const, 0}, + {"SIOCGIFBOND", Const, 0}, + {"SIOCGIFBR", Const, 0}, + {"SIOCGIFBRDADDR", Const, 0}, + {"SIOCGIFCAP", Const, 0}, + {"SIOCGIFCONF", Const, 0}, + {"SIOCGIFCOUNT", Const, 0}, + {"SIOCGIFDATA", Const, 1}, + {"SIOCGIFDESCR", Const, 0}, + {"SIOCGIFDEVMTU", Const, 0}, + {"SIOCGIFDLT", Const, 1}, + {"SIOCGIFDSTADDR", Const, 0}, + {"SIOCGIFENCAP", Const, 0}, + {"SIOCGIFFIB", Const, 1}, + {"SIOCGIFFLAGS", Const, 0}, + {"SIOCGIFGATTR", Const, 1}, + {"SIOCGIFGENERIC", Const, 0}, + {"SIOCGIFGMEMB", Const, 0}, + {"SIOCGIFGROUP", Const, 0}, + {"SIOCGIFHARDMTU", Const, 3}, + {"SIOCGIFHWADDR", Const, 0}, + {"SIOCGIFINDEX", Const, 0}, + {"SIOCGIFKPI", Const, 0}, + {"SIOCGIFMAC", Const, 0}, + {"SIOCGIFMAP", Const, 0}, + {"SIOCGIFMEDIA", Const, 0}, + {"SIOCGIFMEM", Const, 0}, + {"SIOCGIFMETRIC", Const, 0}, + {"SIOCGIFMTU", Const, 0}, + {"SIOCGIFNAME", Const, 0}, + {"SIOCGIFNETMASK", Const, 0}, + {"SIOCGIFPDSTADDR", Const, 0}, + {"SIOCGIFPFLAGS", Const, 0}, + {"SIOCGIFPHYS", Const, 0}, + {"SIOCGIFPRIORITY", Const, 1}, + {"SIOCGIFPSRCADDR", Const, 0}, + {"SIOCGIFRDOMAIN", Const, 1}, + {"SIOCGIFRTLABEL", Const, 1}, + {"SIOCGIFSLAVE", Const, 0}, + {"SIOCGIFSTATUS", Const, 0}, + {"SIOCGIFTIMESLOT", Const, 1}, + {"SIOCGIFTXQLEN", Const, 0}, + {"SIOCGIFVLAN", Const, 0}, + {"SIOCGIFWAKEFLAGS", Const, 0}, + {"SIOCGIFXFLAGS", Const, 1}, + {"SIOCGLIFADDR", Const, 0}, + {"SIOCGLIFPHYADDR", Const, 0}, + {"SIOCGLIFPHYRTABLE", Const, 1}, + {"SIOCGLIFPHYTTL", Const, 3}, + {"SIOCGLINKSTR", Const, 1}, + {"SIOCGLOWAT", Const, 0}, + {"SIOCGPGRP", Const, 0}, + {"SIOCGPRIVATE_0", Const, 0}, + {"SIOCGPRIVATE_1", Const, 0}, + {"SIOCGRARP", Const, 0}, + {"SIOCGSPPPPARAMS", Const, 3}, + {"SIOCGSTAMP", Const, 0}, + {"SIOCGSTAMPNS", Const, 0}, + {"SIOCGVH", Const, 1}, + {"SIOCGVNETID", Const, 3}, + {"SIOCIFCREATE", Const, 0}, + {"SIOCIFCREATE2", Const, 0}, + {"SIOCIFDESTROY", Const, 0}, + {"SIOCIFGCLONERS", Const, 0}, + {"SIOCINITIFADDR", Const, 1}, + {"SIOCPROTOPRIVATE", Const, 0}, + {"SIOCRSLVMULTI", Const, 0}, + {"SIOCRTMSG", Const, 0}, + {"SIOCSARP", Const, 0}, + {"SIOCSDRVSPEC", Const, 0}, + {"SIOCSETKALIVE", Const, 1}, + {"SIOCSETLABEL", Const, 1}, + {"SIOCSETPFLOW", Const, 1}, + {"SIOCSETPFSYNC", Const, 1}, + {"SIOCSETVLAN", Const, 0}, + {"SIOCSHIWAT", Const, 0}, + {"SIOCSIFADDR", Const, 0}, + {"SIOCSIFADDRPREF", Const, 1}, + {"SIOCSIFALTMTU", Const, 0}, + {"SIOCSIFASYNCMAP", Const, 0}, + {"SIOCSIFBOND", Const, 0}, + {"SIOCSIFBR", Const, 0}, + {"SIOCSIFBRDADDR", Const, 0}, + {"SIOCSIFCAP", Const, 0}, + {"SIOCSIFDESCR", Const, 0}, + {"SIOCSIFDSTADDR", Const, 0}, + {"SIOCSIFENCAP", Const, 0}, + {"SIOCSIFFIB", Const, 1}, + {"SIOCSIFFLAGS", Const, 0}, + {"SIOCSIFGATTR", Const, 1}, + {"SIOCSIFGENERIC", Const, 0}, + {"SIOCSIFHWADDR", Const, 0}, + {"SIOCSIFHWBROADCAST", Const, 0}, + {"SIOCSIFKPI", Const, 0}, + {"SIOCSIFLINK", Const, 0}, + {"SIOCSIFLLADDR", Const, 0}, + {"SIOCSIFMAC", Const, 0}, + {"SIOCSIFMAP", Const, 0}, + {"SIOCSIFMEDIA", Const, 0}, + {"SIOCSIFMEM", Const, 0}, + {"SIOCSIFMETRIC", Const, 0}, + {"SIOCSIFMTU", Const, 0}, + {"SIOCSIFNAME", Const, 0}, + {"SIOCSIFNETMASK", Const, 0}, + {"SIOCSIFPFLAGS", Const, 0}, + {"SIOCSIFPHYADDR", Const, 0}, + {"SIOCSIFPHYS", Const, 0}, + {"SIOCSIFPRIORITY", Const, 1}, + {"SIOCSIFRDOMAIN", Const, 1}, + {"SIOCSIFRTLABEL", Const, 1}, + {"SIOCSIFRVNET", Const, 0}, + {"SIOCSIFSLAVE", Const, 0}, + {"SIOCSIFTIMESLOT", Const, 1}, + {"SIOCSIFTXQLEN", Const, 0}, + {"SIOCSIFVLAN", Const, 0}, + {"SIOCSIFVNET", Const, 0}, + {"SIOCSIFXFLAGS", Const, 1}, + {"SIOCSLIFPHYADDR", Const, 0}, + {"SIOCSLIFPHYRTABLE", Const, 1}, + {"SIOCSLIFPHYTTL", Const, 3}, + {"SIOCSLINKSTR", Const, 1}, + {"SIOCSLOWAT", Const, 0}, + {"SIOCSPGRP", Const, 0}, + {"SIOCSRARP", Const, 0}, + {"SIOCSSPPPPARAMS", Const, 3}, + {"SIOCSVH", Const, 1}, + {"SIOCSVNETID", Const, 3}, + {"SIOCZIFDATA", Const, 1}, + {"SIO_GET_EXTENSION_FUNCTION_POINTER", Const, 1}, + {"SIO_GET_INTERFACE_LIST", Const, 0}, + {"SIO_KEEPALIVE_VALS", Const, 3}, + {"SIO_UDP_CONNRESET", Const, 4}, + {"SOCK_CLOEXEC", Const, 0}, + {"SOCK_DCCP", Const, 0}, + {"SOCK_DGRAM", Const, 0}, + {"SOCK_FLAGS_MASK", Const, 1}, + {"SOCK_MAXADDRLEN", Const, 0}, + {"SOCK_NONBLOCK", Const, 0}, + {"SOCK_NOSIGPIPE", Const, 1}, + {"SOCK_PACKET", Const, 0}, + {"SOCK_RAW", Const, 0}, + {"SOCK_RDM", Const, 0}, + {"SOCK_SEQPACKET", Const, 0}, + {"SOCK_STREAM", Const, 0}, + {"SOL_AAL", Const, 0}, + {"SOL_ATM", Const, 0}, + {"SOL_DECNET", Const, 0}, + {"SOL_ICMPV6", Const, 0}, + {"SOL_IP", Const, 0}, + {"SOL_IPV6", Const, 0}, + {"SOL_IRDA", Const, 0}, + {"SOL_PACKET", Const, 0}, + {"SOL_RAW", Const, 0}, + {"SOL_SOCKET", Const, 0}, + {"SOL_TCP", Const, 0}, + {"SOL_X25", Const, 0}, + {"SOMAXCONN", Const, 0}, + {"SO_ACCEPTCONN", Const, 0}, + {"SO_ACCEPTFILTER", Const, 0}, + {"SO_ATTACH_FILTER", Const, 0}, + {"SO_BINDANY", Const, 1}, + {"SO_BINDTODEVICE", Const, 0}, + {"SO_BINTIME", Const, 0}, + {"SO_BROADCAST", Const, 0}, + {"SO_BSDCOMPAT", Const, 0}, + {"SO_DEBUG", Const, 0}, + {"SO_DETACH_FILTER", Const, 0}, + {"SO_DOMAIN", Const, 0}, + {"SO_DONTROUTE", Const, 0}, + {"SO_DONTTRUNC", Const, 0}, + {"SO_ERROR", Const, 0}, + {"SO_KEEPALIVE", Const, 0}, + {"SO_LABEL", Const, 0}, + {"SO_LINGER", Const, 0}, + {"SO_LINGER_SEC", Const, 0}, + {"SO_LISTENINCQLEN", Const, 0}, + {"SO_LISTENQLEN", Const, 0}, + {"SO_LISTENQLIMIT", Const, 0}, + {"SO_MARK", Const, 0}, + {"SO_NETPROC", Const, 1}, + {"SO_NKE", Const, 0}, + {"SO_NOADDRERR", Const, 0}, + {"SO_NOHEADER", Const, 1}, + {"SO_NOSIGPIPE", Const, 0}, + {"SO_NOTIFYCONFLICT", Const, 0}, + {"SO_NO_CHECK", Const, 0}, + {"SO_NO_DDP", Const, 0}, + {"SO_NO_OFFLOAD", Const, 0}, + {"SO_NP_EXTENSIONS", Const, 0}, + {"SO_NREAD", Const, 0}, + {"SO_NUMRCVPKT", Const, 16}, + {"SO_NWRITE", Const, 0}, + {"SO_OOBINLINE", Const, 0}, + {"SO_OVERFLOWED", Const, 1}, + {"SO_PASSCRED", Const, 0}, + {"SO_PASSSEC", Const, 0}, + {"SO_PEERCRED", Const, 0}, + {"SO_PEERLABEL", Const, 0}, + {"SO_PEERNAME", Const, 0}, + {"SO_PEERSEC", Const, 0}, + {"SO_PRIORITY", Const, 0}, + {"SO_PROTOCOL", Const, 0}, + {"SO_PROTOTYPE", Const, 1}, + {"SO_RANDOMPORT", Const, 0}, + {"SO_RCVBUF", Const, 0}, + {"SO_RCVBUFFORCE", Const, 0}, + {"SO_RCVLOWAT", Const, 0}, + {"SO_RCVTIMEO", Const, 0}, + {"SO_RESTRICTIONS", Const, 0}, + {"SO_RESTRICT_DENYIN", Const, 0}, + {"SO_RESTRICT_DENYOUT", Const, 0}, + {"SO_RESTRICT_DENYSET", Const, 0}, + {"SO_REUSEADDR", Const, 0}, + {"SO_REUSEPORT", Const, 0}, + {"SO_REUSESHAREUID", Const, 0}, + {"SO_RTABLE", Const, 1}, + {"SO_RXQ_OVFL", Const, 0}, + {"SO_SECURITY_AUTHENTICATION", Const, 0}, + {"SO_SECURITY_ENCRYPTION_NETWORK", Const, 0}, + {"SO_SECURITY_ENCRYPTION_TRANSPORT", Const, 0}, + {"SO_SETFIB", Const, 0}, + {"SO_SNDBUF", Const, 0}, + {"SO_SNDBUFFORCE", Const, 0}, + {"SO_SNDLOWAT", Const, 0}, + {"SO_SNDTIMEO", Const, 0}, + {"SO_SPLICE", Const, 1}, + {"SO_TIMESTAMP", Const, 0}, + {"SO_TIMESTAMPING", Const, 0}, + {"SO_TIMESTAMPNS", Const, 0}, + {"SO_TIMESTAMP_MONOTONIC", Const, 0}, + {"SO_TYPE", Const, 0}, + {"SO_UPCALLCLOSEWAIT", Const, 0}, + {"SO_UPDATE_ACCEPT_CONTEXT", Const, 0}, + {"SO_UPDATE_CONNECT_CONTEXT", Const, 1}, + {"SO_USELOOPBACK", Const, 0}, + {"SO_USER_COOKIE", Const, 1}, + {"SO_VENDOR", Const, 3}, + {"SO_WANTMORE", Const, 0}, + {"SO_WANTOOBFLAG", Const, 0}, + {"SSLExtraCertChainPolicyPara", Type, 0}, + {"SSLExtraCertChainPolicyPara.AuthType", Field, 0}, + {"SSLExtraCertChainPolicyPara.Checks", Field, 0}, + {"SSLExtraCertChainPolicyPara.ServerName", Field, 0}, + {"SSLExtraCertChainPolicyPara.Size", Field, 0}, + {"STANDARD_RIGHTS_ALL", Const, 0}, + {"STANDARD_RIGHTS_EXECUTE", Const, 0}, + {"STANDARD_RIGHTS_READ", Const, 0}, + {"STANDARD_RIGHTS_REQUIRED", Const, 0}, + {"STANDARD_RIGHTS_WRITE", Const, 0}, + {"STARTF_USESHOWWINDOW", Const, 0}, + {"STARTF_USESTDHANDLES", Const, 0}, + {"STD_ERROR_HANDLE", Const, 0}, + {"STD_INPUT_HANDLE", Const, 0}, + {"STD_OUTPUT_HANDLE", Const, 0}, + {"SUBLANG_ENGLISH_US", Const, 0}, + {"SW_FORCEMINIMIZE", Const, 0}, + {"SW_HIDE", Const, 0}, + {"SW_MAXIMIZE", Const, 0}, + {"SW_MINIMIZE", Const, 0}, + {"SW_NORMAL", Const, 0}, + {"SW_RESTORE", Const, 0}, + {"SW_SHOW", Const, 0}, + {"SW_SHOWDEFAULT", Const, 0}, + {"SW_SHOWMAXIMIZED", Const, 0}, + {"SW_SHOWMINIMIZED", Const, 0}, + {"SW_SHOWMINNOACTIVE", Const, 0}, + {"SW_SHOWNA", Const, 0}, + {"SW_SHOWNOACTIVATE", Const, 0}, + {"SW_SHOWNORMAL", Const, 0}, + {"SYMBOLIC_LINK_FLAG_DIRECTORY", Const, 4}, + {"SYNCHRONIZE", Const, 0}, + {"SYSCTL_VERSION", Const, 1}, + {"SYSCTL_VERS_0", Const, 1}, + {"SYSCTL_VERS_1", Const, 1}, + {"SYSCTL_VERS_MASK", Const, 1}, + {"SYS_ABORT2", Const, 0}, + {"SYS_ACCEPT", Const, 0}, + {"SYS_ACCEPT4", Const, 0}, + {"SYS_ACCEPT_NOCANCEL", Const, 0}, + {"SYS_ACCESS", Const, 0}, + {"SYS_ACCESS_EXTENDED", Const, 0}, + {"SYS_ACCT", Const, 0}, + {"SYS_ADD_KEY", Const, 0}, + {"SYS_ADD_PROFIL", Const, 0}, + {"SYS_ADJFREQ", Const, 1}, + {"SYS_ADJTIME", Const, 0}, + {"SYS_ADJTIMEX", Const, 0}, + {"SYS_AFS_SYSCALL", Const, 0}, + {"SYS_AIO_CANCEL", Const, 0}, + {"SYS_AIO_ERROR", Const, 0}, + {"SYS_AIO_FSYNC", Const, 0}, + {"SYS_AIO_MLOCK", Const, 14}, + {"SYS_AIO_READ", Const, 0}, + {"SYS_AIO_RETURN", Const, 0}, + {"SYS_AIO_SUSPEND", Const, 0}, + {"SYS_AIO_SUSPEND_NOCANCEL", Const, 0}, + {"SYS_AIO_WAITCOMPLETE", Const, 14}, + {"SYS_AIO_WRITE", Const, 0}, + {"SYS_ALARM", Const, 0}, + {"SYS_ARCH_PRCTL", Const, 0}, + {"SYS_ARM_FADVISE64_64", Const, 0}, + {"SYS_ARM_SYNC_FILE_RANGE", Const, 0}, + {"SYS_ATGETMSG", Const, 0}, + {"SYS_ATPGETREQ", Const, 0}, + {"SYS_ATPGETRSP", Const, 0}, + {"SYS_ATPSNDREQ", Const, 0}, + {"SYS_ATPSNDRSP", Const, 0}, + {"SYS_ATPUTMSG", Const, 0}, + {"SYS_ATSOCKET", Const, 0}, + {"SYS_AUDIT", Const, 0}, + {"SYS_AUDITCTL", Const, 0}, + {"SYS_AUDITON", Const, 0}, + {"SYS_AUDIT_SESSION_JOIN", Const, 0}, + {"SYS_AUDIT_SESSION_PORT", Const, 0}, + {"SYS_AUDIT_SESSION_SELF", Const, 0}, + {"SYS_BDFLUSH", Const, 0}, + {"SYS_BIND", Const, 0}, + {"SYS_BINDAT", Const, 3}, + {"SYS_BREAK", Const, 0}, + {"SYS_BRK", Const, 0}, + {"SYS_BSDTHREAD_CREATE", Const, 0}, + {"SYS_BSDTHREAD_REGISTER", Const, 0}, + {"SYS_BSDTHREAD_TERMINATE", Const, 0}, + {"SYS_CAPGET", Const, 0}, + {"SYS_CAPSET", Const, 0}, + {"SYS_CAP_ENTER", Const, 0}, + {"SYS_CAP_FCNTLS_GET", Const, 1}, + {"SYS_CAP_FCNTLS_LIMIT", Const, 1}, + {"SYS_CAP_GETMODE", Const, 0}, + {"SYS_CAP_GETRIGHTS", Const, 0}, + {"SYS_CAP_IOCTLS_GET", Const, 1}, + {"SYS_CAP_IOCTLS_LIMIT", Const, 1}, + {"SYS_CAP_NEW", Const, 0}, + {"SYS_CAP_RIGHTS_GET", Const, 1}, + {"SYS_CAP_RIGHTS_LIMIT", Const, 1}, + {"SYS_CHDIR", Const, 0}, + {"SYS_CHFLAGS", Const, 0}, + {"SYS_CHFLAGSAT", Const, 3}, + {"SYS_CHMOD", Const, 0}, + {"SYS_CHMOD_EXTENDED", Const, 0}, + {"SYS_CHOWN", Const, 0}, + {"SYS_CHOWN32", Const, 0}, + {"SYS_CHROOT", Const, 0}, + {"SYS_CHUD", Const, 0}, + {"SYS_CLOCK_ADJTIME", Const, 0}, + {"SYS_CLOCK_GETCPUCLOCKID2", Const, 1}, + {"SYS_CLOCK_GETRES", Const, 0}, + {"SYS_CLOCK_GETTIME", Const, 0}, + {"SYS_CLOCK_NANOSLEEP", Const, 0}, + {"SYS_CLOCK_SETTIME", Const, 0}, + {"SYS_CLONE", Const, 0}, + {"SYS_CLOSE", Const, 0}, + {"SYS_CLOSEFROM", Const, 0}, + {"SYS_CLOSE_NOCANCEL", Const, 0}, + {"SYS_CONNECT", Const, 0}, + {"SYS_CONNECTAT", Const, 3}, + {"SYS_CONNECT_NOCANCEL", Const, 0}, + {"SYS_COPYFILE", Const, 0}, + {"SYS_CPUSET", Const, 0}, + {"SYS_CPUSET_GETAFFINITY", Const, 0}, + {"SYS_CPUSET_GETID", Const, 0}, + {"SYS_CPUSET_SETAFFINITY", Const, 0}, + {"SYS_CPUSET_SETID", Const, 0}, + {"SYS_CREAT", Const, 0}, + {"SYS_CREATE_MODULE", Const, 0}, + {"SYS_CSOPS", Const, 0}, + {"SYS_CSOPS_AUDITTOKEN", Const, 16}, + {"SYS_DELETE", Const, 0}, + {"SYS_DELETE_MODULE", Const, 0}, + {"SYS_DUP", Const, 0}, + {"SYS_DUP2", Const, 0}, + {"SYS_DUP3", Const, 0}, + {"SYS_EACCESS", Const, 0}, + {"SYS_EPOLL_CREATE", Const, 0}, + {"SYS_EPOLL_CREATE1", Const, 0}, + {"SYS_EPOLL_CTL", Const, 0}, + {"SYS_EPOLL_CTL_OLD", Const, 0}, + {"SYS_EPOLL_PWAIT", Const, 0}, + {"SYS_EPOLL_WAIT", Const, 0}, + {"SYS_EPOLL_WAIT_OLD", Const, 0}, + {"SYS_EVENTFD", Const, 0}, + {"SYS_EVENTFD2", Const, 0}, + {"SYS_EXCHANGEDATA", Const, 0}, + {"SYS_EXECVE", Const, 0}, + {"SYS_EXIT", Const, 0}, + {"SYS_EXIT_GROUP", Const, 0}, + {"SYS_EXTATTRCTL", Const, 0}, + {"SYS_EXTATTR_DELETE_FD", Const, 0}, + {"SYS_EXTATTR_DELETE_FILE", Const, 0}, + {"SYS_EXTATTR_DELETE_LINK", Const, 0}, + {"SYS_EXTATTR_GET_FD", Const, 0}, + {"SYS_EXTATTR_GET_FILE", Const, 0}, + {"SYS_EXTATTR_GET_LINK", Const, 0}, + {"SYS_EXTATTR_LIST_FD", Const, 0}, + {"SYS_EXTATTR_LIST_FILE", Const, 0}, + {"SYS_EXTATTR_LIST_LINK", Const, 0}, + {"SYS_EXTATTR_SET_FD", Const, 0}, + {"SYS_EXTATTR_SET_FILE", Const, 0}, + {"SYS_EXTATTR_SET_LINK", Const, 0}, + {"SYS_FACCESSAT", Const, 0}, + {"SYS_FADVISE64", Const, 0}, + {"SYS_FADVISE64_64", Const, 0}, + {"SYS_FALLOCATE", Const, 0}, + {"SYS_FANOTIFY_INIT", Const, 0}, + {"SYS_FANOTIFY_MARK", Const, 0}, + {"SYS_FCHDIR", Const, 0}, + {"SYS_FCHFLAGS", Const, 0}, + {"SYS_FCHMOD", Const, 0}, + {"SYS_FCHMODAT", Const, 0}, + {"SYS_FCHMOD_EXTENDED", Const, 0}, + {"SYS_FCHOWN", Const, 0}, + {"SYS_FCHOWN32", Const, 0}, + {"SYS_FCHOWNAT", Const, 0}, + {"SYS_FCHROOT", Const, 1}, + {"SYS_FCNTL", Const, 0}, + {"SYS_FCNTL64", Const, 0}, + {"SYS_FCNTL_NOCANCEL", Const, 0}, + {"SYS_FDATASYNC", Const, 0}, + {"SYS_FEXECVE", Const, 0}, + {"SYS_FFCLOCK_GETCOUNTER", Const, 0}, + {"SYS_FFCLOCK_GETESTIMATE", Const, 0}, + {"SYS_FFCLOCK_SETESTIMATE", Const, 0}, + {"SYS_FFSCTL", Const, 0}, + {"SYS_FGETATTRLIST", Const, 0}, + {"SYS_FGETXATTR", Const, 0}, + {"SYS_FHOPEN", Const, 0}, + {"SYS_FHSTAT", Const, 0}, + {"SYS_FHSTATFS", Const, 0}, + {"SYS_FILEPORT_MAKEFD", Const, 0}, + {"SYS_FILEPORT_MAKEPORT", Const, 0}, + {"SYS_FKTRACE", Const, 1}, + {"SYS_FLISTXATTR", Const, 0}, + {"SYS_FLOCK", Const, 0}, + {"SYS_FORK", Const, 0}, + {"SYS_FPATHCONF", Const, 0}, + {"SYS_FREEBSD6_FTRUNCATE", Const, 0}, + {"SYS_FREEBSD6_LSEEK", Const, 0}, + {"SYS_FREEBSD6_MMAP", Const, 0}, + {"SYS_FREEBSD6_PREAD", Const, 0}, + {"SYS_FREEBSD6_PWRITE", Const, 0}, + {"SYS_FREEBSD6_TRUNCATE", Const, 0}, + {"SYS_FREMOVEXATTR", Const, 0}, + {"SYS_FSCTL", Const, 0}, + {"SYS_FSETATTRLIST", Const, 0}, + {"SYS_FSETXATTR", Const, 0}, + {"SYS_FSGETPATH", Const, 0}, + {"SYS_FSTAT", Const, 0}, + {"SYS_FSTAT64", Const, 0}, + {"SYS_FSTAT64_EXTENDED", Const, 0}, + {"SYS_FSTATAT", Const, 0}, + {"SYS_FSTATAT64", Const, 0}, + {"SYS_FSTATFS", Const, 0}, + {"SYS_FSTATFS64", Const, 0}, + {"SYS_FSTATV", Const, 0}, + {"SYS_FSTATVFS1", Const, 1}, + {"SYS_FSTAT_EXTENDED", Const, 0}, + {"SYS_FSYNC", Const, 0}, + {"SYS_FSYNC_NOCANCEL", Const, 0}, + {"SYS_FSYNC_RANGE", Const, 1}, + {"SYS_FTIME", Const, 0}, + {"SYS_FTRUNCATE", Const, 0}, + {"SYS_FTRUNCATE64", Const, 0}, + {"SYS_FUTEX", Const, 0}, + {"SYS_FUTIMENS", Const, 1}, + {"SYS_FUTIMES", Const, 0}, + {"SYS_FUTIMESAT", Const, 0}, + {"SYS_GETATTRLIST", Const, 0}, + {"SYS_GETAUDIT", Const, 0}, + {"SYS_GETAUDIT_ADDR", Const, 0}, + {"SYS_GETAUID", Const, 0}, + {"SYS_GETCONTEXT", Const, 0}, + {"SYS_GETCPU", Const, 0}, + {"SYS_GETCWD", Const, 0}, + {"SYS_GETDENTS", Const, 0}, + {"SYS_GETDENTS64", Const, 0}, + {"SYS_GETDIRENTRIES", Const, 0}, + {"SYS_GETDIRENTRIES64", Const, 0}, + {"SYS_GETDIRENTRIESATTR", Const, 0}, + {"SYS_GETDTABLECOUNT", Const, 1}, + {"SYS_GETDTABLESIZE", Const, 0}, + {"SYS_GETEGID", Const, 0}, + {"SYS_GETEGID32", Const, 0}, + {"SYS_GETEUID", Const, 0}, + {"SYS_GETEUID32", Const, 0}, + {"SYS_GETFH", Const, 0}, + {"SYS_GETFSSTAT", Const, 0}, + {"SYS_GETFSSTAT64", Const, 0}, + {"SYS_GETGID", Const, 0}, + {"SYS_GETGID32", Const, 0}, + {"SYS_GETGROUPS", Const, 0}, + {"SYS_GETGROUPS32", Const, 0}, + {"SYS_GETHOSTUUID", Const, 0}, + {"SYS_GETITIMER", Const, 0}, + {"SYS_GETLCID", Const, 0}, + {"SYS_GETLOGIN", Const, 0}, + {"SYS_GETLOGINCLASS", Const, 0}, + {"SYS_GETPEERNAME", Const, 0}, + {"SYS_GETPGID", Const, 0}, + {"SYS_GETPGRP", Const, 0}, + {"SYS_GETPID", Const, 0}, + {"SYS_GETPMSG", Const, 0}, + {"SYS_GETPPID", Const, 0}, + {"SYS_GETPRIORITY", Const, 0}, + {"SYS_GETRESGID", Const, 0}, + {"SYS_GETRESGID32", Const, 0}, + {"SYS_GETRESUID", Const, 0}, + {"SYS_GETRESUID32", Const, 0}, + {"SYS_GETRLIMIT", Const, 0}, + {"SYS_GETRTABLE", Const, 1}, + {"SYS_GETRUSAGE", Const, 0}, + {"SYS_GETSGROUPS", Const, 0}, + {"SYS_GETSID", Const, 0}, + {"SYS_GETSOCKNAME", Const, 0}, + {"SYS_GETSOCKOPT", Const, 0}, + {"SYS_GETTHRID", Const, 1}, + {"SYS_GETTID", Const, 0}, + {"SYS_GETTIMEOFDAY", Const, 0}, + {"SYS_GETUID", Const, 0}, + {"SYS_GETUID32", Const, 0}, + {"SYS_GETVFSSTAT", Const, 1}, + {"SYS_GETWGROUPS", Const, 0}, + {"SYS_GETXATTR", Const, 0}, + {"SYS_GET_KERNEL_SYMS", Const, 0}, + {"SYS_GET_MEMPOLICY", Const, 0}, + {"SYS_GET_ROBUST_LIST", Const, 0}, + {"SYS_GET_THREAD_AREA", Const, 0}, + {"SYS_GSSD_SYSCALL", Const, 14}, + {"SYS_GTTY", Const, 0}, + {"SYS_IDENTITYSVC", Const, 0}, + {"SYS_IDLE", Const, 0}, + {"SYS_INITGROUPS", Const, 0}, + {"SYS_INIT_MODULE", Const, 0}, + {"SYS_INOTIFY_ADD_WATCH", Const, 0}, + {"SYS_INOTIFY_INIT", Const, 0}, + {"SYS_INOTIFY_INIT1", Const, 0}, + {"SYS_INOTIFY_RM_WATCH", Const, 0}, + {"SYS_IOCTL", Const, 0}, + {"SYS_IOPERM", Const, 0}, + {"SYS_IOPL", Const, 0}, + {"SYS_IOPOLICYSYS", Const, 0}, + {"SYS_IOPRIO_GET", Const, 0}, + {"SYS_IOPRIO_SET", Const, 0}, + {"SYS_IO_CANCEL", Const, 0}, + {"SYS_IO_DESTROY", Const, 0}, + {"SYS_IO_GETEVENTS", Const, 0}, + {"SYS_IO_SETUP", Const, 0}, + {"SYS_IO_SUBMIT", Const, 0}, + {"SYS_IPC", Const, 0}, + {"SYS_ISSETUGID", Const, 0}, + {"SYS_JAIL", Const, 0}, + {"SYS_JAIL_ATTACH", Const, 0}, + {"SYS_JAIL_GET", Const, 0}, + {"SYS_JAIL_REMOVE", Const, 0}, + {"SYS_JAIL_SET", Const, 0}, + {"SYS_KAS_INFO", Const, 16}, + {"SYS_KDEBUG_TRACE", Const, 0}, + {"SYS_KENV", Const, 0}, + {"SYS_KEVENT", Const, 0}, + {"SYS_KEVENT64", Const, 0}, + {"SYS_KEXEC_LOAD", Const, 0}, + {"SYS_KEYCTL", Const, 0}, + {"SYS_KILL", Const, 0}, + {"SYS_KLDFIND", Const, 0}, + {"SYS_KLDFIRSTMOD", Const, 0}, + {"SYS_KLDLOAD", Const, 0}, + {"SYS_KLDNEXT", Const, 0}, + {"SYS_KLDSTAT", Const, 0}, + {"SYS_KLDSYM", Const, 0}, + {"SYS_KLDUNLOAD", Const, 0}, + {"SYS_KLDUNLOADF", Const, 0}, + {"SYS_KMQ_NOTIFY", Const, 14}, + {"SYS_KMQ_OPEN", Const, 14}, + {"SYS_KMQ_SETATTR", Const, 14}, + {"SYS_KMQ_TIMEDRECEIVE", Const, 14}, + {"SYS_KMQ_TIMEDSEND", Const, 14}, + {"SYS_KMQ_UNLINK", Const, 14}, + {"SYS_KQUEUE", Const, 0}, + {"SYS_KQUEUE1", Const, 1}, + {"SYS_KSEM_CLOSE", Const, 14}, + {"SYS_KSEM_DESTROY", Const, 14}, + {"SYS_KSEM_GETVALUE", Const, 14}, + {"SYS_KSEM_INIT", Const, 14}, + {"SYS_KSEM_OPEN", Const, 14}, + {"SYS_KSEM_POST", Const, 14}, + {"SYS_KSEM_TIMEDWAIT", Const, 14}, + {"SYS_KSEM_TRYWAIT", Const, 14}, + {"SYS_KSEM_UNLINK", Const, 14}, + {"SYS_KSEM_WAIT", Const, 14}, + {"SYS_KTIMER_CREATE", Const, 0}, + {"SYS_KTIMER_DELETE", Const, 0}, + {"SYS_KTIMER_GETOVERRUN", Const, 0}, + {"SYS_KTIMER_GETTIME", Const, 0}, + {"SYS_KTIMER_SETTIME", Const, 0}, + {"SYS_KTRACE", Const, 0}, + {"SYS_LCHFLAGS", Const, 0}, + {"SYS_LCHMOD", Const, 0}, + {"SYS_LCHOWN", Const, 0}, + {"SYS_LCHOWN32", Const, 0}, + {"SYS_LEDGER", Const, 16}, + {"SYS_LGETFH", Const, 0}, + {"SYS_LGETXATTR", Const, 0}, + {"SYS_LINK", Const, 0}, + {"SYS_LINKAT", Const, 0}, + {"SYS_LIO_LISTIO", Const, 0}, + {"SYS_LISTEN", Const, 0}, + {"SYS_LISTXATTR", Const, 0}, + {"SYS_LLISTXATTR", Const, 0}, + {"SYS_LOCK", Const, 0}, + {"SYS_LOOKUP_DCOOKIE", Const, 0}, + {"SYS_LPATHCONF", Const, 0}, + {"SYS_LREMOVEXATTR", Const, 0}, + {"SYS_LSEEK", Const, 0}, + {"SYS_LSETXATTR", Const, 0}, + {"SYS_LSTAT", Const, 0}, + {"SYS_LSTAT64", Const, 0}, + {"SYS_LSTAT64_EXTENDED", Const, 0}, + {"SYS_LSTATV", Const, 0}, + {"SYS_LSTAT_EXTENDED", Const, 0}, + {"SYS_LUTIMES", Const, 0}, + {"SYS_MAC_SYSCALL", Const, 0}, + {"SYS_MADVISE", Const, 0}, + {"SYS_MADVISE1", Const, 0}, + {"SYS_MAXSYSCALL", Const, 0}, + {"SYS_MBIND", Const, 0}, + {"SYS_MIGRATE_PAGES", Const, 0}, + {"SYS_MINCORE", Const, 0}, + {"SYS_MINHERIT", Const, 0}, + {"SYS_MKCOMPLEX", Const, 0}, + {"SYS_MKDIR", Const, 0}, + {"SYS_MKDIRAT", Const, 0}, + {"SYS_MKDIR_EXTENDED", Const, 0}, + {"SYS_MKFIFO", Const, 0}, + {"SYS_MKFIFOAT", Const, 0}, + {"SYS_MKFIFO_EXTENDED", Const, 0}, + {"SYS_MKNOD", Const, 0}, + {"SYS_MKNODAT", Const, 0}, + {"SYS_MLOCK", Const, 0}, + {"SYS_MLOCKALL", Const, 0}, + {"SYS_MMAP", Const, 0}, + {"SYS_MMAP2", Const, 0}, + {"SYS_MODCTL", Const, 1}, + {"SYS_MODFIND", Const, 0}, + {"SYS_MODFNEXT", Const, 0}, + {"SYS_MODIFY_LDT", Const, 0}, + {"SYS_MODNEXT", Const, 0}, + {"SYS_MODSTAT", Const, 0}, + {"SYS_MODWATCH", Const, 0}, + {"SYS_MOUNT", Const, 0}, + {"SYS_MOVE_PAGES", Const, 0}, + {"SYS_MPROTECT", Const, 0}, + {"SYS_MPX", Const, 0}, + {"SYS_MQUERY", Const, 1}, + {"SYS_MQ_GETSETATTR", Const, 0}, + {"SYS_MQ_NOTIFY", Const, 0}, + {"SYS_MQ_OPEN", Const, 0}, + {"SYS_MQ_TIMEDRECEIVE", Const, 0}, + {"SYS_MQ_TIMEDSEND", Const, 0}, + {"SYS_MQ_UNLINK", Const, 0}, + {"SYS_MREMAP", Const, 0}, + {"SYS_MSGCTL", Const, 0}, + {"SYS_MSGGET", Const, 0}, + {"SYS_MSGRCV", Const, 0}, + {"SYS_MSGRCV_NOCANCEL", Const, 0}, + {"SYS_MSGSND", Const, 0}, + {"SYS_MSGSND_NOCANCEL", Const, 0}, + {"SYS_MSGSYS", Const, 0}, + {"SYS_MSYNC", Const, 0}, + {"SYS_MSYNC_NOCANCEL", Const, 0}, + {"SYS_MUNLOCK", Const, 0}, + {"SYS_MUNLOCKALL", Const, 0}, + {"SYS_MUNMAP", Const, 0}, + {"SYS_NAME_TO_HANDLE_AT", Const, 0}, + {"SYS_NANOSLEEP", Const, 0}, + {"SYS_NEWFSTATAT", Const, 0}, + {"SYS_NFSCLNT", Const, 0}, + {"SYS_NFSSERVCTL", Const, 0}, + {"SYS_NFSSVC", Const, 0}, + {"SYS_NFSTAT", Const, 0}, + {"SYS_NICE", Const, 0}, + {"SYS_NLM_SYSCALL", Const, 14}, + {"SYS_NLSTAT", Const, 0}, + {"SYS_NMOUNT", Const, 0}, + {"SYS_NSTAT", Const, 0}, + {"SYS_NTP_ADJTIME", Const, 0}, + {"SYS_NTP_GETTIME", Const, 0}, + {"SYS_NUMA_GETAFFINITY", Const, 14}, + {"SYS_NUMA_SETAFFINITY", Const, 14}, + {"SYS_OABI_SYSCALL_BASE", Const, 0}, + {"SYS_OBREAK", Const, 0}, + {"SYS_OLDFSTAT", Const, 0}, + {"SYS_OLDLSTAT", Const, 0}, + {"SYS_OLDOLDUNAME", Const, 0}, + {"SYS_OLDSTAT", Const, 0}, + {"SYS_OLDUNAME", Const, 0}, + {"SYS_OPEN", Const, 0}, + {"SYS_OPENAT", Const, 0}, + {"SYS_OPENBSD_POLL", Const, 0}, + {"SYS_OPEN_BY_HANDLE_AT", Const, 0}, + {"SYS_OPEN_DPROTECTED_NP", Const, 16}, + {"SYS_OPEN_EXTENDED", Const, 0}, + {"SYS_OPEN_NOCANCEL", Const, 0}, + {"SYS_OVADVISE", Const, 0}, + {"SYS_PACCEPT", Const, 1}, + {"SYS_PATHCONF", Const, 0}, + {"SYS_PAUSE", Const, 0}, + {"SYS_PCICONFIG_IOBASE", Const, 0}, + {"SYS_PCICONFIG_READ", Const, 0}, + {"SYS_PCICONFIG_WRITE", Const, 0}, + {"SYS_PDFORK", Const, 0}, + {"SYS_PDGETPID", Const, 0}, + {"SYS_PDKILL", Const, 0}, + {"SYS_PERF_EVENT_OPEN", Const, 0}, + {"SYS_PERSONALITY", Const, 0}, + {"SYS_PID_HIBERNATE", Const, 0}, + {"SYS_PID_RESUME", Const, 0}, + {"SYS_PID_SHUTDOWN_SOCKETS", Const, 0}, + {"SYS_PID_SUSPEND", Const, 0}, + {"SYS_PIPE", Const, 0}, + {"SYS_PIPE2", Const, 0}, + {"SYS_PIVOT_ROOT", Const, 0}, + {"SYS_PMC_CONTROL", Const, 1}, + {"SYS_PMC_GET_INFO", Const, 1}, + {"SYS_POLL", Const, 0}, + {"SYS_POLLTS", Const, 1}, + {"SYS_POLL_NOCANCEL", Const, 0}, + {"SYS_POSIX_FADVISE", Const, 0}, + {"SYS_POSIX_FALLOCATE", Const, 0}, + {"SYS_POSIX_OPENPT", Const, 0}, + {"SYS_POSIX_SPAWN", Const, 0}, + {"SYS_PPOLL", Const, 0}, + {"SYS_PRCTL", Const, 0}, + {"SYS_PREAD", Const, 0}, + {"SYS_PREAD64", Const, 0}, + {"SYS_PREADV", Const, 0}, + {"SYS_PREAD_NOCANCEL", Const, 0}, + {"SYS_PRLIMIT64", Const, 0}, + {"SYS_PROCCTL", Const, 3}, + {"SYS_PROCESS_POLICY", Const, 0}, + {"SYS_PROCESS_VM_READV", Const, 0}, + {"SYS_PROCESS_VM_WRITEV", Const, 0}, + {"SYS_PROC_INFO", Const, 0}, + {"SYS_PROF", Const, 0}, + {"SYS_PROFIL", Const, 0}, + {"SYS_PSELECT", Const, 0}, + {"SYS_PSELECT6", Const, 0}, + {"SYS_PSET_ASSIGN", Const, 1}, + {"SYS_PSET_CREATE", Const, 1}, + {"SYS_PSET_DESTROY", Const, 1}, + {"SYS_PSYNCH_CVBROAD", Const, 0}, + {"SYS_PSYNCH_CVCLRPREPOST", Const, 0}, + {"SYS_PSYNCH_CVSIGNAL", Const, 0}, + {"SYS_PSYNCH_CVWAIT", Const, 0}, + {"SYS_PSYNCH_MUTEXDROP", Const, 0}, + {"SYS_PSYNCH_MUTEXWAIT", Const, 0}, + {"SYS_PSYNCH_RW_DOWNGRADE", Const, 0}, + {"SYS_PSYNCH_RW_LONGRDLOCK", Const, 0}, + {"SYS_PSYNCH_RW_RDLOCK", Const, 0}, + {"SYS_PSYNCH_RW_UNLOCK", Const, 0}, + {"SYS_PSYNCH_RW_UNLOCK2", Const, 0}, + {"SYS_PSYNCH_RW_UPGRADE", Const, 0}, + {"SYS_PSYNCH_RW_WRLOCK", Const, 0}, + {"SYS_PSYNCH_RW_YIELDWRLOCK", Const, 0}, + {"SYS_PTRACE", Const, 0}, + {"SYS_PUTPMSG", Const, 0}, + {"SYS_PWRITE", Const, 0}, + {"SYS_PWRITE64", Const, 0}, + {"SYS_PWRITEV", Const, 0}, + {"SYS_PWRITE_NOCANCEL", Const, 0}, + {"SYS_QUERY_MODULE", Const, 0}, + {"SYS_QUOTACTL", Const, 0}, + {"SYS_RASCTL", Const, 1}, + {"SYS_RCTL_ADD_RULE", Const, 0}, + {"SYS_RCTL_GET_LIMITS", Const, 0}, + {"SYS_RCTL_GET_RACCT", Const, 0}, + {"SYS_RCTL_GET_RULES", Const, 0}, + {"SYS_RCTL_REMOVE_RULE", Const, 0}, + {"SYS_READ", Const, 0}, + {"SYS_READAHEAD", Const, 0}, + {"SYS_READDIR", Const, 0}, + {"SYS_READLINK", Const, 0}, + {"SYS_READLINKAT", Const, 0}, + {"SYS_READV", Const, 0}, + {"SYS_READV_NOCANCEL", Const, 0}, + {"SYS_READ_NOCANCEL", Const, 0}, + {"SYS_REBOOT", Const, 0}, + {"SYS_RECV", Const, 0}, + {"SYS_RECVFROM", Const, 0}, + {"SYS_RECVFROM_NOCANCEL", Const, 0}, + {"SYS_RECVMMSG", Const, 0}, + {"SYS_RECVMSG", Const, 0}, + {"SYS_RECVMSG_NOCANCEL", Const, 0}, + {"SYS_REMAP_FILE_PAGES", Const, 0}, + {"SYS_REMOVEXATTR", Const, 0}, + {"SYS_RENAME", Const, 0}, + {"SYS_RENAMEAT", Const, 0}, + {"SYS_REQUEST_KEY", Const, 0}, + {"SYS_RESTART_SYSCALL", Const, 0}, + {"SYS_REVOKE", Const, 0}, + {"SYS_RFORK", Const, 0}, + {"SYS_RMDIR", Const, 0}, + {"SYS_RTPRIO", Const, 0}, + {"SYS_RTPRIO_THREAD", Const, 0}, + {"SYS_RT_SIGACTION", Const, 0}, + {"SYS_RT_SIGPENDING", Const, 0}, + {"SYS_RT_SIGPROCMASK", Const, 0}, + {"SYS_RT_SIGQUEUEINFO", Const, 0}, + {"SYS_RT_SIGRETURN", Const, 0}, + {"SYS_RT_SIGSUSPEND", Const, 0}, + {"SYS_RT_SIGTIMEDWAIT", Const, 0}, + {"SYS_RT_TGSIGQUEUEINFO", Const, 0}, + {"SYS_SBRK", Const, 0}, + {"SYS_SCHED_GETAFFINITY", Const, 0}, + {"SYS_SCHED_GETPARAM", Const, 0}, + {"SYS_SCHED_GETSCHEDULER", Const, 0}, + {"SYS_SCHED_GET_PRIORITY_MAX", Const, 0}, + {"SYS_SCHED_GET_PRIORITY_MIN", Const, 0}, + {"SYS_SCHED_RR_GET_INTERVAL", Const, 0}, + {"SYS_SCHED_SETAFFINITY", Const, 0}, + {"SYS_SCHED_SETPARAM", Const, 0}, + {"SYS_SCHED_SETSCHEDULER", Const, 0}, + {"SYS_SCHED_YIELD", Const, 0}, + {"SYS_SCTP_GENERIC_RECVMSG", Const, 0}, + {"SYS_SCTP_GENERIC_SENDMSG", Const, 0}, + {"SYS_SCTP_GENERIC_SENDMSG_IOV", Const, 0}, + {"SYS_SCTP_PEELOFF", Const, 0}, + {"SYS_SEARCHFS", Const, 0}, + {"SYS_SECURITY", Const, 0}, + {"SYS_SELECT", Const, 0}, + {"SYS_SELECT_NOCANCEL", Const, 0}, + {"SYS_SEMCONFIG", Const, 1}, + {"SYS_SEMCTL", Const, 0}, + {"SYS_SEMGET", Const, 0}, + {"SYS_SEMOP", Const, 0}, + {"SYS_SEMSYS", Const, 0}, + {"SYS_SEMTIMEDOP", Const, 0}, + {"SYS_SEM_CLOSE", Const, 0}, + {"SYS_SEM_DESTROY", Const, 0}, + {"SYS_SEM_GETVALUE", Const, 0}, + {"SYS_SEM_INIT", Const, 0}, + {"SYS_SEM_OPEN", Const, 0}, + {"SYS_SEM_POST", Const, 0}, + {"SYS_SEM_TRYWAIT", Const, 0}, + {"SYS_SEM_UNLINK", Const, 0}, + {"SYS_SEM_WAIT", Const, 0}, + {"SYS_SEM_WAIT_NOCANCEL", Const, 0}, + {"SYS_SEND", Const, 0}, + {"SYS_SENDFILE", Const, 0}, + {"SYS_SENDFILE64", Const, 0}, + {"SYS_SENDMMSG", Const, 0}, + {"SYS_SENDMSG", Const, 0}, + {"SYS_SENDMSG_NOCANCEL", Const, 0}, + {"SYS_SENDTO", Const, 0}, + {"SYS_SENDTO_NOCANCEL", Const, 0}, + {"SYS_SETATTRLIST", Const, 0}, + {"SYS_SETAUDIT", Const, 0}, + {"SYS_SETAUDIT_ADDR", Const, 0}, + {"SYS_SETAUID", Const, 0}, + {"SYS_SETCONTEXT", Const, 0}, + {"SYS_SETDOMAINNAME", Const, 0}, + {"SYS_SETEGID", Const, 0}, + {"SYS_SETEUID", Const, 0}, + {"SYS_SETFIB", Const, 0}, + {"SYS_SETFSGID", Const, 0}, + {"SYS_SETFSGID32", Const, 0}, + {"SYS_SETFSUID", Const, 0}, + {"SYS_SETFSUID32", Const, 0}, + {"SYS_SETGID", Const, 0}, + {"SYS_SETGID32", Const, 0}, + {"SYS_SETGROUPS", Const, 0}, + {"SYS_SETGROUPS32", Const, 0}, + {"SYS_SETHOSTNAME", Const, 0}, + {"SYS_SETITIMER", Const, 0}, + {"SYS_SETLCID", Const, 0}, + {"SYS_SETLOGIN", Const, 0}, + {"SYS_SETLOGINCLASS", Const, 0}, + {"SYS_SETNS", Const, 0}, + {"SYS_SETPGID", Const, 0}, + {"SYS_SETPRIORITY", Const, 0}, + {"SYS_SETPRIVEXEC", Const, 0}, + {"SYS_SETREGID", Const, 0}, + {"SYS_SETREGID32", Const, 0}, + {"SYS_SETRESGID", Const, 0}, + {"SYS_SETRESGID32", Const, 0}, + {"SYS_SETRESUID", Const, 0}, + {"SYS_SETRESUID32", Const, 0}, + {"SYS_SETREUID", Const, 0}, + {"SYS_SETREUID32", Const, 0}, + {"SYS_SETRLIMIT", Const, 0}, + {"SYS_SETRTABLE", Const, 1}, + {"SYS_SETSGROUPS", Const, 0}, + {"SYS_SETSID", Const, 0}, + {"SYS_SETSOCKOPT", Const, 0}, + {"SYS_SETTID", Const, 0}, + {"SYS_SETTID_WITH_PID", Const, 0}, + {"SYS_SETTIMEOFDAY", Const, 0}, + {"SYS_SETUID", Const, 0}, + {"SYS_SETUID32", Const, 0}, + {"SYS_SETWGROUPS", Const, 0}, + {"SYS_SETXATTR", Const, 0}, + {"SYS_SET_MEMPOLICY", Const, 0}, + {"SYS_SET_ROBUST_LIST", Const, 0}, + {"SYS_SET_THREAD_AREA", Const, 0}, + {"SYS_SET_TID_ADDRESS", Const, 0}, + {"SYS_SGETMASK", Const, 0}, + {"SYS_SHARED_REGION_CHECK_NP", Const, 0}, + {"SYS_SHARED_REGION_MAP_AND_SLIDE_NP", Const, 0}, + {"SYS_SHMAT", Const, 0}, + {"SYS_SHMCTL", Const, 0}, + {"SYS_SHMDT", Const, 0}, + {"SYS_SHMGET", Const, 0}, + {"SYS_SHMSYS", Const, 0}, + {"SYS_SHM_OPEN", Const, 0}, + {"SYS_SHM_UNLINK", Const, 0}, + {"SYS_SHUTDOWN", Const, 0}, + {"SYS_SIGACTION", Const, 0}, + {"SYS_SIGALTSTACK", Const, 0}, + {"SYS_SIGNAL", Const, 0}, + {"SYS_SIGNALFD", Const, 0}, + {"SYS_SIGNALFD4", Const, 0}, + {"SYS_SIGPENDING", Const, 0}, + {"SYS_SIGPROCMASK", Const, 0}, + {"SYS_SIGQUEUE", Const, 0}, + {"SYS_SIGQUEUEINFO", Const, 1}, + {"SYS_SIGRETURN", Const, 0}, + {"SYS_SIGSUSPEND", Const, 0}, + {"SYS_SIGSUSPEND_NOCANCEL", Const, 0}, + {"SYS_SIGTIMEDWAIT", Const, 0}, + {"SYS_SIGWAIT", Const, 0}, + {"SYS_SIGWAITINFO", Const, 0}, + {"SYS_SOCKET", Const, 0}, + {"SYS_SOCKETCALL", Const, 0}, + {"SYS_SOCKETPAIR", Const, 0}, + {"SYS_SPLICE", Const, 0}, + {"SYS_SSETMASK", Const, 0}, + {"SYS_SSTK", Const, 0}, + {"SYS_STACK_SNAPSHOT", Const, 0}, + {"SYS_STAT", Const, 0}, + {"SYS_STAT64", Const, 0}, + {"SYS_STAT64_EXTENDED", Const, 0}, + {"SYS_STATFS", Const, 0}, + {"SYS_STATFS64", Const, 0}, + {"SYS_STATV", Const, 0}, + {"SYS_STATVFS1", Const, 1}, + {"SYS_STAT_EXTENDED", Const, 0}, + {"SYS_STIME", Const, 0}, + {"SYS_STTY", Const, 0}, + {"SYS_SWAPCONTEXT", Const, 0}, + {"SYS_SWAPCTL", Const, 1}, + {"SYS_SWAPOFF", Const, 0}, + {"SYS_SWAPON", Const, 0}, + {"SYS_SYMLINK", Const, 0}, + {"SYS_SYMLINKAT", Const, 0}, + {"SYS_SYNC", Const, 0}, + {"SYS_SYNCFS", Const, 0}, + {"SYS_SYNC_FILE_RANGE", Const, 0}, + {"SYS_SYSARCH", Const, 0}, + {"SYS_SYSCALL", Const, 0}, + {"SYS_SYSCALL_BASE", Const, 0}, + {"SYS_SYSFS", Const, 0}, + {"SYS_SYSINFO", Const, 0}, + {"SYS_SYSLOG", Const, 0}, + {"SYS_TEE", Const, 0}, + {"SYS_TGKILL", Const, 0}, + {"SYS_THREAD_SELFID", Const, 0}, + {"SYS_THR_CREATE", Const, 0}, + {"SYS_THR_EXIT", Const, 0}, + {"SYS_THR_KILL", Const, 0}, + {"SYS_THR_KILL2", Const, 0}, + {"SYS_THR_NEW", Const, 0}, + {"SYS_THR_SELF", Const, 0}, + {"SYS_THR_SET_NAME", Const, 0}, + {"SYS_THR_SUSPEND", Const, 0}, + {"SYS_THR_WAKE", Const, 0}, + {"SYS_TIME", Const, 0}, + {"SYS_TIMERFD_CREATE", Const, 0}, + {"SYS_TIMERFD_GETTIME", Const, 0}, + {"SYS_TIMERFD_SETTIME", Const, 0}, + {"SYS_TIMER_CREATE", Const, 0}, + {"SYS_TIMER_DELETE", Const, 0}, + {"SYS_TIMER_GETOVERRUN", Const, 0}, + {"SYS_TIMER_GETTIME", Const, 0}, + {"SYS_TIMER_SETTIME", Const, 0}, + {"SYS_TIMES", Const, 0}, + {"SYS_TKILL", Const, 0}, + {"SYS_TRUNCATE", Const, 0}, + {"SYS_TRUNCATE64", Const, 0}, + {"SYS_TUXCALL", Const, 0}, + {"SYS_UGETRLIMIT", Const, 0}, + {"SYS_ULIMIT", Const, 0}, + {"SYS_UMASK", Const, 0}, + {"SYS_UMASK_EXTENDED", Const, 0}, + {"SYS_UMOUNT", Const, 0}, + {"SYS_UMOUNT2", Const, 0}, + {"SYS_UNAME", Const, 0}, + {"SYS_UNDELETE", Const, 0}, + {"SYS_UNLINK", Const, 0}, + {"SYS_UNLINKAT", Const, 0}, + {"SYS_UNMOUNT", Const, 0}, + {"SYS_UNSHARE", Const, 0}, + {"SYS_USELIB", Const, 0}, + {"SYS_USTAT", Const, 0}, + {"SYS_UTIME", Const, 0}, + {"SYS_UTIMENSAT", Const, 0}, + {"SYS_UTIMES", Const, 0}, + {"SYS_UTRACE", Const, 0}, + {"SYS_UUIDGEN", Const, 0}, + {"SYS_VADVISE", Const, 1}, + {"SYS_VFORK", Const, 0}, + {"SYS_VHANGUP", Const, 0}, + {"SYS_VM86", Const, 0}, + {"SYS_VM86OLD", Const, 0}, + {"SYS_VMSPLICE", Const, 0}, + {"SYS_VM_PRESSURE_MONITOR", Const, 0}, + {"SYS_VSERVER", Const, 0}, + {"SYS_WAIT4", Const, 0}, + {"SYS_WAIT4_NOCANCEL", Const, 0}, + {"SYS_WAIT6", Const, 1}, + {"SYS_WAITEVENT", Const, 0}, + {"SYS_WAITID", Const, 0}, + {"SYS_WAITID_NOCANCEL", Const, 0}, + {"SYS_WAITPID", Const, 0}, + {"SYS_WATCHEVENT", Const, 0}, + {"SYS_WORKQ_KERNRETURN", Const, 0}, + {"SYS_WORKQ_OPEN", Const, 0}, + {"SYS_WRITE", Const, 0}, + {"SYS_WRITEV", Const, 0}, + {"SYS_WRITEV_NOCANCEL", Const, 0}, + {"SYS_WRITE_NOCANCEL", Const, 0}, + {"SYS_YIELD", Const, 0}, + {"SYS__LLSEEK", Const, 0}, + {"SYS__LWP_CONTINUE", Const, 1}, + {"SYS__LWP_CREATE", Const, 1}, + {"SYS__LWP_CTL", Const, 1}, + {"SYS__LWP_DETACH", Const, 1}, + {"SYS__LWP_EXIT", Const, 1}, + {"SYS__LWP_GETNAME", Const, 1}, + {"SYS__LWP_GETPRIVATE", Const, 1}, + {"SYS__LWP_KILL", Const, 1}, + {"SYS__LWP_PARK", Const, 1}, + {"SYS__LWP_SELF", Const, 1}, + {"SYS__LWP_SETNAME", Const, 1}, + {"SYS__LWP_SETPRIVATE", Const, 1}, + {"SYS__LWP_SUSPEND", Const, 1}, + {"SYS__LWP_UNPARK", Const, 1}, + {"SYS__LWP_UNPARK_ALL", Const, 1}, + {"SYS__LWP_WAIT", Const, 1}, + {"SYS__LWP_WAKEUP", Const, 1}, + {"SYS__NEWSELECT", Const, 0}, + {"SYS__PSET_BIND", Const, 1}, + {"SYS__SCHED_GETAFFINITY", Const, 1}, + {"SYS__SCHED_GETPARAM", Const, 1}, + {"SYS__SCHED_SETAFFINITY", Const, 1}, + {"SYS__SCHED_SETPARAM", Const, 1}, + {"SYS__SYSCTL", Const, 0}, + {"SYS__UMTX_LOCK", Const, 0}, + {"SYS__UMTX_OP", Const, 0}, + {"SYS__UMTX_UNLOCK", Const, 0}, + {"SYS___ACL_ACLCHECK_FD", Const, 0}, + {"SYS___ACL_ACLCHECK_FILE", Const, 0}, + {"SYS___ACL_ACLCHECK_LINK", Const, 0}, + {"SYS___ACL_DELETE_FD", Const, 0}, + {"SYS___ACL_DELETE_FILE", Const, 0}, + {"SYS___ACL_DELETE_LINK", Const, 0}, + {"SYS___ACL_GET_FD", Const, 0}, + {"SYS___ACL_GET_FILE", Const, 0}, + {"SYS___ACL_GET_LINK", Const, 0}, + {"SYS___ACL_SET_FD", Const, 0}, + {"SYS___ACL_SET_FILE", Const, 0}, + {"SYS___ACL_SET_LINK", Const, 0}, + {"SYS___CAP_RIGHTS_GET", Const, 14}, + {"SYS___CLONE", Const, 1}, + {"SYS___DISABLE_THREADSIGNAL", Const, 0}, + {"SYS___GETCWD", Const, 0}, + {"SYS___GETLOGIN", Const, 1}, + {"SYS___GET_TCB", Const, 1}, + {"SYS___MAC_EXECVE", Const, 0}, + {"SYS___MAC_GETFSSTAT", Const, 0}, + {"SYS___MAC_GET_FD", Const, 0}, + {"SYS___MAC_GET_FILE", Const, 0}, + {"SYS___MAC_GET_LCID", Const, 0}, + {"SYS___MAC_GET_LCTX", Const, 0}, + {"SYS___MAC_GET_LINK", Const, 0}, + {"SYS___MAC_GET_MOUNT", Const, 0}, + {"SYS___MAC_GET_PID", Const, 0}, + {"SYS___MAC_GET_PROC", Const, 0}, + {"SYS___MAC_MOUNT", Const, 0}, + {"SYS___MAC_SET_FD", Const, 0}, + {"SYS___MAC_SET_FILE", Const, 0}, + {"SYS___MAC_SET_LCTX", Const, 0}, + {"SYS___MAC_SET_LINK", Const, 0}, + {"SYS___MAC_SET_PROC", Const, 0}, + {"SYS___MAC_SYSCALL", Const, 0}, + {"SYS___OLD_SEMWAIT_SIGNAL", Const, 0}, + {"SYS___OLD_SEMWAIT_SIGNAL_NOCANCEL", Const, 0}, + {"SYS___POSIX_CHOWN", Const, 1}, + {"SYS___POSIX_FCHOWN", Const, 1}, + {"SYS___POSIX_LCHOWN", Const, 1}, + {"SYS___POSIX_RENAME", Const, 1}, + {"SYS___PTHREAD_CANCELED", Const, 0}, + {"SYS___PTHREAD_CHDIR", Const, 0}, + {"SYS___PTHREAD_FCHDIR", Const, 0}, + {"SYS___PTHREAD_KILL", Const, 0}, + {"SYS___PTHREAD_MARKCANCEL", Const, 0}, + {"SYS___PTHREAD_SIGMASK", Const, 0}, + {"SYS___QUOTACTL", Const, 1}, + {"SYS___SEMCTL", Const, 1}, + {"SYS___SEMWAIT_SIGNAL", Const, 0}, + {"SYS___SEMWAIT_SIGNAL_NOCANCEL", Const, 0}, + {"SYS___SETLOGIN", Const, 1}, + {"SYS___SETUGID", Const, 0}, + {"SYS___SET_TCB", Const, 1}, + {"SYS___SIGACTION_SIGTRAMP", Const, 1}, + {"SYS___SIGTIMEDWAIT", Const, 1}, + {"SYS___SIGWAIT", Const, 0}, + {"SYS___SIGWAIT_NOCANCEL", Const, 0}, + {"SYS___SYSCTL", Const, 0}, + {"SYS___TFORK", Const, 1}, + {"SYS___THREXIT", Const, 1}, + {"SYS___THRSIGDIVERT", Const, 1}, + {"SYS___THRSLEEP", Const, 1}, + {"SYS___THRWAKEUP", Const, 1}, + {"S_ARCH1", Const, 1}, + {"S_ARCH2", Const, 1}, + {"S_BLKSIZE", Const, 0}, + {"S_IEXEC", Const, 0}, + {"S_IFBLK", Const, 0}, + {"S_IFCHR", Const, 0}, + {"S_IFDIR", Const, 0}, + {"S_IFIFO", Const, 0}, + {"S_IFLNK", Const, 0}, + {"S_IFMT", Const, 0}, + {"S_IFREG", Const, 0}, + {"S_IFSOCK", Const, 0}, + {"S_IFWHT", Const, 0}, + {"S_IREAD", Const, 0}, + {"S_IRGRP", Const, 0}, + {"S_IROTH", Const, 0}, + {"S_IRUSR", Const, 0}, + {"S_IRWXG", Const, 0}, + {"S_IRWXO", Const, 0}, + {"S_IRWXU", Const, 0}, + {"S_ISGID", Const, 0}, + {"S_ISTXT", Const, 0}, + {"S_ISUID", Const, 0}, + {"S_ISVTX", Const, 0}, + {"S_IWGRP", Const, 0}, + {"S_IWOTH", Const, 0}, + {"S_IWRITE", Const, 0}, + {"S_IWUSR", Const, 0}, + {"S_IXGRP", Const, 0}, + {"S_IXOTH", Const, 0}, + {"S_IXUSR", Const, 0}, + {"S_LOGIN_SET", Const, 1}, + {"SecurityAttributes", Type, 0}, + {"SecurityAttributes.InheritHandle", Field, 0}, + {"SecurityAttributes.Length", Field, 0}, + {"SecurityAttributes.SecurityDescriptor", Field, 0}, + {"Seek", Func, 0}, + {"Select", Func, 0}, + {"Sendfile", Func, 0}, + {"Sendmsg", Func, 0}, + {"SendmsgN", Func, 3}, + {"Sendto", Func, 0}, + {"Servent", Type, 0}, + {"Servent.Aliases", Field, 0}, + {"Servent.Name", Field, 0}, + {"Servent.Port", Field, 0}, + {"Servent.Proto", Field, 0}, + {"SetBpf", Func, 0}, + {"SetBpfBuflen", Func, 0}, + {"SetBpfDatalink", Func, 0}, + {"SetBpfHeadercmpl", Func, 0}, + {"SetBpfImmediate", Func, 0}, + {"SetBpfInterface", Func, 0}, + {"SetBpfPromisc", Func, 0}, + {"SetBpfTimeout", Func, 0}, + {"SetCurrentDirectory", Func, 0}, + {"SetEndOfFile", Func, 0}, + {"SetEnvironmentVariable", Func, 0}, + {"SetFileAttributes", Func, 0}, + {"SetFileCompletionNotificationModes", Func, 2}, + {"SetFilePointer", Func, 0}, + {"SetFileTime", Func, 0}, + {"SetHandleInformation", Func, 0}, + {"SetKevent", Func, 0}, + {"SetLsfPromisc", Func, 0}, + {"SetNonblock", Func, 0}, + {"Setdomainname", Func, 0}, + {"Setegid", Func, 0}, + {"Setenv", Func, 0}, + {"Seteuid", Func, 0}, + {"Setfsgid", Func, 0}, + {"Setfsuid", Func, 0}, + {"Setgid", Func, 0}, + {"Setgroups", Func, 0}, + {"Sethostname", Func, 0}, + {"Setlogin", Func, 0}, + {"Setpgid", Func, 0}, + {"Setpriority", Func, 0}, + {"Setprivexec", Func, 0}, + {"Setregid", Func, 0}, + {"Setresgid", Func, 0}, + {"Setresuid", Func, 0}, + {"Setreuid", Func, 0}, + {"Setrlimit", Func, 0}, + {"Setsid", Func, 0}, + {"Setsockopt", Func, 0}, + {"SetsockoptByte", Func, 0}, + {"SetsockoptICMPv6Filter", Func, 2}, + {"SetsockoptIPMreq", Func, 0}, + {"SetsockoptIPMreqn", Func, 0}, + {"SetsockoptIPv6Mreq", Func, 0}, + {"SetsockoptInet4Addr", Func, 0}, + {"SetsockoptInt", Func, 0}, + {"SetsockoptLinger", Func, 0}, + {"SetsockoptString", Func, 0}, + {"SetsockoptTimeval", Func, 0}, + {"Settimeofday", Func, 0}, + {"Setuid", Func, 0}, + {"Setxattr", Func, 1}, + {"Shutdown", Func, 0}, + {"SidTypeAlias", Const, 0}, + {"SidTypeComputer", Const, 0}, + {"SidTypeDeletedAccount", Const, 0}, + {"SidTypeDomain", Const, 0}, + {"SidTypeGroup", Const, 0}, + {"SidTypeInvalid", Const, 0}, + {"SidTypeLabel", Const, 0}, + {"SidTypeUnknown", Const, 0}, + {"SidTypeUser", Const, 0}, + {"SidTypeWellKnownGroup", Const, 0}, + {"Signal", Type, 0}, + {"SizeofBpfHdr", Const, 0}, + {"SizeofBpfInsn", Const, 0}, + {"SizeofBpfProgram", Const, 0}, + {"SizeofBpfStat", Const, 0}, + {"SizeofBpfVersion", Const, 0}, + {"SizeofBpfZbuf", Const, 0}, + {"SizeofBpfZbufHeader", Const, 0}, + {"SizeofCmsghdr", Const, 0}, + {"SizeofICMPv6Filter", Const, 2}, + {"SizeofIPMreq", Const, 0}, + {"SizeofIPMreqn", Const, 0}, + {"SizeofIPv6MTUInfo", Const, 2}, + {"SizeofIPv6Mreq", Const, 0}, + {"SizeofIfAddrmsg", Const, 0}, + {"SizeofIfAnnounceMsghdr", Const, 1}, + {"SizeofIfData", Const, 0}, + {"SizeofIfInfomsg", Const, 0}, + {"SizeofIfMsghdr", Const, 0}, + {"SizeofIfaMsghdr", Const, 0}, + {"SizeofIfmaMsghdr", Const, 0}, + {"SizeofIfmaMsghdr2", Const, 0}, + {"SizeofInet4Pktinfo", Const, 0}, + {"SizeofInet6Pktinfo", Const, 0}, + {"SizeofInotifyEvent", Const, 0}, + {"SizeofLinger", Const, 0}, + {"SizeofMsghdr", Const, 0}, + {"SizeofNlAttr", Const, 0}, + {"SizeofNlMsgerr", Const, 0}, + {"SizeofNlMsghdr", Const, 0}, + {"SizeofRtAttr", Const, 0}, + {"SizeofRtGenmsg", Const, 0}, + {"SizeofRtMetrics", Const, 0}, + {"SizeofRtMsg", Const, 0}, + {"SizeofRtMsghdr", Const, 0}, + {"SizeofRtNexthop", Const, 0}, + {"SizeofSockFilter", Const, 0}, + {"SizeofSockFprog", Const, 0}, + {"SizeofSockaddrAny", Const, 0}, + {"SizeofSockaddrDatalink", Const, 0}, + {"SizeofSockaddrInet4", Const, 0}, + {"SizeofSockaddrInet6", Const, 0}, + {"SizeofSockaddrLinklayer", Const, 0}, + {"SizeofSockaddrNetlink", Const, 0}, + {"SizeofSockaddrUnix", Const, 0}, + {"SizeofTCPInfo", Const, 1}, + {"SizeofUcred", Const, 0}, + {"SlicePtrFromStrings", Func, 1}, + {"SockFilter", Type, 0}, + {"SockFilter.Code", Field, 0}, + {"SockFilter.Jf", Field, 0}, + {"SockFilter.Jt", Field, 0}, + {"SockFilter.K", Field, 0}, + {"SockFprog", Type, 0}, + {"SockFprog.Filter", Field, 0}, + {"SockFprog.Len", Field, 0}, + {"SockFprog.Pad_cgo_0", Field, 0}, + {"Sockaddr", Type, 0}, + {"SockaddrDatalink", Type, 0}, + {"SockaddrDatalink.Alen", Field, 0}, + {"SockaddrDatalink.Data", Field, 0}, + {"SockaddrDatalink.Family", Field, 0}, + {"SockaddrDatalink.Index", Field, 0}, + {"SockaddrDatalink.Len", Field, 0}, + {"SockaddrDatalink.Nlen", Field, 0}, + {"SockaddrDatalink.Slen", Field, 0}, + {"SockaddrDatalink.Type", Field, 0}, + {"SockaddrGen", Type, 0}, + {"SockaddrInet4", Type, 0}, + {"SockaddrInet4.Addr", Field, 0}, + {"SockaddrInet4.Port", Field, 0}, + {"SockaddrInet6", Type, 0}, + {"SockaddrInet6.Addr", Field, 0}, + {"SockaddrInet6.Port", Field, 0}, + {"SockaddrInet6.ZoneId", Field, 0}, + {"SockaddrLinklayer", Type, 0}, + {"SockaddrLinklayer.Addr", Field, 0}, + {"SockaddrLinklayer.Halen", Field, 0}, + {"SockaddrLinklayer.Hatype", Field, 0}, + {"SockaddrLinklayer.Ifindex", Field, 0}, + {"SockaddrLinklayer.Pkttype", Field, 0}, + {"SockaddrLinklayer.Protocol", Field, 0}, + {"SockaddrNetlink", Type, 0}, + {"SockaddrNetlink.Family", Field, 0}, + {"SockaddrNetlink.Groups", Field, 0}, + {"SockaddrNetlink.Pad", Field, 0}, + {"SockaddrNetlink.Pid", Field, 0}, + {"SockaddrUnix", Type, 0}, + {"SockaddrUnix.Name", Field, 0}, + {"Socket", Func, 0}, + {"SocketControlMessage", Type, 0}, + {"SocketControlMessage.Data", Field, 0}, + {"SocketControlMessage.Header", Field, 0}, + {"SocketDisableIPv6", Var, 0}, + {"Socketpair", Func, 0}, + {"Splice", Func, 0}, + {"StartProcess", Func, 0}, + {"StartupInfo", Type, 0}, + {"StartupInfo.Cb", Field, 0}, + {"StartupInfo.Desktop", Field, 0}, + {"StartupInfo.FillAttribute", Field, 0}, + {"StartupInfo.Flags", Field, 0}, + {"StartupInfo.ShowWindow", Field, 0}, + {"StartupInfo.StdErr", Field, 0}, + {"StartupInfo.StdInput", Field, 0}, + {"StartupInfo.StdOutput", Field, 0}, + {"StartupInfo.Title", Field, 0}, + {"StartupInfo.X", Field, 0}, + {"StartupInfo.XCountChars", Field, 0}, + {"StartupInfo.XSize", Field, 0}, + {"StartupInfo.Y", Field, 0}, + {"StartupInfo.YCountChars", Field, 0}, + {"StartupInfo.YSize", Field, 0}, + {"Stat", Func, 0}, + {"Stat_t", Type, 0}, + {"Stat_t.Atim", Field, 0}, + {"Stat_t.Atim_ext", Field, 12}, + {"Stat_t.Atimespec", Field, 0}, + {"Stat_t.Birthtimespec", Field, 0}, + {"Stat_t.Blksize", Field, 0}, + {"Stat_t.Blocks", Field, 0}, + {"Stat_t.Btim_ext", Field, 12}, + {"Stat_t.Ctim", Field, 0}, + {"Stat_t.Ctim_ext", Field, 12}, + {"Stat_t.Ctimespec", Field, 0}, + {"Stat_t.Dev", Field, 0}, + {"Stat_t.Flags", Field, 0}, + {"Stat_t.Gen", Field, 0}, + {"Stat_t.Gid", Field, 0}, + {"Stat_t.Ino", Field, 0}, + {"Stat_t.Lspare", Field, 0}, + {"Stat_t.Lspare0", Field, 2}, + {"Stat_t.Lspare1", Field, 2}, + {"Stat_t.Mode", Field, 0}, + {"Stat_t.Mtim", Field, 0}, + {"Stat_t.Mtim_ext", Field, 12}, + {"Stat_t.Mtimespec", Field, 0}, + {"Stat_t.Nlink", Field, 0}, + {"Stat_t.Pad_cgo_0", Field, 0}, + {"Stat_t.Pad_cgo_1", Field, 0}, + {"Stat_t.Pad_cgo_2", Field, 0}, + {"Stat_t.Padding0", Field, 12}, + {"Stat_t.Padding1", Field, 12}, + {"Stat_t.Qspare", Field, 0}, + {"Stat_t.Rdev", Field, 0}, + {"Stat_t.Size", Field, 0}, + {"Stat_t.Spare", Field, 2}, + {"Stat_t.Uid", Field, 0}, + {"Stat_t.X__pad0", Field, 0}, + {"Stat_t.X__pad1", Field, 0}, + {"Stat_t.X__pad2", Field, 0}, + {"Stat_t.X__st_birthtim", Field, 2}, + {"Stat_t.X__st_ino", Field, 0}, + {"Stat_t.X__unused", Field, 0}, + {"Statfs", Func, 0}, + {"Statfs_t", Type, 0}, + {"Statfs_t.Asyncreads", Field, 0}, + {"Statfs_t.Asyncwrites", Field, 0}, + {"Statfs_t.Bavail", Field, 0}, + {"Statfs_t.Bfree", Field, 0}, + {"Statfs_t.Blocks", Field, 0}, + {"Statfs_t.Bsize", Field, 0}, + {"Statfs_t.Charspare", Field, 0}, + {"Statfs_t.F_asyncreads", Field, 2}, + {"Statfs_t.F_asyncwrites", Field, 2}, + {"Statfs_t.F_bavail", Field, 2}, + {"Statfs_t.F_bfree", Field, 2}, + {"Statfs_t.F_blocks", Field, 2}, + {"Statfs_t.F_bsize", Field, 2}, + {"Statfs_t.F_ctime", Field, 2}, + {"Statfs_t.F_favail", Field, 2}, + {"Statfs_t.F_ffree", Field, 2}, + {"Statfs_t.F_files", Field, 2}, + {"Statfs_t.F_flags", Field, 2}, + {"Statfs_t.F_fsid", Field, 2}, + {"Statfs_t.F_fstypename", Field, 2}, + {"Statfs_t.F_iosize", Field, 2}, + {"Statfs_t.F_mntfromname", Field, 2}, + {"Statfs_t.F_mntfromspec", Field, 3}, + {"Statfs_t.F_mntonname", Field, 2}, + {"Statfs_t.F_namemax", Field, 2}, + {"Statfs_t.F_owner", Field, 2}, + {"Statfs_t.F_spare", Field, 2}, + {"Statfs_t.F_syncreads", Field, 2}, + {"Statfs_t.F_syncwrites", Field, 2}, + {"Statfs_t.Ffree", Field, 0}, + {"Statfs_t.Files", Field, 0}, + {"Statfs_t.Flags", Field, 0}, + {"Statfs_t.Frsize", Field, 0}, + {"Statfs_t.Fsid", Field, 0}, + {"Statfs_t.Fssubtype", Field, 0}, + {"Statfs_t.Fstypename", Field, 0}, + {"Statfs_t.Iosize", Field, 0}, + {"Statfs_t.Mntfromname", Field, 0}, + {"Statfs_t.Mntonname", Field, 0}, + {"Statfs_t.Mount_info", Field, 2}, + {"Statfs_t.Namelen", Field, 0}, + {"Statfs_t.Namemax", Field, 0}, + {"Statfs_t.Owner", Field, 0}, + {"Statfs_t.Pad_cgo_0", Field, 0}, + {"Statfs_t.Pad_cgo_1", Field, 2}, + {"Statfs_t.Reserved", Field, 0}, + {"Statfs_t.Spare", Field, 0}, + {"Statfs_t.Syncreads", Field, 0}, + {"Statfs_t.Syncwrites", Field, 0}, + {"Statfs_t.Type", Field, 0}, + {"Statfs_t.Version", Field, 0}, + {"Stderr", Var, 0}, + {"Stdin", Var, 0}, + {"Stdout", Var, 0}, + {"StringBytePtr", Func, 0}, + {"StringByteSlice", Func, 0}, + {"StringSlicePtr", Func, 0}, + {"StringToSid", Func, 0}, + {"StringToUTF16", Func, 0}, + {"StringToUTF16Ptr", Func, 0}, + {"Symlink", Func, 0}, + {"Sync", Func, 0}, + {"SyncFileRange", Func, 0}, + {"SysProcAttr", Type, 0}, + {"SysProcAttr.AdditionalInheritedHandles", Field, 17}, + {"SysProcAttr.AmbientCaps", Field, 9}, + {"SysProcAttr.CgroupFD", Field, 20}, + {"SysProcAttr.Chroot", Field, 0}, + {"SysProcAttr.Cloneflags", Field, 2}, + {"SysProcAttr.CmdLine", Field, 0}, + {"SysProcAttr.CreationFlags", Field, 1}, + {"SysProcAttr.Credential", Field, 0}, + {"SysProcAttr.Ctty", Field, 1}, + {"SysProcAttr.Foreground", Field, 5}, + {"SysProcAttr.GidMappings", Field, 4}, + {"SysProcAttr.GidMappingsEnableSetgroups", Field, 5}, + {"SysProcAttr.HideWindow", Field, 0}, + {"SysProcAttr.Jail", Field, 21}, + {"SysProcAttr.NoInheritHandles", Field, 16}, + {"SysProcAttr.Noctty", Field, 0}, + {"SysProcAttr.ParentProcess", Field, 17}, + {"SysProcAttr.Pdeathsig", Field, 0}, + {"SysProcAttr.Pgid", Field, 5}, + {"SysProcAttr.PidFD", Field, 22}, + {"SysProcAttr.ProcessAttributes", Field, 13}, + {"SysProcAttr.Ptrace", Field, 0}, + {"SysProcAttr.Setctty", Field, 0}, + {"SysProcAttr.Setpgid", Field, 0}, + {"SysProcAttr.Setsid", Field, 0}, + {"SysProcAttr.ThreadAttributes", Field, 13}, + {"SysProcAttr.Token", Field, 10}, + {"SysProcAttr.UidMappings", Field, 4}, + {"SysProcAttr.Unshareflags", Field, 7}, + {"SysProcAttr.UseCgroupFD", Field, 20}, + {"SysProcIDMap", Type, 4}, + {"SysProcIDMap.ContainerID", Field, 4}, + {"SysProcIDMap.HostID", Field, 4}, + {"SysProcIDMap.Size", Field, 4}, + {"Syscall", Func, 0}, + {"Syscall12", Func, 0}, + {"Syscall15", Func, 0}, + {"Syscall18", Func, 12}, + {"Syscall6", Func, 0}, + {"Syscall9", Func, 0}, + {"SyscallN", Func, 18}, + {"Sysctl", Func, 0}, + {"SysctlUint32", Func, 0}, + {"Sysctlnode", Type, 2}, + {"Sysctlnode.Flags", Field, 2}, + {"Sysctlnode.Name", Field, 2}, + {"Sysctlnode.Num", Field, 2}, + {"Sysctlnode.Un", Field, 2}, + {"Sysctlnode.Ver", Field, 2}, + {"Sysctlnode.X__rsvd", Field, 2}, + {"Sysctlnode.X_sysctl_desc", Field, 2}, + {"Sysctlnode.X_sysctl_func", Field, 2}, + {"Sysctlnode.X_sysctl_parent", Field, 2}, + {"Sysctlnode.X_sysctl_size", Field, 2}, + {"Sysinfo", Func, 0}, + {"Sysinfo_t", Type, 0}, + {"Sysinfo_t.Bufferram", Field, 0}, + {"Sysinfo_t.Freehigh", Field, 0}, + {"Sysinfo_t.Freeram", Field, 0}, + {"Sysinfo_t.Freeswap", Field, 0}, + {"Sysinfo_t.Loads", Field, 0}, + {"Sysinfo_t.Pad", Field, 0}, + {"Sysinfo_t.Pad_cgo_0", Field, 0}, + {"Sysinfo_t.Pad_cgo_1", Field, 0}, + {"Sysinfo_t.Procs", Field, 0}, + {"Sysinfo_t.Sharedram", Field, 0}, + {"Sysinfo_t.Totalhigh", Field, 0}, + {"Sysinfo_t.Totalram", Field, 0}, + {"Sysinfo_t.Totalswap", Field, 0}, + {"Sysinfo_t.Unit", Field, 0}, + {"Sysinfo_t.Uptime", Field, 0}, + {"Sysinfo_t.X_f", Field, 0}, + {"Systemtime", Type, 0}, + {"Systemtime.Day", Field, 0}, + {"Systemtime.DayOfWeek", Field, 0}, + {"Systemtime.Hour", Field, 0}, + {"Systemtime.Milliseconds", Field, 0}, + {"Systemtime.Minute", Field, 0}, + {"Systemtime.Month", Field, 0}, + {"Systemtime.Second", Field, 0}, + {"Systemtime.Year", Field, 0}, + {"TCGETS", Const, 0}, + {"TCIFLUSH", Const, 1}, + {"TCIOFLUSH", Const, 1}, + {"TCOFLUSH", Const, 1}, + {"TCPInfo", Type, 1}, + {"TCPInfo.Advmss", Field, 1}, + {"TCPInfo.Ato", Field, 1}, + {"TCPInfo.Backoff", Field, 1}, + {"TCPInfo.Ca_state", Field, 1}, + {"TCPInfo.Fackets", Field, 1}, + {"TCPInfo.Last_ack_recv", Field, 1}, + {"TCPInfo.Last_ack_sent", Field, 1}, + {"TCPInfo.Last_data_recv", Field, 1}, + {"TCPInfo.Last_data_sent", Field, 1}, + {"TCPInfo.Lost", Field, 1}, + {"TCPInfo.Options", Field, 1}, + {"TCPInfo.Pad_cgo_0", Field, 1}, + {"TCPInfo.Pmtu", Field, 1}, + {"TCPInfo.Probes", Field, 1}, + {"TCPInfo.Rcv_mss", Field, 1}, + {"TCPInfo.Rcv_rtt", Field, 1}, + {"TCPInfo.Rcv_space", Field, 1}, + {"TCPInfo.Rcv_ssthresh", Field, 1}, + {"TCPInfo.Reordering", Field, 1}, + {"TCPInfo.Retrans", Field, 1}, + {"TCPInfo.Retransmits", Field, 1}, + {"TCPInfo.Rto", Field, 1}, + {"TCPInfo.Rtt", Field, 1}, + {"TCPInfo.Rttvar", Field, 1}, + {"TCPInfo.Sacked", Field, 1}, + {"TCPInfo.Snd_cwnd", Field, 1}, + {"TCPInfo.Snd_mss", Field, 1}, + {"TCPInfo.Snd_ssthresh", Field, 1}, + {"TCPInfo.State", Field, 1}, + {"TCPInfo.Total_retrans", Field, 1}, + {"TCPInfo.Unacked", Field, 1}, + {"TCPKeepalive", Type, 3}, + {"TCPKeepalive.Interval", Field, 3}, + {"TCPKeepalive.OnOff", Field, 3}, + {"TCPKeepalive.Time", Field, 3}, + {"TCP_CA_NAME_MAX", Const, 0}, + {"TCP_CONGCTL", Const, 1}, + {"TCP_CONGESTION", Const, 0}, + {"TCP_CONNECTIONTIMEOUT", Const, 0}, + {"TCP_CORK", Const, 0}, + {"TCP_DEFER_ACCEPT", Const, 0}, + {"TCP_ENABLE_ECN", Const, 16}, + {"TCP_INFO", Const, 0}, + {"TCP_KEEPALIVE", Const, 0}, + {"TCP_KEEPCNT", Const, 0}, + {"TCP_KEEPIDLE", Const, 0}, + {"TCP_KEEPINIT", Const, 1}, + {"TCP_KEEPINTVL", Const, 0}, + {"TCP_LINGER2", Const, 0}, + {"TCP_MAXBURST", Const, 0}, + {"TCP_MAXHLEN", Const, 0}, + {"TCP_MAXOLEN", Const, 0}, + {"TCP_MAXSEG", Const, 0}, + {"TCP_MAXWIN", Const, 0}, + {"TCP_MAX_SACK", Const, 0}, + {"TCP_MAX_WINSHIFT", Const, 0}, + {"TCP_MD5SIG", Const, 0}, + {"TCP_MD5SIG_MAXKEYLEN", Const, 0}, + {"TCP_MINMSS", Const, 0}, + {"TCP_MINMSSOVERLOAD", Const, 0}, + {"TCP_MSS", Const, 0}, + {"TCP_NODELAY", Const, 0}, + {"TCP_NOOPT", Const, 0}, + {"TCP_NOPUSH", Const, 0}, + {"TCP_NOTSENT_LOWAT", Const, 16}, + {"TCP_NSTATES", Const, 1}, + {"TCP_QUICKACK", Const, 0}, + {"TCP_RXT_CONNDROPTIME", Const, 0}, + {"TCP_RXT_FINDROP", Const, 0}, + {"TCP_SACK_ENABLE", Const, 1}, + {"TCP_SENDMOREACKS", Const, 16}, + {"TCP_SYNCNT", Const, 0}, + {"TCP_VENDOR", Const, 3}, + {"TCP_WINDOW_CLAMP", Const, 0}, + {"TCSAFLUSH", Const, 1}, + {"TCSETS", Const, 0}, + {"TF_DISCONNECT", Const, 0}, + {"TF_REUSE_SOCKET", Const, 0}, + {"TF_USE_DEFAULT_WORKER", Const, 0}, + {"TF_USE_KERNEL_APC", Const, 0}, + {"TF_USE_SYSTEM_THREAD", Const, 0}, + {"TF_WRITE_BEHIND", Const, 0}, + {"TH32CS_INHERIT", Const, 4}, + {"TH32CS_SNAPALL", Const, 4}, + {"TH32CS_SNAPHEAPLIST", Const, 4}, + {"TH32CS_SNAPMODULE", Const, 4}, + {"TH32CS_SNAPMODULE32", Const, 4}, + {"TH32CS_SNAPPROCESS", Const, 4}, + {"TH32CS_SNAPTHREAD", Const, 4}, + {"TIME_ZONE_ID_DAYLIGHT", Const, 0}, + {"TIME_ZONE_ID_STANDARD", Const, 0}, + {"TIME_ZONE_ID_UNKNOWN", Const, 0}, + {"TIOCCBRK", Const, 0}, + {"TIOCCDTR", Const, 0}, + {"TIOCCONS", Const, 0}, + {"TIOCDCDTIMESTAMP", Const, 0}, + {"TIOCDRAIN", Const, 0}, + {"TIOCDSIMICROCODE", Const, 0}, + {"TIOCEXCL", Const, 0}, + {"TIOCEXT", Const, 0}, + {"TIOCFLAG_CDTRCTS", Const, 1}, + {"TIOCFLAG_CLOCAL", Const, 1}, + {"TIOCFLAG_CRTSCTS", Const, 1}, + {"TIOCFLAG_MDMBUF", Const, 1}, + {"TIOCFLAG_PPS", Const, 1}, + {"TIOCFLAG_SOFTCAR", Const, 1}, + {"TIOCFLUSH", Const, 0}, + {"TIOCGDEV", Const, 0}, + {"TIOCGDRAINWAIT", Const, 0}, + {"TIOCGETA", Const, 0}, + {"TIOCGETD", Const, 0}, + {"TIOCGFLAGS", Const, 1}, + {"TIOCGICOUNT", Const, 0}, + {"TIOCGLCKTRMIOS", Const, 0}, + {"TIOCGLINED", Const, 1}, + {"TIOCGPGRP", Const, 0}, + {"TIOCGPTN", Const, 0}, + {"TIOCGQSIZE", Const, 1}, + {"TIOCGRANTPT", Const, 1}, + {"TIOCGRS485", Const, 0}, + {"TIOCGSERIAL", Const, 0}, + {"TIOCGSID", Const, 0}, + {"TIOCGSIZE", Const, 1}, + {"TIOCGSOFTCAR", Const, 0}, + {"TIOCGTSTAMP", Const, 1}, + {"TIOCGWINSZ", Const, 0}, + {"TIOCINQ", Const, 0}, + {"TIOCIXOFF", Const, 0}, + {"TIOCIXON", Const, 0}, + {"TIOCLINUX", Const, 0}, + {"TIOCMBIC", Const, 0}, + {"TIOCMBIS", Const, 0}, + {"TIOCMGDTRWAIT", Const, 0}, + {"TIOCMGET", Const, 0}, + {"TIOCMIWAIT", Const, 0}, + {"TIOCMODG", Const, 0}, + {"TIOCMODS", Const, 0}, + {"TIOCMSDTRWAIT", Const, 0}, + {"TIOCMSET", Const, 0}, + {"TIOCM_CAR", Const, 0}, + {"TIOCM_CD", Const, 0}, + {"TIOCM_CTS", Const, 0}, + {"TIOCM_DCD", Const, 0}, + {"TIOCM_DSR", Const, 0}, + {"TIOCM_DTR", Const, 0}, + {"TIOCM_LE", Const, 0}, + {"TIOCM_RI", Const, 0}, + {"TIOCM_RNG", Const, 0}, + {"TIOCM_RTS", Const, 0}, + {"TIOCM_SR", Const, 0}, + {"TIOCM_ST", Const, 0}, + {"TIOCNOTTY", Const, 0}, + {"TIOCNXCL", Const, 0}, + {"TIOCOUTQ", Const, 0}, + {"TIOCPKT", Const, 0}, + {"TIOCPKT_DATA", Const, 0}, + {"TIOCPKT_DOSTOP", Const, 0}, + {"TIOCPKT_FLUSHREAD", Const, 0}, + {"TIOCPKT_FLUSHWRITE", Const, 0}, + {"TIOCPKT_IOCTL", Const, 0}, + {"TIOCPKT_NOSTOP", Const, 0}, + {"TIOCPKT_START", Const, 0}, + {"TIOCPKT_STOP", Const, 0}, + {"TIOCPTMASTER", Const, 0}, + {"TIOCPTMGET", Const, 1}, + {"TIOCPTSNAME", Const, 1}, + {"TIOCPTYGNAME", Const, 0}, + {"TIOCPTYGRANT", Const, 0}, + {"TIOCPTYUNLK", Const, 0}, + {"TIOCRCVFRAME", Const, 1}, + {"TIOCREMOTE", Const, 0}, + {"TIOCSBRK", Const, 0}, + {"TIOCSCONS", Const, 0}, + {"TIOCSCTTY", Const, 0}, + {"TIOCSDRAINWAIT", Const, 0}, + {"TIOCSDTR", Const, 0}, + {"TIOCSERCONFIG", Const, 0}, + {"TIOCSERGETLSR", Const, 0}, + {"TIOCSERGETMULTI", Const, 0}, + {"TIOCSERGSTRUCT", Const, 0}, + {"TIOCSERGWILD", Const, 0}, + {"TIOCSERSETMULTI", Const, 0}, + {"TIOCSERSWILD", Const, 0}, + {"TIOCSER_TEMT", Const, 0}, + {"TIOCSETA", Const, 0}, + {"TIOCSETAF", Const, 0}, + {"TIOCSETAW", Const, 0}, + {"TIOCSETD", Const, 0}, + {"TIOCSFLAGS", Const, 1}, + {"TIOCSIG", Const, 0}, + {"TIOCSLCKTRMIOS", Const, 0}, + {"TIOCSLINED", Const, 1}, + {"TIOCSPGRP", Const, 0}, + {"TIOCSPTLCK", Const, 0}, + {"TIOCSQSIZE", Const, 1}, + {"TIOCSRS485", Const, 0}, + {"TIOCSSERIAL", Const, 0}, + {"TIOCSSIZE", Const, 1}, + {"TIOCSSOFTCAR", Const, 0}, + {"TIOCSTART", Const, 0}, + {"TIOCSTAT", Const, 0}, + {"TIOCSTI", Const, 0}, + {"TIOCSTOP", Const, 0}, + {"TIOCSTSTAMP", Const, 1}, + {"TIOCSWINSZ", Const, 0}, + {"TIOCTIMESTAMP", Const, 0}, + {"TIOCUCNTL", Const, 0}, + {"TIOCVHANGUP", Const, 0}, + {"TIOCXMTFRAME", Const, 1}, + {"TOKEN_ADJUST_DEFAULT", Const, 0}, + {"TOKEN_ADJUST_GROUPS", Const, 0}, + {"TOKEN_ADJUST_PRIVILEGES", Const, 0}, + {"TOKEN_ADJUST_SESSIONID", Const, 11}, + {"TOKEN_ALL_ACCESS", Const, 0}, + {"TOKEN_ASSIGN_PRIMARY", Const, 0}, + {"TOKEN_DUPLICATE", Const, 0}, + {"TOKEN_EXECUTE", Const, 0}, + {"TOKEN_IMPERSONATE", Const, 0}, + {"TOKEN_QUERY", Const, 0}, + {"TOKEN_QUERY_SOURCE", Const, 0}, + {"TOKEN_READ", Const, 0}, + {"TOKEN_WRITE", Const, 0}, + {"TOSTOP", Const, 0}, + {"TRUNCATE_EXISTING", Const, 0}, + {"TUNATTACHFILTER", Const, 0}, + {"TUNDETACHFILTER", Const, 0}, + {"TUNGETFEATURES", Const, 0}, + {"TUNGETIFF", Const, 0}, + {"TUNGETSNDBUF", Const, 0}, + {"TUNGETVNETHDRSZ", Const, 0}, + {"TUNSETDEBUG", Const, 0}, + {"TUNSETGROUP", Const, 0}, + {"TUNSETIFF", Const, 0}, + {"TUNSETLINK", Const, 0}, + {"TUNSETNOCSUM", Const, 0}, + {"TUNSETOFFLOAD", Const, 0}, + {"TUNSETOWNER", Const, 0}, + {"TUNSETPERSIST", Const, 0}, + {"TUNSETSNDBUF", Const, 0}, + {"TUNSETTXFILTER", Const, 0}, + {"TUNSETVNETHDRSZ", Const, 0}, + {"Tee", Func, 0}, + {"TerminateProcess", Func, 0}, + {"Termios", Type, 0}, + {"Termios.Cc", Field, 0}, + {"Termios.Cflag", Field, 0}, + {"Termios.Iflag", Field, 0}, + {"Termios.Ispeed", Field, 0}, + {"Termios.Lflag", Field, 0}, + {"Termios.Line", Field, 0}, + {"Termios.Oflag", Field, 0}, + {"Termios.Ospeed", Field, 0}, + {"Termios.Pad_cgo_0", Field, 0}, + {"Tgkill", Func, 0}, + {"Time", Func, 0}, + {"Time_t", Type, 0}, + {"Times", Func, 0}, + {"Timespec", Type, 0}, + {"Timespec.Nsec", Field, 0}, + {"Timespec.Pad_cgo_0", Field, 2}, + {"Timespec.Sec", Field, 0}, + {"TimespecToNsec", Func, 0}, + {"Timeval", Type, 0}, + {"Timeval.Pad_cgo_0", Field, 0}, + {"Timeval.Sec", Field, 0}, + {"Timeval.Usec", Field, 0}, + {"Timeval32", Type, 0}, + {"Timeval32.Sec", Field, 0}, + {"Timeval32.Usec", Field, 0}, + {"TimevalToNsec", Func, 0}, + {"Timex", Type, 0}, + {"Timex.Calcnt", Field, 0}, + {"Timex.Constant", Field, 0}, + {"Timex.Errcnt", Field, 0}, + {"Timex.Esterror", Field, 0}, + {"Timex.Freq", Field, 0}, + {"Timex.Jitcnt", Field, 0}, + {"Timex.Jitter", Field, 0}, + {"Timex.Maxerror", Field, 0}, + {"Timex.Modes", Field, 0}, + {"Timex.Offset", Field, 0}, + {"Timex.Pad_cgo_0", Field, 0}, + {"Timex.Pad_cgo_1", Field, 0}, + {"Timex.Pad_cgo_2", Field, 0}, + {"Timex.Pad_cgo_3", Field, 0}, + {"Timex.Ppsfreq", Field, 0}, + {"Timex.Precision", Field, 0}, + {"Timex.Shift", Field, 0}, + {"Timex.Stabil", Field, 0}, + {"Timex.Status", Field, 0}, + {"Timex.Stbcnt", Field, 0}, + {"Timex.Tai", Field, 0}, + {"Timex.Tick", Field, 0}, + {"Timex.Time", Field, 0}, + {"Timex.Tolerance", Field, 0}, + {"Timezoneinformation", Type, 0}, + {"Timezoneinformation.Bias", Field, 0}, + {"Timezoneinformation.DaylightBias", Field, 0}, + {"Timezoneinformation.DaylightDate", Field, 0}, + {"Timezoneinformation.DaylightName", Field, 0}, + {"Timezoneinformation.StandardBias", Field, 0}, + {"Timezoneinformation.StandardDate", Field, 0}, + {"Timezoneinformation.StandardName", Field, 0}, + {"Tms", Type, 0}, + {"Tms.Cstime", Field, 0}, + {"Tms.Cutime", Field, 0}, + {"Tms.Stime", Field, 0}, + {"Tms.Utime", Field, 0}, + {"Token", Type, 0}, + {"TokenAccessInformation", Const, 0}, + {"TokenAuditPolicy", Const, 0}, + {"TokenDefaultDacl", Const, 0}, + {"TokenElevation", Const, 0}, + {"TokenElevationType", Const, 0}, + {"TokenGroups", Const, 0}, + {"TokenGroupsAndPrivileges", Const, 0}, + {"TokenHasRestrictions", Const, 0}, + {"TokenImpersonationLevel", Const, 0}, + {"TokenIntegrityLevel", Const, 0}, + {"TokenLinkedToken", Const, 0}, + {"TokenLogonSid", Const, 0}, + {"TokenMandatoryPolicy", Const, 0}, + {"TokenOrigin", Const, 0}, + {"TokenOwner", Const, 0}, + {"TokenPrimaryGroup", Const, 0}, + {"TokenPrivileges", Const, 0}, + {"TokenRestrictedSids", Const, 0}, + {"TokenSandBoxInert", Const, 0}, + {"TokenSessionId", Const, 0}, + {"TokenSessionReference", Const, 0}, + {"TokenSource", Const, 0}, + {"TokenStatistics", Const, 0}, + {"TokenType", Const, 0}, + {"TokenUIAccess", Const, 0}, + {"TokenUser", Const, 0}, + {"TokenVirtualizationAllowed", Const, 0}, + {"TokenVirtualizationEnabled", Const, 0}, + {"Tokenprimarygroup", Type, 0}, + {"Tokenprimarygroup.PrimaryGroup", Field, 0}, + {"Tokenuser", Type, 0}, + {"Tokenuser.User", Field, 0}, + {"TranslateAccountName", Func, 0}, + {"TranslateName", Func, 0}, + {"TransmitFile", Func, 0}, + {"TransmitFileBuffers", Type, 0}, + {"TransmitFileBuffers.Head", Field, 0}, + {"TransmitFileBuffers.HeadLength", Field, 0}, + {"TransmitFileBuffers.Tail", Field, 0}, + {"TransmitFileBuffers.TailLength", Field, 0}, + {"Truncate", Func, 0}, + {"UNIX_PATH_MAX", Const, 12}, + {"USAGE_MATCH_TYPE_AND", Const, 0}, + {"USAGE_MATCH_TYPE_OR", Const, 0}, + {"UTF16FromString", Func, 1}, + {"UTF16PtrFromString", Func, 1}, + {"UTF16ToString", Func, 0}, + {"Ucred", Type, 0}, + {"Ucred.Gid", Field, 0}, + {"Ucred.Pid", Field, 0}, + {"Ucred.Uid", Field, 0}, + {"Umask", Func, 0}, + {"Uname", Func, 0}, + {"Undelete", Func, 0}, + {"UnixCredentials", Func, 0}, + {"UnixRights", Func, 0}, + {"Unlink", Func, 0}, + {"Unlinkat", Func, 0}, + {"UnmapViewOfFile", Func, 0}, + {"Unmount", Func, 0}, + {"Unsetenv", Func, 4}, + {"Unshare", Func, 0}, + {"UserInfo10", Type, 0}, + {"UserInfo10.Comment", Field, 0}, + {"UserInfo10.FullName", Field, 0}, + {"UserInfo10.Name", Field, 0}, + {"UserInfo10.UsrComment", Field, 0}, + {"Ustat", Func, 0}, + {"Ustat_t", Type, 0}, + {"Ustat_t.Fname", Field, 0}, + {"Ustat_t.Fpack", Field, 0}, + {"Ustat_t.Pad_cgo_0", Field, 0}, + {"Ustat_t.Pad_cgo_1", Field, 0}, + {"Ustat_t.Tfree", Field, 0}, + {"Ustat_t.Tinode", Field, 0}, + {"Utimbuf", Type, 0}, + {"Utimbuf.Actime", Field, 0}, + {"Utimbuf.Modtime", Field, 0}, + {"Utime", Func, 0}, + {"Utimes", Func, 0}, + {"UtimesNano", Func, 1}, + {"Utsname", Type, 0}, + {"Utsname.Domainname", Field, 0}, + {"Utsname.Machine", Field, 0}, + {"Utsname.Nodename", Field, 0}, + {"Utsname.Release", Field, 0}, + {"Utsname.Sysname", Field, 0}, + {"Utsname.Version", Field, 0}, + {"VDISCARD", Const, 0}, + {"VDSUSP", Const, 1}, + {"VEOF", Const, 0}, + {"VEOL", Const, 0}, + {"VEOL2", Const, 0}, + {"VERASE", Const, 0}, + {"VERASE2", Const, 1}, + {"VINTR", Const, 0}, + {"VKILL", Const, 0}, + {"VLNEXT", Const, 0}, + {"VMIN", Const, 0}, + {"VQUIT", Const, 0}, + {"VREPRINT", Const, 0}, + {"VSTART", Const, 0}, + {"VSTATUS", Const, 1}, + {"VSTOP", Const, 0}, + {"VSUSP", Const, 0}, + {"VSWTC", Const, 0}, + {"VT0", Const, 1}, + {"VT1", Const, 1}, + {"VTDLY", Const, 1}, + {"VTIME", Const, 0}, + {"VWERASE", Const, 0}, + {"VirtualLock", Func, 0}, + {"VirtualUnlock", Func, 0}, + {"WAIT_ABANDONED", Const, 0}, + {"WAIT_FAILED", Const, 0}, + {"WAIT_OBJECT_0", Const, 0}, + {"WAIT_TIMEOUT", Const, 0}, + {"WALL", Const, 0}, + {"WALLSIG", Const, 1}, + {"WALTSIG", Const, 1}, + {"WCLONE", Const, 0}, + {"WCONTINUED", Const, 0}, + {"WCOREFLAG", Const, 0}, + {"WEXITED", Const, 0}, + {"WLINUXCLONE", Const, 0}, + {"WNOHANG", Const, 0}, + {"WNOTHREAD", Const, 0}, + {"WNOWAIT", Const, 0}, + {"WNOZOMBIE", Const, 1}, + {"WOPTSCHECKED", Const, 1}, + {"WORDSIZE", Const, 0}, + {"WSABuf", Type, 0}, + {"WSABuf.Buf", Field, 0}, + {"WSABuf.Len", Field, 0}, + {"WSACleanup", Func, 0}, + {"WSADESCRIPTION_LEN", Const, 0}, + {"WSAData", Type, 0}, + {"WSAData.Description", Field, 0}, + {"WSAData.HighVersion", Field, 0}, + {"WSAData.MaxSockets", Field, 0}, + {"WSAData.MaxUdpDg", Field, 0}, + {"WSAData.SystemStatus", Field, 0}, + {"WSAData.VendorInfo", Field, 0}, + {"WSAData.Version", Field, 0}, + {"WSAEACCES", Const, 2}, + {"WSAECONNABORTED", Const, 9}, + {"WSAECONNRESET", Const, 3}, + {"WSAENOPROTOOPT", Const, 23}, + {"WSAEnumProtocols", Func, 2}, + {"WSAID_CONNECTEX", Var, 1}, + {"WSAIoctl", Func, 0}, + {"WSAPROTOCOL_LEN", Const, 2}, + {"WSAProtocolChain", Type, 2}, + {"WSAProtocolChain.ChainEntries", Field, 2}, + {"WSAProtocolChain.ChainLen", Field, 2}, + {"WSAProtocolInfo", Type, 2}, + {"WSAProtocolInfo.AddressFamily", Field, 2}, + {"WSAProtocolInfo.CatalogEntryId", Field, 2}, + {"WSAProtocolInfo.MaxSockAddr", Field, 2}, + {"WSAProtocolInfo.MessageSize", Field, 2}, + {"WSAProtocolInfo.MinSockAddr", Field, 2}, + {"WSAProtocolInfo.NetworkByteOrder", Field, 2}, + {"WSAProtocolInfo.Protocol", Field, 2}, + {"WSAProtocolInfo.ProtocolChain", Field, 2}, + {"WSAProtocolInfo.ProtocolMaxOffset", Field, 2}, + {"WSAProtocolInfo.ProtocolName", Field, 2}, + {"WSAProtocolInfo.ProviderFlags", Field, 2}, + {"WSAProtocolInfo.ProviderId", Field, 2}, + {"WSAProtocolInfo.ProviderReserved", Field, 2}, + {"WSAProtocolInfo.SecurityScheme", Field, 2}, + {"WSAProtocolInfo.ServiceFlags1", Field, 2}, + {"WSAProtocolInfo.ServiceFlags2", Field, 2}, + {"WSAProtocolInfo.ServiceFlags3", Field, 2}, + {"WSAProtocolInfo.ServiceFlags4", Field, 2}, + {"WSAProtocolInfo.SocketType", Field, 2}, + {"WSAProtocolInfo.Version", Field, 2}, + {"WSARecv", Func, 0}, + {"WSARecvFrom", Func, 0}, + {"WSASYS_STATUS_LEN", Const, 0}, + {"WSASend", Func, 0}, + {"WSASendTo", Func, 0}, + {"WSASendto", Func, 0}, + {"WSAStartup", Func, 0}, + {"WSTOPPED", Const, 0}, + {"WTRAPPED", Const, 1}, + {"WUNTRACED", Const, 0}, + {"Wait4", Func, 0}, + {"WaitForSingleObject", Func, 0}, + {"WaitStatus", Type, 0}, + {"WaitStatus.ExitCode", Field, 0}, + {"Win32FileAttributeData", Type, 0}, + {"Win32FileAttributeData.CreationTime", Field, 0}, + {"Win32FileAttributeData.FileAttributes", Field, 0}, + {"Win32FileAttributeData.FileSizeHigh", Field, 0}, + {"Win32FileAttributeData.FileSizeLow", Field, 0}, + {"Win32FileAttributeData.LastAccessTime", Field, 0}, + {"Win32FileAttributeData.LastWriteTime", Field, 0}, + {"Win32finddata", Type, 0}, + {"Win32finddata.AlternateFileName", Field, 0}, + {"Win32finddata.CreationTime", Field, 0}, + {"Win32finddata.FileAttributes", Field, 0}, + {"Win32finddata.FileName", Field, 0}, + {"Win32finddata.FileSizeHigh", Field, 0}, + {"Win32finddata.FileSizeLow", Field, 0}, + {"Win32finddata.LastAccessTime", Field, 0}, + {"Win32finddata.LastWriteTime", Field, 0}, + {"Win32finddata.Reserved0", Field, 0}, + {"Win32finddata.Reserved1", Field, 0}, + {"Write", Func, 0}, + {"WriteConsole", Func, 1}, + {"WriteFile", Func, 0}, + {"X509_ASN_ENCODING", Const, 0}, + {"XCASE", Const, 0}, + {"XP1_CONNECTIONLESS", Const, 2}, + {"XP1_CONNECT_DATA", Const, 2}, + {"XP1_DISCONNECT_DATA", Const, 2}, + {"XP1_EXPEDITED_DATA", Const, 2}, + {"XP1_GRACEFUL_CLOSE", Const, 2}, + {"XP1_GUARANTEED_DELIVERY", Const, 2}, + {"XP1_GUARANTEED_ORDER", Const, 2}, + {"XP1_IFS_HANDLES", Const, 2}, + {"XP1_MESSAGE_ORIENTED", Const, 2}, + {"XP1_MULTIPOINT_CONTROL_PLANE", Const, 2}, + {"XP1_MULTIPOINT_DATA_PLANE", Const, 2}, + {"XP1_PARTIAL_MESSAGE", Const, 2}, + {"XP1_PSEUDO_STREAM", Const, 2}, + {"XP1_QOS_SUPPORTED", Const, 2}, + {"XP1_SAN_SUPPORT_SDP", Const, 2}, + {"XP1_SUPPORT_BROADCAST", Const, 2}, + {"XP1_SUPPORT_MULTIPOINT", Const, 2}, + {"XP1_UNI_RECV", Const, 2}, + {"XP1_UNI_SEND", Const, 2}, + }, + "syscall/js": { + {"CopyBytesToGo", Func, 0}, + {"CopyBytesToJS", Func, 0}, + {"Error", Type, 0}, + {"Func", Type, 0}, + {"FuncOf", Func, 0}, + {"Global", Func, 0}, + {"Null", Func, 0}, + {"Type", Type, 0}, + {"TypeBoolean", Const, 0}, + {"TypeFunction", Const, 0}, + {"TypeNull", Const, 0}, + {"TypeNumber", Const, 0}, + {"TypeObject", Const, 0}, + {"TypeString", Const, 0}, + {"TypeSymbol", Const, 0}, + {"TypeUndefined", Const, 0}, + {"Undefined", Func, 0}, + {"Value", Type, 0}, + {"ValueError", Type, 0}, + {"ValueOf", Func, 0}, + }, + "testing": { + {"(*B).Cleanup", Method, 14}, + {"(*B).Elapsed", Method, 20}, + {"(*B).Error", Method, 0}, + {"(*B).Errorf", Method, 0}, + {"(*B).Fail", Method, 0}, + {"(*B).FailNow", Method, 0}, + {"(*B).Failed", Method, 0}, + {"(*B).Fatal", Method, 0}, + {"(*B).Fatalf", Method, 0}, + {"(*B).Helper", Method, 9}, + {"(*B).Log", Method, 0}, + {"(*B).Logf", Method, 0}, + {"(*B).Name", Method, 8}, + {"(*B).ReportAllocs", Method, 1}, + {"(*B).ReportMetric", Method, 13}, + {"(*B).ResetTimer", Method, 0}, + {"(*B).Run", Method, 7}, + {"(*B).RunParallel", Method, 3}, + {"(*B).SetBytes", Method, 0}, + {"(*B).SetParallelism", Method, 3}, + {"(*B).Setenv", Method, 17}, + {"(*B).Skip", Method, 1}, + {"(*B).SkipNow", Method, 1}, + {"(*B).Skipf", Method, 1}, + {"(*B).Skipped", Method, 1}, + {"(*B).StartTimer", Method, 0}, + {"(*B).StopTimer", Method, 0}, + {"(*B).TempDir", Method, 15}, + {"(*F).Add", Method, 18}, + {"(*F).Cleanup", Method, 18}, + {"(*F).Error", Method, 18}, + {"(*F).Errorf", Method, 18}, + {"(*F).Fail", Method, 18}, + {"(*F).FailNow", Method, 18}, + {"(*F).Failed", Method, 18}, + {"(*F).Fatal", Method, 18}, + {"(*F).Fatalf", Method, 18}, + {"(*F).Fuzz", Method, 18}, + {"(*F).Helper", Method, 18}, + {"(*F).Log", Method, 18}, + {"(*F).Logf", Method, 18}, + {"(*F).Name", Method, 18}, + {"(*F).Setenv", Method, 18}, + {"(*F).Skip", Method, 18}, + {"(*F).SkipNow", Method, 18}, + {"(*F).Skipf", Method, 18}, + {"(*F).Skipped", Method, 18}, + {"(*F).TempDir", Method, 18}, + {"(*M).Run", Method, 4}, + {"(*PB).Next", Method, 3}, + {"(*T).Cleanup", Method, 14}, + {"(*T).Deadline", Method, 15}, + {"(*T).Error", Method, 0}, + {"(*T).Errorf", Method, 0}, + {"(*T).Fail", Method, 0}, + {"(*T).FailNow", Method, 0}, + {"(*T).Failed", Method, 0}, + {"(*T).Fatal", Method, 0}, + {"(*T).Fatalf", Method, 0}, + {"(*T).Helper", Method, 9}, + {"(*T).Log", Method, 0}, + {"(*T).Logf", Method, 0}, + {"(*T).Name", Method, 8}, + {"(*T).Parallel", Method, 0}, + {"(*T).Run", Method, 7}, + {"(*T).Setenv", Method, 17}, + {"(*T).Skip", Method, 1}, + {"(*T).SkipNow", Method, 1}, + {"(*T).Skipf", Method, 1}, + {"(*T).Skipped", Method, 1}, + {"(*T).TempDir", Method, 15}, + {"(BenchmarkResult).AllocedBytesPerOp", Method, 1}, + {"(BenchmarkResult).AllocsPerOp", Method, 1}, + {"(BenchmarkResult).MemString", Method, 1}, + {"(BenchmarkResult).NsPerOp", Method, 0}, + {"(BenchmarkResult).String", Method, 0}, + {"AllocsPerRun", Func, 1}, + {"B", Type, 0}, + {"B.N", Field, 0}, + {"Benchmark", Func, 0}, + {"BenchmarkResult", Type, 0}, + {"BenchmarkResult.Bytes", Field, 0}, + {"BenchmarkResult.Extra", Field, 13}, + {"BenchmarkResult.MemAllocs", Field, 1}, + {"BenchmarkResult.MemBytes", Field, 1}, + {"BenchmarkResult.N", Field, 0}, + {"BenchmarkResult.T", Field, 0}, + {"Cover", Type, 2}, + {"Cover.Blocks", Field, 2}, + {"Cover.Counters", Field, 2}, + {"Cover.CoveredPackages", Field, 2}, + {"Cover.Mode", Field, 2}, + {"CoverBlock", Type, 2}, + {"CoverBlock.Col0", Field, 2}, + {"CoverBlock.Col1", Field, 2}, + {"CoverBlock.Line0", Field, 2}, + {"CoverBlock.Line1", Field, 2}, + {"CoverBlock.Stmts", Field, 2}, + {"CoverMode", Func, 8}, + {"Coverage", Func, 4}, + {"F", Type, 18}, + {"Init", Func, 13}, + {"InternalBenchmark", Type, 0}, + {"InternalBenchmark.F", Field, 0}, + {"InternalBenchmark.Name", Field, 0}, + {"InternalExample", Type, 0}, + {"InternalExample.F", Field, 0}, + {"InternalExample.Name", Field, 0}, + {"InternalExample.Output", Field, 0}, + {"InternalExample.Unordered", Field, 7}, + {"InternalFuzzTarget", Type, 18}, + {"InternalFuzzTarget.Fn", Field, 18}, + {"InternalFuzzTarget.Name", Field, 18}, + {"InternalTest", Type, 0}, + {"InternalTest.F", Field, 0}, + {"InternalTest.Name", Field, 0}, + {"M", Type, 4}, + {"Main", Func, 0}, + {"MainStart", Func, 4}, + {"PB", Type, 3}, + {"RegisterCover", Func, 2}, + {"RunBenchmarks", Func, 0}, + {"RunExamples", Func, 0}, + {"RunTests", Func, 0}, + {"Short", Func, 0}, + {"T", Type, 0}, + {"TB", Type, 2}, + {"Testing", Func, 21}, + {"Verbose", Func, 1}, + }, + "testing/fstest": { + {"(MapFS).Glob", Method, 16}, + {"(MapFS).Open", Method, 16}, + {"(MapFS).ReadDir", Method, 16}, + {"(MapFS).ReadFile", Method, 16}, + {"(MapFS).Stat", Method, 16}, + {"(MapFS).Sub", Method, 16}, + {"MapFS", Type, 16}, + {"MapFile", Type, 16}, + {"MapFile.Data", Field, 16}, + {"MapFile.ModTime", Field, 16}, + {"MapFile.Mode", Field, 16}, + {"MapFile.Sys", Field, 16}, + {"TestFS", Func, 16}, + }, + "testing/iotest": { + {"DataErrReader", Func, 0}, + {"ErrReader", Func, 16}, + {"ErrTimeout", Var, 0}, + {"HalfReader", Func, 0}, + {"NewReadLogger", Func, 0}, + {"NewWriteLogger", Func, 0}, + {"OneByteReader", Func, 0}, + {"TestReader", Func, 16}, + {"TimeoutReader", Func, 0}, + {"TruncateWriter", Func, 0}, + }, + "testing/quick": { + {"(*CheckEqualError).Error", Method, 0}, + {"(*CheckError).Error", Method, 0}, + {"(SetupError).Error", Method, 0}, + {"Check", Func, 0}, + {"CheckEqual", Func, 0}, + {"CheckEqualError", Type, 0}, + {"CheckEqualError.CheckError", Field, 0}, + {"CheckEqualError.Out1", Field, 0}, + {"CheckEqualError.Out2", Field, 0}, + {"CheckError", Type, 0}, + {"CheckError.Count", Field, 0}, + {"CheckError.In", Field, 0}, + {"Config", Type, 0}, + {"Config.MaxCount", Field, 0}, + {"Config.MaxCountScale", Field, 0}, + {"Config.Rand", Field, 0}, + {"Config.Values", Field, 0}, + {"Generator", Type, 0}, + {"SetupError", Type, 0}, + {"Value", Func, 0}, + }, + "testing/slogtest": { + {"Run", Func, 22}, + {"TestHandler", Func, 21}, + }, + "text/scanner": { + {"(*Position).IsValid", Method, 0}, + {"(*Scanner).Init", Method, 0}, + {"(*Scanner).IsValid", Method, 0}, + {"(*Scanner).Next", Method, 0}, + {"(*Scanner).Peek", Method, 0}, + {"(*Scanner).Pos", Method, 0}, + {"(*Scanner).Scan", Method, 0}, + {"(*Scanner).TokenText", Method, 0}, + {"(Position).String", Method, 0}, + {"(Scanner).String", Method, 0}, + {"Char", Const, 0}, + {"Comment", Const, 0}, + {"EOF", Const, 0}, + {"Float", Const, 0}, + {"GoTokens", Const, 0}, + {"GoWhitespace", Const, 0}, + {"Ident", Const, 0}, + {"Int", Const, 0}, + {"Position", Type, 0}, + {"Position.Column", Field, 0}, + {"Position.Filename", Field, 0}, + {"Position.Line", Field, 0}, + {"Position.Offset", Field, 0}, + {"RawString", Const, 0}, + {"ScanChars", Const, 0}, + {"ScanComments", Const, 0}, + {"ScanFloats", Const, 0}, + {"ScanIdents", Const, 0}, + {"ScanInts", Const, 0}, + {"ScanRawStrings", Const, 0}, + {"ScanStrings", Const, 0}, + {"Scanner", Type, 0}, + {"Scanner.Error", Field, 0}, + {"Scanner.ErrorCount", Field, 0}, + {"Scanner.IsIdentRune", Field, 4}, + {"Scanner.Mode", Field, 0}, + {"Scanner.Position", Field, 0}, + {"Scanner.Whitespace", Field, 0}, + {"SkipComments", Const, 0}, + {"String", Const, 0}, + {"TokenString", Func, 0}, + }, + "text/tabwriter": { + {"(*Writer).Flush", Method, 0}, + {"(*Writer).Init", Method, 0}, + {"(*Writer).Write", Method, 0}, + {"AlignRight", Const, 0}, + {"Debug", Const, 0}, + {"DiscardEmptyColumns", Const, 0}, + {"Escape", Const, 0}, + {"FilterHTML", Const, 0}, + {"NewWriter", Func, 0}, + {"StripEscape", Const, 0}, + {"TabIndent", Const, 0}, + {"Writer", Type, 0}, + }, + "text/template": { + {"(*Template).AddParseTree", Method, 0}, + {"(*Template).Clone", Method, 0}, + {"(*Template).DefinedTemplates", Method, 5}, + {"(*Template).Delims", Method, 0}, + {"(*Template).Execute", Method, 0}, + {"(*Template).ExecuteTemplate", Method, 0}, + {"(*Template).Funcs", Method, 0}, + {"(*Template).Lookup", Method, 0}, + {"(*Template).Name", Method, 0}, + {"(*Template).New", Method, 0}, + {"(*Template).Option", Method, 5}, + {"(*Template).Parse", Method, 0}, + {"(*Template).ParseFS", Method, 16}, + {"(*Template).ParseFiles", Method, 0}, + {"(*Template).ParseGlob", Method, 0}, + {"(*Template).Templates", Method, 0}, + {"(ExecError).Error", Method, 6}, + {"(ExecError).Unwrap", Method, 13}, + {"(Template).Copy", Method, 2}, + {"(Template).ErrorContext", Method, 1}, + {"ExecError", Type, 6}, + {"ExecError.Err", Field, 6}, + {"ExecError.Name", Field, 6}, + {"FuncMap", Type, 0}, + {"HTMLEscape", Func, 0}, + {"HTMLEscapeString", Func, 0}, + {"HTMLEscaper", Func, 0}, + {"IsTrue", Func, 6}, + {"JSEscape", Func, 0}, + {"JSEscapeString", Func, 0}, + {"JSEscaper", Func, 0}, + {"Must", Func, 0}, + {"New", Func, 0}, + {"ParseFS", Func, 16}, + {"ParseFiles", Func, 0}, + {"ParseGlob", Func, 0}, + {"Template", Type, 0}, + {"Template.Tree", Field, 0}, + {"URLQueryEscaper", Func, 0}, + }, + "text/template/parse": { + {"(*ActionNode).Copy", Method, 0}, + {"(*ActionNode).String", Method, 0}, + {"(*BoolNode).Copy", Method, 0}, + {"(*BoolNode).String", Method, 0}, + {"(*BranchNode).Copy", Method, 4}, + {"(*BranchNode).String", Method, 0}, + {"(*BreakNode).Copy", Method, 18}, + {"(*BreakNode).String", Method, 18}, + {"(*ChainNode).Add", Method, 1}, + {"(*ChainNode).Copy", Method, 1}, + {"(*ChainNode).String", Method, 1}, + {"(*CommandNode).Copy", Method, 0}, + {"(*CommandNode).String", Method, 0}, + {"(*CommentNode).Copy", Method, 16}, + {"(*CommentNode).String", Method, 16}, + {"(*ContinueNode).Copy", Method, 18}, + {"(*ContinueNode).String", Method, 18}, + {"(*DotNode).Copy", Method, 0}, + {"(*DotNode).String", Method, 0}, + {"(*DotNode).Type", Method, 0}, + {"(*FieldNode).Copy", Method, 0}, + {"(*FieldNode).String", Method, 0}, + {"(*IdentifierNode).Copy", Method, 0}, + {"(*IdentifierNode).SetPos", Method, 1}, + {"(*IdentifierNode).SetTree", Method, 4}, + {"(*IdentifierNode).String", Method, 0}, + {"(*IfNode).Copy", Method, 0}, + {"(*IfNode).String", Method, 0}, + {"(*ListNode).Copy", Method, 0}, + {"(*ListNode).CopyList", Method, 0}, + {"(*ListNode).String", Method, 0}, + {"(*NilNode).Copy", Method, 1}, + {"(*NilNode).String", Method, 1}, + {"(*NilNode).Type", Method, 1}, + {"(*NumberNode).Copy", Method, 0}, + {"(*NumberNode).String", Method, 0}, + {"(*PipeNode).Copy", Method, 0}, + {"(*PipeNode).CopyPipe", Method, 0}, + {"(*PipeNode).String", Method, 0}, + {"(*RangeNode).Copy", Method, 0}, + {"(*RangeNode).String", Method, 0}, + {"(*StringNode).Copy", Method, 0}, + {"(*StringNode).String", Method, 0}, + {"(*TemplateNode).Copy", Method, 0}, + {"(*TemplateNode).String", Method, 0}, + {"(*TextNode).Copy", Method, 0}, + {"(*TextNode).String", Method, 0}, + {"(*Tree).Copy", Method, 2}, + {"(*Tree).ErrorContext", Method, 1}, + {"(*Tree).Parse", Method, 0}, + {"(*VariableNode).Copy", Method, 0}, + {"(*VariableNode).String", Method, 0}, + {"(*WithNode).Copy", Method, 0}, + {"(*WithNode).String", Method, 0}, + {"(ActionNode).Position", Method, 1}, + {"(ActionNode).Type", Method, 0}, + {"(BoolNode).Position", Method, 1}, + {"(BoolNode).Type", Method, 0}, + {"(BranchNode).Position", Method, 1}, + {"(BranchNode).Type", Method, 0}, + {"(BreakNode).Position", Method, 18}, + {"(BreakNode).Type", Method, 18}, + {"(ChainNode).Position", Method, 1}, + {"(ChainNode).Type", Method, 1}, + {"(CommandNode).Position", Method, 1}, + {"(CommandNode).Type", Method, 0}, + {"(CommentNode).Position", Method, 16}, + {"(CommentNode).Type", Method, 16}, + {"(ContinueNode).Position", Method, 18}, + {"(ContinueNode).Type", Method, 18}, + {"(DotNode).Position", Method, 1}, + {"(FieldNode).Position", Method, 1}, + {"(FieldNode).Type", Method, 0}, + {"(IdentifierNode).Position", Method, 1}, + {"(IdentifierNode).Type", Method, 0}, + {"(IfNode).Position", Method, 1}, + {"(IfNode).Type", Method, 0}, + {"(ListNode).Position", Method, 1}, + {"(ListNode).Type", Method, 0}, + {"(NilNode).Position", Method, 1}, + {"(NodeType).Type", Method, 0}, + {"(NumberNode).Position", Method, 1}, + {"(NumberNode).Type", Method, 0}, + {"(PipeNode).Position", Method, 1}, + {"(PipeNode).Type", Method, 0}, + {"(Pos).Position", Method, 1}, + {"(RangeNode).Position", Method, 1}, + {"(RangeNode).Type", Method, 0}, + {"(StringNode).Position", Method, 1}, + {"(StringNode).Type", Method, 0}, + {"(TemplateNode).Position", Method, 1}, + {"(TemplateNode).Type", Method, 0}, + {"(TextNode).Position", Method, 1}, + {"(TextNode).Type", Method, 0}, + {"(VariableNode).Position", Method, 1}, + {"(VariableNode).Type", Method, 0}, + {"(WithNode).Position", Method, 1}, + {"(WithNode).Type", Method, 0}, + {"ActionNode", Type, 0}, + {"ActionNode.Line", Field, 0}, + {"ActionNode.NodeType", Field, 0}, + {"ActionNode.Pipe", Field, 0}, + {"ActionNode.Pos", Field, 1}, + {"BoolNode", Type, 0}, + {"BoolNode.NodeType", Field, 0}, + {"BoolNode.Pos", Field, 1}, + {"BoolNode.True", Field, 0}, + {"BranchNode", Type, 0}, + {"BranchNode.ElseList", Field, 0}, + {"BranchNode.Line", Field, 0}, + {"BranchNode.List", Field, 0}, + {"BranchNode.NodeType", Field, 0}, + {"BranchNode.Pipe", Field, 0}, + {"BranchNode.Pos", Field, 1}, + {"BreakNode", Type, 18}, + {"BreakNode.Line", Field, 18}, + {"BreakNode.NodeType", Field, 18}, + {"BreakNode.Pos", Field, 18}, + {"ChainNode", Type, 1}, + {"ChainNode.Field", Field, 1}, + {"ChainNode.Node", Field, 1}, + {"ChainNode.NodeType", Field, 1}, + {"ChainNode.Pos", Field, 1}, + {"CommandNode", Type, 0}, + {"CommandNode.Args", Field, 0}, + {"CommandNode.NodeType", Field, 0}, + {"CommandNode.Pos", Field, 1}, + {"CommentNode", Type, 16}, + {"CommentNode.NodeType", Field, 16}, + {"CommentNode.Pos", Field, 16}, + {"CommentNode.Text", Field, 16}, + {"ContinueNode", Type, 18}, + {"ContinueNode.Line", Field, 18}, + {"ContinueNode.NodeType", Field, 18}, + {"ContinueNode.Pos", Field, 18}, + {"DotNode", Type, 0}, + {"DotNode.NodeType", Field, 4}, + {"DotNode.Pos", Field, 1}, + {"FieldNode", Type, 0}, + {"FieldNode.Ident", Field, 0}, + {"FieldNode.NodeType", Field, 0}, + {"FieldNode.Pos", Field, 1}, + {"IdentifierNode", Type, 0}, + {"IdentifierNode.Ident", Field, 0}, + {"IdentifierNode.NodeType", Field, 0}, + {"IdentifierNode.Pos", Field, 1}, + {"IfNode", Type, 0}, + {"IfNode.BranchNode", Field, 0}, + {"IsEmptyTree", Func, 0}, + {"ListNode", Type, 0}, + {"ListNode.NodeType", Field, 0}, + {"ListNode.Nodes", Field, 0}, + {"ListNode.Pos", Field, 1}, + {"Mode", Type, 16}, + {"New", Func, 0}, + {"NewIdentifier", Func, 0}, + {"NilNode", Type, 1}, + {"NilNode.NodeType", Field, 4}, + {"NilNode.Pos", Field, 1}, + {"Node", Type, 0}, + {"NodeAction", Const, 0}, + {"NodeBool", Const, 0}, + {"NodeBreak", Const, 18}, + {"NodeChain", Const, 1}, + {"NodeCommand", Const, 0}, + {"NodeComment", Const, 16}, + {"NodeContinue", Const, 18}, + {"NodeDot", Const, 0}, + {"NodeField", Const, 0}, + {"NodeIdentifier", Const, 0}, + {"NodeIf", Const, 0}, + {"NodeList", Const, 0}, + {"NodeNil", Const, 1}, + {"NodeNumber", Const, 0}, + {"NodePipe", Const, 0}, + {"NodeRange", Const, 0}, + {"NodeString", Const, 0}, + {"NodeTemplate", Const, 0}, + {"NodeText", Const, 0}, + {"NodeType", Type, 0}, + {"NodeVariable", Const, 0}, + {"NodeWith", Const, 0}, + {"NumberNode", Type, 0}, + {"NumberNode.Complex128", Field, 0}, + {"NumberNode.Float64", Field, 0}, + {"NumberNode.Int64", Field, 0}, + {"NumberNode.IsComplex", Field, 0}, + {"NumberNode.IsFloat", Field, 0}, + {"NumberNode.IsInt", Field, 0}, + {"NumberNode.IsUint", Field, 0}, + {"NumberNode.NodeType", Field, 0}, + {"NumberNode.Pos", Field, 1}, + {"NumberNode.Text", Field, 0}, + {"NumberNode.Uint64", Field, 0}, + {"Parse", Func, 0}, + {"ParseComments", Const, 16}, + {"PipeNode", Type, 0}, + {"PipeNode.Cmds", Field, 0}, + {"PipeNode.Decl", Field, 0}, + {"PipeNode.IsAssign", Field, 11}, + {"PipeNode.Line", Field, 0}, + {"PipeNode.NodeType", Field, 0}, + {"PipeNode.Pos", Field, 1}, + {"Pos", Type, 1}, + {"RangeNode", Type, 0}, + {"RangeNode.BranchNode", Field, 0}, + {"SkipFuncCheck", Const, 17}, + {"StringNode", Type, 0}, + {"StringNode.NodeType", Field, 0}, + {"StringNode.Pos", Field, 1}, + {"StringNode.Quoted", Field, 0}, + {"StringNode.Text", Field, 0}, + {"TemplateNode", Type, 0}, + {"TemplateNode.Line", Field, 0}, + {"TemplateNode.Name", Field, 0}, + {"TemplateNode.NodeType", Field, 0}, + {"TemplateNode.Pipe", Field, 0}, + {"TemplateNode.Pos", Field, 1}, + {"TextNode", Type, 0}, + {"TextNode.NodeType", Field, 0}, + {"TextNode.Pos", Field, 1}, + {"TextNode.Text", Field, 0}, + {"Tree", Type, 0}, + {"Tree.Mode", Field, 16}, + {"Tree.Name", Field, 0}, + {"Tree.ParseName", Field, 1}, + {"Tree.Root", Field, 0}, + {"VariableNode", Type, 0}, + {"VariableNode.Ident", Field, 0}, + {"VariableNode.NodeType", Field, 0}, + {"VariableNode.Pos", Field, 1}, + {"WithNode", Type, 0}, + {"WithNode.BranchNode", Field, 0}, + }, + "time": { + {"(*Location).String", Method, 0}, + {"(*ParseError).Error", Method, 0}, + {"(*Ticker).Reset", Method, 15}, + {"(*Ticker).Stop", Method, 0}, + {"(*Time).GobDecode", Method, 0}, + {"(*Time).UnmarshalBinary", Method, 2}, + {"(*Time).UnmarshalJSON", Method, 0}, + {"(*Time).UnmarshalText", Method, 2}, + {"(*Timer).Reset", Method, 1}, + {"(*Timer).Stop", Method, 0}, + {"(Duration).Abs", Method, 19}, + {"(Duration).Hours", Method, 0}, + {"(Duration).Microseconds", Method, 13}, + {"(Duration).Milliseconds", Method, 13}, + {"(Duration).Minutes", Method, 0}, + {"(Duration).Nanoseconds", Method, 0}, + {"(Duration).Round", Method, 9}, + {"(Duration).Seconds", Method, 0}, + {"(Duration).String", Method, 0}, + {"(Duration).Truncate", Method, 9}, + {"(Month).String", Method, 0}, + {"(Time).Add", Method, 0}, + {"(Time).AddDate", Method, 0}, + {"(Time).After", Method, 0}, + {"(Time).AppendFormat", Method, 5}, + {"(Time).Before", Method, 0}, + {"(Time).Clock", Method, 0}, + {"(Time).Compare", Method, 20}, + {"(Time).Date", Method, 0}, + {"(Time).Day", Method, 0}, + {"(Time).Equal", Method, 0}, + {"(Time).Format", Method, 0}, + {"(Time).GoString", Method, 17}, + {"(Time).GobEncode", Method, 0}, + {"(Time).Hour", Method, 0}, + {"(Time).ISOWeek", Method, 0}, + {"(Time).In", Method, 0}, + {"(Time).IsDST", Method, 17}, + {"(Time).IsZero", Method, 0}, + {"(Time).Local", Method, 0}, + {"(Time).Location", Method, 0}, + {"(Time).MarshalBinary", Method, 2}, + {"(Time).MarshalJSON", Method, 0}, + {"(Time).MarshalText", Method, 2}, + {"(Time).Minute", Method, 0}, + {"(Time).Month", Method, 0}, + {"(Time).Nanosecond", Method, 0}, + {"(Time).Round", Method, 1}, + {"(Time).Second", Method, 0}, + {"(Time).String", Method, 0}, + {"(Time).Sub", Method, 0}, + {"(Time).Truncate", Method, 1}, + {"(Time).UTC", Method, 0}, + {"(Time).Unix", Method, 0}, + {"(Time).UnixMicro", Method, 17}, + {"(Time).UnixMilli", Method, 17}, + {"(Time).UnixNano", Method, 0}, + {"(Time).Weekday", Method, 0}, + {"(Time).Year", Method, 0}, + {"(Time).YearDay", Method, 1}, + {"(Time).Zone", Method, 0}, + {"(Time).ZoneBounds", Method, 19}, + {"(Weekday).String", Method, 0}, + {"ANSIC", Const, 0}, + {"After", Func, 0}, + {"AfterFunc", Func, 0}, + {"April", Const, 0}, + {"August", Const, 0}, + {"Date", Func, 0}, + {"DateOnly", Const, 20}, + {"DateTime", Const, 20}, + {"December", Const, 0}, + {"Duration", Type, 0}, + {"February", Const, 0}, + {"FixedZone", Func, 0}, + {"Friday", Const, 0}, + {"Hour", Const, 0}, + {"January", Const, 0}, + {"July", Const, 0}, + {"June", Const, 0}, + {"Kitchen", Const, 0}, + {"Layout", Const, 17}, + {"LoadLocation", Func, 0}, + {"LoadLocationFromTZData", Func, 10}, + {"Local", Var, 0}, + {"Location", Type, 0}, + {"March", Const, 0}, + {"May", Const, 0}, + {"Microsecond", Const, 0}, + {"Millisecond", Const, 0}, + {"Minute", Const, 0}, + {"Monday", Const, 0}, + {"Month", Type, 0}, + {"Nanosecond", Const, 0}, + {"NewTicker", Func, 0}, + {"NewTimer", Func, 0}, + {"November", Const, 0}, + {"Now", Func, 0}, + {"October", Const, 0}, + {"Parse", Func, 0}, + {"ParseDuration", Func, 0}, + {"ParseError", Type, 0}, + {"ParseError.Layout", Field, 0}, + {"ParseError.LayoutElem", Field, 0}, + {"ParseError.Message", Field, 0}, + {"ParseError.Value", Field, 0}, + {"ParseError.ValueElem", Field, 0}, + {"ParseInLocation", Func, 1}, + {"RFC1123", Const, 0}, + {"RFC1123Z", Const, 0}, + {"RFC3339", Const, 0}, + {"RFC3339Nano", Const, 0}, + {"RFC822", Const, 0}, + {"RFC822Z", Const, 0}, + {"RFC850", Const, 0}, + {"RubyDate", Const, 0}, + {"Saturday", Const, 0}, + {"Second", Const, 0}, + {"September", Const, 0}, + {"Since", Func, 0}, + {"Sleep", Func, 0}, + {"Stamp", Const, 0}, + {"StampMicro", Const, 0}, + {"StampMilli", Const, 0}, + {"StampNano", Const, 0}, + {"Sunday", Const, 0}, + {"Thursday", Const, 0}, + {"Tick", Func, 0}, + {"Ticker", Type, 0}, + {"Ticker.C", Field, 0}, + {"Time", Type, 0}, + {"TimeOnly", Const, 20}, + {"Timer", Type, 0}, + {"Timer.C", Field, 0}, + {"Tuesday", Const, 0}, + {"UTC", Var, 0}, + {"Unix", Func, 0}, + {"UnixDate", Const, 0}, + {"UnixMicro", Func, 17}, + {"UnixMilli", Func, 17}, + {"Until", Func, 8}, + {"Wednesday", Const, 0}, + {"Weekday", Type, 0}, + }, + "unicode": { + {"(SpecialCase).ToLower", Method, 0}, + {"(SpecialCase).ToTitle", Method, 0}, + {"(SpecialCase).ToUpper", Method, 0}, + {"ASCII_Hex_Digit", Var, 0}, + {"Adlam", Var, 7}, + {"Ahom", Var, 5}, + {"Anatolian_Hieroglyphs", Var, 5}, + {"Arabic", Var, 0}, + {"Armenian", Var, 0}, + {"Avestan", Var, 0}, + {"AzeriCase", Var, 0}, + {"Balinese", Var, 0}, + {"Bamum", Var, 0}, + {"Bassa_Vah", Var, 4}, + {"Batak", Var, 0}, + {"Bengali", Var, 0}, + {"Bhaiksuki", Var, 7}, + {"Bidi_Control", Var, 0}, + {"Bopomofo", Var, 0}, + {"Brahmi", Var, 0}, + {"Braille", Var, 0}, + {"Buginese", Var, 0}, + {"Buhid", Var, 0}, + {"C", Var, 0}, + {"Canadian_Aboriginal", Var, 0}, + {"Carian", Var, 0}, + {"CaseRange", Type, 0}, + {"CaseRange.Delta", Field, 0}, + {"CaseRange.Hi", Field, 0}, + {"CaseRange.Lo", Field, 0}, + {"CaseRanges", Var, 0}, + {"Categories", Var, 0}, + {"Caucasian_Albanian", Var, 4}, + {"Cc", Var, 0}, + {"Cf", Var, 0}, + {"Chakma", Var, 1}, + {"Cham", Var, 0}, + {"Cherokee", Var, 0}, + {"Chorasmian", Var, 16}, + {"Co", Var, 0}, + {"Common", Var, 0}, + {"Coptic", Var, 0}, + {"Cs", Var, 0}, + {"Cuneiform", Var, 0}, + {"Cypriot", Var, 0}, + {"Cypro_Minoan", Var, 21}, + {"Cyrillic", Var, 0}, + {"Dash", Var, 0}, + {"Deprecated", Var, 0}, + {"Deseret", Var, 0}, + {"Devanagari", Var, 0}, + {"Diacritic", Var, 0}, + {"Digit", Var, 0}, + {"Dives_Akuru", Var, 16}, + {"Dogra", Var, 13}, + {"Duployan", Var, 4}, + {"Egyptian_Hieroglyphs", Var, 0}, + {"Elbasan", Var, 4}, + {"Elymaic", Var, 14}, + {"Ethiopic", Var, 0}, + {"Extender", Var, 0}, + {"FoldCategory", Var, 0}, + {"FoldScript", Var, 0}, + {"Georgian", Var, 0}, + {"Glagolitic", Var, 0}, + {"Gothic", Var, 0}, + {"Grantha", Var, 4}, + {"GraphicRanges", Var, 0}, + {"Greek", Var, 0}, + {"Gujarati", Var, 0}, + {"Gunjala_Gondi", Var, 13}, + {"Gurmukhi", Var, 0}, + {"Han", Var, 0}, + {"Hangul", Var, 0}, + {"Hanifi_Rohingya", Var, 13}, + {"Hanunoo", Var, 0}, + {"Hatran", Var, 5}, + {"Hebrew", Var, 0}, + {"Hex_Digit", Var, 0}, + {"Hiragana", Var, 0}, + {"Hyphen", Var, 0}, + {"IDS_Binary_Operator", Var, 0}, + {"IDS_Trinary_Operator", Var, 0}, + {"Ideographic", Var, 0}, + {"Imperial_Aramaic", Var, 0}, + {"In", Func, 2}, + {"Inherited", Var, 0}, + {"Inscriptional_Pahlavi", Var, 0}, + {"Inscriptional_Parthian", Var, 0}, + {"Is", Func, 0}, + {"IsControl", Func, 0}, + {"IsDigit", Func, 0}, + {"IsGraphic", Func, 0}, + {"IsLetter", Func, 0}, + {"IsLower", Func, 0}, + {"IsMark", Func, 0}, + {"IsNumber", Func, 0}, + {"IsOneOf", Func, 0}, + {"IsPrint", Func, 0}, + {"IsPunct", Func, 0}, + {"IsSpace", Func, 0}, + {"IsSymbol", Func, 0}, + {"IsTitle", Func, 0}, + {"IsUpper", Func, 0}, + {"Javanese", Var, 0}, + {"Join_Control", Var, 0}, + {"Kaithi", Var, 0}, + {"Kannada", Var, 0}, + {"Katakana", Var, 0}, + {"Kawi", Var, 21}, + {"Kayah_Li", Var, 0}, + {"Kharoshthi", Var, 0}, + {"Khitan_Small_Script", Var, 16}, + {"Khmer", Var, 0}, + {"Khojki", Var, 4}, + {"Khudawadi", Var, 4}, + {"L", Var, 0}, + {"Lao", Var, 0}, + {"Latin", Var, 0}, + {"Lepcha", Var, 0}, + {"Letter", Var, 0}, + {"Limbu", Var, 0}, + {"Linear_A", Var, 4}, + {"Linear_B", Var, 0}, + {"Lisu", Var, 0}, + {"Ll", Var, 0}, + {"Lm", Var, 0}, + {"Lo", Var, 0}, + {"Logical_Order_Exception", Var, 0}, + {"Lower", Var, 0}, + {"LowerCase", Const, 0}, + {"Lt", Var, 0}, + {"Lu", Var, 0}, + {"Lycian", Var, 0}, + {"Lydian", Var, 0}, + {"M", Var, 0}, + {"Mahajani", Var, 4}, + {"Makasar", Var, 13}, + {"Malayalam", Var, 0}, + {"Mandaic", Var, 0}, + {"Manichaean", Var, 4}, + {"Marchen", Var, 7}, + {"Mark", Var, 0}, + {"Masaram_Gondi", Var, 10}, + {"MaxASCII", Const, 0}, + {"MaxCase", Const, 0}, + {"MaxLatin1", Const, 0}, + {"MaxRune", Const, 0}, + {"Mc", Var, 0}, + {"Me", Var, 0}, + {"Medefaidrin", Var, 13}, + {"Meetei_Mayek", Var, 0}, + {"Mende_Kikakui", Var, 4}, + {"Meroitic_Cursive", Var, 1}, + {"Meroitic_Hieroglyphs", Var, 1}, + {"Miao", Var, 1}, + {"Mn", Var, 0}, + {"Modi", Var, 4}, + {"Mongolian", Var, 0}, + {"Mro", Var, 4}, + {"Multani", Var, 5}, + {"Myanmar", Var, 0}, + {"N", Var, 0}, + {"Nabataean", Var, 4}, + {"Nag_Mundari", Var, 21}, + {"Nandinagari", Var, 14}, + {"Nd", Var, 0}, + {"New_Tai_Lue", Var, 0}, + {"Newa", Var, 7}, + {"Nko", Var, 0}, + {"Nl", Var, 0}, + {"No", Var, 0}, + {"Noncharacter_Code_Point", Var, 0}, + {"Number", Var, 0}, + {"Nushu", Var, 10}, + {"Nyiakeng_Puachue_Hmong", Var, 14}, + {"Ogham", Var, 0}, + {"Ol_Chiki", Var, 0}, + {"Old_Hungarian", Var, 5}, + {"Old_Italic", Var, 0}, + {"Old_North_Arabian", Var, 4}, + {"Old_Permic", Var, 4}, + {"Old_Persian", Var, 0}, + {"Old_Sogdian", Var, 13}, + {"Old_South_Arabian", Var, 0}, + {"Old_Turkic", Var, 0}, + {"Old_Uyghur", Var, 21}, + {"Oriya", Var, 0}, + {"Osage", Var, 7}, + {"Osmanya", Var, 0}, + {"Other", Var, 0}, + {"Other_Alphabetic", Var, 0}, + {"Other_Default_Ignorable_Code_Point", Var, 0}, + {"Other_Grapheme_Extend", Var, 0}, + {"Other_ID_Continue", Var, 0}, + {"Other_ID_Start", Var, 0}, + {"Other_Lowercase", Var, 0}, + {"Other_Math", Var, 0}, + {"Other_Uppercase", Var, 0}, + {"P", Var, 0}, + {"Pahawh_Hmong", Var, 4}, + {"Palmyrene", Var, 4}, + {"Pattern_Syntax", Var, 0}, + {"Pattern_White_Space", Var, 0}, + {"Pau_Cin_Hau", Var, 4}, + {"Pc", Var, 0}, + {"Pd", Var, 0}, + {"Pe", Var, 0}, + {"Pf", Var, 0}, + {"Phags_Pa", Var, 0}, + {"Phoenician", Var, 0}, + {"Pi", Var, 0}, + {"Po", Var, 0}, + {"Prepended_Concatenation_Mark", Var, 7}, + {"PrintRanges", Var, 0}, + {"Properties", Var, 0}, + {"Ps", Var, 0}, + {"Psalter_Pahlavi", Var, 4}, + {"Punct", Var, 0}, + {"Quotation_Mark", Var, 0}, + {"Radical", Var, 0}, + {"Range16", Type, 0}, + {"Range16.Hi", Field, 0}, + {"Range16.Lo", Field, 0}, + {"Range16.Stride", Field, 0}, + {"Range32", Type, 0}, + {"Range32.Hi", Field, 0}, + {"Range32.Lo", Field, 0}, + {"Range32.Stride", Field, 0}, + {"RangeTable", Type, 0}, + {"RangeTable.LatinOffset", Field, 1}, + {"RangeTable.R16", Field, 0}, + {"RangeTable.R32", Field, 0}, + {"Regional_Indicator", Var, 10}, + {"Rejang", Var, 0}, + {"ReplacementChar", Const, 0}, + {"Runic", Var, 0}, + {"S", Var, 0}, + {"STerm", Var, 0}, + {"Samaritan", Var, 0}, + {"Saurashtra", Var, 0}, + {"Sc", Var, 0}, + {"Scripts", Var, 0}, + {"Sentence_Terminal", Var, 7}, + {"Sharada", Var, 1}, + {"Shavian", Var, 0}, + {"Siddham", Var, 4}, + {"SignWriting", Var, 5}, + {"SimpleFold", Func, 0}, + {"Sinhala", Var, 0}, + {"Sk", Var, 0}, + {"Sm", Var, 0}, + {"So", Var, 0}, + {"Soft_Dotted", Var, 0}, + {"Sogdian", Var, 13}, + {"Sora_Sompeng", Var, 1}, + {"Soyombo", Var, 10}, + {"Space", Var, 0}, + {"SpecialCase", Type, 0}, + {"Sundanese", Var, 0}, + {"Syloti_Nagri", Var, 0}, + {"Symbol", Var, 0}, + {"Syriac", Var, 0}, + {"Tagalog", Var, 0}, + {"Tagbanwa", Var, 0}, + {"Tai_Le", Var, 0}, + {"Tai_Tham", Var, 0}, + {"Tai_Viet", Var, 0}, + {"Takri", Var, 1}, + {"Tamil", Var, 0}, + {"Tangsa", Var, 21}, + {"Tangut", Var, 7}, + {"Telugu", Var, 0}, + {"Terminal_Punctuation", Var, 0}, + {"Thaana", Var, 0}, + {"Thai", Var, 0}, + {"Tibetan", Var, 0}, + {"Tifinagh", Var, 0}, + {"Tirhuta", Var, 4}, + {"Title", Var, 0}, + {"TitleCase", Const, 0}, + {"To", Func, 0}, + {"ToLower", Func, 0}, + {"ToTitle", Func, 0}, + {"ToUpper", Func, 0}, + {"Toto", Var, 21}, + {"TurkishCase", Var, 0}, + {"Ugaritic", Var, 0}, + {"Unified_Ideograph", Var, 0}, + {"Upper", Var, 0}, + {"UpperCase", Const, 0}, + {"UpperLower", Const, 0}, + {"Vai", Var, 0}, + {"Variation_Selector", Var, 0}, + {"Version", Const, 0}, + {"Vithkuqi", Var, 21}, + {"Wancho", Var, 14}, + {"Warang_Citi", Var, 4}, + {"White_Space", Var, 0}, + {"Yezidi", Var, 16}, + {"Yi", Var, 0}, + {"Z", Var, 0}, + {"Zanabazar_Square", Var, 10}, + {"Zl", Var, 0}, + {"Zp", Var, 0}, + {"Zs", Var, 0}, + }, + "unicode/utf16": { + {"AppendRune", Func, 20}, + {"Decode", Func, 0}, + {"DecodeRune", Func, 0}, + {"Encode", Func, 0}, + {"EncodeRune", Func, 0}, + {"IsSurrogate", Func, 0}, + {"RuneLen", Func, 23}, + }, + "unicode/utf8": { + {"AppendRune", Func, 18}, + {"DecodeLastRune", Func, 0}, + {"DecodeLastRuneInString", Func, 0}, + {"DecodeRune", Func, 0}, + {"DecodeRuneInString", Func, 0}, + {"EncodeRune", Func, 0}, + {"FullRune", Func, 0}, + {"FullRuneInString", Func, 0}, + {"MaxRune", Const, 0}, + {"RuneCount", Func, 0}, + {"RuneCountInString", Func, 0}, + {"RuneError", Const, 0}, + {"RuneLen", Func, 0}, + {"RuneSelf", Const, 0}, + {"RuneStart", Func, 0}, + {"UTFMax", Const, 0}, + {"Valid", Func, 0}, + {"ValidRune", Func, 1}, + {"ValidString", Func, 0}, + }, + "unique": { + {"(Handle).Value", Method, 23}, + {"Handle", Type, 23}, + {"Make", Func, 23}, + }, + "unsafe": { + {"Add", Func, 0}, + {"Alignof", Func, 0}, + {"Offsetof", Func, 0}, + {"Pointer", Type, 0}, + {"Sizeof", Func, 0}, + {"Slice", Func, 0}, + {"SliceData", Func, 0}, + {"String", Func, 0}, + {"StringData", Func, 0}, + }, +} diff --git a/vendor/golang.org/x/tools/internal/stdlib/stdlib.go b/vendor/golang.org/x/tools/internal/stdlib/stdlib.go new file mode 100644 index 00000000..98904017 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/stdlib/stdlib.go @@ -0,0 +1,97 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run generate.go + +// Package stdlib provides a table of all exported symbols in the +// standard library, along with the version at which they first +// appeared. +package stdlib + +import ( + "fmt" + "strings" +) + +type Symbol struct { + Name string + Kind Kind + Version Version // Go version that first included the symbol +} + +// A Kind indicates the kind of a symbol: +// function, variable, constant, type, and so on. +type Kind int8 + +const ( + Invalid Kind = iota // Example name: + Type // "Buffer" + Func // "Println" + Var // "EOF" + Const // "Pi" + Field // "Point.X" + Method // "(*Buffer).Grow" +) + +func (kind Kind) String() string { + return [...]string{ + Invalid: "invalid", + Type: "type", + Func: "func", + Var: "var", + Const: "const", + Field: "field", + Method: "method", + }[kind] +} + +// A Version represents a version of Go of the form "go1.%d". +type Version int8 + +// String returns a version string of the form "go1.23", without allocating. +func (v Version) String() string { return versions[v] } + +var versions [30]string // (increase constant as needed) + +func init() { + for i := range versions { + versions[i] = fmt.Sprintf("go1.%d", i) + } +} + +// HasPackage reports whether the specified package path is part of +// the standard library's public API. +func HasPackage(path string) bool { + _, ok := PackageSymbols[path] + return ok +} + +// SplitField splits the field symbol name into type and field +// components. It must be called only on Field symbols. +// +// Example: "File.Package" -> ("File", "Package") +func (sym *Symbol) SplitField() (typename, name string) { + if sym.Kind != Field { + panic("not a field") + } + typename, name, _ = strings.Cut(sym.Name, ".") + return +} + +// SplitMethod splits the method symbol name into pointer, receiver, +// and method components. It must be called only on Method symbols. +// +// Example: "(*Buffer).Grow" -> (true, "Buffer", "Grow") +func (sym *Symbol) SplitMethod() (ptr bool, recv, name string) { + if sym.Kind != Method { + panic("not a method") + } + recv, name, _ = strings.Cut(sym.Name, ".") + recv = recv[len("(") : len(recv)-len(")")] + ptr = recv[0] == '*' + if ptr { + recv = recv[len("*"):] + } + return +} diff --git a/vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go b/vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go deleted file mode 100644 index 7e638ec2..00000000 --- a/vendor/golang.org/x/tools/internal/tokeninternal/tokeninternal.go +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// package tokeninternal provides access to some internal features of the token -// package. -package tokeninternal - -import ( - "fmt" - "go/token" - "sort" - "sync" - "unsafe" -) - -// GetLines returns the table of line-start offsets from a token.File. -func GetLines(file *token.File) []int { - // token.File has a Lines method on Go 1.21 and later. - if file, ok := (interface{})(file).(interface{ Lines() []int }); ok { - return file.Lines() - } - - // This declaration must match that of token.File. - // This creates a risk of dependency skew. - // For now we check that the size of the two - // declarations is the same, on the (fragile) assumption - // that future changes would add fields. - type tokenFile119 struct { - _ string - _ int - _ int - mu sync.Mutex // we're not complete monsters - lines []int - _ []struct{} - } - type tokenFile118 struct { - _ *token.FileSet // deleted in go1.19 - tokenFile119 - } - - type uP = unsafe.Pointer - switch unsafe.Sizeof(*file) { - case unsafe.Sizeof(tokenFile118{}): - var ptr *tokenFile118 - *(*uP)(uP(&ptr)) = uP(file) - ptr.mu.Lock() - defer ptr.mu.Unlock() - return ptr.lines - - case unsafe.Sizeof(tokenFile119{}): - var ptr *tokenFile119 - *(*uP)(uP(&ptr)) = uP(file) - ptr.mu.Lock() - defer ptr.mu.Unlock() - return ptr.lines - - default: - panic("unexpected token.File size") - } -} - -// AddExistingFiles adds the specified files to the FileSet if they -// are not already present. It panics if any pair of files in the -// resulting FileSet would overlap. -func AddExistingFiles(fset *token.FileSet, files []*token.File) { - // Punch through the FileSet encapsulation. - type tokenFileSet struct { - // This type remained essentially consistent from go1.16 to go1.21. - mutex sync.RWMutex - base int - files []*token.File - _ *token.File // changed to atomic.Pointer[token.File] in go1.19 - } - - // If the size of token.FileSet changes, this will fail to compile. - const delta = int64(unsafe.Sizeof(tokenFileSet{})) - int64(unsafe.Sizeof(token.FileSet{})) - var _ [-delta * delta]int - - type uP = unsafe.Pointer - var ptr *tokenFileSet - *(*uP)(uP(&ptr)) = uP(fset) - ptr.mutex.Lock() - defer ptr.mutex.Unlock() - - // Merge and sort. - newFiles := append(ptr.files, files...) - sort.Slice(newFiles, func(i, j int) bool { - return newFiles[i].Base() < newFiles[j].Base() - }) - - // Reject overlapping files. - // Discard adjacent identical files. - out := newFiles[:0] - for i, file := range newFiles { - if i > 0 { - prev := newFiles[i-1] - if file == prev { - continue - } - if prev.Base()+prev.Size()+1 > file.Base() { - panic(fmt.Sprintf("file %s (%d-%d) overlaps with file %s (%d-%d)", - prev.Name(), prev.Base(), prev.Base()+prev.Size(), - file.Name(), file.Base(), file.Base()+file.Size())) - } - } - out = append(out, file) - } - newFiles = out - - ptr.files = newFiles - - // Advance FileSet.Base(). - if len(newFiles) > 0 { - last := newFiles[len(newFiles)-1] - newBase := last.Base() + last.Size() + 1 - if ptr.base < newBase { - ptr.base = newBase - } - } -} - -// FileSetFor returns a new FileSet containing a sequence of new Files with -// the same base, size, and line as the input files, for use in APIs that -// require a FileSet. -// -// Precondition: the input files must be non-overlapping, and sorted in order -// of their Base. -func FileSetFor(files ...*token.File) *token.FileSet { - fset := token.NewFileSet() - for _, f := range files { - f2 := fset.AddFile(f.Name(), f.Base(), f.Size()) - lines := GetLines(f) - f2.SetLines(lines) - } - return fset -} - -// CloneFileSet creates a new FileSet holding all files in fset. It does not -// create copies of the token.Files in fset: they are added to the resulting -// FileSet unmodified. -func CloneFileSet(fset *token.FileSet) *token.FileSet { - var files []*token.File - fset.Iterate(func(f *token.File) bool { - files = append(files, f) - return true - }) - newFileSet := token.NewFileSet() - AddExistingFiles(newFileSet, files) - return newFileSet -} diff --git a/vendor/golang.org/x/tools/internal/typeparams/common.go b/vendor/golang.org/x/tools/internal/typeparams/common.go index d0d0649f..0b84acc5 100644 --- a/vendor/golang.org/x/tools/internal/typeparams/common.go +++ b/vendor/golang.org/x/tools/internal/typeparams/common.go @@ -2,20 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package typeparams contains common utilities for writing tools that interact -// with generic Go code, as introduced with Go 1.18. -// -// Many of the types and functions in this package are proxies for the new APIs -// introduced in the standard library with Go 1.18. For example, the -// typeparams.Union type is an alias for go/types.Union, and the ForTypeSpec -// function returns the value of the go/ast.TypeSpec.TypeParams field. At Go -// versions older than 1.18 these helpers are implemented as stubs, allowing -// users of this package to write code that handles generic constructs inline, -// even if the Go version being used to compile does not support generics. -// -// Additionally, this package contains common utilities for working with the -// new generic constructs, to supplement the standard library APIs. Notably, -// the StructuralTerms API computes a minimal representation of the structural +// Package typeparams contains common utilities for writing tools that +// interact with generic Go code, as introduced with Go 1.18. It +// supplements the standard library APIs. Notably, the StructuralTerms +// API computes a minimal representation of the structural // restrictions on a type parameter. // // An external version of these APIs is available in the @@ -23,7 +13,6 @@ package typeparams import ( - "fmt" "go/ast" "go/token" "go/types" @@ -42,7 +31,7 @@ func UnpackIndexExpr(n ast.Node) (x ast.Expr, lbrack token.Pos, indices []ast.Ex switch e := n.(type) { case *ast.IndexExpr: return e.X, e.Lbrack, []ast.Expr{e.Index}, e.Rbrack - case *IndexListExpr: + case *ast.IndexListExpr: return e.X, e.Lbrack, e.Indices, e.Rbrack } return nil, token.NoPos, nil, token.NoPos @@ -63,7 +52,7 @@ func PackIndexExpr(x ast.Expr, lbrack token.Pos, indices []ast.Expr, rbrack toke Rbrack: rbrack, } default: - return &IndexListExpr{ + return &ast.IndexListExpr{ X: x, Lbrack: lbrack, Indices: indices, @@ -72,73 +61,17 @@ func PackIndexExpr(x ast.Expr, lbrack token.Pos, indices []ast.Expr, rbrack toke } } -// IsTypeParam reports whether t is a type parameter. +// IsTypeParam reports whether t is a type parameter (or an alias of one). func IsTypeParam(t types.Type) bool { - _, ok := t.(*TypeParam) + _, ok := types.Unalias(t).(*types.TypeParam) return ok } -// OriginMethod returns the origin method associated with the method fn. -// For methods on a non-generic receiver base type, this is just -// fn. However, for methods with a generic receiver, OriginMethod returns the -// corresponding method in the method set of the origin type. -// -// As a special case, if fn is not a method (has no receiver), OriginMethod -// returns fn. -func OriginMethod(fn *types.Func) *types.Func { - recv := fn.Type().(*types.Signature).Recv() - if recv == nil { - return fn - } - base := recv.Type() - p, isPtr := base.(*types.Pointer) - if isPtr { - base = p.Elem() - } - named, isNamed := base.(*types.Named) - if !isNamed { - // Receiver is a *types.Interface. - return fn - } - if ForNamed(named).Len() == 0 { - // Receiver base has no type parameters, so we can avoid the lookup below. - return fn - } - orig := NamedTypeOrigin(named) - gfn, _, _ := types.LookupFieldOrMethod(orig, true, fn.Pkg(), fn.Name()) - - // This is a fix for a gopls crash (#60628) due to a go/types bug (#60634). In: - // package p - // type T *int - // func (*T) f() {} - // LookupFieldOrMethod(T, true, p, f)=nil, but NewMethodSet(*T)={(*T).f}. - // Here we make them consistent by force. - // (The go/types bug is general, but this workaround is reached only - // for generic T thanks to the early return above.) - if gfn == nil { - mset := types.NewMethodSet(types.NewPointer(orig)) - for i := 0; i < mset.Len(); i++ { - m := mset.At(i) - if m.Obj().Id() == fn.Id() { - gfn = m.Obj() - break - } - } - } - - // In golang/go#61196, we observe another crash, this time inexplicable. - if gfn == nil { - panic(fmt.Sprintf("missing origin method for %s.%s; named == origin: %t, named.NumMethods(): %d, origin.NumMethods(): %d", named, fn, named == orig, named.NumMethods(), orig.NumMethods())) - } - - return gfn.(*types.Func) -} - // GenericAssignableTo is a generalization of types.AssignableTo that // implements the following rule for uninstantiated generic types: // // If V and T are generic named types, then V is considered assignable to T if, -// for every possible instantation of V[A_1, ..., A_N], the instantiation +// for every possible instantiation of V[A_1, ..., A_N], the instantiation // T[A_1, ..., A_N] is valid and V[A_1, ..., A_N] implements T[A_1, ..., A_N]. // // If T has structural constraints, they must be satisfied by V. @@ -157,7 +90,10 @@ func OriginMethod(fn *types.Func) *types.Func { // // In this case, GenericAssignableTo reports that instantiations of Container // are assignable to the corresponding instantiation of Interface. -func GenericAssignableTo(ctxt *Context, V, T types.Type) bool { +func GenericAssignableTo(ctxt *types.Context, V, T types.Type) bool { + V = types.Unalias(V) + T = types.Unalias(T) + // If V and T are not both named, or do not have matching non-empty type // parameter lists, fall back on types.AssignableTo. @@ -167,9 +103,9 @@ func GenericAssignableTo(ctxt *Context, V, T types.Type) bool { return types.AssignableTo(V, T) } - vtparams := ForNamed(VN) - ttparams := ForNamed(TN) - if vtparams.Len() == 0 || vtparams.Len() != ttparams.Len() || NamedTypeArgs(VN).Len() != 0 || NamedTypeArgs(TN).Len() != 0 { + vtparams := VN.TypeParams() + ttparams := TN.TypeParams() + if vtparams.Len() == 0 || vtparams.Len() != ttparams.Len() || VN.TypeArgs().Len() != 0 || TN.TypeArgs().Len() != 0 { return types.AssignableTo(V, T) } @@ -182,7 +118,7 @@ func GenericAssignableTo(ctxt *Context, V, T types.Type) bool { // Minor optimization: ensure we share a context across the two // instantiations below. if ctxt == nil { - ctxt = NewContext() + ctxt = types.NewContext() } var targs []types.Type @@ -190,12 +126,12 @@ func GenericAssignableTo(ctxt *Context, V, T types.Type) bool { targs = append(targs, vtparams.At(i)) } - vinst, err := Instantiate(ctxt, V, targs, true) + vinst, err := types.Instantiate(ctxt, V, targs, true) if err != nil { panic("type parameters should satisfy their own constraints") } - tinst, err := Instantiate(ctxt, T, targs, true) + tinst, err := types.Instantiate(ctxt, T, targs, true) if err != nil { return false } diff --git a/vendor/golang.org/x/tools/internal/typeparams/coretype.go b/vendor/golang.org/x/tools/internal/typeparams/coretype.go index 993135ec..6e83c6fb 100644 --- a/vendor/golang.org/x/tools/internal/typeparams/coretype.go +++ b/vendor/golang.org/x/tools/internal/typeparams/coretype.go @@ -5,6 +5,7 @@ package typeparams import ( + "fmt" "go/types" ) @@ -17,7 +18,7 @@ func CoreType(T types.Type) types.Type { return U // for non-interface types, } - terms, err := _NormalTerms(U) + terms, err := NormalTerms(U) if len(terms) == 0 || err != nil { // len(terms) -> empty type set of interface. // err != nil => U is invalid, exceeds complexity bounds, or has an empty type set. @@ -63,7 +64,7 @@ func CoreType(T types.Type) types.Type { return ch } -// _NormalTerms returns a slice of terms representing the normalized structural +// NormalTerms returns a slice of terms representing the normalized structural // type restrictions of a type, if any. // // For all types other than *types.TypeParam, *types.Interface, and @@ -81,42 +82,69 @@ func CoreType(T types.Type) types.Type { // restrictions may be arbitrarily complex. For example, consider the // following: // -// type A interface{ ~string|~[]byte } +// type A interface{ ~string|~[]byte } // -// type B interface{ int|string } +// type B interface{ int|string } // -// type C interface { ~string|~int } +// type C interface { ~string|~int } // -// type T[P interface{ A|B; C }] int +// type T[P interface{ A|B; C }] int // // In this example, the structural type restriction of P is ~string|int: A|B // expands to ~string|~[]byte|int|string, which reduces to ~string|~[]byte|int, // which when intersected with C (~string|~int) yields ~string|int. // -// _NormalTerms computes these expansions and reductions, producing a +// NormalTerms computes these expansions and reductions, producing a // "normalized" form of the embeddings. A structural restriction is normalized // if it is a single union containing no interface terms, and is minimal in the // sense that removing any term changes the set of types satisfying the // constraint. It is left as a proof for the reader that, modulo sorting, there // is exactly one such normalized form. // -// Because the minimal representation always takes this form, _NormalTerms +// Because the minimal representation always takes this form, NormalTerms // returns a slice of tilde terms corresponding to the terms of the union in // the normalized structural restriction. An error is returned if the type is // invalid, exceeds complexity bounds, or has an empty type set. In the latter -// case, _NormalTerms returns ErrEmptyTypeSet. +// case, NormalTerms returns ErrEmptyTypeSet. // -// _NormalTerms makes no guarantees about the order of terms, except that it +// NormalTerms makes no guarantees about the order of terms, except that it // is deterministic. -func _NormalTerms(typ types.Type) ([]*Term, error) { - switch typ := typ.(type) { - case *TypeParam: +func NormalTerms(typ types.Type) ([]*types.Term, error) { + switch typ := typ.Underlying().(type) { + case *types.TypeParam: return StructuralTerms(typ) - case *Union: + case *types.Union: return UnionTermSet(typ) case *types.Interface: return InterfaceTermSet(typ) default: - return []*Term{NewTerm(false, typ)}, nil + return []*types.Term{types.NewTerm(false, typ)}, nil } } + +// Deref returns the type of the variable pointed to by t, +// if t's core type is a pointer; otherwise it returns t. +// +// Do not assume that Deref(T)==T implies T is not a pointer: +// consider "type T *T", for example. +// +// TODO(adonovan): ideally this would live in typesinternal, but that +// creates an import cycle. Move there when we melt this package down. +func Deref(t types.Type) types.Type { + if ptr, ok := CoreType(t).(*types.Pointer); ok { + return ptr.Elem() + } + return t +} + +// MustDeref returns the type of the variable pointed to by t. +// It panics if t's core type is not a pointer. +// +// TODO(adonovan): ideally this would live in typesinternal, but that +// creates an import cycle. Move there when we melt this package down. +func MustDeref(t types.Type) types.Type { + if ptr, ok := CoreType(t).(*types.Pointer); ok { + return ptr.Elem() + } + panic(fmt.Sprintf("%v is not a pointer", t)) +} diff --git a/vendor/golang.org/x/tools/internal/typeparams/enabled_go117.go b/vendor/golang.org/x/tools/internal/typeparams/enabled_go117.go deleted file mode 100644 index 18212390..00000000 --- a/vendor/golang.org/x/tools/internal/typeparams/enabled_go117.go +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !go1.18 -// +build !go1.18 - -package typeparams - -// Enabled reports whether type parameters are enabled in the current build -// environment. -const Enabled = false diff --git a/vendor/golang.org/x/tools/internal/typeparams/enabled_go118.go b/vendor/golang.org/x/tools/internal/typeparams/enabled_go118.go deleted file mode 100644 index d6714882..00000000 --- a/vendor/golang.org/x/tools/internal/typeparams/enabled_go118.go +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.18 -// +build go1.18 - -package typeparams - -// Note: this constant is in a separate file as this is the only acceptable -// diff between the <1.18 API of this package and the 1.18 API. - -// Enabled reports whether type parameters are enabled in the current build -// environment. -const Enabled = true diff --git a/vendor/golang.org/x/tools/internal/typeparams/free.go b/vendor/golang.org/x/tools/internal/typeparams/free.go new file mode 100644 index 00000000..35810826 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/typeparams/free.go @@ -0,0 +1,118 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package typeparams + +import ( + "go/types" +) + +// Free is a memoization of the set of free type parameters within a +// type. It makes a sequence of calls to [Free.Has] for overlapping +// types more efficient. The zero value is ready for use. +// +// NOTE: Adapted from go/types/infer.go. If it is later exported, factor. +type Free struct { + seen map[types.Type]bool +} + +// Has reports whether the specified type has a free type parameter. +func (w *Free) Has(typ types.Type) (res bool) { + // detect cycles + if x, ok := w.seen[typ]; ok { + return x + } + if w.seen == nil { + w.seen = make(map[types.Type]bool) + } + w.seen[typ] = false + defer func() { + w.seen[typ] = res + }() + + switch t := typ.(type) { + case nil, *types.Basic: // TODO(gri) should nil be handled here? + break + + case *types.Alias: + return w.Has(types.Unalias(t)) + + case *types.Array: + return w.Has(t.Elem()) + + case *types.Slice: + return w.Has(t.Elem()) + + case *types.Struct: + for i, n := 0, t.NumFields(); i < n; i++ { + if w.Has(t.Field(i).Type()) { + return true + } + } + + case *types.Pointer: + return w.Has(t.Elem()) + + case *types.Tuple: + n := t.Len() + for i := 0; i < n; i++ { + if w.Has(t.At(i).Type()) { + return true + } + } + + case *types.Signature: + // t.tparams may not be nil if we are looking at a signature + // of a generic function type (or an interface method) that is + // part of the type we're testing. We don't care about these type + // parameters. + // Similarly, the receiver of a method may declare (rather than + // use) type parameters, we don't care about those either. + // Thus, we only need to look at the input and result parameters. + return w.Has(t.Params()) || w.Has(t.Results()) + + case *types.Interface: + for i, n := 0, t.NumMethods(); i < n; i++ { + if w.Has(t.Method(i).Type()) { + return true + } + } + terms, err := InterfaceTermSet(t) + if err != nil { + return false // ill typed + } + for _, term := range terms { + if w.Has(term.Type()) { + return true + } + } + + case *types.Map: + return w.Has(t.Key()) || w.Has(t.Elem()) + + case *types.Chan: + return w.Has(t.Elem()) + + case *types.Named: + args := t.TypeArgs() + // TODO(taking): this does not match go/types/infer.go. Check with rfindley. + if params := t.TypeParams(); params.Len() > args.Len() { + return true + } + for i, n := 0, args.Len(); i < n; i++ { + if w.Has(args.At(i)) { + return true + } + } + return w.Has(t.Underlying()) // recurse for types local to parameterized functions + + case *types.TypeParam: + return true + + default: + panic(t) // unreachable + } + + return false +} diff --git a/vendor/golang.org/x/tools/internal/typeparams/normalize.go b/vendor/golang.org/x/tools/internal/typeparams/normalize.go index 9c631b65..93c80fdc 100644 --- a/vendor/golang.org/x/tools/internal/typeparams/normalize.go +++ b/vendor/golang.org/x/tools/internal/typeparams/normalize.go @@ -60,7 +60,7 @@ var ErrEmptyTypeSet = errors.New("empty type set") // // StructuralTerms makes no guarantees about the order of terms, except that it // is deterministic. -func StructuralTerms(tparam *TypeParam) ([]*Term, error) { +func StructuralTerms(tparam *types.TypeParam) ([]*types.Term, error) { constraint := tparam.Constraint() if constraint == nil { return nil, fmt.Errorf("%s has nil constraint", tparam) @@ -78,7 +78,7 @@ func StructuralTerms(tparam *TypeParam) ([]*Term, error) { // // See the documentation of StructuralTerms for more information on // normalization. -func InterfaceTermSet(iface *types.Interface) ([]*Term, error) { +func InterfaceTermSet(iface *types.Interface) ([]*types.Term, error) { return computeTermSet(iface) } @@ -88,11 +88,11 @@ func InterfaceTermSet(iface *types.Interface) ([]*Term, error) { // // See the documentation of StructuralTerms for more information on // normalization. -func UnionTermSet(union *Union) ([]*Term, error) { +func UnionTermSet(union *types.Union) ([]*types.Term, error) { return computeTermSet(union) } -func computeTermSet(typ types.Type) ([]*Term, error) { +func computeTermSet(typ types.Type) ([]*types.Term, error) { tset, err := computeTermSetInternal(typ, make(map[types.Type]*termSet), 0) if err != nil { return nil, err @@ -103,9 +103,9 @@ func computeTermSet(typ types.Type) ([]*Term, error) { if tset.terms.isAll() { return nil, nil } - var terms []*Term + var terms []*types.Term for _, term := range tset.terms { - terms = append(terms, NewTerm(term.tilde, term.typ)) + terms = append(terms, types.NewTerm(term.tilde, term.typ)) } return terms, nil } @@ -162,7 +162,7 @@ func computeTermSetInternal(t types.Type, seen map[types.Type]*termSet, depth in tset.terms = allTermlist for i := 0; i < u.NumEmbeddeds(); i++ { embedded := u.EmbeddedType(i) - if _, ok := embedded.Underlying().(*TypeParam); ok { + if _, ok := embedded.Underlying().(*types.TypeParam); ok { return nil, fmt.Errorf("invalid embedded type %T", embedded) } tset2, err := computeTermSetInternal(embedded, seen, depth+1) @@ -171,7 +171,7 @@ func computeTermSetInternal(t types.Type, seen map[types.Type]*termSet, depth in } tset.terms = tset.terms.intersect(tset2.terms) } - case *Union: + case *types.Union: // The term set of a union is the union of term sets of its terms. tset.terms = nil for i := 0; i < u.Len(); i++ { @@ -184,7 +184,7 @@ func computeTermSetInternal(t types.Type, seen map[types.Type]*termSet, depth in return nil, err } terms = tset2.terms - case *TypeParam, *Union: + case *types.TypeParam, *types.Union: // A stand-alone type parameter or union is not permitted as union // term. return nil, fmt.Errorf("invalid union term %T", t) @@ -199,7 +199,7 @@ func computeTermSetInternal(t types.Type, seen map[types.Type]*termSet, depth in return nil, fmt.Errorf("exceeded max term count %d", maxTermCount) } } - case *TypeParam: + case *types.TypeParam: panic("unreachable") default: // For all other types, the term set is just a single non-tilde term diff --git a/vendor/golang.org/x/tools/internal/typeparams/termlist.go b/vendor/golang.org/x/tools/internal/typeparams/termlist.go index 933106a2..cbd12f80 100644 --- a/vendor/golang.org/x/tools/internal/typeparams/termlist.go +++ b/vendor/golang.org/x/tools/internal/typeparams/termlist.go @@ -30,7 +30,7 @@ func (xl termlist) String() string { var buf bytes.Buffer for i, x := range xl { if i > 0 { - buf.WriteString(" ∪ ") + buf.WriteString(" | ") } buf.WriteString(x.String()) } diff --git a/vendor/golang.org/x/tools/internal/typeparams/typeparams_go117.go b/vendor/golang.org/x/tools/internal/typeparams/typeparams_go117.go deleted file mode 100644 index 7ed86e17..00000000 --- a/vendor/golang.org/x/tools/internal/typeparams/typeparams_go117.go +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !go1.18 -// +build !go1.18 - -package typeparams - -import ( - "go/ast" - "go/token" - "go/types" -) - -func unsupported() { - panic("type parameters are unsupported at this go version") -} - -// IndexListExpr is a placeholder type, as type parameters are not supported at -// this Go version. Its methods panic on use. -type IndexListExpr struct { - ast.Expr - X ast.Expr // expression - Lbrack token.Pos // position of "[" - Indices []ast.Expr // index expressions - Rbrack token.Pos // position of "]" -} - -// ForTypeSpec returns an empty field list, as type parameters on not supported -// at this Go version. -func ForTypeSpec(*ast.TypeSpec) *ast.FieldList { - return nil -} - -// ForFuncType returns an empty field list, as type parameters are not -// supported at this Go version. -func ForFuncType(*ast.FuncType) *ast.FieldList { - return nil -} - -// TypeParam is a placeholder type, as type parameters are not supported at -// this Go version. Its methods panic on use. -type TypeParam struct{ types.Type } - -func (*TypeParam) Index() int { unsupported(); return 0 } -func (*TypeParam) Constraint() types.Type { unsupported(); return nil } -func (*TypeParam) Obj() *types.TypeName { unsupported(); return nil } - -// TypeParamList is a placeholder for an empty type parameter list. -type TypeParamList struct{} - -func (*TypeParamList) Len() int { return 0 } -func (*TypeParamList) At(int) *TypeParam { unsupported(); return nil } - -// TypeList is a placeholder for an empty type list. -type TypeList struct{} - -func (*TypeList) Len() int { return 0 } -func (*TypeList) At(int) types.Type { unsupported(); return nil } - -// NewTypeParam is unsupported at this Go version, and panics. -func NewTypeParam(name *types.TypeName, constraint types.Type) *TypeParam { - unsupported() - return nil -} - -// SetTypeParamConstraint is unsupported at this Go version, and panics. -func SetTypeParamConstraint(tparam *TypeParam, constraint types.Type) { - unsupported() -} - -// NewSignatureType calls types.NewSignature, panicking if recvTypeParams or -// typeParams is non-empty. -func NewSignatureType(recv *types.Var, recvTypeParams, typeParams []*TypeParam, params, results *types.Tuple, variadic bool) *types.Signature { - if len(recvTypeParams) != 0 || len(typeParams) != 0 { - panic("signatures cannot have type parameters at this Go version") - } - return types.NewSignature(recv, params, results, variadic) -} - -// ForSignature returns an empty slice. -func ForSignature(*types.Signature) *TypeParamList { - return nil -} - -// RecvTypeParams returns a nil slice. -func RecvTypeParams(sig *types.Signature) *TypeParamList { - return nil -} - -// IsComparable returns false, as no interfaces are type-restricted at this Go -// version. -func IsComparable(*types.Interface) bool { - return false -} - -// IsMethodSet returns true, as no interfaces are type-restricted at this Go -// version. -func IsMethodSet(*types.Interface) bool { - return true -} - -// IsImplicit returns false, as no interfaces are implicit at this Go version. -func IsImplicit(*types.Interface) bool { - return false -} - -// MarkImplicit does nothing, because this Go version does not have implicit -// interfaces. -func MarkImplicit(*types.Interface) {} - -// ForNamed returns an empty type parameter list, as type parameters are not -// supported at this Go version. -func ForNamed(*types.Named) *TypeParamList { - return nil -} - -// SetForNamed panics if tparams is non-empty. -func SetForNamed(_ *types.Named, tparams []*TypeParam) { - if len(tparams) > 0 { - unsupported() - } -} - -// NamedTypeArgs returns nil. -func NamedTypeArgs(*types.Named) *TypeList { - return nil -} - -// NamedTypeOrigin is the identity method at this Go version. -func NamedTypeOrigin(named *types.Named) *types.Named { - return named -} - -// Term holds information about a structural type restriction. -type Term struct { - tilde bool - typ types.Type -} - -func (m *Term) Tilde() bool { return m.tilde } -func (m *Term) Type() types.Type { return m.typ } -func (m *Term) String() string { - pre := "" - if m.tilde { - pre = "~" - } - return pre + m.typ.String() -} - -// NewTerm is unsupported at this Go version, and panics. -func NewTerm(tilde bool, typ types.Type) *Term { - return &Term{tilde, typ} -} - -// Union is a placeholder type, as type parameters are not supported at this Go -// version. Its methods panic on use. -type Union struct{ types.Type } - -func (*Union) Len() int { return 0 } -func (*Union) Term(i int) *Term { unsupported(); return nil } - -// NewUnion is unsupported at this Go version, and panics. -func NewUnion(terms []*Term) *Union { - unsupported() - return nil -} - -// InitInstanceInfo is a noop at this Go version. -func InitInstanceInfo(*types.Info) {} - -// Instance is a placeholder type, as type parameters are not supported at this -// Go version. -type Instance struct { - TypeArgs *TypeList - Type types.Type -} - -// GetInstances returns a nil map, as type parameters are not supported at this -// Go version. -func GetInstances(info *types.Info) map[*ast.Ident]Instance { return nil } - -// Context is a placeholder type, as type parameters are not supported at -// this Go version. -type Context struct{} - -// NewContext returns a placeholder Context instance. -func NewContext() *Context { - return &Context{} -} - -// Instantiate is unsupported on this Go version, and panics. -func Instantiate(ctxt *Context, typ types.Type, targs []types.Type, validate bool) (types.Type, error) { - unsupported() - return nil, nil -} diff --git a/vendor/golang.org/x/tools/internal/typeparams/typeparams_go118.go b/vendor/golang.org/x/tools/internal/typeparams/typeparams_go118.go deleted file mode 100644 index cf301af1..00000000 --- a/vendor/golang.org/x/tools/internal/typeparams/typeparams_go118.go +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.18 -// +build go1.18 - -package typeparams - -import ( - "go/ast" - "go/types" -) - -// IndexListExpr is an alias for ast.IndexListExpr. -type IndexListExpr = ast.IndexListExpr - -// ForTypeSpec returns n.TypeParams. -func ForTypeSpec(n *ast.TypeSpec) *ast.FieldList { - if n == nil { - return nil - } - return n.TypeParams -} - -// ForFuncType returns n.TypeParams. -func ForFuncType(n *ast.FuncType) *ast.FieldList { - if n == nil { - return nil - } - return n.TypeParams -} - -// TypeParam is an alias for types.TypeParam -type TypeParam = types.TypeParam - -// TypeParamList is an alias for types.TypeParamList -type TypeParamList = types.TypeParamList - -// TypeList is an alias for types.TypeList -type TypeList = types.TypeList - -// NewTypeParam calls types.NewTypeParam. -func NewTypeParam(name *types.TypeName, constraint types.Type) *TypeParam { - return types.NewTypeParam(name, constraint) -} - -// SetTypeParamConstraint calls tparam.SetConstraint(constraint). -func SetTypeParamConstraint(tparam *TypeParam, constraint types.Type) { - tparam.SetConstraint(constraint) -} - -// NewSignatureType calls types.NewSignatureType. -func NewSignatureType(recv *types.Var, recvTypeParams, typeParams []*TypeParam, params, results *types.Tuple, variadic bool) *types.Signature { - return types.NewSignatureType(recv, recvTypeParams, typeParams, params, results, variadic) -} - -// ForSignature returns sig.TypeParams() -func ForSignature(sig *types.Signature) *TypeParamList { - return sig.TypeParams() -} - -// RecvTypeParams returns sig.RecvTypeParams(). -func RecvTypeParams(sig *types.Signature) *TypeParamList { - return sig.RecvTypeParams() -} - -// IsComparable calls iface.IsComparable(). -func IsComparable(iface *types.Interface) bool { - return iface.IsComparable() -} - -// IsMethodSet calls iface.IsMethodSet(). -func IsMethodSet(iface *types.Interface) bool { - return iface.IsMethodSet() -} - -// IsImplicit calls iface.IsImplicit(). -func IsImplicit(iface *types.Interface) bool { - return iface.IsImplicit() -} - -// MarkImplicit calls iface.MarkImplicit(). -func MarkImplicit(iface *types.Interface) { - iface.MarkImplicit() -} - -// ForNamed extracts the (possibly empty) type parameter object list from -// named. -func ForNamed(named *types.Named) *TypeParamList { - return named.TypeParams() -} - -// SetForNamed sets the type params tparams on n. Each tparam must be of -// dynamic type *types.TypeParam. -func SetForNamed(n *types.Named, tparams []*TypeParam) { - n.SetTypeParams(tparams) -} - -// NamedTypeArgs returns named.TypeArgs(). -func NamedTypeArgs(named *types.Named) *TypeList { - return named.TypeArgs() -} - -// NamedTypeOrigin returns named.Orig(). -func NamedTypeOrigin(named *types.Named) *types.Named { - return named.Origin() -} - -// Term is an alias for types.Term. -type Term = types.Term - -// NewTerm calls types.NewTerm. -func NewTerm(tilde bool, typ types.Type) *Term { - return types.NewTerm(tilde, typ) -} - -// Union is an alias for types.Union -type Union = types.Union - -// NewUnion calls types.NewUnion. -func NewUnion(terms []*Term) *Union { - return types.NewUnion(terms) -} - -// InitInstanceInfo initializes info to record information about type and -// function instances. -func InitInstanceInfo(info *types.Info) { - info.Instances = make(map[*ast.Ident]types.Instance) -} - -// Instance is an alias for types.Instance. -type Instance = types.Instance - -// GetInstances returns info.Instances. -func GetInstances(info *types.Info) map[*ast.Ident]Instance { - return info.Instances -} - -// Context is an alias for types.Context. -type Context = types.Context - -// NewContext calls types.NewContext. -func NewContext() *Context { - return types.NewContext() -} - -// Instantiate calls types.Instantiate. -func Instantiate(ctxt *Context, typ types.Type, targs []types.Type, validate bool) (types.Type, error) { - return types.Instantiate(ctxt, typ, targs, validate) -} diff --git a/vendor/golang.org/x/tools/internal/typeparams/typeterm.go b/vendor/golang.org/x/tools/internal/typeparams/typeterm.go index 7ddee28d..7350bb70 100644 --- a/vendor/golang.org/x/tools/internal/typeparams/typeterm.go +++ b/vendor/golang.org/x/tools/internal/typeparams/typeterm.go @@ -10,11 +10,10 @@ import "go/types" // A term describes elementary type sets: // -// ∅: (*term)(nil) == ∅ // set of no types (empty set) -// 𝓤: &term{} == 𝓤 // set of all types (𝓤niverse) -// T: &term{false, T} == {T} // set of type T -// ~t: &term{true, t} == {t' | under(t') == t} // set of types with underlying type t -// +// ∅: (*term)(nil) == ∅ // set of no types (empty set) +// 𝓤: &term{} == 𝓤 // set of all types (𝓤niverse) +// T: &term{false, T} == {T} // set of type T +// ~t: &term{true, t} == {t' | under(t') == t} // set of types with underlying type t type term struct { tilde bool // valid if typ != nil typ types.Type diff --git a/vendor/golang.org/x/tools/internal/typesinternal/element.go b/vendor/golang.org/x/tools/internal/typesinternal/element.go new file mode 100644 index 00000000..4957f021 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/typesinternal/element.go @@ -0,0 +1,133 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package typesinternal + +import ( + "fmt" + "go/types" + + "golang.org/x/tools/go/types/typeutil" +) + +// ForEachElement calls f for type T and each type reachable from its +// type through reflection. It does this by recursively stripping off +// type constructors; in addition, for each named type N, the type *N +// is added to the result as it may have additional methods. +// +// The caller must provide an initially empty set used to de-duplicate +// identical types, potentially across multiple calls to ForEachElement. +// (Its final value holds all the elements seen, matching the arguments +// passed to f.) +// +// TODO(adonovan): share/harmonize with go/callgraph/rta. +func ForEachElement(rtypes *typeutil.Map, msets *typeutil.MethodSetCache, T types.Type, f func(types.Type)) { + var visit func(T types.Type, skip bool) + visit = func(T types.Type, skip bool) { + if !skip { + if seen, _ := rtypes.Set(T, true).(bool); seen { + return // de-dup + } + + f(T) // notify caller of new element type + } + + // Recursion over signatures of each method. + tmset := msets.MethodSet(T) + for i := 0; i < tmset.Len(); i++ { + sig := tmset.At(i).Type().(*types.Signature) + // It is tempting to call visit(sig, false) + // but, as noted in golang.org/cl/65450043, + // the Signature.Recv field is ignored by + // types.Identical and typeutil.Map, which + // is confusing at best. + // + // More importantly, the true signature rtype + // reachable from a method using reflection + // has no receiver but an extra ordinary parameter. + // For the Read method of io.Reader we want: + // func(Reader, []byte) (int, error) + // but here sig is: + // func([]byte) (int, error) + // with .Recv = Reader (though it is hard to + // notice because it doesn't affect Signature.String + // or types.Identical). + // + // TODO(adonovan): construct and visit the correct + // non-method signature with an extra parameter + // (though since unnamed func types have no methods + // there is essentially no actual demand for this). + // + // TODO(adonovan): document whether or not it is + // safe to skip non-exported methods (as RTA does). + visit(sig.Params(), true) // skip the Tuple + visit(sig.Results(), true) // skip the Tuple + } + + switch T := T.(type) { + case *types.Alias: + visit(types.Unalias(T), skip) // emulates the pre-Alias behavior + + case *types.Basic: + // nop + + case *types.Interface: + // nop---handled by recursion over method set. + + case *types.Pointer: + visit(T.Elem(), false) + + case *types.Slice: + visit(T.Elem(), false) + + case *types.Chan: + visit(T.Elem(), false) + + case *types.Map: + visit(T.Key(), false) + visit(T.Elem(), false) + + case *types.Signature: + if T.Recv() != nil { + panic(fmt.Sprintf("Signature %s has Recv %s", T, T.Recv())) + } + visit(T.Params(), true) // skip the Tuple + visit(T.Results(), true) // skip the Tuple + + case *types.Named: + // A pointer-to-named type can be derived from a named + // type via reflection. It may have methods too. + visit(types.NewPointer(T), false) + + // Consider 'type T struct{S}' where S has methods. + // Reflection provides no way to get from T to struct{S}, + // only to S, so the method set of struct{S} is unwanted, + // so set 'skip' flag during recursion. + visit(T.Underlying(), true) // skip the unnamed type + + case *types.Array: + visit(T.Elem(), false) + + case *types.Struct: + for i, n := 0, T.NumFields(); i < n; i++ { + // TODO(adonovan): document whether or not + // it is safe to skip non-exported fields. + visit(T.Field(i).Type(), false) + } + + case *types.Tuple: + for i, n := 0, T.Len(); i < n; i++ { + visit(T.At(i).Type(), false) + } + + case *types.TypeParam, *types.Union: + // forEachReachable must not be called on parameterized types. + panic(T) + + default: + panic(T) + } + } + visit(T, false) +} diff --git a/vendor/golang.org/x/tools/internal/typesinternal/errorcode.go b/vendor/golang.org/x/tools/internal/typesinternal/errorcode.go index 07484073..131caab2 100644 --- a/vendor/golang.org/x/tools/internal/typesinternal/errorcode.go +++ b/vendor/golang.org/x/tools/internal/typesinternal/errorcode.go @@ -167,7 +167,7 @@ const ( UntypedNilUse // WrongAssignCount occurs when the number of values on the right-hand side - // of an assignment or or initialization expression does not match the number + // of an assignment or initialization expression does not match the number // of variables on the left-hand side. // // Example: @@ -838,7 +838,7 @@ const ( // InvalidCap occurs when an argument to the cap built-in function is not of // supported type. // - // See https://golang.org/ref/spec#Lengthand_capacity for information on + // See https://golang.org/ref/spec#Length_and_capacity for information on // which underlying types are supported as arguments to cap and len. // // Example: @@ -859,7 +859,7 @@ const ( // InvalidCopy occurs when the arguments are not of slice type or do not // have compatible type. // - // See https://golang.org/ref/spec#Appendingand_copying_slices for more + // See https://golang.org/ref/spec#Appending_and_copying_slices for more // information on the type requirements for the copy built-in. // // Example: @@ -897,7 +897,7 @@ const ( // InvalidLen occurs when an argument to the len built-in function is not of // supported type. // - // See https://golang.org/ref/spec#Lengthand_capacity for information on + // See https://golang.org/ref/spec#Length_and_capacity for information on // which underlying types are supported as arguments to cap and len. // // Example: @@ -914,7 +914,7 @@ const ( // InvalidMake occurs when make is called with an unsupported type argument. // - // See https://golang.org/ref/spec#Makingslices_maps_and_channels for + // See https://golang.org/ref/spec#Making_slices_maps_and_channels for // information on the types that may be created using make. // // Example: @@ -1449,10 +1449,10 @@ const ( NotAGenericType // WrongTypeArgCount occurs when a type or function is instantiated with an - // incorrent number of type arguments, including when a generic type or + // incorrect number of type arguments, including when a generic type or // function is used without instantiation. // - // Errors inolving failed type inference are assigned other error codes. + // Errors involving failed type inference are assigned other error codes. // // Example: // type T[p any] int diff --git a/vendor/golang.org/x/tools/internal/typesinternal/objectpath.go b/vendor/golang.org/x/tools/internal/typesinternal/objectpath.go deleted file mode 100644 index 5e96e895..00000000 --- a/vendor/golang.org/x/tools/internal/typesinternal/objectpath.go +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2023 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package typesinternal - -import "go/types" - -// This file contains back doors that allow gopls to avoid method sorting when -// using the objectpath package. -// -// This is performance-critical in certain repositories, but changing the -// behavior of the objectpath package is still being discussed in -// golang/go#61443. If we decide to remove the sorting in objectpath we can -// simply delete these back doors. Otherwise, we should add a new API to -// objectpath that allows controlling the sorting. - -// SkipEncoderMethodSorting marks enc (which must be an *objectpath.Encoder) as -// not requiring sorted methods. -var SkipEncoderMethodSorting func(enc interface{}) - -// ObjectpathObject is like objectpath.Object, but allows suppressing method -// sorting. -var ObjectpathObject func(pkg *types.Package, p string, skipMethodSorting bool) (types.Object, error) diff --git a/vendor/golang.org/x/tools/internal/typesinternal/recv.go b/vendor/golang.org/x/tools/internal/typesinternal/recv.go new file mode 100644 index 00000000..ba6f4f4e --- /dev/null +++ b/vendor/golang.org/x/tools/internal/typesinternal/recv.go @@ -0,0 +1,41 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package typesinternal + +import ( + "go/types" +) + +// ReceiverNamed returns the named type (if any) associated with the +// type of recv, which may be of the form N or *N, or aliases thereof. +// It also reports whether a Pointer was present. +func ReceiverNamed(recv *types.Var) (isPtr bool, named *types.Named) { + t := recv.Type() + if ptr, ok := types.Unalias(t).(*types.Pointer); ok { + isPtr = true + t = ptr.Elem() + } + named, _ = types.Unalias(t).(*types.Named) + return +} + +// Unpointer returns T given *T or an alias thereof. +// For all other types it is the identity function. +// It does not look at underlying types. +// The result may be an alias. +// +// Use this function to strip off the optional pointer on a receiver +// in a field or method selection, without losing the named type +// (which is needed to compute the method set). +// +// See also [typeparams.MustDeref], which removes one level of +// indirection from the type, regardless of named types (analogous to +// a LOAD instruction). +func Unpointer(t types.Type) types.Type { + if ptr, ok := types.Unalias(t).(*types.Pointer); ok { + return ptr.Elem() + } + return t +} diff --git a/vendor/golang.org/x/tools/internal/typesinternal/toonew.go b/vendor/golang.org/x/tools/internal/typesinternal/toonew.go new file mode 100644 index 00000000..cc86487e --- /dev/null +++ b/vendor/golang.org/x/tools/internal/typesinternal/toonew.go @@ -0,0 +1,89 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package typesinternal + +import ( + "go/types" + + "golang.org/x/tools/internal/stdlib" + "golang.org/x/tools/internal/versions" +) + +// TooNewStdSymbols computes the set of package-level symbols +// exported by pkg that are not available at the specified version. +// The result maps each symbol to its minimum version. +// +// The pkg is allowed to contain type errors. +func TooNewStdSymbols(pkg *types.Package, version string) map[types.Object]string { + disallowed := make(map[types.Object]string) + + // Pass 1: package-level symbols. + symbols := stdlib.PackageSymbols[pkg.Path()] + for _, sym := range symbols { + symver := sym.Version.String() + if versions.Before(version, symver) { + switch sym.Kind { + case stdlib.Func, stdlib.Var, stdlib.Const, stdlib.Type: + disallowed[pkg.Scope().Lookup(sym.Name)] = symver + } + } + } + + // Pass 2: fields and methods. + // + // We allow fields and methods if their associated type is + // disallowed, as otherwise we would report false positives + // for compatibility shims. Consider: + // + // //go:build go1.22 + // type T struct { F std.Real } // correct new API + // + // //go:build !go1.22 + // type T struct { F fake } // shim + // type fake struct { ... } + // func (fake) M () {} + // + // These alternative declarations of T use either the std.Real + // type, introduced in go1.22, or a fake type, for the field + // F. (The fakery could be arbitrarily deep, involving more + // nested fields and methods than are shown here.) Clients + // that use the compatibility shim T will compile with any + // version of go, whether older or newer than go1.22, but only + // the newer version will use the std.Real implementation. + // + // Now consider a reference to method M in new(T).F.M() in a + // module that requires a minimum of go1.21. The analysis may + // occur using a version of Go higher than 1.21, selecting the + // first version of T, so the method M is Real.M. This would + // spuriously cause the analyzer to report a reference to a + // too-new symbol even though this expression compiles just + // fine (with the fake implementation) using go1.21. + for _, sym := range symbols { + symVersion := sym.Version.String() + if !versions.Before(version, symVersion) { + continue // allowed + } + + var obj types.Object + switch sym.Kind { + case stdlib.Field: + typename, name := sym.SplitField() + if t := pkg.Scope().Lookup(typename); t != nil && disallowed[t] == "" { + obj, _, _ = types.LookupFieldOrMethod(t.Type(), false, pkg, name) + } + + case stdlib.Method: + ptr, recvname, name := sym.SplitMethod() + if t := pkg.Scope().Lookup(recvname); t != nil && disallowed[t] == "" { + obj, _, _ = types.LookupFieldOrMethod(t.Type(), ptr, pkg, name) + } + } + if obj != nil { + disallowed[obj] = symVersion + } + } + + return disallowed +} diff --git a/vendor/golang.org/x/tools/internal/typesinternal/types.go b/vendor/golang.org/x/tools/internal/typesinternal/types.go index ce7d4351..83923286 100644 --- a/vendor/golang.org/x/tools/internal/typesinternal/types.go +++ b/vendor/golang.org/x/tools/internal/typesinternal/types.go @@ -49,4 +49,17 @@ func ReadGo116ErrorData(err types.Error) (code ErrorCode, start, end token.Pos, return ErrorCode(data[0]), token.Pos(data[1]), token.Pos(data[2]), true } -var SetGoVersion = func(conf *types.Config, version string) bool { return false } +// NameRelativeTo returns a types.Qualifier that qualifies members of +// all packages other than pkg, using only the package name. +// (By contrast, [types.RelativeTo] uses the complete package path, +// which is often excessive.) +// +// If pkg is nil, it is equivalent to [*types.Package.Name]. +func NameRelativeTo(pkg *types.Package) types.Qualifier { + return func(other *types.Package) string { + if pkg != nil && pkg == other { + return "" // same package; unqualified + } + return other.Name() + } +} diff --git a/vendor/golang.org/x/tools/internal/typesinternal/types_118.go b/vendor/golang.org/x/tools/internal/typesinternal/types_118.go deleted file mode 100644 index a42b072a..00000000 --- a/vendor/golang.org/x/tools/internal/typesinternal/types_118.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.18 -// +build go1.18 - -package typesinternal - -import ( - "go/types" -) - -func init() { - SetGoVersion = func(conf *types.Config, version string) bool { - conf.GoVersion = version - return true - } -} diff --git a/vendor/golang.org/x/tools/internal/versions/constraint.go b/vendor/golang.org/x/tools/internal/versions/constraint.go new file mode 100644 index 00000000..179063d4 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/versions/constraint.go @@ -0,0 +1,13 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package versions + +import "go/build/constraint" + +// ConstraintGoVersion is constraint.GoVersion (if built with go1.21+). +// Otherwise nil. +// +// Deprecate once x/tools is after go1.21. +var ConstraintGoVersion func(x constraint.Expr) string diff --git a/vendor/golang.org/x/tools/internal/versions/constraint_go121.go b/vendor/golang.org/x/tools/internal/versions/constraint_go121.go new file mode 100644 index 00000000..38011407 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/versions/constraint_go121.go @@ -0,0 +1,14 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 +// +build go1.21 + +package versions + +import "go/build/constraint" + +func init() { + ConstraintGoVersion = constraint.GoVersion +} diff --git a/vendor/golang.org/x/tools/internal/versions/features.go b/vendor/golang.org/x/tools/internal/versions/features.go new file mode 100644 index 00000000..b53f1786 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/versions/features.go @@ -0,0 +1,43 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package versions + +// This file contains predicates for working with file versions to +// decide when a tool should consider a language feature enabled. + +// GoVersions that features in x/tools can be gated to. +const ( + Go1_18 = "go1.18" + Go1_19 = "go1.19" + Go1_20 = "go1.20" + Go1_21 = "go1.21" + Go1_22 = "go1.22" +) + +// Future is an invalid unknown Go version sometime in the future. +// Do not use directly with Compare. +const Future = "" + +// AtLeast reports whether the file version v comes after a Go release. +// +// Use this predicate to enable a behavior once a certain Go release +// has happened (and stays enabled in the future). +func AtLeast(v, release string) bool { + if v == Future { + return true // an unknown future version is always after y. + } + return Compare(Lang(v), Lang(release)) >= 0 +} + +// Before reports whether the file version v is strictly before a Go release. +// +// Use this predicate to disable a behavior once a certain Go release +// has happened (and stays enabled in the future). +func Before(v, release string) bool { + if v == Future { + return false // an unknown future version happens after y. + } + return Compare(Lang(v), Lang(release)) < 0 +} diff --git a/vendor/golang.org/x/tools/internal/versions/gover.go b/vendor/golang.org/x/tools/internal/versions/gover.go new file mode 100644 index 00000000..bbabcd22 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/versions/gover.go @@ -0,0 +1,172 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This is a fork of internal/gover for use by x/tools until +// go1.21 and earlier are no longer supported by x/tools. + +package versions + +import "strings" + +// A gover is a parsed Go gover: major[.Minor[.Patch]][kind[pre]] +// The numbers are the original decimal strings to avoid integer overflows +// and since there is very little actual math. (Probably overflow doesn't matter in practice, +// but at the time this code was written, there was an existing test that used +// go1.99999999999, which does not fit in an int on 32-bit platforms. +// The "big decimal" representation avoids the problem entirely.) +type gover struct { + major string // decimal + minor string // decimal or "" + patch string // decimal or "" + kind string // "", "alpha", "beta", "rc" + pre string // decimal or "" +} + +// compare returns -1, 0, or +1 depending on whether +// x < y, x == y, or x > y, interpreted as toolchain versions. +// The versions x and y must not begin with a "go" prefix: just "1.21" not "go1.21". +// Malformed versions compare less than well-formed versions and equal to each other. +// The language version "1.21" compares less than the release candidate and eventual releases "1.21rc1" and "1.21.0". +func compare(x, y string) int { + vx := parse(x) + vy := parse(y) + + if c := cmpInt(vx.major, vy.major); c != 0 { + return c + } + if c := cmpInt(vx.minor, vy.minor); c != 0 { + return c + } + if c := cmpInt(vx.patch, vy.patch); c != 0 { + return c + } + if c := strings.Compare(vx.kind, vy.kind); c != 0 { // "" < alpha < beta < rc + return c + } + if c := cmpInt(vx.pre, vy.pre); c != 0 { + return c + } + return 0 +} + +// lang returns the Go language version. For example, lang("1.2.3") == "1.2". +func lang(x string) string { + v := parse(x) + if v.minor == "" || v.major == "1" && v.minor == "0" { + return v.major + } + return v.major + "." + v.minor +} + +// isValid reports whether the version x is valid. +func isValid(x string) bool { + return parse(x) != gover{} +} + +// parse parses the Go version string x into a version. +// It returns the zero version if x is malformed. +func parse(x string) gover { + var v gover + + // Parse major version. + var ok bool + v.major, x, ok = cutInt(x) + if !ok { + return gover{} + } + if x == "" { + // Interpret "1" as "1.0.0". + v.minor = "0" + v.patch = "0" + return v + } + + // Parse . before minor version. + if x[0] != '.' { + return gover{} + } + + // Parse minor version. + v.minor, x, ok = cutInt(x[1:]) + if !ok { + return gover{} + } + if x == "" { + // Patch missing is same as "0" for older versions. + // Starting in Go 1.21, patch missing is different from explicit .0. + if cmpInt(v.minor, "21") < 0 { + v.patch = "0" + } + return v + } + + // Parse patch if present. + if x[0] == '.' { + v.patch, x, ok = cutInt(x[1:]) + if !ok || x != "" { + // Note that we are disallowing prereleases (alpha, beta, rc) for patch releases here (x != ""). + // Allowing them would be a bit confusing because we already have: + // 1.21 < 1.21rc1 + // But a prerelease of a patch would have the opposite effect: + // 1.21.3rc1 < 1.21.3 + // We've never needed them before, so let's not start now. + return gover{} + } + return v + } + + // Parse prerelease. + i := 0 + for i < len(x) && (x[i] < '0' || '9' < x[i]) { + if x[i] < 'a' || 'z' < x[i] { + return gover{} + } + i++ + } + if i == 0 { + return gover{} + } + v.kind, x = x[:i], x[i:] + if x == "" { + return v + } + v.pre, x, ok = cutInt(x) + if !ok || x != "" { + return gover{} + } + + return v +} + +// cutInt scans the leading decimal number at the start of x to an integer +// and returns that value and the rest of the string. +func cutInt(x string) (n, rest string, ok bool) { + i := 0 + for i < len(x) && '0' <= x[i] && x[i] <= '9' { + i++ + } + if i == 0 || x[0] == '0' && i != 1 { // no digits or unnecessary leading zero + return "", "", false + } + return x[:i], x[i:], true +} + +// cmpInt returns cmp.Compare(x, y) interpreting x and y as decimal numbers. +// (Copied from golang.org/x/mod/semver's compareInt.) +func cmpInt(x, y string) int { + if x == y { + return 0 + } + if len(x) < len(y) { + return -1 + } + if len(x) > len(y) { + return +1 + } + if x < y { + return -1 + } else { + return +1 + } +} diff --git a/vendor/golang.org/x/tools/internal/versions/types.go b/vendor/golang.org/x/tools/internal/versions/types.go new file mode 100644 index 00000000..f0bb0d15 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/versions/types.go @@ -0,0 +1,38 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package versions + +import ( + "go/ast" + "go/types" +) + +// FileVersion returns a file's Go version. +// The reported version is an unknown Future version if a +// version cannot be determined. +func FileVersion(info *types.Info, file *ast.File) string { + // In tools built with Go >= 1.22, the Go version of a file + // follow a cascades of sources: + // 1) types.Info.FileVersion, which follows the cascade: + // 1.a) file version (ast.File.GoVersion), + // 1.b) the package version (types.Config.GoVersion), or + // 2) is some unknown Future version. + // + // File versions require a valid package version to be provided to types + // in Config.GoVersion. Config.GoVersion is either from the package's module + // or the toolchain (go run). This value should be provided by go/packages + // or unitchecker.Config.GoVersion. + if v := info.FileVersions[file]; IsValid(v) { + return v + } + // Note: we could instead return runtime.Version() [if valid]. + // This would act as a max version on what a tool can support. + return Future +} + +// InitFileVersions initializes info to record Go versions for Go files. +func InitFileVersions(info *types.Info) { + info.FileVersions = make(map[*ast.File]string) +} diff --git a/vendor/golang.org/x/tools/internal/versions/versions.go b/vendor/golang.org/x/tools/internal/versions/versions.go new file mode 100644 index 00000000..8d1f7453 --- /dev/null +++ b/vendor/golang.org/x/tools/internal/versions/versions.go @@ -0,0 +1,57 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package versions + +import ( + "strings" +) + +// Note: If we use build tags to use go/versions when go >=1.22, +// we run into go.dev/issue/53737. Under some operations users would see an +// import of "go/versions" even if they would not compile the file. +// For example, during `go get -u ./...` (go.dev/issue/64490) we do not try to include +// For this reason, this library just a clone of go/versions for the moment. + +// Lang returns the Go language version for version x. +// If x is not a valid version, Lang returns the empty string. +// For example: +// +// Lang("go1.21rc2") = "go1.21" +// Lang("go1.21.2") = "go1.21" +// Lang("go1.21") = "go1.21" +// Lang("go1") = "go1" +// Lang("bad") = "" +// Lang("1.21") = "" +func Lang(x string) string { + v := lang(stripGo(x)) + if v == "" { + return "" + } + return x[:2+len(v)] // "go"+v without allocation +} + +// Compare returns -1, 0, or +1 depending on whether +// x < y, x == y, or x > y, interpreted as Go versions. +// The versions x and y must begin with a "go" prefix: "go1.21" not "1.21". +// Invalid versions, including the empty string, compare less than +// valid versions and equal to each other. +// The language version "go1.21" compares less than the +// release candidate and eventual releases "go1.21rc1" and "go1.21.0". +// Custom toolchain suffixes are ignored during comparison: +// "go1.21.0" and "go1.21.0-bigcorp" are equal. +func Compare(x, y string) int { return compare(stripGo(x), stripGo(y)) } + +// IsValid reports whether the version x is valid. +func IsValid(x string) bool { return isValid(stripGo(x)) } + +// stripGo converts from a "go1.21" version to a "1.21" version. +// If v does not start with "go", stripGo returns the empty string (a known invalid version). +func stripGo(v string) string { + v, _, _ = strings.Cut(v, "-") // strip -bigcorp suffix. + if len(v) < 2 || v[:2] != "go" { + return "" + } + return v[2:] +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/bind_std.go b/vendor/golang.zx2c4.com/wireguard/conn/bind_std.go new file mode 100644 index 00000000..46df7fd4 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/bind_std.go @@ -0,0 +1,544 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "context" + "errors" + "fmt" + "net" + "net/netip" + "runtime" + "strconv" + "sync" + "syscall" + + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" +) + +var ( + _ Bind = (*StdNetBind)(nil) +) + +// StdNetBind implements Bind for all platforms. While Windows has its own Bind +// (see bind_windows.go), it may fall back to StdNetBind. +// TODO: Remove usage of ipv{4,6}.PacketConn when net.UDPConn has comparable +// methods for sending and receiving multiple datagrams per-syscall. See the +// proposal in https://github.com/golang/go/issues/45886#issuecomment-1218301564. +type StdNetBind struct { + mu sync.Mutex // protects all fields except as specified + ipv4 *net.UDPConn + ipv6 *net.UDPConn + ipv4PC *ipv4.PacketConn // will be nil on non-Linux + ipv6PC *ipv6.PacketConn // will be nil on non-Linux + ipv4TxOffload bool + ipv4RxOffload bool + ipv6TxOffload bool + ipv6RxOffload bool + + // these two fields are not guarded by mu + udpAddrPool sync.Pool + msgsPool sync.Pool + + blackhole4 bool + blackhole6 bool +} + +func NewStdNetBind() Bind { + return &StdNetBind{ + udpAddrPool: sync.Pool{ + New: func() any { + return &net.UDPAddr{ + IP: make([]byte, 16), + } + }, + }, + + msgsPool: sync.Pool{ + New: func() any { + // ipv6.Message and ipv4.Message are interchangeable as they are + // both aliases for x/net/internal/socket.Message. + msgs := make([]ipv6.Message, IdealBatchSize) + for i := range msgs { + msgs[i].Buffers = make(net.Buffers, 1) + msgs[i].OOB = make([]byte, 0, stickyControlSize+gsoControlSize) + } + return &msgs + }, + }, + } +} + +type StdNetEndpoint struct { + // AddrPort is the endpoint destination. + netip.AddrPort + // src is the current sticky source address and interface index, if + // supported. Typically this is a PKTINFO structure from/for control + // messages, see unix.PKTINFO for an example. + src []byte +} + +var ( + _ Bind = (*StdNetBind)(nil) + _ Endpoint = &StdNetEndpoint{} +) + +func (*StdNetBind) ParseEndpoint(s string) (Endpoint, error) { + e, err := netip.ParseAddrPort(s) + if err != nil { + return nil, err + } + return &StdNetEndpoint{ + AddrPort: e, + }, nil +} + +func (e *StdNetEndpoint) ClearSrc() { + if e.src != nil { + // Truncate src, no need to reallocate. + e.src = e.src[:0] + } +} + +func (e *StdNetEndpoint) DstIP() netip.Addr { + return e.AddrPort.Addr() +} + +// See control_default,linux, etc for implementations of SrcIP and SrcIfidx. + +func (e *StdNetEndpoint) DstToBytes() []byte { + b, _ := e.AddrPort.MarshalBinary() + return b +} + +func (e *StdNetEndpoint) DstToString() string { + return e.AddrPort.String() +} + +func listenNet(network string, port int) (*net.UDPConn, int, error) { + conn, err := listenConfig().ListenPacket(context.Background(), network, ":"+strconv.Itoa(port)) + if err != nil { + return nil, 0, err + } + + // Retrieve port. + laddr := conn.LocalAddr() + uaddr, err := net.ResolveUDPAddr( + laddr.Network(), + laddr.String(), + ) + if err != nil { + return nil, 0, err + } + return conn.(*net.UDPConn), uaddr.Port, nil +} + +func (s *StdNetBind) Open(uport uint16) ([]ReceiveFunc, uint16, error) { + s.mu.Lock() + defer s.mu.Unlock() + + var err error + var tries int + + if s.ipv4 != nil || s.ipv6 != nil { + return nil, 0, ErrBindAlreadyOpen + } + + // Attempt to open ipv4 and ipv6 listeners on the same port. + // If uport is 0, we can retry on failure. +again: + port := int(uport) + var v4conn, v6conn *net.UDPConn + var v4pc *ipv4.PacketConn + var v6pc *ipv6.PacketConn + + v4conn, port, err = listenNet("udp4", port) + if err != nil && !errors.Is(err, syscall.EAFNOSUPPORT) { + return nil, 0, err + } + + // Listen on the same port as we're using for ipv4. + v6conn, port, err = listenNet("udp6", port) + if uport == 0 && errors.Is(err, syscall.EADDRINUSE) && tries < 100 { + v4conn.Close() + tries++ + goto again + } + if err != nil && !errors.Is(err, syscall.EAFNOSUPPORT) { + v4conn.Close() + return nil, 0, err + } + var fns []ReceiveFunc + if v4conn != nil { + s.ipv4TxOffload, s.ipv4RxOffload = supportsUDPOffload(v4conn) + if runtime.GOOS == "linux" || runtime.GOOS == "android" { + v4pc = ipv4.NewPacketConn(v4conn) + s.ipv4PC = v4pc + } + fns = append(fns, s.makeReceiveIPv4(v4pc, v4conn, s.ipv4RxOffload)) + s.ipv4 = v4conn + } + if v6conn != nil { + s.ipv6TxOffload, s.ipv6RxOffload = supportsUDPOffload(v6conn) + if runtime.GOOS == "linux" || runtime.GOOS == "android" { + v6pc = ipv6.NewPacketConn(v6conn) + s.ipv6PC = v6pc + } + fns = append(fns, s.makeReceiveIPv6(v6pc, v6conn, s.ipv6RxOffload)) + s.ipv6 = v6conn + } + if len(fns) == 0 { + return nil, 0, syscall.EAFNOSUPPORT + } + + return fns, uint16(port), nil +} + +func (s *StdNetBind) putMessages(msgs *[]ipv6.Message) { + for i := range *msgs { + (*msgs)[i].OOB = (*msgs)[i].OOB[:0] + (*msgs)[i] = ipv6.Message{Buffers: (*msgs)[i].Buffers, OOB: (*msgs)[i].OOB} + } + s.msgsPool.Put(msgs) +} + +func (s *StdNetBind) getMessages() *[]ipv6.Message { + return s.msgsPool.Get().(*[]ipv6.Message) +} + +var ( + // If compilation fails here these are no longer the same underlying type. + _ ipv6.Message = ipv4.Message{} +) + +type batchReader interface { + ReadBatch([]ipv6.Message, int) (int, error) +} + +type batchWriter interface { + WriteBatch([]ipv6.Message, int) (int, error) +} + +func (s *StdNetBind) receiveIP( + br batchReader, + conn *net.UDPConn, + rxOffload bool, + bufs [][]byte, + sizes []int, + eps []Endpoint, +) (n int, err error) { + msgs := s.getMessages() + for i := range bufs { + (*msgs)[i].Buffers[0] = bufs[i] + (*msgs)[i].OOB = (*msgs)[i].OOB[:cap((*msgs)[i].OOB)] + } + defer s.putMessages(msgs) + var numMsgs int + if runtime.GOOS == "linux" || runtime.GOOS == "android" { + if rxOffload { + readAt := len(*msgs) - (IdealBatchSize / udpSegmentMaxDatagrams) + numMsgs, err = br.ReadBatch((*msgs)[readAt:], 0) + if err != nil { + return 0, err + } + numMsgs, err = splitCoalescedMessages(*msgs, readAt, getGSOSize) + if err != nil { + return 0, err + } + } else { + numMsgs, err = br.ReadBatch(*msgs, 0) + if err != nil { + return 0, err + } + } + } else { + msg := &(*msgs)[0] + msg.N, msg.NN, _, msg.Addr, err = conn.ReadMsgUDP(msg.Buffers[0], msg.OOB) + if err != nil { + return 0, err + } + numMsgs = 1 + } + for i := 0; i < numMsgs; i++ { + msg := &(*msgs)[i] + sizes[i] = msg.N + if sizes[i] == 0 { + continue + } + addrPort := msg.Addr.(*net.UDPAddr).AddrPort() + ep := &StdNetEndpoint{AddrPort: addrPort} // TODO: remove allocation + getSrcFromControl(msg.OOB[:msg.NN], ep) + eps[i] = ep + } + return numMsgs, nil +} + +func (s *StdNetBind) makeReceiveIPv4(pc *ipv4.PacketConn, conn *net.UDPConn, rxOffload bool) ReceiveFunc { + return func(bufs [][]byte, sizes []int, eps []Endpoint) (n int, err error) { + return s.receiveIP(pc, conn, rxOffload, bufs, sizes, eps) + } +} + +func (s *StdNetBind) makeReceiveIPv6(pc *ipv6.PacketConn, conn *net.UDPConn, rxOffload bool) ReceiveFunc { + return func(bufs [][]byte, sizes []int, eps []Endpoint) (n int, err error) { + return s.receiveIP(pc, conn, rxOffload, bufs, sizes, eps) + } +} + +// TODO: When all Binds handle IdealBatchSize, remove this dynamic function and +// rename the IdealBatchSize constant to BatchSize. +func (s *StdNetBind) BatchSize() int { + if runtime.GOOS == "linux" || runtime.GOOS == "android" { + return IdealBatchSize + } + return 1 +} + +func (s *StdNetBind) Close() error { + s.mu.Lock() + defer s.mu.Unlock() + + var err1, err2 error + if s.ipv4 != nil { + err1 = s.ipv4.Close() + s.ipv4 = nil + s.ipv4PC = nil + } + if s.ipv6 != nil { + err2 = s.ipv6.Close() + s.ipv6 = nil + s.ipv6PC = nil + } + s.blackhole4 = false + s.blackhole6 = false + s.ipv4TxOffload = false + s.ipv4RxOffload = false + s.ipv6TxOffload = false + s.ipv6RxOffload = false + if err1 != nil { + return err1 + } + return err2 +} + +type ErrUDPGSODisabled struct { + onLaddr string + RetryErr error +} + +func (e ErrUDPGSODisabled) Error() string { + return fmt.Sprintf("disabled UDP GSO on %s, NIC(s) may not support checksum offload", e.onLaddr) +} + +func (e ErrUDPGSODisabled) Unwrap() error { + return e.RetryErr +} + +func (s *StdNetBind) Send(bufs [][]byte, endpoint Endpoint) error { + s.mu.Lock() + blackhole := s.blackhole4 + conn := s.ipv4 + offload := s.ipv4TxOffload + br := batchWriter(s.ipv4PC) + is6 := false + if endpoint.DstIP().Is6() { + blackhole = s.blackhole6 + conn = s.ipv6 + br = s.ipv6PC + is6 = true + offload = s.ipv6TxOffload + } + s.mu.Unlock() + + if blackhole { + return nil + } + if conn == nil { + return syscall.EAFNOSUPPORT + } + + msgs := s.getMessages() + defer s.putMessages(msgs) + ua := s.udpAddrPool.Get().(*net.UDPAddr) + defer s.udpAddrPool.Put(ua) + if is6 { + as16 := endpoint.DstIP().As16() + copy(ua.IP, as16[:]) + ua.IP = ua.IP[:16] + } else { + as4 := endpoint.DstIP().As4() + copy(ua.IP, as4[:]) + ua.IP = ua.IP[:4] + } + ua.Port = int(endpoint.(*StdNetEndpoint).Port()) + var ( + retried bool + err error + ) +retry: + if offload { + n := coalesceMessages(ua, endpoint.(*StdNetEndpoint), bufs, *msgs, setGSOSize) + err = s.send(conn, br, (*msgs)[:n]) + if err != nil && offload && errShouldDisableUDPGSO(err) { + offload = false + s.mu.Lock() + if is6 { + s.ipv6TxOffload = false + } else { + s.ipv4TxOffload = false + } + s.mu.Unlock() + retried = true + goto retry + } + } else { + for i := range bufs { + (*msgs)[i].Addr = ua + (*msgs)[i].Buffers[0] = bufs[i] + setSrcControl(&(*msgs)[i].OOB, endpoint.(*StdNetEndpoint)) + } + err = s.send(conn, br, (*msgs)[:len(bufs)]) + } + if retried { + return ErrUDPGSODisabled{onLaddr: conn.LocalAddr().String(), RetryErr: err} + } + return err +} + +func (s *StdNetBind) send(conn *net.UDPConn, pc batchWriter, msgs []ipv6.Message) error { + var ( + n int + err error + start int + ) + if runtime.GOOS == "linux" || runtime.GOOS == "android" { + for { + n, err = pc.WriteBatch(msgs[start:], 0) + if err != nil || n == len(msgs[start:]) { + break + } + start += n + } + } else { + for _, msg := range msgs { + _, _, err = conn.WriteMsgUDP(msg.Buffers[0], msg.OOB, msg.Addr.(*net.UDPAddr)) + if err != nil { + break + } + } + } + return err +} + +const ( + // Exceeding these values results in EMSGSIZE. They account for layer3 and + // layer4 headers. IPv6 does not need to account for itself as the payload + // length field is self excluding. + maxIPv4PayloadLen = 1<<16 - 1 - 20 - 8 + maxIPv6PayloadLen = 1<<16 - 1 - 8 + + // This is a hard limit imposed by the kernel. + udpSegmentMaxDatagrams = 64 +) + +type setGSOFunc func(control *[]byte, gsoSize uint16) + +func coalesceMessages(addr *net.UDPAddr, ep *StdNetEndpoint, bufs [][]byte, msgs []ipv6.Message, setGSO setGSOFunc) int { + var ( + base = -1 // index of msg we are currently coalescing into + gsoSize int // segmentation size of msgs[base] + dgramCnt int // number of dgrams coalesced into msgs[base] + endBatch bool // tracking flag to start a new batch on next iteration of bufs + ) + maxPayloadLen := maxIPv4PayloadLen + if ep.DstIP().Is6() { + maxPayloadLen = maxIPv6PayloadLen + } + for i, buf := range bufs { + if i > 0 { + msgLen := len(buf) + baseLenBefore := len(msgs[base].Buffers[0]) + freeBaseCap := cap(msgs[base].Buffers[0]) - baseLenBefore + if msgLen+baseLenBefore <= maxPayloadLen && + msgLen <= gsoSize && + msgLen <= freeBaseCap && + dgramCnt < udpSegmentMaxDatagrams && + !endBatch { + msgs[base].Buffers[0] = append(msgs[base].Buffers[0], buf...) + if i == len(bufs)-1 { + setGSO(&msgs[base].OOB, uint16(gsoSize)) + } + dgramCnt++ + if msgLen < gsoSize { + // A smaller than gsoSize packet on the tail is legal, but + // it must end the batch. + endBatch = true + } + continue + } + } + if dgramCnt > 1 { + setGSO(&msgs[base].OOB, uint16(gsoSize)) + } + // Reset prior to incrementing base since we are preparing to start a + // new potential batch. + endBatch = false + base++ + gsoSize = len(buf) + setSrcControl(&msgs[base].OOB, ep) + msgs[base].Buffers[0] = buf + msgs[base].Addr = addr + dgramCnt = 1 + } + return base + 1 +} + +type getGSOFunc func(control []byte) (int, error) + +func splitCoalescedMessages(msgs []ipv6.Message, firstMsgAt int, getGSO getGSOFunc) (n int, err error) { + for i := firstMsgAt; i < len(msgs); i++ { + msg := &msgs[i] + if msg.N == 0 { + return n, err + } + var ( + gsoSize int + start int + end = msg.N + numToSplit = 1 + ) + gsoSize, err = getGSO(msg.OOB[:msg.NN]) + if err != nil { + return n, err + } + if gsoSize > 0 { + numToSplit = (msg.N + gsoSize - 1) / gsoSize + end = gsoSize + } + for j := 0; j < numToSplit; j++ { + if n > i { + return n, errors.New("splitting coalesced packet resulted in overflow") + } + copied := copy(msgs[n].Buffers[0], msg.Buffers[0][start:end]) + msgs[n].N = copied + msgs[n].Addr = msg.Addr + start = end + end += gsoSize + if end > msg.N { + end = msg.N + } + n++ + } + if i != n-1 { + // It is legal for bytes to move within msg.Buffers[0] as a result + // of splitting, so we only zero the source msg len when it is not + // the destination of the last split operation above. + msg.N = 0 + } + } + return n, nil +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/bind_windows.go b/vendor/golang.zx2c4.com/wireguard/conn/bind_windows.go new file mode 100644 index 00000000..d5095e00 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/bind_windows.go @@ -0,0 +1,601 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "encoding/binary" + "io" + "net" + "net/netip" + "strconv" + "sync" + "sync/atomic" + "unsafe" + + "golang.org/x/sys/windows" + + "golang.zx2c4.com/wireguard/conn/winrio" +) + +const ( + packetsPerRing = 1024 + bytesPerPacket = 2048 - 32 + receiveSpins = 15 +) + +type ringPacket struct { + addr WinRingEndpoint + data [bytesPerPacket]byte +} + +type ringBuffer struct { + packets uintptr + head, tail uint32 + id winrio.BufferId + iocp windows.Handle + isFull bool + cq winrio.Cq + mu sync.Mutex + overlapped windows.Overlapped +} + +func (rb *ringBuffer) Push() *ringPacket { + for rb.isFull { + panic("ring is full") + } + ret := (*ringPacket)(unsafe.Pointer(rb.packets + (uintptr(rb.tail%packetsPerRing) * unsafe.Sizeof(ringPacket{})))) + rb.tail += 1 + if rb.tail%packetsPerRing == rb.head%packetsPerRing { + rb.isFull = true + } + return ret +} + +func (rb *ringBuffer) Return(count uint32) { + if rb.head%packetsPerRing == rb.tail%packetsPerRing && !rb.isFull { + return + } + rb.head += count + rb.isFull = false +} + +type afWinRingBind struct { + sock windows.Handle + rx, tx ringBuffer + rq winrio.Rq + mu sync.Mutex + blackhole bool +} + +// WinRingBind uses Windows registered I/O for fast ring buffered networking. +type WinRingBind struct { + v4, v6 afWinRingBind + mu sync.RWMutex + isOpen atomic.Uint32 // 0, 1, or 2 +} + +func NewDefaultBind() Bind { return NewWinRingBind() } + +func NewWinRingBind() Bind { + if !winrio.Initialize() { + return NewStdNetBind() + } + return new(WinRingBind) +} + +type WinRingEndpoint struct { + family uint16 + data [30]byte +} + +var ( + _ Bind = (*WinRingBind)(nil) + _ Endpoint = (*WinRingEndpoint)(nil) +) + +func (*WinRingBind) ParseEndpoint(s string) (Endpoint, error) { + host, port, err := net.SplitHostPort(s) + if err != nil { + return nil, err + } + host16, err := windows.UTF16PtrFromString(host) + if err != nil { + return nil, err + } + port16, err := windows.UTF16PtrFromString(port) + if err != nil { + return nil, err + } + hints := windows.AddrinfoW{ + Flags: windows.AI_NUMERICHOST, + Family: windows.AF_UNSPEC, + Socktype: windows.SOCK_DGRAM, + Protocol: windows.IPPROTO_UDP, + } + var addrinfo *windows.AddrinfoW + err = windows.GetAddrInfoW(host16, port16, &hints, &addrinfo) + if err != nil { + return nil, err + } + defer windows.FreeAddrInfoW(addrinfo) + if (addrinfo.Family != windows.AF_INET && addrinfo.Family != windows.AF_INET6) || addrinfo.Addrlen > unsafe.Sizeof(WinRingEndpoint{}) { + return nil, windows.ERROR_INVALID_ADDRESS + } + var dst [unsafe.Sizeof(WinRingEndpoint{})]byte + copy(dst[:], unsafe.Slice((*byte)(unsafe.Pointer(addrinfo.Addr)), addrinfo.Addrlen)) + return (*WinRingEndpoint)(unsafe.Pointer(&dst[0])), nil +} + +func (*WinRingEndpoint) ClearSrc() {} + +func (e *WinRingEndpoint) DstIP() netip.Addr { + switch e.family { + case windows.AF_INET: + return netip.AddrFrom4(*(*[4]byte)(e.data[2:6])) + case windows.AF_INET6: + return netip.AddrFrom16(*(*[16]byte)(e.data[6:22])) + } + return netip.Addr{} +} + +func (e *WinRingEndpoint) SrcIP() netip.Addr { + return netip.Addr{} // not supported +} + +func (e *WinRingEndpoint) DstToBytes() []byte { + switch e.family { + case windows.AF_INET: + b := make([]byte, 0, 6) + b = append(b, e.data[2:6]...) + b = append(b, e.data[1], e.data[0]) + return b + case windows.AF_INET6: + b := make([]byte, 0, 18) + b = append(b, e.data[6:22]...) + b = append(b, e.data[1], e.data[0]) + return b + } + return nil +} + +func (e *WinRingEndpoint) DstToString() string { + switch e.family { + case windows.AF_INET: + return netip.AddrPortFrom(netip.AddrFrom4(*(*[4]byte)(e.data[2:6])), binary.BigEndian.Uint16(e.data[0:2])).String() + case windows.AF_INET6: + var zone string + if scope := *(*uint32)(unsafe.Pointer(&e.data[22])); scope > 0 { + zone = strconv.FormatUint(uint64(scope), 10) + } + return netip.AddrPortFrom(netip.AddrFrom16(*(*[16]byte)(e.data[6:22])).WithZone(zone), binary.BigEndian.Uint16(e.data[0:2])).String() + } + return "" +} + +func (e *WinRingEndpoint) SrcToString() string { + return "" +} + +func (ring *ringBuffer) CloseAndZero() { + if ring.cq != 0 { + winrio.CloseCompletionQueue(ring.cq) + ring.cq = 0 + } + if ring.iocp != 0 { + windows.CloseHandle(ring.iocp) + ring.iocp = 0 + } + if ring.id != 0 { + winrio.DeregisterBuffer(ring.id) + ring.id = 0 + } + if ring.packets != 0 { + windows.VirtualFree(ring.packets, 0, windows.MEM_RELEASE) + ring.packets = 0 + } + ring.head = 0 + ring.tail = 0 + ring.isFull = false +} + +func (bind *afWinRingBind) CloseAndZero() { + bind.rx.CloseAndZero() + bind.tx.CloseAndZero() + if bind.sock != 0 { + windows.CloseHandle(bind.sock) + bind.sock = 0 + } + bind.blackhole = false +} + +func (bind *WinRingBind) closeAndZero() { + bind.isOpen.Store(0) + bind.v4.CloseAndZero() + bind.v6.CloseAndZero() +} + +func (ring *ringBuffer) Open() error { + var err error + packetsLen := unsafe.Sizeof(ringPacket{}) * packetsPerRing + ring.packets, err = windows.VirtualAlloc(0, packetsLen, windows.MEM_COMMIT|windows.MEM_RESERVE, windows.PAGE_READWRITE) + if err != nil { + return err + } + ring.id, err = winrio.RegisterPointer(unsafe.Pointer(ring.packets), uint32(packetsLen)) + if err != nil { + return err + } + ring.iocp, err = windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0) + if err != nil { + return err + } + ring.cq, err = winrio.CreateIOCPCompletionQueue(packetsPerRing, ring.iocp, 0, &ring.overlapped) + if err != nil { + return err + } + return nil +} + +func (bind *afWinRingBind) Open(family int32, sa windows.Sockaddr) (windows.Sockaddr, error) { + var err error + bind.sock, err = winrio.Socket(family, windows.SOCK_DGRAM, windows.IPPROTO_UDP) + if err != nil { + return nil, err + } + err = bind.rx.Open() + if err != nil { + return nil, err + } + err = bind.tx.Open() + if err != nil { + return nil, err + } + bind.rq, err = winrio.CreateRequestQueue(bind.sock, packetsPerRing, 1, packetsPerRing, 1, bind.rx.cq, bind.tx.cq, 0) + if err != nil { + return nil, err + } + err = windows.Bind(bind.sock, sa) + if err != nil { + return nil, err + } + sa, err = windows.Getsockname(bind.sock) + if err != nil { + return nil, err + } + return sa, nil +} + +func (bind *WinRingBind) Open(port uint16) (recvFns []ReceiveFunc, selectedPort uint16, err error) { + bind.mu.Lock() + defer bind.mu.Unlock() + defer func() { + if err != nil { + bind.closeAndZero() + } + }() + if bind.isOpen.Load() != 0 { + return nil, 0, ErrBindAlreadyOpen + } + var sa windows.Sockaddr + sa, err = bind.v4.Open(windows.AF_INET, &windows.SockaddrInet4{Port: int(port)}) + if err != nil { + return nil, 0, err + } + sa, err = bind.v6.Open(windows.AF_INET6, &windows.SockaddrInet6{Port: sa.(*windows.SockaddrInet4).Port}) + if err != nil { + return nil, 0, err + } + selectedPort = uint16(sa.(*windows.SockaddrInet6).Port) + for i := 0; i < packetsPerRing; i++ { + err = bind.v4.InsertReceiveRequest() + if err != nil { + return nil, 0, err + } + err = bind.v6.InsertReceiveRequest() + if err != nil { + return nil, 0, err + } + } + bind.isOpen.Store(1) + return []ReceiveFunc{bind.receiveIPv4, bind.receiveIPv6}, selectedPort, err +} + +func (bind *WinRingBind) Close() error { + bind.mu.RLock() + if bind.isOpen.Load() != 1 { + bind.mu.RUnlock() + return nil + } + bind.isOpen.Store(2) + windows.PostQueuedCompletionStatus(bind.v4.rx.iocp, 0, 0, nil) + windows.PostQueuedCompletionStatus(bind.v4.tx.iocp, 0, 0, nil) + windows.PostQueuedCompletionStatus(bind.v6.rx.iocp, 0, 0, nil) + windows.PostQueuedCompletionStatus(bind.v6.tx.iocp, 0, 0, nil) + bind.mu.RUnlock() + bind.mu.Lock() + defer bind.mu.Unlock() + bind.closeAndZero() + return nil +} + +// TODO: When all Binds handle IdealBatchSize, remove this dynamic function and +// rename the IdealBatchSize constant to BatchSize. +func (bind *WinRingBind) BatchSize() int { + // TODO: implement batching in and out of the ring + return 1 +} + +func (bind *WinRingBind) SetMark(mark uint32) error { + return nil +} + +func (bind *afWinRingBind) InsertReceiveRequest() error { + packet := bind.rx.Push() + dataBuffer := &winrio.Buffer{ + Id: bind.rx.id, + Offset: uint32(uintptr(unsafe.Pointer(&packet.data[0])) - bind.rx.packets), + Length: uint32(len(packet.data)), + } + addressBuffer := &winrio.Buffer{ + Id: bind.rx.id, + Offset: uint32(uintptr(unsafe.Pointer(&packet.addr)) - bind.rx.packets), + Length: uint32(unsafe.Sizeof(packet.addr)), + } + bind.mu.Lock() + defer bind.mu.Unlock() + return winrio.ReceiveEx(bind.rq, dataBuffer, 1, nil, addressBuffer, nil, nil, 0, uintptr(unsafe.Pointer(packet))) +} + +//go:linkname procyield runtime.procyield +func procyield(cycles uint32) + +func (bind *afWinRingBind) Receive(buf []byte, isOpen *atomic.Uint32) (int, Endpoint, error) { + if isOpen.Load() != 1 { + return 0, nil, net.ErrClosed + } + bind.rx.mu.Lock() + defer bind.rx.mu.Unlock() + + var err error + var count uint32 + var results [1]winrio.Result +retry: + count = 0 + for tries := 0; count == 0 && tries < receiveSpins; tries++ { + if tries > 0 { + if isOpen.Load() != 1 { + return 0, nil, net.ErrClosed + } + procyield(1) + } + count = winrio.DequeueCompletion(bind.rx.cq, results[:]) + } + if count == 0 { + err = winrio.Notify(bind.rx.cq) + if err != nil { + return 0, nil, err + } + var bytes uint32 + var key uintptr + var overlapped *windows.Overlapped + err = windows.GetQueuedCompletionStatus(bind.rx.iocp, &bytes, &key, &overlapped, windows.INFINITE) + if err != nil { + return 0, nil, err + } + if isOpen.Load() != 1 { + return 0, nil, net.ErrClosed + } + count = winrio.DequeueCompletion(bind.rx.cq, results[:]) + if count == 0 { + return 0, nil, io.ErrNoProgress + } + } + bind.rx.Return(1) + err = bind.InsertReceiveRequest() + if err != nil { + return 0, nil, err + } + // We limit the MTU well below the 65k max for practicality, but this means a remote host can still send us + // huge packets. Just try again when this happens. The infinite loop this could cause is still limited to + // attacker bandwidth, just like the rest of the receive path. + if windows.Errno(results[0].Status) == windows.WSAEMSGSIZE { + if isOpen.Load() != 1 { + return 0, nil, net.ErrClosed + } + goto retry + } + if results[0].Status != 0 { + return 0, nil, windows.Errno(results[0].Status) + } + packet := (*ringPacket)(unsafe.Pointer(uintptr(results[0].RequestContext))) + ep := packet.addr + n := copy(buf, packet.data[:results[0].BytesTransferred]) + return n, &ep, nil +} + +func (bind *WinRingBind) receiveIPv4(bufs [][]byte, sizes []int, eps []Endpoint) (int, error) { + bind.mu.RLock() + defer bind.mu.RUnlock() + n, ep, err := bind.v4.Receive(bufs[0], &bind.isOpen) + sizes[0] = n + eps[0] = ep + return 1, err +} + +func (bind *WinRingBind) receiveIPv6(bufs [][]byte, sizes []int, eps []Endpoint) (int, error) { + bind.mu.RLock() + defer bind.mu.RUnlock() + n, ep, err := bind.v6.Receive(bufs[0], &bind.isOpen) + sizes[0] = n + eps[0] = ep + return 1, err +} + +func (bind *afWinRingBind) Send(buf []byte, nend *WinRingEndpoint, isOpen *atomic.Uint32) error { + if isOpen.Load() != 1 { + return net.ErrClosed + } + if len(buf) > bytesPerPacket { + return io.ErrShortBuffer + } + bind.tx.mu.Lock() + defer bind.tx.mu.Unlock() + var results [packetsPerRing]winrio.Result + count := winrio.DequeueCompletion(bind.tx.cq, results[:]) + if count == 0 && bind.tx.isFull { + err := winrio.Notify(bind.tx.cq) + if err != nil { + return err + } + var bytes uint32 + var key uintptr + var overlapped *windows.Overlapped + err = windows.GetQueuedCompletionStatus(bind.tx.iocp, &bytes, &key, &overlapped, windows.INFINITE) + if err != nil { + return err + } + if isOpen.Load() != 1 { + return net.ErrClosed + } + count = winrio.DequeueCompletion(bind.tx.cq, results[:]) + if count == 0 { + return io.ErrNoProgress + } + } + if count > 0 { + bind.tx.Return(count) + } + packet := bind.tx.Push() + packet.addr = *nend + copy(packet.data[:], buf) + dataBuffer := &winrio.Buffer{ + Id: bind.tx.id, + Offset: uint32(uintptr(unsafe.Pointer(&packet.data[0])) - bind.tx.packets), + Length: uint32(len(buf)), + } + addressBuffer := &winrio.Buffer{ + Id: bind.tx.id, + Offset: uint32(uintptr(unsafe.Pointer(&packet.addr)) - bind.tx.packets), + Length: uint32(unsafe.Sizeof(packet.addr)), + } + bind.mu.Lock() + defer bind.mu.Unlock() + return winrio.SendEx(bind.rq, dataBuffer, 1, nil, addressBuffer, nil, nil, 0, 0) +} + +func (bind *WinRingBind) Send(bufs [][]byte, endpoint Endpoint) error { + nend, ok := endpoint.(*WinRingEndpoint) + if !ok { + return ErrWrongEndpointType + } + bind.mu.RLock() + defer bind.mu.RUnlock() + for _, buf := range bufs { + switch nend.family { + case windows.AF_INET: + if bind.v4.blackhole { + continue + } + if err := bind.v4.Send(buf, nend, &bind.isOpen); err != nil { + return err + } + case windows.AF_INET6: + if bind.v6.blackhole { + continue + } + if err := bind.v6.Send(buf, nend, &bind.isOpen); err != nil { + return err + } + } + } + return nil +} + +func (s *StdNetBind) BindSocketToInterface4(interfaceIndex uint32, blackhole bool) error { + s.mu.Lock() + defer s.mu.Unlock() + sysconn, err := s.ipv4.SyscallConn() + if err != nil { + return err + } + err2 := sysconn.Control(func(fd uintptr) { + err = bindSocketToInterface4(windows.Handle(fd), interfaceIndex) + }) + if err2 != nil { + return err2 + } + if err != nil { + return err + } + s.blackhole4 = blackhole + return nil +} + +func (s *StdNetBind) BindSocketToInterface6(interfaceIndex uint32, blackhole bool) error { + s.mu.Lock() + defer s.mu.Unlock() + sysconn, err := s.ipv6.SyscallConn() + if err != nil { + return err + } + err2 := sysconn.Control(func(fd uintptr) { + err = bindSocketToInterface6(windows.Handle(fd), interfaceIndex) + }) + if err2 != nil { + return err2 + } + if err != nil { + return err + } + s.blackhole6 = blackhole + return nil +} + +func (bind *WinRingBind) BindSocketToInterface4(interfaceIndex uint32, blackhole bool) error { + bind.mu.RLock() + defer bind.mu.RUnlock() + if bind.isOpen.Load() != 1 { + return net.ErrClosed + } + err := bindSocketToInterface4(bind.v4.sock, interfaceIndex) + if err != nil { + return err + } + bind.v4.blackhole = blackhole + return nil +} + +func (bind *WinRingBind) BindSocketToInterface6(interfaceIndex uint32, blackhole bool) error { + bind.mu.RLock() + defer bind.mu.RUnlock() + if bind.isOpen.Load() != 1 { + return net.ErrClosed + } + err := bindSocketToInterface6(bind.v6.sock, interfaceIndex) + if err != nil { + return err + } + bind.v6.blackhole = blackhole + return nil +} + +func bindSocketToInterface4(handle windows.Handle, interfaceIndex uint32) error { + const IP_UNICAST_IF = 31 + /* MSDN says for IPv4 this needs to be in net byte order, so that it's like an IP address with leading zeros. */ + var bytes [4]byte + binary.BigEndian.PutUint32(bytes[:], interfaceIndex) + interfaceIndex = *(*uint32)(unsafe.Pointer(&bytes[0])) + err := windows.SetsockoptInt(handle, windows.IPPROTO_IP, IP_UNICAST_IF, int(interfaceIndex)) + if err != nil { + return err + } + return nil +} + +func bindSocketToInterface6(handle windows.Handle, interfaceIndex uint32) error { + const IPV6_UNICAST_IF = 31 + return windows.SetsockoptInt(handle, windows.IPPROTO_IPV6, IPV6_UNICAST_IF, int(interfaceIndex)) +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/boundif_android.go b/vendor/golang.zx2c4.com/wireguard/conn/boundif_android.go new file mode 100644 index 00000000..dd3ca5b0 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/boundif_android.go @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +func (s *StdNetBind) PeekLookAtSocketFd4() (fd int, err error) { + sysconn, err := s.ipv4.SyscallConn() + if err != nil { + return -1, err + } + err = sysconn.Control(func(f uintptr) { + fd = int(f) + }) + if err != nil { + return -1, err + } + return +} + +func (s *StdNetBind) PeekLookAtSocketFd6() (fd int, err error) { + sysconn, err := s.ipv6.SyscallConn() + if err != nil { + return -1, err + } + err = sysconn.Control(func(f uintptr) { + fd = int(f) + }) + if err != nil { + return -1, err + } + return +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/conn.go b/vendor/golang.zx2c4.com/wireguard/conn/conn.go new file mode 100644 index 00000000..a1f57d2b --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/conn.go @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +// Package conn implements WireGuard's network connections. +package conn + +import ( + "errors" + "fmt" + "net/netip" + "reflect" + "runtime" + "strings" +) + +const ( + IdealBatchSize = 128 // maximum number of packets handled per read and write +) + +// A ReceiveFunc receives at least one packet from the network and writes them +// into packets. On a successful read it returns the number of elements of +// sizes, packets, and endpoints that should be evaluated. Some elements of +// sizes may be zero, and callers should ignore them. Callers must pass a sizes +// and eps slice with a length greater than or equal to the length of packets. +// These lengths must not exceed the length of the associated Bind.BatchSize(). +type ReceiveFunc func(packets [][]byte, sizes []int, eps []Endpoint) (n int, err error) + +// A Bind listens on a port for both IPv6 and IPv4 UDP traffic. +// +// A Bind interface may also be a PeekLookAtSocketFd or BindSocketToInterface, +// depending on the platform-specific implementation. +type Bind interface { + // Open puts the Bind into a listening state on a given port and reports the actual + // port that it bound to. Passing zero results in a random selection. + // fns is the set of functions that will be called to receive packets. + Open(port uint16) (fns []ReceiveFunc, actualPort uint16, err error) + + // Close closes the Bind listener. + // All fns returned by Open must return net.ErrClosed after a call to Close. + Close() error + + // SetMark sets the mark for each packet sent through this Bind. + // This mark is passed to the kernel as the socket option SO_MARK. + SetMark(mark uint32) error + + // Send writes one or more packets in bufs to address ep. The length of + // bufs must not exceed BatchSize(). + Send(bufs [][]byte, ep Endpoint) error + + // ParseEndpoint creates a new endpoint from a string. + ParseEndpoint(s string) (Endpoint, error) + + // BatchSize is the number of buffers expected to be passed to + // the ReceiveFuncs, and the maximum expected to be passed to SendBatch. + BatchSize() int +} + +// BindSocketToInterface is implemented by Bind objects that support being +// tied to a single network interface. Used by wireguard-windows. +type BindSocketToInterface interface { + BindSocketToInterface4(interfaceIndex uint32, blackhole bool) error + BindSocketToInterface6(interfaceIndex uint32, blackhole bool) error +} + +// PeekLookAtSocketFd is implemented by Bind objects that support having their +// file descriptor peeked at. Used by wireguard-android. +type PeekLookAtSocketFd interface { + PeekLookAtSocketFd4() (fd int, err error) + PeekLookAtSocketFd6() (fd int, err error) +} + +// An Endpoint maintains the source/destination caching for a peer. +// +// dst: the remote address of a peer ("endpoint" in uapi terminology) +// src: the local address from which datagrams originate going to the peer +type Endpoint interface { + ClearSrc() // clears the source address + SrcToString() string // returns the local source address (ip:port) + DstToString() string // returns the destination address (ip:port) + DstToBytes() []byte // used for mac2 cookie calculations + DstIP() netip.Addr + SrcIP() netip.Addr +} + +var ( + ErrBindAlreadyOpen = errors.New("bind is already open") + ErrWrongEndpointType = errors.New("endpoint type does not correspond with bind type") +) + +func (fn ReceiveFunc) PrettyName() string { + name := runtime.FuncForPC(reflect.ValueOf(fn).Pointer()).Name() + // 0. cheese/taco.beansIPv6.func12.func21218-fm + name = strings.TrimSuffix(name, "-fm") + // 1. cheese/taco.beansIPv6.func12.func21218 + if idx := strings.LastIndexByte(name, '/'); idx != -1 { + name = name[idx+1:] + // 2. taco.beansIPv6.func12.func21218 + } + for { + var idx int + for idx = len(name) - 1; idx >= 0; idx-- { + if name[idx] < '0' || name[idx] > '9' { + break + } + } + if idx == len(name)-1 { + break + } + const dotFunc = ".func" + if !strings.HasSuffix(name[:idx+1], dotFunc) { + break + } + name = name[:idx+1-len(dotFunc)] + // 3. taco.beansIPv6.func12 + // 4. taco.beansIPv6 + } + if idx := strings.LastIndexByte(name, '.'); idx != -1 { + name = name[idx+1:] + // 5. beansIPv6 + } + if name == "" { + return fmt.Sprintf("%p", fn) + } + if strings.HasSuffix(name, "IPv4") { + return "v4" + } + if strings.HasSuffix(name, "IPv6") { + return "v6" + } + return name +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/controlfns.go b/vendor/golang.zx2c4.com/wireguard/conn/controlfns.go new file mode 100644 index 00000000..4f7d90fa --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/controlfns.go @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "net" + "syscall" +) + +// UDP socket read/write buffer size (7MB). The value of 7MB is chosen as it is +// the max supported by a default configuration of macOS. Some platforms will +// silently clamp the value to other maximums, such as linux clamping to +// net.core.{r,w}mem_max (see _linux.go for additional implementation that works +// around this limitation) +const socketBufferSize = 7 << 20 + +// controlFn is the callback function signature from net.ListenConfig.Control. +// It is used to apply platform specific configuration to the socket prior to +// bind. +type controlFn func(network, address string, c syscall.RawConn) error + +// controlFns is a list of functions that are called from the listen config +// that can apply socket options. +var controlFns = []controlFn{} + +// listenConfig returns a net.ListenConfig that applies the controlFns to the +// socket prior to bind. This is used to apply socket buffer sizing and packet +// information OOB configuration for sticky sockets. +func listenConfig() *net.ListenConfig { + return &net.ListenConfig{ + Control: func(network, address string, c syscall.RawConn) error { + for _, fn := range controlFns { + if err := fn(network, address, c); err != nil { + return err + } + } + return nil + }, + } +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/controlfns_linux.go b/vendor/golang.zx2c4.com/wireguard/conn/controlfns_linux.go new file mode 100644 index 00000000..f6ab1d2e --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/controlfns_linux.go @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "fmt" + "runtime" + "syscall" + + "golang.org/x/sys/unix" +) + +func init() { + controlFns = append(controlFns, + + // Attempt to set the socket buffer size beyond net.core.{r,w}mem_max by + // using SO_*BUFFORCE. This requires CAP_NET_ADMIN, and is allowed here to + // fail silently - the result of failure is lower performance on very fast + // links or high latency links. + func(network, address string, c syscall.RawConn) error { + return c.Control(func(fd uintptr) { + // Set up to *mem_max + _ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUF, socketBufferSize) + _ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_SNDBUF, socketBufferSize) + // Set beyond *mem_max if CAP_NET_ADMIN + _ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, socketBufferSize) + _ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, socketBufferSize) + }) + }, + + // Enable receiving of the packet information (IP_PKTINFO for IPv4, + // IPV6_PKTINFO for IPv6) that is used to implement sticky socket support. + func(network, address string, c syscall.RawConn) error { + var err error + switch network { + case "udp4": + if runtime.GOOS != "android" { + c.Control(func(fd uintptr) { + err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_PKTINFO, 1) + }) + } + case "udp6": + c.Control(func(fd uintptr) { + if runtime.GOOS != "android" { + err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_RECVPKTINFO, 1) + if err != nil { + return + } + } + err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_V6ONLY, 1) + }) + default: + err = fmt.Errorf("unhandled network: %s: %w", network, unix.EINVAL) + } + return err + }, + + // Attempt to enable UDP_GRO + func(network, address string, c syscall.RawConn) error { + c.Control(func(fd uintptr) { + _ = unix.SetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO, 1) + }) + return nil + }, + ) +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/controlfns_unix.go b/vendor/golang.zx2c4.com/wireguard/conn/controlfns_unix.go new file mode 100644 index 00000000..91692c0a --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/controlfns_unix.go @@ -0,0 +1,35 @@ +//go:build !windows && !linux && !wasm + +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +func init() { + controlFns = append(controlFns, + func(network, address string, c syscall.RawConn) error { + return c.Control(func(fd uintptr) { + _ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUF, socketBufferSize) + _ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_SNDBUF, socketBufferSize) + }) + }, + + func(network, address string, c syscall.RawConn) error { + var err error + if network == "udp6" { + c.Control(func(fd uintptr) { + err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_V6ONLY, 1) + }) + } + return err + }, + ) +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/controlfns_windows.go b/vendor/golang.zx2c4.com/wireguard/conn/controlfns_windows.go new file mode 100644 index 00000000..c3bdf7d3 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/controlfns_windows.go @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "syscall" + + "golang.org/x/sys/windows" +) + +func init() { + controlFns = append(controlFns, + func(network, address string, c syscall.RawConn) error { + return c.Control(func(fd uintptr) { + _ = windows.SetsockoptInt(windows.Handle(fd), windows.SOL_SOCKET, windows.SO_RCVBUF, socketBufferSize) + _ = windows.SetsockoptInt(windows.Handle(fd), windows.SOL_SOCKET, windows.SO_SNDBUF, socketBufferSize) + }) + }, + ) +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/default.go b/vendor/golang.zx2c4.com/wireguard/conn/default.go new file mode 100644 index 00000000..b6f761b9 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/default.go @@ -0,0 +1,10 @@ +//go:build !windows + +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +func NewDefaultBind() Bind { return NewStdNetBind() } diff --git a/vendor/golang.zx2c4.com/wireguard/conn/errors_default.go b/vendor/golang.zx2c4.com/wireguard/conn/errors_default.go new file mode 100644 index 00000000..f1e5b90e --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/errors_default.go @@ -0,0 +1,12 @@ +//go:build !linux + +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +func errShouldDisableUDPGSO(err error) bool { + return false +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/errors_linux.go b/vendor/golang.zx2c4.com/wireguard/conn/errors_linux.go new file mode 100644 index 00000000..8e61000f --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/errors_linux.go @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "errors" + "os" + + "golang.org/x/sys/unix" +) + +func errShouldDisableUDPGSO(err error) bool { + var serr *os.SyscallError + if errors.As(err, &serr) { + // EIO is returned by udp_send_skb() if the device driver does not have + // tx checksumming enabled, which is a hard requirement of UDP_SEGMENT. + // See: + // https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git/tree/man7/udp.7?id=806eabd74910447f21005160e90957bde4db0183#n228 + // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/net/ipv4/udp.c?h=v6.2&id=c9c3395d5e3dcc6daee66c6908354d47bf98cb0c#n942 + return serr.Err == unix.EIO + } + return false +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/features_default.go b/vendor/golang.zx2c4.com/wireguard/conn/features_default.go new file mode 100644 index 00000000..d53ff5f7 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/features_default.go @@ -0,0 +1,15 @@ +//go:build !linux +// +build !linux + +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import "net" + +func supportsUDPOffload(conn *net.UDPConn) (txOffload, rxOffload bool) { + return +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/features_linux.go b/vendor/golang.zx2c4.com/wireguard/conn/features_linux.go new file mode 100644 index 00000000..8959d935 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/features_linux.go @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "net" + + "golang.org/x/sys/unix" +) + +func supportsUDPOffload(conn *net.UDPConn) (txOffload, rxOffload bool) { + rc, err := conn.SyscallConn() + if err != nil { + return + } + err = rc.Control(func(fd uintptr) { + _, errSyscall := unix.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT) + txOffload = errSyscall == nil + opt, errSyscall := unix.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO) + rxOffload = errSyscall == nil && opt == 1 + }) + if err != nil { + return false, false + } + return txOffload, rxOffload +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/gso_default.go b/vendor/golang.zx2c4.com/wireguard/conn/gso_default.go new file mode 100644 index 00000000..57780dbb --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/gso_default.go @@ -0,0 +1,21 @@ +//go:build !linux + +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +// getGSOSize parses control for UDP_GRO and if found returns its GSO size data. +func getGSOSize(control []byte) (int, error) { + return 0, nil +} + +// setGSOSize sets a UDP_SEGMENT in control based on gsoSize. +func setGSOSize(control *[]byte, gsoSize uint16) { +} + +// gsoControlSize returns the recommended buffer size for pooling sticky and UDP +// offloading control data. +const gsoControlSize = 0 diff --git a/vendor/golang.zx2c4.com/wireguard/conn/gso_linux.go b/vendor/golang.zx2c4.com/wireguard/conn/gso_linux.go new file mode 100644 index 00000000..8596b292 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/gso_linux.go @@ -0,0 +1,65 @@ +//go:build linux + +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "fmt" + "unsafe" + + "golang.org/x/sys/unix" +) + +const ( + sizeOfGSOData = 2 +) + +// getGSOSize parses control for UDP_GRO and if found returns its GSO size data. +func getGSOSize(control []byte) (int, error) { + var ( + hdr unix.Cmsghdr + data []byte + rem = control + err error + ) + + for len(rem) > unix.SizeofCmsghdr { + hdr, data, rem, err = unix.ParseOneSocketControlMessage(rem) + if err != nil { + return 0, fmt.Errorf("error parsing socket control message: %w", err) + } + if hdr.Level == unix.SOL_UDP && hdr.Type == unix.UDP_GRO && len(data) >= sizeOfGSOData { + var gso uint16 + copy(unsafe.Slice((*byte)(unsafe.Pointer(&gso)), sizeOfGSOData), data[:sizeOfGSOData]) + return int(gso), nil + } + } + return 0, nil +} + +// setGSOSize sets a UDP_SEGMENT in control based on gsoSize. It leaves existing +// data in control untouched. +func setGSOSize(control *[]byte, gsoSize uint16) { + existingLen := len(*control) + avail := cap(*control) - existingLen + space := unix.CmsgSpace(sizeOfGSOData) + if avail < space { + return + } + *control = (*control)[:cap(*control)] + gsoControl := (*control)[existingLen:] + hdr := (*unix.Cmsghdr)(unsafe.Pointer(&(gsoControl)[0])) + hdr.Level = unix.SOL_UDP + hdr.Type = unix.UDP_SEGMENT + hdr.SetLen(unix.CmsgLen(sizeOfGSOData)) + copy((gsoControl)[unix.CmsgLen(0):], unsafe.Slice((*byte)(unsafe.Pointer(&gsoSize)), sizeOfGSOData)) + *control = (*control)[:existingLen+space] +} + +// gsoControlSize returns the recommended buffer size for pooling UDP +// offloading control data. +var gsoControlSize = unix.CmsgSpace(sizeOfGSOData) diff --git a/vendor/golang.zx2c4.com/wireguard/conn/mark_default.go b/vendor/golang.zx2c4.com/wireguard/conn/mark_default.go new file mode 100644 index 00000000..31023844 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/mark_default.go @@ -0,0 +1,12 @@ +//go:build !linux && !openbsd && !freebsd + +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +func (s *StdNetBind) SetMark(mark uint32) error { + return nil +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/mark_unix.go b/vendor/golang.zx2c4.com/wireguard/conn/mark_unix.go new file mode 100644 index 00000000..d9e46eea --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/mark_unix.go @@ -0,0 +1,65 @@ +//go:build linux || openbsd || freebsd + +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "runtime" + + "golang.org/x/sys/unix" +) + +var fwmarkIoctl int + +func init() { + switch runtime.GOOS { + case "linux", "android": + fwmarkIoctl = 36 /* unix.SO_MARK */ + case "freebsd": + fwmarkIoctl = 0x1015 /* unix.SO_USER_COOKIE */ + case "openbsd": + fwmarkIoctl = 0x1021 /* unix.SO_RTABLE */ + } +} + +func (s *StdNetBind) SetMark(mark uint32) error { + var operr error + if fwmarkIoctl == 0 { + return nil + } + if s.ipv4 != nil { + fd, err := s.ipv4.SyscallConn() + if err != nil { + return err + } + err = fd.Control(func(fd uintptr) { + operr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, fwmarkIoctl, int(mark)) + }) + if err == nil { + err = operr + } + if err != nil { + return err + } + } + if s.ipv6 != nil { + fd, err := s.ipv6.SyscallConn() + if err != nil { + return err + } + err = fd.Control(func(fd uintptr) { + operr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, fwmarkIoctl, int(mark)) + }) + if err == nil { + err = operr + } + if err != nil { + return err + } + } + return nil +} diff --git a/vendor/golang.zx2c4.com/wireguard/conn/sticky_default.go b/vendor/golang.zx2c4.com/wireguard/conn/sticky_default.go new file mode 100644 index 00000000..0b213867 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/sticky_default.go @@ -0,0 +1,42 @@ +//go:build !linux || android + +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import "net/netip" + +func (e *StdNetEndpoint) SrcIP() netip.Addr { + return netip.Addr{} +} + +func (e *StdNetEndpoint) SrcIfidx() int32 { + return 0 +} + +func (e *StdNetEndpoint) SrcToString() string { + return "" +} + +// TODO: macOS, FreeBSD and other BSDs likely do support the sticky sockets +// {get,set}srcControl feature set, but use alternatively named flags and need +// ports and require testing. + +// getSrcFromControl parses the control for PKTINFO and if found updates ep with +// the source information found. +func getSrcFromControl(control []byte, ep *StdNetEndpoint) { +} + +// setSrcControl parses the control for PKTINFO and if found updates ep with +// the source information found. +func setSrcControl(control *[]byte, ep *StdNetEndpoint) { +} + +// stickyControlSize returns the recommended buffer size for pooling sticky +// offloading control data. +const stickyControlSize = 0 + +const StdNetSupportsStickySockets = false diff --git a/vendor/golang.zx2c4.com/wireguard/conn/sticky_linux.go b/vendor/golang.zx2c4.com/wireguard/conn/sticky_linux.go new file mode 100644 index 00000000..8e206e90 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/sticky_linux.go @@ -0,0 +1,112 @@ +//go:build linux && !android + +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package conn + +import ( + "net/netip" + "unsafe" + + "golang.org/x/sys/unix" +) + +func (e *StdNetEndpoint) SrcIP() netip.Addr { + switch len(e.src) { + case unix.CmsgSpace(unix.SizeofInet4Pktinfo): + info := (*unix.Inet4Pktinfo)(unsafe.Pointer(&e.src[unix.CmsgLen(0)])) + return netip.AddrFrom4(info.Spec_dst) + case unix.CmsgSpace(unix.SizeofInet6Pktinfo): + info := (*unix.Inet6Pktinfo)(unsafe.Pointer(&e.src[unix.CmsgLen(0)])) + // TODO: set zone. in order to do so we need to check if the address is + // link local, and if it is perform a syscall to turn the ifindex into a + // zone string because netip uses string zones. + return netip.AddrFrom16(info.Addr) + } + return netip.Addr{} +} + +func (e *StdNetEndpoint) SrcIfidx() int32 { + switch len(e.src) { + case unix.CmsgSpace(unix.SizeofInet4Pktinfo): + info := (*unix.Inet4Pktinfo)(unsafe.Pointer(&e.src[unix.CmsgLen(0)])) + return info.Ifindex + case unix.CmsgSpace(unix.SizeofInet6Pktinfo): + info := (*unix.Inet6Pktinfo)(unsafe.Pointer(&e.src[unix.CmsgLen(0)])) + return int32(info.Ifindex) + } + return 0 +} + +func (e *StdNetEndpoint) SrcToString() string { + return e.SrcIP().String() +} + +// getSrcFromControl parses the control for PKTINFO and if found updates ep with +// the source information found. +func getSrcFromControl(control []byte, ep *StdNetEndpoint) { + ep.ClearSrc() + + var ( + hdr unix.Cmsghdr + data []byte + rem []byte = control + err error + ) + + for len(rem) > unix.SizeofCmsghdr { + hdr, data, rem, err = unix.ParseOneSocketControlMessage(rem) + if err != nil { + return + } + + if hdr.Level == unix.IPPROTO_IP && + hdr.Type == unix.IP_PKTINFO { + + if ep.src == nil || cap(ep.src) < unix.CmsgSpace(unix.SizeofInet4Pktinfo) { + ep.src = make([]byte, 0, unix.CmsgSpace(unix.SizeofInet4Pktinfo)) + } + ep.src = ep.src[:unix.CmsgSpace(unix.SizeofInet4Pktinfo)] + + hdrBuf := unsafe.Slice((*byte)(unsafe.Pointer(&hdr)), unix.SizeofCmsghdr) + copy(ep.src, hdrBuf) + copy(ep.src[unix.CmsgLen(0):], data) + return + } + + if hdr.Level == unix.IPPROTO_IPV6 && + hdr.Type == unix.IPV6_PKTINFO { + + if ep.src == nil || cap(ep.src) < unix.CmsgSpace(unix.SizeofInet6Pktinfo) { + ep.src = make([]byte, 0, unix.CmsgSpace(unix.SizeofInet6Pktinfo)) + } + + ep.src = ep.src[:unix.CmsgSpace(unix.SizeofInet6Pktinfo)] + + hdrBuf := unsafe.Slice((*byte)(unsafe.Pointer(&hdr)), unix.SizeofCmsghdr) + copy(ep.src, hdrBuf) + copy(ep.src[unix.CmsgLen(0):], data) + return + } + } +} + +// setSrcControl sets an IP{V6}_PKTINFO in control based on the source address +// and source ifindex found in ep. control's len will be set to 0 in the event +// that ep is a default value. +func setSrcControl(control *[]byte, ep *StdNetEndpoint) { + if cap(*control) < len(ep.src) { + return + } + *control = (*control)[:0] + *control = append(*control, ep.src...) +} + +// stickyControlSize returns the recommended buffer size for pooling sticky +// offloading control data. +var stickyControlSize = unix.CmsgSpace(unix.SizeofInet6Pktinfo) + +const StdNetSupportsStickySockets = true diff --git a/vendor/golang.zx2c4.com/wireguard/conn/winrio/rio_windows.go b/vendor/golang.zx2c4.com/wireguard/conn/winrio/rio_windows.go new file mode 100644 index 00000000..d1037bba --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/conn/winrio/rio_windows.go @@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package winrio + +import ( + "log" + "sync" + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +const ( + MsgDontNotify = 1 + MsgDefer = 2 + MsgWaitAll = 4 + MsgCommitOnly = 8 + + MaxCqSize = 0x8000000 + + invalidBufferId = 0xFFFFFFFF + invalidCq = 0 + invalidRq = 0 + corruptCq = 0xFFFFFFFF +) + +var extensionFunctionTable struct { + cbSize uint32 + rioReceive uintptr + rioReceiveEx uintptr + rioSend uintptr + rioSendEx uintptr + rioCloseCompletionQueue uintptr + rioCreateCompletionQueue uintptr + rioCreateRequestQueue uintptr + rioDequeueCompletion uintptr + rioDeregisterBuffer uintptr + rioNotify uintptr + rioRegisterBuffer uintptr + rioResizeCompletionQueue uintptr + rioResizeRequestQueue uintptr +} + +type Cq uintptr + +type Rq uintptr + +type BufferId uintptr + +type Buffer struct { + Id BufferId + Offset uint32 + Length uint32 +} + +type Result struct { + Status int32 + BytesTransferred uint32 + SocketContext uint64 + RequestContext uint64 +} + +type notificationCompletionType uint32 + +const ( + eventCompletion notificationCompletionType = 1 + iocpCompletion notificationCompletionType = 2 +) + +type eventNotificationCompletion struct { + completionType notificationCompletionType + event windows.Handle + notifyReset uint32 +} + +type iocpNotificationCompletion struct { + completionType notificationCompletionType + iocp windows.Handle + key uintptr + overlapped *windows.Overlapped +} + +var ( + initialized sync.Once + available bool +) + +func Initialize() bool { + initialized.Do(func() { + var ( + err error + socket windows.Handle + cq Cq + ) + defer func() { + if err == nil { + return + } + if maj, _, _ := windows.RtlGetNtVersionNumbers(); maj <= 7 { + return + } + log.Printf("Registered I/O is unavailable: %v", err) + }() + socket, err = Socket(windows.AF_INET, windows.SOCK_DGRAM, windows.IPPROTO_UDP) + if err != nil { + return + } + defer windows.CloseHandle(socket) + WSAID_MULTIPLE_RIO := &windows.GUID{0x8509e081, 0x96dd, 0x4005, [8]byte{0xb1, 0x65, 0x9e, 0x2e, 0xe8, 0xc7, 0x9e, 0x3f}} + const SIO_GET_MULTIPLE_EXTENSION_FUNCTION_POINTER = 0xc8000024 + ob := uint32(0) + err = windows.WSAIoctl(socket, SIO_GET_MULTIPLE_EXTENSION_FUNCTION_POINTER, + (*byte)(unsafe.Pointer(WSAID_MULTIPLE_RIO)), uint32(unsafe.Sizeof(*WSAID_MULTIPLE_RIO)), + (*byte)(unsafe.Pointer(&extensionFunctionTable)), uint32(unsafe.Sizeof(extensionFunctionTable)), + &ob, nil, 0) + if err != nil { + return + } + + // While we should be able to stop here, after getting the function pointers, some anti-virus actually causes + // failures in RIOCreateRequestQueue, so keep going to be certain this is supported. + var iocp windows.Handle + iocp, err = windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0) + if err != nil { + return + } + defer windows.CloseHandle(iocp) + var overlapped windows.Overlapped + cq, err = CreateIOCPCompletionQueue(2, iocp, 0, &overlapped) + if err != nil { + return + } + defer CloseCompletionQueue(cq) + _, err = CreateRequestQueue(socket, 1, 1, 1, 1, cq, cq, 0) + if err != nil { + return + } + available = true + }) + return available +} + +func Socket(af, typ, proto int32) (windows.Handle, error) { + return windows.WSASocket(af, typ, proto, nil, 0, windows.WSA_FLAG_REGISTERED_IO) +} + +func CloseCompletionQueue(cq Cq) { + _, _, _ = syscall.Syscall(extensionFunctionTable.rioCloseCompletionQueue, 1, uintptr(cq), 0, 0) +} + +func CreateEventCompletionQueue(queueSize uint32, event windows.Handle, notifyReset bool) (Cq, error) { + notificationCompletion := &eventNotificationCompletion{ + completionType: eventCompletion, + event: event, + } + if notifyReset { + notificationCompletion.notifyReset = 1 + } + ret, _, err := syscall.Syscall(extensionFunctionTable.rioCreateCompletionQueue, 2, uintptr(queueSize), uintptr(unsafe.Pointer(notificationCompletion)), 0) + if ret == invalidCq { + return 0, err + } + return Cq(ret), nil +} + +func CreateIOCPCompletionQueue(queueSize uint32, iocp windows.Handle, key uintptr, overlapped *windows.Overlapped) (Cq, error) { + notificationCompletion := &iocpNotificationCompletion{ + completionType: iocpCompletion, + iocp: iocp, + key: key, + overlapped: overlapped, + } + ret, _, err := syscall.Syscall(extensionFunctionTable.rioCreateCompletionQueue, 2, uintptr(queueSize), uintptr(unsafe.Pointer(notificationCompletion)), 0) + if ret == invalidCq { + return 0, err + } + return Cq(ret), nil +} + +func CreatePolledCompletionQueue(queueSize uint32) (Cq, error) { + ret, _, err := syscall.Syscall(extensionFunctionTable.rioCreateCompletionQueue, 2, uintptr(queueSize), 0, 0) + if ret == invalidCq { + return 0, err + } + return Cq(ret), nil +} + +func CreateRequestQueue(socket windows.Handle, maxOutstandingReceive, maxReceiveDataBuffers, maxOutstandingSend, maxSendDataBuffers uint32, receiveCq, sendCq Cq, socketContext uintptr) (Rq, error) { + ret, _, err := syscall.Syscall9(extensionFunctionTable.rioCreateRequestQueue, 8, uintptr(socket), uintptr(maxOutstandingReceive), uintptr(maxReceiveDataBuffers), uintptr(maxOutstandingSend), uintptr(maxSendDataBuffers), uintptr(receiveCq), uintptr(sendCq), socketContext, 0) + if ret == invalidRq { + return 0, err + } + return Rq(ret), nil +} + +func DequeueCompletion(cq Cq, results []Result) uint32 { + var array uintptr + if len(results) > 0 { + array = uintptr(unsafe.Pointer(&results[0])) + } + ret, _, _ := syscall.Syscall(extensionFunctionTable.rioDequeueCompletion, 3, uintptr(cq), array, uintptr(len(results))) + if ret == corruptCq { + panic("cq is corrupt") + } + return uint32(ret) +} + +func DeregisterBuffer(id BufferId) { + _, _, _ = syscall.Syscall(extensionFunctionTable.rioDeregisterBuffer, 1, uintptr(id), 0, 0) +} + +func RegisterBuffer(buffer []byte) (BufferId, error) { + var buf unsafe.Pointer + if len(buffer) > 0 { + buf = unsafe.Pointer(&buffer[0]) + } + return RegisterPointer(buf, uint32(len(buffer))) +} + +func RegisterPointer(ptr unsafe.Pointer, size uint32) (BufferId, error) { + ret, _, err := syscall.Syscall(extensionFunctionTable.rioRegisterBuffer, 2, uintptr(ptr), uintptr(size), 0) + if ret == invalidBufferId { + return 0, err + } + return BufferId(ret), nil +} + +func SendEx(rq Rq, buf *Buffer, dataBufferCount uint32, localAddress, remoteAddress, controlContext, flags *Buffer, sflags uint32, requestContext uintptr) error { + ret, _, err := syscall.Syscall9(extensionFunctionTable.rioSendEx, 9, uintptr(rq), uintptr(unsafe.Pointer(buf)), uintptr(dataBufferCount), uintptr(unsafe.Pointer(localAddress)), uintptr(unsafe.Pointer(remoteAddress)), uintptr(unsafe.Pointer(controlContext)), uintptr(unsafe.Pointer(flags)), uintptr(sflags), requestContext) + if ret == 0 { + return err + } + return nil +} + +func ReceiveEx(rq Rq, buf *Buffer, dataBufferCount uint32, localAddress, remoteAddress, controlContext, flags *Buffer, sflags uint32, requestContext uintptr) error { + ret, _, err := syscall.Syscall9(extensionFunctionTable.rioReceiveEx, 9, uintptr(rq), uintptr(unsafe.Pointer(buf)), uintptr(dataBufferCount), uintptr(unsafe.Pointer(localAddress)), uintptr(unsafe.Pointer(remoteAddress)), uintptr(unsafe.Pointer(controlContext)), uintptr(unsafe.Pointer(flags)), uintptr(sflags), requestContext) + if ret == 0 { + return err + } + return nil +} + +func Notify(cq Cq) error { + ret, _, _ := syscall.Syscall(extensionFunctionTable.rioNotify, 1, uintptr(cq), 0, 0) + if ret != 0 { + return windows.Errno(ret) + } + return nil +} diff --git a/vendor/golang.zx2c4.com/wireguard/rwcancel/rwcancel.go b/vendor/golang.zx2c4.com/wireguard/rwcancel/rwcancel.go index 63e1510b..e397c0e8 100644 --- a/vendor/golang.zx2c4.com/wireguard/rwcancel/rwcancel.go +++ b/vendor/golang.zx2c4.com/wireguard/rwcancel/rwcancel.go @@ -1,4 +1,4 @@ -//go:build !windows && !js +//go:build !windows && !wasm /* SPDX-License-Identifier: MIT * diff --git a/vendor/golang.zx2c4.com/wireguard/rwcancel/rwcancel_stub.go b/vendor/golang.zx2c4.com/wireguard/rwcancel/rwcancel_stub.go index 182940b3..2a98b2b4 100644 --- a/vendor/golang.zx2c4.com/wireguard/rwcancel/rwcancel_stub.go +++ b/vendor/golang.zx2c4.com/wireguard/rwcancel/rwcancel_stub.go @@ -1,4 +1,4 @@ -//go:build windows || js +//go:build windows || wasm // SPDX-License-Identifier: MIT diff --git a/vendor/golang.zx2c4.com/wireguard/tun/checksum.go b/vendor/golang.zx2c4.com/wireguard/tun/checksum.go new file mode 100644 index 00000000..29a8fc8f --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/tun/checksum.go @@ -0,0 +1,118 @@ +package tun + +import "encoding/binary" + +// TODO: Explore SIMD and/or other assembly optimizations. +// TODO: Test native endian loads. See RFC 1071 section 2 part B. +func checksumNoFold(b []byte, initial uint64) uint64 { + ac := initial + + for len(b) >= 128 { + ac += uint64(binary.BigEndian.Uint32(b[:4])) + ac += uint64(binary.BigEndian.Uint32(b[4:8])) + ac += uint64(binary.BigEndian.Uint32(b[8:12])) + ac += uint64(binary.BigEndian.Uint32(b[12:16])) + ac += uint64(binary.BigEndian.Uint32(b[16:20])) + ac += uint64(binary.BigEndian.Uint32(b[20:24])) + ac += uint64(binary.BigEndian.Uint32(b[24:28])) + ac += uint64(binary.BigEndian.Uint32(b[28:32])) + ac += uint64(binary.BigEndian.Uint32(b[32:36])) + ac += uint64(binary.BigEndian.Uint32(b[36:40])) + ac += uint64(binary.BigEndian.Uint32(b[40:44])) + ac += uint64(binary.BigEndian.Uint32(b[44:48])) + ac += uint64(binary.BigEndian.Uint32(b[48:52])) + ac += uint64(binary.BigEndian.Uint32(b[52:56])) + ac += uint64(binary.BigEndian.Uint32(b[56:60])) + ac += uint64(binary.BigEndian.Uint32(b[60:64])) + ac += uint64(binary.BigEndian.Uint32(b[64:68])) + ac += uint64(binary.BigEndian.Uint32(b[68:72])) + ac += uint64(binary.BigEndian.Uint32(b[72:76])) + ac += uint64(binary.BigEndian.Uint32(b[76:80])) + ac += uint64(binary.BigEndian.Uint32(b[80:84])) + ac += uint64(binary.BigEndian.Uint32(b[84:88])) + ac += uint64(binary.BigEndian.Uint32(b[88:92])) + ac += uint64(binary.BigEndian.Uint32(b[92:96])) + ac += uint64(binary.BigEndian.Uint32(b[96:100])) + ac += uint64(binary.BigEndian.Uint32(b[100:104])) + ac += uint64(binary.BigEndian.Uint32(b[104:108])) + ac += uint64(binary.BigEndian.Uint32(b[108:112])) + ac += uint64(binary.BigEndian.Uint32(b[112:116])) + ac += uint64(binary.BigEndian.Uint32(b[116:120])) + ac += uint64(binary.BigEndian.Uint32(b[120:124])) + ac += uint64(binary.BigEndian.Uint32(b[124:128])) + b = b[128:] + } + if len(b) >= 64 { + ac += uint64(binary.BigEndian.Uint32(b[:4])) + ac += uint64(binary.BigEndian.Uint32(b[4:8])) + ac += uint64(binary.BigEndian.Uint32(b[8:12])) + ac += uint64(binary.BigEndian.Uint32(b[12:16])) + ac += uint64(binary.BigEndian.Uint32(b[16:20])) + ac += uint64(binary.BigEndian.Uint32(b[20:24])) + ac += uint64(binary.BigEndian.Uint32(b[24:28])) + ac += uint64(binary.BigEndian.Uint32(b[28:32])) + ac += uint64(binary.BigEndian.Uint32(b[32:36])) + ac += uint64(binary.BigEndian.Uint32(b[36:40])) + ac += uint64(binary.BigEndian.Uint32(b[40:44])) + ac += uint64(binary.BigEndian.Uint32(b[44:48])) + ac += uint64(binary.BigEndian.Uint32(b[48:52])) + ac += uint64(binary.BigEndian.Uint32(b[52:56])) + ac += uint64(binary.BigEndian.Uint32(b[56:60])) + ac += uint64(binary.BigEndian.Uint32(b[60:64])) + b = b[64:] + } + if len(b) >= 32 { + ac += uint64(binary.BigEndian.Uint32(b[:4])) + ac += uint64(binary.BigEndian.Uint32(b[4:8])) + ac += uint64(binary.BigEndian.Uint32(b[8:12])) + ac += uint64(binary.BigEndian.Uint32(b[12:16])) + ac += uint64(binary.BigEndian.Uint32(b[16:20])) + ac += uint64(binary.BigEndian.Uint32(b[20:24])) + ac += uint64(binary.BigEndian.Uint32(b[24:28])) + ac += uint64(binary.BigEndian.Uint32(b[28:32])) + b = b[32:] + } + if len(b) >= 16 { + ac += uint64(binary.BigEndian.Uint32(b[:4])) + ac += uint64(binary.BigEndian.Uint32(b[4:8])) + ac += uint64(binary.BigEndian.Uint32(b[8:12])) + ac += uint64(binary.BigEndian.Uint32(b[12:16])) + b = b[16:] + } + if len(b) >= 8 { + ac += uint64(binary.BigEndian.Uint32(b[:4])) + ac += uint64(binary.BigEndian.Uint32(b[4:8])) + b = b[8:] + } + if len(b) >= 4 { + ac += uint64(binary.BigEndian.Uint32(b)) + b = b[4:] + } + if len(b) >= 2 { + ac += uint64(binary.BigEndian.Uint16(b)) + b = b[2:] + } + if len(b) == 1 { + ac += uint64(b[0]) << 8 + } + + return ac +} + +func checksum(b []byte, initial uint64) uint16 { + ac := checksumNoFold(b, initial) + ac = (ac >> 16) + (ac & 0xffff) + ac = (ac >> 16) + (ac & 0xffff) + ac = (ac >> 16) + (ac & 0xffff) + ac = (ac >> 16) + (ac & 0xffff) + return uint16(ac) +} + +func pseudoHeaderChecksumNoFold(protocol uint8, srcAddr, dstAddr []byte, totalLen uint16) uint64 { + sum := checksumNoFold(srcAddr, 0) + sum = checksumNoFold(dstAddr, sum) + sum = checksumNoFold([]byte{0, protocol}, sum) + tmp := make([]byte, 2) + binary.BigEndian.PutUint16(tmp, totalLen) + return checksumNoFold(tmp, sum) +} diff --git a/vendor/golang.zx2c4.com/wireguard/tun/errors.go b/vendor/golang.zx2c4.com/wireguard/tun/errors.go new file mode 100644 index 00000000..75ae3a43 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/tun/errors.go @@ -0,0 +1,12 @@ +package tun + +import ( + "errors" +) + +var ( + // ErrTooManySegments is returned by Device.Read() when segmentation + // overflows the length of supplied buffers. This error should not cause + // reads to cease. + ErrTooManySegments = errors.New("too many segments") +) diff --git a/vendor/golang.zx2c4.com/wireguard/tun/offload_linux.go b/vendor/golang.zx2c4.com/wireguard/tun/offload_linux.go new file mode 100644 index 00000000..9ff7fea8 --- /dev/null +++ b/vendor/golang.zx2c4.com/wireguard/tun/offload_linux.go @@ -0,0 +1,993 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved. + */ + +package tun + +import ( + "bytes" + "encoding/binary" + "errors" + "io" + "unsafe" + + "golang.org/x/sys/unix" + "golang.zx2c4.com/wireguard/conn" +) + +const tcpFlagsOffset = 13 + +const ( + tcpFlagFIN uint8 = 0x01 + tcpFlagPSH uint8 = 0x08 + tcpFlagACK uint8 = 0x10 +) + +// virtioNetHdr is defined in the kernel in include/uapi/linux/virtio_net.h. The +// kernel symbol is virtio_net_hdr. +type virtioNetHdr struct { + flags uint8 + gsoType uint8 + hdrLen uint16 + gsoSize uint16 + csumStart uint16 + csumOffset uint16 +} + +func (v *virtioNetHdr) decode(b []byte) error { + if len(b) < virtioNetHdrLen { + return io.ErrShortBuffer + } + copy(unsafe.Slice((*byte)(unsafe.Pointer(v)), virtioNetHdrLen), b[:virtioNetHdrLen]) + return nil +} + +func (v *virtioNetHdr) encode(b []byte) error { + if len(b) < virtioNetHdrLen { + return io.ErrShortBuffer + } + copy(b[:virtioNetHdrLen], unsafe.Slice((*byte)(unsafe.Pointer(v)), virtioNetHdrLen)) + return nil +} + +const ( + // virtioNetHdrLen is the length in bytes of virtioNetHdr. This matches the + // shape of the C ABI for its kernel counterpart -- sizeof(virtio_net_hdr). + virtioNetHdrLen = int(unsafe.Sizeof(virtioNetHdr{})) +) + +// tcpFlowKey represents the key for a TCP flow. +type tcpFlowKey struct { + srcAddr, dstAddr [16]byte + srcPort, dstPort uint16 + rxAck uint32 // varying ack values should not be coalesced. Treat them as separate flows. + isV6 bool +} + +// tcpGROTable holds flow and coalescing information for the purposes of TCP GRO. +type tcpGROTable struct { + itemsByFlow map[tcpFlowKey][]tcpGROItem + itemsPool [][]tcpGROItem +} + +func newTCPGROTable() *tcpGROTable { + t := &tcpGROTable{ + itemsByFlow: make(map[tcpFlowKey][]tcpGROItem, conn.IdealBatchSize), + itemsPool: make([][]tcpGROItem, conn.IdealBatchSize), + } + for i := range t.itemsPool { + t.itemsPool[i] = make([]tcpGROItem, 0, conn.IdealBatchSize) + } + return t +} + +func newTCPFlowKey(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset int) tcpFlowKey { + key := tcpFlowKey{} + addrSize := dstAddrOffset - srcAddrOffset + copy(key.srcAddr[:], pkt[srcAddrOffset:dstAddrOffset]) + copy(key.dstAddr[:], pkt[dstAddrOffset:dstAddrOffset+addrSize]) + key.srcPort = binary.BigEndian.Uint16(pkt[tcphOffset:]) + key.dstPort = binary.BigEndian.Uint16(pkt[tcphOffset+2:]) + key.rxAck = binary.BigEndian.Uint32(pkt[tcphOffset+8:]) + key.isV6 = addrSize == 16 + return key +} + +// lookupOrInsert looks up a flow for the provided packet and metadata, +// returning the packets found for the flow, or inserting a new one if none +// is found. +func (t *tcpGROTable) lookupOrInsert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) ([]tcpGROItem, bool) { + key := newTCPFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset) + items, ok := t.itemsByFlow[key] + if ok { + return items, ok + } + // TODO: insert() performs another map lookup. This could be rearranged to avoid. + t.insert(pkt, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex) + return nil, false +} + +// insert an item in the table for the provided packet and packet metadata. +func (t *tcpGROTable) insert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) { + key := newTCPFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset) + item := tcpGROItem{ + key: key, + bufsIndex: uint16(bufsIndex), + gsoSize: uint16(len(pkt[tcphOffset+tcphLen:])), + iphLen: uint8(tcphOffset), + tcphLen: uint8(tcphLen), + sentSeq: binary.BigEndian.Uint32(pkt[tcphOffset+4:]), + pshSet: pkt[tcphOffset+tcpFlagsOffset]&tcpFlagPSH != 0, + } + items, ok := t.itemsByFlow[key] + if !ok { + items = t.newItems() + } + items = append(items, item) + t.itemsByFlow[key] = items +} + +func (t *tcpGROTable) updateAt(item tcpGROItem, i int) { + items, _ := t.itemsByFlow[item.key] + items[i] = item +} + +func (t *tcpGROTable) deleteAt(key tcpFlowKey, i int) { + items, _ := t.itemsByFlow[key] + items = append(items[:i], items[i+1:]...) + t.itemsByFlow[key] = items +} + +// tcpGROItem represents bookkeeping data for a TCP packet during the lifetime +// of a GRO evaluation across a vector of packets. +type tcpGROItem struct { + key tcpFlowKey + sentSeq uint32 // the sequence number + bufsIndex uint16 // the index into the original bufs slice + numMerged uint16 // the number of packets merged into this item + gsoSize uint16 // payload size + iphLen uint8 // ip header len + tcphLen uint8 // tcp header len + pshSet bool // psh flag is set +} + +func (t *tcpGROTable) newItems() []tcpGROItem { + var items []tcpGROItem + items, t.itemsPool = t.itemsPool[len(t.itemsPool)-1], t.itemsPool[:len(t.itemsPool)-1] + return items +} + +func (t *tcpGROTable) reset() { + for k, items := range t.itemsByFlow { + items = items[:0] + t.itemsPool = append(t.itemsPool, items) + delete(t.itemsByFlow, k) + } +} + +// udpFlowKey represents the key for a UDP flow. +type udpFlowKey struct { + srcAddr, dstAddr [16]byte + srcPort, dstPort uint16 + isV6 bool +} + +// udpGROTable holds flow and coalescing information for the purposes of UDP GRO. +type udpGROTable struct { + itemsByFlow map[udpFlowKey][]udpGROItem + itemsPool [][]udpGROItem +} + +func newUDPGROTable() *udpGROTable { + u := &udpGROTable{ + itemsByFlow: make(map[udpFlowKey][]udpGROItem, conn.IdealBatchSize), + itemsPool: make([][]udpGROItem, conn.IdealBatchSize), + } + for i := range u.itemsPool { + u.itemsPool[i] = make([]udpGROItem, 0, conn.IdealBatchSize) + } + return u +} + +func newUDPFlowKey(pkt []byte, srcAddrOffset, dstAddrOffset, udphOffset int) udpFlowKey { + key := udpFlowKey{} + addrSize := dstAddrOffset - srcAddrOffset + copy(key.srcAddr[:], pkt[srcAddrOffset:dstAddrOffset]) + copy(key.dstAddr[:], pkt[dstAddrOffset:dstAddrOffset+addrSize]) + key.srcPort = binary.BigEndian.Uint16(pkt[udphOffset:]) + key.dstPort = binary.BigEndian.Uint16(pkt[udphOffset+2:]) + key.isV6 = addrSize == 16 + return key +} + +// lookupOrInsert looks up a flow for the provided packet and metadata, +// returning the packets found for the flow, or inserting a new one if none +// is found. +func (u *udpGROTable) lookupOrInsert(pkt []byte, srcAddrOffset, dstAddrOffset, udphOffset, bufsIndex int) ([]udpGROItem, bool) { + key := newUDPFlowKey(pkt, srcAddrOffset, dstAddrOffset, udphOffset) + items, ok := u.itemsByFlow[key] + if ok { + return items, ok + } + // TODO: insert() performs another map lookup. This could be rearranged to avoid. + u.insert(pkt, srcAddrOffset, dstAddrOffset, udphOffset, bufsIndex, false) + return nil, false +} + +// insert an item in the table for the provided packet and packet metadata. +func (u *udpGROTable) insert(pkt []byte, srcAddrOffset, dstAddrOffset, udphOffset, bufsIndex int, cSumKnownInvalid bool) { + key := newUDPFlowKey(pkt, srcAddrOffset, dstAddrOffset, udphOffset) + item := udpGROItem{ + key: key, + bufsIndex: uint16(bufsIndex), + gsoSize: uint16(len(pkt[udphOffset+udphLen:])), + iphLen: uint8(udphOffset), + cSumKnownInvalid: cSumKnownInvalid, + } + items, ok := u.itemsByFlow[key] + if !ok { + items = u.newItems() + } + items = append(items, item) + u.itemsByFlow[key] = items +} + +func (u *udpGROTable) updateAt(item udpGROItem, i int) { + items, _ := u.itemsByFlow[item.key] + items[i] = item +} + +// udpGROItem represents bookkeeping data for a UDP packet during the lifetime +// of a GRO evaluation across a vector of packets. +type udpGROItem struct { + key udpFlowKey + bufsIndex uint16 // the index into the original bufs slice + numMerged uint16 // the number of packets merged into this item + gsoSize uint16 // payload size + iphLen uint8 // ip header len + cSumKnownInvalid bool // UDP header checksum validity; a false value DOES NOT imply valid, just unknown. +} + +func (u *udpGROTable) newItems() []udpGROItem { + var items []udpGROItem + items, u.itemsPool = u.itemsPool[len(u.itemsPool)-1], u.itemsPool[:len(u.itemsPool)-1] + return items +} + +func (u *udpGROTable) reset() { + for k, items := range u.itemsByFlow { + items = items[:0] + u.itemsPool = append(u.itemsPool, items) + delete(u.itemsByFlow, k) + } +} + +// canCoalesce represents the outcome of checking if two TCP packets are +// candidates for coalescing. +type canCoalesce int + +const ( + coalescePrepend canCoalesce = -1 + coalesceUnavailable canCoalesce = 0 + coalesceAppend canCoalesce = 1 +) + +// ipHeadersCanCoalesce returns true if the IP headers found in pktA and pktB +// meet all requirements to be merged as part of a GRO operation, otherwise it +// returns false. +func ipHeadersCanCoalesce(pktA, pktB []byte) bool { + if len(pktA) < 9 || len(pktB) < 9 { + return false + } + if pktA[0]>>4 == 6 { + if pktA[0] != pktB[0] || pktA[1]>>4 != pktB[1]>>4 { + // cannot coalesce with unequal Traffic class values + return false + } + if pktA[7] != pktB[7] { + // cannot coalesce with unequal Hop limit values + return false + } + } else { + if pktA[1] != pktB[1] { + // cannot coalesce with unequal ToS values + return false + } + if pktA[6]>>5 != pktB[6]>>5 { + // cannot coalesce with unequal DF or reserved bits. MF is checked + // further up the stack. + return false + } + if pktA[8] != pktB[8] { + // cannot coalesce with unequal TTL values + return false + } + } + return true +} + +// udpPacketsCanCoalesce evaluates if pkt can be coalesced with the packet +// described by item. iphLen and gsoSize describe pkt. bufs is the vector of +// packets involved in the current GRO evaluation. bufsOffset is the offset at +// which packet data begins within bufs. +func udpPacketsCanCoalesce(pkt []byte, iphLen uint8, gsoSize uint16, item udpGROItem, bufs [][]byte, bufsOffset int) canCoalesce { + pktTarget := bufs[item.bufsIndex][bufsOffset:] + if !ipHeadersCanCoalesce(pkt, pktTarget) { + return coalesceUnavailable + } + if len(pktTarget[iphLen+udphLen:])%int(item.gsoSize) != 0 { + // A smaller than gsoSize packet has been appended previously. + // Nothing can come after a smaller packet on the end. + return coalesceUnavailable + } + if gsoSize > item.gsoSize { + // We cannot have a larger packet following a smaller one. + return coalesceUnavailable + } + return coalesceAppend +} + +// tcpPacketsCanCoalesce evaluates if pkt can be coalesced with the packet +// described by item. This function makes considerations that match the kernel's +// GRO self tests, which can be found in tools/testing/selftests/net/gro.c. +func tcpPacketsCanCoalesce(pkt []byte, iphLen, tcphLen uint8, seq uint32, pshSet bool, gsoSize uint16, item tcpGROItem, bufs [][]byte, bufsOffset int) canCoalesce { + pktTarget := bufs[item.bufsIndex][bufsOffset:] + if tcphLen != item.tcphLen { + // cannot coalesce with unequal tcp options len + return coalesceUnavailable + } + if tcphLen > 20 { + if !bytes.Equal(pkt[iphLen+20:iphLen+tcphLen], pktTarget[item.iphLen+20:iphLen+tcphLen]) { + // cannot coalesce with unequal tcp options + return coalesceUnavailable + } + } + if !ipHeadersCanCoalesce(pkt, pktTarget) { + return coalesceUnavailable + } + // seq adjacency + lhsLen := item.gsoSize + lhsLen += item.numMerged * item.gsoSize + if seq == item.sentSeq+uint32(lhsLen) { // pkt aligns following item from a seq num perspective + if item.pshSet { + // We cannot append to a segment that has the PSH flag set, PSH + // can only be set on the final segment in a reassembled group. + return coalesceUnavailable + } + if len(pktTarget[iphLen+tcphLen:])%int(item.gsoSize) != 0 { + // A smaller than gsoSize packet has been appended previously. + // Nothing can come after a smaller packet on the end. + return coalesceUnavailable + } + if gsoSize > item.gsoSize { + // We cannot have a larger packet following a smaller one. + return coalesceUnavailable + } + return coalesceAppend + } else if seq+uint32(gsoSize) == item.sentSeq { // pkt aligns in front of item from a seq num perspective + if pshSet { + // We cannot prepend with a segment that has the PSH flag set, PSH + // can only be set on the final segment in a reassembled group. + return coalesceUnavailable + } + if gsoSize < item.gsoSize { + // We cannot have a larger packet following a smaller one. + return coalesceUnavailable + } + if gsoSize > item.gsoSize && item.numMerged > 0 { + // There's at least one previous merge, and we're larger than all + // previous. This would put multiple smaller packets on the end. + return coalesceUnavailable + } + return coalescePrepend + } + return coalesceUnavailable +} + +func checksumValid(pkt []byte, iphLen, proto uint8, isV6 bool) bool { + srcAddrAt := ipv4SrcAddrOffset + addrSize := 4 + if isV6 { + srcAddrAt = ipv6SrcAddrOffset + addrSize = 16 + } + lenForPseudo := uint16(len(pkt) - int(iphLen)) + cSum := pseudoHeaderChecksumNoFold(proto, pkt[srcAddrAt:srcAddrAt+addrSize], pkt[srcAddrAt+addrSize:srcAddrAt+addrSize*2], lenForPseudo) + return ^checksum(pkt[iphLen:], cSum) == 0 +} + +// coalesceResult represents the result of attempting to coalesce two TCP +// packets. +type coalesceResult int + +const ( + coalesceInsufficientCap coalesceResult = iota + coalescePSHEnding + coalesceItemInvalidCSum + coalescePktInvalidCSum + coalesceSuccess +) + +// coalesceUDPPackets attempts to coalesce pkt with the packet described by +// item, and returns the outcome. +func coalesceUDPPackets(pkt []byte, item *udpGROItem, bufs [][]byte, bufsOffset int, isV6 bool) coalesceResult { + pktHead := bufs[item.bufsIndex][bufsOffset:] // the packet that will end up at the front + headersLen := item.iphLen + udphLen + coalescedLen := len(bufs[item.bufsIndex][bufsOffset:]) + len(pkt) - int(headersLen) + + if cap(pktHead)-bufsOffset < coalescedLen { + // We don't want to allocate a new underlying array if capacity is + // too small. + return coalesceInsufficientCap + } + if item.numMerged == 0 { + if item.cSumKnownInvalid || !checksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, unix.IPPROTO_UDP, isV6) { + return coalesceItemInvalidCSum + } + } + if !checksumValid(pkt, item.iphLen, unix.IPPROTO_UDP, isV6) { + return coalescePktInvalidCSum + } + extendBy := len(pkt) - int(headersLen) + bufs[item.bufsIndex] = append(bufs[item.bufsIndex], make([]byte, extendBy)...) + copy(bufs[item.bufsIndex][bufsOffset+len(pktHead):], pkt[headersLen:]) + + item.numMerged++ + return coalesceSuccess +} + +// coalesceTCPPackets attempts to coalesce pkt with the packet described by +// item, and returns the outcome. This function may swap bufs elements in the +// event of a prepend as item's bufs index is already being tracked for writing +// to a Device. +func coalesceTCPPackets(mode canCoalesce, pkt []byte, pktBuffsIndex int, gsoSize uint16, seq uint32, pshSet bool, item *tcpGROItem, bufs [][]byte, bufsOffset int, isV6 bool) coalesceResult { + var pktHead []byte // the packet that will end up at the front + headersLen := item.iphLen + item.tcphLen + coalescedLen := len(bufs[item.bufsIndex][bufsOffset:]) + len(pkt) - int(headersLen) + + // Copy data + if mode == coalescePrepend { + pktHead = pkt + if cap(pkt)-bufsOffset < coalescedLen { + // We don't want to allocate a new underlying array if capacity is + // too small. + return coalesceInsufficientCap + } + if pshSet { + return coalescePSHEnding + } + if item.numMerged == 0 { + if !checksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, unix.IPPROTO_TCP, isV6) { + return coalesceItemInvalidCSum + } + } + if !checksumValid(pkt, item.iphLen, unix.IPPROTO_TCP, isV6) { + return coalescePktInvalidCSum + } + item.sentSeq = seq + extendBy := coalescedLen - len(pktHead) + bufs[pktBuffsIndex] = append(bufs[pktBuffsIndex], make([]byte, extendBy)...) + copy(bufs[pktBuffsIndex][bufsOffset+len(pkt):], bufs[item.bufsIndex][bufsOffset+int(headersLen):]) + // Flip the slice headers in bufs as part of prepend. The index of item + // is already being tracked for writing. + bufs[item.bufsIndex], bufs[pktBuffsIndex] = bufs[pktBuffsIndex], bufs[item.bufsIndex] + } else { + pktHead = bufs[item.bufsIndex][bufsOffset:] + if cap(pktHead)-bufsOffset < coalescedLen { + // We don't want to allocate a new underlying array if capacity is + // too small. + return coalesceInsufficientCap + } + if item.numMerged == 0 { + if !checksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, unix.IPPROTO_TCP, isV6) { + return coalesceItemInvalidCSum + } + } + if !checksumValid(pkt, item.iphLen, unix.IPPROTO_TCP, isV6) { + return coalescePktInvalidCSum + } + if pshSet { + // We are appending a segment with PSH set. + item.pshSet = pshSet + pktHead[item.iphLen+tcpFlagsOffset] |= tcpFlagPSH + } + extendBy := len(pkt) - int(headersLen) + bufs[item.bufsIndex] = append(bufs[item.bufsIndex], make([]byte, extendBy)...) + copy(bufs[item.bufsIndex][bufsOffset+len(pktHead):], pkt[headersLen:]) + } + + if gsoSize > item.gsoSize { + item.gsoSize = gsoSize + } + + item.numMerged++ + return coalesceSuccess +} + +const ( + ipv4FlagMoreFragments uint8 = 0x20 +) + +const ( + ipv4SrcAddrOffset = 12 + ipv6SrcAddrOffset = 8 + maxUint16 = 1<<16 - 1 +) + +type groResult int + +const ( + groResultNoop groResult = iota + groResultTableInsert + groResultCoalesced +) + +// tcpGRO evaluates the TCP packet at pktI in bufs for coalescing with +// existing packets tracked in table. It returns a groResultNoop when no +// action was taken, groResultTableInsert when the evaluated packet was +// inserted into table, and groResultCoalesced when the evaluated packet was +// coalesced with another packet in table. +func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool) groResult { + pkt := bufs[pktI][offset:] + if len(pkt) > maxUint16 { + // A valid IPv4 or IPv6 packet will never exceed this. + return groResultNoop + } + iphLen := int((pkt[0] & 0x0F) * 4) + if isV6 { + iphLen = 40 + ipv6HPayloadLen := int(binary.BigEndian.Uint16(pkt[4:])) + if ipv6HPayloadLen != len(pkt)-iphLen { + return groResultNoop + } + } else { + totalLen := int(binary.BigEndian.Uint16(pkt[2:])) + if totalLen != len(pkt) { + return groResultNoop + } + } + if len(pkt) < iphLen { + return groResultNoop + } + tcphLen := int((pkt[iphLen+12] >> 4) * 4) + if tcphLen < 20 || tcphLen > 60 { + return groResultNoop + } + if len(pkt) < iphLen+tcphLen { + return groResultNoop + } + if !isV6 { + if pkt[6]&ipv4FlagMoreFragments != 0 || pkt[6]<<3 != 0 || pkt[7] != 0 { + // no GRO support for fragmented segments for now + return groResultNoop + } + } + tcpFlags := pkt[iphLen+tcpFlagsOffset] + var pshSet bool + // not a candidate if any non-ACK flags (except PSH+ACK) are set + if tcpFlags != tcpFlagACK { + if pkt[iphLen+tcpFlagsOffset] != tcpFlagACK|tcpFlagPSH { + return groResultNoop + } + pshSet = true + } + gsoSize := uint16(len(pkt) - tcphLen - iphLen) + // not a candidate if payload len is 0 + if gsoSize < 1 { + return groResultNoop + } + seq := binary.BigEndian.Uint32(pkt[iphLen+4:]) + srcAddrOffset := ipv4SrcAddrOffset + addrLen := 4 + if isV6 { + srcAddrOffset = ipv6SrcAddrOffset + addrLen = 16 + } + items, existing := table.lookupOrInsert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI) + if !existing { + return groResultTableInsert + } + for i := len(items) - 1; i >= 0; i-- { + // In the best case of packets arriving in order iterating in reverse is + // more efficient if there are multiple items for a given flow. This + // also enables a natural table.deleteAt() in the + // coalesceItemInvalidCSum case without the need for index tracking. + // This algorithm makes a best effort to coalesce in the event of + // unordered packets, where pkt may land anywhere in items from a + // sequence number perspective, however once an item is inserted into + // the table it is never compared across other items later. + item := items[i] + can := tcpPacketsCanCoalesce(pkt, uint8(iphLen), uint8(tcphLen), seq, pshSet, gsoSize, item, bufs, offset) + if can != coalesceUnavailable { + result := coalesceTCPPackets(can, pkt, pktI, gsoSize, seq, pshSet, &item, bufs, offset, isV6) + switch result { + case coalesceSuccess: + table.updateAt(item, i) + return groResultCoalesced + case coalesceItemInvalidCSum: + // delete the item with an invalid csum + table.deleteAt(item.key, i) + case coalescePktInvalidCSum: + // no point in inserting an item that we can't coalesce + return groResultNoop + default: + } + } + } + // failed to coalesce with any other packets; store the item in the flow + table.insert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI) + return groResultTableInsert +} + +// applyTCPCoalesceAccounting updates bufs to account for coalescing based on the +// metadata found in table. +func applyTCPCoalesceAccounting(bufs [][]byte, offset int, table *tcpGROTable) error { + for _, items := range table.itemsByFlow { + for _, item := range items { + if item.numMerged > 0 { + hdr := virtioNetHdr{ + flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM, // this turns into CHECKSUM_PARTIAL in the skb + hdrLen: uint16(item.iphLen + item.tcphLen), + gsoSize: item.gsoSize, + csumStart: uint16(item.iphLen), + csumOffset: 16, + } + pkt := bufs[item.bufsIndex][offset:] + + // Recalculate the total len (IPv4) or payload len (IPv6). + // Recalculate the (IPv4) header checksum. + if item.key.isV6 { + hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV6 + binary.BigEndian.PutUint16(pkt[4:], uint16(len(pkt))-uint16(item.iphLen)) // set new IPv6 header payload len + } else { + hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV4 + pkt[10], pkt[11] = 0, 0 + binary.BigEndian.PutUint16(pkt[2:], uint16(len(pkt))) // set new total length + iphCSum := ^checksum(pkt[:item.iphLen], 0) // compute IPv4 header checksum + binary.BigEndian.PutUint16(pkt[10:], iphCSum) // set IPv4 header checksum field + } + err := hdr.encode(bufs[item.bufsIndex][offset-virtioNetHdrLen:]) + if err != nil { + return err + } + + // Calculate the pseudo header checksum and place it at the TCP + // checksum offset. Downstream checksum offloading will combine + // this with computation of the tcp header and payload checksum. + addrLen := 4 + addrOffset := ipv4SrcAddrOffset + if item.key.isV6 { + addrLen = 16 + addrOffset = ipv6SrcAddrOffset + } + srcAddrAt := offset + addrOffset + srcAddr := bufs[item.bufsIndex][srcAddrAt : srcAddrAt+addrLen] + dstAddr := bufs[item.bufsIndex][srcAddrAt+addrLen : srcAddrAt+addrLen*2] + psum := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, srcAddr, dstAddr, uint16(len(pkt)-int(item.iphLen))) + binary.BigEndian.PutUint16(pkt[hdr.csumStart+hdr.csumOffset:], checksum([]byte{}, psum)) + } else { + hdr := virtioNetHdr{} + err := hdr.encode(bufs[item.bufsIndex][offset-virtioNetHdrLen:]) + if err != nil { + return err + } + } + } + } + return nil +} + +// applyUDPCoalesceAccounting updates bufs to account for coalescing based on the +// metadata found in table. +func applyUDPCoalesceAccounting(bufs [][]byte, offset int, table *udpGROTable) error { + for _, items := range table.itemsByFlow { + for _, item := range items { + if item.numMerged > 0 { + hdr := virtioNetHdr{ + flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM, // this turns into CHECKSUM_PARTIAL in the skb + hdrLen: uint16(item.iphLen + udphLen), + gsoSize: item.gsoSize, + csumStart: uint16(item.iphLen), + csumOffset: 6, + } + pkt := bufs[item.bufsIndex][offset:] + + // Recalculate the total len (IPv4) or payload len (IPv6). + // Recalculate the (IPv4) header checksum. + hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_UDP_L4 + if item.key.isV6 { + binary.BigEndian.PutUint16(pkt[4:], uint16(len(pkt))-uint16(item.iphLen)) // set new IPv6 header payload len + } else { + pkt[10], pkt[11] = 0, 0 + binary.BigEndian.PutUint16(pkt[2:], uint16(len(pkt))) // set new total length + iphCSum := ^checksum(pkt[:item.iphLen], 0) // compute IPv4 header checksum + binary.BigEndian.PutUint16(pkt[10:], iphCSum) // set IPv4 header checksum field + } + err := hdr.encode(bufs[item.bufsIndex][offset-virtioNetHdrLen:]) + if err != nil { + return err + } + + // Recalculate the UDP len field value + binary.BigEndian.PutUint16(pkt[item.iphLen+4:], uint16(len(pkt[item.iphLen:]))) + + // Calculate the pseudo header checksum and place it at the UDP + // checksum offset. Downstream checksum offloading will combine + // this with computation of the udp header and payload checksum. + addrLen := 4 + addrOffset := ipv4SrcAddrOffset + if item.key.isV6 { + addrLen = 16 + addrOffset = ipv6SrcAddrOffset + } + srcAddrAt := offset + addrOffset + srcAddr := bufs[item.bufsIndex][srcAddrAt : srcAddrAt+addrLen] + dstAddr := bufs[item.bufsIndex][srcAddrAt+addrLen : srcAddrAt+addrLen*2] + psum := pseudoHeaderChecksumNoFold(unix.IPPROTO_UDP, srcAddr, dstAddr, uint16(len(pkt)-int(item.iphLen))) + binary.BigEndian.PutUint16(pkt[hdr.csumStart+hdr.csumOffset:], checksum([]byte{}, psum)) + } else { + hdr := virtioNetHdr{} + err := hdr.encode(bufs[item.bufsIndex][offset-virtioNetHdrLen:]) + if err != nil { + return err + } + } + } + } + return nil +} + +type groCandidateType uint8 + +const ( + notGROCandidate groCandidateType = iota + tcp4GROCandidate + tcp6GROCandidate + udp4GROCandidate + udp6GROCandidate +) + +func packetIsGROCandidate(b []byte, canUDPGRO bool) groCandidateType { + if len(b) < 28 { + return notGROCandidate + } + if b[0]>>4 == 4 { + if b[0]&0x0F != 5 { + // IPv4 packets w/IP options do not coalesce + return notGROCandidate + } + if b[9] == unix.IPPROTO_TCP && len(b) >= 40 { + return tcp4GROCandidate + } + if b[9] == unix.IPPROTO_UDP && canUDPGRO { + return udp4GROCandidate + } + } else if b[0]>>4 == 6 { + if b[6] == unix.IPPROTO_TCP && len(b) >= 60 { + return tcp6GROCandidate + } + if b[6] == unix.IPPROTO_UDP && len(b) >= 48 && canUDPGRO { + return udp6GROCandidate + } + } + return notGROCandidate +} + +const ( + udphLen = 8 +) + +// udpGRO evaluates the UDP packet at pktI in bufs for coalescing with +// existing packets tracked in table. It returns a groResultNoop when no +// action was taken, groResultTableInsert when the evaluated packet was +// inserted into table, and groResultCoalesced when the evaluated packet was +// coalesced with another packet in table. +func udpGRO(bufs [][]byte, offset int, pktI int, table *udpGROTable, isV6 bool) groResult { + pkt := bufs[pktI][offset:] + if len(pkt) > maxUint16 { + // A valid IPv4 or IPv6 packet will never exceed this. + return groResultNoop + } + iphLen := int((pkt[0] & 0x0F) * 4) + if isV6 { + iphLen = 40 + ipv6HPayloadLen := int(binary.BigEndian.Uint16(pkt[4:])) + if ipv6HPayloadLen != len(pkt)-iphLen { + return groResultNoop + } + } else { + totalLen := int(binary.BigEndian.Uint16(pkt[2:])) + if totalLen != len(pkt) { + return groResultNoop + } + } + if len(pkt) < iphLen { + return groResultNoop + } + if len(pkt) < iphLen+udphLen { + return groResultNoop + } + if !isV6 { + if pkt[6]&ipv4FlagMoreFragments != 0 || pkt[6]<<3 != 0 || pkt[7] != 0 { + // no GRO support for fragmented segments for now + return groResultNoop + } + } + gsoSize := uint16(len(pkt) - udphLen - iphLen) + // not a candidate if payload len is 0 + if gsoSize < 1 { + return groResultNoop + } + srcAddrOffset := ipv4SrcAddrOffset + addrLen := 4 + if isV6 { + srcAddrOffset = ipv6SrcAddrOffset + addrLen = 16 + } + items, existing := table.lookupOrInsert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, pktI) + if !existing { + return groResultTableInsert + } + // With UDP we only check the last item, otherwise we could reorder packets + // for a given flow. We must also always insert a new item, or successfully + // coalesce with an existing item, for the same reason. + item := items[len(items)-1] + can := udpPacketsCanCoalesce(pkt, uint8(iphLen), gsoSize, item, bufs, offset) + var pktCSumKnownInvalid bool + if can == coalesceAppend { + result := coalesceUDPPackets(pkt, &item, bufs, offset, isV6) + switch result { + case coalesceSuccess: + table.updateAt(item, len(items)-1) + return groResultCoalesced + case coalesceItemInvalidCSum: + // If the existing item has an invalid csum we take no action. A new + // item will be stored after it, and the existing item will never be + // revisited as part of future coalescing candidacy checks. + case coalescePktInvalidCSum: + // We must insert a new item, but we also mark it as invalid csum + // to prevent a repeat checksum validation. + pktCSumKnownInvalid = true + default: + } + } + // failed to coalesce with any other packets; store the item in the flow + table.insert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, pktI, pktCSumKnownInvalid) + return groResultTableInsert +} + +// handleGRO evaluates bufs for GRO, and writes the indices of the resulting +// packets into toWrite. toWrite, tcpTable, and udpTable should initially be +// empty (but non-nil), and are passed in to save allocs as the caller may reset +// and recycle them across vectors of packets. canUDPGRO indicates if UDP GRO is +// supported. +func handleGRO(bufs [][]byte, offset int, tcpTable *tcpGROTable, udpTable *udpGROTable, canUDPGRO bool, toWrite *[]int) error { + for i := range bufs { + if offset < virtioNetHdrLen || offset > len(bufs[i])-1 { + return errors.New("invalid offset") + } + var result groResult + switch packetIsGROCandidate(bufs[i][offset:], canUDPGRO) { + case tcp4GROCandidate: + result = tcpGRO(bufs, offset, i, tcpTable, false) + case tcp6GROCandidate: + result = tcpGRO(bufs, offset, i, tcpTable, true) + case udp4GROCandidate: + result = udpGRO(bufs, offset, i, udpTable, false) + case udp6GROCandidate: + result = udpGRO(bufs, offset, i, udpTable, true) + } + switch result { + case groResultNoop: + hdr := virtioNetHdr{} + err := hdr.encode(bufs[i][offset-virtioNetHdrLen:]) + if err != nil { + return err + } + fallthrough + case groResultTableInsert: + *toWrite = append(*toWrite, i) + } + } + errTCP := applyTCPCoalesceAccounting(bufs, offset, tcpTable) + errUDP := applyUDPCoalesceAccounting(bufs, offset, udpTable) + return errors.Join(errTCP, errUDP) +} + +// gsoSplit splits packets from in into outBuffs, writing the size of each +// element into sizes. It returns the number of buffers populated, and/or an +// error. +func gsoSplit(in []byte, hdr virtioNetHdr, outBuffs [][]byte, sizes []int, outOffset int, isV6 bool) (int, error) { + iphLen := int(hdr.csumStart) + srcAddrOffset := ipv6SrcAddrOffset + addrLen := 16 + if !isV6 { + in[10], in[11] = 0, 0 // clear ipv4 header checksum + srcAddrOffset = ipv4SrcAddrOffset + addrLen = 4 + } + transportCsumAt := int(hdr.csumStart + hdr.csumOffset) + in[transportCsumAt], in[transportCsumAt+1] = 0, 0 // clear tcp/udp checksum + var firstTCPSeqNum uint32 + var protocol uint8 + if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 || hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV6 { + protocol = unix.IPPROTO_TCP + firstTCPSeqNum = binary.BigEndian.Uint32(in[hdr.csumStart+4:]) + } else { + protocol = unix.IPPROTO_UDP + } + nextSegmentDataAt := int(hdr.hdrLen) + i := 0 + for ; nextSegmentDataAt < len(in); i++ { + if i == len(outBuffs) { + return i - 1, ErrTooManySegments + } + nextSegmentEnd := nextSegmentDataAt + int(hdr.gsoSize) + if nextSegmentEnd > len(in) { + nextSegmentEnd = len(in) + } + segmentDataLen := nextSegmentEnd - nextSegmentDataAt + totalLen := int(hdr.hdrLen) + segmentDataLen + sizes[i] = totalLen + out := outBuffs[i][outOffset:] + + copy(out, in[:iphLen]) + if !isV6 { + // For IPv4 we are responsible for incrementing the ID field, + // updating the total len field, and recalculating the header + // checksum. + if i > 0 { + id := binary.BigEndian.Uint16(out[4:]) + id += uint16(i) + binary.BigEndian.PutUint16(out[4:], id) + } + binary.BigEndian.PutUint16(out[2:], uint16(totalLen)) + ipv4CSum := ^checksum(out[:iphLen], 0) + binary.BigEndian.PutUint16(out[10:], ipv4CSum) + } else { + // For IPv6 we are responsible for updating the payload length field. + binary.BigEndian.PutUint16(out[4:], uint16(totalLen-iphLen)) + } + + // copy transport header + copy(out[hdr.csumStart:hdr.hdrLen], in[hdr.csumStart:hdr.hdrLen]) + + if protocol == unix.IPPROTO_TCP { + // set TCP seq and adjust TCP flags + tcpSeq := firstTCPSeqNum + uint32(hdr.gsoSize*uint16(i)) + binary.BigEndian.PutUint32(out[hdr.csumStart+4:], tcpSeq) + if nextSegmentEnd != len(in) { + // FIN and PSH should only be set on last segment + clearFlags := tcpFlagFIN | tcpFlagPSH + out[hdr.csumStart+tcpFlagsOffset] &^= clearFlags + } + } else { + // set UDP header len + binary.BigEndian.PutUint16(out[hdr.csumStart+4:], uint16(segmentDataLen)+(hdr.hdrLen-hdr.csumStart)) + } + + // payload + copy(out[hdr.hdrLen:], in[nextSegmentDataAt:nextSegmentEnd]) + + // transport checksum + transportHeaderLen := int(hdr.hdrLen - hdr.csumStart) + lenForPseudo := uint16(transportHeaderLen + segmentDataLen) + transportCSumNoFold := pseudoHeaderChecksumNoFold(protocol, in[srcAddrOffset:srcAddrOffset+addrLen], in[srcAddrOffset+addrLen:srcAddrOffset+addrLen*2], lenForPseudo) + transportCSum := ^checksum(out[hdr.csumStart:totalLen], transportCSumNoFold) + binary.BigEndian.PutUint16(out[hdr.csumStart+hdr.csumOffset:], transportCSum) + + nextSegmentDataAt += int(hdr.gsoSize) + } + return i, nil +} + +func gsoNoneChecksum(in []byte, cSumStart, cSumOffset uint16) error { + cSumAt := cSumStart + cSumOffset + // The initial value at the checksum offset should be summed with the + // checksum we compute. This is typically the pseudo-header checksum. + initial := binary.BigEndian.Uint16(in[cSumAt:]) + in[cSumAt], in[cSumAt+1] = 0, 0 + binary.BigEndian.PutUint16(in[cSumAt:], ^checksum(in[cSumStart:], uint64(initial))) + return nil +} diff --git a/vendor/golang.zx2c4.com/wireguard/tun/tun.go b/vendor/golang.zx2c4.com/wireguard/tun/tun.go index 01051b93..0ae53d07 100644 --- a/vendor/golang.zx2c4.com/wireguard/tun/tun.go +++ b/vendor/golang.zx2c4.com/wireguard/tun/tun.go @@ -18,12 +18,36 @@ const ( ) type Device interface { - File() *os.File // returns the file descriptor of the device - Read([]byte, int) (int, error) // read a packet from the device (without any additional headers) - Write([]byte, int) (int, error) // writes a packet to the device (without any additional headers) - Flush() error // flush all previous writes to the device - MTU() (int, error) // returns the MTU of the device - Name() (string, error) // fetches and returns the current name - Events() <-chan Event // returns a constant channel of events related to the device - Close() error // stops the device and closes the event channel + // File returns the file descriptor of the device. + File() *os.File + + // Read one or more packets from the Device (without any additional headers). + // On a successful read it returns the number of packets read, and sets + // packet lengths within the sizes slice. len(sizes) must be >= len(bufs). + // A nonzero offset can be used to instruct the Device on where to begin + // reading into each element of the bufs slice. + Read(bufs [][]byte, sizes []int, offset int) (n int, err error) + + // Write one or more packets to the device (without any additional headers). + // On a successful write it returns the number of packets written. A nonzero + // offset can be used to instruct the Device on where to begin writing from + // each packet contained within the bufs slice. + Write(bufs [][]byte, offset int) (int, error) + + // MTU returns the MTU of the Device. + MTU() (int, error) + + // Name returns the current name of the Device. + Name() (string, error) + + // Events returns a channel of type Event, which is fed Device events. + Events() <-chan Event + + // Close stops the Device and closes the Event channel. + Close() error + + // BatchSize returns the preferred/max number of packets that can be read or + // written in a single read/write call. BatchSize must not change over the + // lifetime of a Device. + BatchSize() int } diff --git a/vendor/golang.zx2c4.com/wireguard/tun/tun_darwin.go b/vendor/golang.zx2c4.com/wireguard/tun/tun_darwin.go index 7411a694..c9a6c0bc 100644 --- a/vendor/golang.zx2c4.com/wireguard/tun/tun_darwin.go +++ b/vendor/golang.zx2c4.com/wireguard/tun/tun_darwin.go @@ -8,6 +8,7 @@ package tun import ( "errors" "fmt" + "io" "net" "os" "sync" @@ -15,7 +16,6 @@ import ( "time" "unsafe" - "golang.org/x/net/ipv6" "golang.org/x/sys/unix" ) @@ -33,7 +33,7 @@ type NativeTun struct { func retryInterfaceByIndex(index int) (iface *net.Interface, err error) { for i := 0; i < 20; i++ { iface, err = net.InterfaceByIndex(index) - if err != nil && errors.Is(err, syscall.ENOMEM) { + if err != nil && errors.Is(err, unix.ENOMEM) { time.Sleep(time.Duration(i) * time.Second / 3) continue } @@ -55,7 +55,7 @@ func (tun *NativeTun) routineRouteListener(tunIfindex int) { retry: n, err := unix.Read(tun.routeSocket, data) if err != nil { - if errno, ok := err.(syscall.Errno); ok && errno == syscall.EINTR { + if errno, ok := err.(unix.Errno); ok && errno == unix.EINTR { goto retry } tun.errors <- err @@ -217,45 +217,46 @@ func (tun *NativeTun) Events() <-chan Event { return tun.events } -func (tun *NativeTun) Read(buff []byte, offset int) (int, error) { +func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) { + // TODO: the BSDs look very similar in Read() and Write(). They should be + // collapsed, with platform-specific files containing the varying parts of + // their implementations. select { case err := <-tun.errors: return 0, err default: - buff := buff[offset-4:] - n, err := tun.tunFile.Read(buff[:]) + buf := bufs[0][offset-4:] + n, err := tun.tunFile.Read(buf[:]) if n < 4 { return 0, err } - return n - 4, err + sizes[0] = n - 4 + return 1, err } } -func (tun *NativeTun) Write(buff []byte, offset int) (int, error) { - // reserve space for header - - buff = buff[offset-4:] - - // add packet information header - - buff[0] = 0x00 - buff[1] = 0x00 - buff[2] = 0x00 - - if buff[4]>>4 == ipv6.Version { - buff[3] = unix.AF_INET6 - } else { - buff[3] = unix.AF_INET +func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) { + if offset < 4 { + return 0, io.ErrShortBuffer } - - // write - - return tun.tunFile.Write(buff) -} - -func (tun *NativeTun) Flush() error { - // TODO: can flushing be implemented by buffering and using sendmmsg? - return nil + for i, buf := range bufs { + buf = buf[offset-4:] + buf[0] = 0x00 + buf[1] = 0x00 + buf[2] = 0x00 + switch buf[4] >> 4 { + case 4: + buf[3] = unix.AF_INET + case 6: + buf[3] = unix.AF_INET6 + default: + return i, unix.EAFNOSUPPORT + } + if _, err := tun.tunFile.Write(buf); err != nil { + return i, err + } + } + return len(bufs), nil } func (tun *NativeTun) Close() error { @@ -318,6 +319,10 @@ func (tun *NativeTun) MTU() (int, error) { return int(ifr.MTU), nil } +func (tun *NativeTun) BatchSize() int { + return 1 +} + func socketCloexec(family, sotype, proto int) (fd int, err error) { // See go/src/net/sys_cloexec.go for background. syscall.ForkLock.RLock() diff --git a/vendor/golang.zx2c4.com/wireguard/tun/tun_freebsd.go b/vendor/golang.zx2c4.com/wireguard/tun/tun_freebsd.go index 42431aa3..7c65fd99 100644 --- a/vendor/golang.zx2c4.com/wireguard/tun/tun_freebsd.go +++ b/vendor/golang.zx2c4.com/wireguard/tun/tun_freebsd.go @@ -333,45 +333,46 @@ func (tun *NativeTun) Events() <-chan Event { return tun.events } -func (tun *NativeTun) Read(buff []byte, offset int) (int, error) { +func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) { select { case err := <-tun.errors: return 0, err default: - buff := buff[offset-4:] - n, err := tun.tunFile.Read(buff[:]) + buf := bufs[0][offset-4:] + n, err := tun.tunFile.Read(buf[:]) if n < 4 { return 0, err } - return n - 4, err + sizes[0] = n - 4 + return 1, err } } -func (tun *NativeTun) Write(buf []byte, offset int) (int, error) { +func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) { if offset < 4 { return 0, io.ErrShortBuffer } - buf = buf[offset-4:] - if len(buf) < 5 { - return 0, io.ErrShortBuffer - } - buf[0] = 0x00 - buf[1] = 0x00 - buf[2] = 0x00 - switch buf[4] >> 4 { - case 4: - buf[3] = unix.AF_INET - case 6: - buf[3] = unix.AF_INET6 - default: - return 0, unix.EAFNOSUPPORT + for i, buf := range bufs { + buf = buf[offset-4:] + if len(buf) < 5 { + return i, io.ErrShortBuffer + } + buf[0] = 0x00 + buf[1] = 0x00 + buf[2] = 0x00 + switch buf[4] >> 4 { + case 4: + buf[3] = unix.AF_INET + case 6: + buf[3] = unix.AF_INET6 + default: + return i, unix.EAFNOSUPPORT + } + if _, err := tun.tunFile.Write(buf); err != nil { + return i, err + } } - return tun.tunFile.Write(buf) -} - -func (tun *NativeTun) Flush() error { - // TODO: can flushing be implemented by buffering and using sendmmsg? - return nil + return len(bufs), nil } func (tun *NativeTun) Close() error { @@ -428,3 +429,7 @@ func (tun *NativeTun) MTU() (int, error) { } return int(*(*int32)(unsafe.Pointer(&ifr.MTU))), nil } + +func (tun *NativeTun) BatchSize() int { + return 1 +} diff --git a/vendor/golang.zx2c4.com/wireguard/tun/tun_linux.go b/vendor/golang.zx2c4.com/wireguard/tun/tun_linux.go index 25dbc074..bd69cb55 100644 --- a/vendor/golang.zx2c4.com/wireguard/tun/tun_linux.go +++ b/vendor/golang.zx2c4.com/wireguard/tun/tun_linux.go @@ -17,9 +17,8 @@ import ( "time" "unsafe" - "golang.org/x/net/ipv6" "golang.org/x/sys/unix" - + "golang.zx2c4.com/wireguard/conn" "golang.zx2c4.com/wireguard/rwcancel" ) @@ -33,17 +32,27 @@ type NativeTun struct { index int32 // if index errors chan error // async error handling events chan Event // device related events - nopi bool // the device was passed IFF_NO_PI netlinkSock int netlinkCancel *rwcancel.RWCancel hackListenerClosed sync.Mutex statusListenersShutdown chan struct{} + batchSize int + vnetHdr bool + udpGSO bool closeOnce sync.Once nameOnce sync.Once // guards calling initNameCache, which sets following fields nameCache string // name of interface nameErr error + + readOpMu sync.Mutex // readOpMu guards readBuff + readBuff [virtioNetHdrLen + 65535]byte // if vnetHdr every read() is prefixed by virtioNetHdr + + writeOpMu sync.Mutex // writeOpMu guards toWrite, tcpGROTable + toWrite []int + tcpGROTable *tcpGROTable + udpGROTable *udpGROTable } func (tun *NativeTun) File() *os.File { @@ -323,57 +332,147 @@ func (tun *NativeTun) nameSlow() (string, error) { return unix.ByteSliceToString(ifr[:]), nil } -func (tun *NativeTun) Write(buf []byte, offset int) (int, error) { - if tun.nopi { - buf = buf[offset:] +func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) { + tun.writeOpMu.Lock() + defer func() { + tun.tcpGROTable.reset() + tun.udpGROTable.reset() + tun.writeOpMu.Unlock() + }() + var ( + errs error + total int + ) + tun.toWrite = tun.toWrite[:0] + if tun.vnetHdr { + err := handleGRO(bufs, offset, tun.tcpGROTable, tun.udpGROTable, tun.udpGSO, &tun.toWrite) + if err != nil { + return 0, err + } + offset -= virtioNetHdrLen } else { - // reserve space for header - buf = buf[offset-4:] - - // add packet information header - buf[0] = 0x00 - buf[1] = 0x00 - if buf[4]>>4 == ipv6.Version { - buf[2] = 0x86 - buf[3] = 0xdd + for i := range bufs { + tun.toWrite = append(tun.toWrite, i) + } + } + for _, bufsI := range tun.toWrite { + n, err := tun.tunFile.Write(bufs[bufsI][offset:]) + if errors.Is(err, syscall.EBADFD) { + return total, os.ErrClosed + } + if err != nil { + errs = errors.Join(errs, err) } else { - buf[2] = 0x08 - buf[3] = 0x00 + total += n } } + return total, errs +} - n, err := tun.tunFile.Write(buf) - if errors.Is(err, syscall.EBADFD) { - err = os.ErrClosed +// handleVirtioRead splits in into bufs, leaving offset bytes at the front of +// each buffer. It mutates sizes to reflect the size of each element of bufs, +// and returns the number of packets read. +func handleVirtioRead(in []byte, bufs [][]byte, sizes []int, offset int) (int, error) { + var hdr virtioNetHdr + err := hdr.decode(in) + if err != nil { + return 0, err + } + in = in[virtioNetHdrLen:] + if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_NONE { + if hdr.flags&unix.VIRTIO_NET_HDR_F_NEEDS_CSUM != 0 { + // This means CHECKSUM_PARTIAL in skb context. We are responsible + // for computing the checksum starting at hdr.csumStart and placing + // at hdr.csumOffset. + err = gsoNoneChecksum(in, hdr.csumStart, hdr.csumOffset) + if err != nil { + return 0, err + } + } + if len(in) > len(bufs[0][offset:]) { + return 0, fmt.Errorf("read len %d overflows bufs element len %d", len(in), len(bufs[0][offset:])) + } + n := copy(bufs[0][offset:], in) + sizes[0] = n + return 1, nil + } + if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 { + return 0, fmt.Errorf("unsupported virtio GSO type: %d", hdr.gsoType) } - return n, err -} -func (tun *NativeTun) Flush() error { - // TODO: can flushing be implemented by buffering and using sendmmsg? - return nil + ipVersion := in[0] >> 4 + switch ipVersion { + case 4: + if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 { + return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType) + } + case 6: + if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_UDP_L4 { + return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType) + } + default: + return 0, fmt.Errorf("invalid ip header version: %d", ipVersion) + } + + // Don't trust hdr.hdrLen from the kernel as it can be equal to the length + // of the entire first packet when the kernel is handling it as part of a + // FORWARD path. Instead, parse the transport header length and add it onto + // csumStart, which is synonymous for IP header length. + if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_UDP_L4 { + hdr.hdrLen = hdr.csumStart + 8 + } else { + if len(in) <= int(hdr.csumStart+12) { + return 0, errors.New("packet is too short") + } + + tcpHLen := uint16(in[hdr.csumStart+12] >> 4 * 4) + if tcpHLen < 20 || tcpHLen > 60 { + // A TCP header must be between 20 and 60 bytes in length. + return 0, fmt.Errorf("tcp header len is invalid: %d", tcpHLen) + } + hdr.hdrLen = hdr.csumStart + tcpHLen + } + + if len(in) < int(hdr.hdrLen) { + return 0, fmt.Errorf("length of packet (%d) < virtioNetHdr.hdrLen (%d)", len(in), hdr.hdrLen) + } + + if hdr.hdrLen < hdr.csumStart { + return 0, fmt.Errorf("virtioNetHdr.hdrLen (%d) < virtioNetHdr.csumStart (%d)", hdr.hdrLen, hdr.csumStart) + } + cSumAt := int(hdr.csumStart + hdr.csumOffset) + if cSumAt+1 >= len(in) { + return 0, fmt.Errorf("end of checksum offset (%d) exceeds packet length (%d)", cSumAt+1, len(in)) + } + + return gsoSplit(in, hdr, bufs, sizes, offset, ipVersion == 6) } -func (tun *NativeTun) Read(buf []byte, offset int) (n int, err error) { +func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) { + tun.readOpMu.Lock() + defer tun.readOpMu.Unlock() select { - case err = <-tun.errors: + case err := <-tun.errors: + return 0, err default: - if tun.nopi { - n, err = tun.tunFile.Read(buf[offset:]) + readInto := bufs[0][offset:] + if tun.vnetHdr { + readInto = tun.readBuff[:] + } + n, err := tun.tunFile.Read(readInto) + if errors.Is(err, syscall.EBADFD) { + err = os.ErrClosed + } + if err != nil { + return 0, err + } + if tun.vnetHdr { + return handleVirtioRead(readInto[:n], bufs, sizes, offset) } else { - buff := buf[offset-4:] - n, err = tun.tunFile.Read(buff[:]) - if errors.Is(err, syscall.EBADFD) { - err = os.ErrClosed - } - if n < 4 { - n = 0 - } else { - n -= 4 - } + sizes[0] = n + return 1, nil } } - return } func (tun *NativeTun) Events() <-chan Event { @@ -399,6 +498,56 @@ func (tun *NativeTun) Close() error { return err2 } +func (tun *NativeTun) BatchSize() int { + return tun.batchSize +} + +const ( + // TODO: support TSO with ECN bits + tunTCPOffloads = unix.TUN_F_CSUM | unix.TUN_F_TSO4 | unix.TUN_F_TSO6 + tunUDPOffloads = unix.TUN_F_USO4 | unix.TUN_F_USO6 +) + +func (tun *NativeTun) initFromFlags(name string) error { + sc, err := tun.tunFile.SyscallConn() + if err != nil { + return err + } + if e := sc.Control(func(fd uintptr) { + var ( + ifr *unix.Ifreq + ) + ifr, err = unix.NewIfreq(name) + if err != nil { + return + } + err = unix.IoctlIfreq(int(fd), unix.TUNGETIFF, ifr) + if err != nil { + return + } + got := ifr.Uint16() + if got&unix.IFF_VNET_HDR != 0 { + // tunTCPOffloads were added in Linux v2.6. We require their support + // if IFF_VNET_HDR is set. + err = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunTCPOffloads) + if err != nil { + return + } + tun.vnetHdr = true + tun.batchSize = conn.IdealBatchSize + // tunUDPOffloads were added in Linux v6.2. We do not return an + // error if they are unsupported at runtime. + tun.udpGSO = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunTCPOffloads|tunUDPOffloads) == nil + } else { + tun.batchSize = 1 + } + }); e != nil { + return e + } + return err +} + +// CreateTUN creates a Device with the provided name and MTU. func CreateTUN(name string, mtu int) (Device, error) { nfd, err := unix.Open(cloneDevicePath, unix.O_RDWR|unix.O_CLOEXEC, 0) if err != nil { @@ -408,25 +557,16 @@ func CreateTUN(name string, mtu int) (Device, error) { return nil, err } - var ifr [ifReqSize]byte - var flags uint16 = unix.IFF_TUN // | unix.IFF_NO_PI (disabled for TUN status hack) - nameBytes := []byte(name) - if len(nameBytes) >= unix.IFNAMSIZ { - unix.Close(nfd) - return nil, fmt.Errorf("interface name too long: %w", unix.ENAMETOOLONG) + ifr, err := unix.NewIfreq(name) + if err != nil { + return nil, err } - copy(ifr[:], nameBytes) - *(*uint16)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])) = flags - - _, _, errno := unix.Syscall( - unix.SYS_IOCTL, - uintptr(nfd), - uintptr(unix.TUNSETIFF), - uintptr(unsafe.Pointer(&ifr[0])), - ) - if errno != 0 { - unix.Close(nfd) - return nil, errno + // IFF_VNET_HDR enables the "tun status hack" via routineHackListener() + // where a null write will return EINVAL indicating the TUN is up. + ifr.SetUint16(unix.IFF_TUN | unix.IFF_NO_PI | unix.IFF_VNET_HDR) + err = unix.IoctlIfreq(nfd, unix.TUNSETIFF, ifr) + if err != nil { + return nil, err } err = unix.SetNonblock(nfd, true) @@ -441,13 +581,16 @@ func CreateTUN(name string, mtu int) (Device, error) { return CreateTUNFromFile(fd, mtu) } +// CreateTUNFromFile creates a Device from an os.File with the provided MTU. func CreateTUNFromFile(file *os.File, mtu int) (Device, error) { tun := &NativeTun{ tunFile: file, events: make(chan Event, 5), errors: make(chan error, 5), statusListenersShutdown: make(chan struct{}), - nopi: false, + tcpGROTable: newTCPGROTable(), + udpGROTable: newUDPGROTable(), + toWrite: make([]int, 0, conn.IdealBatchSize), } name, err := tun.Name() @@ -455,8 +598,12 @@ func CreateTUNFromFile(file *os.File, mtu int) (Device, error) { return nil, err } - // start event listener + err = tun.initFromFlags(name) + if err != nil { + return nil, err + } + // start event listener tun.index, err = getIFIndex(name) if err != nil { return nil, err @@ -485,6 +632,8 @@ func CreateTUNFromFile(file *os.File, mtu int) (Device, error) { return tun, nil } +// CreateUnmonitoredTUNFromFD creates a Device from the provided file +// descriptor. func CreateUnmonitoredTUNFromFD(fd int) (Device, string, error) { err := unix.SetNonblock(fd, true) if err != nil { @@ -492,14 +641,20 @@ func CreateUnmonitoredTUNFromFD(fd int) (Device, string, error) { } file := os.NewFile(uintptr(fd), "/dev/tun") tun := &NativeTun{ - tunFile: file, - events: make(chan Event, 5), - errors: make(chan error, 5), - nopi: true, + tunFile: file, + events: make(chan Event, 5), + errors: make(chan error, 5), + tcpGROTable: newTCPGROTable(), + udpGROTable: newUDPGROTable(), + toWrite: make([]int, 0, conn.IdealBatchSize), } name, err := tun.Name() if err != nil { return nil, "", err } - return tun, name, nil + err = tun.initFromFlags(name) + if err != nil { + return nil, "", err + } + return tun, name, err } diff --git a/vendor/golang.zx2c4.com/wireguard/tun/tun_openbsd.go b/vendor/golang.zx2c4.com/wireguard/tun/tun_openbsd.go index e7fd79c5..ae571b90 100644 --- a/vendor/golang.zx2c4.com/wireguard/tun/tun_openbsd.go +++ b/vendor/golang.zx2c4.com/wireguard/tun/tun_openbsd.go @@ -8,13 +8,13 @@ package tun import ( "errors" "fmt" + "io" "net" "os" "sync" "syscall" "unsafe" - "golang.org/x/net/ipv6" "golang.org/x/sys/unix" ) @@ -204,45 +204,43 @@ func (tun *NativeTun) Events() <-chan Event { return tun.events } -func (tun *NativeTun) Read(buff []byte, offset int) (int, error) { +func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) { select { case err := <-tun.errors: return 0, err default: - buff := buff[offset-4:] - n, err := tun.tunFile.Read(buff[:]) + buf := bufs[0][offset-4:] + n, err := tun.tunFile.Read(buf[:]) if n < 4 { return 0, err } - return n - 4, err + sizes[0] = n - 4 + return 1, err } } -func (tun *NativeTun) Write(buff []byte, offset int) (int, error) { - // reserve space for header - - buff = buff[offset-4:] - - // add packet information header - - buff[0] = 0x00 - buff[1] = 0x00 - buff[2] = 0x00 - - if buff[4]>>4 == ipv6.Version { - buff[3] = unix.AF_INET6 - } else { - buff[3] = unix.AF_INET +func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) { + if offset < 4 { + return 0, io.ErrShortBuffer } - - // write - - return tun.tunFile.Write(buff) -} - -func (tun *NativeTun) Flush() error { - // TODO: can flushing be implemented by buffering and using sendmmsg? - return nil + for i, buf := range bufs { + buf = buf[offset-4:] + buf[0] = 0x00 + buf[1] = 0x00 + buf[2] = 0x00 + switch buf[4] >> 4 { + case 4: + buf[3] = unix.AF_INET + case 6: + buf[3] = unix.AF_INET6 + default: + return i, unix.EAFNOSUPPORT + } + if _, err := tun.tunFile.Write(buf); err != nil { + return i, err + } + } + return len(bufs), nil } func (tun *NativeTun) Close() error { @@ -329,3 +327,7 @@ func (tun *NativeTun) MTU() (int, error) { return int(*(*int32)(unsafe.Pointer(&ifr.MTU))), nil } + +func (tun *NativeTun) BatchSize() int { + return 1 +} diff --git a/vendor/golang.zx2c4.com/wireguard/tun/tun_windows.go b/vendor/golang.zx2c4.com/wireguard/tun/tun_windows.go index d5abb148..2af8e3e9 100644 --- a/vendor/golang.zx2c4.com/wireguard/tun/tun_windows.go +++ b/vendor/golang.zx2c4.com/wireguard/tun/tun_windows.go @@ -15,7 +15,6 @@ import ( _ "unsafe" "golang.org/x/sys/windows" - "golang.zx2c4.com/wintun" ) @@ -44,6 +43,7 @@ type NativeTun struct { closeOnce sync.Once close atomic.Bool forcedMTU int + outSizes []int } var ( @@ -127,6 +127,9 @@ func (tun *NativeTun) MTU() (int, error) { // TODO: This is a temporary hack. We really need to be monitoring the interface in real time and adapting to MTU changes. func (tun *NativeTun) ForceMTU(mtu int) { + if tun.close.Load() { + return + } update := tun.forcedMTU != mtu tun.forcedMTU = mtu if update { @@ -134,9 +137,14 @@ func (tun *NativeTun) ForceMTU(mtu int) { } } +func (tun *NativeTun) BatchSize() int { + // TODO: implement batching with wintun + return 1 +} + // Note: Read() and Write() assume the caller comes only from a single thread; there's no locking. -func (tun *NativeTun) Read(buff []byte, offset int) (int, error) { +func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) { tun.running.Add(1) defer tun.running.Done() retry: @@ -152,11 +160,11 @@ retry: packet, err := tun.session.ReceivePacket() switch err { case nil: - packetSize := len(packet) - copy(buff[offset:], packet) + n := copy(bufs[0][offset:], packet) + sizes[0] = n tun.session.ReleaseReceivePacket(packet) - tun.rate.update(uint64(packetSize)) - return packetSize, nil + tun.rate.update(uint64(n)) + return 1, nil case windows.ERROR_NO_MORE_ITEMS: if !shouldSpin || uint64(nanotime()-start) >= spinloopDuration { windows.WaitForSingleObject(tun.readWait, windows.INFINITE) @@ -173,33 +181,33 @@ retry: } } -func (tun *NativeTun) Flush() error { - return nil -} - -func (tun *NativeTun) Write(buff []byte, offset int) (int, error) { +func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) { tun.running.Add(1) defer tun.running.Done() if tun.close.Load() { return 0, os.ErrClosed } - packetSize := len(buff) - offset - tun.rate.update(uint64(packetSize)) + for i, buf := range bufs { + packetSize := len(buf) - offset + tun.rate.update(uint64(packetSize)) - packet, err := tun.session.AllocateSendPacket(packetSize) - if err == nil { - copy(packet, buff[offset:]) - tun.session.SendPacket(packet) - return packetSize, nil - } - switch err { - case windows.ERROR_HANDLE_EOF: - return 0, os.ErrClosed - case windows.ERROR_BUFFER_OVERFLOW: - return 0, nil // Dropping when ring is full. + packet, err := tun.session.AllocateSendPacket(packetSize) + switch err { + case nil: + // TODO: Explore options to eliminate this copy. + copy(packet, buf[offset:]) + tun.session.SendPacket(packet) + continue + case windows.ERROR_HANDLE_EOF: + return i, os.ErrClosed + case windows.ERROR_BUFFER_OVERFLOW: + continue // Dropping when ring is full. + default: + return i, fmt.Errorf("Write failed: %w", err) + } } - return 0, fmt.Errorf("Write failed: %w", err) + return len(bufs), nil } // LUID returns Windows interface instance ID. diff --git a/vendor/google.golang.org/protobuf/encoding/prototext/decode.go b/vendor/google.golang.org/protobuf/encoding/prototext/decode.go index 4921b2d4..24bc98ac 100644 --- a/vendor/google.golang.org/protobuf/encoding/prototext/decode.go +++ b/vendor/google.golang.org/protobuf/encoding/prototext/decode.go @@ -21,7 +21,7 @@ import ( "google.golang.org/protobuf/reflect/protoregistry" ) -// Unmarshal reads the given []byte into the given proto.Message. +// Unmarshal reads the given []byte into the given [proto.Message]. // The provided message must be mutable (e.g., a non-nil pointer to a message). func Unmarshal(b []byte, m proto.Message) error { return UnmarshalOptions{}.Unmarshal(b, m) @@ -51,7 +51,7 @@ type UnmarshalOptions struct { } } -// Unmarshal reads the given []byte and populates the given proto.Message +// Unmarshal reads the given []byte and populates the given [proto.Message] // using options in the UnmarshalOptions object. // The provided message must be mutable (e.g., a non-nil pointer to a message). func (o UnmarshalOptions) Unmarshal(b []byte, m proto.Message) error { @@ -84,7 +84,7 @@ type decoder struct { } // newError returns an error object with position info. -func (d decoder) newError(pos int, f string, x ...interface{}) error { +func (d decoder) newError(pos int, f string, x ...any) error { line, column := d.Position(pos) head := fmt.Sprintf("(line %d:%d): ", line, column) return errors.New(head+f, x...) @@ -96,7 +96,7 @@ func (d decoder) unexpectedTokenError(tok text.Token) error { } // syntaxError returns a syntax error for given position. -func (d decoder) syntaxError(pos int, f string, x ...interface{}) error { +func (d decoder) syntaxError(pos int, f string, x ...any) error { line, column := d.Position(pos) head := fmt.Sprintf("syntax error (line %d:%d): ", line, column) return errors.New(head+f, x...) @@ -739,7 +739,9 @@ func (d decoder) skipValue() error { case text.ListClose: return nil case text.MessageOpen: - return d.skipMessageValue() + if err := d.skipMessageValue(); err != nil { + return err + } default: // Skip items. This will not validate whether skipped values are // of the same type or not, same behavior as C++ diff --git a/vendor/google.golang.org/protobuf/encoding/prototext/encode.go b/vendor/google.golang.org/protobuf/encoding/prototext/encode.go index 722a7b41..1f57e661 100644 --- a/vendor/google.golang.org/protobuf/encoding/prototext/encode.go +++ b/vendor/google.golang.org/protobuf/encoding/prototext/encode.go @@ -27,15 +27,17 @@ const defaultIndent = " " // Format formats the message as a multiline string. // This function is only intended for human consumption and ignores errors. -// Do not depend on the output being stable. It may change over time across -// different versions of the program. +// Do not depend on the output being stable. Its output will change across +// different builds of your program, even when using the same version of the +// protobuf module. func Format(m proto.Message) string { return MarshalOptions{Multiline: true}.Format(m) } -// Marshal writes the given proto.Message in textproto format using default -// options. Do not depend on the output being stable. It may change over time -// across different versions of the program. +// Marshal writes the given [proto.Message] in textproto format using default +// options. Do not depend on the output being stable. Its output will change +// across different builds of your program, even when using the same version of +// the protobuf module. func Marshal(m proto.Message) ([]byte, error) { return MarshalOptions{}.Marshal(m) } @@ -84,8 +86,9 @@ type MarshalOptions struct { // Format formats the message as a string. // This method is only intended for human consumption and ignores errors. -// Do not depend on the output being stable. It may change over time across -// different versions of the program. +// Do not depend on the output being stable. Its output will change across +// different builds of your program, even when using the same version of the +// protobuf module. func (o MarshalOptions) Format(m proto.Message) string { if m == nil || !m.ProtoReflect().IsValid() { return "" // invalid syntax, but okay since this is for debugging @@ -97,9 +100,10 @@ func (o MarshalOptions) Format(m proto.Message) string { return string(b) } -// Marshal writes the given proto.Message in textproto format using options in -// MarshalOptions object. Do not depend on the output being stable. It may -// change over time across different versions of the program. +// Marshal writes the given [proto.Message] in textproto format using options in +// MarshalOptions object. Do not depend on the output being stable. Its output +// will change across different builds of your program, even when using the +// same version of the protobuf module. func (o MarshalOptions) Marshal(m proto.Message) ([]byte, error) { return o.marshal(nil, m) } diff --git a/vendor/google.golang.org/protobuf/encoding/protowire/wire.go b/vendor/google.golang.org/protobuf/encoding/protowire/wire.go index f4b4686c..e942bc98 100644 --- a/vendor/google.golang.org/protobuf/encoding/protowire/wire.go +++ b/vendor/google.golang.org/protobuf/encoding/protowire/wire.go @@ -6,7 +6,7 @@ // See https://protobuf.dev/programming-guides/encoding. // // For marshaling and unmarshaling entire protobuf messages, -// use the "google.golang.org/protobuf/proto" package instead. +// use the [google.golang.org/protobuf/proto] package instead. package protowire import ( @@ -87,7 +87,7 @@ func ParseError(n int) error { // ConsumeField parses an entire field record (both tag and value) and returns // the field number, the wire type, and the total length. -// This returns a negative length upon an error (see ParseError). +// This returns a negative length upon an error (see [ParseError]). // // The total length includes the tag header and the end group marker (if the // field is a group). @@ -104,8 +104,8 @@ func ConsumeField(b []byte) (Number, Type, int) { } // ConsumeFieldValue parses a field value and returns its length. -// This assumes that the field Number and wire Type have already been parsed. -// This returns a negative length upon an error (see ParseError). +// This assumes that the field [Number] and wire [Type] have already been parsed. +// This returns a negative length upon an error (see [ParseError]). // // When parsing a group, the length includes the end group marker and // the end group is verified to match the starting field number. @@ -164,7 +164,7 @@ func AppendTag(b []byte, num Number, typ Type) []byte { } // ConsumeTag parses b as a varint-encoded tag, reporting its length. -// This returns a negative length upon an error (see ParseError). +// This returns a negative length upon an error (see [ParseError]). func ConsumeTag(b []byte) (Number, Type, int) { v, n := ConsumeVarint(b) if n < 0 { @@ -263,7 +263,7 @@ func AppendVarint(b []byte, v uint64) []byte { } // ConsumeVarint parses b as a varint-encoded uint64, reporting its length. -// This returns a negative length upon an error (see ParseError). +// This returns a negative length upon an error (see [ParseError]). func ConsumeVarint(b []byte) (v uint64, n int) { var y uint64 if len(b) <= 0 { @@ -384,7 +384,7 @@ func AppendFixed32(b []byte, v uint32) []byte { } // ConsumeFixed32 parses b as a little-endian uint32, reporting its length. -// This returns a negative length upon an error (see ParseError). +// This returns a negative length upon an error (see [ParseError]). func ConsumeFixed32(b []byte) (v uint32, n int) { if len(b) < 4 { return 0, errCodeTruncated @@ -412,7 +412,7 @@ func AppendFixed64(b []byte, v uint64) []byte { } // ConsumeFixed64 parses b as a little-endian uint64, reporting its length. -// This returns a negative length upon an error (see ParseError). +// This returns a negative length upon an error (see [ParseError]). func ConsumeFixed64(b []byte) (v uint64, n int) { if len(b) < 8 { return 0, errCodeTruncated @@ -432,7 +432,7 @@ func AppendBytes(b []byte, v []byte) []byte { } // ConsumeBytes parses b as a length-prefixed bytes value, reporting its length. -// This returns a negative length upon an error (see ParseError). +// This returns a negative length upon an error (see [ParseError]). func ConsumeBytes(b []byte) (v []byte, n int) { m, n := ConsumeVarint(b) if n < 0 { @@ -456,7 +456,7 @@ func AppendString(b []byte, v string) []byte { } // ConsumeString parses b as a length-prefixed bytes value, reporting its length. -// This returns a negative length upon an error (see ParseError). +// This returns a negative length upon an error (see [ParseError]). func ConsumeString(b []byte) (v string, n int) { bb, n := ConsumeBytes(b) return string(bb), n @@ -471,7 +471,7 @@ func AppendGroup(b []byte, num Number, v []byte) []byte { // ConsumeGroup parses b as a group value until the trailing end group marker, // and verifies that the end marker matches the provided num. The value v // does not contain the end marker, while the length does contain the end marker. -// This returns a negative length upon an error (see ParseError). +// This returns a negative length upon an error (see [ParseError]). func ConsumeGroup(num Number, b []byte) (v []byte, n int) { n = ConsumeFieldValue(num, StartGroupType, b) if n < 0 { @@ -495,8 +495,8 @@ func SizeGroup(num Number, n int) int { return n + SizeTag(num) } -// DecodeTag decodes the field Number and wire Type from its unified form. -// The Number is -1 if the decoded field number overflows int32. +// DecodeTag decodes the field [Number] and wire [Type] from its unified form. +// The [Number] is -1 if the decoded field number overflows int32. // Other than overflow, this does not check for field number validity. func DecodeTag(x uint64) (Number, Type) { // NOTE: MessageSet allows for larger field numbers than normal. @@ -506,7 +506,7 @@ func DecodeTag(x uint64) (Number, Type) { return Number(x >> 3), Type(x & 7) } -// EncodeTag encodes the field Number and wire Type into its unified form. +// EncodeTag encodes the field [Number] and wire [Type] into its unified form. func EncodeTag(num Number, typ Type) uint64 { return uint64(num)<<3 | uint64(typ&7) } diff --git a/vendor/google.golang.org/protobuf/internal/descfmt/stringer.go b/vendor/google.golang.org/protobuf/internal/descfmt/stringer.go index db5248e1..87e46bd4 100644 --- a/vendor/google.golang.org/protobuf/internal/descfmt/stringer.go +++ b/vendor/google.golang.org/protobuf/internal/descfmt/stringer.go @@ -83,7 +83,13 @@ func formatListOpt(vs list, isRoot, allowMulti bool) string { case protoreflect.FileImports: for i := 0; i < vs.Len(); i++ { var rs records - rs.Append(reflect.ValueOf(vs.Get(i)), "Path", "Package", "IsPublic", "IsWeak") + rv := reflect.ValueOf(vs.Get(i)) + rs.Append(rv, []methodAndName{ + {rv.MethodByName("Path"), "Path"}, + {rv.MethodByName("Package"), "Package"}, + {rv.MethodByName("IsPublic"), "IsPublic"}, + {rv.MethodByName("IsWeak"), "IsWeak"}, + }...) ss = append(ss, "{"+rs.Join()+"}") } return start + joinStrings(ss, allowMulti) + end @@ -92,34 +98,26 @@ func formatListOpt(vs list, isRoot, allowMulti bool) string { for i := 0; i < vs.Len(); i++ { m := reflect.ValueOf(vs).MethodByName("Get") v := m.Call([]reflect.Value{reflect.ValueOf(i)})[0].Interface() - ss = append(ss, formatDescOpt(v.(protoreflect.Descriptor), false, allowMulti && !isEnumValue)) + ss = append(ss, formatDescOpt(v.(protoreflect.Descriptor), false, allowMulti && !isEnumValue, nil)) } return start + joinStrings(ss, allowMulti && isEnumValue) + end } } -// descriptorAccessors is a list of accessors to print for each descriptor. -// -// Do not print all accessors since some contain redundant information, -// while others are pointers that we do not want to follow since the descriptor -// is actually a cyclic graph. -// -// Using a list allows us to print the accessors in a sensible order. -var descriptorAccessors = map[reflect.Type][]string{ - reflect.TypeOf((*protoreflect.FileDescriptor)(nil)).Elem(): {"Path", "Package", "Imports", "Messages", "Enums", "Extensions", "Services"}, - reflect.TypeOf((*protoreflect.MessageDescriptor)(nil)).Elem(): {"IsMapEntry", "Fields", "Oneofs", "ReservedNames", "ReservedRanges", "RequiredNumbers", "ExtensionRanges", "Messages", "Enums", "Extensions"}, - reflect.TypeOf((*protoreflect.FieldDescriptor)(nil)).Elem(): {"Number", "Cardinality", "Kind", "HasJSONName", "JSONName", "HasPresence", "IsExtension", "IsPacked", "IsWeak", "IsList", "IsMap", "MapKey", "MapValue", "HasDefault", "Default", "ContainingOneof", "ContainingMessage", "Message", "Enum"}, - reflect.TypeOf((*protoreflect.OneofDescriptor)(nil)).Elem(): {"Fields"}, // not directly used; must keep in sync with formatDescOpt - reflect.TypeOf((*protoreflect.EnumDescriptor)(nil)).Elem(): {"Values", "ReservedNames", "ReservedRanges"}, - reflect.TypeOf((*protoreflect.EnumValueDescriptor)(nil)).Elem(): {"Number"}, - reflect.TypeOf((*protoreflect.ServiceDescriptor)(nil)).Elem(): {"Methods"}, - reflect.TypeOf((*protoreflect.MethodDescriptor)(nil)).Elem(): {"Input", "Output", "IsStreamingClient", "IsStreamingServer"}, +type methodAndName struct { + method reflect.Value + name string } func FormatDesc(s fmt.State, r rune, t protoreflect.Descriptor) { - io.WriteString(s, formatDescOpt(t, true, r == 'v' && (s.Flag('+') || s.Flag('#')))) + io.WriteString(s, formatDescOpt(t, true, r == 'v' && (s.Flag('+') || s.Flag('#')), nil)) } -func formatDescOpt(t protoreflect.Descriptor, isRoot, allowMulti bool) string { + +func InternalFormatDescOptForTesting(t protoreflect.Descriptor, isRoot, allowMulti bool, record func(string)) string { + return formatDescOpt(t, isRoot, allowMulti, record) +} + +func formatDescOpt(t protoreflect.Descriptor, isRoot, allowMulti bool, record func(string)) string { rv := reflect.ValueOf(t) rt := rv.MethodByName("ProtoType").Type().In(0) @@ -129,26 +127,60 @@ func formatDescOpt(t protoreflect.Descriptor, isRoot, allowMulti bool) string { } _, isFile := t.(protoreflect.FileDescriptor) - rs := records{allowMulti: allowMulti} + rs := records{ + allowMulti: allowMulti, + record: record, + } if t.IsPlaceholder() { if isFile { - rs.Append(rv, "Path", "Package", "IsPlaceholder") + rs.Append(rv, []methodAndName{ + {rv.MethodByName("Path"), "Path"}, + {rv.MethodByName("Package"), "Package"}, + {rv.MethodByName("IsPlaceholder"), "IsPlaceholder"}, + }...) } else { - rs.Append(rv, "FullName", "IsPlaceholder") + rs.Append(rv, []methodAndName{ + {rv.MethodByName("FullName"), "FullName"}, + {rv.MethodByName("IsPlaceholder"), "IsPlaceholder"}, + }...) } } else { switch { case isFile: - rs.Append(rv, "Syntax") + rs.Append(rv, methodAndName{rv.MethodByName("Syntax"), "Syntax"}) case isRoot: - rs.Append(rv, "Syntax", "FullName") + rs.Append(rv, []methodAndName{ + {rv.MethodByName("Syntax"), "Syntax"}, + {rv.MethodByName("FullName"), "FullName"}, + }...) default: - rs.Append(rv, "Name") + rs.Append(rv, methodAndName{rv.MethodByName("Name"), "Name"}) } switch t := t.(type) { case protoreflect.FieldDescriptor: - for _, s := range descriptorAccessors[rt] { - switch s { + accessors := []methodAndName{ + {rv.MethodByName("Number"), "Number"}, + {rv.MethodByName("Cardinality"), "Cardinality"}, + {rv.MethodByName("Kind"), "Kind"}, + {rv.MethodByName("HasJSONName"), "HasJSONName"}, + {rv.MethodByName("JSONName"), "JSONName"}, + {rv.MethodByName("HasPresence"), "HasPresence"}, + {rv.MethodByName("IsExtension"), "IsExtension"}, + {rv.MethodByName("IsPacked"), "IsPacked"}, + {rv.MethodByName("IsWeak"), "IsWeak"}, + {rv.MethodByName("IsList"), "IsList"}, + {rv.MethodByName("IsMap"), "IsMap"}, + {rv.MethodByName("MapKey"), "MapKey"}, + {rv.MethodByName("MapValue"), "MapValue"}, + {rv.MethodByName("HasDefault"), "HasDefault"}, + {rv.MethodByName("Default"), "Default"}, + {rv.MethodByName("ContainingOneof"), "ContainingOneof"}, + {rv.MethodByName("ContainingMessage"), "ContainingMessage"}, + {rv.MethodByName("Message"), "Message"}, + {rv.MethodByName("Enum"), "Enum"}, + } + for _, s := range accessors { + switch s.name { case "MapKey": if k := t.MapKey(); k != nil { rs.recs = append(rs.recs, [2]string{"MapKey", k.Kind().String()}) @@ -157,20 +189,20 @@ func formatDescOpt(t protoreflect.Descriptor, isRoot, allowMulti bool) string { if v := t.MapValue(); v != nil { switch v.Kind() { case protoreflect.EnumKind: - rs.recs = append(rs.recs, [2]string{"MapValue", string(v.Enum().FullName())}) + rs.AppendRecs("MapValue", [2]string{"MapValue", string(v.Enum().FullName())}) case protoreflect.MessageKind, protoreflect.GroupKind: - rs.recs = append(rs.recs, [2]string{"MapValue", string(v.Message().FullName())}) + rs.AppendRecs("MapValue", [2]string{"MapValue", string(v.Message().FullName())}) default: - rs.recs = append(rs.recs, [2]string{"MapValue", v.Kind().String()}) + rs.AppendRecs("MapValue", [2]string{"MapValue", v.Kind().String()}) } } case "ContainingOneof": if od := t.ContainingOneof(); od != nil { - rs.recs = append(rs.recs, [2]string{"Oneof", string(od.Name())}) + rs.AppendRecs("ContainingOneof", [2]string{"Oneof", string(od.Name())}) } case "ContainingMessage": if t.IsExtension() { - rs.recs = append(rs.recs, [2]string{"Extendee", string(t.ContainingMessage().FullName())}) + rs.AppendRecs("ContainingMessage", [2]string{"Extendee", string(t.ContainingMessage().FullName())}) } case "Message": if !t.IsMap() { @@ -187,13 +219,62 @@ func formatDescOpt(t protoreflect.Descriptor, isRoot, allowMulti bool) string { ss = append(ss, string(fs.Get(i).Name())) } if len(ss) > 0 { - rs.recs = append(rs.recs, [2]string{"Fields", "[" + joinStrings(ss, false) + "]"}) + rs.AppendRecs("Fields", [2]string{"Fields", "[" + joinStrings(ss, false) + "]"}) } - default: - rs.Append(rv, descriptorAccessors[rt]...) + + case protoreflect.FileDescriptor: + rs.Append(rv, []methodAndName{ + {rv.MethodByName("Path"), "Path"}, + {rv.MethodByName("Package"), "Package"}, + {rv.MethodByName("Imports"), "Imports"}, + {rv.MethodByName("Messages"), "Messages"}, + {rv.MethodByName("Enums"), "Enums"}, + {rv.MethodByName("Extensions"), "Extensions"}, + {rv.MethodByName("Services"), "Services"}, + }...) + + case protoreflect.MessageDescriptor: + rs.Append(rv, []methodAndName{ + {rv.MethodByName("IsMapEntry"), "IsMapEntry"}, + {rv.MethodByName("Fields"), "Fields"}, + {rv.MethodByName("Oneofs"), "Oneofs"}, + {rv.MethodByName("ReservedNames"), "ReservedNames"}, + {rv.MethodByName("ReservedRanges"), "ReservedRanges"}, + {rv.MethodByName("RequiredNumbers"), "RequiredNumbers"}, + {rv.MethodByName("ExtensionRanges"), "ExtensionRanges"}, + {rv.MethodByName("Messages"), "Messages"}, + {rv.MethodByName("Enums"), "Enums"}, + {rv.MethodByName("Extensions"), "Extensions"}, + }...) + + case protoreflect.EnumDescriptor: + rs.Append(rv, []methodAndName{ + {rv.MethodByName("Values"), "Values"}, + {rv.MethodByName("ReservedNames"), "ReservedNames"}, + {rv.MethodByName("ReservedRanges"), "ReservedRanges"}, + {rv.MethodByName("IsClosed"), "IsClosed"}, + }...) + + case protoreflect.EnumValueDescriptor: + rs.Append(rv, []methodAndName{ + {rv.MethodByName("Number"), "Number"}, + }...) + + case protoreflect.ServiceDescriptor: + rs.Append(rv, []methodAndName{ + {rv.MethodByName("Methods"), "Methods"}, + }...) + + case protoreflect.MethodDescriptor: + rs.Append(rv, []methodAndName{ + {rv.MethodByName("Input"), "Input"}, + {rv.MethodByName("Output"), "Output"}, + {rv.MethodByName("IsStreamingClient"), "IsStreamingClient"}, + {rv.MethodByName("IsStreamingServer"), "IsStreamingServer"}, + }...) } - if rv.MethodByName("GoType").IsValid() { - rs.Append(rv, "GoType") + if m := rv.MethodByName("GoType"); m.IsValid() { + rs.Append(rv, methodAndName{m, "GoType"}) } } return start + rs.Join() + end @@ -202,19 +283,34 @@ func formatDescOpt(t protoreflect.Descriptor, isRoot, allowMulti bool) string { type records struct { recs [][2]string allowMulti bool + + // record is a function that will be called for every Append() or + // AppendRecs() call, to be used for testing with the + // InternalFormatDescOptForTesting function. + record func(string) } -func (rs *records) Append(v reflect.Value, accessors ...string) { +func (rs *records) AppendRecs(fieldName string, newRecs [2]string) { + if rs.record != nil { + rs.record(fieldName) + } + rs.recs = append(rs.recs, newRecs) +} + +func (rs *records) Append(v reflect.Value, accessors ...methodAndName) { for _, a := range accessors { + if rs.record != nil { + rs.record(a.name) + } var rv reflect.Value - if m := v.MethodByName(a); m.IsValid() { - rv = m.Call(nil)[0] + if a.method.IsValid() { + rv = a.method.Call(nil)[0] } if v.Kind() == reflect.Struct && !rv.IsValid() { - rv = v.FieldByName(a) + rv = v.FieldByName(a.name) } if !rv.IsValid() { - panic(fmt.Sprintf("unknown accessor: %v.%s", v.Type(), a)) + panic(fmt.Sprintf("unknown accessor: %v.%s", v.Type(), a.name)) } if _, ok := rv.Interface().(protoreflect.Value); ok { rv = rv.MethodByName("Interface").Call(nil)[0] @@ -261,7 +357,7 @@ func (rs *records) Append(v reflect.Value, accessors ...string) { default: s = fmt.Sprint(v) } - rs.recs = append(rs.recs, [2]string{a, s}) + rs.recs = append(rs.recs, [2]string{a.name, s}) } } diff --git a/vendor/google.golang.org/protobuf/internal/descopts/options.go b/vendor/google.golang.org/protobuf/internal/descopts/options.go index 8401be8c..024ffebd 100644 --- a/vendor/google.golang.org/protobuf/internal/descopts/options.go +++ b/vendor/google.golang.org/protobuf/internal/descopts/options.go @@ -9,7 +9,7 @@ // dependency on the descriptor proto package). package descopts -import pref "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" // These variables are set by the init function in descriptor.pb.go via logic // in internal/filetype. In other words, so long as the descriptor proto package @@ -17,13 +17,13 @@ import pref "google.golang.org/protobuf/reflect/protoreflect" // // Each variable is populated with a nil pointer to the options struct. var ( - File pref.ProtoMessage - Enum pref.ProtoMessage - EnumValue pref.ProtoMessage - Message pref.ProtoMessage - Field pref.ProtoMessage - Oneof pref.ProtoMessage - ExtensionRange pref.ProtoMessage - Service pref.ProtoMessage - Method pref.ProtoMessage + File protoreflect.ProtoMessage + Enum protoreflect.ProtoMessage + EnumValue protoreflect.ProtoMessage + Message protoreflect.ProtoMessage + Field protoreflect.ProtoMessage + Oneof protoreflect.ProtoMessage + ExtensionRange protoreflect.ProtoMessage + Service protoreflect.ProtoMessage + Method protoreflect.ProtoMessage ) diff --git a/vendor/google.golang.org/protobuf/internal/editiondefaults/defaults.go b/vendor/google.golang.org/protobuf/internal/editiondefaults/defaults.go new file mode 100644 index 00000000..14656b65 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/editiondefaults/defaults.go @@ -0,0 +1,12 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package editiondefaults contains the binary representation of the editions +// defaults. +package editiondefaults + +import _ "embed" + +//go:embed editions_defaults.binpb +var Defaults []byte diff --git a/vendor/google.golang.org/protobuf/internal/editiondefaults/editions_defaults.binpb b/vendor/google.golang.org/protobuf/internal/editiondefaults/editions_defaults.binpb new file mode 100644 index 0000000000000000000000000000000000000000..ff6a38360add36f53d48bb0863b701696e0d7b2d GIT binary patch literal 93 zcmd;*mUzal#C*w)K}(Q>QGiK;Nr72|(SYfa9TNv5m$bxlxFnMRqXeS@6Ht;7B*_4j Ve8H{+(u69m1u{(G8N0>{b^xZ!4_5#H literal 0 HcmV?d00001 diff --git a/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go b/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go new file mode 100644 index 00000000..08dad769 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/editionssupport/editions.go @@ -0,0 +1,13 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package editionssupport defines constants for editions that are supported. +package editionssupport + +import "google.golang.org/protobuf/types/descriptorpb" + +const ( + Minimum = descriptorpb.Edition_EDITION_PROTO2 + Maximum = descriptorpb.Edition_EDITION_2023 +) diff --git a/vendor/google.golang.org/protobuf/internal/encoding/tag/tag.go b/vendor/google.golang.org/protobuf/internal/encoding/tag/tag.go index 373d2083..7e87c760 100644 --- a/vendor/google.golang.org/protobuf/internal/encoding/tag/tag.go +++ b/vendor/google.golang.org/protobuf/internal/encoding/tag/tag.go @@ -32,6 +32,7 @@ var byteType = reflect.TypeOf(byte(0)) func Unmarshal(tag string, goType reflect.Type, evs protoreflect.EnumValueDescriptors) protoreflect.FieldDescriptor { f := new(filedesc.Field) f.L0.ParentFile = filedesc.SurrogateProto2 + f.L1.EditionFeatures = f.L0.ParentFile.L1.EditionFeatures for len(tag) > 0 { i := strings.IndexByte(tag, ',') if i < 0 { @@ -107,8 +108,7 @@ func Unmarshal(tag string, goType reflect.Type, evs protoreflect.EnumValueDescri f.L1.StringName.InitJSON(jsonName) } case s == "packed": - f.L1.HasPacked = true - f.L1.IsPacked = true + f.L1.EditionFeatures.IsPacked = true case strings.HasPrefix(s, "weak="): f.L1.IsWeak = true f.L1.Message = filedesc.PlaceholderMessage(protoreflect.FullName(s[len("weak="):])) diff --git a/vendor/google.golang.org/protobuf/internal/encoding/text/decode.go b/vendor/google.golang.org/protobuf/internal/encoding/text/decode.go index 87853e78..099b2bf4 100644 --- a/vendor/google.golang.org/protobuf/internal/encoding/text/decode.go +++ b/vendor/google.golang.org/protobuf/internal/encoding/text/decode.go @@ -601,7 +601,7 @@ func (d *Decoder) consumeToken(kind Kind, size int, attrs uint8) Token { // newSyntaxError returns a syntax error with line and column information for // current position. -func (d *Decoder) newSyntaxError(f string, x ...interface{}) error { +func (d *Decoder) newSyntaxError(f string, x ...any) error { e := errors.New(f, x...) line, column := d.Position(len(d.orig) - len(d.in)) return errors.New("syntax error (line %d:%d): %v", line, column, e) diff --git a/vendor/google.golang.org/protobuf/internal/errors/errors.go b/vendor/google.golang.org/protobuf/internal/errors/errors.go index 20c17b35..c2d6bd52 100644 --- a/vendor/google.golang.org/protobuf/internal/errors/errors.go +++ b/vendor/google.golang.org/protobuf/internal/errors/errors.go @@ -17,7 +17,7 @@ var Error = errors.New("protobuf error") // New formats a string according to the format specifier and arguments and // returns an error that has a "proto" prefix. -func New(f string, x ...interface{}) error { +func New(f string, x ...any) error { return &prefixError{s: format(f, x...)} } @@ -43,7 +43,7 @@ func (e *prefixError) Unwrap() error { // Wrap returns an error that has a "proto" prefix, the formatted string described // by the format specifier and arguments, and a suffix of err. The error wraps err. -func Wrap(err error, f string, x ...interface{}) error { +func Wrap(err error, f string, x ...any) error { return &wrapError{ s: format(f, x...), err: err, @@ -67,7 +67,7 @@ func (e *wrapError) Is(target error) bool { return target == Error } -func format(f string, x ...interface{}) string { +func format(f string, x ...any) string { // avoid "proto: " prefix when chaining for i := 0; i < len(x); i++ { switch e := x[i].(type) { @@ -87,3 +87,18 @@ func InvalidUTF8(name string) error { func RequiredNotSet(name string) error { return New("required field %v not set", name) } + +type SizeMismatchError struct { + Calculated, Measured int +} + +func (e *SizeMismatchError) Error() string { + return fmt.Sprintf("size mismatch (see https://github.com/golang/protobuf/issues/1609): calculated=%d, measured=%d", e.Calculated, e.Measured) +} + +func MismatchedSizeCalculation(calculated, measured int) error { + return &SizeMismatchError{ + Calculated: calculated, + Measured: measured, + } +} diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/desc.go b/vendor/google.golang.org/protobuf/internal/filedesc/desc.go index 7c3689ba..fa790e0f 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/desc.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/desc.go @@ -7,6 +7,7 @@ package filedesc import ( "bytes" "fmt" + "strings" "sync" "sync/atomic" @@ -21,11 +22,26 @@ import ( "google.golang.org/protobuf/reflect/protoregistry" ) +// Edition is an Enum for proto2.Edition +type Edition int32 + +// These values align with the value of Enum in descriptor.proto which allows +// direct conversion between the proto enum and this enum. +const ( + EditionUnknown Edition = 0 + EditionProto2 Edition = 998 + EditionProto3 Edition = 999 + Edition2023 Edition = 1000 + EditionUnsupported Edition = 100000 +) + // The types in this file may have a suffix: // • L0: Contains fields common to all descriptors (except File) and // must be initialized up front. // • L1: Contains fields specific to a descriptor and -// must be initialized up front. +// must be initialized up front. If the associated proto uses Editions, the +// Editions features must always be resolved. If not explicitly set, the +// appropriate default must be resolved and set. // • L2: Contains fields that are lazily initialized when constructing // from the raw file descriptor. When constructing as a literal, the L2 // fields must be initialized up front. @@ -44,6 +60,7 @@ type ( } FileL1 struct { Syntax protoreflect.Syntax + Edition Edition // Only used if Syntax == Editions Path string Package protoreflect.FullName @@ -51,21 +68,53 @@ type ( Messages Messages Extensions Extensions Services Services + + EditionFeatures EditionFeatures } FileL2 struct { Options func() protoreflect.ProtoMessage Imports FileImports Locations SourceLocations } + + EditionFeatures struct { + // IsFieldPresence is true if field_presence is EXPLICIT + // https://protobuf.dev/editions/features/#field_presence + IsFieldPresence bool + // IsFieldPresence is true if field_presence is LEGACY_REQUIRED + // https://protobuf.dev/editions/features/#field_presence + IsLegacyRequired bool + // IsOpenEnum is true if enum_type is OPEN + // https://protobuf.dev/editions/features/#enum_type + IsOpenEnum bool + // IsPacked is true if repeated_field_encoding is PACKED + // https://protobuf.dev/editions/features/#repeated_field_encoding + IsPacked bool + // IsUTF8Validated is true if utf_validation is VERIFY + // https://protobuf.dev/editions/features/#utf8_validation + IsUTF8Validated bool + // IsDelimitedEncoded is true if message_encoding is DELIMITED + // https://protobuf.dev/editions/features/#message_encoding + IsDelimitedEncoded bool + // IsJSONCompliant is true if json_format is ALLOW + // https://protobuf.dev/editions/features/#json_format + IsJSONCompliant bool + // GenerateLegacyUnmarshalJSON determines if the plugin generates the + // UnmarshalJSON([]byte) error method for enums. + GenerateLegacyUnmarshalJSON bool + } ) func (fd *File) ParentFile() protoreflect.FileDescriptor { return fd } func (fd *File) Parent() protoreflect.Descriptor { return nil } func (fd *File) Index() int { return 0 } func (fd *File) Syntax() protoreflect.Syntax { return fd.L1.Syntax } -func (fd *File) Name() protoreflect.Name { return fd.L1.Package.Name() } -func (fd *File) FullName() protoreflect.FullName { return fd.L1.Package } -func (fd *File) IsPlaceholder() bool { return false } + +// Not exported and just used to reconstruct the original FileDescriptor proto +func (fd *File) Edition() int32 { return int32(fd.L1.Edition) } +func (fd *File) Name() protoreflect.Name { return fd.L1.Package.Name() } +func (fd *File) FullName() protoreflect.FullName { return fd.L1.Package } +func (fd *File) IsPlaceholder() bool { return false } func (fd *File) Options() protoreflect.ProtoMessage { if f := fd.lazyInit().Options; f != nil { return f() @@ -117,6 +166,8 @@ type ( } EnumL1 struct { eagerValues bool // controls whether EnumL2.Values is already populated + + EditionFeatures EditionFeatures } EnumL2 struct { Options func() protoreflect.ProtoMessage @@ -155,6 +206,9 @@ func (ed *Enum) lazyInit() *EnumL2 { ed.L0.ParentFile.lazyInit() // implicitly initializes L2 return ed.L2 } +func (ed *Enum) IsClosed() bool { + return !ed.L1.EditionFeatures.IsOpenEnum +} func (ed *EnumValue) Options() protoreflect.ProtoMessage { if f := ed.L1.Options; f != nil { @@ -178,6 +232,8 @@ type ( Extensions Extensions IsMapEntry bool // promoted from google.protobuf.MessageOptions IsMessageSet bool // promoted from google.protobuf.MessageOptions + + EditionFeatures EditionFeatures } MessageL2 struct { Options func() protoreflect.ProtoMessage @@ -202,14 +258,13 @@ type ( StringName stringName IsProto3Optional bool // promoted from google.protobuf.FieldDescriptorProto IsWeak bool // promoted from google.protobuf.FieldOptions - HasPacked bool // promoted from google.protobuf.FieldOptions - IsPacked bool // promoted from google.protobuf.FieldOptions - HasEnforceUTF8 bool // promoted from google.protobuf.FieldOptions - EnforceUTF8 bool // promoted from google.protobuf.FieldOptions + IsLazy bool // promoted from google.protobuf.FieldOptions Default defaultValue ContainingOneof protoreflect.OneofDescriptor // must be consistent with Message.Oneofs.Fields Enum protoreflect.EnumDescriptor Message protoreflect.MessageDescriptor + + EditionFeatures EditionFeatures } Oneof struct { @@ -219,6 +274,8 @@ type ( OneofL1 struct { Options func() protoreflect.ProtoMessage Fields OneofFields // must be consistent with Message.Fields.ContainingOneof + + EditionFeatures EditionFeatures } ) @@ -268,28 +325,34 @@ func (fd *Field) Options() protoreflect.ProtoMessage { } func (fd *Field) Number() protoreflect.FieldNumber { return fd.L1.Number } func (fd *Field) Cardinality() protoreflect.Cardinality { return fd.L1.Cardinality } -func (fd *Field) Kind() protoreflect.Kind { return fd.L1.Kind } -func (fd *Field) HasJSONName() bool { return fd.L1.StringName.hasJSON } -func (fd *Field) JSONName() string { return fd.L1.StringName.getJSON(fd) } -func (fd *Field) TextName() string { return fd.L1.StringName.getText(fd) } +func (fd *Field) Kind() protoreflect.Kind { + return fd.L1.Kind +} +func (fd *Field) HasJSONName() bool { return fd.L1.StringName.hasJSON } +func (fd *Field) JSONName() string { return fd.L1.StringName.getJSON(fd) } +func (fd *Field) TextName() string { return fd.L1.StringName.getText(fd) } func (fd *Field) HasPresence() bool { - return fd.L1.Cardinality != protoreflect.Repeated && (fd.L0.ParentFile.L1.Syntax == protoreflect.Proto2 || fd.L1.Message != nil || fd.L1.ContainingOneof != nil) + if fd.L1.Cardinality == protoreflect.Repeated { + return false + } + return fd.IsExtension() || fd.L1.EditionFeatures.IsFieldPresence || fd.L1.Message != nil || fd.L1.ContainingOneof != nil } func (fd *Field) HasOptionalKeyword() bool { return (fd.L0.ParentFile.L1.Syntax == protoreflect.Proto2 && fd.L1.Cardinality == protoreflect.Optional && fd.L1.ContainingOneof == nil) || fd.L1.IsProto3Optional } func (fd *Field) IsPacked() bool { - if !fd.L1.HasPacked && fd.L0.ParentFile.L1.Syntax != protoreflect.Proto2 && fd.L1.Cardinality == protoreflect.Repeated { - switch fd.L1.Kind { - case protoreflect.StringKind, protoreflect.BytesKind, protoreflect.MessageKind, protoreflect.GroupKind: - default: - return true - } + if fd.L1.Cardinality != protoreflect.Repeated { + return false } - return fd.L1.IsPacked + switch fd.L1.Kind { + case protoreflect.StringKind, protoreflect.BytesKind, protoreflect.MessageKind, protoreflect.GroupKind: + return false + } + return fd.L1.EditionFeatures.IsPacked } func (fd *Field) IsExtension() bool { return false } func (fd *Field) IsWeak() bool { return fd.L1.IsWeak } +func (fd *Field) IsLazy() bool { return fd.L1.IsLazy } func (fd *Field) IsList() bool { return fd.Cardinality() == protoreflect.Repeated && !fd.IsMap() } func (fd *Field) IsMap() bool { return fd.Message() != nil && fd.Message().IsMapEntry() } func (fd *Field) MapKey() protoreflect.FieldDescriptor { @@ -322,6 +385,10 @@ func (fd *Field) Message() protoreflect.MessageDescriptor { } return fd.L1.Message } +func (fd *Field) IsMapEntry() bool { + parent, ok := fd.L0.Parent.(protoreflect.MessageDescriptor) + return ok && parent.IsMapEntry() +} func (fd *Field) Format(s fmt.State, r rune) { descfmt.FormatDesc(s, r, fd) } func (fd *Field) ProtoType(protoreflect.FieldDescriptor) {} @@ -333,10 +400,7 @@ func (fd *Field) ProtoType(protoreflect.FieldDescriptor) {} // WARNING: This method is exempt from the compatibility promise and may be // removed in the future without warning. func (fd *Field) EnforceUTF8() bool { - if fd.L1.HasEnforceUTF8 { - return fd.L1.EnforceUTF8 - } - return fd.L0.ParentFile.L1.Syntax == protoreflect.Proto3 + return fd.L1.EditionFeatures.IsUTF8Validated } func (od *Oneof) IsSynthetic() bool { @@ -359,16 +423,17 @@ type ( L2 *ExtensionL2 // protected by fileDesc.once } ExtensionL1 struct { - Number protoreflect.FieldNumber - Extendee protoreflect.MessageDescriptor - Cardinality protoreflect.Cardinality - Kind protoreflect.Kind + Number protoreflect.FieldNumber + Extendee protoreflect.MessageDescriptor + Cardinality protoreflect.Cardinality + Kind protoreflect.Kind + IsLazy bool + EditionFeatures EditionFeatures } ExtensionL2 struct { Options func() protoreflect.ProtoMessage StringName stringName IsProto3Optional bool // promoted from google.protobuf.FieldDescriptorProto - IsPacked bool // promoted from google.protobuf.FieldOptions Default defaultValue Enum protoreflect.EnumDescriptor Message protoreflect.MessageDescriptor @@ -391,9 +456,19 @@ func (xd *Extension) HasPresence() bool { return xd.L1.Cardi func (xd *Extension) HasOptionalKeyword() bool { return (xd.L0.ParentFile.L1.Syntax == protoreflect.Proto2 && xd.L1.Cardinality == protoreflect.Optional) || xd.lazyInit().IsProto3Optional } -func (xd *Extension) IsPacked() bool { return xd.lazyInit().IsPacked } +func (xd *Extension) IsPacked() bool { + if xd.L1.Cardinality != protoreflect.Repeated { + return false + } + switch xd.L1.Kind { + case protoreflect.StringKind, protoreflect.BytesKind, protoreflect.MessageKind, protoreflect.GroupKind: + return false + } + return xd.L1.EditionFeatures.IsPacked +} func (xd *Extension) IsExtension() bool { return true } func (xd *Extension) IsWeak() bool { return false } +func (xd *Extension) IsLazy() bool { return xd.L1.IsLazy } func (xd *Extension) IsList() bool { return xd.Cardinality() == protoreflect.Repeated } func (xd *Extension) IsMap() bool { return false } func (xd *Extension) MapKey() protoreflect.FieldDescriptor { return nil } @@ -472,8 +547,9 @@ func (md *Method) ProtoInternal(pragma.DoNotImplement) {} // Surrogate files are can be used to create standalone descriptors // where the syntax is only information derived from the parent file. var ( - SurrogateProto2 = &File{L1: FileL1{Syntax: protoreflect.Proto2}, L2: &FileL2{}} - SurrogateProto3 = &File{L1: FileL1{Syntax: protoreflect.Proto3}, L2: &FileL2{}} + SurrogateProto2 = &File{L1: FileL1{Syntax: protoreflect.Proto2}, L2: &FileL2{}} + SurrogateProto3 = &File{L1: FileL1{Syntax: protoreflect.Proto3}, L2: &FileL2{}} + SurrogateEdition2023 = &File{L1: FileL1{Syntax: protoreflect.Editions, Edition: Edition2023}, L2: &FileL2{}} ) type ( @@ -515,6 +591,34 @@ func (s *stringName) InitJSON(name string) { s.nameJSON = name } +// Returns true if this field is structured like the synthetic field of a proto2 +// group. This allows us to expand our treatment of delimited fields without +// breaking proto2 files that have been upgraded to editions. +func isGroupLike(fd protoreflect.FieldDescriptor) bool { + // Groups are always group types. + if fd.Kind() != protoreflect.GroupKind { + return false + } + + // Group fields are always the lowercase type name. + if strings.ToLower(string(fd.Message().Name())) != string(fd.Name()) { + return false + } + + // Groups could only be defined in the same file they're used. + if fd.Message().ParentFile() != fd.ParentFile() { + return false + } + + // Group messages are always defined in the same scope as the field. File + // level extensions will compare NULL == NULL here, which is why the file + // comparison above is necessary to ensure both come from the same file. + if fd.IsExtension() { + return fd.Parent() == fd.Message().Parent() + } + return fd.ContainingMessage() == fd.Message().Parent() +} + func (s *stringName) lazyInit(fd protoreflect.FieldDescriptor) *stringName { s.once.Do(func() { if fd.IsExtension() { @@ -535,7 +639,7 @@ func (s *stringName) lazyInit(fd protoreflect.FieldDescriptor) *stringName { // Format the text name. s.nameText = string(fd.Name()) - if fd.Kind() == protoreflect.GroupKind { + if isGroupLike(fd) { s.nameText = string(fd.Message().Name()) } } diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go b/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go index 4a1584c9..d2f54949 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/desc_init.go @@ -5,6 +5,7 @@ package filedesc import ( + "fmt" "sync" "google.golang.org/protobuf/encoding/protowire" @@ -98,6 +99,7 @@ func (fd *File) unmarshalSeed(b []byte) { var prevField protoreflect.FieldNumber var numEnums, numMessages, numExtensions, numServices int var posEnums, posMessages, posExtensions, posServices int + var options []byte b0 := b for len(b) > 0 { num, typ, n := protowire.ConsumeTag(b) @@ -111,8 +113,12 @@ func (fd *File) unmarshalSeed(b []byte) { switch string(v) { case "proto2": fd.L1.Syntax = protoreflect.Proto2 + fd.L1.Edition = EditionProto2 case "proto3": fd.L1.Syntax = protoreflect.Proto3 + fd.L1.Edition = EditionProto3 + case "editions": + fd.L1.Syntax = protoreflect.Editions default: panic("invalid syntax") } @@ -120,6 +126,8 @@ func (fd *File) unmarshalSeed(b []byte) { fd.L1.Path = sb.MakeString(v) case genid.FileDescriptorProto_Package_field_number: fd.L1.Package = protoreflect.FullName(sb.MakeString(v)) + case genid.FileDescriptorProto_Options_field_number: + options = v case genid.FileDescriptorProto_EnumType_field_number: if prevField != genid.FileDescriptorProto_EnumType_field_number { if numEnums > 0 { @@ -154,6 +162,13 @@ func (fd *File) unmarshalSeed(b []byte) { numServices++ } prevField = num + case protowire.VarintType: + v, m := protowire.ConsumeVarint(b) + b = b[m:] + switch num { + case genid.FileDescriptorProto_Edition_field_number: + fd.L1.Edition = Edition(v) + } default: m := protowire.ConsumeFieldValue(num, typ, b) b = b[m:] @@ -164,6 +179,14 @@ func (fd *File) unmarshalSeed(b []byte) { // If syntax is missing, it is assumed to be proto2. if fd.L1.Syntax == 0 { fd.L1.Syntax = protoreflect.Proto2 + fd.L1.Edition = EditionProto2 + } + + fd.L1.EditionFeatures = getFeaturesFor(fd.L1.Edition) + + // Parse editions features from options if any + if options != nil { + fd.unmarshalSeedOptions(options) } // Must allocate all declarations before parsing each descriptor type @@ -219,10 +242,33 @@ func (fd *File) unmarshalSeed(b []byte) { } } +func (fd *File) unmarshalSeedOptions(b []byte) { + for b := b; len(b) > 0; { + num, typ, n := protowire.ConsumeTag(b) + b = b[n:] + switch typ { + case protowire.BytesType: + v, m := protowire.ConsumeBytes(b) + b = b[m:] + switch num { + case genid.FileOptions_Features_field_number: + if fd.Syntax() != protoreflect.Editions { + panic(fmt.Sprintf("invalid descriptor: using edition features in a proto with syntax %s", fd.Syntax())) + } + fd.L1.EditionFeatures = unmarshalFeatureSet(v, fd.L1.EditionFeatures) + } + default: + m := protowire.ConsumeFieldValue(num, typ, b) + b = b[m:] + } + } +} + func (ed *Enum) unmarshalSeed(b []byte, sb *strs.Builder, pf *File, pd protoreflect.Descriptor, i int) { ed.L0.ParentFile = pf ed.L0.Parent = pd ed.L0.Index = i + ed.L1.EditionFeatures = featuresFromParentDesc(ed.Parent()) var numValues int for b := b; len(b) > 0; { @@ -275,6 +321,7 @@ func (md *Message) unmarshalSeed(b []byte, sb *strs.Builder, pf *File, pd protor md.L0.ParentFile = pf md.L0.Parent = pd md.L0.Index = i + md.L1.EditionFeatures = featuresFromParentDesc(md.Parent()) var prevField protoreflect.FieldNumber var numEnums, numMessages, numExtensions int @@ -380,6 +427,13 @@ func (md *Message) unmarshalSeedOptions(b []byte) { case genid.MessageOptions_MessageSetWireFormat_field_number: md.L1.IsMessageSet = protowire.DecodeBool(v) } + case protowire.BytesType: + v, m := protowire.ConsumeBytes(b) + b = b[m:] + switch num { + case genid.MessageOptions_Features_field_number: + md.L1.EditionFeatures = unmarshalFeatureSet(v, md.L1.EditionFeatures) + } default: m := protowire.ConsumeFieldValue(num, typ, b) b = b[m:] @@ -391,6 +445,7 @@ func (xd *Extension) unmarshalSeed(b []byte, sb *strs.Builder, pf *File, pd prot xd.L0.ParentFile = pf xd.L0.Parent = pd xd.L0.Index = i + xd.L1.EditionFeatures = featuresFromParentDesc(pd) for len(b) > 0 { num, typ, n := protowire.ConsumeTag(b) @@ -415,6 +470,40 @@ func (xd *Extension) unmarshalSeed(b []byte, sb *strs.Builder, pf *File, pd prot xd.L0.FullName = appendFullName(sb, pd.FullName(), v) case genid.FieldDescriptorProto_Extendee_field_number: xd.L1.Extendee = PlaceholderMessage(makeFullName(sb, v)) + case genid.FieldDescriptorProto_Options_field_number: + xd.unmarshalOptions(v) + } + default: + m := protowire.ConsumeFieldValue(num, typ, b) + b = b[m:] + } + } + + if xd.L1.Kind == protoreflect.MessageKind && xd.L1.EditionFeatures.IsDelimitedEncoded { + xd.L1.Kind = protoreflect.GroupKind + } +} + +func (xd *Extension) unmarshalOptions(b []byte) { + for len(b) > 0 { + num, typ, n := protowire.ConsumeTag(b) + b = b[n:] + switch typ { + case protowire.VarintType: + v, m := protowire.ConsumeVarint(b) + b = b[m:] + switch num { + case genid.FieldOptions_Packed_field_number: + xd.L1.EditionFeatures.IsPacked = protowire.DecodeBool(v) + case genid.FieldOptions_Lazy_field_number: + xd.L1.IsLazy = protowire.DecodeBool(v) + } + case protowire.BytesType: + v, m := protowire.ConsumeBytes(b) + b = b[m:] + switch num { + case genid.FieldOptions_Features_field_number: + xd.L1.EditionFeatures = unmarshalFeatureSet(v, xd.L1.EditionFeatures) } default: m := protowire.ConsumeFieldValue(num, typ, b) @@ -447,7 +536,7 @@ func (sd *Service) unmarshalSeed(b []byte, sb *strs.Builder, pf *File, pd protor } var nameBuilderPool = sync.Pool{ - New: func() interface{} { return new(strs.Builder) }, + New: func() any { return new(strs.Builder) }, } func getBuilder() *strs.Builder { diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go b/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go index 736a19a7..67a51b32 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/desc_lazy.go @@ -45,6 +45,11 @@ func (file *File) resolveMessages() { case protoreflect.MessageKind, protoreflect.GroupKind: fd.L1.Message = file.resolveMessageDependency(fd.L1.Message, listFieldDeps, depIdx) depIdx++ + if fd.L1.Kind == protoreflect.GroupKind && (fd.IsMap() || fd.IsMapEntry()) { + // A map field might inherit delimited encoding from a file-wide default feature. + // But maps never actually use delimited encoding. (At least for now...) + fd.L1.Kind = protoreflect.MessageKind + } } // Default is resolved here since it depends on Enum being resolved. @@ -414,6 +419,7 @@ func (fd *Field) unmarshalFull(b []byte, sb *strs.Builder, pf *File, pd protoref fd.L0.ParentFile = pf fd.L0.Parent = pd fd.L0.Index = i + fd.L1.EditionFeatures = featuresFromParentDesc(fd.Parent()) var rawTypeName []byte var rawOptions []byte @@ -465,6 +471,12 @@ func (fd *Field) unmarshalFull(b []byte, sb *strs.Builder, pf *File, pd protoref b = b[m:] } } + if fd.L1.Kind == protoreflect.MessageKind && fd.L1.EditionFeatures.IsDelimitedEncoded { + fd.L1.Kind = protoreflect.GroupKind + } + if fd.L1.EditionFeatures.IsLegacyRequired { + fd.L1.Cardinality = protoreflect.Required + } if rawTypeName != nil { name := makeFullName(sb, rawTypeName) switch fd.L1.Kind { @@ -489,13 +501,20 @@ func (fd *Field) unmarshalOptions(b []byte) { b = b[m:] switch num { case genid.FieldOptions_Packed_field_number: - fd.L1.HasPacked = true - fd.L1.IsPacked = protowire.DecodeBool(v) + fd.L1.EditionFeatures.IsPacked = protowire.DecodeBool(v) case genid.FieldOptions_Weak_field_number: fd.L1.IsWeak = protowire.DecodeBool(v) + case genid.FieldOptions_Lazy_field_number: + fd.L1.IsLazy = protowire.DecodeBool(v) case FieldOptions_EnforceUTF8: - fd.L1.HasEnforceUTF8 = true - fd.L1.EnforceUTF8 = protowire.DecodeBool(v) + fd.L1.EditionFeatures.IsUTF8Validated = protowire.DecodeBool(v) + } + case protowire.BytesType: + v, m := protowire.ConsumeBytes(b) + b = b[m:] + switch num { + case genid.FieldOptions_Features_field_number: + fd.L1.EditionFeatures = unmarshalFeatureSet(v, fd.L1.EditionFeatures) } default: m := protowire.ConsumeFieldValue(num, typ, b) @@ -557,7 +576,6 @@ func (xd *Extension) unmarshalFull(b []byte, sb *strs.Builder) { case genid.FieldDescriptorProto_TypeName_field_number: rawTypeName = v case genid.FieldDescriptorProto_Options_field_number: - xd.unmarshalOptions(v) rawOptions = appendOptions(rawOptions, v) } default: @@ -577,25 +595,6 @@ func (xd *Extension) unmarshalFull(b []byte, sb *strs.Builder) { xd.L2.Options = xd.L0.ParentFile.builder.optionsUnmarshaler(&descopts.Field, rawOptions) } -func (xd *Extension) unmarshalOptions(b []byte) { - for len(b) > 0 { - num, typ, n := protowire.ConsumeTag(b) - b = b[n:] - switch typ { - case protowire.VarintType: - v, m := protowire.ConsumeVarint(b) - b = b[m:] - switch num { - case genid.FieldOptions_Packed_field_number: - xd.L2.IsPacked = protowire.DecodeBool(v) - } - default: - m := protowire.ConsumeFieldValue(num, typ, b) - b = b[m:] - } - } -} - func (sd *Service) unmarshalFull(b []byte, sb *strs.Builder) { var rawMethods [][]byte var rawOptions []byte diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/desc_list_gen.go b/vendor/google.golang.org/protobuf/internal/filedesc/desc_list_gen.go index 30db19fd..f4107c05 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/desc_list_gen.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/desc_list_gen.go @@ -8,6 +8,7 @@ package filedesc import ( "fmt" + "strings" "sync" "google.golang.org/protobuf/internal/descfmt" @@ -198,6 +199,16 @@ func (p *Fields) lazyInit() *Fields { if _, ok := p.byText[d.TextName()]; !ok { p.byText[d.TextName()] = d } + if isGroupLike(d) { + lowerJSONName := strings.ToLower(d.JSONName()) + if _, ok := p.byJSON[lowerJSONName]; !ok { + p.byJSON[lowerJSONName] = d + } + lowerTextName := strings.ToLower(d.TextName()) + if _, ok := p.byText[lowerTextName]; !ok { + p.byText[lowerTextName] = d + } + } if _, ok := p.byNum[d.Number()]; !ok { p.byNum[d.Number()] = d } diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/editions.go b/vendor/google.golang.org/protobuf/internal/filedesc/editions.go new file mode 100644 index 00000000..fd4d0c83 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/filedesc/editions.go @@ -0,0 +1,156 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package filedesc + +import ( + "fmt" + + "google.golang.org/protobuf/encoding/protowire" + "google.golang.org/protobuf/internal/editiondefaults" + "google.golang.org/protobuf/internal/genid" + "google.golang.org/protobuf/reflect/protoreflect" +) + +var defaultsCache = make(map[Edition]EditionFeatures) +var defaultsKeys = []Edition{} + +func init() { + unmarshalEditionDefaults(editiondefaults.Defaults) + SurrogateProto2.L1.EditionFeatures = getFeaturesFor(EditionProto2) + SurrogateProto3.L1.EditionFeatures = getFeaturesFor(EditionProto3) + SurrogateEdition2023.L1.EditionFeatures = getFeaturesFor(Edition2023) +} + +func unmarshalGoFeature(b []byte, parent EditionFeatures) EditionFeatures { + for len(b) > 0 { + num, _, n := protowire.ConsumeTag(b) + b = b[n:] + switch num { + case genid.GoFeatures_LegacyUnmarshalJsonEnum_field_number: + v, m := protowire.ConsumeVarint(b) + b = b[m:] + parent.GenerateLegacyUnmarshalJSON = protowire.DecodeBool(v) + default: + panic(fmt.Sprintf("unkown field number %d while unmarshalling GoFeatures", num)) + } + } + return parent +} + +func unmarshalFeatureSet(b []byte, parent EditionFeatures) EditionFeatures { + for len(b) > 0 { + num, typ, n := protowire.ConsumeTag(b) + b = b[n:] + switch typ { + case protowire.VarintType: + v, m := protowire.ConsumeVarint(b) + b = b[m:] + switch num { + case genid.FeatureSet_FieldPresence_field_number: + parent.IsFieldPresence = v == genid.FeatureSet_EXPLICIT_enum_value || v == genid.FeatureSet_LEGACY_REQUIRED_enum_value + parent.IsLegacyRequired = v == genid.FeatureSet_LEGACY_REQUIRED_enum_value + case genid.FeatureSet_EnumType_field_number: + parent.IsOpenEnum = v == genid.FeatureSet_OPEN_enum_value + case genid.FeatureSet_RepeatedFieldEncoding_field_number: + parent.IsPacked = v == genid.FeatureSet_PACKED_enum_value + case genid.FeatureSet_Utf8Validation_field_number: + parent.IsUTF8Validated = v == genid.FeatureSet_VERIFY_enum_value + case genid.FeatureSet_MessageEncoding_field_number: + parent.IsDelimitedEncoded = v == genid.FeatureSet_DELIMITED_enum_value + case genid.FeatureSet_JsonFormat_field_number: + parent.IsJSONCompliant = v == genid.FeatureSet_ALLOW_enum_value + default: + panic(fmt.Sprintf("unkown field number %d while unmarshalling FeatureSet", num)) + } + case protowire.BytesType: + v, m := protowire.ConsumeBytes(b) + b = b[m:] + switch num { + case genid.FeatureSet_Go_ext_number: + parent = unmarshalGoFeature(v, parent) + } + } + } + + return parent +} + +func featuresFromParentDesc(parentDesc protoreflect.Descriptor) EditionFeatures { + var parentFS EditionFeatures + switch p := parentDesc.(type) { + case *File: + parentFS = p.L1.EditionFeatures + case *Message: + parentFS = p.L1.EditionFeatures + default: + panic(fmt.Sprintf("unknown parent type %T", parentDesc)) + } + return parentFS +} + +func unmarshalEditionDefault(b []byte) { + var ed Edition + var fs EditionFeatures + for len(b) > 0 { + num, typ, n := protowire.ConsumeTag(b) + b = b[n:] + switch typ { + case protowire.VarintType: + v, m := protowire.ConsumeVarint(b) + b = b[m:] + switch num { + case genid.FeatureSetDefaults_FeatureSetEditionDefault_Edition_field_number: + ed = Edition(v) + } + case protowire.BytesType: + v, m := protowire.ConsumeBytes(b) + b = b[m:] + switch num { + case genid.FeatureSetDefaults_FeatureSetEditionDefault_FixedFeatures_field_number: + fs = unmarshalFeatureSet(v, fs) + case genid.FeatureSetDefaults_FeatureSetEditionDefault_OverridableFeatures_field_number: + fs = unmarshalFeatureSet(v, fs) + } + } + } + defaultsCache[ed] = fs + defaultsKeys = append(defaultsKeys, ed) +} + +func unmarshalEditionDefaults(b []byte) { + for len(b) > 0 { + num, _, n := protowire.ConsumeTag(b) + b = b[n:] + switch num { + case genid.FeatureSetDefaults_Defaults_field_number: + def, m := protowire.ConsumeBytes(b) + b = b[m:] + unmarshalEditionDefault(def) + case genid.FeatureSetDefaults_MinimumEdition_field_number, + genid.FeatureSetDefaults_MaximumEdition_field_number: + // We don't care about the minimum and maximum editions. If the + // edition we are looking for later on is not in the cache we know + // it is outside of the range between minimum and maximum edition. + _, m := protowire.ConsumeVarint(b) + b = b[m:] + default: + panic(fmt.Sprintf("unkown field number %d while unmarshalling EditionDefault", num)) + } + } +} + +func getFeaturesFor(ed Edition) EditionFeatures { + match := EditionUnknown + for _, key := range defaultsKeys { + if key > ed { + break + } + match = key + } + if match == EditionUnknown { + panic(fmt.Sprintf("unsupported edition: %v", ed)) + } + return defaultsCache[match] +} diff --git a/vendor/google.golang.org/protobuf/internal/filedesc/placeholder.go b/vendor/google.golang.org/protobuf/internal/filedesc/placeholder.go index 28240ebc..bfb3b841 100644 --- a/vendor/google.golang.org/protobuf/internal/filedesc/placeholder.go +++ b/vendor/google.golang.org/protobuf/internal/filedesc/placeholder.go @@ -63,6 +63,7 @@ func (e PlaceholderEnum) Options() protoreflect.ProtoMessage { return des func (e PlaceholderEnum) Values() protoreflect.EnumValueDescriptors { return emptyEnumValues } func (e PlaceholderEnum) ReservedNames() protoreflect.Names { return emptyNames } func (e PlaceholderEnum) ReservedRanges() protoreflect.EnumRanges { return emptyEnumRanges } +func (e PlaceholderEnum) IsClosed() bool { return false } func (e PlaceholderEnum) ProtoType(protoreflect.EnumDescriptor) { return } func (e PlaceholderEnum) ProtoInternal(pragma.DoNotImplement) { return } diff --git a/vendor/google.golang.org/protobuf/internal/filetype/build.go b/vendor/google.golang.org/protobuf/internal/filetype/build.go index f0e38c4e..ba83fea4 100644 --- a/vendor/google.golang.org/protobuf/internal/filetype/build.go +++ b/vendor/google.golang.org/protobuf/internal/filetype/build.go @@ -68,7 +68,7 @@ type Builder struct { // and for input and output messages referenced by service methods. // Dependencies must come after declarations, but the ordering of // dependencies themselves is unspecified. - GoTypes []interface{} + GoTypes []any // DependencyIndexes is an ordered list of indexes into GoTypes for the // dependencies of messages, extensions, or services. @@ -268,7 +268,7 @@ func (x depIdxs) Get(i, j int32) int32 { type ( resolverByIndex struct { - goTypes []interface{} + goTypes []any depIdxs depIdxs fileRegistry } diff --git a/vendor/google.golang.org/protobuf/internal/genid/descriptor_gen.go b/vendor/google.golang.org/protobuf/internal/genid/descriptor_gen.go index 136f1b21..f30ab6b5 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/descriptor_gen.go +++ b/vendor/google.golang.org/protobuf/internal/genid/descriptor_gen.go @@ -12,6 +12,28 @@ import ( const File_google_protobuf_descriptor_proto = "google/protobuf/descriptor.proto" +// Full and short names for google.protobuf.Edition. +const ( + Edition_enum_fullname = "google.protobuf.Edition" + Edition_enum_name = "Edition" +) + +// Enum values for google.protobuf.Edition. +const ( + Edition_EDITION_UNKNOWN_enum_value = 0 + Edition_EDITION_LEGACY_enum_value = 900 + Edition_EDITION_PROTO2_enum_value = 998 + Edition_EDITION_PROTO3_enum_value = 999 + Edition_EDITION_2023_enum_value = 1000 + Edition_EDITION_2024_enum_value = 1001 + Edition_EDITION_1_TEST_ONLY_enum_value = 1 + Edition_EDITION_2_TEST_ONLY_enum_value = 2 + Edition_EDITION_99997_TEST_ONLY_enum_value = 99997 + Edition_EDITION_99998_TEST_ONLY_enum_value = 99998 + Edition_EDITION_99999_TEST_ONLY_enum_value = 99999 + Edition_EDITION_MAX_enum_value = 2147483647 +) + // Names for google.protobuf.FileDescriptorSet. const ( FileDescriptorSet_message_name protoreflect.Name = "FileDescriptorSet" @@ -81,7 +103,7 @@ const ( FileDescriptorProto_Options_field_number protoreflect.FieldNumber = 8 FileDescriptorProto_SourceCodeInfo_field_number protoreflect.FieldNumber = 9 FileDescriptorProto_Syntax_field_number protoreflect.FieldNumber = 12 - FileDescriptorProto_Edition_field_number protoreflect.FieldNumber = 13 + FileDescriptorProto_Edition_field_number protoreflect.FieldNumber = 14 ) // Names for google.protobuf.DescriptorProto. @@ -184,10 +206,12 @@ const ( const ( ExtensionRangeOptions_UninterpretedOption_field_name protoreflect.Name = "uninterpreted_option" ExtensionRangeOptions_Declaration_field_name protoreflect.Name = "declaration" + ExtensionRangeOptions_Features_field_name protoreflect.Name = "features" ExtensionRangeOptions_Verification_field_name protoreflect.Name = "verification" ExtensionRangeOptions_UninterpretedOption_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.uninterpreted_option" ExtensionRangeOptions_Declaration_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.declaration" + ExtensionRangeOptions_Features_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.features" ExtensionRangeOptions_Verification_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.verification" ) @@ -195,6 +219,7 @@ const ( const ( ExtensionRangeOptions_UninterpretedOption_field_number protoreflect.FieldNumber = 999 ExtensionRangeOptions_Declaration_field_number protoreflect.FieldNumber = 2 + ExtensionRangeOptions_Features_field_number protoreflect.FieldNumber = 50 ExtensionRangeOptions_Verification_field_number protoreflect.FieldNumber = 3 ) @@ -204,6 +229,12 @@ const ( ExtensionRangeOptions_VerificationState_enum_name = "VerificationState" ) +// Enum values for google.protobuf.ExtensionRangeOptions.VerificationState. +const ( + ExtensionRangeOptions_DECLARATION_enum_value = 0 + ExtensionRangeOptions_UNVERIFIED_enum_value = 1 +) + // Names for google.protobuf.ExtensionRangeOptions.Declaration. const ( ExtensionRangeOptions_Declaration_message_name protoreflect.Name = "Declaration" @@ -212,29 +243,26 @@ const ( // Field names for google.protobuf.ExtensionRangeOptions.Declaration. const ( - ExtensionRangeOptions_Declaration_Number_field_name protoreflect.Name = "number" - ExtensionRangeOptions_Declaration_FullName_field_name protoreflect.Name = "full_name" - ExtensionRangeOptions_Declaration_Type_field_name protoreflect.Name = "type" - ExtensionRangeOptions_Declaration_IsRepeated_field_name protoreflect.Name = "is_repeated" - ExtensionRangeOptions_Declaration_Reserved_field_name protoreflect.Name = "reserved" - ExtensionRangeOptions_Declaration_Repeated_field_name protoreflect.Name = "repeated" + ExtensionRangeOptions_Declaration_Number_field_name protoreflect.Name = "number" + ExtensionRangeOptions_Declaration_FullName_field_name protoreflect.Name = "full_name" + ExtensionRangeOptions_Declaration_Type_field_name protoreflect.Name = "type" + ExtensionRangeOptions_Declaration_Reserved_field_name protoreflect.Name = "reserved" + ExtensionRangeOptions_Declaration_Repeated_field_name protoreflect.Name = "repeated" - ExtensionRangeOptions_Declaration_Number_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.number" - ExtensionRangeOptions_Declaration_FullName_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.full_name" - ExtensionRangeOptions_Declaration_Type_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.type" - ExtensionRangeOptions_Declaration_IsRepeated_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.is_repeated" - ExtensionRangeOptions_Declaration_Reserved_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.reserved" - ExtensionRangeOptions_Declaration_Repeated_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.repeated" + ExtensionRangeOptions_Declaration_Number_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.number" + ExtensionRangeOptions_Declaration_FullName_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.full_name" + ExtensionRangeOptions_Declaration_Type_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.type" + ExtensionRangeOptions_Declaration_Reserved_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.reserved" + ExtensionRangeOptions_Declaration_Repeated_field_fullname protoreflect.FullName = "google.protobuf.ExtensionRangeOptions.Declaration.repeated" ) // Field numbers for google.protobuf.ExtensionRangeOptions.Declaration. const ( - ExtensionRangeOptions_Declaration_Number_field_number protoreflect.FieldNumber = 1 - ExtensionRangeOptions_Declaration_FullName_field_number protoreflect.FieldNumber = 2 - ExtensionRangeOptions_Declaration_Type_field_number protoreflect.FieldNumber = 3 - ExtensionRangeOptions_Declaration_IsRepeated_field_number protoreflect.FieldNumber = 4 - ExtensionRangeOptions_Declaration_Reserved_field_number protoreflect.FieldNumber = 5 - ExtensionRangeOptions_Declaration_Repeated_field_number protoreflect.FieldNumber = 6 + ExtensionRangeOptions_Declaration_Number_field_number protoreflect.FieldNumber = 1 + ExtensionRangeOptions_Declaration_FullName_field_number protoreflect.FieldNumber = 2 + ExtensionRangeOptions_Declaration_Type_field_number protoreflect.FieldNumber = 3 + ExtensionRangeOptions_Declaration_Reserved_field_number protoreflect.FieldNumber = 5 + ExtensionRangeOptions_Declaration_Repeated_field_number protoreflect.FieldNumber = 6 ) // Names for google.protobuf.FieldDescriptorProto. @@ -291,12 +319,41 @@ const ( FieldDescriptorProto_Type_enum_name = "Type" ) +// Enum values for google.protobuf.FieldDescriptorProto.Type. +const ( + FieldDescriptorProto_TYPE_DOUBLE_enum_value = 1 + FieldDescriptorProto_TYPE_FLOAT_enum_value = 2 + FieldDescriptorProto_TYPE_INT64_enum_value = 3 + FieldDescriptorProto_TYPE_UINT64_enum_value = 4 + FieldDescriptorProto_TYPE_INT32_enum_value = 5 + FieldDescriptorProto_TYPE_FIXED64_enum_value = 6 + FieldDescriptorProto_TYPE_FIXED32_enum_value = 7 + FieldDescriptorProto_TYPE_BOOL_enum_value = 8 + FieldDescriptorProto_TYPE_STRING_enum_value = 9 + FieldDescriptorProto_TYPE_GROUP_enum_value = 10 + FieldDescriptorProto_TYPE_MESSAGE_enum_value = 11 + FieldDescriptorProto_TYPE_BYTES_enum_value = 12 + FieldDescriptorProto_TYPE_UINT32_enum_value = 13 + FieldDescriptorProto_TYPE_ENUM_enum_value = 14 + FieldDescriptorProto_TYPE_SFIXED32_enum_value = 15 + FieldDescriptorProto_TYPE_SFIXED64_enum_value = 16 + FieldDescriptorProto_TYPE_SINT32_enum_value = 17 + FieldDescriptorProto_TYPE_SINT64_enum_value = 18 +) + // Full and short names for google.protobuf.FieldDescriptorProto.Label. const ( FieldDescriptorProto_Label_enum_fullname = "google.protobuf.FieldDescriptorProto.Label" FieldDescriptorProto_Label_enum_name = "Label" ) +// Enum values for google.protobuf.FieldDescriptorProto.Label. +const ( + FieldDescriptorProto_LABEL_OPTIONAL_enum_value = 1 + FieldDescriptorProto_LABEL_REPEATED_enum_value = 3 + FieldDescriptorProto_LABEL_REQUIRED_enum_value = 2 +) + // Names for google.protobuf.OneofDescriptorProto. const ( OneofDescriptorProto_message_name protoreflect.Name = "OneofDescriptorProto" @@ -468,7 +525,6 @@ const ( FileOptions_CcGenericServices_field_name protoreflect.Name = "cc_generic_services" FileOptions_JavaGenericServices_field_name protoreflect.Name = "java_generic_services" FileOptions_PyGenericServices_field_name protoreflect.Name = "py_generic_services" - FileOptions_PhpGenericServices_field_name protoreflect.Name = "php_generic_services" FileOptions_Deprecated_field_name protoreflect.Name = "deprecated" FileOptions_CcEnableArenas_field_name protoreflect.Name = "cc_enable_arenas" FileOptions_ObjcClassPrefix_field_name protoreflect.Name = "objc_class_prefix" @@ -478,6 +534,7 @@ const ( FileOptions_PhpNamespace_field_name protoreflect.Name = "php_namespace" FileOptions_PhpMetadataNamespace_field_name protoreflect.Name = "php_metadata_namespace" FileOptions_RubyPackage_field_name protoreflect.Name = "ruby_package" + FileOptions_Features_field_name protoreflect.Name = "features" FileOptions_UninterpretedOption_field_name protoreflect.Name = "uninterpreted_option" FileOptions_JavaPackage_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.java_package" @@ -490,7 +547,6 @@ const ( FileOptions_CcGenericServices_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.cc_generic_services" FileOptions_JavaGenericServices_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.java_generic_services" FileOptions_PyGenericServices_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.py_generic_services" - FileOptions_PhpGenericServices_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.php_generic_services" FileOptions_Deprecated_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.deprecated" FileOptions_CcEnableArenas_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.cc_enable_arenas" FileOptions_ObjcClassPrefix_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.objc_class_prefix" @@ -500,6 +556,7 @@ const ( FileOptions_PhpNamespace_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.php_namespace" FileOptions_PhpMetadataNamespace_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.php_metadata_namespace" FileOptions_RubyPackage_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.ruby_package" + FileOptions_Features_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.features" FileOptions_UninterpretedOption_field_fullname protoreflect.FullName = "google.protobuf.FileOptions.uninterpreted_option" ) @@ -515,7 +572,6 @@ const ( FileOptions_CcGenericServices_field_number protoreflect.FieldNumber = 16 FileOptions_JavaGenericServices_field_number protoreflect.FieldNumber = 17 FileOptions_PyGenericServices_field_number protoreflect.FieldNumber = 18 - FileOptions_PhpGenericServices_field_number protoreflect.FieldNumber = 42 FileOptions_Deprecated_field_number protoreflect.FieldNumber = 23 FileOptions_CcEnableArenas_field_number protoreflect.FieldNumber = 31 FileOptions_ObjcClassPrefix_field_number protoreflect.FieldNumber = 36 @@ -525,6 +581,7 @@ const ( FileOptions_PhpNamespace_field_number protoreflect.FieldNumber = 41 FileOptions_PhpMetadataNamespace_field_number protoreflect.FieldNumber = 44 FileOptions_RubyPackage_field_number protoreflect.FieldNumber = 45 + FileOptions_Features_field_number protoreflect.FieldNumber = 50 FileOptions_UninterpretedOption_field_number protoreflect.FieldNumber = 999 ) @@ -534,6 +591,13 @@ const ( FileOptions_OptimizeMode_enum_name = "OptimizeMode" ) +// Enum values for google.protobuf.FileOptions.OptimizeMode. +const ( + FileOptions_SPEED_enum_value = 1 + FileOptions_CODE_SIZE_enum_value = 2 + FileOptions_LITE_RUNTIME_enum_value = 3 +) + // Names for google.protobuf.MessageOptions. const ( MessageOptions_message_name protoreflect.Name = "MessageOptions" @@ -547,6 +611,7 @@ const ( MessageOptions_Deprecated_field_name protoreflect.Name = "deprecated" MessageOptions_MapEntry_field_name protoreflect.Name = "map_entry" MessageOptions_DeprecatedLegacyJsonFieldConflicts_field_name protoreflect.Name = "deprecated_legacy_json_field_conflicts" + MessageOptions_Features_field_name protoreflect.Name = "features" MessageOptions_UninterpretedOption_field_name protoreflect.Name = "uninterpreted_option" MessageOptions_MessageSetWireFormat_field_fullname protoreflect.FullName = "google.protobuf.MessageOptions.message_set_wire_format" @@ -554,6 +619,7 @@ const ( MessageOptions_Deprecated_field_fullname protoreflect.FullName = "google.protobuf.MessageOptions.deprecated" MessageOptions_MapEntry_field_fullname protoreflect.FullName = "google.protobuf.MessageOptions.map_entry" MessageOptions_DeprecatedLegacyJsonFieldConflicts_field_fullname protoreflect.FullName = "google.protobuf.MessageOptions.deprecated_legacy_json_field_conflicts" + MessageOptions_Features_field_fullname protoreflect.FullName = "google.protobuf.MessageOptions.features" MessageOptions_UninterpretedOption_field_fullname protoreflect.FullName = "google.protobuf.MessageOptions.uninterpreted_option" ) @@ -564,6 +630,7 @@ const ( MessageOptions_Deprecated_field_number protoreflect.FieldNumber = 3 MessageOptions_MapEntry_field_number protoreflect.FieldNumber = 7 MessageOptions_DeprecatedLegacyJsonFieldConflicts_field_number protoreflect.FieldNumber = 11 + MessageOptions_Features_field_number protoreflect.FieldNumber = 12 MessageOptions_UninterpretedOption_field_number protoreflect.FieldNumber = 999 ) @@ -584,8 +651,10 @@ const ( FieldOptions_Weak_field_name protoreflect.Name = "weak" FieldOptions_DebugRedact_field_name protoreflect.Name = "debug_redact" FieldOptions_Retention_field_name protoreflect.Name = "retention" - FieldOptions_Target_field_name protoreflect.Name = "target" FieldOptions_Targets_field_name protoreflect.Name = "targets" + FieldOptions_EditionDefaults_field_name protoreflect.Name = "edition_defaults" + FieldOptions_Features_field_name protoreflect.Name = "features" + FieldOptions_FeatureSupport_field_name protoreflect.Name = "feature_support" FieldOptions_UninterpretedOption_field_name protoreflect.Name = "uninterpreted_option" FieldOptions_Ctype_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.ctype" @@ -597,8 +666,10 @@ const ( FieldOptions_Weak_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.weak" FieldOptions_DebugRedact_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.debug_redact" FieldOptions_Retention_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.retention" - FieldOptions_Target_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.target" FieldOptions_Targets_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.targets" + FieldOptions_EditionDefaults_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.edition_defaults" + FieldOptions_Features_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.features" + FieldOptions_FeatureSupport_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.feature_support" FieldOptions_UninterpretedOption_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.uninterpreted_option" ) @@ -613,8 +684,10 @@ const ( FieldOptions_Weak_field_number protoreflect.FieldNumber = 10 FieldOptions_DebugRedact_field_number protoreflect.FieldNumber = 16 FieldOptions_Retention_field_number protoreflect.FieldNumber = 17 - FieldOptions_Target_field_number protoreflect.FieldNumber = 18 FieldOptions_Targets_field_number protoreflect.FieldNumber = 19 + FieldOptions_EditionDefaults_field_number protoreflect.FieldNumber = 20 + FieldOptions_Features_field_number protoreflect.FieldNumber = 21 + FieldOptions_FeatureSupport_field_number protoreflect.FieldNumber = 22 FieldOptions_UninterpretedOption_field_number protoreflect.FieldNumber = 999 ) @@ -624,24 +697,107 @@ const ( FieldOptions_CType_enum_name = "CType" ) +// Enum values for google.protobuf.FieldOptions.CType. +const ( + FieldOptions_STRING_enum_value = 0 + FieldOptions_CORD_enum_value = 1 + FieldOptions_STRING_PIECE_enum_value = 2 +) + // Full and short names for google.protobuf.FieldOptions.JSType. const ( FieldOptions_JSType_enum_fullname = "google.protobuf.FieldOptions.JSType" FieldOptions_JSType_enum_name = "JSType" ) +// Enum values for google.protobuf.FieldOptions.JSType. +const ( + FieldOptions_JS_NORMAL_enum_value = 0 + FieldOptions_JS_STRING_enum_value = 1 + FieldOptions_JS_NUMBER_enum_value = 2 +) + // Full and short names for google.protobuf.FieldOptions.OptionRetention. const ( FieldOptions_OptionRetention_enum_fullname = "google.protobuf.FieldOptions.OptionRetention" FieldOptions_OptionRetention_enum_name = "OptionRetention" ) +// Enum values for google.protobuf.FieldOptions.OptionRetention. +const ( + FieldOptions_RETENTION_UNKNOWN_enum_value = 0 + FieldOptions_RETENTION_RUNTIME_enum_value = 1 + FieldOptions_RETENTION_SOURCE_enum_value = 2 +) + // Full and short names for google.protobuf.FieldOptions.OptionTargetType. const ( FieldOptions_OptionTargetType_enum_fullname = "google.protobuf.FieldOptions.OptionTargetType" FieldOptions_OptionTargetType_enum_name = "OptionTargetType" ) +// Enum values for google.protobuf.FieldOptions.OptionTargetType. +const ( + FieldOptions_TARGET_TYPE_UNKNOWN_enum_value = 0 + FieldOptions_TARGET_TYPE_FILE_enum_value = 1 + FieldOptions_TARGET_TYPE_EXTENSION_RANGE_enum_value = 2 + FieldOptions_TARGET_TYPE_MESSAGE_enum_value = 3 + FieldOptions_TARGET_TYPE_FIELD_enum_value = 4 + FieldOptions_TARGET_TYPE_ONEOF_enum_value = 5 + FieldOptions_TARGET_TYPE_ENUM_enum_value = 6 + FieldOptions_TARGET_TYPE_ENUM_ENTRY_enum_value = 7 + FieldOptions_TARGET_TYPE_SERVICE_enum_value = 8 + FieldOptions_TARGET_TYPE_METHOD_enum_value = 9 +) + +// Names for google.protobuf.FieldOptions.EditionDefault. +const ( + FieldOptions_EditionDefault_message_name protoreflect.Name = "EditionDefault" + FieldOptions_EditionDefault_message_fullname protoreflect.FullName = "google.protobuf.FieldOptions.EditionDefault" +) + +// Field names for google.protobuf.FieldOptions.EditionDefault. +const ( + FieldOptions_EditionDefault_Edition_field_name protoreflect.Name = "edition" + FieldOptions_EditionDefault_Value_field_name protoreflect.Name = "value" + + FieldOptions_EditionDefault_Edition_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.EditionDefault.edition" + FieldOptions_EditionDefault_Value_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.EditionDefault.value" +) + +// Field numbers for google.protobuf.FieldOptions.EditionDefault. +const ( + FieldOptions_EditionDefault_Edition_field_number protoreflect.FieldNumber = 3 + FieldOptions_EditionDefault_Value_field_number protoreflect.FieldNumber = 2 +) + +// Names for google.protobuf.FieldOptions.FeatureSupport. +const ( + FieldOptions_FeatureSupport_message_name protoreflect.Name = "FeatureSupport" + FieldOptions_FeatureSupport_message_fullname protoreflect.FullName = "google.protobuf.FieldOptions.FeatureSupport" +) + +// Field names for google.protobuf.FieldOptions.FeatureSupport. +const ( + FieldOptions_FeatureSupport_EditionIntroduced_field_name protoreflect.Name = "edition_introduced" + FieldOptions_FeatureSupport_EditionDeprecated_field_name protoreflect.Name = "edition_deprecated" + FieldOptions_FeatureSupport_DeprecationWarning_field_name protoreflect.Name = "deprecation_warning" + FieldOptions_FeatureSupport_EditionRemoved_field_name protoreflect.Name = "edition_removed" + + FieldOptions_FeatureSupport_EditionIntroduced_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.FeatureSupport.edition_introduced" + FieldOptions_FeatureSupport_EditionDeprecated_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.FeatureSupport.edition_deprecated" + FieldOptions_FeatureSupport_DeprecationWarning_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.FeatureSupport.deprecation_warning" + FieldOptions_FeatureSupport_EditionRemoved_field_fullname protoreflect.FullName = "google.protobuf.FieldOptions.FeatureSupport.edition_removed" +) + +// Field numbers for google.protobuf.FieldOptions.FeatureSupport. +const ( + FieldOptions_FeatureSupport_EditionIntroduced_field_number protoreflect.FieldNumber = 1 + FieldOptions_FeatureSupport_EditionDeprecated_field_number protoreflect.FieldNumber = 2 + FieldOptions_FeatureSupport_DeprecationWarning_field_number protoreflect.FieldNumber = 3 + FieldOptions_FeatureSupport_EditionRemoved_field_number protoreflect.FieldNumber = 4 +) + // Names for google.protobuf.OneofOptions. const ( OneofOptions_message_name protoreflect.Name = "OneofOptions" @@ -650,13 +806,16 @@ const ( // Field names for google.protobuf.OneofOptions. const ( + OneofOptions_Features_field_name protoreflect.Name = "features" OneofOptions_UninterpretedOption_field_name protoreflect.Name = "uninterpreted_option" + OneofOptions_Features_field_fullname protoreflect.FullName = "google.protobuf.OneofOptions.features" OneofOptions_UninterpretedOption_field_fullname protoreflect.FullName = "google.protobuf.OneofOptions.uninterpreted_option" ) // Field numbers for google.protobuf.OneofOptions. const ( + OneofOptions_Features_field_number protoreflect.FieldNumber = 1 OneofOptions_UninterpretedOption_field_number protoreflect.FieldNumber = 999 ) @@ -671,11 +830,13 @@ const ( EnumOptions_AllowAlias_field_name protoreflect.Name = "allow_alias" EnumOptions_Deprecated_field_name protoreflect.Name = "deprecated" EnumOptions_DeprecatedLegacyJsonFieldConflicts_field_name protoreflect.Name = "deprecated_legacy_json_field_conflicts" + EnumOptions_Features_field_name protoreflect.Name = "features" EnumOptions_UninterpretedOption_field_name protoreflect.Name = "uninterpreted_option" EnumOptions_AllowAlias_field_fullname protoreflect.FullName = "google.protobuf.EnumOptions.allow_alias" EnumOptions_Deprecated_field_fullname protoreflect.FullName = "google.protobuf.EnumOptions.deprecated" EnumOptions_DeprecatedLegacyJsonFieldConflicts_field_fullname protoreflect.FullName = "google.protobuf.EnumOptions.deprecated_legacy_json_field_conflicts" + EnumOptions_Features_field_fullname protoreflect.FullName = "google.protobuf.EnumOptions.features" EnumOptions_UninterpretedOption_field_fullname protoreflect.FullName = "google.protobuf.EnumOptions.uninterpreted_option" ) @@ -684,6 +845,7 @@ const ( EnumOptions_AllowAlias_field_number protoreflect.FieldNumber = 2 EnumOptions_Deprecated_field_number protoreflect.FieldNumber = 3 EnumOptions_DeprecatedLegacyJsonFieldConflicts_field_number protoreflect.FieldNumber = 6 + EnumOptions_Features_field_number protoreflect.FieldNumber = 7 EnumOptions_UninterpretedOption_field_number protoreflect.FieldNumber = 999 ) @@ -696,15 +858,24 @@ const ( // Field names for google.protobuf.EnumValueOptions. const ( EnumValueOptions_Deprecated_field_name protoreflect.Name = "deprecated" + EnumValueOptions_Features_field_name protoreflect.Name = "features" + EnumValueOptions_DebugRedact_field_name protoreflect.Name = "debug_redact" + EnumValueOptions_FeatureSupport_field_name protoreflect.Name = "feature_support" EnumValueOptions_UninterpretedOption_field_name protoreflect.Name = "uninterpreted_option" EnumValueOptions_Deprecated_field_fullname protoreflect.FullName = "google.protobuf.EnumValueOptions.deprecated" + EnumValueOptions_Features_field_fullname protoreflect.FullName = "google.protobuf.EnumValueOptions.features" + EnumValueOptions_DebugRedact_field_fullname protoreflect.FullName = "google.protobuf.EnumValueOptions.debug_redact" + EnumValueOptions_FeatureSupport_field_fullname protoreflect.FullName = "google.protobuf.EnumValueOptions.feature_support" EnumValueOptions_UninterpretedOption_field_fullname protoreflect.FullName = "google.protobuf.EnumValueOptions.uninterpreted_option" ) // Field numbers for google.protobuf.EnumValueOptions. const ( EnumValueOptions_Deprecated_field_number protoreflect.FieldNumber = 1 + EnumValueOptions_Features_field_number protoreflect.FieldNumber = 2 + EnumValueOptions_DebugRedact_field_number protoreflect.FieldNumber = 3 + EnumValueOptions_FeatureSupport_field_number protoreflect.FieldNumber = 4 EnumValueOptions_UninterpretedOption_field_number protoreflect.FieldNumber = 999 ) @@ -716,15 +887,18 @@ const ( // Field names for google.protobuf.ServiceOptions. const ( + ServiceOptions_Features_field_name protoreflect.Name = "features" ServiceOptions_Deprecated_field_name protoreflect.Name = "deprecated" ServiceOptions_UninterpretedOption_field_name protoreflect.Name = "uninterpreted_option" + ServiceOptions_Features_field_fullname protoreflect.FullName = "google.protobuf.ServiceOptions.features" ServiceOptions_Deprecated_field_fullname protoreflect.FullName = "google.protobuf.ServiceOptions.deprecated" ServiceOptions_UninterpretedOption_field_fullname protoreflect.FullName = "google.protobuf.ServiceOptions.uninterpreted_option" ) // Field numbers for google.protobuf.ServiceOptions. const ( + ServiceOptions_Features_field_number protoreflect.FieldNumber = 34 ServiceOptions_Deprecated_field_number protoreflect.FieldNumber = 33 ServiceOptions_UninterpretedOption_field_number protoreflect.FieldNumber = 999 ) @@ -739,10 +913,12 @@ const ( const ( MethodOptions_Deprecated_field_name protoreflect.Name = "deprecated" MethodOptions_IdempotencyLevel_field_name protoreflect.Name = "idempotency_level" + MethodOptions_Features_field_name protoreflect.Name = "features" MethodOptions_UninterpretedOption_field_name protoreflect.Name = "uninterpreted_option" MethodOptions_Deprecated_field_fullname protoreflect.FullName = "google.protobuf.MethodOptions.deprecated" MethodOptions_IdempotencyLevel_field_fullname protoreflect.FullName = "google.protobuf.MethodOptions.idempotency_level" + MethodOptions_Features_field_fullname protoreflect.FullName = "google.protobuf.MethodOptions.features" MethodOptions_UninterpretedOption_field_fullname protoreflect.FullName = "google.protobuf.MethodOptions.uninterpreted_option" ) @@ -750,6 +926,7 @@ const ( const ( MethodOptions_Deprecated_field_number protoreflect.FieldNumber = 33 MethodOptions_IdempotencyLevel_field_number protoreflect.FieldNumber = 34 + MethodOptions_Features_field_number protoreflect.FieldNumber = 35 MethodOptions_UninterpretedOption_field_number protoreflect.FieldNumber = 999 ) @@ -759,6 +936,13 @@ const ( MethodOptions_IdempotencyLevel_enum_name = "IdempotencyLevel" ) +// Enum values for google.protobuf.MethodOptions.IdempotencyLevel. +const ( + MethodOptions_IDEMPOTENCY_UNKNOWN_enum_value = 0 + MethodOptions_NO_SIDE_EFFECTS_enum_value = 1 + MethodOptions_IDEMPOTENT_enum_value = 2 +) + // Names for google.protobuf.UninterpretedOption. const ( UninterpretedOption_message_name protoreflect.Name = "UninterpretedOption" @@ -816,6 +1000,166 @@ const ( UninterpretedOption_NamePart_IsExtension_field_number protoreflect.FieldNumber = 2 ) +// Names for google.protobuf.FeatureSet. +const ( + FeatureSet_message_name protoreflect.Name = "FeatureSet" + FeatureSet_message_fullname protoreflect.FullName = "google.protobuf.FeatureSet" +) + +// Field names for google.protobuf.FeatureSet. +const ( + FeatureSet_FieldPresence_field_name protoreflect.Name = "field_presence" + FeatureSet_EnumType_field_name protoreflect.Name = "enum_type" + FeatureSet_RepeatedFieldEncoding_field_name protoreflect.Name = "repeated_field_encoding" + FeatureSet_Utf8Validation_field_name protoreflect.Name = "utf8_validation" + FeatureSet_MessageEncoding_field_name protoreflect.Name = "message_encoding" + FeatureSet_JsonFormat_field_name protoreflect.Name = "json_format" + + FeatureSet_FieldPresence_field_fullname protoreflect.FullName = "google.protobuf.FeatureSet.field_presence" + FeatureSet_EnumType_field_fullname protoreflect.FullName = "google.protobuf.FeatureSet.enum_type" + FeatureSet_RepeatedFieldEncoding_field_fullname protoreflect.FullName = "google.protobuf.FeatureSet.repeated_field_encoding" + FeatureSet_Utf8Validation_field_fullname protoreflect.FullName = "google.protobuf.FeatureSet.utf8_validation" + FeatureSet_MessageEncoding_field_fullname protoreflect.FullName = "google.protobuf.FeatureSet.message_encoding" + FeatureSet_JsonFormat_field_fullname protoreflect.FullName = "google.protobuf.FeatureSet.json_format" +) + +// Field numbers for google.protobuf.FeatureSet. +const ( + FeatureSet_FieldPresence_field_number protoreflect.FieldNumber = 1 + FeatureSet_EnumType_field_number protoreflect.FieldNumber = 2 + FeatureSet_RepeatedFieldEncoding_field_number protoreflect.FieldNumber = 3 + FeatureSet_Utf8Validation_field_number protoreflect.FieldNumber = 4 + FeatureSet_MessageEncoding_field_number protoreflect.FieldNumber = 5 + FeatureSet_JsonFormat_field_number protoreflect.FieldNumber = 6 +) + +// Full and short names for google.protobuf.FeatureSet.FieldPresence. +const ( + FeatureSet_FieldPresence_enum_fullname = "google.protobuf.FeatureSet.FieldPresence" + FeatureSet_FieldPresence_enum_name = "FieldPresence" +) + +// Enum values for google.protobuf.FeatureSet.FieldPresence. +const ( + FeatureSet_FIELD_PRESENCE_UNKNOWN_enum_value = 0 + FeatureSet_EXPLICIT_enum_value = 1 + FeatureSet_IMPLICIT_enum_value = 2 + FeatureSet_LEGACY_REQUIRED_enum_value = 3 +) + +// Full and short names for google.protobuf.FeatureSet.EnumType. +const ( + FeatureSet_EnumType_enum_fullname = "google.protobuf.FeatureSet.EnumType" + FeatureSet_EnumType_enum_name = "EnumType" +) + +// Enum values for google.protobuf.FeatureSet.EnumType. +const ( + FeatureSet_ENUM_TYPE_UNKNOWN_enum_value = 0 + FeatureSet_OPEN_enum_value = 1 + FeatureSet_CLOSED_enum_value = 2 +) + +// Full and short names for google.protobuf.FeatureSet.RepeatedFieldEncoding. +const ( + FeatureSet_RepeatedFieldEncoding_enum_fullname = "google.protobuf.FeatureSet.RepeatedFieldEncoding" + FeatureSet_RepeatedFieldEncoding_enum_name = "RepeatedFieldEncoding" +) + +// Enum values for google.protobuf.FeatureSet.RepeatedFieldEncoding. +const ( + FeatureSet_REPEATED_FIELD_ENCODING_UNKNOWN_enum_value = 0 + FeatureSet_PACKED_enum_value = 1 + FeatureSet_EXPANDED_enum_value = 2 +) + +// Full and short names for google.protobuf.FeatureSet.Utf8Validation. +const ( + FeatureSet_Utf8Validation_enum_fullname = "google.protobuf.FeatureSet.Utf8Validation" + FeatureSet_Utf8Validation_enum_name = "Utf8Validation" +) + +// Enum values for google.protobuf.FeatureSet.Utf8Validation. +const ( + FeatureSet_UTF8_VALIDATION_UNKNOWN_enum_value = 0 + FeatureSet_VERIFY_enum_value = 2 + FeatureSet_NONE_enum_value = 3 +) + +// Full and short names for google.protobuf.FeatureSet.MessageEncoding. +const ( + FeatureSet_MessageEncoding_enum_fullname = "google.protobuf.FeatureSet.MessageEncoding" + FeatureSet_MessageEncoding_enum_name = "MessageEncoding" +) + +// Enum values for google.protobuf.FeatureSet.MessageEncoding. +const ( + FeatureSet_MESSAGE_ENCODING_UNKNOWN_enum_value = 0 + FeatureSet_LENGTH_PREFIXED_enum_value = 1 + FeatureSet_DELIMITED_enum_value = 2 +) + +// Full and short names for google.protobuf.FeatureSet.JsonFormat. +const ( + FeatureSet_JsonFormat_enum_fullname = "google.protobuf.FeatureSet.JsonFormat" + FeatureSet_JsonFormat_enum_name = "JsonFormat" +) + +// Enum values for google.protobuf.FeatureSet.JsonFormat. +const ( + FeatureSet_JSON_FORMAT_UNKNOWN_enum_value = 0 + FeatureSet_ALLOW_enum_value = 1 + FeatureSet_LEGACY_BEST_EFFORT_enum_value = 2 +) + +// Names for google.protobuf.FeatureSetDefaults. +const ( + FeatureSetDefaults_message_name protoreflect.Name = "FeatureSetDefaults" + FeatureSetDefaults_message_fullname protoreflect.FullName = "google.protobuf.FeatureSetDefaults" +) + +// Field names for google.protobuf.FeatureSetDefaults. +const ( + FeatureSetDefaults_Defaults_field_name protoreflect.Name = "defaults" + FeatureSetDefaults_MinimumEdition_field_name protoreflect.Name = "minimum_edition" + FeatureSetDefaults_MaximumEdition_field_name protoreflect.Name = "maximum_edition" + + FeatureSetDefaults_Defaults_field_fullname protoreflect.FullName = "google.protobuf.FeatureSetDefaults.defaults" + FeatureSetDefaults_MinimumEdition_field_fullname protoreflect.FullName = "google.protobuf.FeatureSetDefaults.minimum_edition" + FeatureSetDefaults_MaximumEdition_field_fullname protoreflect.FullName = "google.protobuf.FeatureSetDefaults.maximum_edition" +) + +// Field numbers for google.protobuf.FeatureSetDefaults. +const ( + FeatureSetDefaults_Defaults_field_number protoreflect.FieldNumber = 1 + FeatureSetDefaults_MinimumEdition_field_number protoreflect.FieldNumber = 4 + FeatureSetDefaults_MaximumEdition_field_number protoreflect.FieldNumber = 5 +) + +// Names for google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault. +const ( + FeatureSetDefaults_FeatureSetEditionDefault_message_name protoreflect.Name = "FeatureSetEditionDefault" + FeatureSetDefaults_FeatureSetEditionDefault_message_fullname protoreflect.FullName = "google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault" +) + +// Field names for google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault. +const ( + FeatureSetDefaults_FeatureSetEditionDefault_Edition_field_name protoreflect.Name = "edition" + FeatureSetDefaults_FeatureSetEditionDefault_OverridableFeatures_field_name protoreflect.Name = "overridable_features" + FeatureSetDefaults_FeatureSetEditionDefault_FixedFeatures_field_name protoreflect.Name = "fixed_features" + + FeatureSetDefaults_FeatureSetEditionDefault_Edition_field_fullname protoreflect.FullName = "google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault.edition" + FeatureSetDefaults_FeatureSetEditionDefault_OverridableFeatures_field_fullname protoreflect.FullName = "google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault.overridable_features" + FeatureSetDefaults_FeatureSetEditionDefault_FixedFeatures_field_fullname protoreflect.FullName = "google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault.fixed_features" +) + +// Field numbers for google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault. +const ( + FeatureSetDefaults_FeatureSetEditionDefault_Edition_field_number protoreflect.FieldNumber = 3 + FeatureSetDefaults_FeatureSetEditionDefault_OverridableFeatures_field_number protoreflect.FieldNumber = 4 + FeatureSetDefaults_FeatureSetEditionDefault_FixedFeatures_field_number protoreflect.FieldNumber = 5 +) + // Names for google.protobuf.SourceCodeInfo. const ( SourceCodeInfo_message_name protoreflect.Name = "SourceCodeInfo" @@ -917,3 +1261,10 @@ const ( GeneratedCodeInfo_Annotation_Semantic_enum_fullname = "google.protobuf.GeneratedCodeInfo.Annotation.Semantic" GeneratedCodeInfo_Annotation_Semantic_enum_name = "Semantic" ) + +// Enum values for google.protobuf.GeneratedCodeInfo.Annotation.Semantic. +const ( + GeneratedCodeInfo_Annotation_NONE_enum_value = 0 + GeneratedCodeInfo_Annotation_SET_enum_value = 1 + GeneratedCodeInfo_Annotation_ALIAS_enum_value = 2 +) diff --git a/vendor/google.golang.org/protobuf/internal/genid/doc.go b/vendor/google.golang.org/protobuf/internal/genid/doc.go index 45ccd012..d9b9d916 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/doc.go +++ b/vendor/google.golang.org/protobuf/internal/genid/doc.go @@ -6,6 +6,6 @@ // and the well-known types. package genid -import protoreflect "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" const GoogleProtobuf_package protoreflect.FullName = "google.protobuf" diff --git a/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go b/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go new file mode 100644 index 00000000..7f67cbb6 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/genid/go_features_gen.go @@ -0,0 +1,36 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Code generated by generate-protos. DO NOT EDIT. + +package genid + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" +) + +const File_google_protobuf_go_features_proto = "google/protobuf/go_features.proto" + +// Names for pb.GoFeatures. +const ( + GoFeatures_message_name protoreflect.Name = "GoFeatures" + GoFeatures_message_fullname protoreflect.FullName = "pb.GoFeatures" +) + +// Field names for pb.GoFeatures. +const ( + GoFeatures_LegacyUnmarshalJsonEnum_field_name protoreflect.Name = "legacy_unmarshal_json_enum" + + GoFeatures_LegacyUnmarshalJsonEnum_field_fullname protoreflect.FullName = "pb.GoFeatures.legacy_unmarshal_json_enum" +) + +// Field numbers for pb.GoFeatures. +const ( + GoFeatures_LegacyUnmarshalJsonEnum_field_number protoreflect.FieldNumber = 1 +) + +// Extension numbers +const ( + FeatureSet_Go_ext_number protoreflect.FieldNumber = 1002 +) diff --git a/vendor/google.golang.org/protobuf/internal/genid/map_entry.go b/vendor/google.golang.org/protobuf/internal/genid/map_entry.go index 8f9ea02f..bef5a25f 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/map_entry.go +++ b/vendor/google.golang.org/protobuf/internal/genid/map_entry.go @@ -4,7 +4,7 @@ package genid -import protoreflect "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" // Generic field names and numbers for synthetic map entry messages. const ( diff --git a/vendor/google.golang.org/protobuf/internal/genid/struct_gen.go b/vendor/google.golang.org/protobuf/internal/genid/struct_gen.go index 1a38944b..ad6f80c4 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/struct_gen.go +++ b/vendor/google.golang.org/protobuf/internal/genid/struct_gen.go @@ -18,6 +18,11 @@ const ( NullValue_enum_name = "NullValue" ) +// Enum values for google.protobuf.NullValue. +const ( + NullValue_NULL_VALUE_enum_value = 0 +) + // Names for google.protobuf.Struct. const ( Struct_message_name protoreflect.Name = "Struct" diff --git a/vendor/google.golang.org/protobuf/internal/genid/type_gen.go b/vendor/google.golang.org/protobuf/internal/genid/type_gen.go index e0f75fea..49bc73e2 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/type_gen.go +++ b/vendor/google.golang.org/protobuf/internal/genid/type_gen.go @@ -18,6 +18,13 @@ const ( Syntax_enum_name = "Syntax" ) +// Enum values for google.protobuf.Syntax. +const ( + Syntax_SYNTAX_PROTO2_enum_value = 0 + Syntax_SYNTAX_PROTO3_enum_value = 1 + Syntax_SYNTAX_EDITIONS_enum_value = 2 +) + // Names for google.protobuf.Type. const ( Type_message_name protoreflect.Name = "Type" @@ -105,12 +112,43 @@ const ( Field_Kind_enum_name = "Kind" ) +// Enum values for google.protobuf.Field.Kind. +const ( + Field_TYPE_UNKNOWN_enum_value = 0 + Field_TYPE_DOUBLE_enum_value = 1 + Field_TYPE_FLOAT_enum_value = 2 + Field_TYPE_INT64_enum_value = 3 + Field_TYPE_UINT64_enum_value = 4 + Field_TYPE_INT32_enum_value = 5 + Field_TYPE_FIXED64_enum_value = 6 + Field_TYPE_FIXED32_enum_value = 7 + Field_TYPE_BOOL_enum_value = 8 + Field_TYPE_STRING_enum_value = 9 + Field_TYPE_GROUP_enum_value = 10 + Field_TYPE_MESSAGE_enum_value = 11 + Field_TYPE_BYTES_enum_value = 12 + Field_TYPE_UINT32_enum_value = 13 + Field_TYPE_ENUM_enum_value = 14 + Field_TYPE_SFIXED32_enum_value = 15 + Field_TYPE_SFIXED64_enum_value = 16 + Field_TYPE_SINT32_enum_value = 17 + Field_TYPE_SINT64_enum_value = 18 +) + // Full and short names for google.protobuf.Field.Cardinality. const ( Field_Cardinality_enum_fullname = "google.protobuf.Field.Cardinality" Field_Cardinality_enum_name = "Cardinality" ) +// Enum values for google.protobuf.Field.Cardinality. +const ( + Field_CARDINALITY_UNKNOWN_enum_value = 0 + Field_CARDINALITY_OPTIONAL_enum_value = 1 + Field_CARDINALITY_REQUIRED_enum_value = 2 + Field_CARDINALITY_REPEATED_enum_value = 3 +) + // Names for google.protobuf.Enum. const ( Enum_message_name protoreflect.Name = "Enum" diff --git a/vendor/google.golang.org/protobuf/internal/genid/wrappers.go b/vendor/google.golang.org/protobuf/internal/genid/wrappers.go index 429384b8..9404270d 100644 --- a/vendor/google.golang.org/protobuf/internal/genid/wrappers.go +++ b/vendor/google.golang.org/protobuf/internal/genid/wrappers.go @@ -4,7 +4,7 @@ package genid -import protoreflect "google.golang.org/protobuf/reflect/protoreflect" +import "google.golang.org/protobuf/reflect/protoreflect" // Generic field name and number for messages in wrappers.proto. const ( diff --git a/vendor/google.golang.org/protobuf/internal/impl/api_export.go b/vendor/google.golang.org/protobuf/internal/impl/api_export.go index a371f98d..5d5771c2 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/api_export.go +++ b/vendor/google.golang.org/protobuf/internal/impl/api_export.go @@ -22,13 +22,13 @@ type Export struct{} // NewError formats a string according to the format specifier and arguments and // returns an error that has a "proto" prefix. -func (Export) NewError(f string, x ...interface{}) error { +func (Export) NewError(f string, x ...any) error { return errors.New(f, x...) } // enum is any enum type generated by protoc-gen-go // and must be a named int32 type. -type enum = interface{} +type enum = any // EnumOf returns the protoreflect.Enum interface over e. // It returns nil if e is nil. @@ -81,7 +81,7 @@ func (Export) EnumStringOf(ed protoreflect.EnumDescriptor, n protoreflect.EnumNu // message is any message type generated by protoc-gen-go // and must be a pointer to a named struct type. -type message = interface{} +type message = any // legacyMessageWrapper wraps a v2 message as a v1 message. type legacyMessageWrapper struct{ m protoreflect.ProtoMessage } diff --git a/vendor/google.golang.org/protobuf/internal/impl/checkinit.go b/vendor/google.golang.org/protobuf/internal/impl/checkinit.go index bff041ed..f29e6a8f 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/checkinit.go +++ b/vendor/google.golang.org/protobuf/internal/impl/checkinit.go @@ -68,7 +68,7 @@ func (mi *MessageInfo) isInitExtensions(ext *map[int32]ExtensionField) error { } for _, x := range *ext { ei := getExtensionFieldInfo(x.Type()) - if ei.funcs.isInit == nil { + if ei.funcs.isInit == nil || x.isUnexpandedLazy() { continue } v := x.Value() diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go b/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go index e74cefdc..0d5b546e 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_extension.go @@ -21,26 +21,18 @@ type extensionFieldInfo struct { validation validationInfo } -var legacyExtensionFieldInfoCache sync.Map // map[protoreflect.ExtensionType]*extensionFieldInfo - func getExtensionFieldInfo(xt protoreflect.ExtensionType) *extensionFieldInfo { if xi, ok := xt.(*ExtensionInfo); ok { xi.lazyInit() return xi.info } - return legacyLoadExtensionFieldInfo(xt) -} - -// legacyLoadExtensionFieldInfo dynamically loads a *ExtensionInfo for xt. -func legacyLoadExtensionFieldInfo(xt protoreflect.ExtensionType) *extensionFieldInfo { - if xi, ok := legacyExtensionFieldInfoCache.Load(xt); ok { - return xi.(*extensionFieldInfo) - } - e := makeExtensionFieldInfo(xt.TypeDescriptor()) - if e, ok := legacyMessageTypeCache.LoadOrStore(xt, e); ok { - return e.(*extensionFieldInfo) - } - return e + // Ideally we'd cache the resulting *extensionFieldInfo so we don't have to + // recompute this metadata repeatedly. But without support for something like + // weak references, such a cache would pin temporary values (like dynamic + // extension types, constructed for the duration of a user request) to the + // heap forever, causing memory usage of the cache to grow unbounded. + // See discussion in https://github.com/golang/protobuf/issues/1521. + return makeExtensionFieldInfo(xt.TypeDescriptor()) } func makeExtensionFieldInfo(xd protoreflect.ExtensionDescriptor) *extensionFieldInfo { @@ -75,7 +67,6 @@ type lazyExtensionValue struct { xi *extensionFieldInfo value protoreflect.Value b []byte - fn func() protoreflect.Value } type ExtensionField struct { @@ -107,6 +98,28 @@ func (f *ExtensionField) canLazy(xt protoreflect.ExtensionType) bool { return false } +// isUnexpandedLazy returns true if the ExensionField is lazy and not +// yet expanded, which means it's present and already checked for +// initialized required fields. +func (f *ExtensionField) isUnexpandedLazy() bool { + return f.lazy != nil && atomic.LoadUint32(&f.lazy.atomicOnce) == 0 +} + +// lazyBuffer retrieves the buffer for a lazy extension if it's not yet expanded. +// +// The returned buffer has to be kept over whatever operation we're planning, +// as re-retrieving it will fail after the message is lazily decoded. +func (f *ExtensionField) lazyBuffer() []byte { + // This function might be in the critical path, so check the atomic without + // taking a look first, then only take the lock if needed. + if !f.isUnexpandedLazy() { + return nil + } + f.lazy.mu.Lock() + defer f.lazy.mu.Unlock() + return f.lazy.b +} + func (f *ExtensionField) lazyInit() { f.lazy.mu.Lock() defer f.lazy.mu.Unlock() @@ -144,10 +157,9 @@ func (f *ExtensionField) lazyInit() { } f.lazy.value = val } else { - f.lazy.value = f.lazy.fn() + panic("No support for lazy fns for ExtensionField") } f.lazy.xi = nil - f.lazy.fn = nil f.lazy.b = nil atomic.StoreUint32(&f.lazy.atomicOnce, 1) } @@ -160,13 +172,6 @@ func (f *ExtensionField) Set(t protoreflect.ExtensionType, v protoreflect.Value) f.lazy = nil } -// SetLazy sets the type and a value that is to be lazily evaluated upon first use. -// This must not be called concurrently. -func (f *ExtensionField) SetLazy(t protoreflect.ExtensionType, fn func() protoreflect.Value) { - f.typ = t - f.lazy = &lazyExtensionValue{fn: fn} -} - // Value returns the value of the extension field. // This may be called concurrently. func (f *ExtensionField) Value() protoreflect.Value { diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_field.go b/vendor/google.golang.org/protobuf/internal/impl/codec_field.go index 3fadd241..7c1f66c8 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_field.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_field.go @@ -65,6 +65,9 @@ func (mi *MessageInfo) initOneofFieldCoders(od protoreflect.OneofDescriptor, si if err != nil { return out, err } + if cf.funcs.isInit == nil { + out.initialized = true + } vi.Set(vw) return out, nil } @@ -233,9 +236,15 @@ func sizeMessageInfo(p pointer, f *coderFieldInfo, opts marshalOptions) int { } func appendMessageInfo(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { + calculatedSize := f.mi.sizePointer(p.Elem(), opts) b = protowire.AppendVarint(b, f.wiretag) - b = protowire.AppendVarint(b, uint64(f.mi.sizePointer(p.Elem(), opts))) - return f.mi.marshalAppendPointer(b, p.Elem(), opts) + b = protowire.AppendVarint(b, uint64(calculatedSize)) + before := len(b) + b, err := f.mi.marshalAppendPointer(b, p.Elem(), opts) + if measuredSize := len(b) - before; calculatedSize != measuredSize && err == nil { + return nil, errors.MismatchedSizeCalculation(calculatedSize, measuredSize) + } + return b, err } func consumeMessageInfo(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { @@ -262,14 +271,21 @@ func isInitMessageInfo(p pointer, f *coderFieldInfo) error { return f.mi.checkInitializedPointer(p.Elem()) } -func sizeMessage(m proto.Message, tagsize int, _ marshalOptions) int { - return protowire.SizeBytes(proto.Size(m)) + tagsize +func sizeMessage(m proto.Message, tagsize int, opts marshalOptions) int { + return protowire.SizeBytes(opts.Options().Size(m)) + tagsize } func appendMessage(b []byte, m proto.Message, wiretag uint64, opts marshalOptions) ([]byte, error) { + mopts := opts.Options() + calculatedSize := mopts.Size(m) b = protowire.AppendVarint(b, wiretag) - b = protowire.AppendVarint(b, uint64(proto.Size(m))) - return opts.Options().MarshalAppend(b, m) + b = protowire.AppendVarint(b, uint64(calculatedSize)) + before := len(b) + b, err := mopts.MarshalAppend(b, m) + if measuredSize := len(b) - before; calculatedSize != measuredSize && err == nil { + return nil, errors.MismatchedSizeCalculation(calculatedSize, measuredSize) + } + return b, err } func consumeMessage(b []byte, m proto.Message, wtyp protowire.Type, opts unmarshalOptions) (out unmarshalOutput, err error) { @@ -405,8 +421,8 @@ func consumeGroupType(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInf return f.mi.unmarshalPointer(b, p.Elem(), f.num, opts) } -func sizeGroup(m proto.Message, tagsize int, _ marshalOptions) int { - return 2*tagsize + proto.Size(m) +func sizeGroup(m proto.Message, tagsize int, opts marshalOptions) int { + return 2*tagsize + opts.Options().Size(m) } func appendGroup(b []byte, m proto.Message, wiretag uint64, opts marshalOptions) ([]byte, error) { @@ -482,10 +498,14 @@ func appendMessageSliceInfo(b []byte, p pointer, f *coderFieldInfo, opts marshal b = protowire.AppendVarint(b, f.wiretag) siz := f.mi.sizePointer(v, opts) b = protowire.AppendVarint(b, uint64(siz)) + before := len(b) b, err = f.mi.marshalAppendPointer(b, v, opts) if err != nil { return b, err } + if measuredSize := len(b) - before; siz != measuredSize { + return nil, errors.MismatchedSizeCalculation(siz, measuredSize) + } } return b, nil } @@ -520,28 +540,34 @@ func isInitMessageSliceInfo(p pointer, f *coderFieldInfo) error { return nil } -func sizeMessageSlice(p pointer, goType reflect.Type, tagsize int, _ marshalOptions) int { +func sizeMessageSlice(p pointer, goType reflect.Type, tagsize int, opts marshalOptions) int { + mopts := opts.Options() s := p.PointerSlice() n := 0 for _, v := range s { m := asMessage(v.AsValueOf(goType.Elem())) - n += protowire.SizeBytes(proto.Size(m)) + tagsize + n += protowire.SizeBytes(mopts.Size(m)) + tagsize } return n } func appendMessageSlice(b []byte, p pointer, wiretag uint64, goType reflect.Type, opts marshalOptions) ([]byte, error) { + mopts := opts.Options() s := p.PointerSlice() var err error for _, v := range s { m := asMessage(v.AsValueOf(goType.Elem())) b = protowire.AppendVarint(b, wiretag) - siz := proto.Size(m) + siz := mopts.Size(m) b = protowire.AppendVarint(b, uint64(siz)) - b, err = opts.Options().MarshalAppend(b, m) + before := len(b) + b, err = mopts.MarshalAppend(b, m) if err != nil { return b, err } + if measuredSize := len(b) - before; siz != measuredSize { + return nil, errors.MismatchedSizeCalculation(siz, measuredSize) + } } return b, nil } @@ -582,11 +608,12 @@ func isInitMessageSlice(p pointer, goType reflect.Type) error { // Slices of messages func sizeMessageSliceValue(listv protoreflect.Value, tagsize int, opts marshalOptions) int { + mopts := opts.Options() list := listv.List() n := 0 for i, llen := 0, list.Len(); i < llen; i++ { m := list.Get(i).Message().Interface() - n += protowire.SizeBytes(proto.Size(m)) + tagsize + n += protowire.SizeBytes(mopts.Size(m)) + tagsize } return n } @@ -597,13 +624,17 @@ func appendMessageSliceValue(b []byte, listv protoreflect.Value, wiretag uint64, for i, llen := 0, list.Len(); i < llen; i++ { m := list.Get(i).Message().Interface() b = protowire.AppendVarint(b, wiretag) - siz := proto.Size(m) + siz := mopts.Size(m) b = protowire.AppendVarint(b, uint64(siz)) + before := len(b) var err error b, err = mopts.MarshalAppend(b, m) if err != nil { return b, err } + if measuredSize := len(b) - before; siz != measuredSize { + return nil, errors.MismatchedSizeCalculation(siz, measuredSize) + } } return b, nil } @@ -651,11 +682,12 @@ var coderMessageSliceValue = valueCoderFuncs{ } func sizeGroupSliceValue(listv protoreflect.Value, tagsize int, opts marshalOptions) int { + mopts := opts.Options() list := listv.List() n := 0 for i, llen := 0, list.Len(); i < llen; i++ { m := list.Get(i).Message().Interface() - n += 2*tagsize + proto.Size(m) + n += 2*tagsize + mopts.Size(m) } return n } @@ -738,12 +770,13 @@ func makeGroupSliceFieldCoder(fd protoreflect.FieldDescriptor, ft reflect.Type) } } -func sizeGroupSlice(p pointer, messageType reflect.Type, tagsize int, _ marshalOptions) int { +func sizeGroupSlice(p pointer, messageType reflect.Type, tagsize int, opts marshalOptions) int { + mopts := opts.Options() s := p.PointerSlice() n := 0 for _, v := range s { m := asMessage(v.AsValueOf(messageType.Elem())) - n += 2*tagsize + proto.Size(m) + n += 2*tagsize + mopts.Size(m) } return n } diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_gen.go b/vendor/google.golang.org/protobuf/internal/impl/codec_gen.go index 1a509b63..f55dc01e 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_gen.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_gen.go @@ -162,11 +162,20 @@ func appendBoolSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions func consumeBoolSlice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.BoolSlice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := 0 + for _, v := range b { + if v < 0x80 { + count++ + } + } + if count > 0 { + p.growBoolSlice(count) + } + s := *sp for len(b) > 0 { var v uint64 var n int @@ -732,11 +741,20 @@ func appendInt32Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOption func consumeInt32Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Int32Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := 0 + for _, v := range b { + if v < 0x80 { + count++ + } + } + if count > 0 { + p.growInt32Slice(count) + } + s := *sp for len(b) > 0 { var v uint64 var n int @@ -1138,11 +1156,20 @@ func appendSint32Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptio func consumeSint32Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Int32Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := 0 + for _, v := range b { + if v < 0x80 { + count++ + } + } + if count > 0 { + p.growInt32Slice(count) + } + s := *sp for len(b) > 0 { var v uint64 var n int @@ -1544,11 +1571,20 @@ func appendUint32Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptio func consumeUint32Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Uint32Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := 0 + for _, v := range b { + if v < 0x80 { + count++ + } + } + if count > 0 { + p.growUint32Slice(count) + } + s := *sp for len(b) > 0 { var v uint64 var n int @@ -1950,11 +1986,20 @@ func appendInt64Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOption func consumeInt64Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Int64Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := 0 + for _, v := range b { + if v < 0x80 { + count++ + } + } + if count > 0 { + p.growInt64Slice(count) + } + s := *sp for len(b) > 0 { var v uint64 var n int @@ -2356,11 +2401,20 @@ func appendSint64Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptio func consumeSint64Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Int64Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := 0 + for _, v := range b { + if v < 0x80 { + count++ + } + } + if count > 0 { + p.growInt64Slice(count) + } + s := *sp for len(b) > 0 { var v uint64 var n int @@ -2762,11 +2816,20 @@ func appendUint64Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptio func consumeUint64Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Uint64Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := 0 + for _, v := range b { + if v < 0x80 { + count++ + } + } + if count > 0 { + p.growUint64Slice(count) + } + s := *sp for len(b) > 0 { var v uint64 var n int @@ -3145,11 +3208,15 @@ func appendSfixed32Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOpt func consumeSfixed32Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Int32Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := len(b) / protowire.SizeFixed32() + if count > 0 { + p.growInt32Slice(count) + } + s := *sp for len(b) > 0 { v, n := protowire.ConsumeFixed32(b) if n < 0 { @@ -3461,11 +3528,15 @@ func appendFixed32Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOpti func consumeFixed32Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Uint32Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := len(b) / protowire.SizeFixed32() + if count > 0 { + p.growUint32Slice(count) + } + s := *sp for len(b) > 0 { v, n := protowire.ConsumeFixed32(b) if n < 0 { @@ -3777,11 +3848,15 @@ func appendFloatSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOption func consumeFloatSlice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Float32Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := len(b) / protowire.SizeFixed32() + if count > 0 { + p.growFloat32Slice(count) + } + s := *sp for len(b) > 0 { v, n := protowire.ConsumeFixed32(b) if n < 0 { @@ -4093,11 +4168,15 @@ func appendSfixed64Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOpt func consumeSfixed64Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Int64Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := len(b) / protowire.SizeFixed64() + if count > 0 { + p.growInt64Slice(count) + } + s := *sp for len(b) > 0 { v, n := protowire.ConsumeFixed64(b) if n < 0 { @@ -4409,11 +4488,15 @@ func appendFixed64Slice(b []byte, p pointer, f *coderFieldInfo, opts marshalOpti func consumeFixed64Slice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Uint64Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := len(b) / protowire.SizeFixed64() + if count > 0 { + p.growUint64Slice(count) + } + s := *sp for len(b) > 0 { v, n := protowire.ConsumeFixed64(b) if n < 0 { @@ -4725,11 +4808,15 @@ func appendDoubleSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptio func consumeDoubleSlice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { sp := p.Float64Slice() if wtyp == protowire.BytesType { - s := *sp b, n := protowire.ConsumeBytes(b) if n < 0 { return out, errDecode } + count := len(b) / protowire.SizeFixed64() + if count > 0 { + p.growFloat64Slice(count) + } + s := *sp for len(b) > 0 { v, n := protowire.ConsumeFixed64(b) if n < 0 { diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_map.go b/vendor/google.golang.org/protobuf/internal/impl/codec_map.go index 111b9d16..fb35f0ba 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_map.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_map.go @@ -9,6 +9,7 @@ import ( "sort" "google.golang.org/protobuf/encoding/protowire" + "google.golang.org/protobuf/internal/errors" "google.golang.org/protobuf/internal/genid" "google.golang.org/protobuf/reflect/protoreflect" ) @@ -240,11 +241,16 @@ func appendMapItem(b []byte, keyrv, valrv reflect.Value, mapi *mapInfo, f *coder size += mapi.keyFuncs.size(key.Value(), mapKeyTagSize, opts) size += mapi.valFuncs.size(val, mapValTagSize, opts) b = protowire.AppendVarint(b, uint64(size)) + before := len(b) b, err := mapi.keyFuncs.marshal(b, key.Value(), mapi.keyWiretag, opts) if err != nil { return nil, err } - return mapi.valFuncs.marshal(b, val, mapi.valWiretag, opts) + b, err = mapi.valFuncs.marshal(b, val, mapi.valWiretag, opts) + if measuredSize := len(b) - before; size != measuredSize && err == nil { + return nil, errors.MismatchedSizeCalculation(size, measuredSize) + } + return b, err } else { key := mapi.conv.keyConv.PBValueOf(keyrv).MapKey() val := pointerOfValue(valrv) @@ -259,7 +265,12 @@ func appendMapItem(b []byte, keyrv, valrv reflect.Value, mapi *mapInfo, f *coder } b = protowire.AppendVarint(b, mapi.valWiretag) b = protowire.AppendVarint(b, uint64(valSize)) - return f.mi.marshalAppendPointer(b, val, opts) + before := len(b) + b, err = f.mi.marshalAppendPointer(b, val, opts) + if measuredSize := len(b) - before; valSize != measuredSize && err == nil { + return nil, errors.MismatchedSizeCalculation(valSize, measuredSize) + } + return b, err } } diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_message.go b/vendor/google.golang.org/protobuf/internal/impl/codec_message.go index 6b2fdbb7..78be9df3 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_message.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_message.go @@ -189,6 +189,9 @@ func (mi *MessageInfo) makeCoderMethods(t reflect.Type, si structInfo) { if mi.methods.Merge == nil { mi.methods.Merge = mi.merge } + if mi.methods.Equal == nil { + mi.methods.Equal = equal + } } // getUnknownBytes returns a *[]byte for the unknown fields. diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_messageset.go b/vendor/google.golang.org/protobuf/internal/impl/codec_messageset.go index b7a23faf..7a16ec13 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_messageset.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_messageset.go @@ -26,6 +26,15 @@ func sizeMessageSet(mi *MessageInfo, p pointer, opts marshalOptions) (size int) } num, _ := protowire.DecodeTag(xi.wiretag) size += messageset.SizeField(num) + if fullyLazyExtensions(opts) { + // Don't expand the extension, instead use the buffer to calculate size + if lb := x.lazyBuffer(); lb != nil { + // We got hold of the buffer, so it's still lazy. + // Don't count the tag size in the extension buffer, it's already added. + size += protowire.SizeTag(messageset.FieldMessage) + len(lb) - xi.tagsize + continue + } + } size += xi.funcs.size(x.Value(), protowire.SizeTag(messageset.FieldMessage), opts) } @@ -85,6 +94,19 @@ func marshalMessageSetField(mi *MessageInfo, b []byte, x ExtensionField, opts ma xi := getExtensionFieldInfo(x.Type()) num, _ := protowire.DecodeTag(xi.wiretag) b = messageset.AppendFieldStart(b, num) + + if fullyLazyExtensions(opts) { + // Don't expand the extension if it's still in wire format, instead use the buffer content. + if lb := x.lazyBuffer(); lb != nil { + // The tag inside the lazy buffer is a different tag (the extension + // number), but what we need here is the tag for FieldMessage: + b = protowire.AppendVarint(b, protowire.EncodeTag(messageset.FieldMessage, protowire.BytesType)) + b = append(b, lb[xi.tagsize:]...) + b = messageset.AppendFieldEnd(b) + return b, nil + } + } + b, err := xi.funcs.marshal(b, x.Value(), protowire.EncodeTag(messageset.FieldMessage, protowire.BytesType), opts) if err != nil { return b, err diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go b/vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go deleted file mode 100644 index 145c577b..00000000 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_reflect.go +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2019 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package impl - -import ( - "reflect" - - "google.golang.org/protobuf/encoding/protowire" -) - -func sizeEnum(p pointer, f *coderFieldInfo, _ marshalOptions) (size int) { - v := p.v.Elem().Int() - return f.tagsize + protowire.SizeVarint(uint64(v)) -} - -func appendEnum(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - v := p.v.Elem().Int() - b = protowire.AppendVarint(b, f.wiretag) - b = protowire.AppendVarint(b, uint64(v)) - return b, nil -} - -func consumeEnum(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, _ unmarshalOptions) (out unmarshalOutput, err error) { - if wtyp != protowire.VarintType { - return out, errUnknown - } - v, n := protowire.ConsumeVarint(b) - if n < 0 { - return out, errDecode - } - p.v.Elem().SetInt(int64(v)) - out.n = n - return out, nil -} - -func mergeEnum(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - dst.v.Elem().Set(src.v.Elem()) -} - -var coderEnum = pointerCoderFuncs{ - size: sizeEnum, - marshal: appendEnum, - unmarshal: consumeEnum, - merge: mergeEnum, -} - -func sizeEnumNoZero(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - if p.v.Elem().Int() == 0 { - return 0 - } - return sizeEnum(p, f, opts) -} - -func appendEnumNoZero(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - if p.v.Elem().Int() == 0 { - return b, nil - } - return appendEnum(b, p, f, opts) -} - -func mergeEnumNoZero(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - if src.v.Elem().Int() != 0 { - dst.v.Elem().Set(src.v.Elem()) - } -} - -var coderEnumNoZero = pointerCoderFuncs{ - size: sizeEnumNoZero, - marshal: appendEnumNoZero, - unmarshal: consumeEnum, - merge: mergeEnumNoZero, -} - -func sizeEnumPtr(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - return sizeEnum(pointer{p.v.Elem()}, f, opts) -} - -func appendEnumPtr(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - return appendEnum(b, pointer{p.v.Elem()}, f, opts) -} - -func consumeEnumPtr(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { - if wtyp != protowire.VarintType { - return out, errUnknown - } - if p.v.Elem().IsNil() { - p.v.Elem().Set(reflect.New(p.v.Elem().Type().Elem())) - } - return consumeEnum(b, pointer{p.v.Elem()}, wtyp, f, opts) -} - -func mergeEnumPtr(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - if !src.v.Elem().IsNil() { - v := reflect.New(dst.v.Type().Elem().Elem()) - v.Elem().Set(src.v.Elem().Elem()) - dst.v.Elem().Set(v) - } -} - -var coderEnumPtr = pointerCoderFuncs{ - size: sizeEnumPtr, - marshal: appendEnumPtr, - unmarshal: consumeEnumPtr, - merge: mergeEnumPtr, -} - -func sizeEnumSlice(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - s := p.v.Elem() - for i, llen := 0, s.Len(); i < llen; i++ { - size += protowire.SizeVarint(uint64(s.Index(i).Int())) + f.tagsize - } - return size -} - -func appendEnumSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - s := p.v.Elem() - for i, llen := 0, s.Len(); i < llen; i++ { - b = protowire.AppendVarint(b, f.wiretag) - b = protowire.AppendVarint(b, uint64(s.Index(i).Int())) - } - return b, nil -} - -func consumeEnumSlice(b []byte, p pointer, wtyp protowire.Type, f *coderFieldInfo, opts unmarshalOptions) (out unmarshalOutput, err error) { - s := p.v.Elem() - if wtyp == protowire.BytesType { - b, n := protowire.ConsumeBytes(b) - if n < 0 { - return out, errDecode - } - for len(b) > 0 { - v, n := protowire.ConsumeVarint(b) - if n < 0 { - return out, errDecode - } - rv := reflect.New(s.Type().Elem()).Elem() - rv.SetInt(int64(v)) - s.Set(reflect.Append(s, rv)) - b = b[n:] - } - out.n = n - return out, nil - } - if wtyp != protowire.VarintType { - return out, errUnknown - } - v, n := protowire.ConsumeVarint(b) - if n < 0 { - return out, errDecode - } - rv := reflect.New(s.Type().Elem()).Elem() - rv.SetInt(int64(v)) - s.Set(reflect.Append(s, rv)) - out.n = n - return out, nil -} - -func mergeEnumSlice(dst, src pointer, _ *coderFieldInfo, _ mergeOptions) { - dst.v.Elem().Set(reflect.AppendSlice(dst.v.Elem(), src.v.Elem())) -} - -var coderEnumSlice = pointerCoderFuncs{ - size: sizeEnumSlice, - marshal: appendEnumSlice, - unmarshal: consumeEnumSlice, - merge: mergeEnumSlice, -} - -func sizeEnumPackedSlice(p pointer, f *coderFieldInfo, opts marshalOptions) (size int) { - s := p.v.Elem() - llen := s.Len() - if llen == 0 { - return 0 - } - n := 0 - for i := 0; i < llen; i++ { - n += protowire.SizeVarint(uint64(s.Index(i).Int())) - } - return f.tagsize + protowire.SizeBytes(n) -} - -func appendEnumPackedSlice(b []byte, p pointer, f *coderFieldInfo, opts marshalOptions) ([]byte, error) { - s := p.v.Elem() - llen := s.Len() - if llen == 0 { - return b, nil - } - b = protowire.AppendVarint(b, f.wiretag) - n := 0 - for i := 0; i < llen; i++ { - n += protowire.SizeVarint(uint64(s.Index(i).Int())) - } - b = protowire.AppendVarint(b, uint64(n)) - for i := 0; i < llen; i++ { - b = protowire.AppendVarint(b, uint64(s.Index(i).Int())) - } - return b, nil -} - -var coderEnumPackedSlice = pointerCoderFuncs{ - size: sizeEnumPackedSlice, - marshal: appendEnumPackedSlice, - unmarshal: consumeEnumSlice, - merge: mergeEnumSlice, -} diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_tables.go b/vendor/google.golang.org/protobuf/internal/impl/codec_tables.go index 576dcf3a..13077751 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_tables.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_tables.go @@ -197,7 +197,7 @@ func fieldCoder(fd protoreflect.FieldDescriptor, ft reflect.Type) (*MessageInfo, return getMessageInfo(ft), makeMessageFieldCoder(fd, ft) case fd.Kind() == protoreflect.GroupKind: return getMessageInfo(ft), makeGroupFieldCoder(fd, ft) - case fd.Syntax() == protoreflect.Proto3 && fd.ContainingOneof() == nil: + case !fd.HasPresence() && fd.ContainingOneof() == nil: // Populated oneof fields always encode even if set to the zero value, // which normally are not encoded in proto3. switch fd.Kind() { diff --git a/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go b/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go index 757642e2..077712c2 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go +++ b/vendor/google.golang.org/protobuf/internal/impl/codec_unsafe.go @@ -2,9 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine -// +build !purego,!appengine - package impl // When using unsafe pointers, we can just treat enum values as int32s. diff --git a/vendor/google.golang.org/protobuf/internal/impl/convert.go b/vendor/google.golang.org/protobuf/internal/impl/convert.go index 185ef2ef..f72ddd88 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/convert.go +++ b/vendor/google.golang.org/protobuf/internal/impl/convert.go @@ -14,7 +14,7 @@ import ( // unwrapper unwraps the value to the underlying value. // This is implemented by List and Map. type unwrapper interface { - protoUnwrap() interface{} + protoUnwrap() any } // A Converter coverts to/from Go reflect.Value types and protobuf protoreflect.Value types. @@ -322,7 +322,7 @@ func (c *stringConverter) PBValueOf(v reflect.Value) protoreflect.Value { return protoreflect.ValueOfString(v.Convert(stringType).String()) } func (c *stringConverter) GoValueOf(v protoreflect.Value) reflect.Value { - // pref.Value.String never panics, so we go through an interface + // protoreflect.Value.String never panics, so we go through an interface // conversion here to check the type. s := v.Interface().(string) if c.goType.Kind() == reflect.Slice && s == "" { diff --git a/vendor/google.golang.org/protobuf/internal/impl/convert_list.go b/vendor/google.golang.org/protobuf/internal/impl/convert_list.go index f8913651..18cb96fd 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/convert_list.go +++ b/vendor/google.golang.org/protobuf/internal/impl/convert_list.go @@ -136,6 +136,6 @@ func (ls *listReflect) NewElement() protoreflect.Value { func (ls *listReflect) IsValid() bool { return !ls.v.IsNil() } -func (ls *listReflect) protoUnwrap() interface{} { +func (ls *listReflect) protoUnwrap() any { return ls.v.Interface() } diff --git a/vendor/google.golang.org/protobuf/internal/impl/convert_map.go b/vendor/google.golang.org/protobuf/internal/impl/convert_map.go index f30b0a05..304244a6 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/convert_map.go +++ b/vendor/google.golang.org/protobuf/internal/impl/convert_map.go @@ -116,6 +116,6 @@ func (ms *mapReflect) NewValue() protoreflect.Value { func (ms *mapReflect) IsValid() bool { return !ms.v.IsNil() } -func (ms *mapReflect) protoUnwrap() interface{} { +func (ms *mapReflect) protoUnwrap() any { return ms.v.Interface() } diff --git a/vendor/google.golang.org/protobuf/internal/impl/encode.go b/vendor/google.golang.org/protobuf/internal/impl/encode.go index 845c67d6..6254f5de 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/encode.go +++ b/vendor/google.golang.org/protobuf/internal/impl/encode.go @@ -10,7 +10,7 @@ import ( "sync/atomic" "google.golang.org/protobuf/internal/flags" - proto "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/proto" piface "google.golang.org/protobuf/runtime/protoiface" ) @@ -49,8 +49,11 @@ func (mi *MessageInfo) sizePointer(p pointer, opts marshalOptions) (size int) { return 0 } if opts.UseCachedSize() && mi.sizecacheOffset.IsValid() { - if size := atomic.LoadInt32(p.Apply(mi.sizecacheOffset).Int32()); size >= 0 { - return int(size) + // The size cache contains the size + 1, to allow the + // zero value to be invalid, while also allowing for a + // 0 size to be cached. + if size := atomic.LoadInt32(p.Apply(mi.sizecacheOffset).Int32()); size > 0 { + return int(size - 1) } } return mi.sizePointerSlow(p, opts) @@ -60,7 +63,7 @@ func (mi *MessageInfo) sizePointerSlow(p pointer, opts marshalOptions) (size int if flags.ProtoLegacy && mi.isMessageSet { size = sizeMessageSet(mi, p, opts) if mi.sizecacheOffset.IsValid() { - atomic.StoreInt32(p.Apply(mi.sizecacheOffset).Int32(), int32(size)) + atomic.StoreInt32(p.Apply(mi.sizecacheOffset).Int32(), int32(size+1)) } return size } @@ -84,13 +87,16 @@ func (mi *MessageInfo) sizePointerSlow(p pointer, opts marshalOptions) (size int } } if mi.sizecacheOffset.IsValid() { - if size > math.MaxInt32 { + if size > (math.MaxInt32 - 1) { // The size is too large for the int32 sizecache field. // We will need to recompute the size when encoding; // unfortunately expensive, but better than invalid output. - atomic.StoreInt32(p.Apply(mi.sizecacheOffset).Int32(), -1) + atomic.StoreInt32(p.Apply(mi.sizecacheOffset).Int32(), 0) } else { - atomic.StoreInt32(p.Apply(mi.sizecacheOffset).Int32(), int32(size)) + // The size cache contains the size + 1, to allow the + // zero value to be invalid, while also allowing for a + // 0 size to be cached. + atomic.StoreInt32(p.Apply(mi.sizecacheOffset).Int32(), int32(size+1)) } } return size @@ -149,6 +155,14 @@ func (mi *MessageInfo) marshalAppendPointer(b []byte, p pointer, opts marshalOpt return b, nil } +// fullyLazyExtensions returns true if we should attempt to keep extensions lazy over size and marshal. +func fullyLazyExtensions(opts marshalOptions) bool { + // When deterministic marshaling is requested, force an unmarshal for lazy + // extensions to produce a deterministic result, instead of passing through + // bytes lazily that may or may not match what Go Protobuf would produce. + return opts.flags&piface.MarshalDeterministic == 0 +} + func (mi *MessageInfo) sizeExtensions(ext *map[int32]ExtensionField, opts marshalOptions) (n int) { if ext == nil { return 0 @@ -158,6 +172,14 @@ func (mi *MessageInfo) sizeExtensions(ext *map[int32]ExtensionField, opts marsha if xi.funcs.size == nil { continue } + if fullyLazyExtensions(opts) { + // Don't expand the extension, instead use the buffer to calculate size + if lb := x.lazyBuffer(); lb != nil { + // We got hold of the buffer, so it's still lazy. + n += len(lb) + continue + } + } n += xi.funcs.size(x.Value(), xi.tagsize, opts) } return n @@ -176,6 +198,13 @@ func (mi *MessageInfo) appendExtensions(b []byte, ext *map[int32]ExtensionField, var err error for _, x := range *ext { xi := getExtensionFieldInfo(x.Type()) + if fullyLazyExtensions(opts) { + // Don't expand the extension if it's still in wire format, instead use the buffer content. + if lb := x.lazyBuffer(); lb != nil { + b = append(b, lb...) + continue + } + } b, err = xi.funcs.marshal(b, x.Value(), xi.wiretag, opts) } return b, err @@ -191,6 +220,13 @@ func (mi *MessageInfo) appendExtensions(b []byte, ext *map[int32]ExtensionField, for _, k := range keys { x := (*ext)[int32(k)] xi := getExtensionFieldInfo(x.Type()) + if fullyLazyExtensions(opts) { + // Don't expand the extension if it's still in wire format, instead use the buffer content. + if lb := x.lazyBuffer(); lb != nil { + b = append(b, lb...) + continue + } + } b, err = xi.funcs.marshal(b, x.Value(), xi.wiretag, opts) if err != nil { return b, err diff --git a/vendor/google.golang.org/protobuf/internal/impl/equal.go b/vendor/google.golang.org/protobuf/internal/impl/equal.go new file mode 100644 index 00000000..9f6c32a7 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/impl/equal.go @@ -0,0 +1,224 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package impl + +import ( + "bytes" + + "google.golang.org/protobuf/encoding/protowire" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/runtime/protoiface" +) + +func equal(in protoiface.EqualInput) protoiface.EqualOutput { + return protoiface.EqualOutput{Equal: equalMessage(in.MessageA, in.MessageB)} +} + +// equalMessage is a fast-path variant of protoreflect.equalMessage. +// It takes advantage of the internal messageState type to avoid +// unnecessary allocations, type assertions. +func equalMessage(mx, my protoreflect.Message) bool { + if mx == nil || my == nil { + return mx == my + } + if mx.Descriptor() != my.Descriptor() { + return false + } + + msx, ok := mx.(*messageState) + if !ok { + return protoreflect.ValueOfMessage(mx).Equal(protoreflect.ValueOfMessage(my)) + } + msy, ok := my.(*messageState) + if !ok { + return protoreflect.ValueOfMessage(mx).Equal(protoreflect.ValueOfMessage(my)) + } + + mi := msx.messageInfo() + miy := msy.messageInfo() + if mi != miy { + return protoreflect.ValueOfMessage(mx).Equal(protoreflect.ValueOfMessage(my)) + } + mi.init() + // Compares regular fields + // Modified Message.Range code that compares two messages of the same type + // while going over the fields. + for _, ri := range mi.rangeInfos { + var fd protoreflect.FieldDescriptor + var vx, vy protoreflect.Value + + switch ri := ri.(type) { + case *fieldInfo: + hx := ri.has(msx.pointer()) + hy := ri.has(msy.pointer()) + if hx != hy { + return false + } + if !hx { + continue + } + fd = ri.fieldDesc + vx = ri.get(msx.pointer()) + vy = ri.get(msy.pointer()) + case *oneofInfo: + fnx := ri.which(msx.pointer()) + fny := ri.which(msy.pointer()) + if fnx != fny { + return false + } + if fnx <= 0 { + continue + } + fi := mi.fields[fnx] + fd = fi.fieldDesc + vx = fi.get(msx.pointer()) + vy = fi.get(msy.pointer()) + } + + if !equalValue(fd, vx, vy) { + return false + } + } + + // Compare extensions. + // This is more complicated because mx or my could have empty/nil extension maps, + // however some populated extension map values are equal to nil extension maps. + emx := mi.extensionMap(msx.pointer()) + emy := mi.extensionMap(msy.pointer()) + if emx != nil { + for k, x := range *emx { + xd := x.Type().TypeDescriptor() + xv := x.Value() + var y ExtensionField + ok := false + if emy != nil { + y, ok = (*emy)[k] + } + // We need to treat empty lists as equal to nil values + if emy == nil || !ok { + if xd.IsList() && xv.List().Len() == 0 { + continue + } + return false + } + + if !equalValue(xd, xv, y.Value()) { + return false + } + } + } + if emy != nil { + // emy may have extensions emx does not have, need to check them as well + for k, y := range *emy { + if emx != nil { + // emx has the field, so we already checked it + if _, ok := (*emx)[k]; ok { + continue + } + } + // Empty lists are equal to nil + if y.Type().TypeDescriptor().IsList() && y.Value().List().Len() == 0 { + continue + } + + // Cant be equal if the extension is populated + return false + } + } + + return equalUnknown(mx.GetUnknown(), my.GetUnknown()) +} + +func equalValue(fd protoreflect.FieldDescriptor, vx, vy protoreflect.Value) bool { + // slow path + if fd.Kind() != protoreflect.MessageKind { + return vx.Equal(vy) + } + + // fast path special cases + if fd.IsMap() { + if fd.MapValue().Kind() == protoreflect.MessageKind { + return equalMessageMap(vx.Map(), vy.Map()) + } + return vx.Equal(vy) + } + + if fd.IsList() { + return equalMessageList(vx.List(), vy.List()) + } + + return equalMessage(vx.Message(), vy.Message()) +} + +// Mostly copied from protoreflect.equalMap. +// This variant only works for messages as map types. +// All other map types should be handled via Value.Equal. +func equalMessageMap(mx, my protoreflect.Map) bool { + if mx.Len() != my.Len() { + return false + } + equal := true + mx.Range(func(k protoreflect.MapKey, vx protoreflect.Value) bool { + if !my.Has(k) { + equal = false + return false + } + vy := my.Get(k) + equal = equalMessage(vx.Message(), vy.Message()) + return equal + }) + return equal +} + +// Mostly copied from protoreflect.equalList. +// The only change is the usage of equalImpl instead of protoreflect.equalValue. +func equalMessageList(lx, ly protoreflect.List) bool { + if lx.Len() != ly.Len() { + return false + } + for i := 0; i < lx.Len(); i++ { + // We only operate on messages here since equalImpl will not call us in any other case. + if !equalMessage(lx.Get(i).Message(), ly.Get(i).Message()) { + return false + } + } + return true +} + +// equalUnknown compares unknown fields by direct comparison on the raw bytes +// of each individual field number. +// Copied from protoreflect.equalUnknown. +func equalUnknown(x, y protoreflect.RawFields) bool { + if len(x) != len(y) { + return false + } + if bytes.Equal([]byte(x), []byte(y)) { + return true + } + + mx := make(map[protoreflect.FieldNumber]protoreflect.RawFields) + my := make(map[protoreflect.FieldNumber]protoreflect.RawFields) + for len(x) > 0 { + fnum, _, n := protowire.ConsumeField(x) + mx[fnum] = append(mx[fnum], x[:n]...) + x = x[n:] + } + for len(y) > 0 { + fnum, _, n := protowire.ConsumeField(y) + my[fnum] = append(my[fnum], y[:n]...) + y = y[n:] + } + if len(mx) != len(my) { + return false + } + + for k, v1 := range mx { + if v2, ok := my[k]; !ok || !bytes.Equal([]byte(v1), []byte(v2)) { + return false + } + } + + return true +} diff --git a/vendor/google.golang.org/protobuf/internal/impl/extension.go b/vendor/google.golang.org/protobuf/internal/impl/extension.go index cb25b0ba..e31249f6 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/extension.go +++ b/vendor/google.golang.org/protobuf/internal/impl/extension.go @@ -53,7 +53,7 @@ type ExtensionInfo struct { // type returned by InterfaceOf may not be identical. // // Deprecated: Use InterfaceOf(xt.Zero()) instead. - ExtensionType interface{} + ExtensionType any // Field is the field number of the extension. // @@ -95,16 +95,16 @@ func (xi *ExtensionInfo) New() protoreflect.Value { func (xi *ExtensionInfo) Zero() protoreflect.Value { return xi.lazyInit().Zero() } -func (xi *ExtensionInfo) ValueOf(v interface{}) protoreflect.Value { +func (xi *ExtensionInfo) ValueOf(v any) protoreflect.Value { return xi.lazyInit().PBValueOf(reflect.ValueOf(v)) } -func (xi *ExtensionInfo) InterfaceOf(v protoreflect.Value) interface{} { +func (xi *ExtensionInfo) InterfaceOf(v protoreflect.Value) any { return xi.lazyInit().GoValueOf(v).Interface() } func (xi *ExtensionInfo) IsValidValue(v protoreflect.Value) bool { return xi.lazyInit().IsValidPB(v) } -func (xi *ExtensionInfo) IsValidInterface(v interface{}) bool { +func (xi *ExtensionInfo) IsValidInterface(v any) bool { return xi.lazyInit().IsValidGo(reflect.ValueOf(v)) } func (xi *ExtensionInfo) TypeDescriptor() protoreflect.ExtensionTypeDescriptor { diff --git a/vendor/google.golang.org/protobuf/internal/impl/legacy_enum.go b/vendor/google.golang.org/protobuf/internal/impl/legacy_enum.go index c2a803bb..81b2b1a7 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/legacy_enum.go +++ b/vendor/google.golang.org/protobuf/internal/impl/legacy_enum.go @@ -97,7 +97,7 @@ func (e *legacyEnumWrapper) Number() protoreflect.EnumNumber { func (e *legacyEnumWrapper) ProtoReflect() protoreflect.Enum { return e } -func (e *legacyEnumWrapper) protoUnwrap() interface{} { +func (e *legacyEnumWrapper) protoUnwrap() any { v := reflect.New(e.goTyp).Elem() v.SetInt(int64(e.num)) return v.Interface() @@ -167,6 +167,7 @@ func aberrantLoadEnumDesc(t reflect.Type) protoreflect.EnumDescriptor { ed := &filedesc.Enum{L2: new(filedesc.EnumL2)} ed.L0.FullName = AberrantDeriveFullName(t) // e.g., github_com.user.repo.MyEnum ed.L0.ParentFile = filedesc.SurrogateProto3 + ed.L1.EditionFeatures = ed.L0.ParentFile.L1.EditionFeatures ed.L2.Values.List = append(ed.L2.Values.List, filedesc.EnumValue{}) // TODO: Use the presence of a UnmarshalJSON method to determine proto2? diff --git a/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go b/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go index 87b30d05..b6849d66 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go +++ b/vendor/google.golang.org/protobuf/internal/impl/legacy_extension.go @@ -118,7 +118,7 @@ func (xi *ExtensionInfo) initFromLegacy() { xd.L1.Number = protoreflect.FieldNumber(xi.Field) xd.L1.Cardinality = fd.L1.Cardinality xd.L1.Kind = fd.L1.Kind - xd.L2.IsPacked = fd.L1.IsPacked + xd.L1.EditionFeatures = fd.L1.EditionFeatures xd.L2.Default = fd.L1.Default xd.L1.Extendee = Export{}.MessageDescriptorOf(xi.ExtendedType) xd.L2.Enum = ed @@ -160,6 +160,7 @@ func (x placeholderExtension) HasPresence() bool func (x placeholderExtension) HasOptionalKeyword() bool { return false } func (x placeholderExtension) IsExtension() bool { return true } func (x placeholderExtension) IsWeak() bool { return false } +func (x placeholderExtension) IsLazy() bool { return false } func (x placeholderExtension) IsPacked() bool { return false } func (x placeholderExtension) IsList() bool { return false } func (x placeholderExtension) IsMap() bool { return false } diff --git a/vendor/google.golang.org/protobuf/internal/impl/legacy_file.go b/vendor/google.golang.org/protobuf/internal/impl/legacy_file.go index 9ab09108..b649f112 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/legacy_file.go +++ b/vendor/google.golang.org/protobuf/internal/impl/legacy_file.go @@ -7,7 +7,7 @@ package impl import ( "bytes" "compress/gzip" - "io/ioutil" + "io" "sync" "google.golang.org/protobuf/internal/filedesc" @@ -51,7 +51,7 @@ func legacyLoadFileDesc(b []byte) protoreflect.FileDescriptor { if err != nil { panic(err) } - b2, err := ioutil.ReadAll(zr) + b2, err := io.ReadAll(zr) if err != nil { panic(err) } diff --git a/vendor/google.golang.org/protobuf/internal/impl/legacy_message.go b/vendor/google.golang.org/protobuf/internal/impl/legacy_message.go index 61c483fa..bf0b6049 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/legacy_message.go +++ b/vendor/google.golang.org/protobuf/internal/impl/legacy_message.go @@ -204,15 +204,21 @@ func aberrantLoadMessageDescReentrant(t reflect.Type, name protoreflect.FullName } } + md.L1.EditionFeatures = md.L0.ParentFile.L1.EditionFeatures // Obtain a list of oneof wrapper types. var oneofWrappers []reflect.Type - for _, method := range []string{"XXX_OneofFuncs", "XXX_OneofWrappers"} { - if fn, ok := t.MethodByName(method); ok { - for _, v := range fn.Func.Call([]reflect.Value{reflect.Zero(fn.Type.In(0))}) { - if vs, ok := v.Interface().([]interface{}); ok { - for _, v := range vs { - oneofWrappers = append(oneofWrappers, reflect.TypeOf(v)) - } + methods := make([]reflect.Method, 0, 2) + if m, ok := t.MethodByName("XXX_OneofFuncs"); ok { + methods = append(methods, m) + } + if m, ok := t.MethodByName("XXX_OneofWrappers"); ok { + methods = append(methods, m) + } + for _, fn := range methods { + for _, v := range fn.Func.Call([]reflect.Value{reflect.Zero(fn.Type.In(0))}) { + if vs, ok := v.Interface().([]any); ok { + for _, v := range vs { + oneofWrappers = append(oneofWrappers, reflect.TypeOf(v)) } } } @@ -245,6 +251,7 @@ func aberrantLoadMessageDescReentrant(t reflect.Type, name protoreflect.FullName od := &md.L2.Oneofs.List[n] od.L0.FullName = md.FullName().Append(protoreflect.Name(tag)) od.L0.ParentFile = md.L0.ParentFile + od.L1.EditionFeatures = md.L1.EditionFeatures od.L0.Parent = md od.L0.Index = n @@ -255,6 +262,7 @@ func aberrantLoadMessageDescReentrant(t reflect.Type, name protoreflect.FullName aberrantAppendField(md, f.Type, tag, "", "") fd := &md.L2.Fields.List[len(md.L2.Fields.List)-1] fd.L1.ContainingOneof = od + fd.L1.EditionFeatures = od.L1.EditionFeatures od.L1.Fields.List = append(od.L1.Fields.List, fd) } } @@ -302,14 +310,14 @@ func aberrantAppendField(md *filedesc.Message, goType reflect.Type, tag, tagKey, fd.L0.Parent = md fd.L0.Index = n - if fd.L1.IsWeak || fd.L1.HasPacked { + if fd.L1.IsWeak || fd.L1.EditionFeatures.IsPacked { fd.L1.Options = func() protoreflect.ProtoMessage { opts := descopts.Field.ProtoReflect().New() if fd.L1.IsWeak { opts.Set(opts.Descriptor().Fields().ByName("weak"), protoreflect.ValueOfBool(true)) } - if fd.L1.HasPacked { - opts.Set(opts.Descriptor().Fields().ByName("packed"), protoreflect.ValueOfBool(fd.L1.IsPacked)) + if fd.L1.EditionFeatures.IsPacked { + opts.Set(opts.Descriptor().Fields().ByName("packed"), protoreflect.ValueOfBool(fd.L1.EditionFeatures.IsPacked)) } return opts.Interface() } @@ -339,6 +347,7 @@ func aberrantAppendField(md *filedesc.Message, goType reflect.Type, tag, tagKey, md2.L0.ParentFile = md.L0.ParentFile md2.L0.Parent = md md2.L0.Index = n + md2.L1.EditionFeatures = md.L1.EditionFeatures md2.L1.IsMapEntry = true md2.L2.Options = func() protoreflect.ProtoMessage { @@ -558,6 +567,6 @@ func (m aberrantMessage) IsValid() bool { func (m aberrantMessage) ProtoMethods() *protoiface.Methods { return aberrantProtoMethods } -func (m aberrantMessage) protoUnwrap() interface{} { +func (m aberrantMessage) protoUnwrap() any { return m.v.Interface() } diff --git a/vendor/google.golang.org/protobuf/internal/impl/message.go b/vendor/google.golang.org/protobuf/internal/impl/message.go index 4f5fb67a..741b5ed2 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/message.go +++ b/vendor/google.golang.org/protobuf/internal/impl/message.go @@ -30,12 +30,12 @@ type MessageInfo struct { // Desc is the underlying message descriptor type and must be populated. Desc protoreflect.MessageDescriptor - // Exporter must be provided in a purego environment in order to provide - // access to unexported fields. + // Deprecated: Exporter will be removed the next time we bump + // protoimpl.GenVersion. See https://github.com/golang/protobuf/issues/1640 Exporter exporter // OneofWrappers is list of pointers to oneof wrapper struct types. - OneofWrappers []interface{} + OneofWrappers []any initMu sync.Mutex // protects all unexported fields initDone uint32 @@ -47,7 +47,7 @@ type MessageInfo struct { // exporter is a function that returns a reference to the ith field of v, // where v is a pointer to a struct. It returns nil if it does not support // exporting the requested field (e.g., already exported). -type exporter func(v interface{}, i int) interface{} +type exporter func(v any, i int) any // getMessageInfo returns the MessageInfo for any message type that // is generated by our implementation of protoc-gen-go (for v2 and on). @@ -192,12 +192,17 @@ fieldLoop: // Derive a mapping of oneof wrappers to fields. oneofWrappers := mi.OneofWrappers - for _, method := range []string{"XXX_OneofFuncs", "XXX_OneofWrappers"} { - if fn, ok := reflect.PtrTo(t).MethodByName(method); ok { - for _, v := range fn.Func.Call([]reflect.Value{reflect.Zero(fn.Type.In(0))}) { - if vs, ok := v.Interface().([]interface{}); ok { - oneofWrappers = vs - } + methods := make([]reflect.Method, 0, 2) + if m, ok := reflect.PtrTo(t).MethodByName("XXX_OneofFuncs"); ok { + methods = append(methods, m) + } + if m, ok := reflect.PtrTo(t).MethodByName("XXX_OneofWrappers"); ok { + methods = append(methods, m) + } + for _, fn := range methods { + for _, v := range fn.Func.Call([]reflect.Value{reflect.Zero(fn.Type.In(0))}) { + if vs, ok := v.Interface().([]any); ok { + oneofWrappers = vs } } } @@ -251,7 +256,7 @@ func (mi *MessageInfo) Message(i int) protoreflect.MessageType { type mapEntryType struct { desc protoreflect.MessageDescriptor - valType interface{} // zero value of enum or message type + valType any // zero value of enum or message type } func (mt mapEntryType) New() protoreflect.Message { diff --git a/vendor/google.golang.org/protobuf/internal/impl/message_reflect.go b/vendor/google.golang.org/protobuf/internal/impl/message_reflect.go index d9ea010b..ecb4623d 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/message_reflect.go +++ b/vendor/google.golang.org/protobuf/internal/impl/message_reflect.go @@ -20,7 +20,7 @@ type reflectMessageInfo struct { // fieldTypes contains the zero value of an enum or message field. // For lists, it contains the element type. // For maps, it contains the entry value type. - fieldTypes map[protoreflect.FieldNumber]interface{} + fieldTypes map[protoreflect.FieldNumber]any // denseFields is a subset of fields where: // 0 < fieldDesc.Number() < len(denseFields) @@ -28,7 +28,7 @@ type reflectMessageInfo struct { denseFields []*fieldInfo // rangeInfos is a list of all fields (not belonging to a oneof) and oneofs. - rangeInfos []interface{} // either *fieldInfo or *oneofInfo + rangeInfos []any // either *fieldInfo or *oneofInfo getUnknown func(pointer) protoreflect.RawFields setUnknown func(pointer, protoreflect.RawFields) @@ -224,7 +224,7 @@ func (mi *MessageInfo) makeFieldTypes(si structInfo) { } if ft != nil { if mi.fieldTypes == nil { - mi.fieldTypes = make(map[protoreflect.FieldNumber]interface{}) + mi.fieldTypes = make(map[protoreflect.FieldNumber]any) } mi.fieldTypes[fd.Number()] = reflect.Zero(ft).Interface() } @@ -247,39 +247,39 @@ func (m *extensionMap) Range(f func(protoreflect.FieldDescriptor, protoreflect.V } } } -func (m *extensionMap) Has(xt protoreflect.ExtensionType) (ok bool) { +func (m *extensionMap) Has(xd protoreflect.ExtensionTypeDescriptor) (ok bool) { if m == nil { return false } - xd := xt.TypeDescriptor() x, ok := (*m)[int32(xd.Number())] if !ok { return false } + if x.isUnexpandedLazy() { + // Avoid calling x.Value(), which triggers a lazy unmarshal. + return true + } switch { case xd.IsList(): return x.Value().List().Len() > 0 case xd.IsMap(): return x.Value().Map().Len() > 0 - case xd.Message() != nil: - return x.Value().Message().IsValid() } return true } -func (m *extensionMap) Clear(xt protoreflect.ExtensionType) { - delete(*m, int32(xt.TypeDescriptor().Number())) +func (m *extensionMap) Clear(xd protoreflect.ExtensionTypeDescriptor) { + delete(*m, int32(xd.Number())) } -func (m *extensionMap) Get(xt protoreflect.ExtensionType) protoreflect.Value { - xd := xt.TypeDescriptor() +func (m *extensionMap) Get(xd protoreflect.ExtensionTypeDescriptor) protoreflect.Value { if m != nil { if x, ok := (*m)[int32(xd.Number())]; ok { return x.Value() } } - return xt.Zero() + return xd.Type().Zero() } -func (m *extensionMap) Set(xt protoreflect.ExtensionType, v protoreflect.Value) { - xd := xt.TypeDescriptor() +func (m *extensionMap) Set(xd protoreflect.ExtensionTypeDescriptor, v protoreflect.Value) { + xt := xd.Type() isValid := true switch { case !xt.IsValidValue(v): @@ -292,7 +292,7 @@ func (m *extensionMap) Set(xt protoreflect.ExtensionType, v protoreflect.Value) isValid = v.Message().IsValid() } if !isValid { - panic(fmt.Sprintf("%v: assigning invalid value", xt.TypeDescriptor().FullName())) + panic(fmt.Sprintf("%v: assigning invalid value", xd.FullName())) } if *m == nil { @@ -302,16 +302,15 @@ func (m *extensionMap) Set(xt protoreflect.ExtensionType, v protoreflect.Value) x.Set(xt, v) (*m)[int32(xd.Number())] = x } -func (m *extensionMap) Mutable(xt protoreflect.ExtensionType) protoreflect.Value { - xd := xt.TypeDescriptor() +func (m *extensionMap) Mutable(xd protoreflect.ExtensionTypeDescriptor) protoreflect.Value { if xd.Kind() != protoreflect.MessageKind && xd.Kind() != protoreflect.GroupKind && !xd.IsList() && !xd.IsMap() { panic("invalid Mutable on field with non-composite type") } if x, ok := (*m)[int32(xd.Number())]; ok { return x.Value() } - v := xt.New() - m.Set(xt, v) + v := xd.Type().New() + m.Set(xd, v) return v } @@ -394,7 +393,7 @@ var ( // MessageOf returns a reflective view over a message. The input must be a // pointer to a named Go struct. If the provided type has a ProtoReflect method, // it must be implemented by calling this method. -func (mi *MessageInfo) MessageOf(m interface{}) protoreflect.Message { +func (mi *MessageInfo) MessageOf(m any) protoreflect.Message { if reflect.TypeOf(m) != mi.GoReflectType { panic(fmt.Sprintf("type mismatch: got %T, want %v", m, mi.GoReflectType)) } @@ -422,13 +421,13 @@ func (m *messageIfaceWrapper) Reset() { func (m *messageIfaceWrapper) ProtoReflect() protoreflect.Message { return (*messageReflectWrapper)(m) } -func (m *messageIfaceWrapper) protoUnwrap() interface{} { +func (m *messageIfaceWrapper) protoUnwrap() any { return m.p.AsIfaceOf(m.mi.GoReflectType.Elem()) } // checkField verifies that the provided field descriptor is valid. // Exactly one of the returned values is populated. -func (mi *MessageInfo) checkField(fd protoreflect.FieldDescriptor) (*fieldInfo, protoreflect.ExtensionType) { +func (mi *MessageInfo) checkField(fd protoreflect.FieldDescriptor) (*fieldInfo, protoreflect.ExtensionTypeDescriptor) { var fi *fieldInfo if n := fd.Number(); 0 < n && int(n) < len(mi.denseFields) { fi = mi.denseFields[n] @@ -457,7 +456,7 @@ func (mi *MessageInfo) checkField(fd protoreflect.FieldDescriptor) (*fieldInfo, if !ok { panic(fmt.Sprintf("extension %v does not implement protoreflect.ExtensionTypeDescriptor", fd.FullName())) } - return nil, xtd.Type() + return nil, xtd } panic(fmt.Sprintf("field %v is invalid", fd.FullName())) } diff --git a/vendor/google.golang.org/protobuf/internal/impl/message_reflect_field.go b/vendor/google.golang.org/protobuf/internal/impl/message_reflect_field.go index 5e736c60..986322b1 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/message_reflect_field.go +++ b/vendor/google.golang.org/protobuf/internal/impl/message_reflect_field.go @@ -538,6 +538,6 @@ func isZero(v reflect.Value) bool { } return true default: - panic(&reflect.ValueError{"reflect.Value.IsZero", v.Kind()}) + panic(&reflect.ValueError{Method: "reflect.Value.IsZero", Kind: v.Kind()}) } } diff --git a/vendor/google.golang.org/protobuf/internal/impl/message_reflect_gen.go b/vendor/google.golang.org/protobuf/internal/impl/message_reflect_gen.go index 741d6e5b..99dc23c6 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/message_reflect_gen.go +++ b/vendor/google.golang.org/protobuf/internal/impl/message_reflect_gen.go @@ -23,12 +23,13 @@ func (m *messageState) New() protoreflect.Message { func (m *messageState) Interface() protoreflect.ProtoMessage { return m.protoUnwrap().(protoreflect.ProtoMessage) } -func (m *messageState) protoUnwrap() interface{} { +func (m *messageState) protoUnwrap() any { return m.pointer().AsIfaceOf(m.messageInfo().GoReflectType.Elem()) } func (m *messageState) ProtoMethods() *protoiface.Methods { - m.messageInfo().init() - return &m.messageInfo().methods + mi := m.messageInfo() + mi.init() + return &mi.methods } // ProtoMessageInfo is a pseudo-internal API for allowing the v1 code @@ -41,8 +42,9 @@ func (m *messageState) ProtoMessageInfo() *MessageInfo { } func (m *messageState) Range(f func(protoreflect.FieldDescriptor, protoreflect.Value) bool) { - m.messageInfo().init() - for _, ri := range m.messageInfo().rangeInfos { + mi := m.messageInfo() + mi.init() + for _, ri := range mi.rangeInfos { switch ri := ri.(type) { case *fieldInfo: if ri.has(m.pointer()) { @@ -52,77 +54,86 @@ func (m *messageState) Range(f func(protoreflect.FieldDescriptor, protoreflect.V } case *oneofInfo: if n := ri.which(m.pointer()); n > 0 { - fi := m.messageInfo().fields[n] + fi := mi.fields[n] if !f(fi.fieldDesc, fi.get(m.pointer())) { return } } } } - m.messageInfo().extensionMap(m.pointer()).Range(f) + mi.extensionMap(m.pointer()).Range(f) } func (m *messageState) Has(fd protoreflect.FieldDescriptor) bool { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { return fi.has(m.pointer()) } else { - return m.messageInfo().extensionMap(m.pointer()).Has(xt) + return mi.extensionMap(m.pointer()).Has(xd) } } func (m *messageState) Clear(fd protoreflect.FieldDescriptor) { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { fi.clear(m.pointer()) } else { - m.messageInfo().extensionMap(m.pointer()).Clear(xt) + mi.extensionMap(m.pointer()).Clear(xd) } } func (m *messageState) Get(fd protoreflect.FieldDescriptor) protoreflect.Value { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { return fi.get(m.pointer()) } else { - return m.messageInfo().extensionMap(m.pointer()).Get(xt) + return mi.extensionMap(m.pointer()).Get(xd) } } func (m *messageState) Set(fd protoreflect.FieldDescriptor, v protoreflect.Value) { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { fi.set(m.pointer(), v) } else { - m.messageInfo().extensionMap(m.pointer()).Set(xt, v) + mi.extensionMap(m.pointer()).Set(xd, v) } } func (m *messageState) Mutable(fd protoreflect.FieldDescriptor) protoreflect.Value { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { return fi.mutable(m.pointer()) } else { - return m.messageInfo().extensionMap(m.pointer()).Mutable(xt) + return mi.extensionMap(m.pointer()).Mutable(xd) } } func (m *messageState) NewField(fd protoreflect.FieldDescriptor) protoreflect.Value { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { return fi.newField() } else { - return xt.New() + return xd.Type().New() } } func (m *messageState) WhichOneof(od protoreflect.OneofDescriptor) protoreflect.FieldDescriptor { - m.messageInfo().init() - if oi := m.messageInfo().oneofs[od.Name()]; oi != nil && oi.oneofDesc == od { + mi := m.messageInfo() + mi.init() + if oi := mi.oneofs[od.Name()]; oi != nil && oi.oneofDesc == od { return od.Fields().ByNumber(oi.which(m.pointer())) } panic("invalid oneof descriptor " + string(od.FullName()) + " for message " + string(m.Descriptor().FullName())) } func (m *messageState) GetUnknown() protoreflect.RawFields { - m.messageInfo().init() - return m.messageInfo().getUnknown(m.pointer()) + mi := m.messageInfo() + mi.init() + return mi.getUnknown(m.pointer()) } func (m *messageState) SetUnknown(b protoreflect.RawFields) { - m.messageInfo().init() - m.messageInfo().setUnknown(m.pointer(), b) + mi := m.messageInfo() + mi.init() + mi.setUnknown(m.pointer(), b) } func (m *messageState) IsValid() bool { return !m.pointer().IsNil() @@ -143,12 +154,13 @@ func (m *messageReflectWrapper) Interface() protoreflect.ProtoMessage { } return (*messageIfaceWrapper)(m) } -func (m *messageReflectWrapper) protoUnwrap() interface{} { +func (m *messageReflectWrapper) protoUnwrap() any { return m.pointer().AsIfaceOf(m.messageInfo().GoReflectType.Elem()) } func (m *messageReflectWrapper) ProtoMethods() *protoiface.Methods { - m.messageInfo().init() - return &m.messageInfo().methods + mi := m.messageInfo() + mi.init() + return &mi.methods } // ProtoMessageInfo is a pseudo-internal API for allowing the v1 code @@ -161,8 +173,9 @@ func (m *messageReflectWrapper) ProtoMessageInfo() *MessageInfo { } func (m *messageReflectWrapper) Range(f func(protoreflect.FieldDescriptor, protoreflect.Value) bool) { - m.messageInfo().init() - for _, ri := range m.messageInfo().rangeInfos { + mi := m.messageInfo() + mi.init() + for _, ri := range mi.rangeInfos { switch ri := ri.(type) { case *fieldInfo: if ri.has(m.pointer()) { @@ -172,77 +185,86 @@ func (m *messageReflectWrapper) Range(f func(protoreflect.FieldDescriptor, proto } case *oneofInfo: if n := ri.which(m.pointer()); n > 0 { - fi := m.messageInfo().fields[n] + fi := mi.fields[n] if !f(fi.fieldDesc, fi.get(m.pointer())) { return } } } } - m.messageInfo().extensionMap(m.pointer()).Range(f) + mi.extensionMap(m.pointer()).Range(f) } func (m *messageReflectWrapper) Has(fd protoreflect.FieldDescriptor) bool { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { return fi.has(m.pointer()) } else { - return m.messageInfo().extensionMap(m.pointer()).Has(xt) + return mi.extensionMap(m.pointer()).Has(xd) } } func (m *messageReflectWrapper) Clear(fd protoreflect.FieldDescriptor) { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { fi.clear(m.pointer()) } else { - m.messageInfo().extensionMap(m.pointer()).Clear(xt) + mi.extensionMap(m.pointer()).Clear(xd) } } func (m *messageReflectWrapper) Get(fd protoreflect.FieldDescriptor) protoreflect.Value { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { return fi.get(m.pointer()) } else { - return m.messageInfo().extensionMap(m.pointer()).Get(xt) + return mi.extensionMap(m.pointer()).Get(xd) } } func (m *messageReflectWrapper) Set(fd protoreflect.FieldDescriptor, v protoreflect.Value) { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { fi.set(m.pointer(), v) } else { - m.messageInfo().extensionMap(m.pointer()).Set(xt, v) + mi.extensionMap(m.pointer()).Set(xd, v) } } func (m *messageReflectWrapper) Mutable(fd protoreflect.FieldDescriptor) protoreflect.Value { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { return fi.mutable(m.pointer()) } else { - return m.messageInfo().extensionMap(m.pointer()).Mutable(xt) + return mi.extensionMap(m.pointer()).Mutable(xd) } } func (m *messageReflectWrapper) NewField(fd protoreflect.FieldDescriptor) protoreflect.Value { - m.messageInfo().init() - if fi, xt := m.messageInfo().checkField(fd); fi != nil { + mi := m.messageInfo() + mi.init() + if fi, xd := mi.checkField(fd); fi != nil { return fi.newField() } else { - return xt.New() + return xd.Type().New() } } func (m *messageReflectWrapper) WhichOneof(od protoreflect.OneofDescriptor) protoreflect.FieldDescriptor { - m.messageInfo().init() - if oi := m.messageInfo().oneofs[od.Name()]; oi != nil && oi.oneofDesc == od { + mi := m.messageInfo() + mi.init() + if oi := mi.oneofs[od.Name()]; oi != nil && oi.oneofDesc == od { return od.Fields().ByNumber(oi.which(m.pointer())) } panic("invalid oneof descriptor " + string(od.FullName()) + " for message " + string(m.Descriptor().FullName())) } func (m *messageReflectWrapper) GetUnknown() protoreflect.RawFields { - m.messageInfo().init() - return m.messageInfo().getUnknown(m.pointer()) + mi := m.messageInfo() + mi.init() + return mi.getUnknown(m.pointer()) } func (m *messageReflectWrapper) SetUnknown(b protoreflect.RawFields) { - m.messageInfo().init() - m.messageInfo().setUnknown(m.pointer(), b) + mi := m.messageInfo() + mi.init() + mi.setUnknown(m.pointer(), b) } func (m *messageReflectWrapper) IsValid() bool { return !m.pointer().IsNil() diff --git a/vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go b/vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go deleted file mode 100644 index 4c491bdf..00000000 --- a/vendor/google.golang.org/protobuf/internal/impl/pointer_reflect.go +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package impl - -import ( - "fmt" - "reflect" - "sync" -) - -const UnsafeEnabled = false - -// Pointer is an opaque pointer type. -type Pointer interface{} - -// offset represents the offset to a struct field, accessible from a pointer. -// The offset is the field index into a struct. -type offset struct { - index int - export exporter -} - -// offsetOf returns a field offset for the struct field. -func offsetOf(f reflect.StructField, x exporter) offset { - if len(f.Index) != 1 { - panic("embedded structs are not supported") - } - if f.PkgPath == "" { - return offset{index: f.Index[0]} // field is already exported - } - if x == nil { - panic("exporter must be provided for unexported field") - } - return offset{index: f.Index[0], export: x} -} - -// IsValid reports whether the offset is valid. -func (f offset) IsValid() bool { return f.index >= 0 } - -// invalidOffset is an invalid field offset. -var invalidOffset = offset{index: -1} - -// zeroOffset is a noop when calling pointer.Apply. -var zeroOffset = offset{index: 0} - -// pointer is an abstract representation of a pointer to a struct or field. -type pointer struct{ v reflect.Value } - -// pointerOf returns p as a pointer. -func pointerOf(p Pointer) pointer { - return pointerOfIface(p) -} - -// pointerOfValue returns v as a pointer. -func pointerOfValue(v reflect.Value) pointer { - return pointer{v: v} -} - -// pointerOfIface returns the pointer portion of an interface. -func pointerOfIface(v interface{}) pointer { - return pointer{v: reflect.ValueOf(v)} -} - -// IsNil reports whether the pointer is nil. -func (p pointer) IsNil() bool { - return p.v.IsNil() -} - -// Apply adds an offset to the pointer to derive a new pointer -// to a specified field. The current pointer must be pointing at a struct. -func (p pointer) Apply(f offset) pointer { - if f.export != nil { - if v := reflect.ValueOf(f.export(p.v.Interface(), f.index)); v.IsValid() { - return pointer{v: v} - } - } - return pointer{v: p.v.Elem().Field(f.index).Addr()} -} - -// AsValueOf treats p as a pointer to an object of type t and returns the value. -// It is equivalent to reflect.ValueOf(p.AsIfaceOf(t)) -func (p pointer) AsValueOf(t reflect.Type) reflect.Value { - if got := p.v.Type().Elem(); got != t { - panic(fmt.Sprintf("invalid type: got %v, want %v", got, t)) - } - return p.v -} - -// AsIfaceOf treats p as a pointer to an object of type t and returns the value. -// It is equivalent to p.AsValueOf(t).Interface() -func (p pointer) AsIfaceOf(t reflect.Type) interface{} { - return p.AsValueOf(t).Interface() -} - -func (p pointer) Bool() *bool { return p.v.Interface().(*bool) } -func (p pointer) BoolPtr() **bool { return p.v.Interface().(**bool) } -func (p pointer) BoolSlice() *[]bool { return p.v.Interface().(*[]bool) } -func (p pointer) Int32() *int32 { return p.v.Interface().(*int32) } -func (p pointer) Int32Ptr() **int32 { return p.v.Interface().(**int32) } -func (p pointer) Int32Slice() *[]int32 { return p.v.Interface().(*[]int32) } -func (p pointer) Int64() *int64 { return p.v.Interface().(*int64) } -func (p pointer) Int64Ptr() **int64 { return p.v.Interface().(**int64) } -func (p pointer) Int64Slice() *[]int64 { return p.v.Interface().(*[]int64) } -func (p pointer) Uint32() *uint32 { return p.v.Interface().(*uint32) } -func (p pointer) Uint32Ptr() **uint32 { return p.v.Interface().(**uint32) } -func (p pointer) Uint32Slice() *[]uint32 { return p.v.Interface().(*[]uint32) } -func (p pointer) Uint64() *uint64 { return p.v.Interface().(*uint64) } -func (p pointer) Uint64Ptr() **uint64 { return p.v.Interface().(**uint64) } -func (p pointer) Uint64Slice() *[]uint64 { return p.v.Interface().(*[]uint64) } -func (p pointer) Float32() *float32 { return p.v.Interface().(*float32) } -func (p pointer) Float32Ptr() **float32 { return p.v.Interface().(**float32) } -func (p pointer) Float32Slice() *[]float32 { return p.v.Interface().(*[]float32) } -func (p pointer) Float64() *float64 { return p.v.Interface().(*float64) } -func (p pointer) Float64Ptr() **float64 { return p.v.Interface().(**float64) } -func (p pointer) Float64Slice() *[]float64 { return p.v.Interface().(*[]float64) } -func (p pointer) String() *string { return p.v.Interface().(*string) } -func (p pointer) StringPtr() **string { return p.v.Interface().(**string) } -func (p pointer) StringSlice() *[]string { return p.v.Interface().(*[]string) } -func (p pointer) Bytes() *[]byte { return p.v.Interface().(*[]byte) } -func (p pointer) BytesPtr() **[]byte { return p.v.Interface().(**[]byte) } -func (p pointer) BytesSlice() *[][]byte { return p.v.Interface().(*[][]byte) } -func (p pointer) WeakFields() *weakFields { return (*weakFields)(p.v.Interface().(*WeakFields)) } -func (p pointer) Extensions() *map[int32]ExtensionField { - return p.v.Interface().(*map[int32]ExtensionField) -} - -func (p pointer) Elem() pointer { - return pointer{v: p.v.Elem()} -} - -// PointerSlice copies []*T from p as a new []pointer. -// This behavior differs from the implementation in pointer_unsafe.go. -func (p pointer) PointerSlice() []pointer { - // TODO: reconsider this - if p.v.IsNil() { - return nil - } - n := p.v.Elem().Len() - s := make([]pointer, n) - for i := 0; i < n; i++ { - s[i] = pointer{v: p.v.Elem().Index(i)} - } - return s -} - -// AppendPointerSlice appends v to p, which must be a []*T. -func (p pointer) AppendPointerSlice(v pointer) { - sp := p.v.Elem() - sp.Set(reflect.Append(sp, v.v)) -} - -// SetPointer sets *p to v. -func (p pointer) SetPointer(v pointer) { - p.v.Elem().Set(v.v) -} - -func (Export) MessageStateOf(p Pointer) *messageState { panic("not supported") } -func (ms *messageState) pointer() pointer { panic("not supported") } -func (ms *messageState) messageInfo() *MessageInfo { panic("not supported") } -func (ms *messageState) LoadMessageInfo() *MessageInfo { panic("not supported") } -func (ms *messageState) StoreMessageInfo(mi *MessageInfo) { panic("not supported") } - -type atomicNilMessage struct { - once sync.Once - m messageReflectWrapper -} - -func (m *atomicNilMessage) Init(mi *MessageInfo) *messageReflectWrapper { - m.once.Do(func() { - m.m.p = pointerOfIface(reflect.Zero(mi.GoReflectType).Interface()) - m.m.mi = mi - }) - return &m.m -} diff --git a/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go b/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go index ee0e0573..79e18666 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go +++ b/vendor/google.golang.org/protobuf/internal/impl/pointer_unsafe.go @@ -2,9 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine -// +build !purego,!appengine - package impl import ( @@ -50,7 +47,7 @@ func pointerOfValue(v reflect.Value) pointer { } // pointerOfIface returns the pointer portion of an interface. -func pointerOfIface(v interface{}) pointer { +func pointerOfIface(v any) pointer { type ifaceHeader struct { Type unsafe.Pointer Data unsafe.Pointer @@ -80,7 +77,7 @@ func (p pointer) AsValueOf(t reflect.Type) reflect.Value { // AsIfaceOf treats p as a pointer to an object of type t and returns the value. // It is equivalent to p.AsValueOf(t).Interface() -func (p pointer) AsIfaceOf(t reflect.Type) interface{} { +func (p pointer) AsIfaceOf(t reflect.Type) any { // TODO: Use tricky unsafe magic to directly create ifaceHeader. return p.AsValueOf(t).Interface() } @@ -138,6 +135,46 @@ func (p pointer) SetPointer(v pointer) { *(*unsafe.Pointer)(p.p) = (unsafe.Pointer)(v.p) } +func (p pointer) growBoolSlice(addCap int) { + sp := p.BoolSlice() + s := make([]bool, 0, addCap+len(*sp)) + s = s[:len(*sp)] + copy(s, *sp) + *sp = s +} + +func (p pointer) growInt32Slice(addCap int) { + sp := p.Int32Slice() + s := make([]int32, 0, addCap+len(*sp)) + s = s[:len(*sp)] + copy(s, *sp) + *sp = s +} + +func (p pointer) growUint32Slice(addCap int) { + p.growInt32Slice(addCap) +} + +func (p pointer) growFloat32Slice(addCap int) { + p.growInt32Slice(addCap) +} + +func (p pointer) growInt64Slice(addCap int) { + sp := p.Int64Slice() + s := make([]int64, 0, addCap+len(*sp)) + s = s[:len(*sp)] + copy(s, *sp) + *sp = s +} + +func (p pointer) growUint64Slice(addCap int) { + p.growInt64Slice(addCap) +} + +func (p pointer) growFloat64Slice(addCap int) { + p.growInt64Slice(addCap) +} + // Static check that MessageState does not exceed the size of a pointer. const _ = uint(unsafe.Sizeof(unsafe.Pointer(nil)) - unsafe.Sizeof(MessageState{})) diff --git a/vendor/google.golang.org/protobuf/internal/order/range.go b/vendor/google.golang.org/protobuf/internal/order/range.go index 1665a68e..a1f09162 100644 --- a/vendor/google.golang.org/protobuf/internal/order/range.go +++ b/vendor/google.golang.org/protobuf/internal/order/range.go @@ -18,7 +18,7 @@ type messageField struct { } var messageFieldPool = sync.Pool{ - New: func() interface{} { return new([]messageField) }, + New: func() any { return new([]messageField) }, } type ( @@ -69,7 +69,7 @@ type mapEntry struct { } var mapEntryPool = sync.Pool{ - New: func() interface{} { return new([]mapEntry) }, + New: func() any { return new([]mapEntry) }, } type ( diff --git a/vendor/google.golang.org/protobuf/internal/strs/strings.go b/vendor/google.golang.org/protobuf/internal/strs/strings.go index 0b74e765..a6e7df24 100644 --- a/vendor/google.golang.org/protobuf/internal/strs/strings.go +++ b/vendor/google.golang.org/protobuf/internal/strs/strings.go @@ -17,7 +17,7 @@ import ( // EnforceUTF8 reports whether to enforce strict UTF-8 validation. func EnforceUTF8(fd protoreflect.FieldDescriptor) bool { - if flags.ProtoLegacy { + if flags.ProtoLegacy || fd.Syntax() == protoreflect.Editions { if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok { return fd.EnforceUTF8() } diff --git a/vendor/google.golang.org/protobuf/internal/strs/strings_pure.go b/vendor/google.golang.org/protobuf/internal/strs/strings_pure.go deleted file mode 100644 index a1f6f333..00000000 --- a/vendor/google.golang.org/protobuf/internal/strs/strings_pure.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package strs - -import pref "google.golang.org/protobuf/reflect/protoreflect" - -func UnsafeString(b []byte) string { - return string(b) -} - -func UnsafeBytes(s string) []byte { - return []byte(s) -} - -type Builder struct{} - -func (*Builder) AppendFullName(prefix pref.FullName, name pref.Name) pref.FullName { - return prefix.Append(name) -} - -func (*Builder) MakeString(b []byte) string { - return string(b) -} diff --git a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe.go b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go similarity index 97% rename from vendor/google.golang.org/protobuf/internal/strs/strings_unsafe.go rename to vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go index 61a84d34..832a7988 100644 --- a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe.go +++ b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go120.go @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine -// +build !purego,!appengine +//go:build !go1.21 package strs diff --git a/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go new file mode 100644 index 00000000..1ffddf68 --- /dev/null +++ b/vendor/google.golang.org/protobuf/internal/strs/strings_unsafe_go121.go @@ -0,0 +1,73 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 + +package strs + +import ( + "unsafe" + + "google.golang.org/protobuf/reflect/protoreflect" +) + +// UnsafeString returns an unsafe string reference of b. +// The caller must treat the input slice as immutable. +// +// WARNING: Use carefully. The returned result must not leak to the end user +// unless the input slice is provably immutable. +func UnsafeString(b []byte) string { + return unsafe.String(unsafe.SliceData(b), len(b)) +} + +// UnsafeBytes returns an unsafe bytes slice reference of s. +// The caller must treat returned slice as immutable. +// +// WARNING: Use carefully. The returned result must not leak to the end user. +func UnsafeBytes(s string) []byte { + return unsafe.Slice(unsafe.StringData(s), len(s)) +} + +// Builder builds a set of strings with shared lifetime. +// This differs from strings.Builder, which is for building a single string. +type Builder struct { + buf []byte +} + +// AppendFullName is equivalent to protoreflect.FullName.Append, +// but optimized for large batches where each name has a shared lifetime. +func (sb *Builder) AppendFullName(prefix protoreflect.FullName, name protoreflect.Name) protoreflect.FullName { + n := len(prefix) + len(".") + len(name) + if len(prefix) == 0 { + n -= len(".") + } + sb.grow(n) + sb.buf = append(sb.buf, prefix...) + sb.buf = append(sb.buf, '.') + sb.buf = append(sb.buf, name...) + return protoreflect.FullName(sb.last(n)) +} + +// MakeString is equivalent to string(b), but optimized for large batches +// with a shared lifetime. +func (sb *Builder) MakeString(b []byte) string { + sb.grow(len(b)) + sb.buf = append(sb.buf, b...) + return sb.last(len(b)) +} + +func (sb *Builder) grow(n int) { + if cap(sb.buf)-len(sb.buf) >= n { + return + } + + // Unlike strings.Builder, we do not need to copy over the contents + // of the old buffer since our builder provides no API for + // retrieving previously created strings. + sb.buf = make([]byte, 0, 2*(cap(sb.buf)+n)) +} + +func (sb *Builder) last(n int) string { + return UnsafeString(sb.buf[len(sb.buf)-n:]) +} diff --git a/vendor/google.golang.org/protobuf/internal/version/version.go b/vendor/google.golang.org/protobuf/internal/version/version.go index 0999f29d..fb8e15e8 100644 --- a/vendor/google.golang.org/protobuf/internal/version/version.go +++ b/vendor/google.golang.org/protobuf/internal/version/version.go @@ -51,8 +51,8 @@ import ( // 10. Send out the CL for review and submit it. const ( Major = 1 - Minor = 31 - Patch = 0 + Minor = 35 + Patch = 1 PreRelease = "" ) diff --git a/vendor/google.golang.org/protobuf/proto/decode.go b/vendor/google.golang.org/protobuf/proto/decode.go index 48d47946..d75a6534 100644 --- a/vendor/google.golang.org/protobuf/proto/decode.go +++ b/vendor/google.golang.org/protobuf/proto/decode.go @@ -51,6 +51,8 @@ type UnmarshalOptions struct { // Unmarshal parses the wire-format message in b and places the result in m. // The provided message must be mutable (e.g., a non-nil pointer to a message). +// +// See the [UnmarshalOptions] type if you need more control. func Unmarshal(b []byte, m Message) error { _, err := UnmarshalOptions{RecursionLimit: protowire.DefaultRecursionLimit}.unmarshal(b, m.ProtoReflect()) return err @@ -69,7 +71,7 @@ func (o UnmarshalOptions) Unmarshal(b []byte, m Message) error { // UnmarshalState parses a wire-format message and places the result in m. // // This method permits fine-grained control over the unmarshaler. -// Most users should use Unmarshal instead. +// Most users should use [Unmarshal] instead. func (o UnmarshalOptions) UnmarshalState(in protoiface.UnmarshalInput) (protoiface.UnmarshalOutput, error) { if o.RecursionLimit == 0 { o.RecursionLimit = protowire.DefaultRecursionLimit diff --git a/vendor/google.golang.org/protobuf/proto/doc.go b/vendor/google.golang.org/protobuf/proto/doc.go index ec71e717..80ed16a0 100644 --- a/vendor/google.golang.org/protobuf/proto/doc.go +++ b/vendor/google.golang.org/protobuf/proto/doc.go @@ -18,27 +18,27 @@ // This package contains functions to convert to and from the wire format, // an efficient binary serialization of protocol buffers. // -// • Size reports the size of a message in the wire format. +// - [Size] reports the size of a message in the wire format. // -// • Marshal converts a message to the wire format. -// The MarshalOptions type provides more control over wire marshaling. +// - [Marshal] converts a message to the wire format. +// The [MarshalOptions] type provides more control over wire marshaling. // -// • Unmarshal converts a message from the wire format. -// The UnmarshalOptions type provides more control over wire unmarshaling. +// - [Unmarshal] converts a message from the wire format. +// The [UnmarshalOptions] type provides more control over wire unmarshaling. // // # Basic message operations // -// • Clone makes a deep copy of a message. +// - [Clone] makes a deep copy of a message. // -// • Merge merges the content of a message into another. +// - [Merge] merges the content of a message into another. // -// • Equal compares two messages. For more control over comparisons -// and detailed reporting of differences, see package -// "google.golang.org/protobuf/testing/protocmp". +// - [Equal] compares two messages. For more control over comparisons +// and detailed reporting of differences, see package +// [google.golang.org/protobuf/testing/protocmp]. // -// • Reset clears the content of a message. +// - [Reset] clears the content of a message. // -// • CheckInitialized reports whether all required fields in a message are set. +// - [CheckInitialized] reports whether all required fields in a message are set. // // # Optional scalar constructors // @@ -46,9 +46,9 @@ // as pointers to a value. For example, an optional string field has the // Go type *string. // -// • Bool, Int32, Int64, Uint32, Uint64, Float32, Float64, and String -// take a value and return a pointer to a new instance of it, -// to simplify construction of optional field values. +// - [Bool], [Int32], [Int64], [Uint32], [Uint64], [Float32], [Float64], and [String] +// take a value and return a pointer to a new instance of it, +// to simplify construction of optional field values. // // Generated enum types usually have an Enum method which performs the // same operation. @@ -57,29 +57,29 @@ // // # Extension accessors // -// • HasExtension, GetExtension, SetExtension, and ClearExtension -// access extension field values in a protocol buffer message. +// - [HasExtension], [GetExtension], [SetExtension], and [ClearExtension] +// access extension field values in a protocol buffer message. // // Extension fields are only supported in proto2. // // # Related packages // -// • Package "google.golang.org/protobuf/encoding/protojson" converts messages to -// and from JSON. +// - Package [google.golang.org/protobuf/encoding/protojson] converts messages to +// and from JSON. // -// • Package "google.golang.org/protobuf/encoding/prototext" converts messages to -// and from the text format. +// - Package [google.golang.org/protobuf/encoding/prototext] converts messages to +// and from the text format. // -// • Package "google.golang.org/protobuf/reflect/protoreflect" provides a -// reflection interface for protocol buffer data types. +// - Package [google.golang.org/protobuf/reflect/protoreflect] provides a +// reflection interface for protocol buffer data types. // -// • Package "google.golang.org/protobuf/testing/protocmp" provides features -// to compare protocol buffer messages with the "github.com/google/go-cmp/cmp" -// package. +// - Package [google.golang.org/protobuf/testing/protocmp] provides features +// to compare protocol buffer messages with the [github.com/google/go-cmp/cmp] +// package. // -// • Package "google.golang.org/protobuf/types/dynamicpb" provides a dynamic -// message type, suitable for working with messages where the protocol buffer -// type is only known at runtime. +// - Package [google.golang.org/protobuf/types/dynamicpb] provides a dynamic +// message type, suitable for working with messages where the protocol buffer +// type is only known at runtime. // // This module contains additional packages for more specialized use cases. // Consult the individual package documentation for details. diff --git a/vendor/google.golang.org/protobuf/proto/encode.go b/vendor/google.golang.org/protobuf/proto/encode.go index bf7f816d..1f847bcc 100644 --- a/vendor/google.golang.org/protobuf/proto/encode.go +++ b/vendor/google.golang.org/protobuf/proto/encode.go @@ -5,12 +5,17 @@ package proto import ( + "errors" + "fmt" + "google.golang.org/protobuf/encoding/protowire" "google.golang.org/protobuf/internal/encoding/messageset" "google.golang.org/protobuf/internal/order" "google.golang.org/protobuf/internal/pragma" "google.golang.org/protobuf/reflect/protoreflect" "google.golang.org/protobuf/runtime/protoiface" + + protoerrors "google.golang.org/protobuf/internal/errors" ) // MarshalOptions configures the marshaler. @@ -70,7 +75,32 @@ type MarshalOptions struct { UseCachedSize bool } +// flags turns the specified MarshalOptions (user-facing) into +// protoiface.MarshalInputFlags (used internally by the marshaler). +// +// See impl.marshalOptions.Options for the inverse operation. +func (o MarshalOptions) flags() protoiface.MarshalInputFlags { + var flags protoiface.MarshalInputFlags + + // Note: o.AllowPartial is always forced to true by MarshalOptions.marshal, + // which is why it is not a part of MarshalInputFlags. + + if o.Deterministic { + flags |= protoiface.MarshalDeterministic + } + + if o.UseCachedSize { + flags |= protoiface.MarshalUseCachedSize + } + + return flags +} + // Marshal returns the wire-format encoding of m. +// +// This is the most common entry point for encoding a Protobuf message. +// +// See the [MarshalOptions] type if you need more control. func Marshal(m Message) ([]byte, error) { // Treat nil message interface as an empty message; nothing to output. if m == nil { @@ -116,6 +146,9 @@ func emptyBytesForMessage(m Message) []byte { // MarshalAppend appends the wire-format encoding of m to b, // returning the result. +// +// This is a less common entry point than [Marshal], which is only needed if you +// need to supply your own buffers for performance reasons. func (o MarshalOptions) MarshalAppend(b []byte, m Message) ([]byte, error) { // Treat nil message interface as an empty message; nothing to append. if m == nil { @@ -129,7 +162,7 @@ func (o MarshalOptions) MarshalAppend(b []byte, m Message) ([]byte, error) { // MarshalState returns the wire-format encoding of a message. // // This method permits fine-grained control over the marshaler. -// Most users should use Marshal instead. +// Most users should use [Marshal] instead. func (o MarshalOptions) MarshalState(in protoiface.MarshalInput) (protoiface.MarshalOutput, error) { return o.marshal(in.Buf, in.Message) } @@ -145,12 +178,7 @@ func (o MarshalOptions) marshal(b []byte, m protoreflect.Message) (out protoifac in := protoiface.MarshalInput{ Message: m, Buf: b, - } - if o.Deterministic { - in.Flags |= protoiface.MarshalDeterministic - } - if o.UseCachedSize { - in.Flags |= protoiface.MarshalUseCachedSize + Flags: o.flags(), } if methods.Size != nil { sout := methods.Size(protoiface.SizeInput{ @@ -168,6 +196,10 @@ func (o MarshalOptions) marshal(b []byte, m protoreflect.Message) (out protoifac out.Buf, err = o.marshalMessageSlow(b, m) } if err != nil { + var mismatch *protoerrors.SizeMismatchError + if errors.As(err, &mismatch) { + return out, fmt.Errorf("marshaling %s: %v", string(m.Descriptor().FullName()), err) + } return out, err } if allowPartial { diff --git a/vendor/google.golang.org/protobuf/proto/equal.go b/vendor/google.golang.org/protobuf/proto/equal.go index 1a0be1b0..c36d4a9c 100644 --- a/vendor/google.golang.org/protobuf/proto/equal.go +++ b/vendor/google.golang.org/protobuf/proto/equal.go @@ -8,6 +8,7 @@ import ( "reflect" "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/runtime/protoiface" ) // Equal reports whether two messages are equal, @@ -51,6 +52,14 @@ func Equal(x, y Message) bool { if mx.IsValid() != my.IsValid() { return false } + + // Only one of the messages needs to implement the fast-path for it to work. + pmx := protoMethods(mx) + pmy := protoMethods(my) + if pmx != nil && pmy != nil && pmx.Equal != nil && pmy.Equal != nil { + return pmx.Equal(protoiface.EqualInput{MessageA: mx, MessageB: my}).Equal + } + vx := protoreflect.ValueOfMessage(mx) vy := protoreflect.ValueOfMessage(my) return vx.Equal(vy) diff --git a/vendor/google.golang.org/protobuf/proto/extension.go b/vendor/google.golang.org/protobuf/proto/extension.go index 5f293cda..78445d11 100644 --- a/vendor/google.golang.org/protobuf/proto/extension.go +++ b/vendor/google.golang.org/protobuf/proto/extension.go @@ -11,22 +11,25 @@ import ( // HasExtension reports whether an extension field is populated. // It returns false if m is invalid or if xt does not extend m. func HasExtension(m Message, xt protoreflect.ExtensionType) bool { - // Treat nil message interface as an empty message; no populated fields. - if m == nil { + // Treat nil message interface or descriptor as an empty message; no populated + // fields. + if m == nil || xt == nil { return false } // As a special-case, we reports invalid or mismatching descriptors // as always not being populated (since they aren't). - if xt == nil || m.ProtoReflect().Descriptor() != xt.TypeDescriptor().ContainingMessage() { + mr := m.ProtoReflect() + xd := xt.TypeDescriptor() + if mr.Descriptor() != xd.ContainingMessage() { return false } - return m.ProtoReflect().Has(xt.TypeDescriptor()) + return mr.Has(xd) } // ClearExtension clears an extension field such that subsequent -// HasExtension calls return false. +// [HasExtension] calls return false. // It panics if m is invalid or if xt does not extend m. func ClearExtension(m Message, xt protoreflect.ExtensionType) { m.ProtoReflect().Clear(xt.TypeDescriptor()) @@ -36,7 +39,49 @@ func ClearExtension(m Message, xt protoreflect.ExtensionType) { // If the field is unpopulated, it returns the default value for // scalars and an immutable, empty value for lists or messages. // It panics if xt does not extend m. -func GetExtension(m Message, xt protoreflect.ExtensionType) interface{} { +// +// The type of the value is dependent on the field type of the extension. +// For extensions generated by protoc-gen-go, the Go type is as follows: +// +// ╔═══════════════════╤═════════════════════════╗ +// ║ Go type │ Protobuf kind ║ +// ╠═══════════════════╪═════════════════════════╣ +// ║ bool │ bool ║ +// ║ int32 │ int32, sint32, sfixed32 ║ +// ║ int64 │ int64, sint64, sfixed64 ║ +// ║ uint32 │ uint32, fixed32 ║ +// ║ uint64 │ uint64, fixed64 ║ +// ║ float32 │ float ║ +// ║ float64 │ double ║ +// ║ string │ string ║ +// ║ []byte │ bytes ║ +// ║ protoreflect.Enum │ enum ║ +// ║ proto.Message │ message, group ║ +// ╚═══════════════════╧═════════════════════════╝ +// +// The protoreflect.Enum and proto.Message types are the concrete Go type +// associated with the named enum or message. Repeated fields are represented +// using a Go slice of the base element type. +// +// If a generated extension descriptor variable is directly passed to +// GetExtension, then the call should be followed immediately by a +// type assertion to the expected output value. For example: +// +// mm := proto.GetExtension(m, foopb.E_MyExtension).(*foopb.MyMessage) +// +// This pattern enables static analysis tools to verify that the asserted type +// matches the Go type associated with the extension field and +// also enables a possible future migration to a type-safe extension API. +// +// Since singular messages are the most common extension type, the pattern of +// calling HasExtension followed by GetExtension may be simplified to: +// +// if mm := proto.GetExtension(m, foopb.E_MyExtension).(*foopb.MyMessage); mm != nil { +// ... // make use of mm +// } +// +// The mm variable is non-nil if and only if HasExtension reports true. +func GetExtension(m Message, xt protoreflect.ExtensionType) any { // Treat nil message interface as an empty message; return the default. if m == nil { return xt.InterfaceOf(xt.Zero()) @@ -48,7 +93,36 @@ func GetExtension(m Message, xt protoreflect.ExtensionType) interface{} { // SetExtension stores the value of an extension field. // It panics if m is invalid, xt does not extend m, or if type of v // is invalid for the specified extension field. -func SetExtension(m Message, xt protoreflect.ExtensionType, v interface{}) { +// +// The type of the value is dependent on the field type of the extension. +// For extensions generated by protoc-gen-go, the Go type is as follows: +// +// ╔═══════════════════╤═════════════════════════╗ +// ║ Go type │ Protobuf kind ║ +// ╠═══════════════════╪═════════════════════════╣ +// ║ bool │ bool ║ +// ║ int32 │ int32, sint32, sfixed32 ║ +// ║ int64 │ int64, sint64, sfixed64 ║ +// ║ uint32 │ uint32, fixed32 ║ +// ║ uint64 │ uint64, fixed64 ║ +// ║ float32 │ float ║ +// ║ float64 │ double ║ +// ║ string │ string ║ +// ║ []byte │ bytes ║ +// ║ protoreflect.Enum │ enum ║ +// ║ proto.Message │ message, group ║ +// ╚═══════════════════╧═════════════════════════╝ +// +// The protoreflect.Enum and proto.Message types are the concrete Go type +// associated with the named enum or message. Repeated fields are represented +// using a Go slice of the base element type. +// +// If a generated extension descriptor variable is directly passed to +// SetExtension (e.g., foopb.E_MyExtension), then the value should be a +// concrete type that matches the expected Go type for the extension descriptor +// so that static analysis tools can verify type correctness. +// This also enables a possible future migration to a type-safe extension API. +func SetExtension(m Message, xt protoreflect.ExtensionType, v any) { xd := xt.TypeDescriptor() pv := xt.ValueOf(v) @@ -75,7 +149,7 @@ func SetExtension(m Message, xt protoreflect.ExtensionType, v interface{}) { // It returns immediately if f returns false. // While iterating, mutating operations may only be performed // on the current extension field. -func RangeExtensions(m Message, f func(protoreflect.ExtensionType, interface{}) bool) { +func RangeExtensions(m Message, f func(protoreflect.ExtensionType, any) bool) { // Treat nil message interface as an empty message; nothing to range over. if m == nil { return diff --git a/vendor/google.golang.org/protobuf/proto/merge.go b/vendor/google.golang.org/protobuf/proto/merge.go index d761ab33..3c6fe578 100644 --- a/vendor/google.golang.org/protobuf/proto/merge.go +++ b/vendor/google.golang.org/protobuf/proto/merge.go @@ -21,7 +21,7 @@ import ( // The unknown fields of src are appended to the unknown fields of dst. // // It is semantically equivalent to unmarshaling the encoded form of src -// into dst with the UnmarshalOptions.Merge option specified. +// into dst with the [UnmarshalOptions.Merge] option specified. func Merge(dst, src Message) { // TODO: Should nil src be treated as semantically equivalent to a // untyped, read-only, empty message? What about a nil dst? diff --git a/vendor/google.golang.org/protobuf/proto/messageset.go b/vendor/google.golang.org/protobuf/proto/messageset.go index 312d5d45..575d1483 100644 --- a/vendor/google.golang.org/protobuf/proto/messageset.go +++ b/vendor/google.golang.org/protobuf/proto/messageset.go @@ -47,11 +47,16 @@ func (o MarshalOptions) marshalMessageSet(b []byte, m protoreflect.Message) ([]b func (o MarshalOptions) marshalMessageSetField(b []byte, fd protoreflect.FieldDescriptor, value protoreflect.Value) ([]byte, error) { b = messageset.AppendFieldStart(b, fd.Number()) b = protowire.AppendTag(b, messageset.FieldMessage, protowire.BytesType) - b = protowire.AppendVarint(b, uint64(o.Size(value.Message().Interface()))) + calculatedSize := o.Size(value.Message().Interface()) + b = protowire.AppendVarint(b, uint64(calculatedSize)) + before := len(b) b, err := o.marshalMessage(b, value.Message()) if err != nil { return b, err } + if measuredSize := len(b) - before; calculatedSize != measuredSize { + return nil, errors.MismatchedSizeCalculation(calculatedSize, measuredSize) + } b = messageset.AppendFieldEnd(b) return b, nil } diff --git a/vendor/google.golang.org/protobuf/proto/proto.go b/vendor/google.golang.org/protobuf/proto/proto.go index 1f0d183b..7543ee6b 100644 --- a/vendor/google.golang.org/protobuf/proto/proto.go +++ b/vendor/google.golang.org/protobuf/proto/proto.go @@ -15,18 +15,20 @@ import ( // protobuf module that accept a Message, except where otherwise specified. // // This is the v2 interface definition for protobuf messages. -// The v1 interface definition is "github.com/golang/protobuf/proto".Message. +// The v1 interface definition is [github.com/golang/protobuf/proto.Message]. // -// To convert a v1 message to a v2 message, -// use "github.com/golang/protobuf/proto".MessageV2. -// To convert a v2 message to a v1 message, -// use "github.com/golang/protobuf/proto".MessageV1. +// - To convert a v1 message to a v2 message, +// use [google.golang.org/protobuf/protoadapt.MessageV2Of]. +// - To convert a v2 message to a v1 message, +// use [google.golang.org/protobuf/protoadapt.MessageV1Of]. type Message = protoreflect.ProtoMessage -// Error matches all errors produced by packages in the protobuf module. +// Error matches all errors produced by packages in the protobuf module +// according to [errors.Is]. // -// That is, errors.Is(err, Error) reports whether an error is produced -// by this module. +// Example usage: +// +// if errors.Is(err, proto.Error) { ... } var Error error func init() { diff --git a/vendor/google.golang.org/protobuf/proto/size.go b/vendor/google.golang.org/protobuf/proto/size.go index f1692b49..052fb5ae 100644 --- a/vendor/google.golang.org/protobuf/proto/size.go +++ b/vendor/google.golang.org/protobuf/proto/size.go @@ -34,6 +34,7 @@ func (o MarshalOptions) size(m protoreflect.Message) (size int) { if methods != nil && methods.Size != nil { out := methods.Size(protoiface.SizeInput{ Message: m, + Flags: o.flags(), }) return out.Size } @@ -42,6 +43,7 @@ func (o MarshalOptions) size(m protoreflect.Message) (size int) { // This case is mainly used for legacy types with a Marshal method. out, _ := methods.Marshal(protoiface.MarshalInput{ Message: m, + Flags: o.flags(), }) return len(out.Buf) } diff --git a/vendor/google.golang.org/protobuf/reflect/protodesc/desc.go b/vendor/google.golang.org/protobuf/reflect/protodesc/desc.go index e4dfb120..8fbecb4f 100644 --- a/vendor/google.golang.org/protobuf/reflect/protodesc/desc.go +++ b/vendor/google.golang.org/protobuf/reflect/protodesc/desc.go @@ -3,16 +3,17 @@ // license that can be found in the LICENSE file. // Package protodesc provides functionality for converting -// FileDescriptorProto messages to/from protoreflect.FileDescriptor values. +// FileDescriptorProto messages to/from [protoreflect.FileDescriptor] values. // // The google.protobuf.FileDescriptorProto is a protobuf message that describes // the type information for a .proto file in a form that is easily serializable. -// The protoreflect.FileDescriptor is a more structured representation of +// The [protoreflect.FileDescriptor] is a more structured representation of // the FileDescriptorProto message where references and remote dependencies // can be directly followed. package protodesc import ( + "google.golang.org/protobuf/internal/editionssupport" "google.golang.org/protobuf/internal/errors" "google.golang.org/protobuf/internal/filedesc" "google.golang.org/protobuf/internal/pragma" @@ -24,11 +25,11 @@ import ( "google.golang.org/protobuf/types/descriptorpb" ) -// Resolver is the resolver used by NewFile to resolve dependencies. +// Resolver is the resolver used by [NewFile] to resolve dependencies. // The enums and messages provided must belong to some parent file, // which is also registered. // -// It is implemented by protoregistry.Files. +// It is implemented by [protoregistry.Files]. type Resolver interface { FindFileByPath(string) (protoreflect.FileDescriptor, error) FindDescriptorByName(protoreflect.FullName) (protoreflect.Descriptor, error) @@ -61,19 +62,19 @@ type FileOptions struct { AllowUnresolvable bool } -// NewFile creates a new protoreflect.FileDescriptor from the provided -// file descriptor message. See FileOptions.New for more information. +// NewFile creates a new [protoreflect.FileDescriptor] from the provided +// file descriptor message. See [FileOptions.New] for more information. func NewFile(fd *descriptorpb.FileDescriptorProto, r Resolver) (protoreflect.FileDescriptor, error) { return FileOptions{}.New(fd, r) } -// NewFiles creates a new protoregistry.Files from the provided -// FileDescriptorSet message. See FileOptions.NewFiles for more information. +// NewFiles creates a new [protoregistry.Files] from the provided +// FileDescriptorSet message. See [FileOptions.NewFiles] for more information. func NewFiles(fd *descriptorpb.FileDescriptorSet) (*protoregistry.Files, error) { return FileOptions{}.NewFiles(fd) } -// New creates a new protoreflect.FileDescriptor from the provided +// New creates a new [protoreflect.FileDescriptor] from the provided // file descriptor message. The file must represent a valid proto file according // to protobuf semantics. The returned descriptor is a deep copy of the input. // @@ -91,11 +92,19 @@ func (o FileOptions) New(fd *descriptorpb.FileDescriptorProto, r Resolver) (prot switch fd.GetSyntax() { case "proto2", "": f.L1.Syntax = protoreflect.Proto2 + f.L1.Edition = filedesc.EditionProto2 case "proto3": f.L1.Syntax = protoreflect.Proto3 + f.L1.Edition = filedesc.EditionProto3 + case "editions": + f.L1.Syntax = protoreflect.Editions + f.L1.Edition = fromEditionProto(fd.GetEdition()) default: return nil, errors.New("invalid syntax: %q", fd.GetSyntax()) } + if f.L1.Syntax == protoreflect.Editions && (fd.GetEdition() < editionssupport.Minimum || fd.GetEdition() > editionssupport.Maximum) { + return nil, errors.New("use of edition %v not yet supported by the Go Protobuf runtime", fd.GetEdition()) + } f.L1.Path = fd.GetName() if f.L1.Path == "" { return nil, errors.New("file path must be populated") @@ -108,6 +117,7 @@ func (o FileOptions) New(fd *descriptorpb.FileDescriptorProto, r Resolver) (prot opts = proto.Clone(opts).(*descriptorpb.FileOptions) f.L2.Options = func() protoreflect.ProtoMessage { return opts } } + initFileDescFromFeatureSet(f, fd.GetOptions().GetFeatures()) f.L2.Imports = make(filedesc.FileImports, len(fd.GetDependency())) for _, i := range fd.GetPublicDependency() { @@ -210,10 +220,10 @@ func (o FileOptions) New(fd *descriptorpb.FileDescriptorProto, r Resolver) (prot if err := validateEnumDeclarations(f.L1.Enums.List, fd.GetEnumType()); err != nil { return nil, err } - if err := validateMessageDeclarations(f.L1.Messages.List, fd.GetMessageType()); err != nil { + if err := validateMessageDeclarations(f, f.L1.Messages.List, fd.GetMessageType()); err != nil { return nil, err } - if err := validateExtensionDeclarations(f.L1.Extensions.List, fd.GetExtension()); err != nil { + if err := validateExtensionDeclarations(f, f.L1.Extensions.List, fd.GetExtension()); err != nil { return nil, err } @@ -231,7 +241,7 @@ func (is importSet) importPublic(imps protoreflect.FileImports) { } } -// NewFiles creates a new protoregistry.Files from the provided +// NewFiles creates a new [protoregistry.Files] from the provided // FileDescriptorSet message. The descriptor set must include only // valid files according to protobuf semantics. The returned descriptors // are a deep copy of the input. diff --git a/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go b/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go index 37efda1a..ebcb4a8a 100644 --- a/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go +++ b/vendor/google.golang.org/protobuf/reflect/protodesc/desc_init.go @@ -28,6 +28,7 @@ func (r descsByName) initEnumDeclarations(eds []*descriptorpb.EnumDescriptorProt opts = proto.Clone(opts).(*descriptorpb.EnumOptions) e.L2.Options = func() protoreflect.ProtoMessage { return opts } } + e.L1.EditionFeatures = mergeEditionFeatures(parent, ed.GetOptions().GetFeatures()) for _, s := range ed.GetReservedName() { e.L2.ReservedNames.List = append(e.L2.ReservedNames.List, protoreflect.Name(s)) } @@ -68,6 +69,7 @@ func (r descsByName) initMessagesDeclarations(mds []*descriptorpb.DescriptorProt if m.L0, err = r.makeBase(m, parent, md.GetName(), i, sb); err != nil { return nil, err } + m.L1.EditionFeatures = mergeEditionFeatures(parent, md.GetOptions().GetFeatures()) if opts := md.GetOptions(); opts != nil { opts = proto.Clone(opts).(*descriptorpb.MessageOptions) m.L2.Options = func() protoreflect.ProtoMessage { return opts } @@ -114,6 +116,27 @@ func (r descsByName) initMessagesDeclarations(mds []*descriptorpb.DescriptorProt return ms, nil } +// canBePacked returns whether the field can use packed encoding: +// https://protobuf.dev/programming-guides/encoding/#packed +func canBePacked(fd *descriptorpb.FieldDescriptorProto) bool { + if fd.GetLabel() != descriptorpb.FieldDescriptorProto_LABEL_REPEATED { + return false // not a repeated field + } + + switch protoreflect.Kind(fd.GetType()) { + case protoreflect.MessageKind, protoreflect.GroupKind: + return false // not a scalar type field + + case protoreflect.StringKind, protoreflect.BytesKind: + // string and bytes can explicitly not be declared as packed, + // see https://protobuf.dev/programming-guides/encoding/#packed + return false + + default: + return true + } +} + func (r descsByName) initFieldsFromDescriptorProto(fds []*descriptorpb.FieldDescriptorProto, parent protoreflect.Descriptor, sb *strs.Builder) (fs []filedesc.Field, err error) { fs = make([]filedesc.Field, len(fds)) // allocate up-front to ensure stable pointers for i, fd := range fds { @@ -121,13 +144,16 @@ func (r descsByName) initFieldsFromDescriptorProto(fds []*descriptorpb.FieldDesc if f.L0, err = r.makeBase(f, parent, fd.GetName(), i, sb); err != nil { return nil, err } + f.L1.EditionFeatures = mergeEditionFeatures(parent, fd.GetOptions().GetFeatures()) f.L1.IsProto3Optional = fd.GetProto3Optional() if opts := fd.GetOptions(); opts != nil { opts = proto.Clone(opts).(*descriptorpb.FieldOptions) f.L1.Options = func() protoreflect.ProtoMessage { return opts } f.L1.IsWeak = opts.GetWeak() - f.L1.HasPacked = opts.Packed != nil - f.L1.IsPacked = opts.GetPacked() + f.L1.IsLazy = opts.GetLazy() + if opts.Packed != nil { + f.L1.EditionFeatures.IsPacked = opts.GetPacked() + } } f.L1.Number = protoreflect.FieldNumber(fd.GetNumber()) f.L1.Cardinality = protoreflect.Cardinality(fd.GetLabel()) @@ -137,6 +163,14 @@ func (r descsByName) initFieldsFromDescriptorProto(fds []*descriptorpb.FieldDesc if fd.JsonName != nil { f.L1.StringName.InitJSON(fd.GetJsonName()) } + + if f.L1.EditionFeatures.IsLegacyRequired { + f.L1.Cardinality = protoreflect.Required + } + + if f.L1.Kind == protoreflect.MessageKind && f.L1.EditionFeatures.IsDelimitedEncoded { + f.L1.Kind = protoreflect.GroupKind + } } return fs, nil } @@ -148,6 +182,7 @@ func (r descsByName) initOneofsFromDescriptorProto(ods []*descriptorpb.OneofDesc if o.L0, err = r.makeBase(o, parent, od.GetName(), i, sb); err != nil { return nil, err } + o.L1.EditionFeatures = mergeEditionFeatures(parent, od.GetOptions().GetFeatures()) if opts := od.GetOptions(); opts != nil { opts = proto.Clone(opts).(*descriptorpb.OneofOptions) o.L1.Options = func() protoreflect.ProtoMessage { return opts } @@ -164,10 +199,13 @@ func (r descsByName) initExtensionDeclarations(xds []*descriptorpb.FieldDescript if x.L0, err = r.makeBase(x, parent, xd.GetName(), i, sb); err != nil { return nil, err } + x.L1.EditionFeatures = mergeEditionFeatures(parent, xd.GetOptions().GetFeatures()) if opts := xd.GetOptions(); opts != nil { opts = proto.Clone(opts).(*descriptorpb.FieldOptions) x.L2.Options = func() protoreflect.ProtoMessage { return opts } - x.L2.IsPacked = opts.GetPacked() + if opts.Packed != nil { + x.L1.EditionFeatures.IsPacked = opts.GetPacked() + } } x.L1.Number = protoreflect.FieldNumber(xd.GetNumber()) x.L1.Cardinality = protoreflect.Cardinality(xd.GetLabel()) @@ -177,6 +215,9 @@ func (r descsByName) initExtensionDeclarations(xds []*descriptorpb.FieldDescript if xd.JsonName != nil { x.L2.StringName.InitJSON(xd.GetJsonName()) } + if x.L1.Kind == protoreflect.MessageKind && x.L1.EditionFeatures.IsDelimitedEncoded { + x.L1.Kind = protoreflect.GroupKind + } } return xs, nil } diff --git a/vendor/google.golang.org/protobuf/reflect/protodesc/desc_resolve.go b/vendor/google.golang.org/protobuf/reflect/protodesc/desc_resolve.go index 27d7e350..f3cebab2 100644 --- a/vendor/google.golang.org/protobuf/reflect/protodesc/desc_resolve.go +++ b/vendor/google.golang.org/protobuf/reflect/protodesc/desc_resolve.go @@ -46,6 +46,11 @@ func (r *resolver) resolveMessageDependencies(ms []filedesc.Message, mds []*desc if f.L1.Kind, f.L1.Enum, f.L1.Message, err = r.findTarget(f.Kind(), f.Parent().FullName(), partialName(fd.GetTypeName()), f.IsWeak()); err != nil { return errors.New("message field %q cannot resolve type: %v", f.FullName(), err) } + if f.L1.Kind == protoreflect.GroupKind && (f.IsMap() || f.IsMapEntry()) { + // A map field might inherit delimited encoding from a file-wide default feature. + // But maps never actually use delimited encoding. (At least for now...) + f.L1.Kind = protoreflect.MessageKind + } if fd.DefaultValue != nil { v, ev, err := unmarshalDefault(fd.GetDefaultValue(), f, r.allowUnresolvable) if err != nil { @@ -276,8 +281,8 @@ func unmarshalDefault(s string, fd protoreflect.FieldDescriptor, allowUnresolvab } else if err != nil { return v, ev, err } - if fd.Syntax() == protoreflect.Proto3 { - return v, ev, errors.New("cannot be specified under proto3 semantics") + if !fd.HasPresence() { + return v, ev, errors.New("cannot be specified with implicit field presence") } if fd.Kind() == protoreflect.MessageKind || fd.Kind() == protoreflect.GroupKind || fd.Cardinality() == protoreflect.Repeated { return v, ev, errors.New("cannot be specified on composite types") diff --git a/vendor/google.golang.org/protobuf/reflect/protodesc/desc_validate.go b/vendor/google.golang.org/protobuf/reflect/protodesc/desc_validate.go index 9af1d564..6de31c2e 100644 --- a/vendor/google.golang.org/protobuf/reflect/protodesc/desc_validate.go +++ b/vendor/google.golang.org/protobuf/reflect/protodesc/desc_validate.go @@ -45,11 +45,11 @@ func validateEnumDeclarations(es []filedesc.Enum, eds []*descriptorpb.EnumDescri if allowAlias && !foundAlias { return errors.New("enum %q allows aliases, but none were found", e.FullName()) } - if e.Syntax() == protoreflect.Proto3 { + if !e.IsClosed() { if v := e.Values().Get(0); v.Number() != 0 { - return errors.New("enum %q using proto3 semantics must have zero number for the first value", v.FullName()) + return errors.New("enum %q using open semantics must have zero number for the first value", v.FullName()) } - // Verify that value names in proto3 do not conflict if the + // Verify that value names in open enums do not conflict if the // case-insensitive prefix is removed. // See protoc v3.8.0: src/google/protobuf/descriptor.cc:4991-5055 names := map[string]protoreflect.EnumValueDescriptor{} @@ -58,7 +58,7 @@ func validateEnumDeclarations(es []filedesc.Enum, eds []*descriptorpb.EnumDescri v1 := e.Values().Get(i) s := strs.EnumValueName(strs.TrimEnumPrefix(string(v1.Name()), prefix)) if v2, ok := names[s]; ok && v1.Number() != v2.Number() { - return errors.New("enum %q using proto3 semantics has conflict: %q with %q", e.FullName(), v1.Name(), v2.Name()) + return errors.New("enum %q using open semantics has conflict: %q with %q", e.FullName(), v1.Name(), v2.Name()) } names[s] = v1 } @@ -80,7 +80,9 @@ func validateEnumDeclarations(es []filedesc.Enum, eds []*descriptorpb.EnumDescri return nil } -func validateMessageDeclarations(ms []filedesc.Message, mds []*descriptorpb.DescriptorProto) error { +func validateMessageDeclarations(file *filedesc.File, ms []filedesc.Message, mds []*descriptorpb.DescriptorProto) error { + // There are a few limited exceptions only for proto3 + isProto3 := file.L1.Edition == fromEditionProto(descriptorpb.Edition_EDITION_PROTO3) for i, md := range mds { m := &ms[i] @@ -107,25 +109,13 @@ func validateMessageDeclarations(ms []filedesc.Message, mds []*descriptorpb.Desc if isMessageSet && !flags.ProtoLegacy { return errors.New("message %q is a MessageSet, which is a legacy proto1 feature that is no longer supported", m.FullName()) } - if isMessageSet && (m.Syntax() != protoreflect.Proto2 || m.Fields().Len() > 0 || m.ExtensionRanges().Len() == 0) { + if isMessageSet && (isProto3 || m.Fields().Len() > 0 || m.ExtensionRanges().Len() == 0) { return errors.New("message %q is an invalid proto1 MessageSet", m.FullName()) } - if m.Syntax() == protoreflect.Proto3 { + if isProto3 { if m.ExtensionRanges().Len() > 0 { return errors.New("message %q using proto3 semantics cannot have extension ranges", m.FullName()) } - // Verify that field names in proto3 do not conflict if lowercased - // with all underscores removed. - // See protoc v3.8.0: src/google/protobuf/descriptor.cc:5830-5847 - names := map[string]protoreflect.FieldDescriptor{} - for i := 0; i < m.Fields().Len(); i++ { - f1 := m.Fields().Get(i) - s := strings.Replace(strings.ToLower(string(f1.Name())), "_", "", -1) - if f2, ok := names[s]; ok { - return errors.New("message %q using proto3 semantics has conflict: %q with %q", m.FullName(), f1.Name(), f2.Name()) - } - names[s] = f1 - } } for j, fd := range md.GetField() { @@ -149,7 +139,7 @@ func validateMessageDeclarations(ms []filedesc.Message, mds []*descriptorpb.Desc return errors.New("message field %q may not have extendee: %q", f.FullName(), fd.GetExtendee()) } if f.L1.IsProto3Optional { - if f.Syntax() != protoreflect.Proto3 { + if !isProto3 { return errors.New("message field %q under proto3 optional semantics must be specified in the proto3 syntax", f.FullName()) } if f.Cardinality() != protoreflect.Optional { @@ -162,26 +152,29 @@ func validateMessageDeclarations(ms []filedesc.Message, mds []*descriptorpb.Desc if f.IsWeak() && !flags.ProtoLegacy { return errors.New("message field %q is a weak field, which is a legacy proto1 feature that is no longer supported", f.FullName()) } - if f.IsWeak() && (f.Syntax() != protoreflect.Proto2 || !isOptionalMessage(f) || f.ContainingOneof() != nil) { + if f.IsWeak() && (!f.HasPresence() || !isOptionalMessage(f) || f.ContainingOneof() != nil) { return errors.New("message field %q may only be weak for an optional message", f.FullName()) } if f.IsPacked() && !isPackable(f) { return errors.New("message field %q is not packable", f.FullName()) } - if err := checkValidGroup(f); err != nil { + if err := checkValidGroup(file, f); err != nil { return errors.New("message field %q is an invalid group: %v", f.FullName(), err) } if err := checkValidMap(f); err != nil { return errors.New("message field %q is an invalid map: %v", f.FullName(), err) } - if f.Syntax() == protoreflect.Proto3 { + if isProto3 { if f.Cardinality() == protoreflect.Required { return errors.New("message field %q using proto3 semantics cannot be required", f.FullName()) } - if f.Enum() != nil && !f.Enum().IsPlaceholder() && f.Enum().Syntax() != protoreflect.Proto3 { - return errors.New("message field %q using proto3 semantics may only depend on a proto3 enum", f.FullName()) + if f.Enum() != nil && !f.Enum().IsPlaceholder() && f.Enum().IsClosed() { + return errors.New("message field %q using proto3 semantics may only depend on open enums", f.FullName()) } } + if f.Cardinality() == protoreflect.Optional && !f.HasPresence() && f.Enum() != nil && !f.Enum().IsPlaceholder() && f.Enum().IsClosed() { + return errors.New("message field %q with implicit presence may only use open enums", f.FullName()) + } } seenSynthetic := false // synthetic oneofs for proto3 optional must come after real oneofs for j := range md.GetOneofDecl() { @@ -215,17 +208,17 @@ func validateMessageDeclarations(ms []filedesc.Message, mds []*descriptorpb.Desc if err := validateEnumDeclarations(m.L1.Enums.List, md.GetEnumType()); err != nil { return err } - if err := validateMessageDeclarations(m.L1.Messages.List, md.GetNestedType()); err != nil { + if err := validateMessageDeclarations(file, m.L1.Messages.List, md.GetNestedType()); err != nil { return err } - if err := validateExtensionDeclarations(m.L1.Extensions.List, md.GetExtension()); err != nil { + if err := validateExtensionDeclarations(file, m.L1.Extensions.List, md.GetExtension()); err != nil { return err } } return nil } -func validateExtensionDeclarations(xs []filedesc.Extension, xds []*descriptorpb.FieldDescriptorProto) error { +func validateExtensionDeclarations(f *filedesc.File, xs []filedesc.Extension, xds []*descriptorpb.FieldDescriptorProto) error { for i, xd := range xds { x := &xs[i] // NOTE: Avoid using the IsValid method since extensions to MessageSet @@ -267,13 +260,13 @@ func validateExtensionDeclarations(xs []filedesc.Extension, xds []*descriptorpb. if x.IsPacked() && !isPackable(x) { return errors.New("extension field %q is not packable", x.FullName()) } - if err := checkValidGroup(x); err != nil { + if err := checkValidGroup(f, x); err != nil { return errors.New("extension field %q is an invalid group: %v", x.FullName(), err) } if md := x.Message(); md != nil && md.IsMapEntry() { return errors.New("extension field %q cannot be a map entry", x.FullName()) } - if x.Syntax() == protoreflect.Proto3 { + if f.L1.Edition == fromEditionProto(descriptorpb.Edition_EDITION_PROTO3) { switch x.ContainingMessage().FullName() { case (*descriptorpb.FileOptions)(nil).ProtoReflect().Descriptor().FullName(): case (*descriptorpb.EnumOptions)(nil).ProtoReflect().Descriptor().FullName(): @@ -309,21 +302,25 @@ func isPackable(fd protoreflect.FieldDescriptor) bool { // checkValidGroup reports whether fd is a valid group according to the same // rules that protoc imposes. -func checkValidGroup(fd protoreflect.FieldDescriptor) error { +func checkValidGroup(f *filedesc.File, fd protoreflect.FieldDescriptor) error { md := fd.Message() switch { case fd.Kind() != protoreflect.GroupKind: return nil - case fd.Syntax() != protoreflect.Proto2: - return errors.New("invalid under proto2 semantics") + case f.L1.Edition == fromEditionProto(descriptorpb.Edition_EDITION_PROTO3): + return errors.New("invalid under proto3 semantics") case md == nil || md.IsPlaceholder(): return errors.New("message must be resolvable") - case fd.FullName().Parent() != md.FullName().Parent(): - return errors.New("message and field must be declared in the same scope") - case !unicode.IsUpper(rune(md.Name()[0])): - return errors.New("message name must start with an uppercase") - case fd.Name() != protoreflect.Name(strings.ToLower(string(md.Name()))): - return errors.New("field name must be lowercased form of the message name") + } + if f.L1.Edition < fromEditionProto(descriptorpb.Edition_EDITION_2023) { + switch { + case fd.FullName().Parent() != md.FullName().Parent(): + return errors.New("message and field must be declared in the same scope") + case !unicode.IsUpper(rune(md.Name()[0])): + return errors.New("message name must start with an uppercase") + case fd.Name() != protoreflect.Name(strings.ToLower(string(md.Name()))): + return errors.New("field name must be lowercased form of the message name") + } } return nil } diff --git a/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go b/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go new file mode 100644 index 00000000..002e0047 --- /dev/null +++ b/vendor/google.golang.org/protobuf/reflect/protodesc/editions.go @@ -0,0 +1,145 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package protodesc + +import ( + "fmt" + "os" + "sync" + + "google.golang.org/protobuf/internal/editiondefaults" + "google.golang.org/protobuf/internal/filedesc" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/types/descriptorpb" + "google.golang.org/protobuf/types/gofeaturespb" +) + +var defaults = &descriptorpb.FeatureSetDefaults{} +var defaultsCacheMu sync.Mutex +var defaultsCache = make(map[filedesc.Edition]*descriptorpb.FeatureSet) + +func init() { + err := proto.Unmarshal(editiondefaults.Defaults, defaults) + if err != nil { + fmt.Fprintf(os.Stderr, "unmarshal editions defaults: %v\n", err) + os.Exit(1) + } +} + +func fromEditionProto(epb descriptorpb.Edition) filedesc.Edition { + return filedesc.Edition(epb) +} + +func toEditionProto(ed filedesc.Edition) descriptorpb.Edition { + switch ed { + case filedesc.EditionUnknown: + return descriptorpb.Edition_EDITION_UNKNOWN + case filedesc.EditionProto2: + return descriptorpb.Edition_EDITION_PROTO2 + case filedesc.EditionProto3: + return descriptorpb.Edition_EDITION_PROTO3 + case filedesc.Edition2023: + return descriptorpb.Edition_EDITION_2023 + default: + panic(fmt.Sprintf("unknown value for edition: %v", ed)) + } +} + +func getFeatureSetFor(ed filedesc.Edition) *descriptorpb.FeatureSet { + defaultsCacheMu.Lock() + defer defaultsCacheMu.Unlock() + if def, ok := defaultsCache[ed]; ok { + return def + } + edpb := toEditionProto(ed) + if defaults.GetMinimumEdition() > edpb || defaults.GetMaximumEdition() < edpb { + // This should never happen protodesc.(FileOptions).New would fail when + // initializing the file descriptor. + // This most likely means the embedded defaults were not updated. + fmt.Fprintf(os.Stderr, "internal error: unsupported edition %v (did you forget to update the embedded defaults (i.e. the bootstrap descriptor proto)?)\n", edpb) + os.Exit(1) + } + fsed := defaults.GetDefaults()[0] + // Using a linear search for now. + // Editions are guaranteed to be sorted and thus we could use a binary search. + // Given that there are only a handful of editions (with one more per year) + // there is not much reason to use a binary search. + for _, def := range defaults.GetDefaults() { + if def.GetEdition() <= edpb { + fsed = def + } else { + break + } + } + fs := proto.Clone(fsed.GetFixedFeatures()).(*descriptorpb.FeatureSet) + proto.Merge(fs, fsed.GetOverridableFeatures()) + defaultsCache[ed] = fs + return fs +} + +// mergeEditionFeatures merges the parent and child feature sets. This function +// should be used when initializing Go descriptors from descriptor protos which +// is why the parent is a filedesc.EditionsFeatures (Go representation) while +// the child is a descriptorproto.FeatureSet (protoc representation). +// Any feature set by the child overwrites what is set by the parent. +func mergeEditionFeatures(parentDesc protoreflect.Descriptor, child *descriptorpb.FeatureSet) filedesc.EditionFeatures { + var parentFS filedesc.EditionFeatures + switch p := parentDesc.(type) { + case *filedesc.File: + parentFS = p.L1.EditionFeatures + case *filedesc.Message: + parentFS = p.L1.EditionFeatures + default: + panic(fmt.Sprintf("unknown parent type %T", parentDesc)) + } + if child == nil { + return parentFS + } + if fp := child.FieldPresence; fp != nil { + parentFS.IsFieldPresence = *fp == descriptorpb.FeatureSet_LEGACY_REQUIRED || + *fp == descriptorpb.FeatureSet_EXPLICIT + parentFS.IsLegacyRequired = *fp == descriptorpb.FeatureSet_LEGACY_REQUIRED + } + if et := child.EnumType; et != nil { + parentFS.IsOpenEnum = *et == descriptorpb.FeatureSet_OPEN + } + + if rfe := child.RepeatedFieldEncoding; rfe != nil { + parentFS.IsPacked = *rfe == descriptorpb.FeatureSet_PACKED + } + + if utf8val := child.Utf8Validation; utf8val != nil { + parentFS.IsUTF8Validated = *utf8val == descriptorpb.FeatureSet_VERIFY + } + + if me := child.MessageEncoding; me != nil { + parentFS.IsDelimitedEncoded = *me == descriptorpb.FeatureSet_DELIMITED + } + + if jf := child.JsonFormat; jf != nil { + parentFS.IsJSONCompliant = *jf == descriptorpb.FeatureSet_ALLOW + } + + if goFeatures, ok := proto.GetExtension(child, gofeaturespb.E_Go).(*gofeaturespb.GoFeatures); ok && goFeatures != nil { + if luje := goFeatures.LegacyUnmarshalJsonEnum; luje != nil { + parentFS.GenerateLegacyUnmarshalJSON = *luje + } + } + + return parentFS +} + +// initFileDescFromFeatureSet initializes editions related fields in fd based +// on fs. If fs is nil it is assumed to be an empty featureset and all fields +// will be initialized with the appropriate default. fd.L1.Edition must be set +// before calling this function. +func initFileDescFromFeatureSet(fd *filedesc.File, fs *descriptorpb.FeatureSet) { + dfs := getFeatureSetFor(fd.L1.Edition) + // initialize the featureset with the defaults + fd.L1.EditionFeatures = mergeEditionFeatures(fd, dfs) + // overwrite any options explicitly specified + fd.L1.EditionFeatures = mergeEditionFeatures(fd, fs) +} diff --git a/vendor/google.golang.org/protobuf/reflect/protodesc/proto.go b/vendor/google.golang.org/protobuf/reflect/protodesc/proto.go index a7c5ceff..a5de8d40 100644 --- a/vendor/google.golang.org/protobuf/reflect/protodesc/proto.go +++ b/vendor/google.golang.org/protobuf/reflect/protodesc/proto.go @@ -16,7 +16,7 @@ import ( "google.golang.org/protobuf/types/descriptorpb" ) -// ToFileDescriptorProto copies a protoreflect.FileDescriptor into a +// ToFileDescriptorProto copies a [protoreflect.FileDescriptor] into a // google.protobuf.FileDescriptorProto message. func ToFileDescriptorProto(file protoreflect.FileDescriptor) *descriptorpb.FileDescriptorProto { p := &descriptorpb.FileDescriptorProto{ @@ -70,13 +70,23 @@ func ToFileDescriptorProto(file protoreflect.FileDescriptor) *descriptorpb.FileD for i, exts := 0, file.Extensions(); i < exts.Len(); i++ { p.Extension = append(p.Extension, ToFieldDescriptorProto(exts.Get(i))) } - if syntax := file.Syntax(); syntax != protoreflect.Proto2 { + if syntax := file.Syntax(); syntax != protoreflect.Proto2 && syntax.IsValid() { p.Syntax = proto.String(file.Syntax().String()) } + if file.Syntax() == protoreflect.Editions { + desc := file + if fileImportDesc, ok := file.(protoreflect.FileImport); ok { + desc = fileImportDesc.FileDescriptor + } + + if editionsInterface, ok := desc.(interface{ Edition() int32 }); ok { + p.Edition = descriptorpb.Edition(editionsInterface.Edition()).Enum() + } + } return p } -// ToDescriptorProto copies a protoreflect.MessageDescriptor into a +// ToDescriptorProto copies a [protoreflect.MessageDescriptor] into a // google.protobuf.DescriptorProto message. func ToDescriptorProto(message protoreflect.MessageDescriptor) *descriptorpb.DescriptorProto { p := &descriptorpb.DescriptorProto{ @@ -119,7 +129,7 @@ func ToDescriptorProto(message protoreflect.MessageDescriptor) *descriptorpb.Des return p } -// ToFieldDescriptorProto copies a protoreflect.FieldDescriptor into a +// ToFieldDescriptorProto copies a [protoreflect.FieldDescriptor] into a // google.protobuf.FieldDescriptorProto message. func ToFieldDescriptorProto(field protoreflect.FieldDescriptor) *descriptorpb.FieldDescriptorProto { p := &descriptorpb.FieldDescriptorProto{ @@ -153,6 +163,18 @@ func ToFieldDescriptorProto(field protoreflect.FieldDescriptor) *descriptorpb.Fi if field.Syntax() == protoreflect.Proto3 && field.HasOptionalKeyword() { p.Proto3Optional = proto.Bool(true) } + if field.Syntax() == protoreflect.Editions { + // Editions have no group keyword, this type is only set so that downstream users continue + // treating this as delimited encoding. + if p.GetType() == descriptorpb.FieldDescriptorProto_TYPE_GROUP { + p.Type = descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum() + } + // Editions have no required keyword, this label is only set so that downstream users continue + // treating it as required. + if p.GetLabel() == descriptorpb.FieldDescriptorProto_LABEL_REQUIRED { + p.Label = descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum() + } + } if field.HasDefault() { def, err := defval.Marshal(field.Default(), field.DefaultEnumValue(), field.Kind(), defval.Descriptor) if err != nil && field.DefaultEnumValue() != nil { @@ -168,7 +190,7 @@ func ToFieldDescriptorProto(field protoreflect.FieldDescriptor) *descriptorpb.Fi return p } -// ToOneofDescriptorProto copies a protoreflect.OneofDescriptor into a +// ToOneofDescriptorProto copies a [protoreflect.OneofDescriptor] into a // google.protobuf.OneofDescriptorProto message. func ToOneofDescriptorProto(oneof protoreflect.OneofDescriptor) *descriptorpb.OneofDescriptorProto { return &descriptorpb.OneofDescriptorProto{ @@ -177,7 +199,7 @@ func ToOneofDescriptorProto(oneof protoreflect.OneofDescriptor) *descriptorpb.On } } -// ToEnumDescriptorProto copies a protoreflect.EnumDescriptor into a +// ToEnumDescriptorProto copies a [protoreflect.EnumDescriptor] into a // google.protobuf.EnumDescriptorProto message. func ToEnumDescriptorProto(enum protoreflect.EnumDescriptor) *descriptorpb.EnumDescriptorProto { p := &descriptorpb.EnumDescriptorProto{ @@ -200,7 +222,7 @@ func ToEnumDescriptorProto(enum protoreflect.EnumDescriptor) *descriptorpb.EnumD return p } -// ToEnumValueDescriptorProto copies a protoreflect.EnumValueDescriptor into a +// ToEnumValueDescriptorProto copies a [protoreflect.EnumValueDescriptor] into a // google.protobuf.EnumValueDescriptorProto message. func ToEnumValueDescriptorProto(value protoreflect.EnumValueDescriptor) *descriptorpb.EnumValueDescriptorProto { return &descriptorpb.EnumValueDescriptorProto{ @@ -210,7 +232,7 @@ func ToEnumValueDescriptorProto(value protoreflect.EnumValueDescriptor) *descrip } } -// ToServiceDescriptorProto copies a protoreflect.ServiceDescriptor into a +// ToServiceDescriptorProto copies a [protoreflect.ServiceDescriptor] into a // google.protobuf.ServiceDescriptorProto message. func ToServiceDescriptorProto(service protoreflect.ServiceDescriptor) *descriptorpb.ServiceDescriptorProto { p := &descriptorpb.ServiceDescriptorProto{ @@ -223,7 +245,7 @@ func ToServiceDescriptorProto(service protoreflect.ServiceDescriptor) *descripto return p } -// ToMethodDescriptorProto copies a protoreflect.MethodDescriptor into a +// ToMethodDescriptorProto copies a [protoreflect.MethodDescriptor] into a // google.protobuf.MethodDescriptorProto message. func ToMethodDescriptorProto(method protoreflect.MethodDescriptor) *descriptorpb.MethodDescriptorProto { p := &descriptorpb.MethodDescriptorProto{ diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go index d5d5af6e..742cb518 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/methods.go @@ -23,6 +23,7 @@ type ( Unmarshal func(unmarshalInput) (unmarshalOutput, error) Merge func(mergeInput) mergeOutput CheckInitialized func(checkInitializedInput) (checkInitializedOutput, error) + Equal func(equalInput) equalOutput } supportFlags = uint64 sizeInput = struct { @@ -75,4 +76,13 @@ type ( checkInitializedOutput = struct { pragma.NoUnkeyedLiterals } + equalInput = struct { + pragma.NoUnkeyedLiterals + MessageA Message + MessageB Message + } + equalOutput = struct { + pragma.NoUnkeyedLiterals + Equal bool + } ) diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/proto.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/proto.go index 55aa1492..c85bfaa5 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/proto.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/proto.go @@ -10,46 +10,46 @@ // // # Protocol Buffer Descriptors // -// Protobuf descriptors (e.g., EnumDescriptor or MessageDescriptor) +// Protobuf descriptors (e.g., [EnumDescriptor] or [MessageDescriptor]) // are immutable objects that represent protobuf type information. // They are wrappers around the messages declared in descriptor.proto. // Protobuf descriptors alone lack any information regarding Go types. // -// Enums and messages generated by this module implement Enum and ProtoMessage, +// Enums and messages generated by this module implement [Enum] and [ProtoMessage], // where the Descriptor and ProtoReflect.Descriptor accessors respectively // return the protobuf descriptor for the values. // // The protobuf descriptor interfaces are not meant to be implemented by // user code since they might need to be extended in the future to support // additions to the protobuf language. -// The "google.golang.org/protobuf/reflect/protodesc" package converts between +// The [google.golang.org/protobuf/reflect/protodesc] package converts between // google.protobuf.DescriptorProto messages and protobuf descriptors. // // # Go Type Descriptors // -// A type descriptor (e.g., EnumType or MessageType) is a constructor for +// A type descriptor (e.g., [EnumType] or [MessageType]) is a constructor for // a concrete Go type that represents the associated protobuf descriptor. // There is commonly a one-to-one relationship between protobuf descriptors and // Go type descriptors, but it can potentially be a one-to-many relationship. // -// Enums and messages generated by this module implement Enum and ProtoMessage, +// Enums and messages generated by this module implement [Enum] and [ProtoMessage], // where the Type and ProtoReflect.Type accessors respectively // return the protobuf descriptor for the values. // -// The "google.golang.org/protobuf/types/dynamicpb" package can be used to +// The [google.golang.org/protobuf/types/dynamicpb] package can be used to // create Go type descriptors from protobuf descriptors. // // # Value Interfaces // -// The Enum and Message interfaces provide a reflective view over an +// The [Enum] and [Message] interfaces provide a reflective view over an // enum or message instance. For enums, it provides the ability to retrieve // the enum value number for any concrete enum type. For messages, it provides // the ability to access or manipulate fields of the message. // -// To convert a proto.Message to a protoreflect.Message, use the +// To convert a [google.golang.org/protobuf/proto.Message] to a [protoreflect.Message], use the // former's ProtoReflect method. Since the ProtoReflect method is new to the // v2 message interface, it may not be present on older message implementations. -// The "github.com/golang/protobuf/proto".MessageReflect function can be used +// The [github.com/golang/protobuf/proto.MessageReflect] function can be used // to obtain a reflective view on older messages. // // # Relationships @@ -71,12 +71,12 @@ // │ │ // └────────────────── Type() ───────┘ // -// • An EnumType describes a concrete Go enum type. +// • An [EnumType] describes a concrete Go enum type. // It has an EnumDescriptor and can construct an Enum instance. // -// • An EnumDescriptor describes an abstract protobuf enum type. +// • An [EnumDescriptor] describes an abstract protobuf enum type. // -// • An Enum is a concrete enum instance. Generated enums implement Enum. +// • An [Enum] is a concrete enum instance. Generated enums implement Enum. // // ┌──────────────── New() ─────────────────┐ // │ │ @@ -90,24 +90,26 @@ // │ │ // └─────────────────── Type() ─────────┘ // -// • A MessageType describes a concrete Go message type. -// It has a MessageDescriptor and can construct a Message instance. -// Just as how Go's reflect.Type is a reflective description of a Go type, -// a MessageType is a reflective description of a Go type for a protobuf message. +// • A [MessageType] describes a concrete Go message type. +// It has a [MessageDescriptor] and can construct a [Message] instance. +// Just as how Go's [reflect.Type] is a reflective description of a Go type, +// a [MessageType] is a reflective description of a Go type for a protobuf message. // -// • A MessageDescriptor describes an abstract protobuf message type. -// It has no understanding of Go types. In order to construct a MessageType -// from just a MessageDescriptor, you can consider looking up the message type -// in the global registry using protoregistry.GlobalTypes.FindMessageByName -// or constructing a dynamic MessageType using dynamicpb.NewMessageType. +// • A [MessageDescriptor] describes an abstract protobuf message type. +// It has no understanding of Go types. In order to construct a [MessageType] +// from just a [MessageDescriptor], you can consider looking up the message type +// in the global registry using the FindMessageByName method on +// [google.golang.org/protobuf/reflect/protoregistry.GlobalTypes] +// or constructing a dynamic [MessageType] using +// [google.golang.org/protobuf/types/dynamicpb.NewMessageType]. // -// • A Message is a reflective view over a concrete message instance. -// Generated messages implement ProtoMessage, which can convert to a Message. -// Just as how Go's reflect.Value is a reflective view over a Go value, -// a Message is a reflective view over a concrete protobuf message instance. -// Using Go reflection as an analogy, the ProtoReflect method is similar to -// calling reflect.ValueOf, and the Message.Interface method is similar to -// calling reflect.Value.Interface. +// • A [Message] is a reflective view over a concrete message instance. +// Generated messages implement [ProtoMessage], which can convert to a [Message]. +// Just as how Go's [reflect.Value] is a reflective view over a Go value, +// a [Message] is a reflective view over a concrete protobuf message instance. +// Using Go reflection as an analogy, the [ProtoMessage.ProtoReflect] method is similar to +// calling [reflect.ValueOf], and the [Message.Interface] method is similar to +// calling [reflect.Value.Interface]. // // ┌── TypeDescriptor() ──┐ ┌───── Descriptor() ─────┐ // │ V │ V @@ -119,15 +121,15 @@ // │ │ // └────── implements ────────┘ // -// • An ExtensionType describes a concrete Go implementation of an extension. -// It has an ExtensionTypeDescriptor and can convert to/from -// abstract Values and Go values. +// • An [ExtensionType] describes a concrete Go implementation of an extension. +// It has an [ExtensionTypeDescriptor] and can convert to/from +// an abstract [Value] and a Go value. // -// • An ExtensionTypeDescriptor is an ExtensionDescriptor -// which also has an ExtensionType. +// • An [ExtensionTypeDescriptor] is an [ExtensionDescriptor] +// which also has an [ExtensionType]. // -// • An ExtensionDescriptor describes an abstract protobuf extension field and -// may not always be an ExtensionTypeDescriptor. +// • An [ExtensionDescriptor] describes an abstract protobuf extension field and +// may not always be an [ExtensionTypeDescriptor]. package protoreflect import ( @@ -142,7 +144,7 @@ type doNotImplement pragma.DoNotImplement // ProtoMessage is the top-level interface that all proto messages implement. // This is declared in the protoreflect package to avoid a cyclic dependency; -// use the proto.Message type instead, which aliases this type. +// use the [google.golang.org/protobuf/proto.Message] type instead, which aliases this type. type ProtoMessage interface{ ProtoReflect() Message } // Syntax is the language version of the proto file. @@ -151,14 +153,15 @@ type Syntax syntax type syntax int8 // keep exact type opaque as the int type may change const ( - Proto2 Syntax = 2 - Proto3 Syntax = 3 + Proto2 Syntax = 2 + Proto3 Syntax = 3 + Editions Syntax = 4 ) // IsValid reports whether the syntax is valid. func (s Syntax) IsValid() bool { switch s { - case Proto2, Proto3: + case Proto2, Proto3, Editions: return true default: return false @@ -172,6 +175,8 @@ func (s Syntax) String() string { return "proto2" case Proto3: return "proto3" + case Editions: + return "editions" default: return fmt.Sprintf("", s) } @@ -436,7 +441,7 @@ type Names interface { // FullName is a qualified name that uniquely identifies a proto declaration. // A qualified name is the concatenation of the proto package along with the // fully-declared name (i.e., name of parent preceding the name of the child), -// with a '.' delimiter placed between each Name. +// with a '.' delimiter placed between each [Name]. // // This should not have any leading or trailing dots. type FullName string // e.g., "google.protobuf.Field.Kind" @@ -480,7 +485,7 @@ func isLetterDigit(c byte) bool { } // Name returns the short name, which is the last identifier segment. -// A single segment FullName is the Name itself. +// A single segment FullName is the [Name] itself. func (n FullName) Name() Name { if i := strings.LastIndexByte(string(n), '.'); i >= 0 { return Name(n[i+1:]) diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/source_gen.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/source_gen.go index 717b106f..ea154eec 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/source_gen.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/source_gen.go @@ -35,7 +35,7 @@ func (p *SourcePath) appendFileDescriptorProto(b []byte) []byte { b = p.appendSingularField(b, "source_code_info", (*SourcePath).appendSourceCodeInfo) case 12: b = p.appendSingularField(b, "syntax", nil) - case 13: + case 14: b = p.appendSingularField(b, "edition", nil) } return b @@ -160,8 +160,6 @@ func (p *SourcePath) appendFileOptions(b []byte) []byte { b = p.appendSingularField(b, "java_generic_services", nil) case 18: b = p.appendSingularField(b, "py_generic_services", nil) - case 42: - b = p.appendSingularField(b, "php_generic_services", nil) case 23: b = p.appendSingularField(b, "deprecated", nil) case 31: @@ -180,6 +178,8 @@ func (p *SourcePath) appendFileOptions(b []byte) []byte { b = p.appendSingularField(b, "php_metadata_namespace", nil) case 45: b = p.appendSingularField(b, "ruby_package", nil) + case 50: + b = p.appendSingularField(b, "features", (*SourcePath).appendFeatureSet) case 999: b = p.appendRepeatedField(b, "uninterpreted_option", (*SourcePath).appendUninterpretedOption) } @@ -240,6 +240,8 @@ func (p *SourcePath) appendMessageOptions(b []byte) []byte { b = p.appendSingularField(b, "map_entry", nil) case 11: b = p.appendSingularField(b, "deprecated_legacy_json_field_conflicts", nil) + case 12: + b = p.appendSingularField(b, "features", (*SourcePath).appendFeatureSet) case 999: b = p.appendRepeatedField(b, "uninterpreted_option", (*SourcePath).appendUninterpretedOption) } @@ -285,6 +287,8 @@ func (p *SourcePath) appendEnumOptions(b []byte) []byte { b = p.appendSingularField(b, "deprecated", nil) case 6: b = p.appendSingularField(b, "deprecated_legacy_json_field_conflicts", nil) + case 7: + b = p.appendSingularField(b, "features", (*SourcePath).appendFeatureSet) case 999: b = p.appendRepeatedField(b, "uninterpreted_option", (*SourcePath).appendUninterpretedOption) } @@ -330,6 +334,8 @@ func (p *SourcePath) appendServiceOptions(b []byte) []byte { return b } switch (*p)[0] { + case 34: + b = p.appendSingularField(b, "features", (*SourcePath).appendFeatureSet) case 33: b = p.appendSingularField(b, "deprecated", nil) case 999: @@ -361,16 +367,41 @@ func (p *SourcePath) appendFieldOptions(b []byte) []byte { b = p.appendSingularField(b, "debug_redact", nil) case 17: b = p.appendSingularField(b, "retention", nil) - case 18: - b = p.appendSingularField(b, "target", nil) case 19: b = p.appendRepeatedField(b, "targets", nil) + case 20: + b = p.appendRepeatedField(b, "edition_defaults", (*SourcePath).appendFieldOptions_EditionDefault) + case 21: + b = p.appendSingularField(b, "features", (*SourcePath).appendFeatureSet) + case 22: + b = p.appendSingularField(b, "feature_support", (*SourcePath).appendFieldOptions_FeatureSupport) case 999: b = p.appendRepeatedField(b, "uninterpreted_option", (*SourcePath).appendUninterpretedOption) } return b } +func (p *SourcePath) appendFeatureSet(b []byte) []byte { + if len(*p) == 0 { + return b + } + switch (*p)[0] { + case 1: + b = p.appendSingularField(b, "field_presence", nil) + case 2: + b = p.appendSingularField(b, "enum_type", nil) + case 3: + b = p.appendSingularField(b, "repeated_field_encoding", nil) + case 4: + b = p.appendSingularField(b, "utf8_validation", nil) + case 5: + b = p.appendSingularField(b, "message_encoding", nil) + case 6: + b = p.appendSingularField(b, "json_format", nil) + } + return b +} + func (p *SourcePath) appendUninterpretedOption(b []byte) []byte { if len(*p) == 0 { return b @@ -422,6 +453,8 @@ func (p *SourcePath) appendExtensionRangeOptions(b []byte) []byte { b = p.appendRepeatedField(b, "uninterpreted_option", (*SourcePath).appendUninterpretedOption) case 2: b = p.appendRepeatedField(b, "declaration", (*SourcePath).appendExtensionRangeOptions_Declaration) + case 50: + b = p.appendSingularField(b, "features", (*SourcePath).appendFeatureSet) case 3: b = p.appendSingularField(b, "verification", nil) } @@ -433,6 +466,8 @@ func (p *SourcePath) appendOneofOptions(b []byte) []byte { return b } switch (*p)[0] { + case 1: + b = p.appendSingularField(b, "features", (*SourcePath).appendFeatureSet) case 999: b = p.appendRepeatedField(b, "uninterpreted_option", (*SourcePath).appendUninterpretedOption) } @@ -446,6 +481,12 @@ func (p *SourcePath) appendEnumValueOptions(b []byte) []byte { switch (*p)[0] { case 1: b = p.appendSingularField(b, "deprecated", nil) + case 2: + b = p.appendSingularField(b, "features", (*SourcePath).appendFeatureSet) + case 3: + b = p.appendSingularField(b, "debug_redact", nil) + case 4: + b = p.appendSingularField(b, "feature_support", (*SourcePath).appendFieldOptions_FeatureSupport) case 999: b = p.appendRepeatedField(b, "uninterpreted_option", (*SourcePath).appendUninterpretedOption) } @@ -461,12 +502,44 @@ func (p *SourcePath) appendMethodOptions(b []byte) []byte { b = p.appendSingularField(b, "deprecated", nil) case 34: b = p.appendSingularField(b, "idempotency_level", nil) + case 35: + b = p.appendSingularField(b, "features", (*SourcePath).appendFeatureSet) case 999: b = p.appendRepeatedField(b, "uninterpreted_option", (*SourcePath).appendUninterpretedOption) } return b } +func (p *SourcePath) appendFieldOptions_EditionDefault(b []byte) []byte { + if len(*p) == 0 { + return b + } + switch (*p)[0] { + case 3: + b = p.appendSingularField(b, "edition", nil) + case 2: + b = p.appendSingularField(b, "value", nil) + } + return b +} + +func (p *SourcePath) appendFieldOptions_FeatureSupport(b []byte) []byte { + if len(*p) == 0 { + return b + } + switch (*p)[0] { + case 1: + b = p.appendSingularField(b, "edition_introduced", nil) + case 2: + b = p.appendSingularField(b, "edition_deprecated", nil) + case 3: + b = p.appendSingularField(b, "deprecation_warning", nil) + case 4: + b = p.appendSingularField(b, "edition_removed", nil) + } + return b +} + func (p *SourcePath) appendUninterpretedOption_NamePart(b []byte) []byte { if len(*p) == 0 { return b @@ -491,8 +564,6 @@ func (p *SourcePath) appendExtensionRangeOptions_Declaration(b []byte) []byte { b = p.appendSingularField(b, "full_name", nil) case 3: b = p.appendSingularField(b, "type", nil) - case 4: - b = p.appendSingularField(b, "is_repeated", nil) case 5: b = p.appendSingularField(b, "reserved", nil) case 6: diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/type.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/type.go index 3867470d..cd8fadba 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/type.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/type.go @@ -12,7 +12,7 @@ package protoreflect // exactly identical. However, it is possible for the same semantically // identical proto type to be represented by multiple type descriptors. // -// For example, suppose we have t1 and t2 which are both MessageDescriptors. +// For example, suppose we have t1 and t2 which are both an [MessageDescriptor]. // If t1 == t2, then the types are definitely equal and all accessors return // the same information. However, if t1 != t2, then it is still possible that // they still represent the same proto type (e.g., t1.FullName == t2.FullName). @@ -115,7 +115,7 @@ type Descriptor interface { // corresponds with the google.protobuf.FileDescriptorProto message. // // Top-level declarations: -// EnumDescriptor, MessageDescriptor, FieldDescriptor, and/or ServiceDescriptor. +// [EnumDescriptor], [MessageDescriptor], [FieldDescriptor], and/or [ServiceDescriptor]. type FileDescriptor interface { Descriptor // Descriptor.FullName is identical to Package @@ -180,8 +180,8 @@ type FileImport struct { // corresponds with the google.protobuf.DescriptorProto message. // // Nested declarations: -// FieldDescriptor, OneofDescriptor, FieldDescriptor, EnumDescriptor, -// and/or MessageDescriptor. +// [FieldDescriptor], [OneofDescriptor], [FieldDescriptor], [EnumDescriptor], +// and/or [MessageDescriptor]. type MessageDescriptor interface { Descriptor @@ -214,7 +214,7 @@ type MessageDescriptor interface { ExtensionRanges() FieldRanges // ExtensionRangeOptions returns the ith extension range options. // - // To avoid a dependency cycle, this method returns a proto.Message value, + // To avoid a dependency cycle, this method returns a proto.Message] value, // which always contains a google.protobuf.ExtensionRangeOptions message. // This method returns a typed nil-pointer if no options are present. // The caller must import the descriptorpb package to use this. @@ -231,9 +231,9 @@ type MessageDescriptor interface { } type isMessageDescriptor interface{ ProtoType(MessageDescriptor) } -// MessageType encapsulates a MessageDescriptor with a concrete Go implementation. +// MessageType encapsulates a [MessageDescriptor] with a concrete Go implementation. // It is recommended that implementations of this interface also implement the -// MessageFieldTypes interface. +// [MessageFieldTypes] interface. type MessageType interface { // New returns a newly allocated empty message. // It may return nil for synthetic messages representing a map entry. @@ -249,19 +249,19 @@ type MessageType interface { Descriptor() MessageDescriptor } -// MessageFieldTypes extends a MessageType by providing type information +// MessageFieldTypes extends a [MessageType] by providing type information // regarding enums and messages referenced by the message fields. type MessageFieldTypes interface { MessageType - // Enum returns the EnumType for the ith field in Descriptor.Fields. + // Enum returns the EnumType for the ith field in MessageDescriptor.Fields. // It returns nil if the ith field is not an enum kind. // It panics if out of bounds. // // Invariant: mt.Enum(i).Descriptor() == mt.Descriptor().Fields(i).Enum() Enum(i int) EnumType - // Message returns the MessageType for the ith field in Descriptor.Fields. + // Message returns the MessageType for the ith field in MessageDescriptor.Fields. // It returns nil if the ith field is not a message or group kind. // It panics if out of bounds. // @@ -286,8 +286,8 @@ type MessageDescriptors interface { // corresponds with the google.protobuf.FieldDescriptorProto message. // // It is used for both normal fields defined within the parent message -// (e.g., MessageDescriptor.Fields) and fields that extend some remote message -// (e.g., FileDescriptor.Extensions or MessageDescriptor.Extensions). +// (e.g., [MessageDescriptor.Fields]) and fields that extend some remote message +// (e.g., [FileDescriptor.Extensions] or [MessageDescriptor.Extensions]). type FieldDescriptor interface { Descriptor @@ -344,7 +344,7 @@ type FieldDescriptor interface { // IsMap reports whether this field represents a map, // where the value type for the associated field is a Map. // It is equivalent to checking whether Cardinality is Repeated, - // that the Kind is MessageKind, and that Message.IsMapEntry reports true. + // that the Kind is MessageKind, and that MessageDescriptor.IsMapEntry reports true. IsMap() bool // MapKey returns the field descriptor for the key in the map entry. @@ -419,7 +419,7 @@ type OneofDescriptor interface { // IsSynthetic reports whether this is a synthetic oneof created to support // proto3 optional semantics. If true, Fields contains exactly one field - // with HasOptionalKeyword specified. + // with FieldDescriptor.HasOptionalKeyword specified. IsSynthetic() bool // Fields is a list of fields belonging to this oneof. @@ -442,10 +442,10 @@ type OneofDescriptors interface { doNotImplement } -// ExtensionDescriptor is an alias of FieldDescriptor for documentation. +// ExtensionDescriptor is an alias of [FieldDescriptor] for documentation. type ExtensionDescriptor = FieldDescriptor -// ExtensionTypeDescriptor is an ExtensionDescriptor with an associated ExtensionType. +// ExtensionTypeDescriptor is an [ExtensionDescriptor] with an associated [ExtensionType]. type ExtensionTypeDescriptor interface { ExtensionDescriptor @@ -470,12 +470,12 @@ type ExtensionDescriptors interface { doNotImplement } -// ExtensionType encapsulates an ExtensionDescriptor with a concrete +// ExtensionType encapsulates an [ExtensionDescriptor] with a concrete // Go implementation. The nested field descriptor must be for a extension field. // // While a normal field is a member of the parent message that it is declared -// within (see Descriptor.Parent), an extension field is a member of some other -// target message (see ExtensionDescriptor.Extendee) and may have no +// within (see [Descriptor.Parent]), an extension field is a member of some other +// target message (see [FieldDescriptor.ContainingMessage]) and may have no // relationship with the parent. However, the full name of an extension field is // relative to the parent that it is declared within. // @@ -510,7 +510,7 @@ type ExtensionType interface { // // ValueOf is more extensive than protoreflect.ValueOf for a given field's // value as it has more type information available. - ValueOf(interface{}) Value + ValueOf(any) Value // InterfaceOf completely unwraps the Value to the underlying Go type. // InterfaceOf panics if the input is nil or does not represent the @@ -519,20 +519,20 @@ type ExtensionType interface { // // InterfaceOf is able to unwrap the Value further than Value.Interface // as it has more type information available. - InterfaceOf(Value) interface{} + InterfaceOf(Value) any // IsValidValue reports whether the Value is valid to assign to the field. IsValidValue(Value) bool // IsValidInterface reports whether the input is valid to assign to the field. - IsValidInterface(interface{}) bool + IsValidInterface(any) bool } // EnumDescriptor describes an enum and // corresponds with the google.protobuf.EnumDescriptorProto message. // // Nested declarations: -// EnumValueDescriptor. +// [EnumValueDescriptor]. type EnumDescriptor interface { Descriptor @@ -544,11 +544,17 @@ type EnumDescriptor interface { // ReservedRanges is a list of reserved ranges of enum numbers. ReservedRanges() EnumRanges + // IsClosed reports whether this enum uses closed semantics. + // See https://protobuf.dev/programming-guides/enum/#definitions. + // Note: the Go protobuf implementation is not spec compliant and treats + // all enums as open enums. + IsClosed() bool + isEnumDescriptor } type isEnumDescriptor interface{ ProtoType(EnumDescriptor) } -// EnumType encapsulates an EnumDescriptor with a concrete Go implementation. +// EnumType encapsulates an [EnumDescriptor] with a concrete Go implementation. type EnumType interface { // New returns an instance of this enum type with its value set to n. New(n EnumNumber) Enum @@ -610,7 +616,7 @@ type EnumValueDescriptors interface { // ServiceDescriptor describes a service and // corresponds with the google.protobuf.ServiceDescriptorProto message. // -// Nested declarations: MethodDescriptor. +// Nested declarations: [MethodDescriptor]. type ServiceDescriptor interface { Descriptor diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value.go index 37601b78..a7b0d06f 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/value.go @@ -27,16 +27,16 @@ type Enum interface { // Message is a reflective interface for a concrete message value, // encapsulating both type and value information for the message. // -// Accessor/mutators for individual fields are keyed by FieldDescriptor. +// Accessor/mutators for individual fields are keyed by [FieldDescriptor]. // For non-extension fields, the descriptor must exactly match the // field known by the parent message. -// For extension fields, the descriptor must implement ExtensionTypeDescriptor, -// extend the parent message (i.e., have the same message FullName), and +// For extension fields, the descriptor must implement [ExtensionTypeDescriptor], +// extend the parent message (i.e., have the same message [FullName]), and // be within the parent's extension range. // -// Each field Value can be a scalar or a composite type (Message, List, or Map). -// See Value for the Go types associated with a FieldDescriptor. -// Providing a Value that is invalid or of an incorrect type panics. +// Each field [Value] can be a scalar or a composite type ([Message], [List], or [Map]). +// See [Value] for the Go types associated with a [FieldDescriptor]. +// Providing a [Value] that is invalid or of an incorrect type panics. type Message interface { // Descriptor returns message descriptor, which contains only the protobuf // type information for the message. @@ -152,7 +152,7 @@ type Message interface { // This method may return nil. // // The returned methods type is identical to - // "google.golang.org/protobuf/runtime/protoiface".Methods. + // google.golang.org/protobuf/runtime/protoiface.Methods. // Consult the protoiface package documentation for details. ProtoMethods() *methods } @@ -175,8 +175,8 @@ func (b RawFields) IsValid() bool { } // List is a zero-indexed, ordered list. -// The element Value type is determined by FieldDescriptor.Kind. -// Providing a Value that is invalid or of an incorrect type panics. +// The element [Value] type is determined by [FieldDescriptor.Kind]. +// Providing a [Value] that is invalid or of an incorrect type panics. type List interface { // Len reports the number of entries in the List. // Get, Set, and Truncate panic with out of bound indexes. @@ -226,9 +226,9 @@ type List interface { } // Map is an unordered, associative map. -// The entry MapKey type is determined by FieldDescriptor.MapKey.Kind. -// The entry Value type is determined by FieldDescriptor.MapValue.Kind. -// Providing a MapKey or Value that is invalid or of an incorrect type panics. +// The entry [MapKey] type is determined by [FieldDescriptor.MapKey].Kind. +// The entry [Value] type is determined by [FieldDescriptor.MapValue].Kind. +// Providing a [MapKey] or [Value] that is invalid or of an incorrect type panics. type Map interface { // Len reports the number of elements in the map. Len() int diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_equal.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_equal.go index 59165254..654599d4 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_equal.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_equal.go @@ -24,19 +24,19 @@ import ( // Unlike the == operator, a NaN is equal to another NaN. // // - Enums are equal if they contain the same number. -// Since Value does not contain an enum descriptor, +// Since [Value] does not contain an enum descriptor, // enum values do not consider the type of the enum. // // - Other scalar values are equal if they contain the same value. // -// - Message values are equal if they belong to the same message descriptor, +// - [Message] values are equal if they belong to the same message descriptor, // have the same set of populated known and extension field values, // and the same set of unknown fields values. // -// - Lists are equal if they are the same length and +// - [List] values are equal if they are the same length and // each corresponding element is equal. // -// - Maps are equal if they have the same set of keys and +// - [Map] values are equal if they have the same set of keys and // the corresponding value for each key is equal. func (v1 Value) Equal(v2 Value) bool { return equalValue(v1, v2) diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go deleted file mode 100644 index 7ced876f..00000000 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_pure.go +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build purego || appengine -// +build purego appengine - -package protoreflect - -import "google.golang.org/protobuf/internal/pragma" - -type valueType int - -const ( - nilType valueType = iota - boolType - int32Type - int64Type - uint32Type - uint64Type - float32Type - float64Type - stringType - bytesType - enumType - ifaceType -) - -// value is a union where only one type can be represented at a time. -// This uses a distinct field for each type. This is type safe in Go, but -// occupies more memory than necessary (72B). -type value struct { - pragma.DoNotCompare // 0B - - typ valueType // 8B - num uint64 // 8B - str string // 16B - bin []byte // 24B - iface interface{} // 16B -} - -func valueOfString(v string) Value { - return Value{typ: stringType, str: v} -} -func valueOfBytes(v []byte) Value { - return Value{typ: bytesType, bin: v} -} -func valueOfIface(v interface{}) Value { - return Value{typ: ifaceType, iface: v} -} - -func (v Value) getString() string { - return v.str -} -func (v Value) getBytes() []byte { - return v.bin -} -func (v Value) getIface() interface{} { - return v.iface -} diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_union.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_union.go index 08e5ef73..9fe83cef 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_union.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_union.go @@ -11,7 +11,7 @@ import ( // Value is a union where only one Go type may be set at a time. // The Value is used to represent all possible values a field may take. -// The following shows which Go type is used to represent each proto Kind: +// The following shows which Go type is used to represent each proto [Kind]: // // ╔════════════╤═════════════════════════════════════╗ // ║ Go type │ Protobuf kind ║ @@ -31,22 +31,22 @@ import ( // // Multiple protobuf Kinds may be represented by a single Go type if the type // can losslessly represent the information for the proto kind. For example, -// Int64Kind, Sint64Kind, and Sfixed64Kind are all represented by int64, +// [Int64Kind], [Sint64Kind], and [Sfixed64Kind] are all represented by int64, // but use different integer encoding methods. // -// The List or Map types are used if the field cardinality is repeated. -// A field is a List if FieldDescriptor.IsList reports true. -// A field is a Map if FieldDescriptor.IsMap reports true. +// The [List] or [Map] types are used if the field cardinality is repeated. +// A field is a [List] if [FieldDescriptor.IsList] reports true. +// A field is a [Map] if [FieldDescriptor.IsMap] reports true. // // Converting to/from a Value and a concrete Go value panics on type mismatch. -// For example, ValueOf("hello").Int() panics because this attempts to +// For example, [ValueOf]("hello").Int() panics because this attempts to // retrieve an int64 from a string. // -// List, Map, and Message Values are called "composite" values. +// [List], [Map], and [Message] Values are called "composite" values. // // A composite Value may alias (reference) memory at some location, // such that changes to the Value updates the that location. -// A composite value acquired with a Mutable method, such as Message.Mutable, +// A composite value acquired with a Mutable method, such as [Message.Mutable], // always references the source object. // // For example: @@ -65,12 +65,12 @@ import ( // // appending to the List here may or may not modify the message. // list.Append(protoreflect.ValueOfInt32(0)) // -// Some operations, such as Message.Get, may return an "empty, read-only" +// Some operations, such as [Message.Get], may return an "empty, read-only" // composite Value. Modifying an empty, read-only value panics. type Value value -// The protoreflect API uses a custom Value union type instead of interface{} -// to keep the future open for performance optimizations. Using an interface{} +// The protoreflect API uses a custom Value union type instead of any +// to keep the future open for performance optimizations. Using an any // always incurs an allocation for primitives (e.g., int64) since it needs to // be boxed on the heap (as interfaces can only contain pointers natively). // Instead, we represent the Value union as a flat struct that internally keeps @@ -85,7 +85,7 @@ type Value value // ValueOf returns a Value initialized with the concrete value stored in v. // This panics if the type does not match one of the allowed types in the // Value union. -func ValueOf(v interface{}) Value { +func ValueOf(v any) Value { switch v := v.(type) { case nil: return Value{} @@ -192,10 +192,10 @@ func (v Value) IsValid() bool { return v.typ != nilType } -// Interface returns v as an interface{}. +// Interface returns v as an any. // // Invariant: v == ValueOf(v).Interface() -func (v Value) Interface() interface{} { +func (v Value) Interface() any { switch v.typ { case nilType: return nil @@ -306,7 +306,7 @@ func (v Value) Float() float64 { } } -// String returns v as a string. Since this method implements fmt.Stringer, +// String returns v as a string. Since this method implements [fmt.Stringer], // this returns the formatted string value for any non-string type. func (v Value) String() string { switch v.typ { @@ -327,7 +327,7 @@ func (v Value) Bytes() []byte { } } -// Enum returns v as a EnumNumber and panics if the type is not a EnumNumber. +// Enum returns v as a [EnumNumber] and panics if the type is not a [EnumNumber]. func (v Value) Enum() EnumNumber { switch v.typ { case enumType: @@ -337,7 +337,7 @@ func (v Value) Enum() EnumNumber { } } -// Message returns v as a Message and panics if the type is not a Message. +// Message returns v as a [Message] and panics if the type is not a [Message]. func (v Value) Message() Message { switch vi := v.getIface().(type) { case Message: @@ -347,7 +347,7 @@ func (v Value) Message() Message { } } -// List returns v as a List and panics if the type is not a List. +// List returns v as a [List] and panics if the type is not a [List]. func (v Value) List() List { switch vi := v.getIface().(type) { case List: @@ -357,7 +357,7 @@ func (v Value) List() List { } } -// Map returns v as a Map and panics if the type is not a Map. +// Map returns v as a [Map] and panics if the type is not a [Map]. func (v Value) Map() Map { switch vi := v.getIface().(type) { case Map: @@ -367,7 +367,7 @@ func (v Value) Map() Map { } } -// MapKey returns v as a MapKey and panics for invalid MapKey types. +// MapKey returns v as a [MapKey] and panics for invalid [MapKey] types. func (v Value) MapKey() MapKey { switch v.typ { case boolType, int32Type, int64Type, uint32Type, uint64Type, stringType: @@ -378,8 +378,8 @@ func (v Value) MapKey() MapKey { } // MapKey is used to index maps, where the Go type of the MapKey must match -// the specified key Kind (see MessageDescriptor.IsMapEntry). -// The following shows what Go type is used to represent each proto Kind: +// the specified key [Kind] (see [MessageDescriptor.IsMapEntry]). +// The following shows what Go type is used to represent each proto [Kind]: // // ╔═════════╤═════════════════════════════════════╗ // ║ Go type │ Protobuf kind ║ @@ -392,13 +392,13 @@ func (v Value) MapKey() MapKey { // ║ string │ StringKind ║ // ╚═════════╧═════════════════════════════════════╝ // -// A MapKey is constructed and accessed through a Value: +// A MapKey is constructed and accessed through a [Value]: // // k := ValueOf("hash").MapKey() // convert string to MapKey // s := k.String() // convert MapKey to string // -// The MapKey is a strict subset of valid types used in Value; -// converting a Value to a MapKey with an invalid type panics. +// The MapKey is a strict subset of valid types used in [Value]; +// converting a [Value] to a MapKey with an invalid type panics. type MapKey value // IsValid reports whether k is populated with a value. @@ -406,8 +406,8 @@ func (k MapKey) IsValid() bool { return Value(k).IsValid() } -// Interface returns k as an interface{}. -func (k MapKey) Interface() interface{} { +// Interface returns k as an any. +func (k MapKey) Interface() any { return Value(k).Interface() } @@ -426,13 +426,13 @@ func (k MapKey) Uint() uint64 { return Value(k).Uint() } -// String returns k as a string. Since this method implements fmt.Stringer, +// String returns k as a string. Since this method implements [fmt.Stringer], // this returns the formatted string value for any non-string type. func (k MapKey) String() string { return Value(k).String() } -// Value returns k as a Value. +// Value returns k as a [Value]. func (k MapKey) Value() Value { return Value(k) } diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go similarity index 93% rename from vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe.go rename to vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go index 702ddf22..0015fcb3 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe.go +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go120.go @@ -2,8 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build !purego && !appengine -// +build !purego,!appengine +//go:build !go1.21 package protoreflect @@ -45,7 +44,7 @@ var ( // typeOf returns a pointer to the Go type information. // The pointer is comparable and equal if and only if the types are identical. -func typeOf(t interface{}) unsafe.Pointer { +func typeOf(t any) unsafe.Pointer { return (*ifaceHeader)(unsafe.Pointer(&t)).Type } @@ -80,7 +79,7 @@ func valueOfBytes(v []byte) Value { p := (*sliceHeader)(unsafe.Pointer(&v)) return Value{typ: bytesType, ptr: p.Data, num: uint64(len(v))} } -func valueOfIface(v interface{}) Value { +func valueOfIface(v any) Value { p := (*ifaceHeader)(unsafe.Pointer(&v)) return Value{typ: p.Type, ptr: p.Data} } @@ -93,7 +92,7 @@ func (v Value) getBytes() (x []byte) { *(*sliceHeader)(unsafe.Pointer(&x)) = sliceHeader{Data: v.ptr, Len: int(v.num), Cap: int(v.num)} return x } -func (v Value) getIface() (x interface{}) { +func (v Value) getIface() (x any) { *(*ifaceHeader)(unsafe.Pointer(&x)) = ifaceHeader{Type: v.typ, Data: v.ptr} return x } diff --git a/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go new file mode 100644 index 00000000..479527b5 --- /dev/null +++ b/vendor/google.golang.org/protobuf/reflect/protoreflect/value_unsafe_go121.go @@ -0,0 +1,86 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.21 + +package protoreflect + +import ( + "unsafe" + + "google.golang.org/protobuf/internal/pragma" +) + +type ( + ifaceHeader struct { + _ [0]any // if interfaces have greater alignment than unsafe.Pointer, this will enforce it. + Type unsafe.Pointer + Data unsafe.Pointer + } +) + +var ( + nilType = typeOf(nil) + boolType = typeOf(*new(bool)) + int32Type = typeOf(*new(int32)) + int64Type = typeOf(*new(int64)) + uint32Type = typeOf(*new(uint32)) + uint64Type = typeOf(*new(uint64)) + float32Type = typeOf(*new(float32)) + float64Type = typeOf(*new(float64)) + stringType = typeOf(*new(string)) + bytesType = typeOf(*new([]byte)) + enumType = typeOf(*new(EnumNumber)) +) + +// typeOf returns a pointer to the Go type information. +// The pointer is comparable and equal if and only if the types are identical. +func typeOf(t any) unsafe.Pointer { + return (*ifaceHeader)(unsafe.Pointer(&t)).Type +} + +// value is a union where only one type can be represented at a time. +// The struct is 24B large on 64-bit systems and requires the minimum storage +// necessary to represent each possible type. +// +// The Go GC needs to be able to scan variables containing pointers. +// As such, pointers and non-pointers cannot be intermixed. +type value struct { + pragma.DoNotCompare // 0B + + // typ stores the type of the value as a pointer to the Go type. + typ unsafe.Pointer // 8B + + // ptr stores the data pointer for a String, Bytes, or interface value. + ptr unsafe.Pointer // 8B + + // num stores a Bool, Int32, Int64, Uint32, Uint64, Float32, Float64, or + // Enum value as a raw uint64. + // + // It is also used to store the length of a String or Bytes value; + // the capacity is ignored. + num uint64 // 8B +} + +func valueOfString(v string) Value { + return Value{typ: stringType, ptr: unsafe.Pointer(unsafe.StringData(v)), num: uint64(len(v))} +} +func valueOfBytes(v []byte) Value { + return Value{typ: bytesType, ptr: unsafe.Pointer(unsafe.SliceData(v)), num: uint64(len(v))} +} +func valueOfIface(v any) Value { + p := (*ifaceHeader)(unsafe.Pointer(&v)) + return Value{typ: p.Type, ptr: p.Data} +} + +func (v Value) getString() string { + return unsafe.String((*byte)(v.ptr), v.num) +} +func (v Value) getBytes() []byte { + return unsafe.Slice((*byte)(v.ptr), v.num) +} +func (v Value) getIface() (x any) { + *(*ifaceHeader)(unsafe.Pointer(&x)) = ifaceHeader{Type: v.typ, Data: v.ptr} + return x +} diff --git a/vendor/google.golang.org/protobuf/reflect/protoregistry/registry.go b/vendor/google.golang.org/protobuf/reflect/protoregistry/registry.go index aeb55977..de177733 100644 --- a/vendor/google.golang.org/protobuf/reflect/protoregistry/registry.go +++ b/vendor/google.golang.org/protobuf/reflect/protoregistry/registry.go @@ -5,12 +5,12 @@ // Package protoregistry provides data structures to register and lookup // protobuf descriptor types. // -// The Files registry contains file descriptors and provides the ability +// The [Files] registry contains file descriptors and provides the ability // to iterate over the files or lookup a specific descriptor within the files. -// Files only contains protobuf descriptors and has no understanding of Go +// [Files] only contains protobuf descriptors and has no understanding of Go // type information that may be associated with each descriptor. // -// The Types registry contains descriptor types for which there is a known +// The [Types] registry contains descriptor types for which there is a known // Go type associated with that descriptor. It provides the ability to iterate // over the registered types or lookup a type by name. package protoregistry @@ -95,7 +95,7 @@ type Files struct { // multiple files. Only top-level declarations are registered. // Note that enum values are in the top-level since that are in the same // scope as the parent enum. - descsByName map[protoreflect.FullName]interface{} + descsByName map[protoreflect.FullName]any filesByPath map[string][]protoreflect.FileDescriptor numFiles int } @@ -117,7 +117,7 @@ func (r *Files) RegisterFile(file protoreflect.FileDescriptor) error { defer globalMutex.Unlock() } if r.descsByName == nil { - r.descsByName = map[protoreflect.FullName]interface{}{ + r.descsByName = map[protoreflect.FullName]any{ "": &packageDescriptor{}, } r.filesByPath = make(map[string][]protoreflect.FileDescriptor) @@ -218,7 +218,7 @@ func (r *Files) checkGenProtoConflict(path string) { // FindDescriptorByName looks up a descriptor by the full name. // -// This returns (nil, NotFound) if not found. +// This returns (nil, [NotFound]) if not found. func (r *Files) FindDescriptorByName(name protoreflect.FullName) (protoreflect.Descriptor, error) { if r == nil { return nil, NotFound @@ -310,7 +310,7 @@ func (s *nameSuffix) Pop() (name protoreflect.Name) { // FindFileByPath looks up a file by the path. // -// This returns (nil, NotFound) if not found. +// This returns (nil, [NotFound]) if not found. // This returns an error if multiple files have the same path. func (r *Files) FindFileByPath(path string) (protoreflect.FileDescriptor, error) { if r == nil { @@ -431,7 +431,7 @@ func rangeTopLevelDescriptors(fd protoreflect.FileDescriptor, f func(protoreflec // A compliant implementation must deterministically return the same type // if no error is encountered. // -// The Types type implements this interface. +// The [Types] type implements this interface. type MessageTypeResolver interface { // FindMessageByName looks up a message by its full name. // E.g., "google.protobuf.Any" @@ -451,7 +451,7 @@ type MessageTypeResolver interface { // A compliant implementation must deterministically return the same type // if no error is encountered. // -// The Types type implements this interface. +// The [Types] type implements this interface. type ExtensionTypeResolver interface { // FindExtensionByName looks up a extension field by the field's full name. // Note that this is the full name of the field as determined by @@ -485,7 +485,7 @@ type Types struct { } type ( - typesByName map[protoreflect.FullName]interface{} + typesByName map[protoreflect.FullName]any extensionsByMessage map[protoreflect.FullName]extensionsByNumber extensionsByNumber map[protoreflect.FieldNumber]protoreflect.ExtensionType ) @@ -570,7 +570,7 @@ func (r *Types) RegisterExtension(xt protoreflect.ExtensionType) error { return nil } -func (r *Types) register(kind string, desc protoreflect.Descriptor, typ interface{}) error { +func (r *Types) register(kind string, desc protoreflect.Descriptor, typ any) error { name := desc.FullName() prev := r.typesByName[name] if prev != nil { @@ -590,7 +590,7 @@ func (r *Types) register(kind string, desc protoreflect.Descriptor, typ interfac // FindEnumByName looks up an enum by its full name. // E.g., "google.protobuf.Field.Kind". // -// This returns (nil, NotFound) if not found. +// This returns (nil, [NotFound]) if not found. func (r *Types) FindEnumByName(enum protoreflect.FullName) (protoreflect.EnumType, error) { if r == nil { return nil, NotFound @@ -611,7 +611,7 @@ func (r *Types) FindEnumByName(enum protoreflect.FullName) (protoreflect.EnumTyp // FindMessageByName looks up a message by its full name, // e.g. "google.protobuf.Any". // -// This returns (nil, NotFound) if not found. +// This returns (nil, [NotFound]) if not found. func (r *Types) FindMessageByName(message protoreflect.FullName) (protoreflect.MessageType, error) { if r == nil { return nil, NotFound @@ -632,7 +632,7 @@ func (r *Types) FindMessageByName(message protoreflect.FullName) (protoreflect.M // FindMessageByURL looks up a message by a URL identifier. // See documentation on google.protobuf.Any.type_url for the URL format. // -// This returns (nil, NotFound) if not found. +// This returns (nil, [NotFound]) if not found. func (r *Types) FindMessageByURL(url string) (protoreflect.MessageType, error) { // This function is similar to FindMessageByName but // truncates anything before and including '/' in the URL. @@ -662,7 +662,7 @@ func (r *Types) FindMessageByURL(url string) (protoreflect.MessageType, error) { // where the extension is declared and is unrelated to the full name of the // message being extended. // -// This returns (nil, NotFound) if not found. +// This returns (nil, [NotFound]) if not found. func (r *Types) FindExtensionByName(field protoreflect.FullName) (protoreflect.ExtensionType, error) { if r == nil { return nil, NotFound @@ -703,7 +703,7 @@ func (r *Types) FindExtensionByName(field protoreflect.FullName) (protoreflect.E // FindExtensionByNumber looks up a extension field by the field number // within some parent message, identified by full name. // -// This returns (nil, NotFound) if not found. +// This returns (nil, [NotFound]) if not found. func (r *Types) FindExtensionByNumber(message protoreflect.FullName, field protoreflect.FieldNumber) (protoreflect.ExtensionType, error) { if r == nil { return nil, NotFound @@ -841,7 +841,7 @@ func (r *Types) RangeExtensionsByMessage(message protoreflect.FullName, f func(p } } -func typeName(t interface{}) string { +func typeName(t any) string { switch t.(type) { case protoreflect.EnumType: return "enum" @@ -854,7 +854,7 @@ func typeName(t interface{}) string { } } -func amendErrorWithCaller(err error, prev, curr interface{}) error { +func amendErrorWithCaller(err error, prev, curr any) error { prevPkg := goPackage(prev) currPkg := goPackage(curr) if prevPkg == "" || currPkg == "" || prevPkg == currPkg { @@ -863,7 +863,7 @@ func amendErrorWithCaller(err error, prev, curr interface{}) error { return errors.New("%s\n\tpreviously from: %q\n\tcurrently from: %q", err, prevPkg, currPkg) } -func goPackage(v interface{}) string { +func goPackage(v any) string { switch d := v.(type) { case protoreflect.EnumType: v = d.Descriptor() diff --git a/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go b/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go index 44cf467d..24615656 100644 --- a/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go +++ b/vendor/google.golang.org/protobuf/runtime/protoiface/methods.go @@ -39,6 +39,9 @@ type Methods = struct { // CheckInitialized returns an error if any required fields in the message are not set. CheckInitialized func(CheckInitializedInput) (CheckInitializedOutput, error) + + // Equal compares two messages and returns EqualOutput.Equal == true if they are equal. + Equal func(EqualInput) EqualOutput } // SupportFlags indicate support for optional features. @@ -166,3 +169,18 @@ type CheckInitializedInput = struct { type CheckInitializedOutput = struct { pragma.NoUnkeyedLiterals } + +// EqualInput is input to the Equal method. +type EqualInput = struct { + pragma.NoUnkeyedLiterals + + MessageA protoreflect.Message + MessageB protoreflect.Message +} + +// EqualOutput is output from the Equal method. +type EqualOutput = struct { + pragma.NoUnkeyedLiterals + + Equal bool +} diff --git a/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go b/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go index 04c00f73..6dea75cd 100644 --- a/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go +++ b/vendor/google.golang.org/protobuf/types/descriptorpb/descriptor.pb.go @@ -48,6 +48,108 @@ import ( sync "sync" ) +// The full set of known editions. +type Edition int32 + +const ( + // A placeholder for an unknown edition value. + Edition_EDITION_UNKNOWN Edition = 0 + // A placeholder edition for specifying default behaviors *before* a feature + // was first introduced. This is effectively an "infinite past". + Edition_EDITION_LEGACY Edition = 900 + // Legacy syntax "editions". These pre-date editions, but behave much like + // distinct editions. These can't be used to specify the edition of proto + // files, but feature definitions must supply proto2/proto3 defaults for + // backwards compatibility. + Edition_EDITION_PROTO2 Edition = 998 + Edition_EDITION_PROTO3 Edition = 999 + // Editions that have been released. The specific values are arbitrary and + // should not be depended on, but they will always be time-ordered for easy + // comparison. + Edition_EDITION_2023 Edition = 1000 + Edition_EDITION_2024 Edition = 1001 + // Placeholder editions for testing feature resolution. These should not be + // used or relyed on outside of tests. + Edition_EDITION_1_TEST_ONLY Edition = 1 + Edition_EDITION_2_TEST_ONLY Edition = 2 + Edition_EDITION_99997_TEST_ONLY Edition = 99997 + Edition_EDITION_99998_TEST_ONLY Edition = 99998 + Edition_EDITION_99999_TEST_ONLY Edition = 99999 + // Placeholder for specifying unbounded edition support. This should only + // ever be used by plugins that can expect to never require any changes to + // support a new edition. + Edition_EDITION_MAX Edition = 2147483647 +) + +// Enum value maps for Edition. +var ( + Edition_name = map[int32]string{ + 0: "EDITION_UNKNOWN", + 900: "EDITION_LEGACY", + 998: "EDITION_PROTO2", + 999: "EDITION_PROTO3", + 1000: "EDITION_2023", + 1001: "EDITION_2024", + 1: "EDITION_1_TEST_ONLY", + 2: "EDITION_2_TEST_ONLY", + 99997: "EDITION_99997_TEST_ONLY", + 99998: "EDITION_99998_TEST_ONLY", + 99999: "EDITION_99999_TEST_ONLY", + 2147483647: "EDITION_MAX", + } + Edition_value = map[string]int32{ + "EDITION_UNKNOWN": 0, + "EDITION_LEGACY": 900, + "EDITION_PROTO2": 998, + "EDITION_PROTO3": 999, + "EDITION_2023": 1000, + "EDITION_2024": 1001, + "EDITION_1_TEST_ONLY": 1, + "EDITION_2_TEST_ONLY": 2, + "EDITION_99997_TEST_ONLY": 99997, + "EDITION_99998_TEST_ONLY": 99998, + "EDITION_99999_TEST_ONLY": 99999, + "EDITION_MAX": 2147483647, + } +) + +func (x Edition) Enum() *Edition { + p := new(Edition) + *p = x + return p +} + +func (x Edition) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (Edition) Descriptor() protoreflect.EnumDescriptor { + return file_google_protobuf_descriptor_proto_enumTypes[0].Descriptor() +} + +func (Edition) Type() protoreflect.EnumType { + return &file_google_protobuf_descriptor_proto_enumTypes[0] +} + +func (x Edition) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *Edition) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = Edition(num) + return nil +} + +// Deprecated: Use Edition.Descriptor instead. +func (Edition) EnumDescriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{0} +} + // The verification state of the extension range. type ExtensionRangeOptions_VerificationState int32 @@ -80,11 +182,11 @@ func (x ExtensionRangeOptions_VerificationState) String() string { } func (ExtensionRangeOptions_VerificationState) Descriptor() protoreflect.EnumDescriptor { - return file_google_protobuf_descriptor_proto_enumTypes[0].Descriptor() + return file_google_protobuf_descriptor_proto_enumTypes[1].Descriptor() } func (ExtensionRangeOptions_VerificationState) Type() protoreflect.EnumType { - return &file_google_protobuf_descriptor_proto_enumTypes[0] + return &file_google_protobuf_descriptor_proto_enumTypes[1] } func (x ExtensionRangeOptions_VerificationState) Number() protoreflect.EnumNumber { @@ -125,9 +227,10 @@ const ( FieldDescriptorProto_TYPE_BOOL FieldDescriptorProto_Type = 8 FieldDescriptorProto_TYPE_STRING FieldDescriptorProto_Type = 9 // Tag-delimited aggregate. - // Group type is deprecated and not supported in proto3. However, Proto3 + // Group type is deprecated and not supported after google.protobuf. However, Proto3 // implementations should still be able to parse the group wire format and - // treat group fields as unknown fields. + // treat group fields as unknown fields. In Editions, the group wire format + // can be enabled via the `message_encoding` feature. FieldDescriptorProto_TYPE_GROUP FieldDescriptorProto_Type = 10 FieldDescriptorProto_TYPE_MESSAGE FieldDescriptorProto_Type = 11 // Length-delimited aggregate. // New in version 2. @@ -195,11 +298,11 @@ func (x FieldDescriptorProto_Type) String() string { } func (FieldDescriptorProto_Type) Descriptor() protoreflect.EnumDescriptor { - return file_google_protobuf_descriptor_proto_enumTypes[1].Descriptor() + return file_google_protobuf_descriptor_proto_enumTypes[2].Descriptor() } func (FieldDescriptorProto_Type) Type() protoreflect.EnumType { - return &file_google_protobuf_descriptor_proto_enumTypes[1] + return &file_google_protobuf_descriptor_proto_enumTypes[2] } func (x FieldDescriptorProto_Type) Number() protoreflect.EnumNumber { @@ -226,21 +329,24 @@ type FieldDescriptorProto_Label int32 const ( // 0 is reserved for errors FieldDescriptorProto_LABEL_OPTIONAL FieldDescriptorProto_Label = 1 - FieldDescriptorProto_LABEL_REQUIRED FieldDescriptorProto_Label = 2 FieldDescriptorProto_LABEL_REPEATED FieldDescriptorProto_Label = 3 + // The required label is only allowed in google.protobuf. In proto3 and Editions + // it's explicitly prohibited. In Editions, the `field_presence` feature + // can be used to get this behavior. + FieldDescriptorProto_LABEL_REQUIRED FieldDescriptorProto_Label = 2 ) // Enum value maps for FieldDescriptorProto_Label. var ( FieldDescriptorProto_Label_name = map[int32]string{ 1: "LABEL_OPTIONAL", - 2: "LABEL_REQUIRED", 3: "LABEL_REPEATED", + 2: "LABEL_REQUIRED", } FieldDescriptorProto_Label_value = map[string]int32{ "LABEL_OPTIONAL": 1, - "LABEL_REQUIRED": 2, "LABEL_REPEATED": 3, + "LABEL_REQUIRED": 2, } ) @@ -255,11 +361,11 @@ func (x FieldDescriptorProto_Label) String() string { } func (FieldDescriptorProto_Label) Descriptor() protoreflect.EnumDescriptor { - return file_google_protobuf_descriptor_proto_enumTypes[2].Descriptor() + return file_google_protobuf_descriptor_proto_enumTypes[3].Descriptor() } func (FieldDescriptorProto_Label) Type() protoreflect.EnumType { - return &file_google_protobuf_descriptor_proto_enumTypes[2] + return &file_google_protobuf_descriptor_proto_enumTypes[3] } func (x FieldDescriptorProto_Label) Number() protoreflect.EnumNumber { @@ -316,11 +422,11 @@ func (x FileOptions_OptimizeMode) String() string { } func (FileOptions_OptimizeMode) Descriptor() protoreflect.EnumDescriptor { - return file_google_protobuf_descriptor_proto_enumTypes[3].Descriptor() + return file_google_protobuf_descriptor_proto_enumTypes[4].Descriptor() } func (FileOptions_OptimizeMode) Type() protoreflect.EnumType { - return &file_google_protobuf_descriptor_proto_enumTypes[3] + return &file_google_protobuf_descriptor_proto_enumTypes[4] } func (x FileOptions_OptimizeMode) Number() protoreflect.EnumNumber { @@ -382,11 +488,11 @@ func (x FieldOptions_CType) String() string { } func (FieldOptions_CType) Descriptor() protoreflect.EnumDescriptor { - return file_google_protobuf_descriptor_proto_enumTypes[4].Descriptor() + return file_google_protobuf_descriptor_proto_enumTypes[5].Descriptor() } func (FieldOptions_CType) Type() protoreflect.EnumType { - return &file_google_protobuf_descriptor_proto_enumTypes[4] + return &file_google_protobuf_descriptor_proto_enumTypes[5] } func (x FieldOptions_CType) Number() protoreflect.EnumNumber { @@ -444,11 +550,11 @@ func (x FieldOptions_JSType) String() string { } func (FieldOptions_JSType) Descriptor() protoreflect.EnumDescriptor { - return file_google_protobuf_descriptor_proto_enumTypes[5].Descriptor() + return file_google_protobuf_descriptor_proto_enumTypes[6].Descriptor() } func (FieldOptions_JSType) Type() protoreflect.EnumType { - return &file_google_protobuf_descriptor_proto_enumTypes[5] + return &file_google_protobuf_descriptor_proto_enumTypes[6] } func (x FieldOptions_JSType) Number() protoreflect.EnumNumber { @@ -506,11 +612,11 @@ func (x FieldOptions_OptionRetention) String() string { } func (FieldOptions_OptionRetention) Descriptor() protoreflect.EnumDescriptor { - return file_google_protobuf_descriptor_proto_enumTypes[6].Descriptor() + return file_google_protobuf_descriptor_proto_enumTypes[7].Descriptor() } func (FieldOptions_OptionRetention) Type() protoreflect.EnumType { - return &file_google_protobuf_descriptor_proto_enumTypes[6] + return &file_google_protobuf_descriptor_proto_enumTypes[7] } func (x FieldOptions_OptionRetention) Number() protoreflect.EnumNumber { @@ -590,11 +696,11 @@ func (x FieldOptions_OptionTargetType) String() string { } func (FieldOptions_OptionTargetType) Descriptor() protoreflect.EnumDescriptor { - return file_google_protobuf_descriptor_proto_enumTypes[7].Descriptor() + return file_google_protobuf_descriptor_proto_enumTypes[8].Descriptor() } func (FieldOptions_OptionTargetType) Type() protoreflect.EnumType { - return &file_google_protobuf_descriptor_proto_enumTypes[7] + return &file_google_protobuf_descriptor_proto_enumTypes[8] } func (x FieldOptions_OptionTargetType) Number() protoreflect.EnumNumber { @@ -651,31 +757,388 @@ func (x MethodOptions_IdempotencyLevel) String() string { return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) } -func (MethodOptions_IdempotencyLevel) Descriptor() protoreflect.EnumDescriptor { - return file_google_protobuf_descriptor_proto_enumTypes[8].Descriptor() +func (MethodOptions_IdempotencyLevel) Descriptor() protoreflect.EnumDescriptor { + return file_google_protobuf_descriptor_proto_enumTypes[9].Descriptor() +} + +func (MethodOptions_IdempotencyLevel) Type() protoreflect.EnumType { + return &file_google_protobuf_descriptor_proto_enumTypes[9] +} + +func (x MethodOptions_IdempotencyLevel) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *MethodOptions_IdempotencyLevel) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = MethodOptions_IdempotencyLevel(num) + return nil +} + +// Deprecated: Use MethodOptions_IdempotencyLevel.Descriptor instead. +func (MethodOptions_IdempotencyLevel) EnumDescriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{17, 0} +} + +type FeatureSet_FieldPresence int32 + +const ( + FeatureSet_FIELD_PRESENCE_UNKNOWN FeatureSet_FieldPresence = 0 + FeatureSet_EXPLICIT FeatureSet_FieldPresence = 1 + FeatureSet_IMPLICIT FeatureSet_FieldPresence = 2 + FeatureSet_LEGACY_REQUIRED FeatureSet_FieldPresence = 3 +) + +// Enum value maps for FeatureSet_FieldPresence. +var ( + FeatureSet_FieldPresence_name = map[int32]string{ + 0: "FIELD_PRESENCE_UNKNOWN", + 1: "EXPLICIT", + 2: "IMPLICIT", + 3: "LEGACY_REQUIRED", + } + FeatureSet_FieldPresence_value = map[string]int32{ + "FIELD_PRESENCE_UNKNOWN": 0, + "EXPLICIT": 1, + "IMPLICIT": 2, + "LEGACY_REQUIRED": 3, + } +) + +func (x FeatureSet_FieldPresence) Enum() *FeatureSet_FieldPresence { + p := new(FeatureSet_FieldPresence) + *p = x + return p +} + +func (x FeatureSet_FieldPresence) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (FeatureSet_FieldPresence) Descriptor() protoreflect.EnumDescriptor { + return file_google_protobuf_descriptor_proto_enumTypes[10].Descriptor() +} + +func (FeatureSet_FieldPresence) Type() protoreflect.EnumType { + return &file_google_protobuf_descriptor_proto_enumTypes[10] +} + +func (x FeatureSet_FieldPresence) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *FeatureSet_FieldPresence) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = FeatureSet_FieldPresence(num) + return nil +} + +// Deprecated: Use FeatureSet_FieldPresence.Descriptor instead. +func (FeatureSet_FieldPresence) EnumDescriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{19, 0} +} + +type FeatureSet_EnumType int32 + +const ( + FeatureSet_ENUM_TYPE_UNKNOWN FeatureSet_EnumType = 0 + FeatureSet_OPEN FeatureSet_EnumType = 1 + FeatureSet_CLOSED FeatureSet_EnumType = 2 +) + +// Enum value maps for FeatureSet_EnumType. +var ( + FeatureSet_EnumType_name = map[int32]string{ + 0: "ENUM_TYPE_UNKNOWN", + 1: "OPEN", + 2: "CLOSED", + } + FeatureSet_EnumType_value = map[string]int32{ + "ENUM_TYPE_UNKNOWN": 0, + "OPEN": 1, + "CLOSED": 2, + } +) + +func (x FeatureSet_EnumType) Enum() *FeatureSet_EnumType { + p := new(FeatureSet_EnumType) + *p = x + return p +} + +func (x FeatureSet_EnumType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (FeatureSet_EnumType) Descriptor() protoreflect.EnumDescriptor { + return file_google_protobuf_descriptor_proto_enumTypes[11].Descriptor() +} + +func (FeatureSet_EnumType) Type() protoreflect.EnumType { + return &file_google_protobuf_descriptor_proto_enumTypes[11] +} + +func (x FeatureSet_EnumType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *FeatureSet_EnumType) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = FeatureSet_EnumType(num) + return nil +} + +// Deprecated: Use FeatureSet_EnumType.Descriptor instead. +func (FeatureSet_EnumType) EnumDescriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{19, 1} +} + +type FeatureSet_RepeatedFieldEncoding int32 + +const ( + FeatureSet_REPEATED_FIELD_ENCODING_UNKNOWN FeatureSet_RepeatedFieldEncoding = 0 + FeatureSet_PACKED FeatureSet_RepeatedFieldEncoding = 1 + FeatureSet_EXPANDED FeatureSet_RepeatedFieldEncoding = 2 +) + +// Enum value maps for FeatureSet_RepeatedFieldEncoding. +var ( + FeatureSet_RepeatedFieldEncoding_name = map[int32]string{ + 0: "REPEATED_FIELD_ENCODING_UNKNOWN", + 1: "PACKED", + 2: "EXPANDED", + } + FeatureSet_RepeatedFieldEncoding_value = map[string]int32{ + "REPEATED_FIELD_ENCODING_UNKNOWN": 0, + "PACKED": 1, + "EXPANDED": 2, + } +) + +func (x FeatureSet_RepeatedFieldEncoding) Enum() *FeatureSet_RepeatedFieldEncoding { + p := new(FeatureSet_RepeatedFieldEncoding) + *p = x + return p +} + +func (x FeatureSet_RepeatedFieldEncoding) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (FeatureSet_RepeatedFieldEncoding) Descriptor() protoreflect.EnumDescriptor { + return file_google_protobuf_descriptor_proto_enumTypes[12].Descriptor() +} + +func (FeatureSet_RepeatedFieldEncoding) Type() protoreflect.EnumType { + return &file_google_protobuf_descriptor_proto_enumTypes[12] +} + +func (x FeatureSet_RepeatedFieldEncoding) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *FeatureSet_RepeatedFieldEncoding) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = FeatureSet_RepeatedFieldEncoding(num) + return nil +} + +// Deprecated: Use FeatureSet_RepeatedFieldEncoding.Descriptor instead. +func (FeatureSet_RepeatedFieldEncoding) EnumDescriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{19, 2} +} + +type FeatureSet_Utf8Validation int32 + +const ( + FeatureSet_UTF8_VALIDATION_UNKNOWN FeatureSet_Utf8Validation = 0 + FeatureSet_VERIFY FeatureSet_Utf8Validation = 2 + FeatureSet_NONE FeatureSet_Utf8Validation = 3 +) + +// Enum value maps for FeatureSet_Utf8Validation. +var ( + FeatureSet_Utf8Validation_name = map[int32]string{ + 0: "UTF8_VALIDATION_UNKNOWN", + 2: "VERIFY", + 3: "NONE", + } + FeatureSet_Utf8Validation_value = map[string]int32{ + "UTF8_VALIDATION_UNKNOWN": 0, + "VERIFY": 2, + "NONE": 3, + } +) + +func (x FeatureSet_Utf8Validation) Enum() *FeatureSet_Utf8Validation { + p := new(FeatureSet_Utf8Validation) + *p = x + return p +} + +func (x FeatureSet_Utf8Validation) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (FeatureSet_Utf8Validation) Descriptor() protoreflect.EnumDescriptor { + return file_google_protobuf_descriptor_proto_enumTypes[13].Descriptor() +} + +func (FeatureSet_Utf8Validation) Type() protoreflect.EnumType { + return &file_google_protobuf_descriptor_proto_enumTypes[13] +} + +func (x FeatureSet_Utf8Validation) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *FeatureSet_Utf8Validation) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = FeatureSet_Utf8Validation(num) + return nil +} + +// Deprecated: Use FeatureSet_Utf8Validation.Descriptor instead. +func (FeatureSet_Utf8Validation) EnumDescriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{19, 3} +} + +type FeatureSet_MessageEncoding int32 + +const ( + FeatureSet_MESSAGE_ENCODING_UNKNOWN FeatureSet_MessageEncoding = 0 + FeatureSet_LENGTH_PREFIXED FeatureSet_MessageEncoding = 1 + FeatureSet_DELIMITED FeatureSet_MessageEncoding = 2 +) + +// Enum value maps for FeatureSet_MessageEncoding. +var ( + FeatureSet_MessageEncoding_name = map[int32]string{ + 0: "MESSAGE_ENCODING_UNKNOWN", + 1: "LENGTH_PREFIXED", + 2: "DELIMITED", + } + FeatureSet_MessageEncoding_value = map[string]int32{ + "MESSAGE_ENCODING_UNKNOWN": 0, + "LENGTH_PREFIXED": 1, + "DELIMITED": 2, + } +) + +func (x FeatureSet_MessageEncoding) Enum() *FeatureSet_MessageEncoding { + p := new(FeatureSet_MessageEncoding) + *p = x + return p +} + +func (x FeatureSet_MessageEncoding) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (FeatureSet_MessageEncoding) Descriptor() protoreflect.EnumDescriptor { + return file_google_protobuf_descriptor_proto_enumTypes[14].Descriptor() +} + +func (FeatureSet_MessageEncoding) Type() protoreflect.EnumType { + return &file_google_protobuf_descriptor_proto_enumTypes[14] +} + +func (x FeatureSet_MessageEncoding) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *FeatureSet_MessageEncoding) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = FeatureSet_MessageEncoding(num) + return nil +} + +// Deprecated: Use FeatureSet_MessageEncoding.Descriptor instead. +func (FeatureSet_MessageEncoding) EnumDescriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{19, 4} +} + +type FeatureSet_JsonFormat int32 + +const ( + FeatureSet_JSON_FORMAT_UNKNOWN FeatureSet_JsonFormat = 0 + FeatureSet_ALLOW FeatureSet_JsonFormat = 1 + FeatureSet_LEGACY_BEST_EFFORT FeatureSet_JsonFormat = 2 +) + +// Enum value maps for FeatureSet_JsonFormat. +var ( + FeatureSet_JsonFormat_name = map[int32]string{ + 0: "JSON_FORMAT_UNKNOWN", + 1: "ALLOW", + 2: "LEGACY_BEST_EFFORT", + } + FeatureSet_JsonFormat_value = map[string]int32{ + "JSON_FORMAT_UNKNOWN": 0, + "ALLOW": 1, + "LEGACY_BEST_EFFORT": 2, + } +) + +func (x FeatureSet_JsonFormat) Enum() *FeatureSet_JsonFormat { + p := new(FeatureSet_JsonFormat) + *p = x + return p +} + +func (x FeatureSet_JsonFormat) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (FeatureSet_JsonFormat) Descriptor() protoreflect.EnumDescriptor { + return file_google_protobuf_descriptor_proto_enumTypes[15].Descriptor() } -func (MethodOptions_IdempotencyLevel) Type() protoreflect.EnumType { - return &file_google_protobuf_descriptor_proto_enumTypes[8] +func (FeatureSet_JsonFormat) Type() protoreflect.EnumType { + return &file_google_protobuf_descriptor_proto_enumTypes[15] } -func (x MethodOptions_IdempotencyLevel) Number() protoreflect.EnumNumber { +func (x FeatureSet_JsonFormat) Number() protoreflect.EnumNumber { return protoreflect.EnumNumber(x) } // Deprecated: Do not use. -func (x *MethodOptions_IdempotencyLevel) UnmarshalJSON(b []byte) error { +func (x *FeatureSet_JsonFormat) UnmarshalJSON(b []byte) error { num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) if err != nil { return err } - *x = MethodOptions_IdempotencyLevel(num) + *x = FeatureSet_JsonFormat(num) return nil } -// Deprecated: Use MethodOptions_IdempotencyLevel.Descriptor instead. -func (MethodOptions_IdempotencyLevel) EnumDescriptor() ([]byte, []int) { - return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{17, 0} +// Deprecated: Use FeatureSet_JsonFormat.Descriptor instead. +func (FeatureSet_JsonFormat) EnumDescriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{19, 5} } // Represents the identified object's effect on the element in the original @@ -716,11 +1179,11 @@ func (x GeneratedCodeInfo_Annotation_Semantic) String() string { } func (GeneratedCodeInfo_Annotation_Semantic) Descriptor() protoreflect.EnumDescriptor { - return file_google_protobuf_descriptor_proto_enumTypes[9].Descriptor() + return file_google_protobuf_descriptor_proto_enumTypes[16].Descriptor() } func (GeneratedCodeInfo_Annotation_Semantic) Type() protoreflect.EnumType { - return &file_google_protobuf_descriptor_proto_enumTypes[9] + return &file_google_protobuf_descriptor_proto_enumTypes[16] } func (x GeneratedCodeInfo_Annotation_Semantic) Number() protoreflect.EnumNumber { @@ -739,7 +1202,7 @@ func (x *GeneratedCodeInfo_Annotation_Semantic) UnmarshalJSON(b []byte) error { // Deprecated: Use GeneratedCodeInfo_Annotation_Semantic.Descriptor instead. func (GeneratedCodeInfo_Annotation_Semantic) EnumDescriptor() ([]byte, []int) { - return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{20, 0, 0} + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{22, 0, 0} } // The protocol compiler can output a FileDescriptorSet containing the .proto @@ -754,11 +1217,9 @@ type FileDescriptorSet struct { func (x *FileDescriptorSet) Reset() { *x = FileDescriptorSet{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FileDescriptorSet) String() string { @@ -769,7 +1230,7 @@ func (*FileDescriptorSet) ProtoMessage() {} func (x *FileDescriptorSet) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -822,17 +1283,15 @@ type FileDescriptorProto struct { // // If `edition` is present, this value must be "editions". Syntax *string `protobuf:"bytes,12,opt,name=syntax" json:"syntax,omitempty"` - // The edition of the proto file, which is an opaque string. - Edition *string `protobuf:"bytes,13,opt,name=edition" json:"edition,omitempty"` + // The edition of the proto file. + Edition *Edition `protobuf:"varint,14,opt,name=edition,enum=google.protobuf.Edition" json:"edition,omitempty"` } func (x *FileDescriptorProto) Reset() { *x = FileDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FileDescriptorProto) String() string { @@ -843,7 +1302,7 @@ func (*FileDescriptorProto) ProtoMessage() {} func (x *FileDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -942,11 +1401,11 @@ func (x *FileDescriptorProto) GetSyntax() string { return "" } -func (x *FileDescriptorProto) GetEdition() string { +func (x *FileDescriptorProto) GetEdition() Edition { if x != nil && x.Edition != nil { return *x.Edition } - return "" + return Edition_EDITION_UNKNOWN } // Describes a message type. @@ -971,11 +1430,9 @@ type DescriptorProto struct { func (x *DescriptorProto) Reset() { *x = DescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *DescriptorProto) String() string { @@ -986,7 +1443,7 @@ func (*DescriptorProto) ProtoMessage() {} func (x *DescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1079,13 +1536,14 @@ type ExtensionRangeOptions struct { // The parser stores options it doesn't recognize here. See above. UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` - // go/protobuf-stripping-extension-declarations - // Like Metadata, but we use a repeated field to hold all extension - // declarations. This should avoid the size increases of transforming a large - // extension range into small ranges in generated binaries. + // For external users: DO NOT USE. We are in the process of open sourcing + // extension declaration and executing internal cleanups before it can be + // used externally. Declaration []*ExtensionRangeOptions_Declaration `protobuf:"bytes,2,rep,name=declaration" json:"declaration,omitempty"` + // Any features defined in the specific edition. + Features *FeatureSet `protobuf:"bytes,50,opt,name=features" json:"features,omitempty"` // The verification state of the range. - // TODO(b/278783756): flip the default to DECLARATION once all empty ranges + // TODO: flip the default to DECLARATION once all empty ranges // are marked as UNVERIFIED. Verification *ExtensionRangeOptions_VerificationState `protobuf:"varint,3,opt,name=verification,enum=google.protobuf.ExtensionRangeOptions_VerificationState,def=1" json:"verification,omitempty"` } @@ -1097,11 +1555,9 @@ const ( func (x *ExtensionRangeOptions) Reset() { *x = ExtensionRangeOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[3] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ExtensionRangeOptions) String() string { @@ -1112,7 +1568,7 @@ func (*ExtensionRangeOptions) ProtoMessage() {} func (x *ExtensionRangeOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[3] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1141,6 +1597,13 @@ func (x *ExtensionRangeOptions) GetDeclaration() []*ExtensionRangeOptions_Declar return nil } +func (x *ExtensionRangeOptions) GetFeatures() *FeatureSet { + if x != nil { + return x.Features + } + return nil +} + func (x *ExtensionRangeOptions) GetVerification() ExtensionRangeOptions_VerificationState { if x != nil && x.Verification != nil { return *x.Verification @@ -1186,12 +1649,12 @@ type FieldDescriptorProto struct { // If true, this is a proto3 "optional". When a proto3 field is optional, it // tracks presence regardless of field type. // - // When proto3_optional is true, this field must be belong to a oneof to - // signal to old proto3 clients that presence is tracked for this field. This - // oneof is known as a "synthetic" oneof, and this field must be its sole - // member (each proto3 optional field gets its own synthetic oneof). Synthetic - // oneofs exist in the descriptor only, and do not generate any API. Synthetic - // oneofs must be ordered after all "real" oneofs. + // When proto3_optional is true, this field must belong to a oneof to signal + // to old proto3 clients that presence is tracked for this field. This oneof + // is known as a "synthetic" oneof, and this field must be its sole member + // (each proto3 optional field gets its own synthetic oneof). Synthetic oneofs + // exist in the descriptor only, and do not generate any API. Synthetic oneofs + // must be ordered after all "real" oneofs. // // For message fields, proto3_optional doesn't create any semantic change, // since non-repeated message fields always track presence. However it still @@ -1209,11 +1672,9 @@ type FieldDescriptorProto struct { func (x *FieldDescriptorProto) Reset() { *x = FieldDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldDescriptorProto) String() string { @@ -1224,7 +1685,7 @@ func (*FieldDescriptorProto) ProtoMessage() {} func (x *FieldDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[4] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1328,11 +1789,9 @@ type OneofDescriptorProto struct { func (x *OneofDescriptorProto) Reset() { *x = OneofDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[5] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *OneofDescriptorProto) String() string { @@ -1343,7 +1802,7 @@ func (*OneofDescriptorProto) ProtoMessage() {} func (x *OneofDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[5] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1392,11 +1851,9 @@ type EnumDescriptorProto struct { func (x *EnumDescriptorProto) Reset() { *x = EnumDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[6] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumDescriptorProto) String() string { @@ -1407,7 +1864,7 @@ func (*EnumDescriptorProto) ProtoMessage() {} func (x *EnumDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[6] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1470,11 +1927,9 @@ type EnumValueDescriptorProto struct { func (x *EnumValueDescriptorProto) Reset() { *x = EnumValueDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[7] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumValueDescriptorProto) String() string { @@ -1485,7 +1940,7 @@ func (*EnumValueDescriptorProto) ProtoMessage() {} func (x *EnumValueDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[7] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1534,11 +1989,9 @@ type ServiceDescriptorProto struct { func (x *ServiceDescriptorProto) Reset() { *x = ServiceDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[8] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ServiceDescriptorProto) String() string { @@ -1549,7 +2002,7 @@ func (*ServiceDescriptorProto) ProtoMessage() {} func (x *ServiceDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[8] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1611,11 +2064,9 @@ const ( func (x *MethodDescriptorProto) Reset() { *x = MethodDescriptorProto{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[9] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *MethodDescriptorProto) String() string { @@ -1626,7 +2077,7 @@ func (*MethodDescriptorProto) ProtoMessage() {} func (x *MethodDescriptorProto) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[9] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1711,12 +2162,16 @@ type FileOptions struct { // // Deprecated: Marked as deprecated in google/protobuf/descriptor.proto. JavaGenerateEqualsAndHash *bool `protobuf:"varint,20,opt,name=java_generate_equals_and_hash,json=javaGenerateEqualsAndHash" json:"java_generate_equals_and_hash,omitempty"` - // If set true, then the Java2 code generator will generate code that - // throws an exception whenever an attempt is made to assign a non-UTF-8 - // byte sequence to a string field. - // Message reflection will do the same. - // However, an extension field still accepts non-UTF-8 byte sequences. - // This option has no effect on when used with the lite runtime. + // A proto2 file can set this to true to opt in to UTF-8 checking for Java, + // which will throw an exception if invalid UTF-8 is parsed from the wire or + // assigned to a string field. + // + // TODO: clarify exactly what kinds of field types this option + // applies to, and update these docs accordingly. + // + // Proto3 files already perform these checks. Setting the option explicitly to + // false has no effect: it cannot be used to opt proto3 files out of UTF-8 + // checks. JavaStringCheckUtf8 *bool `protobuf:"varint,27,opt,name=java_string_check_utf8,json=javaStringCheckUtf8,def=0" json:"java_string_check_utf8,omitempty"` OptimizeFor *FileOptions_OptimizeMode `protobuf:"varint,9,opt,name=optimize_for,json=optimizeFor,enum=google.protobuf.FileOptions_OptimizeMode,def=1" json:"optimize_for,omitempty"` // Sets the Go package where structs generated from this .proto will be @@ -1738,7 +2193,6 @@ type FileOptions struct { CcGenericServices *bool `protobuf:"varint,16,opt,name=cc_generic_services,json=ccGenericServices,def=0" json:"cc_generic_services,omitempty"` JavaGenericServices *bool `protobuf:"varint,17,opt,name=java_generic_services,json=javaGenericServices,def=0" json:"java_generic_services,omitempty"` PyGenericServices *bool `protobuf:"varint,18,opt,name=py_generic_services,json=pyGenericServices,def=0" json:"py_generic_services,omitempty"` - PhpGenericServices *bool `protobuf:"varint,42,opt,name=php_generic_services,json=phpGenericServices,def=0" json:"php_generic_services,omitempty"` // Is this file deprecated? // Depending on the target platform, this can emit Deprecated annotations // for everything in the file, or it will be completely ignored; in the very @@ -1772,6 +2226,8 @@ type FileOptions struct { // is empty. When this option is not set, the package name will be used for // determining the ruby package. RubyPackage *string `protobuf:"bytes,45,opt,name=ruby_package,json=rubyPackage" json:"ruby_package,omitempty"` + // Any features defined in the specific edition. + Features *FeatureSet `protobuf:"bytes,50,opt,name=features" json:"features,omitempty"` // The parser stores options it doesn't recognize here. // See the documentation for the "Options" section above. UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` @@ -1785,18 +2241,15 @@ const ( Default_FileOptions_CcGenericServices = bool(false) Default_FileOptions_JavaGenericServices = bool(false) Default_FileOptions_PyGenericServices = bool(false) - Default_FileOptions_PhpGenericServices = bool(false) Default_FileOptions_Deprecated = bool(false) Default_FileOptions_CcEnableArenas = bool(true) ) func (x *FileOptions) Reset() { *x = FileOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[10] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FileOptions) String() string { @@ -1807,7 +2260,7 @@ func (*FileOptions) ProtoMessage() {} func (x *FileOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[10] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -1893,13 +2346,6 @@ func (x *FileOptions) GetPyGenericServices() bool { return Default_FileOptions_PyGenericServices } -func (x *FileOptions) GetPhpGenericServices() bool { - if x != nil && x.PhpGenericServices != nil { - return *x.PhpGenericServices - } - return Default_FileOptions_PhpGenericServices -} - func (x *FileOptions) GetDeprecated() bool { if x != nil && x.Deprecated != nil { return *x.Deprecated @@ -1963,6 +2409,13 @@ func (x *FileOptions) GetRubyPackage() string { return "" } +func (x *FileOptions) GetFeatures() *FeatureSet { + if x != nil { + return x.Features + } + return nil +} + func (x *FileOptions) GetUninterpretedOption() []*UninterpretedOption { if x != nil { return x.UninterpretedOption @@ -2006,10 +2459,6 @@ type MessageOptions struct { // for the message, or it will be completely ignored; in the very least, // this is a formalization for deprecating messages. Deprecated *bool `protobuf:"varint,3,opt,name=deprecated,def=0" json:"deprecated,omitempty"` - // NOTE: Do not set the option in .proto files. Always use the maps syntax - // instead. The option should only be implicitly set by the proto compiler - // parser. - // // Whether the message is an automatically generated map entry type for the // maps field. // @@ -2030,6 +2479,10 @@ type MessageOptions struct { // use a native map in the target language to hold the keys and values. // The reflection APIs in such implementations still need to work as // if the field is a repeated message field. + // + // NOTE: Do not set the option in .proto files. Always use the maps syntax + // instead. The option should only be implicitly set by the proto compiler + // parser. MapEntry *bool `protobuf:"varint,7,opt,name=map_entry,json=mapEntry" json:"map_entry,omitempty"` // Enable the legacy handling of JSON field name conflicts. This lowercases // and strips underscored from the fields before comparison in proto3 only. @@ -2039,11 +2492,13 @@ type MessageOptions struct { // This should only be used as a temporary measure against broken builds due // to the change in behavior for JSON field name conflicts. // - // TODO(b/261750190) This is legacy behavior we plan to remove once downstream + // TODO This is legacy behavior we plan to remove once downstream // teams have had time to migrate. // // Deprecated: Marked as deprecated in google/protobuf/descriptor.proto. DeprecatedLegacyJsonFieldConflicts *bool `protobuf:"varint,11,opt,name=deprecated_legacy_json_field_conflicts,json=deprecatedLegacyJsonFieldConflicts" json:"deprecated_legacy_json_field_conflicts,omitempty"` + // Any features defined in the specific edition. + Features *FeatureSet `protobuf:"bytes,12,opt,name=features" json:"features,omitempty"` // The parser stores options it doesn't recognize here. See above. UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` } @@ -2057,11 +2512,9 @@ const ( func (x *MessageOptions) Reset() { *x = MessageOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[11] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *MessageOptions) String() string { @@ -2072,7 +2525,7 @@ func (*MessageOptions) ProtoMessage() {} func (x *MessageOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[11] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2123,6 +2576,13 @@ func (x *MessageOptions) GetDeprecatedLegacyJsonFieldConflicts() bool { return false } +func (x *MessageOptions) GetFeatures() *FeatureSet { + if x != nil { + return x.Features + } + return nil +} + func (x *MessageOptions) GetUninterpretedOption() []*UninterpretedOption { if x != nil { return x.UninterpretedOption @@ -2147,7 +2607,9 @@ type FieldOptions struct { // a more efficient representation on the wire. Rather than repeatedly // writing the tag and type for each element, the entire array is encoded as // a single length-delimited blob. In proto3, only explicit setting it to - // false will avoid using packed encoding. + // false will avoid using packed encoding. This option is prohibited in + // Editions, but the `repeated_field_encoding` feature can be used to control + // the behavior. Packed *bool `protobuf:"varint,2,opt,name=packed" json:"packed,omitempty"` // The jstype option determines the JavaScript type used for values of the // field. The option is permitted only for 64 bit integral and fixed types @@ -2178,19 +2640,11 @@ type FieldOptions struct { // call from multiple threads concurrently, while non-const methods continue // to require exclusive access. // - // Note that implementations may choose not to check required fields within - // a lazy sub-message. That is, calling IsInitialized() on the outer message - // may return true even if the inner message has missing required fields. - // This is necessary because otherwise the inner message would have to be - // parsed in order to perform the check, defeating the purpose of lazy - // parsing. An implementation which chooses not to check required fields - // must be consistent about it. That is, for any particular sub-message, the - // implementation must either *always* check its required fields, or *never* - // check its required fields, regardless of whether or not the message has - // been parsed. - // - // As of May 2022, lazy verifies the contents of the byte stream during - // parsing. An invalid byte stream will cause the overall parsing to fail. + // Note that lazy message fields are still eagerly verified to check + // ill-formed wireformat or missing required fields. Calling IsInitialized() + // on the outer message would fail if the inner message has missing required + // fields. Failed verification would result in parsing failure (except when + // uninitialized messages are acceptable). Lazy *bool `protobuf:"varint,5,opt,name=lazy,def=0" json:"lazy,omitempty"` // unverified_lazy does no correctness checks on the byte stream. This should // only be used where lazy with verification is prohibitive for performance @@ -2205,11 +2659,13 @@ type FieldOptions struct { Weak *bool `protobuf:"varint,10,opt,name=weak,def=0" json:"weak,omitempty"` // Indicate that the field value should not be printed out when using debug // formats, e.g. when the field contains sensitive credentials. - DebugRedact *bool `protobuf:"varint,16,opt,name=debug_redact,json=debugRedact,def=0" json:"debug_redact,omitempty"` - Retention *FieldOptions_OptionRetention `protobuf:"varint,17,opt,name=retention,enum=google.protobuf.FieldOptions_OptionRetention" json:"retention,omitempty"` - // Deprecated: Marked as deprecated in google/protobuf/descriptor.proto. - Target *FieldOptions_OptionTargetType `protobuf:"varint,18,opt,name=target,enum=google.protobuf.FieldOptions_OptionTargetType" json:"target,omitempty"` - Targets []FieldOptions_OptionTargetType `protobuf:"varint,19,rep,name=targets,enum=google.protobuf.FieldOptions_OptionTargetType" json:"targets,omitempty"` + DebugRedact *bool `protobuf:"varint,16,opt,name=debug_redact,json=debugRedact,def=0" json:"debug_redact,omitempty"` + Retention *FieldOptions_OptionRetention `protobuf:"varint,17,opt,name=retention,enum=google.protobuf.FieldOptions_OptionRetention" json:"retention,omitempty"` + Targets []FieldOptions_OptionTargetType `protobuf:"varint,19,rep,name=targets,enum=google.protobuf.FieldOptions_OptionTargetType" json:"targets,omitempty"` + EditionDefaults []*FieldOptions_EditionDefault `protobuf:"bytes,20,rep,name=edition_defaults,json=editionDefaults" json:"edition_defaults,omitempty"` + // Any features defined in the specific edition. + Features *FeatureSet `protobuf:"bytes,21,opt,name=features" json:"features,omitempty"` + FeatureSupport *FieldOptions_FeatureSupport `protobuf:"bytes,22,opt,name=feature_support,json=featureSupport" json:"feature_support,omitempty"` // The parser stores options it doesn't recognize here. See above. UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` } @@ -2227,11 +2683,9 @@ const ( func (x *FieldOptions) Reset() { *x = FieldOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[12] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *FieldOptions) String() string { @@ -2242,7 +2696,7 @@ func (*FieldOptions) ProtoMessage() {} func (x *FieldOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[12] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2320,17 +2774,30 @@ func (x *FieldOptions) GetRetention() FieldOptions_OptionRetention { return FieldOptions_RETENTION_UNKNOWN } -// Deprecated: Marked as deprecated in google/protobuf/descriptor.proto. -func (x *FieldOptions) GetTarget() FieldOptions_OptionTargetType { - if x != nil && x.Target != nil { - return *x.Target +func (x *FieldOptions) GetTargets() []FieldOptions_OptionTargetType { + if x != nil { + return x.Targets } - return FieldOptions_TARGET_TYPE_UNKNOWN + return nil } -func (x *FieldOptions) GetTargets() []FieldOptions_OptionTargetType { +func (x *FieldOptions) GetEditionDefaults() []*FieldOptions_EditionDefault { if x != nil { - return x.Targets + return x.EditionDefaults + } + return nil +} + +func (x *FieldOptions) GetFeatures() *FeatureSet { + if x != nil { + return x.Features + } + return nil +} + +func (x *FieldOptions) GetFeatureSupport() *FieldOptions_FeatureSupport { + if x != nil { + return x.FeatureSupport } return nil } @@ -2348,17 +2815,17 @@ type OneofOptions struct { unknownFields protoimpl.UnknownFields extensionFields protoimpl.ExtensionFields + // Any features defined in the specific edition. + Features *FeatureSet `protobuf:"bytes,1,opt,name=features" json:"features,omitempty"` // The parser stores options it doesn't recognize here. See above. UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` } func (x *OneofOptions) Reset() { *x = OneofOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[13] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *OneofOptions) String() string { @@ -2369,7 +2836,7 @@ func (*OneofOptions) ProtoMessage() {} func (x *OneofOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[13] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2384,6 +2851,13 @@ func (*OneofOptions) Descriptor() ([]byte, []int) { return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{13} } +func (x *OneofOptions) GetFeatures() *FeatureSet { + if x != nil { + return x.Features + } + return nil +} + func (x *OneofOptions) GetUninterpretedOption() []*UninterpretedOption { if x != nil { return x.UninterpretedOption @@ -2409,11 +2883,13 @@ type EnumOptions struct { // and strips underscored from the fields before comparison in proto3 only. // The new behavior takes `json_name` into account and applies to proto2 as // well. - // TODO(b/261750190) Remove this legacy behavior once downstream teams have + // TODO Remove this legacy behavior once downstream teams have // had time to migrate. // // Deprecated: Marked as deprecated in google/protobuf/descriptor.proto. DeprecatedLegacyJsonFieldConflicts *bool `protobuf:"varint,6,opt,name=deprecated_legacy_json_field_conflicts,json=deprecatedLegacyJsonFieldConflicts" json:"deprecated_legacy_json_field_conflicts,omitempty"` + // Any features defined in the specific edition. + Features *FeatureSet `protobuf:"bytes,7,opt,name=features" json:"features,omitempty"` // The parser stores options it doesn't recognize here. See above. UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` } @@ -2425,11 +2901,9 @@ const ( func (x *EnumOptions) Reset() { *x = EnumOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[14] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[14] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumOptions) String() string { @@ -2440,7 +2914,7 @@ func (*EnumOptions) ProtoMessage() {} func (x *EnumOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[14] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2477,6 +2951,13 @@ func (x *EnumOptions) GetDeprecatedLegacyJsonFieldConflicts() bool { return false } +func (x *EnumOptions) GetFeatures() *FeatureSet { + if x != nil { + return x.Features + } + return nil +} + func (x *EnumOptions) GetUninterpretedOption() []*UninterpretedOption { if x != nil { return x.UninterpretedOption @@ -2495,22 +2976,29 @@ type EnumValueOptions struct { // for the enum value, or it will be completely ignored; in the very least, // this is a formalization for deprecating enum values. Deprecated *bool `protobuf:"varint,1,opt,name=deprecated,def=0" json:"deprecated,omitempty"` + // Any features defined in the specific edition. + Features *FeatureSet `protobuf:"bytes,2,opt,name=features" json:"features,omitempty"` + // Indicate that fields annotated with this enum value should not be printed + // out when using debug formats, e.g. when the field contains sensitive + // credentials. + DebugRedact *bool `protobuf:"varint,3,opt,name=debug_redact,json=debugRedact,def=0" json:"debug_redact,omitempty"` + // Information about the support window of a feature value. + FeatureSupport *FieldOptions_FeatureSupport `protobuf:"bytes,4,opt,name=feature_support,json=featureSupport" json:"feature_support,omitempty"` // The parser stores options it doesn't recognize here. See above. UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` } // Default values for EnumValueOptions fields. const ( - Default_EnumValueOptions_Deprecated = bool(false) + Default_EnumValueOptions_Deprecated = bool(false) + Default_EnumValueOptions_DebugRedact = bool(false) ) func (x *EnumValueOptions) Reset() { *x = EnumValueOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[15] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[15] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumValueOptions) String() string { @@ -2521,7 +3009,7 @@ func (*EnumValueOptions) ProtoMessage() {} func (x *EnumValueOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[15] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2543,6 +3031,27 @@ func (x *EnumValueOptions) GetDeprecated() bool { return Default_EnumValueOptions_Deprecated } +func (x *EnumValueOptions) GetFeatures() *FeatureSet { + if x != nil { + return x.Features + } + return nil +} + +func (x *EnumValueOptions) GetDebugRedact() bool { + if x != nil && x.DebugRedact != nil { + return *x.DebugRedact + } + return Default_EnumValueOptions_DebugRedact +} + +func (x *EnumValueOptions) GetFeatureSupport() *FieldOptions_FeatureSupport { + if x != nil { + return x.FeatureSupport + } + return nil +} + func (x *EnumValueOptions) GetUninterpretedOption() []*UninterpretedOption { if x != nil { return x.UninterpretedOption @@ -2556,6 +3065,8 @@ type ServiceOptions struct { unknownFields protoimpl.UnknownFields extensionFields protoimpl.ExtensionFields + // Any features defined in the specific edition. + Features *FeatureSet `protobuf:"bytes,34,opt,name=features" json:"features,omitempty"` // Is this service deprecated? // Depending on the target platform, this can emit Deprecated annotations // for the service, or it will be completely ignored; in the very least, @@ -2572,11 +3083,9 @@ const ( func (x *ServiceOptions) Reset() { *x = ServiceOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[16] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[16] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ServiceOptions) String() string { @@ -2587,7 +3096,7 @@ func (*ServiceOptions) ProtoMessage() {} func (x *ServiceOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[16] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2602,6 +3111,13 @@ func (*ServiceOptions) Descriptor() ([]byte, []int) { return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{16} } +func (x *ServiceOptions) GetFeatures() *FeatureSet { + if x != nil { + return x.Features + } + return nil +} + func (x *ServiceOptions) GetDeprecated() bool { if x != nil && x.Deprecated != nil { return *x.Deprecated @@ -2628,6 +3144,8 @@ type MethodOptions struct { // this is a formalization for deprecating methods. Deprecated *bool `protobuf:"varint,33,opt,name=deprecated,def=0" json:"deprecated,omitempty"` IdempotencyLevel *MethodOptions_IdempotencyLevel `protobuf:"varint,34,opt,name=idempotency_level,json=idempotencyLevel,enum=google.protobuf.MethodOptions_IdempotencyLevel,def=0" json:"idempotency_level,omitempty"` + // Any features defined in the specific edition. + Features *FeatureSet `protobuf:"bytes,35,opt,name=features" json:"features,omitempty"` // The parser stores options it doesn't recognize here. See above. UninterpretedOption []*UninterpretedOption `protobuf:"bytes,999,rep,name=uninterpreted_option,json=uninterpretedOption" json:"uninterpreted_option,omitempty"` } @@ -2640,11 +3158,9 @@ const ( func (x *MethodOptions) Reset() { *x = MethodOptions{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[17] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[17] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *MethodOptions) String() string { @@ -2655,7 +3171,7 @@ func (*MethodOptions) ProtoMessage() {} func (x *MethodOptions) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[17] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2684,6 +3200,13 @@ func (x *MethodOptions) GetIdempotencyLevel() MethodOptions_IdempotencyLevel { return Default_MethodOptions_IdempotencyLevel } +func (x *MethodOptions) GetFeatures() *FeatureSet { + if x != nil { + return x.Features + } + return nil +} + func (x *MethodOptions) GetUninterpretedOption() []*UninterpretedOption { if x != nil { return x.UninterpretedOption @@ -2715,11 +3238,9 @@ type UninterpretedOption struct { func (x *UninterpretedOption) Reset() { *x = UninterpretedOption{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[18] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[18] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *UninterpretedOption) String() string { @@ -2730,7 +3251,7 @@ func (*UninterpretedOption) ProtoMessage() {} func (x *UninterpretedOption) ProtoReflect() protoreflect.Message { mi := &file_google_protobuf_descriptor_proto_msgTypes[18] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2791,7 +3312,168 @@ func (x *UninterpretedOption) GetAggregateValue() string { if x != nil && x.AggregateValue != nil { return *x.AggregateValue } - return "" + return "" +} + +// TODO Enums in C++ gencode (and potentially other languages) are +// not well scoped. This means that each of the feature enums below can clash +// with each other. The short names we've chosen maximize call-site +// readability, but leave us very open to this scenario. A future feature will +// be designed and implemented to handle this, hopefully before we ever hit a +// conflict here. +type FeatureSet struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + extensionFields protoimpl.ExtensionFields + + FieldPresence *FeatureSet_FieldPresence `protobuf:"varint,1,opt,name=field_presence,json=fieldPresence,enum=google.protobuf.FeatureSet_FieldPresence" json:"field_presence,omitempty"` + EnumType *FeatureSet_EnumType `protobuf:"varint,2,opt,name=enum_type,json=enumType,enum=google.protobuf.FeatureSet_EnumType" json:"enum_type,omitempty"` + RepeatedFieldEncoding *FeatureSet_RepeatedFieldEncoding `protobuf:"varint,3,opt,name=repeated_field_encoding,json=repeatedFieldEncoding,enum=google.protobuf.FeatureSet_RepeatedFieldEncoding" json:"repeated_field_encoding,omitempty"` + Utf8Validation *FeatureSet_Utf8Validation `protobuf:"varint,4,opt,name=utf8_validation,json=utf8Validation,enum=google.protobuf.FeatureSet_Utf8Validation" json:"utf8_validation,omitempty"` + MessageEncoding *FeatureSet_MessageEncoding `protobuf:"varint,5,opt,name=message_encoding,json=messageEncoding,enum=google.protobuf.FeatureSet_MessageEncoding" json:"message_encoding,omitempty"` + JsonFormat *FeatureSet_JsonFormat `protobuf:"varint,6,opt,name=json_format,json=jsonFormat,enum=google.protobuf.FeatureSet_JsonFormat" json:"json_format,omitempty"` +} + +func (x *FeatureSet) Reset() { + *x = FeatureSet{} + mi := &file_google_protobuf_descriptor_proto_msgTypes[19] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FeatureSet) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FeatureSet) ProtoMessage() {} + +func (x *FeatureSet) ProtoReflect() protoreflect.Message { + mi := &file_google_protobuf_descriptor_proto_msgTypes[19] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FeatureSet.ProtoReflect.Descriptor instead. +func (*FeatureSet) Descriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{19} +} + +func (x *FeatureSet) GetFieldPresence() FeatureSet_FieldPresence { + if x != nil && x.FieldPresence != nil { + return *x.FieldPresence + } + return FeatureSet_FIELD_PRESENCE_UNKNOWN +} + +func (x *FeatureSet) GetEnumType() FeatureSet_EnumType { + if x != nil && x.EnumType != nil { + return *x.EnumType + } + return FeatureSet_ENUM_TYPE_UNKNOWN +} + +func (x *FeatureSet) GetRepeatedFieldEncoding() FeatureSet_RepeatedFieldEncoding { + if x != nil && x.RepeatedFieldEncoding != nil { + return *x.RepeatedFieldEncoding + } + return FeatureSet_REPEATED_FIELD_ENCODING_UNKNOWN +} + +func (x *FeatureSet) GetUtf8Validation() FeatureSet_Utf8Validation { + if x != nil && x.Utf8Validation != nil { + return *x.Utf8Validation + } + return FeatureSet_UTF8_VALIDATION_UNKNOWN +} + +func (x *FeatureSet) GetMessageEncoding() FeatureSet_MessageEncoding { + if x != nil && x.MessageEncoding != nil { + return *x.MessageEncoding + } + return FeatureSet_MESSAGE_ENCODING_UNKNOWN +} + +func (x *FeatureSet) GetJsonFormat() FeatureSet_JsonFormat { + if x != nil && x.JsonFormat != nil { + return *x.JsonFormat + } + return FeatureSet_JSON_FORMAT_UNKNOWN +} + +// A compiled specification for the defaults of a set of features. These +// messages are generated from FeatureSet extensions and can be used to seed +// feature resolution. The resolution with this object becomes a simple search +// for the closest matching edition, followed by proto merges. +type FeatureSetDefaults struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Defaults []*FeatureSetDefaults_FeatureSetEditionDefault `protobuf:"bytes,1,rep,name=defaults" json:"defaults,omitempty"` + // The minimum supported edition (inclusive) when this was constructed. + // Editions before this will not have defaults. + MinimumEdition *Edition `protobuf:"varint,4,opt,name=minimum_edition,json=minimumEdition,enum=google.protobuf.Edition" json:"minimum_edition,omitempty"` + // The maximum known edition (inclusive) when this was constructed. Editions + // after this will not have reliable defaults. + MaximumEdition *Edition `protobuf:"varint,5,opt,name=maximum_edition,json=maximumEdition,enum=google.protobuf.Edition" json:"maximum_edition,omitempty"` +} + +func (x *FeatureSetDefaults) Reset() { + *x = FeatureSetDefaults{} + mi := &file_google_protobuf_descriptor_proto_msgTypes[20] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FeatureSetDefaults) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FeatureSetDefaults) ProtoMessage() {} + +func (x *FeatureSetDefaults) ProtoReflect() protoreflect.Message { + mi := &file_google_protobuf_descriptor_proto_msgTypes[20] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FeatureSetDefaults.ProtoReflect.Descriptor instead. +func (*FeatureSetDefaults) Descriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{20} +} + +func (x *FeatureSetDefaults) GetDefaults() []*FeatureSetDefaults_FeatureSetEditionDefault { + if x != nil { + return x.Defaults + } + return nil +} + +func (x *FeatureSetDefaults) GetMinimumEdition() Edition { + if x != nil && x.MinimumEdition != nil { + return *x.MinimumEdition + } + return Edition_EDITION_UNKNOWN +} + +func (x *FeatureSetDefaults) GetMaximumEdition() Edition { + if x != nil && x.MaximumEdition != nil { + return *x.MaximumEdition + } + return Edition_EDITION_UNKNOWN } // Encapsulates information about the original source file from which a @@ -2854,11 +3536,9 @@ type SourceCodeInfo struct { func (x *SourceCodeInfo) Reset() { *x = SourceCodeInfo{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[19] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[21] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *SourceCodeInfo) String() string { @@ -2868,8 +3548,8 @@ func (x *SourceCodeInfo) String() string { func (*SourceCodeInfo) ProtoMessage() {} func (x *SourceCodeInfo) ProtoReflect() protoreflect.Message { - mi := &file_google_protobuf_descriptor_proto_msgTypes[19] - if protoimpl.UnsafeEnabled && x != nil { + mi := &file_google_protobuf_descriptor_proto_msgTypes[21] + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2881,7 +3561,7 @@ func (x *SourceCodeInfo) ProtoReflect() protoreflect.Message { // Deprecated: Use SourceCodeInfo.ProtoReflect.Descriptor instead. func (*SourceCodeInfo) Descriptor() ([]byte, []int) { - return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{19} + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{21} } func (x *SourceCodeInfo) GetLocation() []*SourceCodeInfo_Location { @@ -2906,11 +3586,9 @@ type GeneratedCodeInfo struct { func (x *GeneratedCodeInfo) Reset() { *x = GeneratedCodeInfo{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[20] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[22] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *GeneratedCodeInfo) String() string { @@ -2920,8 +3598,8 @@ func (x *GeneratedCodeInfo) String() string { func (*GeneratedCodeInfo) ProtoMessage() {} func (x *GeneratedCodeInfo) ProtoReflect() protoreflect.Message { - mi := &file_google_protobuf_descriptor_proto_msgTypes[20] - if protoimpl.UnsafeEnabled && x != nil { + mi := &file_google_protobuf_descriptor_proto_msgTypes[22] + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -2933,7 +3611,7 @@ func (x *GeneratedCodeInfo) ProtoReflect() protoreflect.Message { // Deprecated: Use GeneratedCodeInfo.ProtoReflect.Descriptor instead. func (*GeneratedCodeInfo) Descriptor() ([]byte, []int) { - return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{20} + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{22} } func (x *GeneratedCodeInfo) GetAnnotation() []*GeneratedCodeInfo_Annotation { @@ -2955,11 +3633,9 @@ type DescriptorProto_ExtensionRange struct { func (x *DescriptorProto_ExtensionRange) Reset() { *x = DescriptorProto_ExtensionRange{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[21] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[23] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *DescriptorProto_ExtensionRange) String() string { @@ -2969,8 +3645,8 @@ func (x *DescriptorProto_ExtensionRange) String() string { func (*DescriptorProto_ExtensionRange) ProtoMessage() {} func (x *DescriptorProto_ExtensionRange) ProtoReflect() protoreflect.Message { - mi := &file_google_protobuf_descriptor_proto_msgTypes[21] - if protoimpl.UnsafeEnabled && x != nil { + mi := &file_google_protobuf_descriptor_proto_msgTypes[23] + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3020,11 +3696,9 @@ type DescriptorProto_ReservedRange struct { func (x *DescriptorProto_ReservedRange) Reset() { *x = DescriptorProto_ReservedRange{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[22] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[24] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *DescriptorProto_ReservedRange) String() string { @@ -3034,8 +3708,8 @@ func (x *DescriptorProto_ReservedRange) String() string { func (*DescriptorProto_ReservedRange) ProtoMessage() {} func (x *DescriptorProto_ReservedRange) ProtoReflect() protoreflect.Message { - mi := &file_google_protobuf_descriptor_proto_msgTypes[22] - if protoimpl.UnsafeEnabled && x != nil { + mi := &file_google_protobuf_descriptor_proto_msgTypes[24] + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3078,10 +3752,6 @@ type ExtensionRangeOptions_Declaration struct { // Metadata.type, Declaration.type must have a leading dot for messages // and enums. Type *string `protobuf:"bytes,3,opt,name=type" json:"type,omitempty"` - // Deprecated. Please use "repeated". - // - // Deprecated: Marked as deprecated in google/protobuf/descriptor.proto. - IsRepeated *bool `protobuf:"varint,4,opt,name=is_repeated,json=isRepeated" json:"is_repeated,omitempty"` // If true, indicates that the number is reserved in the extension range, // and any extension field with the number will fail to compile. Set this // when a declared extension field is deleted. @@ -3093,11 +3763,9 @@ type ExtensionRangeOptions_Declaration struct { func (x *ExtensionRangeOptions_Declaration) Reset() { *x = ExtensionRangeOptions_Declaration{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[23] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[25] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *ExtensionRangeOptions_Declaration) String() string { @@ -3107,8 +3775,8 @@ func (x *ExtensionRangeOptions_Declaration) String() string { func (*ExtensionRangeOptions_Declaration) ProtoMessage() {} func (x *ExtensionRangeOptions_Declaration) ProtoReflect() protoreflect.Message { - mi := &file_google_protobuf_descriptor_proto_msgTypes[23] - if protoimpl.UnsafeEnabled && x != nil { + mi := &file_google_protobuf_descriptor_proto_msgTypes[25] + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3144,14 +3812,6 @@ func (x *ExtensionRangeOptions_Declaration) GetType() string { return "" } -// Deprecated: Marked as deprecated in google/protobuf/descriptor.proto. -func (x *ExtensionRangeOptions_Declaration) GetIsRepeated() bool { - if x != nil && x.IsRepeated != nil { - return *x.IsRepeated - } - return false -} - func (x *ExtensionRangeOptions_Declaration) GetReserved() bool { if x != nil && x.Reserved != nil { return *x.Reserved @@ -3183,11 +3843,9 @@ type EnumDescriptorProto_EnumReservedRange struct { func (x *EnumDescriptorProto_EnumReservedRange) Reset() { *x = EnumDescriptorProto_EnumReservedRange{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[24] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[26] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *EnumDescriptorProto_EnumReservedRange) String() string { @@ -3197,8 +3855,8 @@ func (x *EnumDescriptorProto_EnumReservedRange) String() string { func (*EnumDescriptorProto_EnumReservedRange) ProtoMessage() {} func (x *EnumDescriptorProto_EnumReservedRange) ProtoReflect() protoreflect.Message { - mi := &file_google_protobuf_descriptor_proto_msgTypes[24] - if protoimpl.UnsafeEnabled && x != nil { + mi := &file_google_protobuf_descriptor_proto_msgTypes[26] + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3227,6 +3885,139 @@ func (x *EnumDescriptorProto_EnumReservedRange) GetEnd() int32 { return 0 } +type FieldOptions_EditionDefault struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Edition *Edition `protobuf:"varint,3,opt,name=edition,enum=google.protobuf.Edition" json:"edition,omitempty"` + Value *string `protobuf:"bytes,2,opt,name=value" json:"value,omitempty"` // Textproto value. +} + +func (x *FieldOptions_EditionDefault) Reset() { + *x = FieldOptions_EditionDefault{} + mi := &file_google_protobuf_descriptor_proto_msgTypes[27] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FieldOptions_EditionDefault) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FieldOptions_EditionDefault) ProtoMessage() {} + +func (x *FieldOptions_EditionDefault) ProtoReflect() protoreflect.Message { + mi := &file_google_protobuf_descriptor_proto_msgTypes[27] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FieldOptions_EditionDefault.ProtoReflect.Descriptor instead. +func (*FieldOptions_EditionDefault) Descriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{12, 0} +} + +func (x *FieldOptions_EditionDefault) GetEdition() Edition { + if x != nil && x.Edition != nil { + return *x.Edition + } + return Edition_EDITION_UNKNOWN +} + +func (x *FieldOptions_EditionDefault) GetValue() string { + if x != nil && x.Value != nil { + return *x.Value + } + return "" +} + +// Information about the support window of a feature. +type FieldOptions_FeatureSupport struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The edition that this feature was first available in. In editions + // earlier than this one, the default assigned to EDITION_LEGACY will be + // used, and proto files will not be able to override it. + EditionIntroduced *Edition `protobuf:"varint,1,opt,name=edition_introduced,json=editionIntroduced,enum=google.protobuf.Edition" json:"edition_introduced,omitempty"` + // The edition this feature becomes deprecated in. Using this after this + // edition may trigger warnings. + EditionDeprecated *Edition `protobuf:"varint,2,opt,name=edition_deprecated,json=editionDeprecated,enum=google.protobuf.Edition" json:"edition_deprecated,omitempty"` + // The deprecation warning text if this feature is used after the edition it + // was marked deprecated in. + DeprecationWarning *string `protobuf:"bytes,3,opt,name=deprecation_warning,json=deprecationWarning" json:"deprecation_warning,omitempty"` + // The edition this feature is no longer available in. In editions after + // this one, the last default assigned will be used, and proto files will + // not be able to override it. + EditionRemoved *Edition `protobuf:"varint,4,opt,name=edition_removed,json=editionRemoved,enum=google.protobuf.Edition" json:"edition_removed,omitempty"` +} + +func (x *FieldOptions_FeatureSupport) Reset() { + *x = FieldOptions_FeatureSupport{} + mi := &file_google_protobuf_descriptor_proto_msgTypes[28] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FieldOptions_FeatureSupport) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FieldOptions_FeatureSupport) ProtoMessage() {} + +func (x *FieldOptions_FeatureSupport) ProtoReflect() protoreflect.Message { + mi := &file_google_protobuf_descriptor_proto_msgTypes[28] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FieldOptions_FeatureSupport.ProtoReflect.Descriptor instead. +func (*FieldOptions_FeatureSupport) Descriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{12, 1} +} + +func (x *FieldOptions_FeatureSupport) GetEditionIntroduced() Edition { + if x != nil && x.EditionIntroduced != nil { + return *x.EditionIntroduced + } + return Edition_EDITION_UNKNOWN +} + +func (x *FieldOptions_FeatureSupport) GetEditionDeprecated() Edition { + if x != nil && x.EditionDeprecated != nil { + return *x.EditionDeprecated + } + return Edition_EDITION_UNKNOWN +} + +func (x *FieldOptions_FeatureSupport) GetDeprecationWarning() string { + if x != nil && x.DeprecationWarning != nil { + return *x.DeprecationWarning + } + return "" +} + +func (x *FieldOptions_FeatureSupport) GetEditionRemoved() Edition { + if x != nil && x.EditionRemoved != nil { + return *x.EditionRemoved + } + return Edition_EDITION_UNKNOWN +} + // The name of the uninterpreted option. Each string represents a segment in // a dot-separated name. is_extension is true iff a segment represents an // extension (denoted with parentheses in options specs in .proto files). @@ -3243,11 +4034,9 @@ type UninterpretedOption_NamePart struct { func (x *UninterpretedOption_NamePart) Reset() { *x = UninterpretedOption_NamePart{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[25] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[29] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *UninterpretedOption_NamePart) String() string { @@ -3257,8 +4046,8 @@ func (x *UninterpretedOption_NamePart) String() string { func (*UninterpretedOption_NamePart) ProtoMessage() {} func (x *UninterpretedOption_NamePart) ProtoReflect() protoreflect.Message { - mi := &file_google_protobuf_descriptor_proto_msgTypes[25] - if protoimpl.UnsafeEnabled && x != nil { + mi := &file_google_protobuf_descriptor_proto_msgTypes[29] + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3287,6 +4076,73 @@ func (x *UninterpretedOption_NamePart) GetIsExtension() bool { return false } +// A map from every known edition with a unique set of defaults to its +// defaults. Not all editions may be contained here. For a given edition, +// the defaults at the closest matching edition ordered at or before it should +// be used. This field must be in strict ascending order by edition. +type FeatureSetDefaults_FeatureSetEditionDefault struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Edition *Edition `protobuf:"varint,3,opt,name=edition,enum=google.protobuf.Edition" json:"edition,omitempty"` + // Defaults of features that can be overridden in this edition. + OverridableFeatures *FeatureSet `protobuf:"bytes,4,opt,name=overridable_features,json=overridableFeatures" json:"overridable_features,omitempty"` + // Defaults of features that can't be overridden in this edition. + FixedFeatures *FeatureSet `protobuf:"bytes,5,opt,name=fixed_features,json=fixedFeatures" json:"fixed_features,omitempty"` +} + +func (x *FeatureSetDefaults_FeatureSetEditionDefault) Reset() { + *x = FeatureSetDefaults_FeatureSetEditionDefault{} + mi := &file_google_protobuf_descriptor_proto_msgTypes[30] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *FeatureSetDefaults_FeatureSetEditionDefault) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FeatureSetDefaults_FeatureSetEditionDefault) ProtoMessage() {} + +func (x *FeatureSetDefaults_FeatureSetEditionDefault) ProtoReflect() protoreflect.Message { + mi := &file_google_protobuf_descriptor_proto_msgTypes[30] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FeatureSetDefaults_FeatureSetEditionDefault.ProtoReflect.Descriptor instead. +func (*FeatureSetDefaults_FeatureSetEditionDefault) Descriptor() ([]byte, []int) { + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{20, 0} +} + +func (x *FeatureSetDefaults_FeatureSetEditionDefault) GetEdition() Edition { + if x != nil && x.Edition != nil { + return *x.Edition + } + return Edition_EDITION_UNKNOWN +} + +func (x *FeatureSetDefaults_FeatureSetEditionDefault) GetOverridableFeatures() *FeatureSet { + if x != nil { + return x.OverridableFeatures + } + return nil +} + +func (x *FeatureSetDefaults_FeatureSetEditionDefault) GetFixedFeatures() *FeatureSet { + if x != nil { + return x.FixedFeatures + } + return nil +} + type SourceCodeInfo_Location struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3296,7 +4152,7 @@ type SourceCodeInfo_Location struct { // location. // // Each element is a field number or an index. They form a path from - // the root FileDescriptorProto to the place where the definition occurs. + // the root FileDescriptorProto to the place where the definition appears. // For example, this path: // // [ 4, 3, 2, 7, 1 ] @@ -3387,11 +4243,9 @@ type SourceCodeInfo_Location struct { func (x *SourceCodeInfo_Location) Reset() { *x = SourceCodeInfo_Location{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[26] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[31] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *SourceCodeInfo_Location) String() string { @@ -3401,8 +4255,8 @@ func (x *SourceCodeInfo_Location) String() string { func (*SourceCodeInfo_Location) ProtoMessage() {} func (x *SourceCodeInfo_Location) ProtoReflect() protoreflect.Message { - mi := &file_google_protobuf_descriptor_proto_msgTypes[26] - if protoimpl.UnsafeEnabled && x != nil { + mi := &file_google_protobuf_descriptor_proto_msgTypes[31] + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3414,7 +4268,7 @@ func (x *SourceCodeInfo_Location) ProtoReflect() protoreflect.Message { // Deprecated: Use SourceCodeInfo_Location.ProtoReflect.Descriptor instead. func (*SourceCodeInfo_Location) Descriptor() ([]byte, []int) { - return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{19, 0} + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{21, 0} } func (x *SourceCodeInfo_Location) GetPath() []int32 { @@ -3474,11 +4328,9 @@ type GeneratedCodeInfo_Annotation struct { func (x *GeneratedCodeInfo_Annotation) Reset() { *x = GeneratedCodeInfo_Annotation{} - if protoimpl.UnsafeEnabled { - mi := &file_google_protobuf_descriptor_proto_msgTypes[27] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_google_protobuf_descriptor_proto_msgTypes[32] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *GeneratedCodeInfo_Annotation) String() string { @@ -3488,8 +4340,8 @@ func (x *GeneratedCodeInfo_Annotation) String() string { func (*GeneratedCodeInfo_Annotation) ProtoMessage() {} func (x *GeneratedCodeInfo_Annotation) ProtoReflect() protoreflect.Message { - mi := &file_google_protobuf_descriptor_proto_msgTypes[27] - if protoimpl.UnsafeEnabled && x != nil { + mi := &file_google_protobuf_descriptor_proto_msgTypes[32] + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -3501,7 +4353,7 @@ func (x *GeneratedCodeInfo_Annotation) ProtoReflect() protoreflect.Message { // Deprecated: Use GeneratedCodeInfo_Annotation.ProtoReflect.Descriptor instead. func (*GeneratedCodeInfo_Annotation) Descriptor() ([]byte, []int) { - return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{20, 0} + return file_google_protobuf_descriptor_proto_rawDescGZIP(), []int{22, 0} } func (x *GeneratedCodeInfo_Annotation) GetPath() []int32 { @@ -3550,7 +4402,7 @@ var file_google_protobuf_descriptor_proto_rawDesc = []byte{ 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x04, 0x66, 0x69, - 0x6c, 0x65, 0x22, 0xfe, 0x04, 0x0a, 0x13, 0x46, 0x69, 0x6c, 0x65, 0x44, 0x65, 0x73, 0x63, 0x72, + 0x6c, 0x65, 0x22, 0x98, 0x05, 0x0a, 0x13, 0x46, 0x69, 0x6c, 0x65, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, @@ -3588,250 +4440,250 @@ var file_google_protobuf_descriptor_proto_rawDesc = []byte{ 0x75, 0x66, 0x2e, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x0e, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x79, 0x6e, 0x74, 0x61, 0x78, 0x18, 0x0c, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x06, 0x73, 0x79, 0x6e, 0x74, 0x61, 0x78, 0x12, 0x18, 0x0a, 0x07, 0x65, 0x64, 0x69, - 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x65, 0x64, 0x69, 0x74, - 0x69, 0x6f, 0x6e, 0x22, 0xb9, 0x06, 0x0a, 0x0f, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, - 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x3b, 0x0a, 0x05, 0x66, - 0x69, 0x65, 0x6c, 0x64, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x67, 0x6f, 0x6f, - 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, - 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, - 0x6f, 0x52, 0x05, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x12, 0x43, 0x0a, 0x09, 0x65, 0x78, 0x74, 0x65, - 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x67, 0x6f, - 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, - 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, - 0x74, 0x6f, 0x52, 0x09, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x41, 0x0a, - 0x0b, 0x6e, 0x65, 0x73, 0x74, 0x65, 0x64, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, - 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x0a, 0x6e, 0x65, 0x73, 0x74, 0x65, 0x64, 0x54, 0x79, 0x70, 0x65, - 0x12, 0x41, 0x0a, 0x09, 0x65, 0x6e, 0x75, 0x6d, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, - 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x08, 0x65, 0x6e, 0x75, 0x6d, 0x54, - 0x79, 0x70, 0x65, 0x12, 0x58, 0x0a, 0x0f, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, - 0x5f, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2f, 0x2e, 0x67, + 0x09, 0x52, 0x06, 0x73, 0x79, 0x6e, 0x74, 0x61, 0x78, 0x12, 0x32, 0x0a, 0x07, 0x65, 0x64, 0x69, + 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x64, 0x69, + 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x07, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0xb9, 0x06, + 0x0a, 0x0f, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x3b, 0x0a, 0x05, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x18, 0x02, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, + 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x05, 0x66, 0x69, 0x65, + 0x6c, 0x64, 0x12, 0x43, 0x0a, 0x09, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x18, + 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, + 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x09, 0x65, 0x78, + 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x41, 0x0a, 0x0b, 0x6e, 0x65, 0x73, 0x74, 0x65, + 0x64, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, - 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, - 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x52, 0x0e, 0x65, - 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x44, 0x0a, - 0x0a, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x5f, 0x64, 0x65, 0x63, 0x6c, 0x18, 0x08, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x25, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0x62, 0x75, 0x66, 0x2e, 0x4f, 0x6e, 0x65, 0x6f, 0x66, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, - 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x09, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x44, - 0x65, 0x63, 0x6c, 0x12, 0x39, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x07, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x55, - 0x0a, 0x0e, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x67, 0x65, - 0x18, 0x09, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, + 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x0a, + 0x6e, 0x65, 0x73, 0x74, 0x65, 0x64, 0x54, 0x79, 0x70, 0x65, 0x12, 0x41, 0x0a, 0x09, 0x65, 0x6e, + 0x75, 0x6d, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, + 0x45, 0x6e, 0x75, 0x6d, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, + 0x6f, 0x74, 0x6f, 0x52, 0x08, 0x65, 0x6e, 0x75, 0x6d, 0x54, 0x79, 0x70, 0x65, 0x12, 0x58, 0x0a, + 0x0f, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x5f, 0x72, 0x61, 0x6e, 0x67, 0x65, + 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2f, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, - 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x52, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, + 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, + 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x52, 0x0e, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, + 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x44, 0x0a, 0x0a, 0x6f, 0x6e, 0x65, 0x6f, 0x66, + 0x5f, 0x64, 0x65, 0x63, 0x6c, 0x18, 0x08, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x4f, 0x6e, + 0x65, 0x6f, 0x66, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, + 0x74, 0x6f, 0x52, 0x09, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x44, 0x65, 0x63, 0x6c, 0x12, 0x39, 0x0a, + 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, + 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, + 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x55, 0x0a, 0x0e, 0x72, 0x65, 0x73, 0x65, + 0x72, 0x76, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x18, 0x09, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x2e, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x75, 0x66, 0x2e, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x52, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x67, 0x65, + 0x52, 0x0d, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, + 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x5f, 0x6e, 0x61, 0x6d, 0x65, + 0x18, 0x0a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, + 0x4e, 0x61, 0x6d, 0x65, 0x1a, 0x7a, 0x0a, 0x0e, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, + 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, + 0x65, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x12, 0x40, + 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x26, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x1a, 0x37, 0x0a, 0x0d, 0x52, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x67, + 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x22, 0xcc, 0x04, 0x0a, 0x15, 0x45, 0x78, + 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, + 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, + 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, + 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x59, 0x0a, + 0x0b, 0x64, 0x65, 0x63, 0x6c, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x32, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x61, + 0x6e, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x44, 0x65, 0x63, 0x6c, 0x61, + 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x03, 0x88, 0x01, 0x02, 0x52, 0x0b, 0x64, 0x65, 0x63, + 0x6c, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x37, 0x0a, 0x08, 0x66, 0x65, 0x61, 0x74, + 0x75, 0x72, 0x65, 0x73, 0x18, 0x32, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x65, 0x61, + 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x73, 0x12, 0x6d, 0x0a, 0x0c, 0x76, 0x65, 0x72, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x38, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, + 0x69, 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, + 0x56, 0x65, 0x72, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x3a, 0x0a, 0x55, 0x4e, 0x56, 0x45, 0x52, 0x49, 0x46, 0x49, 0x45, 0x44, 0x42, 0x03, 0x88, + 0x01, 0x02, 0x52, 0x0c, 0x76, 0x65, 0x72, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x1a, 0x94, 0x01, 0x0a, 0x0b, 0x44, 0x65, 0x63, 0x6c, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x75, 0x6c, 0x6c, + 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x66, 0x75, 0x6c, + 0x6c, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x73, + 0x65, 0x72, 0x76, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x73, + 0x65, 0x72, 0x76, 0x65, 0x64, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, + 0x64, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, + 0x64, 0x4a, 0x04, 0x08, 0x04, 0x10, 0x05, 0x22, 0x34, 0x0a, 0x11, 0x56, 0x65, 0x72, 0x69, 0x66, + 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0f, 0x0a, 0x0b, + 0x44, 0x45, 0x43, 0x4c, 0x41, 0x52, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x10, 0x00, 0x12, 0x0e, 0x0a, + 0x0a, 0x55, 0x4e, 0x56, 0x45, 0x52, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x01, 0x2a, 0x09, 0x08, + 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0xc1, 0x06, 0x0a, 0x14, 0x46, 0x69, 0x65, + 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x41, 0x0a, + 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2b, 0x2e, 0x67, + 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, + 0x69, 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, + 0x12, 0x3e, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2a, + 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, + 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, + 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1a, 0x0a, + 0x08, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x08, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x64, 0x65, 0x66, + 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0c, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1f, + 0x0a, 0x0b, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x5f, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x09, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x0a, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, + 0x1b, 0x0a, 0x09, 0x6a, 0x73, 0x6f, 0x6e, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x0a, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x08, 0x6a, 0x73, 0x6f, 0x6e, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x37, 0x0a, 0x07, + 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, + 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x27, 0x0a, 0x0f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, 0x5f, + 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x18, 0x11, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x22, 0xb6, + 0x02, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, + 0x44, 0x4f, 0x55, 0x42, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, + 0x5f, 0x46, 0x4c, 0x4f, 0x41, 0x54, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, + 0x5f, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x03, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, + 0x5f, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x04, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, + 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x33, 0x32, 0x10, 0x05, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x59, 0x50, + 0x45, 0x5f, 0x46, 0x49, 0x58, 0x45, 0x44, 0x36, 0x34, 0x10, 0x06, 0x12, 0x10, 0x0a, 0x0c, 0x54, + 0x59, 0x50, 0x45, 0x5f, 0x46, 0x49, 0x58, 0x45, 0x44, 0x33, 0x32, 0x10, 0x07, 0x12, 0x0d, 0x0a, + 0x09, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x42, 0x4f, 0x4f, 0x4c, 0x10, 0x08, 0x12, 0x0f, 0x0a, 0x0b, + 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x09, 0x12, 0x0e, 0x0a, + 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x10, 0x0a, 0x12, 0x10, 0x0a, + 0x0c, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4d, 0x45, 0x53, 0x53, 0x41, 0x47, 0x45, 0x10, 0x0b, 0x12, + 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x42, 0x59, 0x54, 0x45, 0x53, 0x10, 0x0c, 0x12, + 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x49, 0x4e, 0x54, 0x33, 0x32, 0x10, 0x0d, + 0x12, 0x0d, 0x0a, 0x09, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x45, 0x4e, 0x55, 0x4d, 0x10, 0x0e, 0x12, + 0x11, 0x0a, 0x0d, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x46, 0x49, 0x58, 0x45, 0x44, 0x33, 0x32, + 0x10, 0x0f, 0x12, 0x11, 0x0a, 0x0d, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x46, 0x49, 0x58, 0x45, + 0x44, 0x36, 0x34, 0x10, 0x10, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x49, + 0x4e, 0x54, 0x33, 0x32, 0x10, 0x11, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, + 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x12, 0x22, 0x43, 0x0a, 0x05, 0x4c, 0x61, 0x62, 0x65, 0x6c, + 0x12, 0x12, 0x0a, 0x0e, 0x4c, 0x41, 0x42, 0x45, 0x4c, 0x5f, 0x4f, 0x50, 0x54, 0x49, 0x4f, 0x4e, + 0x41, 0x4c, 0x10, 0x01, 0x12, 0x12, 0x0a, 0x0e, 0x4c, 0x41, 0x42, 0x45, 0x4c, 0x5f, 0x52, 0x45, + 0x50, 0x45, 0x41, 0x54, 0x45, 0x44, 0x10, 0x03, 0x12, 0x12, 0x0a, 0x0e, 0x4c, 0x41, 0x42, 0x45, + 0x4c, 0x5f, 0x52, 0x45, 0x51, 0x55, 0x49, 0x52, 0x45, 0x44, 0x10, 0x02, 0x22, 0x63, 0x0a, 0x14, + 0x4f, 0x6e, 0x65, 0x6f, 0x66, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, + 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x37, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x4f, 0x6e, 0x65, 0x6f, + 0x66, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x22, 0xe3, 0x02, 0x0a, 0x13, 0x45, 0x6e, 0x75, 0x6d, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, + 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x3f, 0x0a, + 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x29, 0x2e, 0x67, + 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, + 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, + 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x36, + 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1c, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x5d, 0x0a, 0x0e, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, + 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x36, + 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x52, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x52, 0x0d, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, - 0x64, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x0a, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x72, 0x65, - 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x1a, 0x7a, 0x0a, 0x0e, 0x45, 0x78, - 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, - 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x73, 0x74, 0x61, - 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x03, 0x65, 0x6e, 0x64, 0x12, 0x40, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, - 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x37, 0x0a, 0x0d, 0x52, 0x65, 0x73, 0x65, 0x72, 0x76, - 0x65, 0x64, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, - 0x03, 0x65, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x22, - 0xad, 0x04, 0x0a, 0x15, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x61, 0x6e, - 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, - 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, - 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, - 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, - 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x12, 0x59, 0x0a, 0x0b, 0x64, 0x65, 0x63, 0x6c, 0x61, 0x72, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x32, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, - 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x78, 0x74, 0x65, 0x6e, - 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x2e, 0x44, 0x65, 0x63, 0x6c, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x03, 0x88, 0x01, - 0x02, 0x52, 0x0b, 0x64, 0x65, 0x63, 0x6c, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x68, - 0x0a, 0x0c, 0x76, 0x65, 0x72, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x0e, 0x32, 0x38, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, - 0x52, 0x61, 0x6e, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x56, 0x65, 0x72, - 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, 0x61, 0x74, 0x65, 0x3a, 0x0a, - 0x55, 0x4e, 0x56, 0x45, 0x52, 0x49, 0x46, 0x49, 0x45, 0x44, 0x52, 0x0c, 0x76, 0x65, 0x72, 0x69, - 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0xb3, 0x01, 0x0a, 0x0b, 0x44, 0x65, 0x63, - 0x6c, 0x61, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, - 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, - 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x75, 0x6c, 0x6c, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x08, 0x66, 0x75, 0x6c, 0x6c, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x12, 0x0a, - 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, 0x79, 0x70, - 0x65, 0x12, 0x23, 0x0a, 0x0b, 0x69, 0x73, 0x5f, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, 0x0a, 0x69, 0x73, 0x52, 0x65, - 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, - 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, - 0x65, 0x64, 0x12, 0x1a, 0x0a, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18, 0x06, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x22, 0x34, - 0x0a, 0x11, 0x56, 0x65, 0x72, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x53, 0x74, - 0x61, 0x74, 0x65, 0x12, 0x0f, 0x0a, 0x0b, 0x44, 0x45, 0x43, 0x4c, 0x41, 0x52, 0x41, 0x54, 0x49, - 0x4f, 0x4e, 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x55, 0x4e, 0x56, 0x45, 0x52, 0x49, 0x46, 0x49, - 0x45, 0x44, 0x10, 0x01, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, - 0xc1, 0x06, 0x0a, 0x14, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, + 0x64, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x72, 0x65, + 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x4e, 0x61, 0x6d, 0x65, 0x1a, 0x3b, 0x0a, 0x11, 0x45, 0x6e, + 0x75, 0x6d, 0x52, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, + 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, + 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x22, 0x83, 0x01, 0x0a, 0x18, 0x45, 0x6e, 0x75, 0x6d, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, + 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, + 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, + 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x21, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xa7, 0x01, + 0x0a, 0x16, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x16, 0x0a, 0x06, - 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x6e, 0x75, - 0x6d, 0x62, 0x65, 0x72, 0x12, 0x41, 0x0a, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x2b, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, - 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x61, 0x62, 0x65, 0x6c, - 0x52, 0x05, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x3e, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x44, 0x65, 0x73, - 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x54, 0x79, 0x70, - 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x79, 0x70, 0x65, 0x5f, - 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x79, 0x70, 0x65, - 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x65, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x65, - 0x12, 0x23, 0x0a, 0x0d, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, - 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x5f, 0x69, - 0x6e, 0x64, 0x65, 0x78, 0x18, 0x09, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x6f, 0x6e, 0x65, 0x6f, - 0x66, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x12, 0x1b, 0x0a, 0x09, 0x6a, 0x73, 0x6f, 0x6e, 0x5f, 0x6e, - 0x61, 0x6d, 0x65, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x6a, 0x73, 0x6f, 0x6e, 0x4e, - 0x61, 0x6d, 0x65, 0x12, 0x37, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x08, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x27, 0x0a, 0x0f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x18, - 0x11, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x22, 0xb6, 0x02, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0f, - 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x44, 0x4f, 0x55, 0x42, 0x4c, 0x45, 0x10, 0x01, 0x12, - 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x46, 0x4c, 0x4f, 0x41, 0x54, 0x10, 0x02, 0x12, - 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x03, 0x12, - 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x04, - 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x54, 0x33, 0x32, 0x10, 0x05, - 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x46, 0x49, 0x58, 0x45, 0x44, 0x36, 0x34, - 0x10, 0x06, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x46, 0x49, 0x58, 0x45, 0x44, - 0x33, 0x32, 0x10, 0x07, 0x12, 0x0d, 0x0a, 0x09, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x42, 0x4f, 0x4f, - 0x4c, 0x10, 0x08, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x54, 0x52, 0x49, - 0x4e, 0x47, 0x10, 0x09, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x47, 0x52, 0x4f, - 0x55, 0x50, 0x10, 0x0a, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4d, 0x45, 0x53, - 0x53, 0x41, 0x47, 0x45, 0x10, 0x0b, 0x12, 0x0e, 0x0a, 0x0a, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x42, - 0x59, 0x54, 0x45, 0x53, 0x10, 0x0c, 0x12, 0x0f, 0x0a, 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x55, - 0x49, 0x4e, 0x54, 0x33, 0x32, 0x10, 0x0d, 0x12, 0x0d, 0x0a, 0x09, 0x54, 0x59, 0x50, 0x45, 0x5f, - 0x45, 0x4e, 0x55, 0x4d, 0x10, 0x0e, 0x12, 0x11, 0x0a, 0x0d, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, - 0x46, 0x49, 0x58, 0x45, 0x44, 0x33, 0x32, 0x10, 0x0f, 0x12, 0x11, 0x0a, 0x0d, 0x54, 0x59, 0x50, - 0x45, 0x5f, 0x53, 0x46, 0x49, 0x58, 0x45, 0x44, 0x36, 0x34, 0x10, 0x10, 0x12, 0x0f, 0x0a, 0x0b, - 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x49, 0x4e, 0x54, 0x33, 0x32, 0x10, 0x11, 0x12, 0x0f, 0x0a, - 0x0b, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x53, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x12, 0x22, 0x43, - 0x0a, 0x05, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x12, 0x12, 0x0a, 0x0e, 0x4c, 0x41, 0x42, 0x45, 0x4c, - 0x5f, 0x4f, 0x50, 0x54, 0x49, 0x4f, 0x4e, 0x41, 0x4c, 0x10, 0x01, 0x12, 0x12, 0x0a, 0x0e, 0x4c, - 0x41, 0x42, 0x45, 0x4c, 0x5f, 0x52, 0x45, 0x51, 0x55, 0x49, 0x52, 0x45, 0x44, 0x10, 0x02, 0x12, - 0x12, 0x0a, 0x0e, 0x4c, 0x41, 0x42, 0x45, 0x4c, 0x5f, 0x52, 0x45, 0x50, 0x45, 0x41, 0x54, 0x45, - 0x44, 0x10, 0x03, 0x22, 0x63, 0x0a, 0x14, 0x4f, 0x6e, 0x65, 0x6f, 0x66, 0x44, 0x65, 0x73, 0x63, - 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, - 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, - 0x37, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x1d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, - 0x75, 0x66, 0x2e, 0x4f, 0x6e, 0x65, 0x6f, 0x66, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, - 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xe3, 0x02, 0x0a, 0x13, 0x45, 0x6e, 0x75, - 0x6d, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, - 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, - 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x3f, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x29, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x44, - 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x36, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x5d, 0x0a, - 0x0e, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x5f, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x18, - 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x36, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x44, 0x65, 0x73, 0x63, - 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x45, 0x6e, 0x75, 0x6d, - 0x52, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x52, 0x0d, 0x72, - 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x23, 0x0a, 0x0d, - 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x05, 0x20, - 0x03, 0x28, 0x09, 0x52, 0x0c, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x4e, 0x61, 0x6d, - 0x65, 0x1a, 0x3b, 0x0a, 0x11, 0x45, 0x6e, 0x75, 0x6d, 0x52, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, - 0x64, 0x52, 0x61, 0x6e, 0x67, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x12, 0x10, 0x0a, 0x03, - 0x65, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x22, 0x83, - 0x01, 0x0a, 0x18, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x44, 0x65, 0x73, 0x63, - 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, - 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, - 0x16, 0x0a, 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x06, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x3b, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, - 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x56, - 0x61, 0x6c, 0x75, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x22, 0xa7, 0x01, 0x0a, 0x16, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, - 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, - 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, - 0x61, 0x6d, 0x65, 0x12, 0x3e, 0x0a, 0x06, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x02, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x44, 0x65, 0x73, 0x63, - 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x06, 0x6d, 0x65, 0x74, - 0x68, 0x6f, 0x64, 0x12, 0x39, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x89, - 0x02, 0x0a, 0x15, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, - 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1d, 0x0a, 0x0a, - 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x09, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x6f, - 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0a, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x38, 0x0a, 0x07, - 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x3e, 0x0a, 0x06, + 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x26, 0x2e, 0x67, + 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x4d, + 0x65, 0x74, 0x68, 0x6f, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, + 0x72, 0x6f, 0x74, 0x6f, 0x52, 0x06, 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x12, 0x39, 0x0a, 0x07, + 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, - 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x30, 0x0a, 0x10, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, - 0x5f, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, - 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x53, - 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x12, 0x30, 0x0a, 0x10, 0x73, 0x65, 0x72, 0x76, - 0x65, 0x72, 0x5f, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x18, 0x06, 0x20, 0x01, - 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0f, 0x73, 0x65, 0x72, 0x76, 0x65, - 0x72, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x22, 0x91, 0x09, 0x0a, 0x0b, 0x46, - 0x69, 0x6c, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x6a, 0x61, - 0x76, 0x61, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x0b, 0x6a, 0x61, 0x76, 0x61, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x12, 0x30, 0x0a, - 0x14, 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x6f, 0x75, 0x74, 0x65, 0x72, 0x5f, 0x63, 0x6c, 0x61, 0x73, - 0x73, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x6a, 0x61, 0x76, - 0x61, 0x4f, 0x75, 0x74, 0x65, 0x72, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x6e, 0x61, 0x6d, 0x65, 0x12, - 0x35, 0x0a, 0x13, 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x70, 0x6c, 0x65, - 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, - 0x6c, 0x73, 0x65, 0x52, 0x11, 0x6a, 0x61, 0x76, 0x61, 0x4d, 0x75, 0x6c, 0x74, 0x69, 0x70, 0x6c, - 0x65, 0x46, 0x69, 0x6c, 0x65, 0x73, 0x12, 0x44, 0x0a, 0x1d, 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x67, - 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x5f, 0x65, 0x71, 0x75, 0x61, 0x6c, 0x73, 0x5f, 0x61, - 0x6e, 0x64, 0x5f, 0x68, 0x61, 0x73, 0x68, 0x18, 0x14, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, 0x18, - 0x01, 0x52, 0x19, 0x6a, 0x61, 0x76, 0x61, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x45, - 0x71, 0x75, 0x61, 0x6c, 0x73, 0x41, 0x6e, 0x64, 0x48, 0x61, 0x73, 0x68, 0x12, 0x3a, 0x0a, 0x16, - 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x63, 0x68, 0x65, 0x63, - 0x6b, 0x5f, 0x75, 0x74, 0x66, 0x38, 0x18, 0x1b, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, - 0x6c, 0x73, 0x65, 0x52, 0x13, 0x6a, 0x61, 0x76, 0x61, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x43, - 0x68, 0x65, 0x63, 0x6b, 0x55, 0x74, 0x66, 0x38, 0x12, 0x53, 0x0a, 0x0c, 0x6f, 0x70, 0x74, 0x69, - 0x6d, 0x69, 0x7a, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x29, - 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, - 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x70, 0x74, - 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x4d, 0x6f, 0x64, 0x65, 0x3a, 0x05, 0x53, 0x50, 0x45, 0x45, 0x44, - 0x52, 0x0b, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x46, 0x6f, 0x72, 0x12, 0x1d, 0x0a, - 0x0a, 0x67, 0x6f, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x18, 0x0b, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x09, 0x67, 0x6f, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x12, 0x35, 0x0a, 0x13, - 0x63, 0x63, 0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x69, 0x63, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, - 0x63, 0x65, 0x73, 0x18, 0x10, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, - 0x52, 0x11, 0x63, 0x63, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x69, 0x63, 0x53, 0x65, 0x72, 0x76, 0x69, - 0x63, 0x65, 0x73, 0x12, 0x39, 0x0a, 0x15, 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x67, 0x65, 0x6e, 0x65, - 0x72, 0x69, 0x63, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x18, 0x11, 0x20, 0x01, - 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x13, 0x6a, 0x61, 0x76, 0x61, 0x47, - 0x65, 0x6e, 0x65, 0x72, 0x69, 0x63, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x12, 0x35, - 0x0a, 0x13, 0x70, 0x79, 0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x69, 0x63, 0x5f, 0x73, 0x65, 0x72, - 0x76, 0x69, 0x63, 0x65, 0x73, 0x18, 0x12, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, - 0x73, 0x65, 0x52, 0x11, 0x70, 0x79, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x69, 0x63, 0x53, 0x65, 0x72, - 0x76, 0x69, 0x63, 0x65, 0x73, 0x12, 0x37, 0x0a, 0x14, 0x70, 0x68, 0x70, 0x5f, 0x67, 0x65, 0x6e, - 0x65, 0x72, 0x69, 0x63, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x18, 0x2a, 0x20, - 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x12, 0x70, 0x68, 0x70, 0x47, + 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, + 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x22, 0x89, 0x02, 0x0a, 0x15, 0x4d, 0x65, 0x74, 0x68, + 0x6f, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x69, 0x6e, 0x70, 0x75, 0x74, + 0x54, 0x79, 0x70, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x5f, 0x74, + 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x6f, 0x75, 0x74, 0x70, 0x75, + 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x38, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, + 0x30, 0x0a, 0x10, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x5f, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, + 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, + 0x52, 0x0f, 0x63, 0x6c, 0x69, 0x65, 0x6e, 0x74, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, + 0x67, 0x12, 0x30, 0x0a, 0x10, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x73, 0x74, 0x72, 0x65, + 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, + 0x73, 0x65, 0x52, 0x0f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, + 0x69, 0x6e, 0x67, 0x22, 0xad, 0x09, 0x0a, 0x0b, 0x46, 0x69, 0x6c, 0x65, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x12, 0x21, 0x0a, 0x0c, 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x70, 0x61, 0x63, 0x6b, + 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x6a, 0x61, 0x76, 0x61, 0x50, + 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x12, 0x30, 0x0a, 0x14, 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x6f, + 0x75, 0x74, 0x65, 0x72, 0x5f, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x08, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x6a, 0x61, 0x76, 0x61, 0x4f, 0x75, 0x74, 0x65, 0x72, 0x43, + 0x6c, 0x61, 0x73, 0x73, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x35, 0x0a, 0x13, 0x6a, 0x61, 0x76, 0x61, + 0x5f, 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x70, 0x6c, 0x65, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, + 0x0a, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x11, 0x6a, 0x61, + 0x76, 0x61, 0x4d, 0x75, 0x6c, 0x74, 0x69, 0x70, 0x6c, 0x65, 0x46, 0x69, 0x6c, 0x65, 0x73, 0x12, + 0x44, 0x0a, 0x1d, 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, + 0x5f, 0x65, 0x71, 0x75, 0x61, 0x6c, 0x73, 0x5f, 0x61, 0x6e, 0x64, 0x5f, 0x68, 0x61, 0x73, 0x68, + 0x18, 0x14, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, 0x19, 0x6a, 0x61, 0x76, 0x61, + 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x45, 0x71, 0x75, 0x61, 0x6c, 0x73, 0x41, 0x6e, + 0x64, 0x48, 0x61, 0x73, 0x68, 0x12, 0x3a, 0x0a, 0x16, 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x73, 0x74, + 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x63, 0x68, 0x65, 0x63, 0x6b, 0x5f, 0x75, 0x74, 0x66, 0x38, 0x18, + 0x1b, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x13, 0x6a, 0x61, + 0x76, 0x61, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x55, 0x74, 0x66, + 0x38, 0x12, 0x53, 0x0a, 0x0c, 0x6f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x5f, 0x66, 0x6f, + 0x72, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x29, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x4d, 0x6f, + 0x64, 0x65, 0x3a, 0x05, 0x53, 0x50, 0x45, 0x45, 0x44, 0x52, 0x0b, 0x6f, 0x70, 0x74, 0x69, 0x6d, + 0x69, 0x7a, 0x65, 0x46, 0x6f, 0x72, 0x12, 0x1d, 0x0a, 0x0a, 0x67, 0x6f, 0x5f, 0x70, 0x61, 0x63, + 0x6b, 0x61, 0x67, 0x65, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x67, 0x6f, 0x50, 0x61, + 0x63, 0x6b, 0x61, 0x67, 0x65, 0x12, 0x35, 0x0a, 0x13, 0x63, 0x63, 0x5f, 0x67, 0x65, 0x6e, 0x65, + 0x72, 0x69, 0x63, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x18, 0x10, 0x20, 0x01, + 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x11, 0x63, 0x63, 0x47, 0x65, 0x6e, + 0x65, 0x72, 0x69, 0x63, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x12, 0x39, 0x0a, 0x15, + 0x6a, 0x61, 0x76, 0x61, 0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x69, 0x63, 0x5f, 0x73, 0x65, 0x72, + 0x76, 0x69, 0x63, 0x65, 0x73, 0x18, 0x11, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, + 0x73, 0x65, 0x52, 0x13, 0x6a, 0x61, 0x76, 0x61, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x69, 0x63, 0x53, + 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x12, 0x35, 0x0a, 0x13, 0x70, 0x79, 0x5f, 0x67, 0x65, + 0x6e, 0x65, 0x72, 0x69, 0x63, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x18, 0x12, + 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x11, 0x70, 0x79, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x69, 0x63, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x73, 0x12, 0x25, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x18, 0x17, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, @@ -3856,88 +4708,130 @@ var file_google_protobuf_descriptor_proto_rawDesc = []byte{ 0x70, 0x68, 0x70, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x72, 0x75, 0x62, 0x79, 0x5f, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x18, 0x2d, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x72, 0x75, 0x62, 0x79, - 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, + 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x12, 0x37, 0x0a, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, + 0x72, 0x65, 0x73, 0x18, 0x32, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x65, 0x61, 0x74, + 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, + 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, + 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, + 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x3a, 0x0a, 0x0c, 0x4f, 0x70, + 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x09, 0x0a, 0x05, 0x53, 0x50, + 0x45, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x53, 0x49, + 0x5a, 0x45, 0x10, 0x02, 0x12, 0x10, 0x0a, 0x0c, 0x4c, 0x49, 0x54, 0x45, 0x5f, 0x52, 0x55, 0x4e, + 0x54, 0x49, 0x4d, 0x45, 0x10, 0x03, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, + 0x02, 0x4a, 0x04, 0x08, 0x2a, 0x10, 0x2b, 0x4a, 0x04, 0x08, 0x26, 0x10, 0x27, 0x52, 0x14, 0x70, + 0x68, 0x70, 0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x69, 0x63, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, + 0x63, 0x65, 0x73, 0x22, 0xf4, 0x03, 0x0a, 0x0e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x3c, 0x0a, 0x17, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, + 0x65, 0x5f, 0x73, 0x65, 0x74, 0x5f, 0x77, 0x69, 0x72, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, + 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x14, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x53, 0x65, 0x74, 0x57, 0x69, 0x72, 0x65, 0x46, 0x6f, + 0x72, 0x6d, 0x61, 0x74, 0x12, 0x4c, 0x0a, 0x1f, 0x6e, 0x6f, 0x5f, 0x73, 0x74, 0x61, 0x6e, 0x64, + 0x61, 0x72, 0x64, 0x5f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x5f, 0x61, + 0x63, 0x63, 0x65, 0x73, 0x73, 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, + 0x61, 0x6c, 0x73, 0x65, 0x52, 0x1c, 0x6e, 0x6f, 0x53, 0x74, 0x61, 0x6e, 0x64, 0x61, 0x72, 0x64, + 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x41, 0x63, 0x63, 0x65, 0x73, 0x73, + 0x6f, 0x72, 0x12, 0x25, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0a, 0x64, + 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x6d, 0x61, 0x70, + 0x5f, 0x65, 0x6e, 0x74, 0x72, 0x79, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x6d, 0x61, + 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x56, 0x0a, 0x26, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, + 0x61, 0x74, 0x65, 0x64, 0x5f, 0x6c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x5f, 0x6a, 0x73, 0x6f, 0x6e, + 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x6c, 0x69, 0x63, 0x74, 0x73, + 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, 0x22, 0x64, 0x65, 0x70, 0x72, + 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x4c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x4a, 0x73, 0x6f, 0x6e, + 0x46, 0x69, 0x65, 0x6c, 0x64, 0x43, 0x6f, 0x6e, 0x66, 0x6c, 0x69, 0x63, 0x74, 0x73, 0x12, 0x37, + 0x0a, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x1b, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x75, 0x66, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x52, 0x08, 0x66, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x22, 0x3a, 0x0a, 0x0c, 0x4f, 0x70, 0x74, 0x69, 0x6d, 0x69, 0x7a, 0x65, 0x4d, 0x6f, 0x64, - 0x65, 0x12, 0x09, 0x0a, 0x05, 0x53, 0x50, 0x45, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0d, 0x0a, 0x09, - 0x43, 0x4f, 0x44, 0x45, 0x5f, 0x53, 0x49, 0x5a, 0x45, 0x10, 0x02, 0x12, 0x10, 0x0a, 0x0c, 0x4c, - 0x49, 0x54, 0x45, 0x5f, 0x52, 0x55, 0x4e, 0x54, 0x49, 0x4d, 0x45, 0x10, 0x03, 0x2a, 0x09, 0x08, - 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x4a, 0x04, 0x08, 0x26, 0x10, 0x27, 0x22, 0xbb, - 0x03, 0x0a, 0x0e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x12, 0x3c, 0x0a, 0x17, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x73, 0x65, 0x74, - 0x5f, 0x77, 0x69, 0x72, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x14, 0x6d, 0x65, 0x73, 0x73, 0x61, - 0x67, 0x65, 0x53, 0x65, 0x74, 0x57, 0x69, 0x72, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, - 0x4c, 0x0a, 0x1f, 0x6e, 0x6f, 0x5f, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x61, 0x72, 0x64, 0x5f, 0x64, - 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x5f, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, - 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, - 0x1c, 0x6e, 0x6f, 0x53, 0x74, 0x61, 0x6e, 0x64, 0x61, 0x72, 0x64, 0x44, 0x65, 0x73, 0x63, 0x72, - 0x69, 0x70, 0x74, 0x6f, 0x72, 0x41, 0x63, 0x63, 0x65, 0x73, 0x73, 0x6f, 0x72, 0x12, 0x25, 0x0a, - 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, - 0x61, 0x74, 0x65, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x6d, 0x61, 0x70, 0x5f, 0x65, 0x6e, 0x74, 0x72, - 0x79, 0x18, 0x07, 0x20, 0x01, 0x28, 0x08, 0x52, 0x08, 0x6d, 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x12, 0x56, 0x0a, 0x26, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x5f, - 0x6c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x5f, 0x6a, 0x73, 0x6f, 0x6e, 0x5f, 0x66, 0x69, 0x65, 0x6c, - 0x64, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x6c, 0x69, 0x63, 0x74, 0x73, 0x18, 0x0b, 0x20, 0x01, 0x28, - 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, 0x22, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, - 0x64, 0x4c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x4a, 0x73, 0x6f, 0x6e, 0x46, 0x69, 0x65, 0x6c, 0x64, - 0x43, 0x6f, 0x6e, 0x66, 0x6c, 0x69, 0x63, 0x74, 0x73, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, - 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, - 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, - 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, - 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x4a, 0x04, - 0x08, 0x04, 0x10, 0x05, 0x4a, 0x04, 0x08, 0x05, 0x10, 0x06, 0x4a, 0x04, 0x08, 0x06, 0x10, 0x07, - 0x4a, 0x04, 0x08, 0x08, 0x10, 0x09, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x22, 0x85, 0x09, 0x0a, - 0x0c, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x41, 0x0a, - 0x05, 0x63, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x23, 0x2e, 0x67, - 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, - 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x43, 0x54, 0x79, 0x70, - 0x65, 0x3a, 0x06, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x52, 0x05, 0x63, 0x74, 0x79, 0x70, 0x65, - 0x12, 0x16, 0x0a, 0x06, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, - 0x52, 0x06, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x12, 0x47, 0x0a, 0x06, 0x6a, 0x73, 0x74, 0x79, - 0x70, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x6e, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x4a, 0x04, 0x08, 0x04, + 0x10, 0x05, 0x4a, 0x04, 0x08, 0x05, 0x10, 0x06, 0x4a, 0x04, 0x08, 0x06, 0x10, 0x07, 0x4a, 0x04, + 0x08, 0x08, 0x10, 0x09, 0x4a, 0x04, 0x08, 0x09, 0x10, 0x0a, 0x22, 0x9d, 0x0d, 0x0a, 0x0c, 0x46, + 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x41, 0x0a, 0x05, 0x63, + 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x23, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, + 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x43, 0x54, 0x79, 0x70, 0x65, 0x3a, + 0x06, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x52, 0x05, 0x63, 0x74, 0x79, 0x70, 0x65, 0x12, 0x16, + 0x0a, 0x06, 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, + 0x70, 0x61, 0x63, 0x6b, 0x65, 0x64, 0x12, 0x47, 0x0a, 0x06, 0x6a, 0x73, 0x74, 0x79, 0x70, 0x65, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4a, 0x53, 0x54, 0x79, 0x70, 0x65, 0x3a, 0x09, 0x4a, 0x53, + 0x5f, 0x4e, 0x4f, 0x52, 0x4d, 0x41, 0x4c, 0x52, 0x06, 0x6a, 0x73, 0x74, 0x79, 0x70, 0x65, 0x12, + 0x19, 0x0a, 0x04, 0x6c, 0x61, 0x7a, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, + 0x61, 0x6c, 0x73, 0x65, 0x52, 0x04, 0x6c, 0x61, 0x7a, 0x79, 0x12, 0x2e, 0x0a, 0x0f, 0x75, 0x6e, + 0x76, 0x65, 0x72, 0x69, 0x66, 0x69, 0x65, 0x64, 0x5f, 0x6c, 0x61, 0x7a, 0x79, 0x18, 0x0f, 0x20, + 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0e, 0x75, 0x6e, 0x76, 0x65, + 0x72, 0x69, 0x66, 0x69, 0x65, 0x64, 0x4c, 0x61, 0x7a, 0x79, 0x12, 0x25, 0x0a, 0x0a, 0x64, 0x65, + 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, + 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, + 0x64, 0x12, 0x19, 0x0a, 0x04, 0x77, 0x65, 0x61, 0x6b, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x08, 0x3a, + 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x04, 0x77, 0x65, 0x61, 0x6b, 0x12, 0x28, 0x0a, 0x0c, + 0x64, 0x65, 0x62, 0x75, 0x67, 0x5f, 0x72, 0x65, 0x64, 0x61, 0x63, 0x74, 0x18, 0x10, 0x20, 0x01, + 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0b, 0x64, 0x65, 0x62, 0x75, 0x67, + 0x52, 0x65, 0x64, 0x61, 0x63, 0x74, 0x12, 0x4b, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, + 0x69, 0x6f, 0x6e, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, + 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, + 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, 0x72, 0x65, 0x74, 0x65, 0x6e, 0x74, + 0x69, 0x6f, 0x6e, 0x12, 0x48, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x13, + 0x20, 0x03, 0x28, 0x0e, 0x32, 0x2e, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x54, 0x79, 0x70, 0x65, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x57, 0x0a, + 0x10, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, + 0x73, 0x18, 0x14, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2c, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x45, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x44, 0x65, + 0x66, 0x61, 0x75, 0x6c, 0x74, 0x52, 0x0f, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x44, 0x65, + 0x66, 0x61, 0x75, 0x6c, 0x74, 0x73, 0x12, 0x37, 0x0a, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, + 0x65, 0x73, 0x18, 0x15, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, + 0x72, 0x65, 0x53, 0x65, 0x74, 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, + 0x55, 0x0a, 0x0f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x73, 0x75, 0x70, 0x70, 0x6f, + 0x72, 0x74, 0x18, 0x16, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2c, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4a, 0x53, 0x54, 0x79, 0x70, 0x65, 0x3a, 0x09, - 0x4a, 0x53, 0x5f, 0x4e, 0x4f, 0x52, 0x4d, 0x41, 0x4c, 0x52, 0x06, 0x6a, 0x73, 0x74, 0x79, 0x70, - 0x65, 0x12, 0x19, 0x0a, 0x04, 0x6c, 0x61, 0x7a, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x3a, - 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x04, 0x6c, 0x61, 0x7a, 0x79, 0x12, 0x2e, 0x0a, 0x0f, - 0x75, 0x6e, 0x76, 0x65, 0x72, 0x69, 0x66, 0x69, 0x65, 0x64, 0x5f, 0x6c, 0x61, 0x7a, 0x79, 0x18, - 0x0f, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0e, 0x75, 0x6e, - 0x76, 0x65, 0x72, 0x69, 0x66, 0x69, 0x65, 0x64, 0x4c, 0x61, 0x7a, 0x79, 0x12, 0x25, 0x0a, 0x0a, - 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, - 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, - 0x74, 0x65, 0x64, 0x12, 0x19, 0x0a, 0x04, 0x77, 0x65, 0x61, 0x6b, 0x18, 0x0a, 0x20, 0x01, 0x28, - 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x04, 0x77, 0x65, 0x61, 0x6b, 0x12, 0x28, - 0x0a, 0x0c, 0x64, 0x65, 0x62, 0x75, 0x67, 0x5f, 0x72, 0x65, 0x64, 0x61, 0x63, 0x74, 0x18, 0x10, - 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0b, 0x64, 0x65, 0x62, - 0x75, 0x67, 0x52, 0x65, 0x64, 0x61, 0x63, 0x74, 0x12, 0x4b, 0x0a, 0x09, 0x72, 0x65, 0x74, 0x65, - 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2d, 0x2e, 0x67, 0x6f, - 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, - 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x52, 0x65, 0x74, 0x65, 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, 0x72, 0x65, 0x74, 0x65, - 0x6e, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x4a, 0x0a, 0x06, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x18, - 0x12, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2e, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x54, 0x79, 0x70, 0x65, 0x42, 0x02, 0x18, 0x01, 0x52, 0x06, 0x74, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x12, 0x48, 0x0a, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x18, 0x13, 0x20, 0x03, - 0x28, 0x0e, 0x32, 0x2e, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x54, 0x79, - 0x70, 0x65, 0x52, 0x07, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x73, 0x12, 0x58, 0x0a, 0x14, 0x75, - 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, - 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, + 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, + 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x52, 0x0e, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, + 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, + 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, + 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, + 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x2f, 0x0a, 0x05, 0x43, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0a, + 0x1a, 0x5a, 0x0a, 0x0e, 0x45, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x44, 0x65, 0x66, 0x61, 0x75, + 0x6c, 0x74, 0x12, 0x32, 0x0a, 0x07, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x07, 0x65, + 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x1a, 0x96, 0x02, 0x0a, + 0x0e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x12, + 0x47, 0x0a, 0x12, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x69, 0x6e, 0x74, 0x72, 0x6f, + 0x64, 0x75, 0x63, 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x64, + 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x11, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, + 0x74, 0x72, 0x6f, 0x64, 0x75, 0x63, 0x65, 0x64, 0x12, 0x47, 0x0a, 0x12, 0x65, 0x64, 0x69, 0x74, + 0x69, 0x6f, 0x6e, 0x5f, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x11, + 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x44, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, + 0x64, 0x12, 0x2f, 0x0a, 0x13, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x5f, 0x77, 0x61, 0x72, 0x6e, 0x69, 0x6e, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, + 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x57, 0x61, 0x72, 0x6e, 0x69, + 0x6e, 0x67, 0x12, 0x41, 0x0a, 0x0f, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x72, 0x65, + 0x6d, 0x6f, 0x76, 0x65, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x64, + 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, + 0x6d, 0x6f, 0x76, 0x65, 0x64, 0x22, 0x2f, 0x0a, 0x05, 0x43, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x43, 0x4f, 0x52, 0x44, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x5f, 0x50, 0x49, 0x45, 0x43, 0x45, 0x10, 0x02, 0x22, 0x35, 0x0a, 0x06, 0x4a, 0x53, 0x54, 0x79, 0x70, 0x65, @@ -3967,14 +4861,18 @@ var file_google_protobuf_descriptor_proto_rawDesc = []byte{ 0x45, 0x5f, 0x53, 0x45, 0x52, 0x56, 0x49, 0x43, 0x45, 0x10, 0x08, 0x12, 0x16, 0x0a, 0x12, 0x54, 0x41, 0x52, 0x47, 0x45, 0x54, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4d, 0x45, 0x54, 0x48, 0x4f, 0x44, 0x10, 0x09, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x4a, 0x04, - 0x08, 0x04, 0x10, 0x05, 0x22, 0x73, 0x0a, 0x0c, 0x4f, 0x6e, 0x65, 0x6f, 0x66, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, + 0x08, 0x04, 0x10, 0x05, 0x4a, 0x04, 0x08, 0x12, 0x10, 0x13, 0x22, 0xac, 0x01, 0x0a, 0x0c, 0x4f, + 0x6e, 0x65, 0x6f, 0x66, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x37, 0x0a, 0x08, 0x66, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, + 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, + 0x75, 0x72, 0x65, 0x73, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x09, - 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0x98, 0x02, 0x0a, 0x0b, 0x45, 0x6e, + 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0xd1, 0x02, 0x0a, 0x0b, 0x45, 0x6e, 0x75, 0x6d, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x5f, 0x61, 0x6c, 0x69, 0x61, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x41, 0x6c, 0x69, 0x61, 0x73, 0x12, 0x25, 0x0a, 0x0a, 0x64, 0x65, @@ -3985,130 +4883,284 @@ var file_google_protobuf_descriptor_proto_rawDesc = []byte{ 0x64, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x6c, 0x69, 0x63, 0x74, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x08, 0x42, 0x02, 0x18, 0x01, 0x52, 0x22, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x4c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x4a, 0x73, 0x6f, 0x6e, 0x46, 0x69, 0x65, 0x6c, 0x64, - 0x43, 0x6f, 0x6e, 0x66, 0x6c, 0x69, 0x63, 0x74, 0x73, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, - 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, - 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, - 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, - 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x4a, 0x04, - 0x08, 0x05, 0x10, 0x06, 0x22, 0x9e, 0x01, 0x0a, 0x10, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, - 0x75, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x25, 0x0a, 0x0a, 0x64, 0x65, 0x70, - 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, - 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, - 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, - 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, - 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, - 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, - 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0x9c, 0x01, 0x0a, 0x0e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x25, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x72, - 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x18, 0x21, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, - 0x6c, 0x73, 0x65, 0x52, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x12, - 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, - 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, + 0x43, 0x6f, 0x6e, 0x66, 0x6c, 0x69, 0x63, 0x74, 0x73, 0x12, 0x37, 0x0a, 0x08, 0x66, 0x65, 0x61, + 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x65, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, + 0x65, 0x73, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, + 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, + 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, + 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x09, 0x08, 0xe8, + 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x4a, 0x04, 0x08, 0x05, 0x10, 0x06, 0x22, 0xd8, 0x02, + 0x0a, 0x10, 0x45, 0x6e, 0x75, 0x6d, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x12, 0x25, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0a, 0x64, + 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x12, 0x37, 0x0a, 0x08, 0x66, 0x65, 0x61, + 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x65, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, + 0x65, 0x73, 0x12, 0x28, 0x0a, 0x0c, 0x64, 0x65, 0x62, 0x75, 0x67, 0x5f, 0x72, 0x65, 0x64, 0x61, + 0x63, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, + 0x0b, 0x64, 0x65, 0x62, 0x75, 0x67, 0x52, 0x65, 0x64, 0x61, 0x63, 0x74, 0x12, 0x55, 0x0a, 0x0f, + 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2c, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x75, 0x70, 0x70, + 0x6f, 0x72, 0x74, 0x52, 0x0e, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x75, 0x70, 0x70, + 0x6f, 0x72, 0x74, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, + 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, + 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, + 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x09, 0x08, + 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0xd5, 0x01, 0x0a, 0x0e, 0x53, 0x65, 0x72, + 0x76, 0x69, 0x63, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x37, 0x0a, 0x08, 0x66, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x22, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, + 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, + 0x75, 0x72, 0x65, 0x73, 0x12, 0x25, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, + 0x65, 0x64, 0x18, 0x21, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, + 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x12, 0x58, 0x0a, 0x14, 0x75, + 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, + 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, + 0x22, 0x99, 0x03, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x12, 0x25, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, + 0x18, 0x21, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x52, 0x0a, 0x64, + 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x12, 0x71, 0x0a, 0x11, 0x69, 0x64, 0x65, + 0x6d, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x63, 0x79, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x22, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2f, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x49, 0x64, 0x65, 0x6d, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x63, 0x79, + 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x3a, 0x13, 0x49, 0x44, 0x45, 0x4d, 0x50, 0x4f, 0x54, 0x45, 0x4e, + 0x43, 0x59, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x52, 0x10, 0x69, 0x64, 0x65, 0x6d, + 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x63, 0x79, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x37, 0x0a, 0x08, + 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x23, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, - 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, - 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, - 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, - 0x80, 0x80, 0x80, 0x02, 0x22, 0xe0, 0x02, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x25, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, - 0x61, 0x74, 0x65, 0x64, 0x18, 0x21, 0x20, 0x01, 0x28, 0x08, 0x3a, 0x05, 0x66, 0x61, 0x6c, 0x73, - 0x65, 0x52, 0x0a, 0x64, 0x65, 0x70, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x12, 0x71, 0x0a, - 0x11, 0x69, 0x64, 0x65, 0x6d, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x63, 0x79, 0x5f, 0x6c, 0x65, 0x76, - 0x65, 0x6c, 0x18, 0x22, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2f, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, - 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x4d, 0x65, 0x74, 0x68, 0x6f, - 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x49, 0x64, 0x65, 0x6d, 0x70, 0x6f, 0x74, - 0x65, 0x6e, 0x63, 0x79, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x3a, 0x13, 0x49, 0x44, 0x45, 0x4d, 0x50, - 0x4f, 0x54, 0x45, 0x4e, 0x43, 0x59, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x52, 0x10, - 0x69, 0x64, 0x65, 0x6d, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x63, 0x79, 0x4c, 0x65, 0x76, 0x65, 0x6c, - 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, - 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x52, 0x08, 0x66, 0x65, 0x61, + 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x58, 0x0a, 0x14, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, + 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0xe7, 0x07, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, + 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, + 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x22, + 0x50, 0x0a, 0x10, 0x49, 0x64, 0x65, 0x6d, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x63, 0x79, 0x4c, 0x65, + 0x76, 0x65, 0x6c, 0x12, 0x17, 0x0a, 0x13, 0x49, 0x44, 0x45, 0x4d, 0x50, 0x4f, 0x54, 0x45, 0x4e, + 0x43, 0x59, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x13, 0x0a, 0x0f, + 0x4e, 0x4f, 0x5f, 0x53, 0x49, 0x44, 0x45, 0x5f, 0x45, 0x46, 0x46, 0x45, 0x43, 0x54, 0x53, 0x10, + 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x49, 0x44, 0x45, 0x4d, 0x50, 0x4f, 0x54, 0x45, 0x4e, 0x54, 0x10, + 0x02, 0x2a, 0x09, 0x08, 0xe8, 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0x9a, 0x03, 0x0a, + 0x13, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x41, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, + 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x4e, 0x61, 0x6d, 0x65, 0x50, 0x61, 0x72, + 0x74, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x29, 0x0a, 0x10, 0x69, 0x64, 0x65, 0x6e, 0x74, + 0x69, 0x66, 0x69, 0x65, 0x72, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, 0x56, 0x61, 0x6c, + 0x75, 0x65, 0x12, 0x2c, 0x0a, 0x12, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x69, + 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x10, + 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x49, 0x6e, 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, + 0x12, 0x2c, 0x0a, 0x12, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x69, 0x6e, 0x74, + 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x10, 0x6e, 0x65, + 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x49, 0x6e, 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x21, + 0x0a, 0x0c, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x06, + 0x20, 0x01, 0x28, 0x01, 0x52, 0x0b, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x56, 0x61, 0x6c, 0x75, + 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0b, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x61, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, + 0x65, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x61, + 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, 0x65, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x1a, 0x4a, 0x0a, + 0x08, 0x4e, 0x61, 0x6d, 0x65, 0x50, 0x61, 0x72, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x6e, 0x61, 0x6d, + 0x65, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x18, 0x01, 0x20, 0x02, 0x28, 0x09, 0x52, 0x08, 0x6e, 0x61, + 0x6d, 0x65, 0x50, 0x61, 0x72, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x69, 0x73, 0x5f, 0x65, 0x78, 0x74, + 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x02, 0x28, 0x08, 0x52, 0x0b, 0x69, 0x73, + 0x45, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0xa7, 0x0a, 0x0a, 0x0a, 0x46, 0x65, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x12, 0x91, 0x01, 0x0a, 0x0e, 0x66, 0x69, 0x65, + 0x6c, 0x64, 0x5f, 0x70, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0e, 0x32, 0x29, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x2e, 0x46, + 0x69, 0x65, 0x6c, 0x64, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x42, 0x3f, 0x88, 0x01, + 0x01, 0x98, 0x01, 0x04, 0x98, 0x01, 0x01, 0xa2, 0x01, 0x0d, 0x12, 0x08, 0x45, 0x58, 0x50, 0x4c, + 0x49, 0x43, 0x49, 0x54, 0x18, 0xe6, 0x07, 0xa2, 0x01, 0x0d, 0x12, 0x08, 0x49, 0x4d, 0x50, 0x4c, + 0x49, 0x43, 0x49, 0x54, 0x18, 0xe7, 0x07, 0xa2, 0x01, 0x0d, 0x12, 0x08, 0x45, 0x58, 0x50, 0x4c, + 0x49, 0x43, 0x49, 0x54, 0x18, 0xe8, 0x07, 0xb2, 0x01, 0x03, 0x08, 0xe8, 0x07, 0x52, 0x0d, 0x66, + 0x69, 0x65, 0x6c, 0x64, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x12, 0x6c, 0x0a, 0x09, + 0x65, 0x6e, 0x75, 0x6d, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x24, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, - 0x66, 0x2e, 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, - 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x13, 0x75, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, - 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x50, 0x0a, 0x10, 0x49, 0x64, - 0x65, 0x6d, 0x70, 0x6f, 0x74, 0x65, 0x6e, 0x63, 0x79, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x17, - 0x0a, 0x13, 0x49, 0x44, 0x45, 0x4d, 0x50, 0x4f, 0x54, 0x45, 0x4e, 0x43, 0x59, 0x5f, 0x55, 0x4e, - 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x13, 0x0a, 0x0f, 0x4e, 0x4f, 0x5f, 0x53, 0x49, - 0x44, 0x45, 0x5f, 0x45, 0x46, 0x46, 0x45, 0x43, 0x54, 0x53, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, - 0x49, 0x44, 0x45, 0x4d, 0x50, 0x4f, 0x54, 0x45, 0x4e, 0x54, 0x10, 0x02, 0x2a, 0x09, 0x08, 0xe8, - 0x07, 0x10, 0x80, 0x80, 0x80, 0x80, 0x02, 0x22, 0x9a, 0x03, 0x0a, 0x13, 0x55, 0x6e, 0x69, 0x6e, - 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, - 0x41, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, + 0x66, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x2e, 0x45, 0x6e, 0x75, + 0x6d, 0x54, 0x79, 0x70, 0x65, 0x42, 0x29, 0x88, 0x01, 0x01, 0x98, 0x01, 0x06, 0x98, 0x01, 0x01, + 0xa2, 0x01, 0x0b, 0x12, 0x06, 0x43, 0x4c, 0x4f, 0x53, 0x45, 0x44, 0x18, 0xe6, 0x07, 0xa2, 0x01, + 0x09, 0x12, 0x04, 0x4f, 0x50, 0x45, 0x4e, 0x18, 0xe7, 0x07, 0xb2, 0x01, 0x03, 0x08, 0xe8, 0x07, + 0x52, 0x08, 0x65, 0x6e, 0x75, 0x6d, 0x54, 0x79, 0x70, 0x65, 0x12, 0x98, 0x01, 0x0a, 0x17, 0x72, + 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x5f, 0x65, 0x6e, + 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x31, 0x2e, 0x67, + 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x2e, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, + 0x65, 0x64, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x42, + 0x2d, 0x88, 0x01, 0x01, 0x98, 0x01, 0x04, 0x98, 0x01, 0x01, 0xa2, 0x01, 0x0d, 0x12, 0x08, 0x45, + 0x58, 0x50, 0x41, 0x4e, 0x44, 0x45, 0x44, 0x18, 0xe6, 0x07, 0xa2, 0x01, 0x0b, 0x12, 0x06, 0x50, + 0x41, 0x43, 0x4b, 0x45, 0x44, 0x18, 0xe7, 0x07, 0xb2, 0x01, 0x03, 0x08, 0xe8, 0x07, 0x52, 0x15, + 0x72, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x45, 0x6e, 0x63, + 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x7e, 0x0a, 0x0f, 0x75, 0x74, 0x66, 0x38, 0x5f, 0x76, 0x61, + 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x2a, + 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, + 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x2e, 0x55, 0x74, 0x66, 0x38, + 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x42, 0x29, 0x88, 0x01, 0x01, 0x98, + 0x01, 0x04, 0x98, 0x01, 0x01, 0xa2, 0x01, 0x09, 0x12, 0x04, 0x4e, 0x4f, 0x4e, 0x45, 0x18, 0xe6, + 0x07, 0xa2, 0x01, 0x0b, 0x12, 0x06, 0x56, 0x45, 0x52, 0x49, 0x46, 0x59, 0x18, 0xe7, 0x07, 0xb2, + 0x01, 0x03, 0x08, 0xe8, 0x07, 0x52, 0x0e, 0x75, 0x74, 0x66, 0x38, 0x56, 0x61, 0x6c, 0x69, 0x64, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x7e, 0x0a, 0x10, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, + 0x5f, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x2b, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x2e, 0x4d, 0x65, 0x73, + 0x73, 0x61, 0x67, 0x65, 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x42, 0x26, 0x88, 0x01, + 0x01, 0x98, 0x01, 0x04, 0x98, 0x01, 0x01, 0xa2, 0x01, 0x14, 0x12, 0x0f, 0x4c, 0x45, 0x4e, 0x47, + 0x54, 0x48, 0x5f, 0x50, 0x52, 0x45, 0x46, 0x49, 0x58, 0x45, 0x44, 0x18, 0xe6, 0x07, 0xb2, 0x01, + 0x03, 0x08, 0xe8, 0x07, 0x52, 0x0f, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x45, 0x6e, 0x63, + 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x82, 0x01, 0x0a, 0x0b, 0x6a, 0x73, 0x6f, 0x6e, 0x5f, 0x66, + 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x26, 0x2e, 0x67, 0x6f, + 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x65, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x2e, 0x4a, 0x73, 0x6f, 0x6e, 0x46, 0x6f, 0x72, + 0x6d, 0x61, 0x74, 0x42, 0x39, 0x88, 0x01, 0x01, 0x98, 0x01, 0x03, 0x98, 0x01, 0x06, 0x98, 0x01, + 0x01, 0xa2, 0x01, 0x17, 0x12, 0x12, 0x4c, 0x45, 0x47, 0x41, 0x43, 0x59, 0x5f, 0x42, 0x45, 0x53, + 0x54, 0x5f, 0x45, 0x46, 0x46, 0x4f, 0x52, 0x54, 0x18, 0xe6, 0x07, 0xa2, 0x01, 0x0a, 0x12, 0x05, + 0x41, 0x4c, 0x4c, 0x4f, 0x57, 0x18, 0xe7, 0x07, 0xb2, 0x01, 0x03, 0x08, 0xe8, 0x07, 0x52, 0x0a, + 0x6a, 0x73, 0x6f, 0x6e, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x22, 0x5c, 0x0a, 0x0d, 0x46, 0x69, + 0x65, 0x6c, 0x64, 0x50, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x63, 0x65, 0x12, 0x1a, 0x0a, 0x16, 0x46, + 0x49, 0x45, 0x4c, 0x44, 0x5f, 0x50, 0x52, 0x45, 0x53, 0x45, 0x4e, 0x43, 0x45, 0x5f, 0x55, 0x4e, + 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x45, 0x58, 0x50, 0x4c, 0x49, + 0x43, 0x49, 0x54, 0x10, 0x01, 0x12, 0x0c, 0x0a, 0x08, 0x49, 0x4d, 0x50, 0x4c, 0x49, 0x43, 0x49, + 0x54, 0x10, 0x02, 0x12, 0x13, 0x0a, 0x0f, 0x4c, 0x45, 0x47, 0x41, 0x43, 0x59, 0x5f, 0x52, 0x45, + 0x51, 0x55, 0x49, 0x52, 0x45, 0x44, 0x10, 0x03, 0x22, 0x37, 0x0a, 0x08, 0x45, 0x6e, 0x75, 0x6d, + 0x54, 0x79, 0x70, 0x65, 0x12, 0x15, 0x0a, 0x11, 0x45, 0x4e, 0x55, 0x4d, 0x5f, 0x54, 0x59, 0x50, + 0x45, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x4f, + 0x50, 0x45, 0x4e, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x43, 0x4c, 0x4f, 0x53, 0x45, 0x44, 0x10, + 0x02, 0x22, 0x56, 0x0a, 0x15, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x46, 0x69, 0x65, + 0x6c, 0x64, 0x45, 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x23, 0x0a, 0x1f, 0x52, 0x45, + 0x50, 0x45, 0x41, 0x54, 0x45, 0x44, 0x5f, 0x46, 0x49, 0x45, 0x4c, 0x44, 0x5f, 0x45, 0x4e, 0x43, + 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, + 0x0a, 0x0a, 0x06, 0x50, 0x41, 0x43, 0x4b, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0c, 0x0a, 0x08, 0x45, + 0x58, 0x50, 0x41, 0x4e, 0x44, 0x45, 0x44, 0x10, 0x02, 0x22, 0x49, 0x0a, 0x0e, 0x55, 0x74, 0x66, + 0x38, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1b, 0x0a, 0x17, 0x55, + 0x54, 0x46, 0x38, 0x5f, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x55, + 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x56, 0x45, 0x52, 0x49, + 0x46, 0x59, 0x10, 0x02, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x03, 0x22, 0x04, + 0x08, 0x01, 0x10, 0x01, 0x22, 0x53, 0x0a, 0x0f, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x45, + 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x12, 0x1c, 0x0a, 0x18, 0x4d, 0x45, 0x53, 0x53, 0x41, + 0x47, 0x45, 0x5f, 0x45, 0x4e, 0x43, 0x4f, 0x44, 0x49, 0x4e, 0x47, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, + 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x13, 0x0a, 0x0f, 0x4c, 0x45, 0x4e, 0x47, 0x54, 0x48, 0x5f, + 0x50, 0x52, 0x45, 0x46, 0x49, 0x58, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x44, 0x45, + 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x45, 0x44, 0x10, 0x02, 0x22, 0x48, 0x0a, 0x0a, 0x4a, 0x73, 0x6f, + 0x6e, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x17, 0x0a, 0x13, 0x4a, 0x53, 0x4f, 0x4e, 0x5f, + 0x46, 0x4f, 0x52, 0x4d, 0x41, 0x54, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, + 0x12, 0x09, 0x0a, 0x05, 0x41, 0x4c, 0x4c, 0x4f, 0x57, 0x10, 0x01, 0x12, 0x16, 0x0a, 0x12, 0x4c, + 0x45, 0x47, 0x41, 0x43, 0x59, 0x5f, 0x42, 0x45, 0x53, 0x54, 0x5f, 0x45, 0x46, 0x46, 0x4f, 0x52, + 0x54, 0x10, 0x02, 0x2a, 0x06, 0x08, 0xe8, 0x07, 0x10, 0x8b, 0x4e, 0x2a, 0x06, 0x08, 0x8b, 0x4e, + 0x10, 0x90, 0x4e, 0x2a, 0x06, 0x08, 0x90, 0x4e, 0x10, 0x91, 0x4e, 0x4a, 0x06, 0x08, 0xe7, 0x07, + 0x10, 0xe8, 0x07, 0x22, 0xef, 0x03, 0x0a, 0x12, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, + 0x65, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x73, 0x12, 0x58, 0x0a, 0x08, 0x64, 0x65, + 0x66, 0x61, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x3c, 0x2e, 0x67, + 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, + 0x73, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x45, 0x64, 0x69, 0x74, + 0x69, 0x6f, 0x6e, 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x52, 0x08, 0x64, 0x65, 0x66, 0x61, + 0x75, 0x6c, 0x74, 0x73, 0x12, 0x41, 0x0a, 0x0f, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x75, 0x6d, 0x5f, + 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, - 0x55, 0x6e, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x70, 0x72, 0x65, 0x74, 0x65, 0x64, 0x4f, 0x70, 0x74, - 0x69, 0x6f, 0x6e, 0x2e, 0x4e, 0x61, 0x6d, 0x65, 0x50, 0x61, 0x72, 0x74, 0x52, 0x04, 0x6e, 0x61, - 0x6d, 0x65, 0x12, 0x29, 0x0a, 0x10, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, - 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x69, 0x64, - 0x65, 0x6e, 0x74, 0x69, 0x66, 0x69, 0x65, 0x72, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x2c, 0x0a, - 0x12, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x69, 0x6e, 0x74, 0x5f, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x10, 0x70, 0x6f, 0x73, 0x69, 0x74, - 0x69, 0x76, 0x65, 0x49, 0x6e, 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x2c, 0x0a, 0x12, 0x6e, - 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x69, 0x6e, 0x74, 0x5f, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x10, 0x6e, 0x65, 0x67, 0x61, 0x74, 0x69, 0x76, - 0x65, 0x49, 0x6e, 0x74, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x64, 0x6f, 0x75, - 0x62, 0x6c, 0x65, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x01, 0x52, - 0x0b, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, 0x21, 0x0a, 0x0c, - 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x07, 0x20, 0x01, - 0x28, 0x0c, 0x52, 0x0b, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x12, - 0x27, 0x0a, 0x0f, 0x61, 0x67, 0x67, 0x72, 0x65, 0x67, 0x61, 0x74, 0x65, 0x5f, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x18, 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x61, 0x67, 0x67, 0x72, 0x65, 0x67, - 0x61, 0x74, 0x65, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x1a, 0x4a, 0x0a, 0x08, 0x4e, 0x61, 0x6d, 0x65, - 0x50, 0x61, 0x72, 0x74, 0x12, 0x1b, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x5f, 0x70, 0x61, 0x72, - 0x74, 0x18, 0x01, 0x20, 0x02, 0x28, 0x09, 0x52, 0x08, 0x6e, 0x61, 0x6d, 0x65, 0x50, 0x61, 0x72, - 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x69, 0x73, 0x5f, 0x65, 0x78, 0x74, 0x65, 0x6e, 0x73, 0x69, 0x6f, - 0x6e, 0x18, 0x02, 0x20, 0x02, 0x28, 0x08, 0x52, 0x0b, 0x69, 0x73, 0x45, 0x78, 0x74, 0x65, 0x6e, - 0x73, 0x69, 0x6f, 0x6e, 0x22, 0xa7, 0x02, 0x0a, 0x0e, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x43, - 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x44, 0x0a, 0x08, 0x6c, 0x6f, 0x63, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x67, 0x6f, 0x6f, 0x67, - 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x6f, 0x75, 0x72, - 0x63, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x4c, 0x6f, 0x63, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x52, 0x08, 0x6c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0xce, 0x01, - 0x0a, 0x08, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x04, 0x70, 0x61, - 0x74, 0x68, 0x18, 0x01, 0x20, 0x03, 0x28, 0x05, 0x42, 0x02, 0x10, 0x01, 0x52, 0x04, 0x70, 0x61, - 0x74, 0x68, 0x12, 0x16, 0x0a, 0x04, 0x73, 0x70, 0x61, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, 0x05, - 0x42, 0x02, 0x10, 0x01, 0x52, 0x04, 0x73, 0x70, 0x61, 0x6e, 0x12, 0x29, 0x0a, 0x10, 0x6c, 0x65, - 0x61, 0x64, 0x69, 0x6e, 0x67, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x6c, 0x65, 0x61, 0x64, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6d, - 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x2b, 0x0a, 0x11, 0x74, 0x72, 0x61, 0x69, 0x6c, 0x69, 0x6e, - 0x67, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x10, 0x74, 0x72, 0x61, 0x69, 0x6c, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, - 0x74, 0x73, 0x12, 0x3a, 0x0a, 0x19, 0x6c, 0x65, 0x61, 0x64, 0x69, 0x6e, 0x67, 0x5f, 0x64, 0x65, - 0x74, 0x61, 0x63, 0x68, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, - 0x06, 0x20, 0x03, 0x28, 0x09, 0x52, 0x17, 0x6c, 0x65, 0x61, 0x64, 0x69, 0x6e, 0x67, 0x44, 0x65, - 0x74, 0x61, 0x63, 0x68, 0x65, 0x64, 0x43, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, 0xd0, - 0x02, 0x0a, 0x11, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x64, 0x43, 0x6f, 0x64, 0x65, - 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x4d, 0x0a, 0x0a, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, - 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, - 0x61, 0x74, 0x65, 0x64, 0x43, 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x41, 0x6e, 0x6e, - 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x1a, 0xeb, 0x01, 0x0a, 0x0a, 0x41, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x04, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x03, 0x28, 0x05, - 0x42, 0x02, 0x10, 0x01, 0x52, 0x04, 0x70, 0x61, 0x74, 0x68, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x6f, - 0x75, 0x72, 0x63, 0x65, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0a, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x62, - 0x65, 0x67, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x62, 0x65, 0x67, 0x69, - 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, - 0x65, 0x6e, 0x64, 0x12, 0x52, 0x0a, 0x08, 0x73, 0x65, 0x6d, 0x61, 0x6e, 0x74, 0x69, 0x63, 0x18, - 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x36, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, - 0x64, 0x43, 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x41, 0x6e, 0x6e, 0x6f, 0x74, 0x61, - 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x53, 0x65, 0x6d, 0x61, 0x6e, 0x74, 0x69, 0x63, 0x52, 0x08, 0x73, - 0x65, 0x6d, 0x61, 0x6e, 0x74, 0x69, 0x63, 0x22, 0x28, 0x0a, 0x08, 0x53, 0x65, 0x6d, 0x61, 0x6e, - 0x74, 0x69, 0x63, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x00, 0x12, 0x07, 0x0a, - 0x03, 0x53, 0x45, 0x54, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x41, 0x4c, 0x49, 0x41, 0x53, 0x10, - 0x02, 0x42, 0x7e, 0x0a, 0x13, 0x63, 0x6f, 0x6d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x42, 0x10, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, - 0x70, 0x74, 0x6f, 0x72, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x48, 0x01, 0x5a, 0x2d, 0x67, 0x6f, - 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x67, 0x6f, 0x6c, 0x61, 0x6e, 0x67, 0x2e, 0x6f, 0x72, 0x67, 0x2f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2f, 0x64, - 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x70, 0x62, 0xf8, 0x01, 0x01, 0xa2, 0x02, - 0x03, 0x47, 0x50, 0x42, 0xaa, 0x02, 0x1a, 0x47, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x50, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x52, 0x65, 0x66, 0x6c, 0x65, 0x63, 0x74, 0x69, 0x6f, - 0x6e, + 0x45, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x75, 0x6d, + 0x45, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x41, 0x0a, 0x0f, 0x6d, 0x61, 0x78, 0x69, 0x6d, + 0x75, 0x6d, 0x5f, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, + 0x32, 0x18, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x75, 0x66, 0x2e, 0x45, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0e, 0x6d, 0x61, 0x78, 0x69, + 0x6d, 0x75, 0x6d, 0x45, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0xf8, 0x01, 0x0a, 0x18, 0x46, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x45, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, + 0x44, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x12, 0x32, 0x0a, 0x07, 0x65, 0x64, 0x69, 0x74, 0x69, + 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x64, 0x69, 0x74, 0x69, + 0x6f, 0x6e, 0x52, 0x07, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x4e, 0x0a, 0x14, 0x6f, + 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, + 0x72, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x65, 0x61, 0x74, + 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x52, 0x13, 0x6f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, 0x61, + 0x62, 0x6c, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x42, 0x0a, 0x0e, 0x66, + 0x69, 0x78, 0x65, 0x64, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x05, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, + 0x52, 0x0d, 0x66, 0x69, 0x78, 0x65, 0x64, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x4a, + 0x04, 0x08, 0x01, 0x10, 0x02, 0x4a, 0x04, 0x08, 0x02, 0x10, 0x03, 0x52, 0x08, 0x66, 0x65, 0x61, + 0x74, 0x75, 0x72, 0x65, 0x73, 0x22, 0xa7, 0x02, 0x0a, 0x0e, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, + 0x43, 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x44, 0x0a, 0x08, 0x6c, 0x6f, 0x63, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x28, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x6f, 0x75, + 0x72, 0x63, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x4c, 0x6f, 0x63, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x08, 0x6c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0xce, + 0x01, 0x0a, 0x08, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x04, 0x70, + 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x03, 0x28, 0x05, 0x42, 0x02, 0x10, 0x01, 0x52, 0x04, 0x70, + 0x61, 0x74, 0x68, 0x12, 0x16, 0x0a, 0x04, 0x73, 0x70, 0x61, 0x6e, 0x18, 0x02, 0x20, 0x03, 0x28, + 0x05, 0x42, 0x02, 0x10, 0x01, 0x52, 0x04, 0x73, 0x70, 0x61, 0x6e, 0x12, 0x29, 0x0a, 0x10, 0x6c, + 0x65, 0x61, 0x64, 0x69, 0x6e, 0x67, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x6c, 0x65, 0x61, 0x64, 0x69, 0x6e, 0x67, 0x43, 0x6f, + 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x2b, 0x0a, 0x11, 0x74, 0x72, 0x61, 0x69, 0x6c, 0x69, + 0x6e, 0x67, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x10, 0x74, 0x72, 0x61, 0x69, 0x6c, 0x69, 0x6e, 0x67, 0x43, 0x6f, 0x6d, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x12, 0x3a, 0x0a, 0x19, 0x6c, 0x65, 0x61, 0x64, 0x69, 0x6e, 0x67, 0x5f, 0x64, + 0x65, 0x74, 0x61, 0x63, 0x68, 0x65, 0x64, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x73, + 0x18, 0x06, 0x20, 0x03, 0x28, 0x09, 0x52, 0x17, 0x6c, 0x65, 0x61, 0x64, 0x69, 0x6e, 0x67, 0x44, + 0x65, 0x74, 0x61, 0x63, 0x68, 0x65, 0x64, 0x43, 0x6f, 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, + 0xd0, 0x02, 0x0a, 0x11, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x65, 0x64, 0x43, 0x6f, 0x64, + 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x4d, 0x0a, 0x0a, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, + 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x47, 0x65, 0x6e, 0x65, + 0x72, 0x61, 0x74, 0x65, 0x64, 0x43, 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x41, 0x6e, + 0x6e, 0x6f, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x0a, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x1a, 0xeb, 0x01, 0x0a, 0x0a, 0x41, 0x6e, 0x6e, 0x6f, 0x74, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x04, 0x70, 0x61, 0x74, 0x68, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x05, 0x42, 0x02, 0x10, 0x01, 0x52, 0x04, 0x70, 0x61, 0x74, 0x68, 0x12, 0x1f, 0x0a, 0x0b, 0x73, + 0x6f, 0x75, 0x72, 0x63, 0x65, 0x5f, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0a, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x46, 0x69, 0x6c, 0x65, 0x12, 0x14, 0x0a, 0x05, + 0x62, 0x65, 0x67, 0x69, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x62, 0x65, 0x67, + 0x69, 0x6e, 0x12, 0x10, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x03, 0x65, 0x6e, 0x64, 0x12, 0x52, 0x0a, 0x08, 0x73, 0x65, 0x6d, 0x61, 0x6e, 0x74, 0x69, 0x63, + 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x36, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, + 0x65, 0x64, 0x43, 0x6f, 0x64, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x2e, 0x41, 0x6e, 0x6e, 0x6f, 0x74, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x53, 0x65, 0x6d, 0x61, 0x6e, 0x74, 0x69, 0x63, 0x52, 0x08, + 0x73, 0x65, 0x6d, 0x61, 0x6e, 0x74, 0x69, 0x63, 0x22, 0x28, 0x0a, 0x08, 0x53, 0x65, 0x6d, 0x61, + 0x6e, 0x74, 0x69, 0x63, 0x12, 0x08, 0x0a, 0x04, 0x4e, 0x4f, 0x4e, 0x45, 0x10, 0x00, 0x12, 0x07, + 0x0a, 0x03, 0x53, 0x45, 0x54, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x41, 0x4c, 0x49, 0x41, 0x53, + 0x10, 0x02, 0x2a, 0xa7, 0x02, 0x0a, 0x07, 0x45, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x13, + 0x0a, 0x0f, 0x45, 0x44, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, + 0x4e, 0x10, 0x00, 0x12, 0x13, 0x0a, 0x0e, 0x45, 0x44, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x4c, + 0x45, 0x47, 0x41, 0x43, 0x59, 0x10, 0x84, 0x07, 0x12, 0x13, 0x0a, 0x0e, 0x45, 0x44, 0x49, 0x54, + 0x49, 0x4f, 0x4e, 0x5f, 0x50, 0x52, 0x4f, 0x54, 0x4f, 0x32, 0x10, 0xe6, 0x07, 0x12, 0x13, 0x0a, + 0x0e, 0x45, 0x44, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x50, 0x52, 0x4f, 0x54, 0x4f, 0x33, 0x10, + 0xe7, 0x07, 0x12, 0x11, 0x0a, 0x0c, 0x45, 0x44, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x32, 0x30, + 0x32, 0x33, 0x10, 0xe8, 0x07, 0x12, 0x11, 0x0a, 0x0c, 0x45, 0x44, 0x49, 0x54, 0x49, 0x4f, 0x4e, + 0x5f, 0x32, 0x30, 0x32, 0x34, 0x10, 0xe9, 0x07, 0x12, 0x17, 0x0a, 0x13, 0x45, 0x44, 0x49, 0x54, + 0x49, 0x4f, 0x4e, 0x5f, 0x31, 0x5f, 0x54, 0x45, 0x53, 0x54, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, 0x10, + 0x01, 0x12, 0x17, 0x0a, 0x13, 0x45, 0x44, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x32, 0x5f, 0x54, + 0x45, 0x53, 0x54, 0x5f, 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x02, 0x12, 0x1d, 0x0a, 0x17, 0x45, 0x44, + 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x39, 0x39, 0x39, 0x39, 0x37, 0x5f, 0x54, 0x45, 0x53, 0x54, + 0x5f, 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x9d, 0x8d, 0x06, 0x12, 0x1d, 0x0a, 0x17, 0x45, 0x44, 0x49, + 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x39, 0x39, 0x39, 0x39, 0x38, 0x5f, 0x54, 0x45, 0x53, 0x54, 0x5f, + 0x4f, 0x4e, 0x4c, 0x59, 0x10, 0x9e, 0x8d, 0x06, 0x12, 0x1d, 0x0a, 0x17, 0x45, 0x44, 0x49, 0x54, + 0x49, 0x4f, 0x4e, 0x5f, 0x39, 0x39, 0x39, 0x39, 0x39, 0x5f, 0x54, 0x45, 0x53, 0x54, 0x5f, 0x4f, + 0x4e, 0x4c, 0x59, 0x10, 0x9f, 0x8d, 0x06, 0x12, 0x13, 0x0a, 0x0b, 0x45, 0x44, 0x49, 0x54, 0x49, + 0x4f, 0x4e, 0x5f, 0x4d, 0x41, 0x58, 0x10, 0xff, 0xff, 0xff, 0xff, 0x07, 0x42, 0x7e, 0x0a, 0x13, + 0x63, 0x6f, 0x6d, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x42, 0x10, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x6f, 0x72, 0x50, + 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x48, 0x01, 0x5a, 0x2d, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, + 0x67, 0x6f, 0x6c, 0x61, 0x6e, 0x67, 0x2e, 0x6f, 0x72, 0x67, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x75, 0x66, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x6f, 0x72, 0x70, 0x62, 0xf8, 0x01, 0x01, 0xa2, 0x02, 0x03, 0x47, 0x50, 0x42, 0xaa, + 0x02, 0x1a, 0x47, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x52, 0x65, 0x66, 0x6c, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, } var ( @@ -4123,103 +5175,143 @@ func file_google_protobuf_descriptor_proto_rawDescGZIP() []byte { return file_google_protobuf_descriptor_proto_rawDescData } -var file_google_protobuf_descriptor_proto_enumTypes = make([]protoimpl.EnumInfo, 10) -var file_google_protobuf_descriptor_proto_msgTypes = make([]protoimpl.MessageInfo, 28) -var file_google_protobuf_descriptor_proto_goTypes = []interface{}{ - (ExtensionRangeOptions_VerificationState)(0), // 0: google.protobuf.ExtensionRangeOptions.VerificationState - (FieldDescriptorProto_Type)(0), // 1: google.protobuf.FieldDescriptorProto.Type - (FieldDescriptorProto_Label)(0), // 2: google.protobuf.FieldDescriptorProto.Label - (FileOptions_OptimizeMode)(0), // 3: google.protobuf.FileOptions.OptimizeMode - (FieldOptions_CType)(0), // 4: google.protobuf.FieldOptions.CType - (FieldOptions_JSType)(0), // 5: google.protobuf.FieldOptions.JSType - (FieldOptions_OptionRetention)(0), // 6: google.protobuf.FieldOptions.OptionRetention - (FieldOptions_OptionTargetType)(0), // 7: google.protobuf.FieldOptions.OptionTargetType - (MethodOptions_IdempotencyLevel)(0), // 8: google.protobuf.MethodOptions.IdempotencyLevel - (GeneratedCodeInfo_Annotation_Semantic)(0), // 9: google.protobuf.GeneratedCodeInfo.Annotation.Semantic - (*FileDescriptorSet)(nil), // 10: google.protobuf.FileDescriptorSet - (*FileDescriptorProto)(nil), // 11: google.protobuf.FileDescriptorProto - (*DescriptorProto)(nil), // 12: google.protobuf.DescriptorProto - (*ExtensionRangeOptions)(nil), // 13: google.protobuf.ExtensionRangeOptions - (*FieldDescriptorProto)(nil), // 14: google.protobuf.FieldDescriptorProto - (*OneofDescriptorProto)(nil), // 15: google.protobuf.OneofDescriptorProto - (*EnumDescriptorProto)(nil), // 16: google.protobuf.EnumDescriptorProto - (*EnumValueDescriptorProto)(nil), // 17: google.protobuf.EnumValueDescriptorProto - (*ServiceDescriptorProto)(nil), // 18: google.protobuf.ServiceDescriptorProto - (*MethodDescriptorProto)(nil), // 19: google.protobuf.MethodDescriptorProto - (*FileOptions)(nil), // 20: google.protobuf.FileOptions - (*MessageOptions)(nil), // 21: google.protobuf.MessageOptions - (*FieldOptions)(nil), // 22: google.protobuf.FieldOptions - (*OneofOptions)(nil), // 23: google.protobuf.OneofOptions - (*EnumOptions)(nil), // 24: google.protobuf.EnumOptions - (*EnumValueOptions)(nil), // 25: google.protobuf.EnumValueOptions - (*ServiceOptions)(nil), // 26: google.protobuf.ServiceOptions - (*MethodOptions)(nil), // 27: google.protobuf.MethodOptions - (*UninterpretedOption)(nil), // 28: google.protobuf.UninterpretedOption - (*SourceCodeInfo)(nil), // 29: google.protobuf.SourceCodeInfo - (*GeneratedCodeInfo)(nil), // 30: google.protobuf.GeneratedCodeInfo - (*DescriptorProto_ExtensionRange)(nil), // 31: google.protobuf.DescriptorProto.ExtensionRange - (*DescriptorProto_ReservedRange)(nil), // 32: google.protobuf.DescriptorProto.ReservedRange - (*ExtensionRangeOptions_Declaration)(nil), // 33: google.protobuf.ExtensionRangeOptions.Declaration - (*EnumDescriptorProto_EnumReservedRange)(nil), // 34: google.protobuf.EnumDescriptorProto.EnumReservedRange - (*UninterpretedOption_NamePart)(nil), // 35: google.protobuf.UninterpretedOption.NamePart - (*SourceCodeInfo_Location)(nil), // 36: google.protobuf.SourceCodeInfo.Location - (*GeneratedCodeInfo_Annotation)(nil), // 37: google.protobuf.GeneratedCodeInfo.Annotation +var file_google_protobuf_descriptor_proto_enumTypes = make([]protoimpl.EnumInfo, 17) +var file_google_protobuf_descriptor_proto_msgTypes = make([]protoimpl.MessageInfo, 33) +var file_google_protobuf_descriptor_proto_goTypes = []any{ + (Edition)(0), // 0: google.protobuf.Edition + (ExtensionRangeOptions_VerificationState)(0), // 1: google.protobuf.ExtensionRangeOptions.VerificationState + (FieldDescriptorProto_Type)(0), // 2: google.protobuf.FieldDescriptorProto.Type + (FieldDescriptorProto_Label)(0), // 3: google.protobuf.FieldDescriptorProto.Label + (FileOptions_OptimizeMode)(0), // 4: google.protobuf.FileOptions.OptimizeMode + (FieldOptions_CType)(0), // 5: google.protobuf.FieldOptions.CType + (FieldOptions_JSType)(0), // 6: google.protobuf.FieldOptions.JSType + (FieldOptions_OptionRetention)(0), // 7: google.protobuf.FieldOptions.OptionRetention + (FieldOptions_OptionTargetType)(0), // 8: google.protobuf.FieldOptions.OptionTargetType + (MethodOptions_IdempotencyLevel)(0), // 9: google.protobuf.MethodOptions.IdempotencyLevel + (FeatureSet_FieldPresence)(0), // 10: google.protobuf.FeatureSet.FieldPresence + (FeatureSet_EnumType)(0), // 11: google.protobuf.FeatureSet.EnumType + (FeatureSet_RepeatedFieldEncoding)(0), // 12: google.protobuf.FeatureSet.RepeatedFieldEncoding + (FeatureSet_Utf8Validation)(0), // 13: google.protobuf.FeatureSet.Utf8Validation + (FeatureSet_MessageEncoding)(0), // 14: google.protobuf.FeatureSet.MessageEncoding + (FeatureSet_JsonFormat)(0), // 15: google.protobuf.FeatureSet.JsonFormat + (GeneratedCodeInfo_Annotation_Semantic)(0), // 16: google.protobuf.GeneratedCodeInfo.Annotation.Semantic + (*FileDescriptorSet)(nil), // 17: google.protobuf.FileDescriptorSet + (*FileDescriptorProto)(nil), // 18: google.protobuf.FileDescriptorProto + (*DescriptorProto)(nil), // 19: google.protobuf.DescriptorProto + (*ExtensionRangeOptions)(nil), // 20: google.protobuf.ExtensionRangeOptions + (*FieldDescriptorProto)(nil), // 21: google.protobuf.FieldDescriptorProto + (*OneofDescriptorProto)(nil), // 22: google.protobuf.OneofDescriptorProto + (*EnumDescriptorProto)(nil), // 23: google.protobuf.EnumDescriptorProto + (*EnumValueDescriptorProto)(nil), // 24: google.protobuf.EnumValueDescriptorProto + (*ServiceDescriptorProto)(nil), // 25: google.protobuf.ServiceDescriptorProto + (*MethodDescriptorProto)(nil), // 26: google.protobuf.MethodDescriptorProto + (*FileOptions)(nil), // 27: google.protobuf.FileOptions + (*MessageOptions)(nil), // 28: google.protobuf.MessageOptions + (*FieldOptions)(nil), // 29: google.protobuf.FieldOptions + (*OneofOptions)(nil), // 30: google.protobuf.OneofOptions + (*EnumOptions)(nil), // 31: google.protobuf.EnumOptions + (*EnumValueOptions)(nil), // 32: google.protobuf.EnumValueOptions + (*ServiceOptions)(nil), // 33: google.protobuf.ServiceOptions + (*MethodOptions)(nil), // 34: google.protobuf.MethodOptions + (*UninterpretedOption)(nil), // 35: google.protobuf.UninterpretedOption + (*FeatureSet)(nil), // 36: google.protobuf.FeatureSet + (*FeatureSetDefaults)(nil), // 37: google.protobuf.FeatureSetDefaults + (*SourceCodeInfo)(nil), // 38: google.protobuf.SourceCodeInfo + (*GeneratedCodeInfo)(nil), // 39: google.protobuf.GeneratedCodeInfo + (*DescriptorProto_ExtensionRange)(nil), // 40: google.protobuf.DescriptorProto.ExtensionRange + (*DescriptorProto_ReservedRange)(nil), // 41: google.protobuf.DescriptorProto.ReservedRange + (*ExtensionRangeOptions_Declaration)(nil), // 42: google.protobuf.ExtensionRangeOptions.Declaration + (*EnumDescriptorProto_EnumReservedRange)(nil), // 43: google.protobuf.EnumDescriptorProto.EnumReservedRange + (*FieldOptions_EditionDefault)(nil), // 44: google.protobuf.FieldOptions.EditionDefault + (*FieldOptions_FeatureSupport)(nil), // 45: google.protobuf.FieldOptions.FeatureSupport + (*UninterpretedOption_NamePart)(nil), // 46: google.protobuf.UninterpretedOption.NamePart + (*FeatureSetDefaults_FeatureSetEditionDefault)(nil), // 47: google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault + (*SourceCodeInfo_Location)(nil), // 48: google.protobuf.SourceCodeInfo.Location + (*GeneratedCodeInfo_Annotation)(nil), // 49: google.protobuf.GeneratedCodeInfo.Annotation } var file_google_protobuf_descriptor_proto_depIdxs = []int32{ - 11, // 0: google.protobuf.FileDescriptorSet.file:type_name -> google.protobuf.FileDescriptorProto - 12, // 1: google.protobuf.FileDescriptorProto.message_type:type_name -> google.protobuf.DescriptorProto - 16, // 2: google.protobuf.FileDescriptorProto.enum_type:type_name -> google.protobuf.EnumDescriptorProto - 18, // 3: google.protobuf.FileDescriptorProto.service:type_name -> google.protobuf.ServiceDescriptorProto - 14, // 4: google.protobuf.FileDescriptorProto.extension:type_name -> google.protobuf.FieldDescriptorProto - 20, // 5: google.protobuf.FileDescriptorProto.options:type_name -> google.protobuf.FileOptions - 29, // 6: google.protobuf.FileDescriptorProto.source_code_info:type_name -> google.protobuf.SourceCodeInfo - 14, // 7: google.protobuf.DescriptorProto.field:type_name -> google.protobuf.FieldDescriptorProto - 14, // 8: google.protobuf.DescriptorProto.extension:type_name -> google.protobuf.FieldDescriptorProto - 12, // 9: google.protobuf.DescriptorProto.nested_type:type_name -> google.protobuf.DescriptorProto - 16, // 10: google.protobuf.DescriptorProto.enum_type:type_name -> google.protobuf.EnumDescriptorProto - 31, // 11: google.protobuf.DescriptorProto.extension_range:type_name -> google.protobuf.DescriptorProto.ExtensionRange - 15, // 12: google.protobuf.DescriptorProto.oneof_decl:type_name -> google.protobuf.OneofDescriptorProto - 21, // 13: google.protobuf.DescriptorProto.options:type_name -> google.protobuf.MessageOptions - 32, // 14: google.protobuf.DescriptorProto.reserved_range:type_name -> google.protobuf.DescriptorProto.ReservedRange - 28, // 15: google.protobuf.ExtensionRangeOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption - 33, // 16: google.protobuf.ExtensionRangeOptions.declaration:type_name -> google.protobuf.ExtensionRangeOptions.Declaration - 0, // 17: google.protobuf.ExtensionRangeOptions.verification:type_name -> google.protobuf.ExtensionRangeOptions.VerificationState - 2, // 18: google.protobuf.FieldDescriptorProto.label:type_name -> google.protobuf.FieldDescriptorProto.Label - 1, // 19: google.protobuf.FieldDescriptorProto.type:type_name -> google.protobuf.FieldDescriptorProto.Type - 22, // 20: google.protobuf.FieldDescriptorProto.options:type_name -> google.protobuf.FieldOptions - 23, // 21: google.protobuf.OneofDescriptorProto.options:type_name -> google.protobuf.OneofOptions - 17, // 22: google.protobuf.EnumDescriptorProto.value:type_name -> google.protobuf.EnumValueDescriptorProto - 24, // 23: google.protobuf.EnumDescriptorProto.options:type_name -> google.protobuf.EnumOptions - 34, // 24: google.protobuf.EnumDescriptorProto.reserved_range:type_name -> google.protobuf.EnumDescriptorProto.EnumReservedRange - 25, // 25: google.protobuf.EnumValueDescriptorProto.options:type_name -> google.protobuf.EnumValueOptions - 19, // 26: google.protobuf.ServiceDescriptorProto.method:type_name -> google.protobuf.MethodDescriptorProto - 26, // 27: google.protobuf.ServiceDescriptorProto.options:type_name -> google.protobuf.ServiceOptions - 27, // 28: google.protobuf.MethodDescriptorProto.options:type_name -> google.protobuf.MethodOptions - 3, // 29: google.protobuf.FileOptions.optimize_for:type_name -> google.protobuf.FileOptions.OptimizeMode - 28, // 30: google.protobuf.FileOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption - 28, // 31: google.protobuf.MessageOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption - 4, // 32: google.protobuf.FieldOptions.ctype:type_name -> google.protobuf.FieldOptions.CType - 5, // 33: google.protobuf.FieldOptions.jstype:type_name -> google.protobuf.FieldOptions.JSType - 6, // 34: google.protobuf.FieldOptions.retention:type_name -> google.protobuf.FieldOptions.OptionRetention - 7, // 35: google.protobuf.FieldOptions.target:type_name -> google.protobuf.FieldOptions.OptionTargetType - 7, // 36: google.protobuf.FieldOptions.targets:type_name -> google.protobuf.FieldOptions.OptionTargetType - 28, // 37: google.protobuf.FieldOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption - 28, // 38: google.protobuf.OneofOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption - 28, // 39: google.protobuf.EnumOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption - 28, // 40: google.protobuf.EnumValueOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption - 28, // 41: google.protobuf.ServiceOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption - 8, // 42: google.protobuf.MethodOptions.idempotency_level:type_name -> google.protobuf.MethodOptions.IdempotencyLevel - 28, // 43: google.protobuf.MethodOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption - 35, // 44: google.protobuf.UninterpretedOption.name:type_name -> google.protobuf.UninterpretedOption.NamePart - 36, // 45: google.protobuf.SourceCodeInfo.location:type_name -> google.protobuf.SourceCodeInfo.Location - 37, // 46: google.protobuf.GeneratedCodeInfo.annotation:type_name -> google.protobuf.GeneratedCodeInfo.Annotation - 13, // 47: google.protobuf.DescriptorProto.ExtensionRange.options:type_name -> google.protobuf.ExtensionRangeOptions - 9, // 48: google.protobuf.GeneratedCodeInfo.Annotation.semantic:type_name -> google.protobuf.GeneratedCodeInfo.Annotation.Semantic - 49, // [49:49] is the sub-list for method output_type - 49, // [49:49] is the sub-list for method input_type - 49, // [49:49] is the sub-list for extension type_name - 49, // [49:49] is the sub-list for extension extendee - 0, // [0:49] is the sub-list for field type_name + 18, // 0: google.protobuf.FileDescriptorSet.file:type_name -> google.protobuf.FileDescriptorProto + 19, // 1: google.protobuf.FileDescriptorProto.message_type:type_name -> google.protobuf.DescriptorProto + 23, // 2: google.protobuf.FileDescriptorProto.enum_type:type_name -> google.protobuf.EnumDescriptorProto + 25, // 3: google.protobuf.FileDescriptorProto.service:type_name -> google.protobuf.ServiceDescriptorProto + 21, // 4: google.protobuf.FileDescriptorProto.extension:type_name -> google.protobuf.FieldDescriptorProto + 27, // 5: google.protobuf.FileDescriptorProto.options:type_name -> google.protobuf.FileOptions + 38, // 6: google.protobuf.FileDescriptorProto.source_code_info:type_name -> google.protobuf.SourceCodeInfo + 0, // 7: google.protobuf.FileDescriptorProto.edition:type_name -> google.protobuf.Edition + 21, // 8: google.protobuf.DescriptorProto.field:type_name -> google.protobuf.FieldDescriptorProto + 21, // 9: google.protobuf.DescriptorProto.extension:type_name -> google.protobuf.FieldDescriptorProto + 19, // 10: google.protobuf.DescriptorProto.nested_type:type_name -> google.protobuf.DescriptorProto + 23, // 11: google.protobuf.DescriptorProto.enum_type:type_name -> google.protobuf.EnumDescriptorProto + 40, // 12: google.protobuf.DescriptorProto.extension_range:type_name -> google.protobuf.DescriptorProto.ExtensionRange + 22, // 13: google.protobuf.DescriptorProto.oneof_decl:type_name -> google.protobuf.OneofDescriptorProto + 28, // 14: google.protobuf.DescriptorProto.options:type_name -> google.protobuf.MessageOptions + 41, // 15: google.protobuf.DescriptorProto.reserved_range:type_name -> google.protobuf.DescriptorProto.ReservedRange + 35, // 16: google.protobuf.ExtensionRangeOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption + 42, // 17: google.protobuf.ExtensionRangeOptions.declaration:type_name -> google.protobuf.ExtensionRangeOptions.Declaration + 36, // 18: google.protobuf.ExtensionRangeOptions.features:type_name -> google.protobuf.FeatureSet + 1, // 19: google.protobuf.ExtensionRangeOptions.verification:type_name -> google.protobuf.ExtensionRangeOptions.VerificationState + 3, // 20: google.protobuf.FieldDescriptorProto.label:type_name -> google.protobuf.FieldDescriptorProto.Label + 2, // 21: google.protobuf.FieldDescriptorProto.type:type_name -> google.protobuf.FieldDescriptorProto.Type + 29, // 22: google.protobuf.FieldDescriptorProto.options:type_name -> google.protobuf.FieldOptions + 30, // 23: google.protobuf.OneofDescriptorProto.options:type_name -> google.protobuf.OneofOptions + 24, // 24: google.protobuf.EnumDescriptorProto.value:type_name -> google.protobuf.EnumValueDescriptorProto + 31, // 25: google.protobuf.EnumDescriptorProto.options:type_name -> google.protobuf.EnumOptions + 43, // 26: google.protobuf.EnumDescriptorProto.reserved_range:type_name -> google.protobuf.EnumDescriptorProto.EnumReservedRange + 32, // 27: google.protobuf.EnumValueDescriptorProto.options:type_name -> google.protobuf.EnumValueOptions + 26, // 28: google.protobuf.ServiceDescriptorProto.method:type_name -> google.protobuf.MethodDescriptorProto + 33, // 29: google.protobuf.ServiceDescriptorProto.options:type_name -> google.protobuf.ServiceOptions + 34, // 30: google.protobuf.MethodDescriptorProto.options:type_name -> google.protobuf.MethodOptions + 4, // 31: google.protobuf.FileOptions.optimize_for:type_name -> google.protobuf.FileOptions.OptimizeMode + 36, // 32: google.protobuf.FileOptions.features:type_name -> google.protobuf.FeatureSet + 35, // 33: google.protobuf.FileOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption + 36, // 34: google.protobuf.MessageOptions.features:type_name -> google.protobuf.FeatureSet + 35, // 35: google.protobuf.MessageOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption + 5, // 36: google.protobuf.FieldOptions.ctype:type_name -> google.protobuf.FieldOptions.CType + 6, // 37: google.protobuf.FieldOptions.jstype:type_name -> google.protobuf.FieldOptions.JSType + 7, // 38: google.protobuf.FieldOptions.retention:type_name -> google.protobuf.FieldOptions.OptionRetention + 8, // 39: google.protobuf.FieldOptions.targets:type_name -> google.protobuf.FieldOptions.OptionTargetType + 44, // 40: google.protobuf.FieldOptions.edition_defaults:type_name -> google.protobuf.FieldOptions.EditionDefault + 36, // 41: google.protobuf.FieldOptions.features:type_name -> google.protobuf.FeatureSet + 45, // 42: google.protobuf.FieldOptions.feature_support:type_name -> google.protobuf.FieldOptions.FeatureSupport + 35, // 43: google.protobuf.FieldOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption + 36, // 44: google.protobuf.OneofOptions.features:type_name -> google.protobuf.FeatureSet + 35, // 45: google.protobuf.OneofOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption + 36, // 46: google.protobuf.EnumOptions.features:type_name -> google.protobuf.FeatureSet + 35, // 47: google.protobuf.EnumOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption + 36, // 48: google.protobuf.EnumValueOptions.features:type_name -> google.protobuf.FeatureSet + 45, // 49: google.protobuf.EnumValueOptions.feature_support:type_name -> google.protobuf.FieldOptions.FeatureSupport + 35, // 50: google.protobuf.EnumValueOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption + 36, // 51: google.protobuf.ServiceOptions.features:type_name -> google.protobuf.FeatureSet + 35, // 52: google.protobuf.ServiceOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption + 9, // 53: google.protobuf.MethodOptions.idempotency_level:type_name -> google.protobuf.MethodOptions.IdempotencyLevel + 36, // 54: google.protobuf.MethodOptions.features:type_name -> google.protobuf.FeatureSet + 35, // 55: google.protobuf.MethodOptions.uninterpreted_option:type_name -> google.protobuf.UninterpretedOption + 46, // 56: google.protobuf.UninterpretedOption.name:type_name -> google.protobuf.UninterpretedOption.NamePart + 10, // 57: google.protobuf.FeatureSet.field_presence:type_name -> google.protobuf.FeatureSet.FieldPresence + 11, // 58: google.protobuf.FeatureSet.enum_type:type_name -> google.protobuf.FeatureSet.EnumType + 12, // 59: google.protobuf.FeatureSet.repeated_field_encoding:type_name -> google.protobuf.FeatureSet.RepeatedFieldEncoding + 13, // 60: google.protobuf.FeatureSet.utf8_validation:type_name -> google.protobuf.FeatureSet.Utf8Validation + 14, // 61: google.protobuf.FeatureSet.message_encoding:type_name -> google.protobuf.FeatureSet.MessageEncoding + 15, // 62: google.protobuf.FeatureSet.json_format:type_name -> google.protobuf.FeatureSet.JsonFormat + 47, // 63: google.protobuf.FeatureSetDefaults.defaults:type_name -> google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault + 0, // 64: google.protobuf.FeatureSetDefaults.minimum_edition:type_name -> google.protobuf.Edition + 0, // 65: google.protobuf.FeatureSetDefaults.maximum_edition:type_name -> google.protobuf.Edition + 48, // 66: google.protobuf.SourceCodeInfo.location:type_name -> google.protobuf.SourceCodeInfo.Location + 49, // 67: google.protobuf.GeneratedCodeInfo.annotation:type_name -> google.protobuf.GeneratedCodeInfo.Annotation + 20, // 68: google.protobuf.DescriptorProto.ExtensionRange.options:type_name -> google.protobuf.ExtensionRangeOptions + 0, // 69: google.protobuf.FieldOptions.EditionDefault.edition:type_name -> google.protobuf.Edition + 0, // 70: google.protobuf.FieldOptions.FeatureSupport.edition_introduced:type_name -> google.protobuf.Edition + 0, // 71: google.protobuf.FieldOptions.FeatureSupport.edition_deprecated:type_name -> google.protobuf.Edition + 0, // 72: google.protobuf.FieldOptions.FeatureSupport.edition_removed:type_name -> google.protobuf.Edition + 0, // 73: google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault.edition:type_name -> google.protobuf.Edition + 36, // 74: google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault.overridable_features:type_name -> google.protobuf.FeatureSet + 36, // 75: google.protobuf.FeatureSetDefaults.FeatureSetEditionDefault.fixed_features:type_name -> google.protobuf.FeatureSet + 16, // 76: google.protobuf.GeneratedCodeInfo.Annotation.semantic:type_name -> google.protobuf.GeneratedCodeInfo.Annotation.Semantic + 77, // [77:77] is the sub-list for method output_type + 77, // [77:77] is the sub-list for method input_type + 77, // [77:77] is the sub-list for extension type_name + 77, // [77:77] is the sub-list for extension extendee + 0, // [0:77] is the sub-list for field type_name } func init() { file_google_protobuf_descriptor_proto_init() } @@ -4227,369 +5319,13 @@ func file_google_protobuf_descriptor_proto_init() { if File_google_protobuf_descriptor_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_google_protobuf_descriptor_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*FileDescriptorSet); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*FileDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*DescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ExtensionRangeOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*FieldDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*OneofDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*EnumDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*EnumValueDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ServiceDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*MethodDescriptorProto); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*FileOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[11].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*MessageOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*FieldOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[13].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*OneofOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[14].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*EnumOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[15].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*EnumValueOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[16].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ServiceOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[17].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*MethodOptions); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - case 3: - return &v.extensionFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[18].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*UninterpretedOption); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[19].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*SourceCodeInfo); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[20].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GeneratedCodeInfo); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[21].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*DescriptorProto_ExtensionRange); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[22].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*DescriptorProto_ReservedRange); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[23].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*ExtensionRangeOptions_Declaration); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[24].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*EnumDescriptorProto_EnumReservedRange); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[25].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*UninterpretedOption_NamePart); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[26].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*SourceCodeInfo_Location); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_google_protobuf_descriptor_proto_msgTypes[27].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GeneratedCodeInfo_Annotation); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_google_protobuf_descriptor_proto_rawDesc, - NumEnums: 10, - NumMessages: 28, + NumEnums: 17, + NumMessages: 33, NumExtensions: 0, NumServices: 0, }, diff --git a/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go b/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go new file mode 100644 index 00000000..c7e860fc --- /dev/null +++ b/vendor/google.golang.org/protobuf/types/gofeaturespb/go_features.pb.go @@ -0,0 +1,165 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2023 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: google/protobuf/go_features.proto + +package gofeaturespb + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + descriptorpb "google.golang.org/protobuf/types/descriptorpb" + reflect "reflect" + sync "sync" +) + +type GoFeatures struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Whether or not to generate the deprecated UnmarshalJSON method for enums. + LegacyUnmarshalJsonEnum *bool `protobuf:"varint,1,opt,name=legacy_unmarshal_json_enum,json=legacyUnmarshalJsonEnum" json:"legacy_unmarshal_json_enum,omitempty"` +} + +func (x *GoFeatures) Reset() { + *x = GoFeatures{} + mi := &file_google_protobuf_go_features_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *GoFeatures) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GoFeatures) ProtoMessage() {} + +func (x *GoFeatures) ProtoReflect() protoreflect.Message { + mi := &file_google_protobuf_go_features_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GoFeatures.ProtoReflect.Descriptor instead. +func (*GoFeatures) Descriptor() ([]byte, []int) { + return file_google_protobuf_go_features_proto_rawDescGZIP(), []int{0} +} + +func (x *GoFeatures) GetLegacyUnmarshalJsonEnum() bool { + if x != nil && x.LegacyUnmarshalJsonEnum != nil { + return *x.LegacyUnmarshalJsonEnum + } + return false +} + +var file_google_protobuf_go_features_proto_extTypes = []protoimpl.ExtensionInfo{ + { + ExtendedType: (*descriptorpb.FeatureSet)(nil), + ExtensionType: (*GoFeatures)(nil), + Field: 1002, + Name: "pb.go", + Tag: "bytes,1002,opt,name=go", + Filename: "google/protobuf/go_features.proto", + }, +} + +// Extension fields to descriptorpb.FeatureSet. +var ( + // optional pb.GoFeatures go = 1002; + E_Go = &file_google_protobuf_go_features_proto_extTypes[0] +) + +var File_google_protobuf_go_features_proto protoreflect.FileDescriptor + +var file_google_protobuf_go_features_proto_rawDesc = []byte{ + 0x0a, 0x21, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2f, 0x67, 0x6f, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x12, 0x02, 0x70, 0x62, 0x1a, 0x20, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, + 0x74, 0x6f, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xcd, 0x01, 0x0a, 0x0a, 0x47, 0x6f, + 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0xbe, 0x01, 0x0a, 0x1a, 0x6c, 0x65, 0x67, + 0x61, 0x63, 0x79, 0x5f, 0x75, 0x6e, 0x6d, 0x61, 0x72, 0x73, 0x68, 0x61, 0x6c, 0x5f, 0x6a, 0x73, + 0x6f, 0x6e, 0x5f, 0x65, 0x6e, 0x75, 0x6d, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x42, 0x80, 0x01, + 0x88, 0x01, 0x01, 0x98, 0x01, 0x06, 0x98, 0x01, 0x01, 0xa2, 0x01, 0x09, 0x12, 0x04, 0x74, 0x72, + 0x75, 0x65, 0x18, 0x84, 0x07, 0xa2, 0x01, 0x0a, 0x12, 0x05, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x18, + 0xe7, 0x07, 0xb2, 0x01, 0x5b, 0x08, 0xe8, 0x07, 0x10, 0xe8, 0x07, 0x1a, 0x53, 0x54, 0x68, 0x65, + 0x20, 0x6c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x20, 0x55, 0x6e, 0x6d, 0x61, 0x72, 0x73, 0x68, 0x61, + 0x6c, 0x4a, 0x53, 0x4f, 0x4e, 0x20, 0x41, 0x50, 0x49, 0x20, 0x69, 0x73, 0x20, 0x64, 0x65, 0x70, + 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x6c, 0x6c, + 0x20, 0x62, 0x65, 0x20, 0x72, 0x65, 0x6d, 0x6f, 0x76, 0x65, 0x64, 0x20, 0x69, 0x6e, 0x20, 0x61, + 0x20, 0x66, 0x75, 0x74, 0x75, 0x72, 0x65, 0x20, 0x65, 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x2e, + 0x52, 0x17, 0x6c, 0x65, 0x67, 0x61, 0x63, 0x79, 0x55, 0x6e, 0x6d, 0x61, 0x72, 0x73, 0x68, 0x61, + 0x6c, 0x4a, 0x73, 0x6f, 0x6e, 0x45, 0x6e, 0x75, 0x6d, 0x3a, 0x3c, 0x0a, 0x02, 0x67, 0x6f, 0x12, + 0x1b, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x74, 0x18, 0xea, 0x07, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x0e, 0x2e, 0x70, 0x62, 0x2e, 0x47, 0x6f, 0x46, 0x65, 0x61, 0x74, 0x75, + 0x72, 0x65, 0x73, 0x52, 0x02, 0x67, 0x6f, 0x42, 0x2f, 0x5a, 0x2d, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x67, 0x6f, 0x6c, 0x61, 0x6e, 0x67, 0x2e, 0x6f, 0x72, 0x67, 0x2f, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2f, 0x67, 0x6f, 0x66, 0x65, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x70, 0x62, +} + +var ( + file_google_protobuf_go_features_proto_rawDescOnce sync.Once + file_google_protobuf_go_features_proto_rawDescData = file_google_protobuf_go_features_proto_rawDesc +) + +func file_google_protobuf_go_features_proto_rawDescGZIP() []byte { + file_google_protobuf_go_features_proto_rawDescOnce.Do(func() { + file_google_protobuf_go_features_proto_rawDescData = protoimpl.X.CompressGZIP(file_google_protobuf_go_features_proto_rawDescData) + }) + return file_google_protobuf_go_features_proto_rawDescData +} + +var file_google_protobuf_go_features_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_google_protobuf_go_features_proto_goTypes = []any{ + (*GoFeatures)(nil), // 0: pb.GoFeatures + (*descriptorpb.FeatureSet)(nil), // 1: google.protobuf.FeatureSet +} +var file_google_protobuf_go_features_proto_depIdxs = []int32{ + 1, // 0: pb.go:extendee -> google.protobuf.FeatureSet + 0, // 1: pb.go:type_name -> pb.GoFeatures + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 1, // [1:2] is the sub-list for extension type_name + 0, // [0:1] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_google_protobuf_go_features_proto_init() } +func file_google_protobuf_go_features_proto_init() { + if File_google_protobuf_go_features_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_google_protobuf_go_features_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 1, + NumServices: 0, + }, + GoTypes: file_google_protobuf_go_features_proto_goTypes, + DependencyIndexes: file_google_protobuf_go_features_proto_depIdxs, + MessageInfos: file_google_protobuf_go_features_proto_msgTypes, + ExtensionInfos: file_google_protobuf_go_features_proto_extTypes, + }.Build() + File_google_protobuf_go_features_proto = out.File + file_google_protobuf_go_features_proto_rawDesc = nil + file_google_protobuf_go_features_proto_goTypes = nil + file_google_protobuf_go_features_proto_depIdxs = nil +} diff --git a/vendor/gopkg.in/yaml.v2/.travis.yml b/vendor/gopkg.in/yaml.v2/.travis.yml deleted file mode 100644 index 7348c50c..00000000 --- a/vendor/gopkg.in/yaml.v2/.travis.yml +++ /dev/null @@ -1,17 +0,0 @@ -language: go - -go: - - "1.4.x" - - "1.5.x" - - "1.6.x" - - "1.7.x" - - "1.8.x" - - "1.9.x" - - "1.10.x" - - "1.11.x" - - "1.12.x" - - "1.13.x" - - "1.14.x" - - "tip" - -go_import_path: gopkg.in/yaml.v2 diff --git a/vendor/gopkg.in/yaml.v2/LICENSE.libyaml b/vendor/gopkg.in/yaml.v2/LICENSE.libyaml deleted file mode 100644 index 8da58fbf..00000000 --- a/vendor/gopkg.in/yaml.v2/LICENSE.libyaml +++ /dev/null @@ -1,31 +0,0 @@ -The following files were ported to Go from C files of libyaml, and thus -are still covered by their original copyright and license: - - apic.go - emitterc.go - parserc.go - readerc.go - scannerc.go - writerc.go - yamlh.go - yamlprivateh.go - -Copyright (c) 2006 Kirill Simonov - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/vendor/gopkg.in/yaml.v2/NOTICE b/vendor/gopkg.in/yaml.v2/NOTICE deleted file mode 100644 index 866d74a7..00000000 --- a/vendor/gopkg.in/yaml.v2/NOTICE +++ /dev/null @@ -1,13 +0,0 @@ -Copyright 2011-2016 Canonical Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/vendor/gopkg.in/yaml.v2/README.md b/vendor/gopkg.in/yaml.v2/README.md deleted file mode 100644 index b50c6e87..00000000 --- a/vendor/gopkg.in/yaml.v2/README.md +++ /dev/null @@ -1,133 +0,0 @@ -# YAML support for the Go language - -Introduction ------------- - -The yaml package enables Go programs to comfortably encode and decode YAML -values. It was developed within [Canonical](https://www.canonical.com) as -part of the [juju](https://juju.ubuntu.com) project, and is based on a -pure Go port of the well-known [libyaml](http://pyyaml.org/wiki/LibYAML) -C library to parse and generate YAML data quickly and reliably. - -Compatibility -------------- - -The yaml package supports most of YAML 1.1 and 1.2, including support for -anchors, tags, map merging, etc. Multi-document unmarshalling is not yet -implemented, and base-60 floats from YAML 1.1 are purposefully not -supported since they're a poor design and are gone in YAML 1.2. - -Installation and usage ----------------------- - -The import path for the package is *gopkg.in/yaml.v2*. - -To install it, run: - - go get gopkg.in/yaml.v2 - -API documentation ------------------ - -If opened in a browser, the import path itself leads to the API documentation: - - * [https://gopkg.in/yaml.v2](https://gopkg.in/yaml.v2) - -API stability -------------- - -The package API for yaml v2 will remain stable as described in [gopkg.in](https://gopkg.in). - - -License -------- - -The yaml package is licensed under the Apache License 2.0. Please see the LICENSE file for details. - - -Example -------- - -```Go -package main - -import ( - "fmt" - "log" - - "gopkg.in/yaml.v2" -) - -var data = ` -a: Easy! -b: - c: 2 - d: [3, 4] -` - -// Note: struct fields must be public in order for unmarshal to -// correctly populate the data. -type T struct { - A string - B struct { - RenamedC int `yaml:"c"` - D []int `yaml:",flow"` - } -} - -func main() { - t := T{} - - err := yaml.Unmarshal([]byte(data), &t) - if err != nil { - log.Fatalf("error: %v", err) - } - fmt.Printf("--- t:\n%v\n\n", t) - - d, err := yaml.Marshal(&t) - if err != nil { - log.Fatalf("error: %v", err) - } - fmt.Printf("--- t dump:\n%s\n\n", string(d)) - - m := make(map[interface{}]interface{}) - - err = yaml.Unmarshal([]byte(data), &m) - if err != nil { - log.Fatalf("error: %v", err) - } - fmt.Printf("--- m:\n%v\n\n", m) - - d, err = yaml.Marshal(&m) - if err != nil { - log.Fatalf("error: %v", err) - } - fmt.Printf("--- m dump:\n%s\n\n", string(d)) -} -``` - -This example will generate the following output: - -``` ---- t: -{Easy! {2 [3 4]}} - ---- t dump: -a: Easy! -b: - c: 2 - d: [3, 4] - - ---- m: -map[a:Easy! b:map[c:2 d:[3 4]]] - ---- m dump: -a: Easy! -b: - c: 2 - d: - - 3 - - 4 -``` - diff --git a/vendor/gopkg.in/yaml.v2/apic.go b/vendor/gopkg.in/yaml.v2/apic.go deleted file mode 100644 index acf71402..00000000 --- a/vendor/gopkg.in/yaml.v2/apic.go +++ /dev/null @@ -1,744 +0,0 @@ -package yaml - -import ( - "io" -) - -func yaml_insert_token(parser *yaml_parser_t, pos int, token *yaml_token_t) { - //fmt.Println("yaml_insert_token", "pos:", pos, "typ:", token.typ, "head:", parser.tokens_head, "len:", len(parser.tokens)) - - // Check if we can move the queue at the beginning of the buffer. - if parser.tokens_head > 0 && len(parser.tokens) == cap(parser.tokens) { - if parser.tokens_head != len(parser.tokens) { - copy(parser.tokens, parser.tokens[parser.tokens_head:]) - } - parser.tokens = parser.tokens[:len(parser.tokens)-parser.tokens_head] - parser.tokens_head = 0 - } - parser.tokens = append(parser.tokens, *token) - if pos < 0 { - return - } - copy(parser.tokens[parser.tokens_head+pos+1:], parser.tokens[parser.tokens_head+pos:]) - parser.tokens[parser.tokens_head+pos] = *token -} - -// Create a new parser object. -func yaml_parser_initialize(parser *yaml_parser_t) bool { - *parser = yaml_parser_t{ - raw_buffer: make([]byte, 0, input_raw_buffer_size), - buffer: make([]byte, 0, input_buffer_size), - } - return true -} - -// Destroy a parser object. -func yaml_parser_delete(parser *yaml_parser_t) { - *parser = yaml_parser_t{} -} - -// String read handler. -func yaml_string_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err error) { - if parser.input_pos == len(parser.input) { - return 0, io.EOF - } - n = copy(buffer, parser.input[parser.input_pos:]) - parser.input_pos += n - return n, nil -} - -// Reader read handler. -func yaml_reader_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err error) { - return parser.input_reader.Read(buffer) -} - -// Set a string input. -func yaml_parser_set_input_string(parser *yaml_parser_t, input []byte) { - if parser.read_handler != nil { - panic("must set the input source only once") - } - parser.read_handler = yaml_string_read_handler - parser.input = input - parser.input_pos = 0 -} - -// Set a file input. -func yaml_parser_set_input_reader(parser *yaml_parser_t, r io.Reader) { - if parser.read_handler != nil { - panic("must set the input source only once") - } - parser.read_handler = yaml_reader_read_handler - parser.input_reader = r -} - -// Set the source encoding. -func yaml_parser_set_encoding(parser *yaml_parser_t, encoding yaml_encoding_t) { - if parser.encoding != yaml_ANY_ENCODING { - panic("must set the encoding only once") - } - parser.encoding = encoding -} - -var disableLineWrapping = false - -// Create a new emitter object. -func yaml_emitter_initialize(emitter *yaml_emitter_t) { - *emitter = yaml_emitter_t{ - buffer: make([]byte, output_buffer_size), - raw_buffer: make([]byte, 0, output_raw_buffer_size), - states: make([]yaml_emitter_state_t, 0, initial_stack_size), - events: make([]yaml_event_t, 0, initial_queue_size), - } - if disableLineWrapping { - emitter.best_width = -1 - } -} - -// Destroy an emitter object. -func yaml_emitter_delete(emitter *yaml_emitter_t) { - *emitter = yaml_emitter_t{} -} - -// String write handler. -func yaml_string_write_handler(emitter *yaml_emitter_t, buffer []byte) error { - *emitter.output_buffer = append(*emitter.output_buffer, buffer...) - return nil -} - -// yaml_writer_write_handler uses emitter.output_writer to write the -// emitted text. -func yaml_writer_write_handler(emitter *yaml_emitter_t, buffer []byte) error { - _, err := emitter.output_writer.Write(buffer) - return err -} - -// Set a string output. -func yaml_emitter_set_output_string(emitter *yaml_emitter_t, output_buffer *[]byte) { - if emitter.write_handler != nil { - panic("must set the output target only once") - } - emitter.write_handler = yaml_string_write_handler - emitter.output_buffer = output_buffer -} - -// Set a file output. -func yaml_emitter_set_output_writer(emitter *yaml_emitter_t, w io.Writer) { - if emitter.write_handler != nil { - panic("must set the output target only once") - } - emitter.write_handler = yaml_writer_write_handler - emitter.output_writer = w -} - -// Set the output encoding. -func yaml_emitter_set_encoding(emitter *yaml_emitter_t, encoding yaml_encoding_t) { - if emitter.encoding != yaml_ANY_ENCODING { - panic("must set the output encoding only once") - } - emitter.encoding = encoding -} - -// Set the canonical output style. -func yaml_emitter_set_canonical(emitter *yaml_emitter_t, canonical bool) { - emitter.canonical = canonical -} - -//// Set the indentation increment. -func yaml_emitter_set_indent(emitter *yaml_emitter_t, indent int) { - if indent < 2 || indent > 9 { - indent = 2 - } - emitter.best_indent = indent -} - -// Set the preferred line width. -func yaml_emitter_set_width(emitter *yaml_emitter_t, width int) { - if width < 0 { - width = -1 - } - emitter.best_width = width -} - -// Set if unescaped non-ASCII characters are allowed. -func yaml_emitter_set_unicode(emitter *yaml_emitter_t, unicode bool) { - emitter.unicode = unicode -} - -// Set the preferred line break character. -func yaml_emitter_set_break(emitter *yaml_emitter_t, line_break yaml_break_t) { - emitter.line_break = line_break -} - -///* -// * Destroy a token object. -// */ -// -//YAML_DECLARE(void) -//yaml_token_delete(yaml_token_t *token) -//{ -// assert(token); // Non-NULL token object expected. -// -// switch (token.type) -// { -// case YAML_TAG_DIRECTIVE_TOKEN: -// yaml_free(token.data.tag_directive.handle); -// yaml_free(token.data.tag_directive.prefix); -// break; -// -// case YAML_ALIAS_TOKEN: -// yaml_free(token.data.alias.value); -// break; -// -// case YAML_ANCHOR_TOKEN: -// yaml_free(token.data.anchor.value); -// break; -// -// case YAML_TAG_TOKEN: -// yaml_free(token.data.tag.handle); -// yaml_free(token.data.tag.suffix); -// break; -// -// case YAML_SCALAR_TOKEN: -// yaml_free(token.data.scalar.value); -// break; -// -// default: -// break; -// } -// -// memset(token, 0, sizeof(yaml_token_t)); -//} -// -///* -// * Check if a string is a valid UTF-8 sequence. -// * -// * Check 'reader.c' for more details on UTF-8 encoding. -// */ -// -//static int -//yaml_check_utf8(yaml_char_t *start, size_t length) -//{ -// yaml_char_t *end = start+length; -// yaml_char_t *pointer = start; -// -// while (pointer < end) { -// unsigned char octet; -// unsigned int width; -// unsigned int value; -// size_t k; -// -// octet = pointer[0]; -// width = (octet & 0x80) == 0x00 ? 1 : -// (octet & 0xE0) == 0xC0 ? 2 : -// (octet & 0xF0) == 0xE0 ? 3 : -// (octet & 0xF8) == 0xF0 ? 4 : 0; -// value = (octet & 0x80) == 0x00 ? octet & 0x7F : -// (octet & 0xE0) == 0xC0 ? octet & 0x1F : -// (octet & 0xF0) == 0xE0 ? octet & 0x0F : -// (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0; -// if (!width) return 0; -// if (pointer+width > end) return 0; -// for (k = 1; k < width; k ++) { -// octet = pointer[k]; -// if ((octet & 0xC0) != 0x80) return 0; -// value = (value << 6) + (octet & 0x3F); -// } -// if (!((width == 1) || -// (width == 2 && value >= 0x80) || -// (width == 3 && value >= 0x800) || -// (width == 4 && value >= 0x10000))) return 0; -// -// pointer += width; -// } -// -// return 1; -//} -// - -// Create STREAM-START. -func yaml_stream_start_event_initialize(event *yaml_event_t, encoding yaml_encoding_t) { - *event = yaml_event_t{ - typ: yaml_STREAM_START_EVENT, - encoding: encoding, - } -} - -// Create STREAM-END. -func yaml_stream_end_event_initialize(event *yaml_event_t) { - *event = yaml_event_t{ - typ: yaml_STREAM_END_EVENT, - } -} - -// Create DOCUMENT-START. -func yaml_document_start_event_initialize( - event *yaml_event_t, - version_directive *yaml_version_directive_t, - tag_directives []yaml_tag_directive_t, - implicit bool, -) { - *event = yaml_event_t{ - typ: yaml_DOCUMENT_START_EVENT, - version_directive: version_directive, - tag_directives: tag_directives, - implicit: implicit, - } -} - -// Create DOCUMENT-END. -func yaml_document_end_event_initialize(event *yaml_event_t, implicit bool) { - *event = yaml_event_t{ - typ: yaml_DOCUMENT_END_EVENT, - implicit: implicit, - } -} - -///* -// * Create ALIAS. -// */ -// -//YAML_DECLARE(int) -//yaml_alias_event_initialize(event *yaml_event_t, anchor *yaml_char_t) -//{ -// mark yaml_mark_t = { 0, 0, 0 } -// anchor_copy *yaml_char_t = NULL -// -// assert(event) // Non-NULL event object is expected. -// assert(anchor) // Non-NULL anchor is expected. -// -// if (!yaml_check_utf8(anchor, strlen((char *)anchor))) return 0 -// -// anchor_copy = yaml_strdup(anchor) -// if (!anchor_copy) -// return 0 -// -// ALIAS_EVENT_INIT(*event, anchor_copy, mark, mark) -// -// return 1 -//} - -// Create SCALAR. -func yaml_scalar_event_initialize(event *yaml_event_t, anchor, tag, value []byte, plain_implicit, quoted_implicit bool, style yaml_scalar_style_t) bool { - *event = yaml_event_t{ - typ: yaml_SCALAR_EVENT, - anchor: anchor, - tag: tag, - value: value, - implicit: plain_implicit, - quoted_implicit: quoted_implicit, - style: yaml_style_t(style), - } - return true -} - -// Create SEQUENCE-START. -func yaml_sequence_start_event_initialize(event *yaml_event_t, anchor, tag []byte, implicit bool, style yaml_sequence_style_t) bool { - *event = yaml_event_t{ - typ: yaml_SEQUENCE_START_EVENT, - anchor: anchor, - tag: tag, - implicit: implicit, - style: yaml_style_t(style), - } - return true -} - -// Create SEQUENCE-END. -func yaml_sequence_end_event_initialize(event *yaml_event_t) bool { - *event = yaml_event_t{ - typ: yaml_SEQUENCE_END_EVENT, - } - return true -} - -// Create MAPPING-START. -func yaml_mapping_start_event_initialize(event *yaml_event_t, anchor, tag []byte, implicit bool, style yaml_mapping_style_t) { - *event = yaml_event_t{ - typ: yaml_MAPPING_START_EVENT, - anchor: anchor, - tag: tag, - implicit: implicit, - style: yaml_style_t(style), - } -} - -// Create MAPPING-END. -func yaml_mapping_end_event_initialize(event *yaml_event_t) { - *event = yaml_event_t{ - typ: yaml_MAPPING_END_EVENT, - } -} - -// Destroy an event object. -func yaml_event_delete(event *yaml_event_t) { - *event = yaml_event_t{} -} - -///* -// * Create a document object. -// */ -// -//YAML_DECLARE(int) -//yaml_document_initialize(document *yaml_document_t, -// version_directive *yaml_version_directive_t, -// tag_directives_start *yaml_tag_directive_t, -// tag_directives_end *yaml_tag_directive_t, -// start_implicit int, end_implicit int) -//{ -// struct { -// error yaml_error_type_t -// } context -// struct { -// start *yaml_node_t -// end *yaml_node_t -// top *yaml_node_t -// } nodes = { NULL, NULL, NULL } -// version_directive_copy *yaml_version_directive_t = NULL -// struct { -// start *yaml_tag_directive_t -// end *yaml_tag_directive_t -// top *yaml_tag_directive_t -// } tag_directives_copy = { NULL, NULL, NULL } -// value yaml_tag_directive_t = { NULL, NULL } -// mark yaml_mark_t = { 0, 0, 0 } -// -// assert(document) // Non-NULL document object is expected. -// assert((tag_directives_start && tag_directives_end) || -// (tag_directives_start == tag_directives_end)) -// // Valid tag directives are expected. -// -// if (!STACK_INIT(&context, nodes, INITIAL_STACK_SIZE)) goto error -// -// if (version_directive) { -// version_directive_copy = yaml_malloc(sizeof(yaml_version_directive_t)) -// if (!version_directive_copy) goto error -// version_directive_copy.major = version_directive.major -// version_directive_copy.minor = version_directive.minor -// } -// -// if (tag_directives_start != tag_directives_end) { -// tag_directive *yaml_tag_directive_t -// if (!STACK_INIT(&context, tag_directives_copy, INITIAL_STACK_SIZE)) -// goto error -// for (tag_directive = tag_directives_start -// tag_directive != tag_directives_end; tag_directive ++) { -// assert(tag_directive.handle) -// assert(tag_directive.prefix) -// if (!yaml_check_utf8(tag_directive.handle, -// strlen((char *)tag_directive.handle))) -// goto error -// if (!yaml_check_utf8(tag_directive.prefix, -// strlen((char *)tag_directive.prefix))) -// goto error -// value.handle = yaml_strdup(tag_directive.handle) -// value.prefix = yaml_strdup(tag_directive.prefix) -// if (!value.handle || !value.prefix) goto error -// if (!PUSH(&context, tag_directives_copy, value)) -// goto error -// value.handle = NULL -// value.prefix = NULL -// } -// } -// -// DOCUMENT_INIT(*document, nodes.start, nodes.end, version_directive_copy, -// tag_directives_copy.start, tag_directives_copy.top, -// start_implicit, end_implicit, mark, mark) -// -// return 1 -// -//error: -// STACK_DEL(&context, nodes) -// yaml_free(version_directive_copy) -// while (!STACK_EMPTY(&context, tag_directives_copy)) { -// value yaml_tag_directive_t = POP(&context, tag_directives_copy) -// yaml_free(value.handle) -// yaml_free(value.prefix) -// } -// STACK_DEL(&context, tag_directives_copy) -// yaml_free(value.handle) -// yaml_free(value.prefix) -// -// return 0 -//} -// -///* -// * Destroy a document object. -// */ -// -//YAML_DECLARE(void) -//yaml_document_delete(document *yaml_document_t) -//{ -// struct { -// error yaml_error_type_t -// } context -// tag_directive *yaml_tag_directive_t -// -// context.error = YAML_NO_ERROR // Eliminate a compiler warning. -// -// assert(document) // Non-NULL document object is expected. -// -// while (!STACK_EMPTY(&context, document.nodes)) { -// node yaml_node_t = POP(&context, document.nodes) -// yaml_free(node.tag) -// switch (node.type) { -// case YAML_SCALAR_NODE: -// yaml_free(node.data.scalar.value) -// break -// case YAML_SEQUENCE_NODE: -// STACK_DEL(&context, node.data.sequence.items) -// break -// case YAML_MAPPING_NODE: -// STACK_DEL(&context, node.data.mapping.pairs) -// break -// default: -// assert(0) // Should not happen. -// } -// } -// STACK_DEL(&context, document.nodes) -// -// yaml_free(document.version_directive) -// for (tag_directive = document.tag_directives.start -// tag_directive != document.tag_directives.end -// tag_directive++) { -// yaml_free(tag_directive.handle) -// yaml_free(tag_directive.prefix) -// } -// yaml_free(document.tag_directives.start) -// -// memset(document, 0, sizeof(yaml_document_t)) -//} -// -///** -// * Get a document node. -// */ -// -//YAML_DECLARE(yaml_node_t *) -//yaml_document_get_node(document *yaml_document_t, index int) -//{ -// assert(document) // Non-NULL document object is expected. -// -// if (index > 0 && document.nodes.start + index <= document.nodes.top) { -// return document.nodes.start + index - 1 -// } -// return NULL -//} -// -///** -// * Get the root object. -// */ -// -//YAML_DECLARE(yaml_node_t *) -//yaml_document_get_root_node(document *yaml_document_t) -//{ -// assert(document) // Non-NULL document object is expected. -// -// if (document.nodes.top != document.nodes.start) { -// return document.nodes.start -// } -// return NULL -//} -// -///* -// * Add a scalar node to a document. -// */ -// -//YAML_DECLARE(int) -//yaml_document_add_scalar(document *yaml_document_t, -// tag *yaml_char_t, value *yaml_char_t, length int, -// style yaml_scalar_style_t) -//{ -// struct { -// error yaml_error_type_t -// } context -// mark yaml_mark_t = { 0, 0, 0 } -// tag_copy *yaml_char_t = NULL -// value_copy *yaml_char_t = NULL -// node yaml_node_t -// -// assert(document) // Non-NULL document object is expected. -// assert(value) // Non-NULL value is expected. -// -// if (!tag) { -// tag = (yaml_char_t *)YAML_DEFAULT_SCALAR_TAG -// } -// -// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error -// tag_copy = yaml_strdup(tag) -// if (!tag_copy) goto error -// -// if (length < 0) { -// length = strlen((char *)value) -// } -// -// if (!yaml_check_utf8(value, length)) goto error -// value_copy = yaml_malloc(length+1) -// if (!value_copy) goto error -// memcpy(value_copy, value, length) -// value_copy[length] = '\0' -// -// SCALAR_NODE_INIT(node, tag_copy, value_copy, length, style, mark, mark) -// if (!PUSH(&context, document.nodes, node)) goto error -// -// return document.nodes.top - document.nodes.start -// -//error: -// yaml_free(tag_copy) -// yaml_free(value_copy) -// -// return 0 -//} -// -///* -// * Add a sequence node to a document. -// */ -// -//YAML_DECLARE(int) -//yaml_document_add_sequence(document *yaml_document_t, -// tag *yaml_char_t, style yaml_sequence_style_t) -//{ -// struct { -// error yaml_error_type_t -// } context -// mark yaml_mark_t = { 0, 0, 0 } -// tag_copy *yaml_char_t = NULL -// struct { -// start *yaml_node_item_t -// end *yaml_node_item_t -// top *yaml_node_item_t -// } items = { NULL, NULL, NULL } -// node yaml_node_t -// -// assert(document) // Non-NULL document object is expected. -// -// if (!tag) { -// tag = (yaml_char_t *)YAML_DEFAULT_SEQUENCE_TAG -// } -// -// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error -// tag_copy = yaml_strdup(tag) -// if (!tag_copy) goto error -// -// if (!STACK_INIT(&context, items, INITIAL_STACK_SIZE)) goto error -// -// SEQUENCE_NODE_INIT(node, tag_copy, items.start, items.end, -// style, mark, mark) -// if (!PUSH(&context, document.nodes, node)) goto error -// -// return document.nodes.top - document.nodes.start -// -//error: -// STACK_DEL(&context, items) -// yaml_free(tag_copy) -// -// return 0 -//} -// -///* -// * Add a mapping node to a document. -// */ -// -//YAML_DECLARE(int) -//yaml_document_add_mapping(document *yaml_document_t, -// tag *yaml_char_t, style yaml_mapping_style_t) -//{ -// struct { -// error yaml_error_type_t -// } context -// mark yaml_mark_t = { 0, 0, 0 } -// tag_copy *yaml_char_t = NULL -// struct { -// start *yaml_node_pair_t -// end *yaml_node_pair_t -// top *yaml_node_pair_t -// } pairs = { NULL, NULL, NULL } -// node yaml_node_t -// -// assert(document) // Non-NULL document object is expected. -// -// if (!tag) { -// tag = (yaml_char_t *)YAML_DEFAULT_MAPPING_TAG -// } -// -// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error -// tag_copy = yaml_strdup(tag) -// if (!tag_copy) goto error -// -// if (!STACK_INIT(&context, pairs, INITIAL_STACK_SIZE)) goto error -// -// MAPPING_NODE_INIT(node, tag_copy, pairs.start, pairs.end, -// style, mark, mark) -// if (!PUSH(&context, document.nodes, node)) goto error -// -// return document.nodes.top - document.nodes.start -// -//error: -// STACK_DEL(&context, pairs) -// yaml_free(tag_copy) -// -// return 0 -//} -// -///* -// * Append an item to a sequence node. -// */ -// -//YAML_DECLARE(int) -//yaml_document_append_sequence_item(document *yaml_document_t, -// sequence int, item int) -//{ -// struct { -// error yaml_error_type_t -// } context -// -// assert(document) // Non-NULL document is required. -// assert(sequence > 0 -// && document.nodes.start + sequence <= document.nodes.top) -// // Valid sequence id is required. -// assert(document.nodes.start[sequence-1].type == YAML_SEQUENCE_NODE) -// // A sequence node is required. -// assert(item > 0 && document.nodes.start + item <= document.nodes.top) -// // Valid item id is required. -// -// if (!PUSH(&context, -// document.nodes.start[sequence-1].data.sequence.items, item)) -// return 0 -// -// return 1 -//} -// -///* -// * Append a pair of a key and a value to a mapping node. -// */ -// -//YAML_DECLARE(int) -//yaml_document_append_mapping_pair(document *yaml_document_t, -// mapping int, key int, value int) -//{ -// struct { -// error yaml_error_type_t -// } context -// -// pair yaml_node_pair_t -// -// assert(document) // Non-NULL document is required. -// assert(mapping > 0 -// && document.nodes.start + mapping <= document.nodes.top) -// // Valid mapping id is required. -// assert(document.nodes.start[mapping-1].type == YAML_MAPPING_NODE) -// // A mapping node is required. -// assert(key > 0 && document.nodes.start + key <= document.nodes.top) -// // Valid key id is required. -// assert(value > 0 && document.nodes.start + value <= document.nodes.top) -// // Valid value id is required. -// -// pair.key = key -// pair.value = value -// -// if (!PUSH(&context, -// document.nodes.start[mapping-1].data.mapping.pairs, pair)) -// return 0 -// -// return 1 -//} -// -// diff --git a/vendor/gopkg.in/yaml.v2/decode.go b/vendor/gopkg.in/yaml.v2/decode.go deleted file mode 100644 index 129bc2a9..00000000 --- a/vendor/gopkg.in/yaml.v2/decode.go +++ /dev/null @@ -1,815 +0,0 @@ -package yaml - -import ( - "encoding" - "encoding/base64" - "fmt" - "io" - "math" - "reflect" - "strconv" - "time" -) - -const ( - documentNode = 1 << iota - mappingNode - sequenceNode - scalarNode - aliasNode -) - -type node struct { - kind int - line, column int - tag string - // For an alias node, alias holds the resolved alias. - alias *node - value string - implicit bool - children []*node - anchors map[string]*node -} - -// ---------------------------------------------------------------------------- -// Parser, produces a node tree out of a libyaml event stream. - -type parser struct { - parser yaml_parser_t - event yaml_event_t - doc *node - doneInit bool -} - -func newParser(b []byte) *parser { - p := parser{} - if !yaml_parser_initialize(&p.parser) { - panic("failed to initialize YAML emitter") - } - if len(b) == 0 { - b = []byte{'\n'} - } - yaml_parser_set_input_string(&p.parser, b) - return &p -} - -func newParserFromReader(r io.Reader) *parser { - p := parser{} - if !yaml_parser_initialize(&p.parser) { - panic("failed to initialize YAML emitter") - } - yaml_parser_set_input_reader(&p.parser, r) - return &p -} - -func (p *parser) init() { - if p.doneInit { - return - } - p.expect(yaml_STREAM_START_EVENT) - p.doneInit = true -} - -func (p *parser) destroy() { - if p.event.typ != yaml_NO_EVENT { - yaml_event_delete(&p.event) - } - yaml_parser_delete(&p.parser) -} - -// expect consumes an event from the event stream and -// checks that it's of the expected type. -func (p *parser) expect(e yaml_event_type_t) { - if p.event.typ == yaml_NO_EVENT { - if !yaml_parser_parse(&p.parser, &p.event) { - p.fail() - } - } - if p.event.typ == yaml_STREAM_END_EVENT { - failf("attempted to go past the end of stream; corrupted value?") - } - if p.event.typ != e { - p.parser.problem = fmt.Sprintf("expected %s event but got %s", e, p.event.typ) - p.fail() - } - yaml_event_delete(&p.event) - p.event.typ = yaml_NO_EVENT -} - -// peek peeks at the next event in the event stream, -// puts the results into p.event and returns the event type. -func (p *parser) peek() yaml_event_type_t { - if p.event.typ != yaml_NO_EVENT { - return p.event.typ - } - if !yaml_parser_parse(&p.parser, &p.event) { - p.fail() - } - return p.event.typ -} - -func (p *parser) fail() { - var where string - var line int - if p.parser.problem_mark.line != 0 { - line = p.parser.problem_mark.line - // Scanner errors don't iterate line before returning error - if p.parser.error == yaml_SCANNER_ERROR { - line++ - } - } else if p.parser.context_mark.line != 0 { - line = p.parser.context_mark.line - } - if line != 0 { - where = "line " + strconv.Itoa(line) + ": " - } - var msg string - if len(p.parser.problem) > 0 { - msg = p.parser.problem - } else { - msg = "unknown problem parsing YAML content" - } - failf("%s%s", where, msg) -} - -func (p *parser) anchor(n *node, anchor []byte) { - if anchor != nil { - p.doc.anchors[string(anchor)] = n - } -} - -func (p *parser) parse() *node { - p.init() - switch p.peek() { - case yaml_SCALAR_EVENT: - return p.scalar() - case yaml_ALIAS_EVENT: - return p.alias() - case yaml_MAPPING_START_EVENT: - return p.mapping() - case yaml_SEQUENCE_START_EVENT: - return p.sequence() - case yaml_DOCUMENT_START_EVENT: - return p.document() - case yaml_STREAM_END_EVENT: - // Happens when attempting to decode an empty buffer. - return nil - default: - panic("attempted to parse unknown event: " + p.event.typ.String()) - } -} - -func (p *parser) node(kind int) *node { - return &node{ - kind: kind, - line: p.event.start_mark.line, - column: p.event.start_mark.column, - } -} - -func (p *parser) document() *node { - n := p.node(documentNode) - n.anchors = make(map[string]*node) - p.doc = n - p.expect(yaml_DOCUMENT_START_EVENT) - n.children = append(n.children, p.parse()) - p.expect(yaml_DOCUMENT_END_EVENT) - return n -} - -func (p *parser) alias() *node { - n := p.node(aliasNode) - n.value = string(p.event.anchor) - n.alias = p.doc.anchors[n.value] - if n.alias == nil { - failf("unknown anchor '%s' referenced", n.value) - } - p.expect(yaml_ALIAS_EVENT) - return n -} - -func (p *parser) scalar() *node { - n := p.node(scalarNode) - n.value = string(p.event.value) - n.tag = string(p.event.tag) - n.implicit = p.event.implicit - p.anchor(n, p.event.anchor) - p.expect(yaml_SCALAR_EVENT) - return n -} - -func (p *parser) sequence() *node { - n := p.node(sequenceNode) - p.anchor(n, p.event.anchor) - p.expect(yaml_SEQUENCE_START_EVENT) - for p.peek() != yaml_SEQUENCE_END_EVENT { - n.children = append(n.children, p.parse()) - } - p.expect(yaml_SEQUENCE_END_EVENT) - return n -} - -func (p *parser) mapping() *node { - n := p.node(mappingNode) - p.anchor(n, p.event.anchor) - p.expect(yaml_MAPPING_START_EVENT) - for p.peek() != yaml_MAPPING_END_EVENT { - n.children = append(n.children, p.parse(), p.parse()) - } - p.expect(yaml_MAPPING_END_EVENT) - return n -} - -// ---------------------------------------------------------------------------- -// Decoder, unmarshals a node into a provided value. - -type decoder struct { - doc *node - aliases map[*node]bool - mapType reflect.Type - terrors []string - strict bool - - decodeCount int - aliasCount int - aliasDepth int -} - -var ( - mapItemType = reflect.TypeOf(MapItem{}) - durationType = reflect.TypeOf(time.Duration(0)) - defaultMapType = reflect.TypeOf(map[interface{}]interface{}{}) - ifaceType = defaultMapType.Elem() - timeType = reflect.TypeOf(time.Time{}) - ptrTimeType = reflect.TypeOf(&time.Time{}) -) - -func newDecoder(strict bool) *decoder { - d := &decoder{mapType: defaultMapType, strict: strict} - d.aliases = make(map[*node]bool) - return d -} - -func (d *decoder) terror(n *node, tag string, out reflect.Value) { - if n.tag != "" { - tag = n.tag - } - value := n.value - if tag != yaml_SEQ_TAG && tag != yaml_MAP_TAG { - if len(value) > 10 { - value = " `" + value[:7] + "...`" - } else { - value = " `" + value + "`" - } - } - d.terrors = append(d.terrors, fmt.Sprintf("line %d: cannot unmarshal %s%s into %s", n.line+1, shortTag(tag), value, out.Type())) -} - -func (d *decoder) callUnmarshaler(n *node, u Unmarshaler) (good bool) { - terrlen := len(d.terrors) - err := u.UnmarshalYAML(func(v interface{}) (err error) { - defer handleErr(&err) - d.unmarshal(n, reflect.ValueOf(v)) - if len(d.terrors) > terrlen { - issues := d.terrors[terrlen:] - d.terrors = d.terrors[:terrlen] - return &TypeError{issues} - } - return nil - }) - if e, ok := err.(*TypeError); ok { - d.terrors = append(d.terrors, e.Errors...) - return false - } - if err != nil { - fail(err) - } - return true -} - -// d.prepare initializes and dereferences pointers and calls UnmarshalYAML -// if a value is found to implement it. -// It returns the initialized and dereferenced out value, whether -// unmarshalling was already done by UnmarshalYAML, and if so whether -// its types unmarshalled appropriately. -// -// If n holds a null value, prepare returns before doing anything. -func (d *decoder) prepare(n *node, out reflect.Value) (newout reflect.Value, unmarshaled, good bool) { - if n.tag == yaml_NULL_TAG || n.kind == scalarNode && n.tag == "" && (n.value == "null" || n.value == "~" || n.value == "" && n.implicit) { - return out, false, false - } - again := true - for again { - again = false - if out.Kind() == reflect.Ptr { - if out.IsNil() { - out.Set(reflect.New(out.Type().Elem())) - } - out = out.Elem() - again = true - } - if out.CanAddr() { - if u, ok := out.Addr().Interface().(Unmarshaler); ok { - good = d.callUnmarshaler(n, u) - return out, true, good - } - } - } - return out, false, false -} - -const ( - // 400,000 decode operations is ~500kb of dense object declarations, or - // ~5kb of dense object declarations with 10000% alias expansion - alias_ratio_range_low = 400000 - - // 4,000,000 decode operations is ~5MB of dense object declarations, or - // ~4.5MB of dense object declarations with 10% alias expansion - alias_ratio_range_high = 4000000 - - // alias_ratio_range is the range over which we scale allowed alias ratios - alias_ratio_range = float64(alias_ratio_range_high - alias_ratio_range_low) -) - -func allowedAliasRatio(decodeCount int) float64 { - switch { - case decodeCount <= alias_ratio_range_low: - // allow 99% to come from alias expansion for small-to-medium documents - return 0.99 - case decodeCount >= alias_ratio_range_high: - // allow 10% to come from alias expansion for very large documents - return 0.10 - default: - // scale smoothly from 99% down to 10% over the range. - // this maps to 396,000 - 400,000 allowed alias-driven decodes over the range. - // 400,000 decode operations is ~100MB of allocations in worst-case scenarios (single-item maps). - return 0.99 - 0.89*(float64(decodeCount-alias_ratio_range_low)/alias_ratio_range) - } -} - -func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) { - d.decodeCount++ - if d.aliasDepth > 0 { - d.aliasCount++ - } - if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > allowedAliasRatio(d.decodeCount) { - failf("document contains excessive aliasing") - } - switch n.kind { - case documentNode: - return d.document(n, out) - case aliasNode: - return d.alias(n, out) - } - out, unmarshaled, good := d.prepare(n, out) - if unmarshaled { - return good - } - switch n.kind { - case scalarNode: - good = d.scalar(n, out) - case mappingNode: - good = d.mapping(n, out) - case sequenceNode: - good = d.sequence(n, out) - default: - panic("internal error: unknown node kind: " + strconv.Itoa(n.kind)) - } - return good -} - -func (d *decoder) document(n *node, out reflect.Value) (good bool) { - if len(n.children) == 1 { - d.doc = n - d.unmarshal(n.children[0], out) - return true - } - return false -} - -func (d *decoder) alias(n *node, out reflect.Value) (good bool) { - if d.aliases[n] { - // TODO this could actually be allowed in some circumstances. - failf("anchor '%s' value contains itself", n.value) - } - d.aliases[n] = true - d.aliasDepth++ - good = d.unmarshal(n.alias, out) - d.aliasDepth-- - delete(d.aliases, n) - return good -} - -var zeroValue reflect.Value - -func resetMap(out reflect.Value) { - for _, k := range out.MapKeys() { - out.SetMapIndex(k, zeroValue) - } -} - -func (d *decoder) scalar(n *node, out reflect.Value) bool { - var tag string - var resolved interface{} - if n.tag == "" && !n.implicit { - tag = yaml_STR_TAG - resolved = n.value - } else { - tag, resolved = resolve(n.tag, n.value) - if tag == yaml_BINARY_TAG { - data, err := base64.StdEncoding.DecodeString(resolved.(string)) - if err != nil { - failf("!!binary value contains invalid base64 data") - } - resolved = string(data) - } - } - if resolved == nil { - if out.Kind() == reflect.Map && !out.CanAddr() { - resetMap(out) - } else { - out.Set(reflect.Zero(out.Type())) - } - return true - } - if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() { - // We've resolved to exactly the type we want, so use that. - out.Set(resolvedv) - return true - } - // Perhaps we can use the value as a TextUnmarshaler to - // set its value. - if out.CanAddr() { - u, ok := out.Addr().Interface().(encoding.TextUnmarshaler) - if ok { - var text []byte - if tag == yaml_BINARY_TAG { - text = []byte(resolved.(string)) - } else { - // We let any value be unmarshaled into TextUnmarshaler. - // That might be more lax than we'd like, but the - // TextUnmarshaler itself should bowl out any dubious values. - text = []byte(n.value) - } - err := u.UnmarshalText(text) - if err != nil { - fail(err) - } - return true - } - } - switch out.Kind() { - case reflect.String: - if tag == yaml_BINARY_TAG { - out.SetString(resolved.(string)) - return true - } - if resolved != nil { - out.SetString(n.value) - return true - } - case reflect.Interface: - if resolved == nil { - out.Set(reflect.Zero(out.Type())) - } else if tag == yaml_TIMESTAMP_TAG { - // It looks like a timestamp but for backward compatibility - // reasons we set it as a string, so that code that unmarshals - // timestamp-like values into interface{} will continue to - // see a string and not a time.Time. - // TODO(v3) Drop this. - out.Set(reflect.ValueOf(n.value)) - } else { - out.Set(reflect.ValueOf(resolved)) - } - return true - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - switch resolved := resolved.(type) { - case int: - if !out.OverflowInt(int64(resolved)) { - out.SetInt(int64(resolved)) - return true - } - case int64: - if !out.OverflowInt(resolved) { - out.SetInt(resolved) - return true - } - case uint64: - if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) { - out.SetInt(int64(resolved)) - return true - } - case float64: - if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) { - out.SetInt(int64(resolved)) - return true - } - case string: - if out.Type() == durationType { - d, err := time.ParseDuration(resolved) - if err == nil { - out.SetInt(int64(d)) - return true - } - } - } - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - switch resolved := resolved.(type) { - case int: - if resolved >= 0 && !out.OverflowUint(uint64(resolved)) { - out.SetUint(uint64(resolved)) - return true - } - case int64: - if resolved >= 0 && !out.OverflowUint(uint64(resolved)) { - out.SetUint(uint64(resolved)) - return true - } - case uint64: - if !out.OverflowUint(uint64(resolved)) { - out.SetUint(uint64(resolved)) - return true - } - case float64: - if resolved <= math.MaxUint64 && !out.OverflowUint(uint64(resolved)) { - out.SetUint(uint64(resolved)) - return true - } - } - case reflect.Bool: - switch resolved := resolved.(type) { - case bool: - out.SetBool(resolved) - return true - } - case reflect.Float32, reflect.Float64: - switch resolved := resolved.(type) { - case int: - out.SetFloat(float64(resolved)) - return true - case int64: - out.SetFloat(float64(resolved)) - return true - case uint64: - out.SetFloat(float64(resolved)) - return true - case float64: - out.SetFloat(resolved) - return true - } - case reflect.Struct: - if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() { - out.Set(resolvedv) - return true - } - case reflect.Ptr: - if out.Type().Elem() == reflect.TypeOf(resolved) { - // TODO DOes this make sense? When is out a Ptr except when decoding a nil value? - elem := reflect.New(out.Type().Elem()) - elem.Elem().Set(reflect.ValueOf(resolved)) - out.Set(elem) - return true - } - } - d.terror(n, tag, out) - return false -} - -func settableValueOf(i interface{}) reflect.Value { - v := reflect.ValueOf(i) - sv := reflect.New(v.Type()).Elem() - sv.Set(v) - return sv -} - -func (d *decoder) sequence(n *node, out reflect.Value) (good bool) { - l := len(n.children) - - var iface reflect.Value - switch out.Kind() { - case reflect.Slice: - out.Set(reflect.MakeSlice(out.Type(), l, l)) - case reflect.Array: - if l != out.Len() { - failf("invalid array: want %d elements but got %d", out.Len(), l) - } - case reflect.Interface: - // No type hints. Will have to use a generic sequence. - iface = out - out = settableValueOf(make([]interface{}, l)) - default: - d.terror(n, yaml_SEQ_TAG, out) - return false - } - et := out.Type().Elem() - - j := 0 - for i := 0; i < l; i++ { - e := reflect.New(et).Elem() - if ok := d.unmarshal(n.children[i], e); ok { - out.Index(j).Set(e) - j++ - } - } - if out.Kind() != reflect.Array { - out.Set(out.Slice(0, j)) - } - if iface.IsValid() { - iface.Set(out) - } - return true -} - -func (d *decoder) mapping(n *node, out reflect.Value) (good bool) { - switch out.Kind() { - case reflect.Struct: - return d.mappingStruct(n, out) - case reflect.Slice: - return d.mappingSlice(n, out) - case reflect.Map: - // okay - case reflect.Interface: - if d.mapType.Kind() == reflect.Map { - iface := out - out = reflect.MakeMap(d.mapType) - iface.Set(out) - } else { - slicev := reflect.New(d.mapType).Elem() - if !d.mappingSlice(n, slicev) { - return false - } - out.Set(slicev) - return true - } - default: - d.terror(n, yaml_MAP_TAG, out) - return false - } - outt := out.Type() - kt := outt.Key() - et := outt.Elem() - - mapType := d.mapType - if outt.Key() == ifaceType && outt.Elem() == ifaceType { - d.mapType = outt - } - - if out.IsNil() { - out.Set(reflect.MakeMap(outt)) - } - l := len(n.children) - for i := 0; i < l; i += 2 { - if isMerge(n.children[i]) { - d.merge(n.children[i+1], out) - continue - } - k := reflect.New(kt).Elem() - if d.unmarshal(n.children[i], k) { - kkind := k.Kind() - if kkind == reflect.Interface { - kkind = k.Elem().Kind() - } - if kkind == reflect.Map || kkind == reflect.Slice { - failf("invalid map key: %#v", k.Interface()) - } - e := reflect.New(et).Elem() - if d.unmarshal(n.children[i+1], e) { - d.setMapIndex(n.children[i+1], out, k, e) - } - } - } - d.mapType = mapType - return true -} - -func (d *decoder) setMapIndex(n *node, out, k, v reflect.Value) { - if d.strict && out.MapIndex(k) != zeroValue { - d.terrors = append(d.terrors, fmt.Sprintf("line %d: key %#v already set in map", n.line+1, k.Interface())) - return - } - out.SetMapIndex(k, v) -} - -func (d *decoder) mappingSlice(n *node, out reflect.Value) (good bool) { - outt := out.Type() - if outt.Elem() != mapItemType { - d.terror(n, yaml_MAP_TAG, out) - return false - } - - mapType := d.mapType - d.mapType = outt - - var slice []MapItem - var l = len(n.children) - for i := 0; i < l; i += 2 { - if isMerge(n.children[i]) { - d.merge(n.children[i+1], out) - continue - } - item := MapItem{} - k := reflect.ValueOf(&item.Key).Elem() - if d.unmarshal(n.children[i], k) { - v := reflect.ValueOf(&item.Value).Elem() - if d.unmarshal(n.children[i+1], v) { - slice = append(slice, item) - } - } - } - out.Set(reflect.ValueOf(slice)) - d.mapType = mapType - return true -} - -func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) { - sinfo, err := getStructInfo(out.Type()) - if err != nil { - panic(err) - } - name := settableValueOf("") - l := len(n.children) - - var inlineMap reflect.Value - var elemType reflect.Type - if sinfo.InlineMap != -1 { - inlineMap = out.Field(sinfo.InlineMap) - inlineMap.Set(reflect.New(inlineMap.Type()).Elem()) - elemType = inlineMap.Type().Elem() - } - - var doneFields []bool - if d.strict { - doneFields = make([]bool, len(sinfo.FieldsList)) - } - for i := 0; i < l; i += 2 { - ni := n.children[i] - if isMerge(ni) { - d.merge(n.children[i+1], out) - continue - } - if !d.unmarshal(ni, name) { - continue - } - if info, ok := sinfo.FieldsMap[name.String()]; ok { - if d.strict { - if doneFields[info.Id] { - d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s already set in type %s", ni.line+1, name.String(), out.Type())) - continue - } - doneFields[info.Id] = true - } - var field reflect.Value - if info.Inline == nil { - field = out.Field(info.Num) - } else { - field = out.FieldByIndex(info.Inline) - } - d.unmarshal(n.children[i+1], field) - } else if sinfo.InlineMap != -1 { - if inlineMap.IsNil() { - inlineMap.Set(reflect.MakeMap(inlineMap.Type())) - } - value := reflect.New(elemType).Elem() - d.unmarshal(n.children[i+1], value) - d.setMapIndex(n.children[i+1], inlineMap, name, value) - } else if d.strict { - d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s not found in type %s", ni.line+1, name.String(), out.Type())) - } - } - return true -} - -func failWantMap() { - failf("map merge requires map or sequence of maps as the value") -} - -func (d *decoder) merge(n *node, out reflect.Value) { - switch n.kind { - case mappingNode: - d.unmarshal(n, out) - case aliasNode: - if n.alias != nil && n.alias.kind != mappingNode { - failWantMap() - } - d.unmarshal(n, out) - case sequenceNode: - // Step backwards as earlier nodes take precedence. - for i := len(n.children) - 1; i >= 0; i-- { - ni := n.children[i] - if ni.kind == aliasNode { - if ni.alias != nil && ni.alias.kind != mappingNode { - failWantMap() - } - } else if ni.kind != mappingNode { - failWantMap() - } - d.unmarshal(ni, out) - } - default: - failWantMap() - } -} - -func isMerge(n *node) bool { - return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == yaml_MERGE_TAG) -} diff --git a/vendor/gopkg.in/yaml.v2/emitterc.go b/vendor/gopkg.in/yaml.v2/emitterc.go deleted file mode 100644 index a1c2cc52..00000000 --- a/vendor/gopkg.in/yaml.v2/emitterc.go +++ /dev/null @@ -1,1685 +0,0 @@ -package yaml - -import ( - "bytes" - "fmt" -) - -// Flush the buffer if needed. -func flush(emitter *yaml_emitter_t) bool { - if emitter.buffer_pos+5 >= len(emitter.buffer) { - return yaml_emitter_flush(emitter) - } - return true -} - -// Put a character to the output buffer. -func put(emitter *yaml_emitter_t, value byte) bool { - if emitter.buffer_pos+5 >= len(emitter.buffer) && !yaml_emitter_flush(emitter) { - return false - } - emitter.buffer[emitter.buffer_pos] = value - emitter.buffer_pos++ - emitter.column++ - return true -} - -// Put a line break to the output buffer. -func put_break(emitter *yaml_emitter_t) bool { - if emitter.buffer_pos+5 >= len(emitter.buffer) && !yaml_emitter_flush(emitter) { - return false - } - switch emitter.line_break { - case yaml_CR_BREAK: - emitter.buffer[emitter.buffer_pos] = '\r' - emitter.buffer_pos += 1 - case yaml_LN_BREAK: - emitter.buffer[emitter.buffer_pos] = '\n' - emitter.buffer_pos += 1 - case yaml_CRLN_BREAK: - emitter.buffer[emitter.buffer_pos+0] = '\r' - emitter.buffer[emitter.buffer_pos+1] = '\n' - emitter.buffer_pos += 2 - default: - panic("unknown line break setting") - } - emitter.column = 0 - emitter.line++ - return true -} - -// Copy a character from a string into buffer. -func write(emitter *yaml_emitter_t, s []byte, i *int) bool { - if emitter.buffer_pos+5 >= len(emitter.buffer) && !yaml_emitter_flush(emitter) { - return false - } - p := emitter.buffer_pos - w := width(s[*i]) - switch w { - case 4: - emitter.buffer[p+3] = s[*i+3] - fallthrough - case 3: - emitter.buffer[p+2] = s[*i+2] - fallthrough - case 2: - emitter.buffer[p+1] = s[*i+1] - fallthrough - case 1: - emitter.buffer[p+0] = s[*i+0] - default: - panic("unknown character width") - } - emitter.column++ - emitter.buffer_pos += w - *i += w - return true -} - -// Write a whole string into buffer. -func write_all(emitter *yaml_emitter_t, s []byte) bool { - for i := 0; i < len(s); { - if !write(emitter, s, &i) { - return false - } - } - return true -} - -// Copy a line break character from a string into buffer. -func write_break(emitter *yaml_emitter_t, s []byte, i *int) bool { - if s[*i] == '\n' { - if !put_break(emitter) { - return false - } - *i++ - } else { - if !write(emitter, s, i) { - return false - } - emitter.column = 0 - emitter.line++ - } - return true -} - -// Set an emitter error and return false. -func yaml_emitter_set_emitter_error(emitter *yaml_emitter_t, problem string) bool { - emitter.error = yaml_EMITTER_ERROR - emitter.problem = problem - return false -} - -// Emit an event. -func yaml_emitter_emit(emitter *yaml_emitter_t, event *yaml_event_t) bool { - emitter.events = append(emitter.events, *event) - for !yaml_emitter_need_more_events(emitter) { - event := &emitter.events[emitter.events_head] - if !yaml_emitter_analyze_event(emitter, event) { - return false - } - if !yaml_emitter_state_machine(emitter, event) { - return false - } - yaml_event_delete(event) - emitter.events_head++ - } - return true -} - -// Check if we need to accumulate more events before emitting. -// -// We accumulate extra -// - 1 event for DOCUMENT-START -// - 2 events for SEQUENCE-START -// - 3 events for MAPPING-START -// -func yaml_emitter_need_more_events(emitter *yaml_emitter_t) bool { - if emitter.events_head == len(emitter.events) { - return true - } - var accumulate int - switch emitter.events[emitter.events_head].typ { - case yaml_DOCUMENT_START_EVENT: - accumulate = 1 - break - case yaml_SEQUENCE_START_EVENT: - accumulate = 2 - break - case yaml_MAPPING_START_EVENT: - accumulate = 3 - break - default: - return false - } - if len(emitter.events)-emitter.events_head > accumulate { - return false - } - var level int - for i := emitter.events_head; i < len(emitter.events); i++ { - switch emitter.events[i].typ { - case yaml_STREAM_START_EVENT, yaml_DOCUMENT_START_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT: - level++ - case yaml_STREAM_END_EVENT, yaml_DOCUMENT_END_EVENT, yaml_SEQUENCE_END_EVENT, yaml_MAPPING_END_EVENT: - level-- - } - if level == 0 { - return false - } - } - return true -} - -// Append a directive to the directives stack. -func yaml_emitter_append_tag_directive(emitter *yaml_emitter_t, value *yaml_tag_directive_t, allow_duplicates bool) bool { - for i := 0; i < len(emitter.tag_directives); i++ { - if bytes.Equal(value.handle, emitter.tag_directives[i].handle) { - if allow_duplicates { - return true - } - return yaml_emitter_set_emitter_error(emitter, "duplicate %TAG directive") - } - } - - // [Go] Do we actually need to copy this given garbage collection - // and the lack of deallocating destructors? - tag_copy := yaml_tag_directive_t{ - handle: make([]byte, len(value.handle)), - prefix: make([]byte, len(value.prefix)), - } - copy(tag_copy.handle, value.handle) - copy(tag_copy.prefix, value.prefix) - emitter.tag_directives = append(emitter.tag_directives, tag_copy) - return true -} - -// Increase the indentation level. -func yaml_emitter_increase_indent(emitter *yaml_emitter_t, flow, indentless bool) bool { - emitter.indents = append(emitter.indents, emitter.indent) - if emitter.indent < 0 { - if flow { - emitter.indent = emitter.best_indent - } else { - emitter.indent = 0 - } - } else if !indentless { - emitter.indent += emitter.best_indent - } - return true -} - -// State dispatcher. -func yaml_emitter_state_machine(emitter *yaml_emitter_t, event *yaml_event_t) bool { - switch emitter.state { - default: - case yaml_EMIT_STREAM_START_STATE: - return yaml_emitter_emit_stream_start(emitter, event) - - case yaml_EMIT_FIRST_DOCUMENT_START_STATE: - return yaml_emitter_emit_document_start(emitter, event, true) - - case yaml_EMIT_DOCUMENT_START_STATE: - return yaml_emitter_emit_document_start(emitter, event, false) - - case yaml_EMIT_DOCUMENT_CONTENT_STATE: - return yaml_emitter_emit_document_content(emitter, event) - - case yaml_EMIT_DOCUMENT_END_STATE: - return yaml_emitter_emit_document_end(emitter, event) - - case yaml_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE: - return yaml_emitter_emit_flow_sequence_item(emitter, event, true) - - case yaml_EMIT_FLOW_SEQUENCE_ITEM_STATE: - return yaml_emitter_emit_flow_sequence_item(emitter, event, false) - - case yaml_EMIT_FLOW_MAPPING_FIRST_KEY_STATE: - return yaml_emitter_emit_flow_mapping_key(emitter, event, true) - - case yaml_EMIT_FLOW_MAPPING_KEY_STATE: - return yaml_emitter_emit_flow_mapping_key(emitter, event, false) - - case yaml_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE: - return yaml_emitter_emit_flow_mapping_value(emitter, event, true) - - case yaml_EMIT_FLOW_MAPPING_VALUE_STATE: - return yaml_emitter_emit_flow_mapping_value(emitter, event, false) - - case yaml_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE: - return yaml_emitter_emit_block_sequence_item(emitter, event, true) - - case yaml_EMIT_BLOCK_SEQUENCE_ITEM_STATE: - return yaml_emitter_emit_block_sequence_item(emitter, event, false) - - case yaml_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE: - return yaml_emitter_emit_block_mapping_key(emitter, event, true) - - case yaml_EMIT_BLOCK_MAPPING_KEY_STATE: - return yaml_emitter_emit_block_mapping_key(emitter, event, false) - - case yaml_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE: - return yaml_emitter_emit_block_mapping_value(emitter, event, true) - - case yaml_EMIT_BLOCK_MAPPING_VALUE_STATE: - return yaml_emitter_emit_block_mapping_value(emitter, event, false) - - case yaml_EMIT_END_STATE: - return yaml_emitter_set_emitter_error(emitter, "expected nothing after STREAM-END") - } - panic("invalid emitter state") -} - -// Expect STREAM-START. -func yaml_emitter_emit_stream_start(emitter *yaml_emitter_t, event *yaml_event_t) bool { - if event.typ != yaml_STREAM_START_EVENT { - return yaml_emitter_set_emitter_error(emitter, "expected STREAM-START") - } - if emitter.encoding == yaml_ANY_ENCODING { - emitter.encoding = event.encoding - if emitter.encoding == yaml_ANY_ENCODING { - emitter.encoding = yaml_UTF8_ENCODING - } - } - if emitter.best_indent < 2 || emitter.best_indent > 9 { - emitter.best_indent = 2 - } - if emitter.best_width >= 0 && emitter.best_width <= emitter.best_indent*2 { - emitter.best_width = 80 - } - if emitter.best_width < 0 { - emitter.best_width = 1<<31 - 1 - } - if emitter.line_break == yaml_ANY_BREAK { - emitter.line_break = yaml_LN_BREAK - } - - emitter.indent = -1 - emitter.line = 0 - emitter.column = 0 - emitter.whitespace = true - emitter.indention = true - - if emitter.encoding != yaml_UTF8_ENCODING { - if !yaml_emitter_write_bom(emitter) { - return false - } - } - emitter.state = yaml_EMIT_FIRST_DOCUMENT_START_STATE - return true -} - -// Expect DOCUMENT-START or STREAM-END. -func yaml_emitter_emit_document_start(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { - - if event.typ == yaml_DOCUMENT_START_EVENT { - - if event.version_directive != nil { - if !yaml_emitter_analyze_version_directive(emitter, event.version_directive) { - return false - } - } - - for i := 0; i < len(event.tag_directives); i++ { - tag_directive := &event.tag_directives[i] - if !yaml_emitter_analyze_tag_directive(emitter, tag_directive) { - return false - } - if !yaml_emitter_append_tag_directive(emitter, tag_directive, false) { - return false - } - } - - for i := 0; i < len(default_tag_directives); i++ { - tag_directive := &default_tag_directives[i] - if !yaml_emitter_append_tag_directive(emitter, tag_directive, true) { - return false - } - } - - implicit := event.implicit - if !first || emitter.canonical { - implicit = false - } - - if emitter.open_ended && (event.version_directive != nil || len(event.tag_directives) > 0) { - if !yaml_emitter_write_indicator(emitter, []byte("..."), true, false, false) { - return false - } - if !yaml_emitter_write_indent(emitter) { - return false - } - } - - if event.version_directive != nil { - implicit = false - if !yaml_emitter_write_indicator(emitter, []byte("%YAML"), true, false, false) { - return false - } - if !yaml_emitter_write_indicator(emitter, []byte("1.1"), true, false, false) { - return false - } - if !yaml_emitter_write_indent(emitter) { - return false - } - } - - if len(event.tag_directives) > 0 { - implicit = false - for i := 0; i < len(event.tag_directives); i++ { - tag_directive := &event.tag_directives[i] - if !yaml_emitter_write_indicator(emitter, []byte("%TAG"), true, false, false) { - return false - } - if !yaml_emitter_write_tag_handle(emitter, tag_directive.handle) { - return false - } - if !yaml_emitter_write_tag_content(emitter, tag_directive.prefix, true) { - return false - } - if !yaml_emitter_write_indent(emitter) { - return false - } - } - } - - if yaml_emitter_check_empty_document(emitter) { - implicit = false - } - if !implicit { - if !yaml_emitter_write_indent(emitter) { - return false - } - if !yaml_emitter_write_indicator(emitter, []byte("---"), true, false, false) { - return false - } - if emitter.canonical { - if !yaml_emitter_write_indent(emitter) { - return false - } - } - } - - emitter.state = yaml_EMIT_DOCUMENT_CONTENT_STATE - return true - } - - if event.typ == yaml_STREAM_END_EVENT { - if emitter.open_ended { - if !yaml_emitter_write_indicator(emitter, []byte("..."), true, false, false) { - return false - } - if !yaml_emitter_write_indent(emitter) { - return false - } - } - if !yaml_emitter_flush(emitter) { - return false - } - emitter.state = yaml_EMIT_END_STATE - return true - } - - return yaml_emitter_set_emitter_error(emitter, "expected DOCUMENT-START or STREAM-END") -} - -// Expect the root node. -func yaml_emitter_emit_document_content(emitter *yaml_emitter_t, event *yaml_event_t) bool { - emitter.states = append(emitter.states, yaml_EMIT_DOCUMENT_END_STATE) - return yaml_emitter_emit_node(emitter, event, true, false, false, false) -} - -// Expect DOCUMENT-END. -func yaml_emitter_emit_document_end(emitter *yaml_emitter_t, event *yaml_event_t) bool { - if event.typ != yaml_DOCUMENT_END_EVENT { - return yaml_emitter_set_emitter_error(emitter, "expected DOCUMENT-END") - } - if !yaml_emitter_write_indent(emitter) { - return false - } - if !event.implicit { - // [Go] Allocate the slice elsewhere. - if !yaml_emitter_write_indicator(emitter, []byte("..."), true, false, false) { - return false - } - if !yaml_emitter_write_indent(emitter) { - return false - } - } - if !yaml_emitter_flush(emitter) { - return false - } - emitter.state = yaml_EMIT_DOCUMENT_START_STATE - emitter.tag_directives = emitter.tag_directives[:0] - return true -} - -// Expect a flow item node. -func yaml_emitter_emit_flow_sequence_item(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { - if first { - if !yaml_emitter_write_indicator(emitter, []byte{'['}, true, true, false) { - return false - } - if !yaml_emitter_increase_indent(emitter, true, false) { - return false - } - emitter.flow_level++ - } - - if event.typ == yaml_SEQUENCE_END_EVENT { - emitter.flow_level-- - emitter.indent = emitter.indents[len(emitter.indents)-1] - emitter.indents = emitter.indents[:len(emitter.indents)-1] - if emitter.canonical && !first { - if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { - return false - } - if !yaml_emitter_write_indent(emitter) { - return false - } - } - if !yaml_emitter_write_indicator(emitter, []byte{']'}, false, false, false) { - return false - } - emitter.state = emitter.states[len(emitter.states)-1] - emitter.states = emitter.states[:len(emitter.states)-1] - - return true - } - - if !first { - if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { - return false - } - } - - if emitter.canonical || emitter.column > emitter.best_width { - if !yaml_emitter_write_indent(emitter) { - return false - } - } - emitter.states = append(emitter.states, yaml_EMIT_FLOW_SEQUENCE_ITEM_STATE) - return yaml_emitter_emit_node(emitter, event, false, true, false, false) -} - -// Expect a flow key node. -func yaml_emitter_emit_flow_mapping_key(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { - if first { - if !yaml_emitter_write_indicator(emitter, []byte{'{'}, true, true, false) { - return false - } - if !yaml_emitter_increase_indent(emitter, true, false) { - return false - } - emitter.flow_level++ - } - - if event.typ == yaml_MAPPING_END_EVENT { - emitter.flow_level-- - emitter.indent = emitter.indents[len(emitter.indents)-1] - emitter.indents = emitter.indents[:len(emitter.indents)-1] - if emitter.canonical && !first { - if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { - return false - } - if !yaml_emitter_write_indent(emitter) { - return false - } - } - if !yaml_emitter_write_indicator(emitter, []byte{'}'}, false, false, false) { - return false - } - emitter.state = emitter.states[len(emitter.states)-1] - emitter.states = emitter.states[:len(emitter.states)-1] - return true - } - - if !first { - if !yaml_emitter_write_indicator(emitter, []byte{','}, false, false, false) { - return false - } - } - if emitter.canonical || emitter.column > emitter.best_width { - if !yaml_emitter_write_indent(emitter) { - return false - } - } - - if !emitter.canonical && yaml_emitter_check_simple_key(emitter) { - emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE) - return yaml_emitter_emit_node(emitter, event, false, false, true, true) - } - if !yaml_emitter_write_indicator(emitter, []byte{'?'}, true, false, false) { - return false - } - emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_VALUE_STATE) - return yaml_emitter_emit_node(emitter, event, false, false, true, false) -} - -// Expect a flow value node. -func yaml_emitter_emit_flow_mapping_value(emitter *yaml_emitter_t, event *yaml_event_t, simple bool) bool { - if simple { - if !yaml_emitter_write_indicator(emitter, []byte{':'}, false, false, false) { - return false - } - } else { - if emitter.canonical || emitter.column > emitter.best_width { - if !yaml_emitter_write_indent(emitter) { - return false - } - } - if !yaml_emitter_write_indicator(emitter, []byte{':'}, true, false, false) { - return false - } - } - emitter.states = append(emitter.states, yaml_EMIT_FLOW_MAPPING_KEY_STATE) - return yaml_emitter_emit_node(emitter, event, false, false, true, false) -} - -// Expect a block item node. -func yaml_emitter_emit_block_sequence_item(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { - if first { - if !yaml_emitter_increase_indent(emitter, false, emitter.mapping_context && !emitter.indention) { - return false - } - } - if event.typ == yaml_SEQUENCE_END_EVENT { - emitter.indent = emitter.indents[len(emitter.indents)-1] - emitter.indents = emitter.indents[:len(emitter.indents)-1] - emitter.state = emitter.states[len(emitter.states)-1] - emitter.states = emitter.states[:len(emitter.states)-1] - return true - } - if !yaml_emitter_write_indent(emitter) { - return false - } - if !yaml_emitter_write_indicator(emitter, []byte{'-'}, true, false, true) { - return false - } - emitter.states = append(emitter.states, yaml_EMIT_BLOCK_SEQUENCE_ITEM_STATE) - return yaml_emitter_emit_node(emitter, event, false, true, false, false) -} - -// Expect a block key node. -func yaml_emitter_emit_block_mapping_key(emitter *yaml_emitter_t, event *yaml_event_t, first bool) bool { - if first { - if !yaml_emitter_increase_indent(emitter, false, false) { - return false - } - } - if event.typ == yaml_MAPPING_END_EVENT { - emitter.indent = emitter.indents[len(emitter.indents)-1] - emitter.indents = emitter.indents[:len(emitter.indents)-1] - emitter.state = emitter.states[len(emitter.states)-1] - emitter.states = emitter.states[:len(emitter.states)-1] - return true - } - if !yaml_emitter_write_indent(emitter) { - return false - } - if yaml_emitter_check_simple_key(emitter) { - emitter.states = append(emitter.states, yaml_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE) - return yaml_emitter_emit_node(emitter, event, false, false, true, true) - } - if !yaml_emitter_write_indicator(emitter, []byte{'?'}, true, false, true) { - return false - } - emitter.states = append(emitter.states, yaml_EMIT_BLOCK_MAPPING_VALUE_STATE) - return yaml_emitter_emit_node(emitter, event, false, false, true, false) -} - -// Expect a block value node. -func yaml_emitter_emit_block_mapping_value(emitter *yaml_emitter_t, event *yaml_event_t, simple bool) bool { - if simple { - if !yaml_emitter_write_indicator(emitter, []byte{':'}, false, false, false) { - return false - } - } else { - if !yaml_emitter_write_indent(emitter) { - return false - } - if !yaml_emitter_write_indicator(emitter, []byte{':'}, true, false, true) { - return false - } - } - emitter.states = append(emitter.states, yaml_EMIT_BLOCK_MAPPING_KEY_STATE) - return yaml_emitter_emit_node(emitter, event, false, false, true, false) -} - -// Expect a node. -func yaml_emitter_emit_node(emitter *yaml_emitter_t, event *yaml_event_t, - root bool, sequence bool, mapping bool, simple_key bool) bool { - - emitter.root_context = root - emitter.sequence_context = sequence - emitter.mapping_context = mapping - emitter.simple_key_context = simple_key - - switch event.typ { - case yaml_ALIAS_EVENT: - return yaml_emitter_emit_alias(emitter, event) - case yaml_SCALAR_EVENT: - return yaml_emitter_emit_scalar(emitter, event) - case yaml_SEQUENCE_START_EVENT: - return yaml_emitter_emit_sequence_start(emitter, event) - case yaml_MAPPING_START_EVENT: - return yaml_emitter_emit_mapping_start(emitter, event) - default: - return yaml_emitter_set_emitter_error(emitter, - fmt.Sprintf("expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS, but got %v", event.typ)) - } -} - -// Expect ALIAS. -func yaml_emitter_emit_alias(emitter *yaml_emitter_t, event *yaml_event_t) bool { - if !yaml_emitter_process_anchor(emitter) { - return false - } - emitter.state = emitter.states[len(emitter.states)-1] - emitter.states = emitter.states[:len(emitter.states)-1] - return true -} - -// Expect SCALAR. -func yaml_emitter_emit_scalar(emitter *yaml_emitter_t, event *yaml_event_t) bool { - if !yaml_emitter_select_scalar_style(emitter, event) { - return false - } - if !yaml_emitter_process_anchor(emitter) { - return false - } - if !yaml_emitter_process_tag(emitter) { - return false - } - if !yaml_emitter_increase_indent(emitter, true, false) { - return false - } - if !yaml_emitter_process_scalar(emitter) { - return false - } - emitter.indent = emitter.indents[len(emitter.indents)-1] - emitter.indents = emitter.indents[:len(emitter.indents)-1] - emitter.state = emitter.states[len(emitter.states)-1] - emitter.states = emitter.states[:len(emitter.states)-1] - return true -} - -// Expect SEQUENCE-START. -func yaml_emitter_emit_sequence_start(emitter *yaml_emitter_t, event *yaml_event_t) bool { - if !yaml_emitter_process_anchor(emitter) { - return false - } - if !yaml_emitter_process_tag(emitter) { - return false - } - if emitter.flow_level > 0 || emitter.canonical || event.sequence_style() == yaml_FLOW_SEQUENCE_STYLE || - yaml_emitter_check_empty_sequence(emitter) { - emitter.state = yaml_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE - } else { - emitter.state = yaml_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE - } - return true -} - -// Expect MAPPING-START. -func yaml_emitter_emit_mapping_start(emitter *yaml_emitter_t, event *yaml_event_t) bool { - if !yaml_emitter_process_anchor(emitter) { - return false - } - if !yaml_emitter_process_tag(emitter) { - return false - } - if emitter.flow_level > 0 || emitter.canonical || event.mapping_style() == yaml_FLOW_MAPPING_STYLE || - yaml_emitter_check_empty_mapping(emitter) { - emitter.state = yaml_EMIT_FLOW_MAPPING_FIRST_KEY_STATE - } else { - emitter.state = yaml_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE - } - return true -} - -// Check if the document content is an empty scalar. -func yaml_emitter_check_empty_document(emitter *yaml_emitter_t) bool { - return false // [Go] Huh? -} - -// Check if the next events represent an empty sequence. -func yaml_emitter_check_empty_sequence(emitter *yaml_emitter_t) bool { - if len(emitter.events)-emitter.events_head < 2 { - return false - } - return emitter.events[emitter.events_head].typ == yaml_SEQUENCE_START_EVENT && - emitter.events[emitter.events_head+1].typ == yaml_SEQUENCE_END_EVENT -} - -// Check if the next events represent an empty mapping. -func yaml_emitter_check_empty_mapping(emitter *yaml_emitter_t) bool { - if len(emitter.events)-emitter.events_head < 2 { - return false - } - return emitter.events[emitter.events_head].typ == yaml_MAPPING_START_EVENT && - emitter.events[emitter.events_head+1].typ == yaml_MAPPING_END_EVENT -} - -// Check if the next node can be expressed as a simple key. -func yaml_emitter_check_simple_key(emitter *yaml_emitter_t) bool { - length := 0 - switch emitter.events[emitter.events_head].typ { - case yaml_ALIAS_EVENT: - length += len(emitter.anchor_data.anchor) - case yaml_SCALAR_EVENT: - if emitter.scalar_data.multiline { - return false - } - length += len(emitter.anchor_data.anchor) + - len(emitter.tag_data.handle) + - len(emitter.tag_data.suffix) + - len(emitter.scalar_data.value) - case yaml_SEQUENCE_START_EVENT: - if !yaml_emitter_check_empty_sequence(emitter) { - return false - } - length += len(emitter.anchor_data.anchor) + - len(emitter.tag_data.handle) + - len(emitter.tag_data.suffix) - case yaml_MAPPING_START_EVENT: - if !yaml_emitter_check_empty_mapping(emitter) { - return false - } - length += len(emitter.anchor_data.anchor) + - len(emitter.tag_data.handle) + - len(emitter.tag_data.suffix) - default: - return false - } - return length <= 128 -} - -// Determine an acceptable scalar style. -func yaml_emitter_select_scalar_style(emitter *yaml_emitter_t, event *yaml_event_t) bool { - - no_tag := len(emitter.tag_data.handle) == 0 && len(emitter.tag_data.suffix) == 0 - if no_tag && !event.implicit && !event.quoted_implicit { - return yaml_emitter_set_emitter_error(emitter, "neither tag nor implicit flags are specified") - } - - style := event.scalar_style() - if style == yaml_ANY_SCALAR_STYLE { - style = yaml_PLAIN_SCALAR_STYLE - } - if emitter.canonical { - style = yaml_DOUBLE_QUOTED_SCALAR_STYLE - } - if emitter.simple_key_context && emitter.scalar_data.multiline { - style = yaml_DOUBLE_QUOTED_SCALAR_STYLE - } - - if style == yaml_PLAIN_SCALAR_STYLE { - if emitter.flow_level > 0 && !emitter.scalar_data.flow_plain_allowed || - emitter.flow_level == 0 && !emitter.scalar_data.block_plain_allowed { - style = yaml_SINGLE_QUOTED_SCALAR_STYLE - } - if len(emitter.scalar_data.value) == 0 && (emitter.flow_level > 0 || emitter.simple_key_context) { - style = yaml_SINGLE_QUOTED_SCALAR_STYLE - } - if no_tag && !event.implicit { - style = yaml_SINGLE_QUOTED_SCALAR_STYLE - } - } - if style == yaml_SINGLE_QUOTED_SCALAR_STYLE { - if !emitter.scalar_data.single_quoted_allowed { - style = yaml_DOUBLE_QUOTED_SCALAR_STYLE - } - } - if style == yaml_LITERAL_SCALAR_STYLE || style == yaml_FOLDED_SCALAR_STYLE { - if !emitter.scalar_data.block_allowed || emitter.flow_level > 0 || emitter.simple_key_context { - style = yaml_DOUBLE_QUOTED_SCALAR_STYLE - } - } - - if no_tag && !event.quoted_implicit && style != yaml_PLAIN_SCALAR_STYLE { - emitter.tag_data.handle = []byte{'!'} - } - emitter.scalar_data.style = style - return true -} - -// Write an anchor. -func yaml_emitter_process_anchor(emitter *yaml_emitter_t) bool { - if emitter.anchor_data.anchor == nil { - return true - } - c := []byte{'&'} - if emitter.anchor_data.alias { - c[0] = '*' - } - if !yaml_emitter_write_indicator(emitter, c, true, false, false) { - return false - } - return yaml_emitter_write_anchor(emitter, emitter.anchor_data.anchor) -} - -// Write a tag. -func yaml_emitter_process_tag(emitter *yaml_emitter_t) bool { - if len(emitter.tag_data.handle) == 0 && len(emitter.tag_data.suffix) == 0 { - return true - } - if len(emitter.tag_data.handle) > 0 { - if !yaml_emitter_write_tag_handle(emitter, emitter.tag_data.handle) { - return false - } - if len(emitter.tag_data.suffix) > 0 { - if !yaml_emitter_write_tag_content(emitter, emitter.tag_data.suffix, false) { - return false - } - } - } else { - // [Go] Allocate these slices elsewhere. - if !yaml_emitter_write_indicator(emitter, []byte("!<"), true, false, false) { - return false - } - if !yaml_emitter_write_tag_content(emitter, emitter.tag_data.suffix, false) { - return false - } - if !yaml_emitter_write_indicator(emitter, []byte{'>'}, false, false, false) { - return false - } - } - return true -} - -// Write a scalar. -func yaml_emitter_process_scalar(emitter *yaml_emitter_t) bool { - switch emitter.scalar_data.style { - case yaml_PLAIN_SCALAR_STYLE: - return yaml_emitter_write_plain_scalar(emitter, emitter.scalar_data.value, !emitter.simple_key_context) - - case yaml_SINGLE_QUOTED_SCALAR_STYLE: - return yaml_emitter_write_single_quoted_scalar(emitter, emitter.scalar_data.value, !emitter.simple_key_context) - - case yaml_DOUBLE_QUOTED_SCALAR_STYLE: - return yaml_emitter_write_double_quoted_scalar(emitter, emitter.scalar_data.value, !emitter.simple_key_context) - - case yaml_LITERAL_SCALAR_STYLE: - return yaml_emitter_write_literal_scalar(emitter, emitter.scalar_data.value) - - case yaml_FOLDED_SCALAR_STYLE: - return yaml_emitter_write_folded_scalar(emitter, emitter.scalar_data.value) - } - panic("unknown scalar style") -} - -// Check if a %YAML directive is valid. -func yaml_emitter_analyze_version_directive(emitter *yaml_emitter_t, version_directive *yaml_version_directive_t) bool { - if version_directive.major != 1 || version_directive.minor != 1 { - return yaml_emitter_set_emitter_error(emitter, "incompatible %YAML directive") - } - return true -} - -// Check if a %TAG directive is valid. -func yaml_emitter_analyze_tag_directive(emitter *yaml_emitter_t, tag_directive *yaml_tag_directive_t) bool { - handle := tag_directive.handle - prefix := tag_directive.prefix - if len(handle) == 0 { - return yaml_emitter_set_emitter_error(emitter, "tag handle must not be empty") - } - if handle[0] != '!' { - return yaml_emitter_set_emitter_error(emitter, "tag handle must start with '!'") - } - if handle[len(handle)-1] != '!' { - return yaml_emitter_set_emitter_error(emitter, "tag handle must end with '!'") - } - for i := 1; i < len(handle)-1; i += width(handle[i]) { - if !is_alpha(handle, i) { - return yaml_emitter_set_emitter_error(emitter, "tag handle must contain alphanumerical characters only") - } - } - if len(prefix) == 0 { - return yaml_emitter_set_emitter_error(emitter, "tag prefix must not be empty") - } - return true -} - -// Check if an anchor is valid. -func yaml_emitter_analyze_anchor(emitter *yaml_emitter_t, anchor []byte, alias bool) bool { - if len(anchor) == 0 { - problem := "anchor value must not be empty" - if alias { - problem = "alias value must not be empty" - } - return yaml_emitter_set_emitter_error(emitter, problem) - } - for i := 0; i < len(anchor); i += width(anchor[i]) { - if !is_alpha(anchor, i) { - problem := "anchor value must contain alphanumerical characters only" - if alias { - problem = "alias value must contain alphanumerical characters only" - } - return yaml_emitter_set_emitter_error(emitter, problem) - } - } - emitter.anchor_data.anchor = anchor - emitter.anchor_data.alias = alias - return true -} - -// Check if a tag is valid. -func yaml_emitter_analyze_tag(emitter *yaml_emitter_t, tag []byte) bool { - if len(tag) == 0 { - return yaml_emitter_set_emitter_error(emitter, "tag value must not be empty") - } - for i := 0; i < len(emitter.tag_directives); i++ { - tag_directive := &emitter.tag_directives[i] - if bytes.HasPrefix(tag, tag_directive.prefix) { - emitter.tag_data.handle = tag_directive.handle - emitter.tag_data.suffix = tag[len(tag_directive.prefix):] - return true - } - } - emitter.tag_data.suffix = tag - return true -} - -// Check if a scalar is valid. -func yaml_emitter_analyze_scalar(emitter *yaml_emitter_t, value []byte) bool { - var ( - block_indicators = false - flow_indicators = false - line_breaks = false - special_characters = false - - leading_space = false - leading_break = false - trailing_space = false - trailing_break = false - break_space = false - space_break = false - - preceded_by_whitespace = false - followed_by_whitespace = false - previous_space = false - previous_break = false - ) - - emitter.scalar_data.value = value - - if len(value) == 0 { - emitter.scalar_data.multiline = false - emitter.scalar_data.flow_plain_allowed = false - emitter.scalar_data.block_plain_allowed = true - emitter.scalar_data.single_quoted_allowed = true - emitter.scalar_data.block_allowed = false - return true - } - - if len(value) >= 3 && ((value[0] == '-' && value[1] == '-' && value[2] == '-') || (value[0] == '.' && value[1] == '.' && value[2] == '.')) { - block_indicators = true - flow_indicators = true - } - - preceded_by_whitespace = true - for i, w := 0, 0; i < len(value); i += w { - w = width(value[i]) - followed_by_whitespace = i+w >= len(value) || is_blank(value, i+w) - - if i == 0 { - switch value[i] { - case '#', ',', '[', ']', '{', '}', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`': - flow_indicators = true - block_indicators = true - case '?', ':': - flow_indicators = true - if followed_by_whitespace { - block_indicators = true - } - case '-': - if followed_by_whitespace { - flow_indicators = true - block_indicators = true - } - } - } else { - switch value[i] { - case ',', '?', '[', ']', '{', '}': - flow_indicators = true - case ':': - flow_indicators = true - if followed_by_whitespace { - block_indicators = true - } - case '#': - if preceded_by_whitespace { - flow_indicators = true - block_indicators = true - } - } - } - - if !is_printable(value, i) || !is_ascii(value, i) && !emitter.unicode { - special_characters = true - } - if is_space(value, i) { - if i == 0 { - leading_space = true - } - if i+width(value[i]) == len(value) { - trailing_space = true - } - if previous_break { - break_space = true - } - previous_space = true - previous_break = false - } else if is_break(value, i) { - line_breaks = true - if i == 0 { - leading_break = true - } - if i+width(value[i]) == len(value) { - trailing_break = true - } - if previous_space { - space_break = true - } - previous_space = false - previous_break = true - } else { - previous_space = false - previous_break = false - } - - // [Go]: Why 'z'? Couldn't be the end of the string as that's the loop condition. - preceded_by_whitespace = is_blankz(value, i) - } - - emitter.scalar_data.multiline = line_breaks - emitter.scalar_data.flow_plain_allowed = true - emitter.scalar_data.block_plain_allowed = true - emitter.scalar_data.single_quoted_allowed = true - emitter.scalar_data.block_allowed = true - - if leading_space || leading_break || trailing_space || trailing_break { - emitter.scalar_data.flow_plain_allowed = false - emitter.scalar_data.block_plain_allowed = false - } - if trailing_space { - emitter.scalar_data.block_allowed = false - } - if break_space { - emitter.scalar_data.flow_plain_allowed = false - emitter.scalar_data.block_plain_allowed = false - emitter.scalar_data.single_quoted_allowed = false - } - if space_break || special_characters { - emitter.scalar_data.flow_plain_allowed = false - emitter.scalar_data.block_plain_allowed = false - emitter.scalar_data.single_quoted_allowed = false - emitter.scalar_data.block_allowed = false - } - if line_breaks { - emitter.scalar_data.flow_plain_allowed = false - emitter.scalar_data.block_plain_allowed = false - } - if flow_indicators { - emitter.scalar_data.flow_plain_allowed = false - } - if block_indicators { - emitter.scalar_data.block_plain_allowed = false - } - return true -} - -// Check if the event data is valid. -func yaml_emitter_analyze_event(emitter *yaml_emitter_t, event *yaml_event_t) bool { - - emitter.anchor_data.anchor = nil - emitter.tag_data.handle = nil - emitter.tag_data.suffix = nil - emitter.scalar_data.value = nil - - switch event.typ { - case yaml_ALIAS_EVENT: - if !yaml_emitter_analyze_anchor(emitter, event.anchor, true) { - return false - } - - case yaml_SCALAR_EVENT: - if len(event.anchor) > 0 { - if !yaml_emitter_analyze_anchor(emitter, event.anchor, false) { - return false - } - } - if len(event.tag) > 0 && (emitter.canonical || (!event.implicit && !event.quoted_implicit)) { - if !yaml_emitter_analyze_tag(emitter, event.tag) { - return false - } - } - if !yaml_emitter_analyze_scalar(emitter, event.value) { - return false - } - - case yaml_SEQUENCE_START_EVENT: - if len(event.anchor) > 0 { - if !yaml_emitter_analyze_anchor(emitter, event.anchor, false) { - return false - } - } - if len(event.tag) > 0 && (emitter.canonical || !event.implicit) { - if !yaml_emitter_analyze_tag(emitter, event.tag) { - return false - } - } - - case yaml_MAPPING_START_EVENT: - if len(event.anchor) > 0 { - if !yaml_emitter_analyze_anchor(emitter, event.anchor, false) { - return false - } - } - if len(event.tag) > 0 && (emitter.canonical || !event.implicit) { - if !yaml_emitter_analyze_tag(emitter, event.tag) { - return false - } - } - } - return true -} - -// Write the BOM character. -func yaml_emitter_write_bom(emitter *yaml_emitter_t) bool { - if !flush(emitter) { - return false - } - pos := emitter.buffer_pos - emitter.buffer[pos+0] = '\xEF' - emitter.buffer[pos+1] = '\xBB' - emitter.buffer[pos+2] = '\xBF' - emitter.buffer_pos += 3 - return true -} - -func yaml_emitter_write_indent(emitter *yaml_emitter_t) bool { - indent := emitter.indent - if indent < 0 { - indent = 0 - } - if !emitter.indention || emitter.column > indent || (emitter.column == indent && !emitter.whitespace) { - if !put_break(emitter) { - return false - } - } - for emitter.column < indent { - if !put(emitter, ' ') { - return false - } - } - emitter.whitespace = true - emitter.indention = true - return true -} - -func yaml_emitter_write_indicator(emitter *yaml_emitter_t, indicator []byte, need_whitespace, is_whitespace, is_indention bool) bool { - if need_whitespace && !emitter.whitespace { - if !put(emitter, ' ') { - return false - } - } - if !write_all(emitter, indicator) { - return false - } - emitter.whitespace = is_whitespace - emitter.indention = (emitter.indention && is_indention) - emitter.open_ended = false - return true -} - -func yaml_emitter_write_anchor(emitter *yaml_emitter_t, value []byte) bool { - if !write_all(emitter, value) { - return false - } - emitter.whitespace = false - emitter.indention = false - return true -} - -func yaml_emitter_write_tag_handle(emitter *yaml_emitter_t, value []byte) bool { - if !emitter.whitespace { - if !put(emitter, ' ') { - return false - } - } - if !write_all(emitter, value) { - return false - } - emitter.whitespace = false - emitter.indention = false - return true -} - -func yaml_emitter_write_tag_content(emitter *yaml_emitter_t, value []byte, need_whitespace bool) bool { - if need_whitespace && !emitter.whitespace { - if !put(emitter, ' ') { - return false - } - } - for i := 0; i < len(value); { - var must_write bool - switch value[i] { - case ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '_', '.', '~', '*', '\'', '(', ')', '[', ']': - must_write = true - default: - must_write = is_alpha(value, i) - } - if must_write { - if !write(emitter, value, &i) { - return false - } - } else { - w := width(value[i]) - for k := 0; k < w; k++ { - octet := value[i] - i++ - if !put(emitter, '%') { - return false - } - - c := octet >> 4 - if c < 10 { - c += '0' - } else { - c += 'A' - 10 - } - if !put(emitter, c) { - return false - } - - c = octet & 0x0f - if c < 10 { - c += '0' - } else { - c += 'A' - 10 - } - if !put(emitter, c) { - return false - } - } - } - } - emitter.whitespace = false - emitter.indention = false - return true -} - -func yaml_emitter_write_plain_scalar(emitter *yaml_emitter_t, value []byte, allow_breaks bool) bool { - if !emitter.whitespace { - if !put(emitter, ' ') { - return false - } - } - - spaces := false - breaks := false - for i := 0; i < len(value); { - if is_space(value, i) { - if allow_breaks && !spaces && emitter.column > emitter.best_width && !is_space(value, i+1) { - if !yaml_emitter_write_indent(emitter) { - return false - } - i += width(value[i]) - } else { - if !write(emitter, value, &i) { - return false - } - } - spaces = true - } else if is_break(value, i) { - if !breaks && value[i] == '\n' { - if !put_break(emitter) { - return false - } - } - if !write_break(emitter, value, &i) { - return false - } - emitter.indention = true - breaks = true - } else { - if breaks { - if !yaml_emitter_write_indent(emitter) { - return false - } - } - if !write(emitter, value, &i) { - return false - } - emitter.indention = false - spaces = false - breaks = false - } - } - - emitter.whitespace = false - emitter.indention = false - if emitter.root_context { - emitter.open_ended = true - } - - return true -} - -func yaml_emitter_write_single_quoted_scalar(emitter *yaml_emitter_t, value []byte, allow_breaks bool) bool { - - if !yaml_emitter_write_indicator(emitter, []byte{'\''}, true, false, false) { - return false - } - - spaces := false - breaks := false - for i := 0; i < len(value); { - if is_space(value, i) { - if allow_breaks && !spaces && emitter.column > emitter.best_width && i > 0 && i < len(value)-1 && !is_space(value, i+1) { - if !yaml_emitter_write_indent(emitter) { - return false - } - i += width(value[i]) - } else { - if !write(emitter, value, &i) { - return false - } - } - spaces = true - } else if is_break(value, i) { - if !breaks && value[i] == '\n' { - if !put_break(emitter) { - return false - } - } - if !write_break(emitter, value, &i) { - return false - } - emitter.indention = true - breaks = true - } else { - if breaks { - if !yaml_emitter_write_indent(emitter) { - return false - } - } - if value[i] == '\'' { - if !put(emitter, '\'') { - return false - } - } - if !write(emitter, value, &i) { - return false - } - emitter.indention = false - spaces = false - breaks = false - } - } - if !yaml_emitter_write_indicator(emitter, []byte{'\''}, false, false, false) { - return false - } - emitter.whitespace = false - emitter.indention = false - return true -} - -func yaml_emitter_write_double_quoted_scalar(emitter *yaml_emitter_t, value []byte, allow_breaks bool) bool { - spaces := false - if !yaml_emitter_write_indicator(emitter, []byte{'"'}, true, false, false) { - return false - } - - for i := 0; i < len(value); { - if !is_printable(value, i) || (!emitter.unicode && !is_ascii(value, i)) || - is_bom(value, i) || is_break(value, i) || - value[i] == '"' || value[i] == '\\' { - - octet := value[i] - - var w int - var v rune - switch { - case octet&0x80 == 0x00: - w, v = 1, rune(octet&0x7F) - case octet&0xE0 == 0xC0: - w, v = 2, rune(octet&0x1F) - case octet&0xF0 == 0xE0: - w, v = 3, rune(octet&0x0F) - case octet&0xF8 == 0xF0: - w, v = 4, rune(octet&0x07) - } - for k := 1; k < w; k++ { - octet = value[i+k] - v = (v << 6) + (rune(octet) & 0x3F) - } - i += w - - if !put(emitter, '\\') { - return false - } - - var ok bool - switch v { - case 0x00: - ok = put(emitter, '0') - case 0x07: - ok = put(emitter, 'a') - case 0x08: - ok = put(emitter, 'b') - case 0x09: - ok = put(emitter, 't') - case 0x0A: - ok = put(emitter, 'n') - case 0x0b: - ok = put(emitter, 'v') - case 0x0c: - ok = put(emitter, 'f') - case 0x0d: - ok = put(emitter, 'r') - case 0x1b: - ok = put(emitter, 'e') - case 0x22: - ok = put(emitter, '"') - case 0x5c: - ok = put(emitter, '\\') - case 0x85: - ok = put(emitter, 'N') - case 0xA0: - ok = put(emitter, '_') - case 0x2028: - ok = put(emitter, 'L') - case 0x2029: - ok = put(emitter, 'P') - default: - if v <= 0xFF { - ok = put(emitter, 'x') - w = 2 - } else if v <= 0xFFFF { - ok = put(emitter, 'u') - w = 4 - } else { - ok = put(emitter, 'U') - w = 8 - } - for k := (w - 1) * 4; ok && k >= 0; k -= 4 { - digit := byte((v >> uint(k)) & 0x0F) - if digit < 10 { - ok = put(emitter, digit+'0') - } else { - ok = put(emitter, digit+'A'-10) - } - } - } - if !ok { - return false - } - spaces = false - } else if is_space(value, i) { - if allow_breaks && !spaces && emitter.column > emitter.best_width && i > 0 && i < len(value)-1 { - if !yaml_emitter_write_indent(emitter) { - return false - } - if is_space(value, i+1) { - if !put(emitter, '\\') { - return false - } - } - i += width(value[i]) - } else if !write(emitter, value, &i) { - return false - } - spaces = true - } else { - if !write(emitter, value, &i) { - return false - } - spaces = false - } - } - if !yaml_emitter_write_indicator(emitter, []byte{'"'}, false, false, false) { - return false - } - emitter.whitespace = false - emitter.indention = false - return true -} - -func yaml_emitter_write_block_scalar_hints(emitter *yaml_emitter_t, value []byte) bool { - if is_space(value, 0) || is_break(value, 0) { - indent_hint := []byte{'0' + byte(emitter.best_indent)} - if !yaml_emitter_write_indicator(emitter, indent_hint, false, false, false) { - return false - } - } - - emitter.open_ended = false - - var chomp_hint [1]byte - if len(value) == 0 { - chomp_hint[0] = '-' - } else { - i := len(value) - 1 - for value[i]&0xC0 == 0x80 { - i-- - } - if !is_break(value, i) { - chomp_hint[0] = '-' - } else if i == 0 { - chomp_hint[0] = '+' - emitter.open_ended = true - } else { - i-- - for value[i]&0xC0 == 0x80 { - i-- - } - if is_break(value, i) { - chomp_hint[0] = '+' - emitter.open_ended = true - } - } - } - if chomp_hint[0] != 0 { - if !yaml_emitter_write_indicator(emitter, chomp_hint[:], false, false, false) { - return false - } - } - return true -} - -func yaml_emitter_write_literal_scalar(emitter *yaml_emitter_t, value []byte) bool { - if !yaml_emitter_write_indicator(emitter, []byte{'|'}, true, false, false) { - return false - } - if !yaml_emitter_write_block_scalar_hints(emitter, value) { - return false - } - if !put_break(emitter) { - return false - } - emitter.indention = true - emitter.whitespace = true - breaks := true - for i := 0; i < len(value); { - if is_break(value, i) { - if !write_break(emitter, value, &i) { - return false - } - emitter.indention = true - breaks = true - } else { - if breaks { - if !yaml_emitter_write_indent(emitter) { - return false - } - } - if !write(emitter, value, &i) { - return false - } - emitter.indention = false - breaks = false - } - } - - return true -} - -func yaml_emitter_write_folded_scalar(emitter *yaml_emitter_t, value []byte) bool { - if !yaml_emitter_write_indicator(emitter, []byte{'>'}, true, false, false) { - return false - } - if !yaml_emitter_write_block_scalar_hints(emitter, value) { - return false - } - - if !put_break(emitter) { - return false - } - emitter.indention = true - emitter.whitespace = true - - breaks := true - leading_spaces := true - for i := 0; i < len(value); { - if is_break(value, i) { - if !breaks && !leading_spaces && value[i] == '\n' { - k := 0 - for is_break(value, k) { - k += width(value[k]) - } - if !is_blankz(value, k) { - if !put_break(emitter) { - return false - } - } - } - if !write_break(emitter, value, &i) { - return false - } - emitter.indention = true - breaks = true - } else { - if breaks { - if !yaml_emitter_write_indent(emitter) { - return false - } - leading_spaces = is_blank(value, i) - } - if !breaks && is_space(value, i) && !is_space(value, i+1) && emitter.column > emitter.best_width { - if !yaml_emitter_write_indent(emitter) { - return false - } - i += width(value[i]) - } else { - if !write(emitter, value, &i) { - return false - } - } - emitter.indention = false - breaks = false - } - } - return true -} diff --git a/vendor/gopkg.in/yaml.v2/encode.go b/vendor/gopkg.in/yaml.v2/encode.go deleted file mode 100644 index 0ee738e1..00000000 --- a/vendor/gopkg.in/yaml.v2/encode.go +++ /dev/null @@ -1,390 +0,0 @@ -package yaml - -import ( - "encoding" - "fmt" - "io" - "reflect" - "regexp" - "sort" - "strconv" - "strings" - "time" - "unicode/utf8" -) - -// jsonNumber is the interface of the encoding/json.Number datatype. -// Repeating the interface here avoids a dependency on encoding/json, and also -// supports other libraries like jsoniter, which use a similar datatype with -// the same interface. Detecting this interface is useful when dealing with -// structures containing json.Number, which is a string under the hood. The -// encoder should prefer the use of Int64(), Float64() and string(), in that -// order, when encoding this type. -type jsonNumber interface { - Float64() (float64, error) - Int64() (int64, error) - String() string -} - -type encoder struct { - emitter yaml_emitter_t - event yaml_event_t - out []byte - flow bool - // doneInit holds whether the initial stream_start_event has been - // emitted. - doneInit bool -} - -func newEncoder() *encoder { - e := &encoder{} - yaml_emitter_initialize(&e.emitter) - yaml_emitter_set_output_string(&e.emitter, &e.out) - yaml_emitter_set_unicode(&e.emitter, true) - return e -} - -func newEncoderWithWriter(w io.Writer) *encoder { - e := &encoder{} - yaml_emitter_initialize(&e.emitter) - yaml_emitter_set_output_writer(&e.emitter, w) - yaml_emitter_set_unicode(&e.emitter, true) - return e -} - -func (e *encoder) init() { - if e.doneInit { - return - } - yaml_stream_start_event_initialize(&e.event, yaml_UTF8_ENCODING) - e.emit() - e.doneInit = true -} - -func (e *encoder) finish() { - e.emitter.open_ended = false - yaml_stream_end_event_initialize(&e.event) - e.emit() -} - -func (e *encoder) destroy() { - yaml_emitter_delete(&e.emitter) -} - -func (e *encoder) emit() { - // This will internally delete the e.event value. - e.must(yaml_emitter_emit(&e.emitter, &e.event)) -} - -func (e *encoder) must(ok bool) { - if !ok { - msg := e.emitter.problem - if msg == "" { - msg = "unknown problem generating YAML content" - } - failf("%s", msg) - } -} - -func (e *encoder) marshalDoc(tag string, in reflect.Value) { - e.init() - yaml_document_start_event_initialize(&e.event, nil, nil, true) - e.emit() - e.marshal(tag, in) - yaml_document_end_event_initialize(&e.event, true) - e.emit() -} - -func (e *encoder) marshal(tag string, in reflect.Value) { - if !in.IsValid() || in.Kind() == reflect.Ptr && in.IsNil() { - e.nilv() - return - } - iface := in.Interface() - switch m := iface.(type) { - case jsonNumber: - integer, err := m.Int64() - if err == nil { - // In this case the json.Number is a valid int64 - in = reflect.ValueOf(integer) - break - } - float, err := m.Float64() - if err == nil { - // In this case the json.Number is a valid float64 - in = reflect.ValueOf(float) - break - } - // fallback case - no number could be obtained - in = reflect.ValueOf(m.String()) - case time.Time, *time.Time: - // Although time.Time implements TextMarshaler, - // we don't want to treat it as a string for YAML - // purposes because YAML has special support for - // timestamps. - case Marshaler: - v, err := m.MarshalYAML() - if err != nil { - fail(err) - } - if v == nil { - e.nilv() - return - } - in = reflect.ValueOf(v) - case encoding.TextMarshaler: - text, err := m.MarshalText() - if err != nil { - fail(err) - } - in = reflect.ValueOf(string(text)) - case nil: - e.nilv() - return - } - switch in.Kind() { - case reflect.Interface: - e.marshal(tag, in.Elem()) - case reflect.Map: - e.mapv(tag, in) - case reflect.Ptr: - if in.Type() == ptrTimeType { - e.timev(tag, in.Elem()) - } else { - e.marshal(tag, in.Elem()) - } - case reflect.Struct: - if in.Type() == timeType { - e.timev(tag, in) - } else { - e.structv(tag, in) - } - case reflect.Slice, reflect.Array: - if in.Type().Elem() == mapItemType { - e.itemsv(tag, in) - } else { - e.slicev(tag, in) - } - case reflect.String: - e.stringv(tag, in) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - if in.Type() == durationType { - e.stringv(tag, reflect.ValueOf(iface.(time.Duration).String())) - } else { - e.intv(tag, in) - } - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - e.uintv(tag, in) - case reflect.Float32, reflect.Float64: - e.floatv(tag, in) - case reflect.Bool: - e.boolv(tag, in) - default: - panic("cannot marshal type: " + in.Type().String()) - } -} - -func (e *encoder) mapv(tag string, in reflect.Value) { - e.mappingv(tag, func() { - keys := keyList(in.MapKeys()) - sort.Sort(keys) - for _, k := range keys { - e.marshal("", k) - e.marshal("", in.MapIndex(k)) - } - }) -} - -func (e *encoder) itemsv(tag string, in reflect.Value) { - e.mappingv(tag, func() { - slice := in.Convert(reflect.TypeOf([]MapItem{})).Interface().([]MapItem) - for _, item := range slice { - e.marshal("", reflect.ValueOf(item.Key)) - e.marshal("", reflect.ValueOf(item.Value)) - } - }) -} - -func (e *encoder) structv(tag string, in reflect.Value) { - sinfo, err := getStructInfo(in.Type()) - if err != nil { - panic(err) - } - e.mappingv(tag, func() { - for _, info := range sinfo.FieldsList { - var value reflect.Value - if info.Inline == nil { - value = in.Field(info.Num) - } else { - value = in.FieldByIndex(info.Inline) - } - if info.OmitEmpty && isZero(value) { - continue - } - e.marshal("", reflect.ValueOf(info.Key)) - e.flow = info.Flow - e.marshal("", value) - } - if sinfo.InlineMap >= 0 { - m := in.Field(sinfo.InlineMap) - if m.Len() > 0 { - e.flow = false - keys := keyList(m.MapKeys()) - sort.Sort(keys) - for _, k := range keys { - if _, found := sinfo.FieldsMap[k.String()]; found { - panic(fmt.Sprintf("Can't have key %q in inlined map; conflicts with struct field", k.String())) - } - e.marshal("", k) - e.flow = false - e.marshal("", m.MapIndex(k)) - } - } - } - }) -} - -func (e *encoder) mappingv(tag string, f func()) { - implicit := tag == "" - style := yaml_BLOCK_MAPPING_STYLE - if e.flow { - e.flow = false - style = yaml_FLOW_MAPPING_STYLE - } - yaml_mapping_start_event_initialize(&e.event, nil, []byte(tag), implicit, style) - e.emit() - f() - yaml_mapping_end_event_initialize(&e.event) - e.emit() -} - -func (e *encoder) slicev(tag string, in reflect.Value) { - implicit := tag == "" - style := yaml_BLOCK_SEQUENCE_STYLE - if e.flow { - e.flow = false - style = yaml_FLOW_SEQUENCE_STYLE - } - e.must(yaml_sequence_start_event_initialize(&e.event, nil, []byte(tag), implicit, style)) - e.emit() - n := in.Len() - for i := 0; i < n; i++ { - e.marshal("", in.Index(i)) - } - e.must(yaml_sequence_end_event_initialize(&e.event)) - e.emit() -} - -// isBase60 returns whether s is in base 60 notation as defined in YAML 1.1. -// -// The base 60 float notation in YAML 1.1 is a terrible idea and is unsupported -// in YAML 1.2 and by this package, but these should be marshalled quoted for -// the time being for compatibility with other parsers. -func isBase60Float(s string) (result bool) { - // Fast path. - if s == "" { - return false - } - c := s[0] - if !(c == '+' || c == '-' || c >= '0' && c <= '9') || strings.IndexByte(s, ':') < 0 { - return false - } - // Do the full match. - return base60float.MatchString(s) -} - -// From http://yaml.org/type/float.html, except the regular expression there -// is bogus. In practice parsers do not enforce the "\.[0-9_]*" suffix. -var base60float = regexp.MustCompile(`^[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+(?:\.[0-9_]*)?$`) - -func (e *encoder) stringv(tag string, in reflect.Value) { - var style yaml_scalar_style_t - s := in.String() - canUsePlain := true - switch { - case !utf8.ValidString(s): - if tag == yaml_BINARY_TAG { - failf("explicitly tagged !!binary data must be base64-encoded") - } - if tag != "" { - failf("cannot marshal invalid UTF-8 data as %s", shortTag(tag)) - } - // It can't be encoded directly as YAML so use a binary tag - // and encode it as base64. - tag = yaml_BINARY_TAG - s = encodeBase64(s) - case tag == "": - // Check to see if it would resolve to a specific - // tag when encoded unquoted. If it doesn't, - // there's no need to quote it. - rtag, _ := resolve("", s) - canUsePlain = rtag == yaml_STR_TAG && !isBase60Float(s) - } - // Note: it's possible for user code to emit invalid YAML - // if they explicitly specify a tag and a string containing - // text that's incompatible with that tag. - switch { - case strings.Contains(s, "\n"): - style = yaml_LITERAL_SCALAR_STYLE - case canUsePlain: - style = yaml_PLAIN_SCALAR_STYLE - default: - style = yaml_DOUBLE_QUOTED_SCALAR_STYLE - } - e.emitScalar(s, "", tag, style) -} - -func (e *encoder) boolv(tag string, in reflect.Value) { - var s string - if in.Bool() { - s = "true" - } else { - s = "false" - } - e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) -} - -func (e *encoder) intv(tag string, in reflect.Value) { - s := strconv.FormatInt(in.Int(), 10) - e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) -} - -func (e *encoder) uintv(tag string, in reflect.Value) { - s := strconv.FormatUint(in.Uint(), 10) - e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) -} - -func (e *encoder) timev(tag string, in reflect.Value) { - t := in.Interface().(time.Time) - s := t.Format(time.RFC3339Nano) - e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) -} - -func (e *encoder) floatv(tag string, in reflect.Value) { - // Issue #352: When formatting, use the precision of the underlying value - precision := 64 - if in.Kind() == reflect.Float32 { - precision = 32 - } - - s := strconv.FormatFloat(in.Float(), 'g', -1, precision) - switch s { - case "+Inf": - s = ".inf" - case "-Inf": - s = "-.inf" - case "NaN": - s = ".nan" - } - e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) -} - -func (e *encoder) nilv() { - e.emitScalar("null", "", "", yaml_PLAIN_SCALAR_STYLE) -} - -func (e *encoder) emitScalar(value, anchor, tag string, style yaml_scalar_style_t) { - implicit := tag == "" - e.must(yaml_scalar_event_initialize(&e.event, []byte(anchor), []byte(tag), []byte(value), implicit, implicit, style)) - e.emit() -} diff --git a/vendor/gopkg.in/yaml.v2/parserc.go b/vendor/gopkg.in/yaml.v2/parserc.go deleted file mode 100644 index 81d05dfe..00000000 --- a/vendor/gopkg.in/yaml.v2/parserc.go +++ /dev/null @@ -1,1095 +0,0 @@ -package yaml - -import ( - "bytes" -) - -// The parser implements the following grammar: -// -// stream ::= STREAM-START implicit_document? explicit_document* STREAM-END -// implicit_document ::= block_node DOCUMENT-END* -// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* -// block_node_or_indentless_sequence ::= -// ALIAS -// | properties (block_content | indentless_block_sequence)? -// | block_content -// | indentless_block_sequence -// block_node ::= ALIAS -// | properties block_content? -// | block_content -// flow_node ::= ALIAS -// | properties flow_content? -// | flow_content -// properties ::= TAG ANCHOR? | ANCHOR TAG? -// block_content ::= block_collection | flow_collection | SCALAR -// flow_content ::= flow_collection | SCALAR -// block_collection ::= block_sequence | block_mapping -// flow_collection ::= flow_sequence | flow_mapping -// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END -// indentless_sequence ::= (BLOCK-ENTRY block_node?)+ -// block_mapping ::= BLOCK-MAPPING_START -// ((KEY block_node_or_indentless_sequence?)? -// (VALUE block_node_or_indentless_sequence?)?)* -// BLOCK-END -// flow_sequence ::= FLOW-SEQUENCE-START -// (flow_sequence_entry FLOW-ENTRY)* -// flow_sequence_entry? -// FLOW-SEQUENCE-END -// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -// flow_mapping ::= FLOW-MAPPING-START -// (flow_mapping_entry FLOW-ENTRY)* -// flow_mapping_entry? -// FLOW-MAPPING-END -// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - -// Peek the next token in the token queue. -func peek_token(parser *yaml_parser_t) *yaml_token_t { - if parser.token_available || yaml_parser_fetch_more_tokens(parser) { - return &parser.tokens[parser.tokens_head] - } - return nil -} - -// Remove the next token from the queue (must be called after peek_token). -func skip_token(parser *yaml_parser_t) { - parser.token_available = false - parser.tokens_parsed++ - parser.stream_end_produced = parser.tokens[parser.tokens_head].typ == yaml_STREAM_END_TOKEN - parser.tokens_head++ -} - -// Get the next event. -func yaml_parser_parse(parser *yaml_parser_t, event *yaml_event_t) bool { - // Erase the event object. - *event = yaml_event_t{} - - // No events after the end of the stream or error. - if parser.stream_end_produced || parser.error != yaml_NO_ERROR || parser.state == yaml_PARSE_END_STATE { - return true - } - - // Generate the next event. - return yaml_parser_state_machine(parser, event) -} - -// Set parser error. -func yaml_parser_set_parser_error(parser *yaml_parser_t, problem string, problem_mark yaml_mark_t) bool { - parser.error = yaml_PARSER_ERROR - parser.problem = problem - parser.problem_mark = problem_mark - return false -} - -func yaml_parser_set_parser_error_context(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string, problem_mark yaml_mark_t) bool { - parser.error = yaml_PARSER_ERROR - parser.context = context - parser.context_mark = context_mark - parser.problem = problem - parser.problem_mark = problem_mark - return false -} - -// State dispatcher. -func yaml_parser_state_machine(parser *yaml_parser_t, event *yaml_event_t) bool { - //trace("yaml_parser_state_machine", "state:", parser.state.String()) - - switch parser.state { - case yaml_PARSE_STREAM_START_STATE: - return yaml_parser_parse_stream_start(parser, event) - - case yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE: - return yaml_parser_parse_document_start(parser, event, true) - - case yaml_PARSE_DOCUMENT_START_STATE: - return yaml_parser_parse_document_start(parser, event, false) - - case yaml_PARSE_DOCUMENT_CONTENT_STATE: - return yaml_parser_parse_document_content(parser, event) - - case yaml_PARSE_DOCUMENT_END_STATE: - return yaml_parser_parse_document_end(parser, event) - - case yaml_PARSE_BLOCK_NODE_STATE: - return yaml_parser_parse_node(parser, event, true, false) - - case yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE: - return yaml_parser_parse_node(parser, event, true, true) - - case yaml_PARSE_FLOW_NODE_STATE: - return yaml_parser_parse_node(parser, event, false, false) - - case yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE: - return yaml_parser_parse_block_sequence_entry(parser, event, true) - - case yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE: - return yaml_parser_parse_block_sequence_entry(parser, event, false) - - case yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE: - return yaml_parser_parse_indentless_sequence_entry(parser, event) - - case yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE: - return yaml_parser_parse_block_mapping_key(parser, event, true) - - case yaml_PARSE_BLOCK_MAPPING_KEY_STATE: - return yaml_parser_parse_block_mapping_key(parser, event, false) - - case yaml_PARSE_BLOCK_MAPPING_VALUE_STATE: - return yaml_parser_parse_block_mapping_value(parser, event) - - case yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE: - return yaml_parser_parse_flow_sequence_entry(parser, event, true) - - case yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE: - return yaml_parser_parse_flow_sequence_entry(parser, event, false) - - case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE: - return yaml_parser_parse_flow_sequence_entry_mapping_key(parser, event) - - case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE: - return yaml_parser_parse_flow_sequence_entry_mapping_value(parser, event) - - case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE: - return yaml_parser_parse_flow_sequence_entry_mapping_end(parser, event) - - case yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE: - return yaml_parser_parse_flow_mapping_key(parser, event, true) - - case yaml_PARSE_FLOW_MAPPING_KEY_STATE: - return yaml_parser_parse_flow_mapping_key(parser, event, false) - - case yaml_PARSE_FLOW_MAPPING_VALUE_STATE: - return yaml_parser_parse_flow_mapping_value(parser, event, false) - - case yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE: - return yaml_parser_parse_flow_mapping_value(parser, event, true) - - default: - panic("invalid parser state") - } -} - -// Parse the production: -// stream ::= STREAM-START implicit_document? explicit_document* STREAM-END -// ************ -func yaml_parser_parse_stream_start(parser *yaml_parser_t, event *yaml_event_t) bool { - token := peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_STREAM_START_TOKEN { - return yaml_parser_set_parser_error(parser, "did not find expected ", token.start_mark) - } - parser.state = yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE - *event = yaml_event_t{ - typ: yaml_STREAM_START_EVENT, - start_mark: token.start_mark, - end_mark: token.end_mark, - encoding: token.encoding, - } - skip_token(parser) - return true -} - -// Parse the productions: -// implicit_document ::= block_node DOCUMENT-END* -// * -// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* -// ************************* -func yaml_parser_parse_document_start(parser *yaml_parser_t, event *yaml_event_t, implicit bool) bool { - - token := peek_token(parser) - if token == nil { - return false - } - - // Parse extra document end indicators. - if !implicit { - for token.typ == yaml_DOCUMENT_END_TOKEN { - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - } - } - - if implicit && token.typ != yaml_VERSION_DIRECTIVE_TOKEN && - token.typ != yaml_TAG_DIRECTIVE_TOKEN && - token.typ != yaml_DOCUMENT_START_TOKEN && - token.typ != yaml_STREAM_END_TOKEN { - // Parse an implicit document. - if !yaml_parser_process_directives(parser, nil, nil) { - return false - } - parser.states = append(parser.states, yaml_PARSE_DOCUMENT_END_STATE) - parser.state = yaml_PARSE_BLOCK_NODE_STATE - - *event = yaml_event_t{ - typ: yaml_DOCUMENT_START_EVENT, - start_mark: token.start_mark, - end_mark: token.end_mark, - } - - } else if token.typ != yaml_STREAM_END_TOKEN { - // Parse an explicit document. - var version_directive *yaml_version_directive_t - var tag_directives []yaml_tag_directive_t - start_mark := token.start_mark - if !yaml_parser_process_directives(parser, &version_directive, &tag_directives) { - return false - } - token = peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_DOCUMENT_START_TOKEN { - yaml_parser_set_parser_error(parser, - "did not find expected ", token.start_mark) - return false - } - parser.states = append(parser.states, yaml_PARSE_DOCUMENT_END_STATE) - parser.state = yaml_PARSE_DOCUMENT_CONTENT_STATE - end_mark := token.end_mark - - *event = yaml_event_t{ - typ: yaml_DOCUMENT_START_EVENT, - start_mark: start_mark, - end_mark: end_mark, - version_directive: version_directive, - tag_directives: tag_directives, - implicit: false, - } - skip_token(parser) - - } else { - // Parse the stream end. - parser.state = yaml_PARSE_END_STATE - *event = yaml_event_t{ - typ: yaml_STREAM_END_EVENT, - start_mark: token.start_mark, - end_mark: token.end_mark, - } - skip_token(parser) - } - - return true -} - -// Parse the productions: -// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* -// *********** -// -func yaml_parser_parse_document_content(parser *yaml_parser_t, event *yaml_event_t) bool { - token := peek_token(parser) - if token == nil { - return false - } - if token.typ == yaml_VERSION_DIRECTIVE_TOKEN || - token.typ == yaml_TAG_DIRECTIVE_TOKEN || - token.typ == yaml_DOCUMENT_START_TOKEN || - token.typ == yaml_DOCUMENT_END_TOKEN || - token.typ == yaml_STREAM_END_TOKEN { - parser.state = parser.states[len(parser.states)-1] - parser.states = parser.states[:len(parser.states)-1] - return yaml_parser_process_empty_scalar(parser, event, - token.start_mark) - } - return yaml_parser_parse_node(parser, event, true, false) -} - -// Parse the productions: -// implicit_document ::= block_node DOCUMENT-END* -// ************* -// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* -// -func yaml_parser_parse_document_end(parser *yaml_parser_t, event *yaml_event_t) bool { - token := peek_token(parser) - if token == nil { - return false - } - - start_mark := token.start_mark - end_mark := token.start_mark - - implicit := true - if token.typ == yaml_DOCUMENT_END_TOKEN { - end_mark = token.end_mark - skip_token(parser) - implicit = false - } - - parser.tag_directives = parser.tag_directives[:0] - - parser.state = yaml_PARSE_DOCUMENT_START_STATE - *event = yaml_event_t{ - typ: yaml_DOCUMENT_END_EVENT, - start_mark: start_mark, - end_mark: end_mark, - implicit: implicit, - } - return true -} - -// Parse the productions: -// block_node_or_indentless_sequence ::= -// ALIAS -// ***** -// | properties (block_content | indentless_block_sequence)? -// ********** * -// | block_content | indentless_block_sequence -// * -// block_node ::= ALIAS -// ***** -// | properties block_content? -// ********** * -// | block_content -// * -// flow_node ::= ALIAS -// ***** -// | properties flow_content? -// ********** * -// | flow_content -// * -// properties ::= TAG ANCHOR? | ANCHOR TAG? -// ************************* -// block_content ::= block_collection | flow_collection | SCALAR -// ****** -// flow_content ::= flow_collection | SCALAR -// ****** -func yaml_parser_parse_node(parser *yaml_parser_t, event *yaml_event_t, block, indentless_sequence bool) bool { - //defer trace("yaml_parser_parse_node", "block:", block, "indentless_sequence:", indentless_sequence)() - - token := peek_token(parser) - if token == nil { - return false - } - - if token.typ == yaml_ALIAS_TOKEN { - parser.state = parser.states[len(parser.states)-1] - parser.states = parser.states[:len(parser.states)-1] - *event = yaml_event_t{ - typ: yaml_ALIAS_EVENT, - start_mark: token.start_mark, - end_mark: token.end_mark, - anchor: token.value, - } - skip_token(parser) - return true - } - - start_mark := token.start_mark - end_mark := token.start_mark - - var tag_token bool - var tag_handle, tag_suffix, anchor []byte - var tag_mark yaml_mark_t - if token.typ == yaml_ANCHOR_TOKEN { - anchor = token.value - start_mark = token.start_mark - end_mark = token.end_mark - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - if token.typ == yaml_TAG_TOKEN { - tag_token = true - tag_handle = token.value - tag_suffix = token.suffix - tag_mark = token.start_mark - end_mark = token.end_mark - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - } - } else if token.typ == yaml_TAG_TOKEN { - tag_token = true - tag_handle = token.value - tag_suffix = token.suffix - start_mark = token.start_mark - tag_mark = token.start_mark - end_mark = token.end_mark - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - if token.typ == yaml_ANCHOR_TOKEN { - anchor = token.value - end_mark = token.end_mark - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - } - } - - var tag []byte - if tag_token { - if len(tag_handle) == 0 { - tag = tag_suffix - tag_suffix = nil - } else { - for i := range parser.tag_directives { - if bytes.Equal(parser.tag_directives[i].handle, tag_handle) { - tag = append([]byte(nil), parser.tag_directives[i].prefix...) - tag = append(tag, tag_suffix...) - break - } - } - if len(tag) == 0 { - yaml_parser_set_parser_error_context(parser, - "while parsing a node", start_mark, - "found undefined tag handle", tag_mark) - return false - } - } - } - - implicit := len(tag) == 0 - if indentless_sequence && token.typ == yaml_BLOCK_ENTRY_TOKEN { - end_mark = token.end_mark - parser.state = yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE - *event = yaml_event_t{ - typ: yaml_SEQUENCE_START_EVENT, - start_mark: start_mark, - end_mark: end_mark, - anchor: anchor, - tag: tag, - implicit: implicit, - style: yaml_style_t(yaml_BLOCK_SEQUENCE_STYLE), - } - return true - } - if token.typ == yaml_SCALAR_TOKEN { - var plain_implicit, quoted_implicit bool - end_mark = token.end_mark - if (len(tag) == 0 && token.style == yaml_PLAIN_SCALAR_STYLE) || (len(tag) == 1 && tag[0] == '!') { - plain_implicit = true - } else if len(tag) == 0 { - quoted_implicit = true - } - parser.state = parser.states[len(parser.states)-1] - parser.states = parser.states[:len(parser.states)-1] - - *event = yaml_event_t{ - typ: yaml_SCALAR_EVENT, - start_mark: start_mark, - end_mark: end_mark, - anchor: anchor, - tag: tag, - value: token.value, - implicit: plain_implicit, - quoted_implicit: quoted_implicit, - style: yaml_style_t(token.style), - } - skip_token(parser) - return true - } - if token.typ == yaml_FLOW_SEQUENCE_START_TOKEN { - // [Go] Some of the events below can be merged as they differ only on style. - end_mark = token.end_mark - parser.state = yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE - *event = yaml_event_t{ - typ: yaml_SEQUENCE_START_EVENT, - start_mark: start_mark, - end_mark: end_mark, - anchor: anchor, - tag: tag, - implicit: implicit, - style: yaml_style_t(yaml_FLOW_SEQUENCE_STYLE), - } - return true - } - if token.typ == yaml_FLOW_MAPPING_START_TOKEN { - end_mark = token.end_mark - parser.state = yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE - *event = yaml_event_t{ - typ: yaml_MAPPING_START_EVENT, - start_mark: start_mark, - end_mark: end_mark, - anchor: anchor, - tag: tag, - implicit: implicit, - style: yaml_style_t(yaml_FLOW_MAPPING_STYLE), - } - return true - } - if block && token.typ == yaml_BLOCK_SEQUENCE_START_TOKEN { - end_mark = token.end_mark - parser.state = yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE - *event = yaml_event_t{ - typ: yaml_SEQUENCE_START_EVENT, - start_mark: start_mark, - end_mark: end_mark, - anchor: anchor, - tag: tag, - implicit: implicit, - style: yaml_style_t(yaml_BLOCK_SEQUENCE_STYLE), - } - return true - } - if block && token.typ == yaml_BLOCK_MAPPING_START_TOKEN { - end_mark = token.end_mark - parser.state = yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE - *event = yaml_event_t{ - typ: yaml_MAPPING_START_EVENT, - start_mark: start_mark, - end_mark: end_mark, - anchor: anchor, - tag: tag, - implicit: implicit, - style: yaml_style_t(yaml_BLOCK_MAPPING_STYLE), - } - return true - } - if len(anchor) > 0 || len(tag) > 0 { - parser.state = parser.states[len(parser.states)-1] - parser.states = parser.states[:len(parser.states)-1] - - *event = yaml_event_t{ - typ: yaml_SCALAR_EVENT, - start_mark: start_mark, - end_mark: end_mark, - anchor: anchor, - tag: tag, - implicit: implicit, - quoted_implicit: false, - style: yaml_style_t(yaml_PLAIN_SCALAR_STYLE), - } - return true - } - - context := "while parsing a flow node" - if block { - context = "while parsing a block node" - } - yaml_parser_set_parser_error_context(parser, context, start_mark, - "did not find expected node content", token.start_mark) - return false -} - -// Parse the productions: -// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END -// ******************** *********** * ********* -// -func yaml_parser_parse_block_sequence_entry(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { - if first { - token := peek_token(parser) - parser.marks = append(parser.marks, token.start_mark) - skip_token(parser) - } - - token := peek_token(parser) - if token == nil { - return false - } - - if token.typ == yaml_BLOCK_ENTRY_TOKEN { - mark := token.end_mark - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_BLOCK_ENTRY_TOKEN && token.typ != yaml_BLOCK_END_TOKEN { - parser.states = append(parser.states, yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE) - return yaml_parser_parse_node(parser, event, true, false) - } else { - parser.state = yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE - return yaml_parser_process_empty_scalar(parser, event, mark) - } - } - if token.typ == yaml_BLOCK_END_TOKEN { - parser.state = parser.states[len(parser.states)-1] - parser.states = parser.states[:len(parser.states)-1] - parser.marks = parser.marks[:len(parser.marks)-1] - - *event = yaml_event_t{ - typ: yaml_SEQUENCE_END_EVENT, - start_mark: token.start_mark, - end_mark: token.end_mark, - } - - skip_token(parser) - return true - } - - context_mark := parser.marks[len(parser.marks)-1] - parser.marks = parser.marks[:len(parser.marks)-1] - return yaml_parser_set_parser_error_context(parser, - "while parsing a block collection", context_mark, - "did not find expected '-' indicator", token.start_mark) -} - -// Parse the productions: -// indentless_sequence ::= (BLOCK-ENTRY block_node?)+ -// *********** * -func yaml_parser_parse_indentless_sequence_entry(parser *yaml_parser_t, event *yaml_event_t) bool { - token := peek_token(parser) - if token == nil { - return false - } - - if token.typ == yaml_BLOCK_ENTRY_TOKEN { - mark := token.end_mark - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_BLOCK_ENTRY_TOKEN && - token.typ != yaml_KEY_TOKEN && - token.typ != yaml_VALUE_TOKEN && - token.typ != yaml_BLOCK_END_TOKEN { - parser.states = append(parser.states, yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE) - return yaml_parser_parse_node(parser, event, true, false) - } - parser.state = yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE - return yaml_parser_process_empty_scalar(parser, event, mark) - } - parser.state = parser.states[len(parser.states)-1] - parser.states = parser.states[:len(parser.states)-1] - - *event = yaml_event_t{ - typ: yaml_SEQUENCE_END_EVENT, - start_mark: token.start_mark, - end_mark: token.start_mark, // [Go] Shouldn't this be token.end_mark? - } - return true -} - -// Parse the productions: -// block_mapping ::= BLOCK-MAPPING_START -// ******************* -// ((KEY block_node_or_indentless_sequence?)? -// *** * -// (VALUE block_node_or_indentless_sequence?)?)* -// -// BLOCK-END -// ********* -// -func yaml_parser_parse_block_mapping_key(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { - if first { - token := peek_token(parser) - parser.marks = append(parser.marks, token.start_mark) - skip_token(parser) - } - - token := peek_token(parser) - if token == nil { - return false - } - - if token.typ == yaml_KEY_TOKEN { - mark := token.end_mark - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_KEY_TOKEN && - token.typ != yaml_VALUE_TOKEN && - token.typ != yaml_BLOCK_END_TOKEN { - parser.states = append(parser.states, yaml_PARSE_BLOCK_MAPPING_VALUE_STATE) - return yaml_parser_parse_node(parser, event, true, true) - } else { - parser.state = yaml_PARSE_BLOCK_MAPPING_VALUE_STATE - return yaml_parser_process_empty_scalar(parser, event, mark) - } - } else if token.typ == yaml_BLOCK_END_TOKEN { - parser.state = parser.states[len(parser.states)-1] - parser.states = parser.states[:len(parser.states)-1] - parser.marks = parser.marks[:len(parser.marks)-1] - *event = yaml_event_t{ - typ: yaml_MAPPING_END_EVENT, - start_mark: token.start_mark, - end_mark: token.end_mark, - } - skip_token(parser) - return true - } - - context_mark := parser.marks[len(parser.marks)-1] - parser.marks = parser.marks[:len(parser.marks)-1] - return yaml_parser_set_parser_error_context(parser, - "while parsing a block mapping", context_mark, - "did not find expected key", token.start_mark) -} - -// Parse the productions: -// block_mapping ::= BLOCK-MAPPING_START -// -// ((KEY block_node_or_indentless_sequence?)? -// -// (VALUE block_node_or_indentless_sequence?)?)* -// ***** * -// BLOCK-END -// -// -func yaml_parser_parse_block_mapping_value(parser *yaml_parser_t, event *yaml_event_t) bool { - token := peek_token(parser) - if token == nil { - return false - } - if token.typ == yaml_VALUE_TOKEN { - mark := token.end_mark - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_KEY_TOKEN && - token.typ != yaml_VALUE_TOKEN && - token.typ != yaml_BLOCK_END_TOKEN { - parser.states = append(parser.states, yaml_PARSE_BLOCK_MAPPING_KEY_STATE) - return yaml_parser_parse_node(parser, event, true, true) - } - parser.state = yaml_PARSE_BLOCK_MAPPING_KEY_STATE - return yaml_parser_process_empty_scalar(parser, event, mark) - } - parser.state = yaml_PARSE_BLOCK_MAPPING_KEY_STATE - return yaml_parser_process_empty_scalar(parser, event, token.start_mark) -} - -// Parse the productions: -// flow_sequence ::= FLOW-SEQUENCE-START -// ******************* -// (flow_sequence_entry FLOW-ENTRY)* -// * ********** -// flow_sequence_entry? -// * -// FLOW-SEQUENCE-END -// ***************** -// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -// * -// -func yaml_parser_parse_flow_sequence_entry(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { - if first { - token := peek_token(parser) - parser.marks = append(parser.marks, token.start_mark) - skip_token(parser) - } - token := peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { - if !first { - if token.typ == yaml_FLOW_ENTRY_TOKEN { - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - } else { - context_mark := parser.marks[len(parser.marks)-1] - parser.marks = parser.marks[:len(parser.marks)-1] - return yaml_parser_set_parser_error_context(parser, - "while parsing a flow sequence", context_mark, - "did not find expected ',' or ']'", token.start_mark) - } - } - - if token.typ == yaml_KEY_TOKEN { - parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE - *event = yaml_event_t{ - typ: yaml_MAPPING_START_EVENT, - start_mark: token.start_mark, - end_mark: token.end_mark, - implicit: true, - style: yaml_style_t(yaml_FLOW_MAPPING_STYLE), - } - skip_token(parser) - return true - } else if token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { - parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE) - return yaml_parser_parse_node(parser, event, false, false) - } - } - - parser.state = parser.states[len(parser.states)-1] - parser.states = parser.states[:len(parser.states)-1] - parser.marks = parser.marks[:len(parser.marks)-1] - - *event = yaml_event_t{ - typ: yaml_SEQUENCE_END_EVENT, - start_mark: token.start_mark, - end_mark: token.end_mark, - } - - skip_token(parser) - return true -} - -// -// Parse the productions: -// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -// *** * -// -func yaml_parser_parse_flow_sequence_entry_mapping_key(parser *yaml_parser_t, event *yaml_event_t) bool { - token := peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_VALUE_TOKEN && - token.typ != yaml_FLOW_ENTRY_TOKEN && - token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { - parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE) - return yaml_parser_parse_node(parser, event, false, false) - } - mark := token.end_mark - skip_token(parser) - parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE - return yaml_parser_process_empty_scalar(parser, event, mark) -} - -// Parse the productions: -// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -// ***** * -// -func yaml_parser_parse_flow_sequence_entry_mapping_value(parser *yaml_parser_t, event *yaml_event_t) bool { - token := peek_token(parser) - if token == nil { - return false - } - if token.typ == yaml_VALUE_TOKEN { - skip_token(parser) - token := peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_FLOW_ENTRY_TOKEN && token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { - parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE) - return yaml_parser_parse_node(parser, event, false, false) - } - } - parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE - return yaml_parser_process_empty_scalar(parser, event, token.start_mark) -} - -// Parse the productions: -// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -// * -// -func yaml_parser_parse_flow_sequence_entry_mapping_end(parser *yaml_parser_t, event *yaml_event_t) bool { - token := peek_token(parser) - if token == nil { - return false - } - parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE - *event = yaml_event_t{ - typ: yaml_MAPPING_END_EVENT, - start_mark: token.start_mark, - end_mark: token.start_mark, // [Go] Shouldn't this be end_mark? - } - return true -} - -// Parse the productions: -// flow_mapping ::= FLOW-MAPPING-START -// ****************** -// (flow_mapping_entry FLOW-ENTRY)* -// * ********** -// flow_mapping_entry? -// ****************** -// FLOW-MAPPING-END -// **************** -// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -// * *** * -// -func yaml_parser_parse_flow_mapping_key(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { - if first { - token := peek_token(parser) - parser.marks = append(parser.marks, token.start_mark) - skip_token(parser) - } - - token := peek_token(parser) - if token == nil { - return false - } - - if token.typ != yaml_FLOW_MAPPING_END_TOKEN { - if !first { - if token.typ == yaml_FLOW_ENTRY_TOKEN { - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - } else { - context_mark := parser.marks[len(parser.marks)-1] - parser.marks = parser.marks[:len(parser.marks)-1] - return yaml_parser_set_parser_error_context(parser, - "while parsing a flow mapping", context_mark, - "did not find expected ',' or '}'", token.start_mark) - } - } - - if token.typ == yaml_KEY_TOKEN { - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_VALUE_TOKEN && - token.typ != yaml_FLOW_ENTRY_TOKEN && - token.typ != yaml_FLOW_MAPPING_END_TOKEN { - parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_VALUE_STATE) - return yaml_parser_parse_node(parser, event, false, false) - } else { - parser.state = yaml_PARSE_FLOW_MAPPING_VALUE_STATE - return yaml_parser_process_empty_scalar(parser, event, token.start_mark) - } - } else if token.typ != yaml_FLOW_MAPPING_END_TOKEN { - parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE) - return yaml_parser_parse_node(parser, event, false, false) - } - } - - parser.state = parser.states[len(parser.states)-1] - parser.states = parser.states[:len(parser.states)-1] - parser.marks = parser.marks[:len(parser.marks)-1] - *event = yaml_event_t{ - typ: yaml_MAPPING_END_EVENT, - start_mark: token.start_mark, - end_mark: token.end_mark, - } - skip_token(parser) - return true -} - -// Parse the productions: -// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -// * ***** * -// -func yaml_parser_parse_flow_mapping_value(parser *yaml_parser_t, event *yaml_event_t, empty bool) bool { - token := peek_token(parser) - if token == nil { - return false - } - if empty { - parser.state = yaml_PARSE_FLOW_MAPPING_KEY_STATE - return yaml_parser_process_empty_scalar(parser, event, token.start_mark) - } - if token.typ == yaml_VALUE_TOKEN { - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - if token.typ != yaml_FLOW_ENTRY_TOKEN && token.typ != yaml_FLOW_MAPPING_END_TOKEN { - parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_KEY_STATE) - return yaml_parser_parse_node(parser, event, false, false) - } - } - parser.state = yaml_PARSE_FLOW_MAPPING_KEY_STATE - return yaml_parser_process_empty_scalar(parser, event, token.start_mark) -} - -// Generate an empty scalar event. -func yaml_parser_process_empty_scalar(parser *yaml_parser_t, event *yaml_event_t, mark yaml_mark_t) bool { - *event = yaml_event_t{ - typ: yaml_SCALAR_EVENT, - start_mark: mark, - end_mark: mark, - value: nil, // Empty - implicit: true, - style: yaml_style_t(yaml_PLAIN_SCALAR_STYLE), - } - return true -} - -var default_tag_directives = []yaml_tag_directive_t{ - {[]byte("!"), []byte("!")}, - {[]byte("!!"), []byte("tag:yaml.org,2002:")}, -} - -// Parse directives. -func yaml_parser_process_directives(parser *yaml_parser_t, - version_directive_ref **yaml_version_directive_t, - tag_directives_ref *[]yaml_tag_directive_t) bool { - - var version_directive *yaml_version_directive_t - var tag_directives []yaml_tag_directive_t - - token := peek_token(parser) - if token == nil { - return false - } - - for token.typ == yaml_VERSION_DIRECTIVE_TOKEN || token.typ == yaml_TAG_DIRECTIVE_TOKEN { - if token.typ == yaml_VERSION_DIRECTIVE_TOKEN { - if version_directive != nil { - yaml_parser_set_parser_error(parser, - "found duplicate %YAML directive", token.start_mark) - return false - } - if token.major != 1 || token.minor != 1 { - yaml_parser_set_parser_error(parser, - "found incompatible YAML document", token.start_mark) - return false - } - version_directive = &yaml_version_directive_t{ - major: token.major, - minor: token.minor, - } - } else if token.typ == yaml_TAG_DIRECTIVE_TOKEN { - value := yaml_tag_directive_t{ - handle: token.value, - prefix: token.prefix, - } - if !yaml_parser_append_tag_directive(parser, value, false, token.start_mark) { - return false - } - tag_directives = append(tag_directives, value) - } - - skip_token(parser) - token = peek_token(parser) - if token == nil { - return false - } - } - - for i := range default_tag_directives { - if !yaml_parser_append_tag_directive(parser, default_tag_directives[i], true, token.start_mark) { - return false - } - } - - if version_directive_ref != nil { - *version_directive_ref = version_directive - } - if tag_directives_ref != nil { - *tag_directives_ref = tag_directives - } - return true -} - -// Append a tag directive to the directives stack. -func yaml_parser_append_tag_directive(parser *yaml_parser_t, value yaml_tag_directive_t, allow_duplicates bool, mark yaml_mark_t) bool { - for i := range parser.tag_directives { - if bytes.Equal(value.handle, parser.tag_directives[i].handle) { - if allow_duplicates { - return true - } - return yaml_parser_set_parser_error(parser, "found duplicate %TAG directive", mark) - } - } - - // [Go] I suspect the copy is unnecessary. This was likely done - // because there was no way to track ownership of the data. - value_copy := yaml_tag_directive_t{ - handle: make([]byte, len(value.handle)), - prefix: make([]byte, len(value.prefix)), - } - copy(value_copy.handle, value.handle) - copy(value_copy.prefix, value.prefix) - parser.tag_directives = append(parser.tag_directives, value_copy) - return true -} diff --git a/vendor/gopkg.in/yaml.v2/readerc.go b/vendor/gopkg.in/yaml.v2/readerc.go deleted file mode 100644 index 7c1f5fac..00000000 --- a/vendor/gopkg.in/yaml.v2/readerc.go +++ /dev/null @@ -1,412 +0,0 @@ -package yaml - -import ( - "io" -) - -// Set the reader error and return 0. -func yaml_parser_set_reader_error(parser *yaml_parser_t, problem string, offset int, value int) bool { - parser.error = yaml_READER_ERROR - parser.problem = problem - parser.problem_offset = offset - parser.problem_value = value - return false -} - -// Byte order marks. -const ( - bom_UTF8 = "\xef\xbb\xbf" - bom_UTF16LE = "\xff\xfe" - bom_UTF16BE = "\xfe\xff" -) - -// Determine the input stream encoding by checking the BOM symbol. If no BOM is -// found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. -func yaml_parser_determine_encoding(parser *yaml_parser_t) bool { - // Ensure that we had enough bytes in the raw buffer. - for !parser.eof && len(parser.raw_buffer)-parser.raw_buffer_pos < 3 { - if !yaml_parser_update_raw_buffer(parser) { - return false - } - } - - // Determine the encoding. - buf := parser.raw_buffer - pos := parser.raw_buffer_pos - avail := len(buf) - pos - if avail >= 2 && buf[pos] == bom_UTF16LE[0] && buf[pos+1] == bom_UTF16LE[1] { - parser.encoding = yaml_UTF16LE_ENCODING - parser.raw_buffer_pos += 2 - parser.offset += 2 - } else if avail >= 2 && buf[pos] == bom_UTF16BE[0] && buf[pos+1] == bom_UTF16BE[1] { - parser.encoding = yaml_UTF16BE_ENCODING - parser.raw_buffer_pos += 2 - parser.offset += 2 - } else if avail >= 3 && buf[pos] == bom_UTF8[0] && buf[pos+1] == bom_UTF8[1] && buf[pos+2] == bom_UTF8[2] { - parser.encoding = yaml_UTF8_ENCODING - parser.raw_buffer_pos += 3 - parser.offset += 3 - } else { - parser.encoding = yaml_UTF8_ENCODING - } - return true -} - -// Update the raw buffer. -func yaml_parser_update_raw_buffer(parser *yaml_parser_t) bool { - size_read := 0 - - // Return if the raw buffer is full. - if parser.raw_buffer_pos == 0 && len(parser.raw_buffer) == cap(parser.raw_buffer) { - return true - } - - // Return on EOF. - if parser.eof { - return true - } - - // Move the remaining bytes in the raw buffer to the beginning. - if parser.raw_buffer_pos > 0 && parser.raw_buffer_pos < len(parser.raw_buffer) { - copy(parser.raw_buffer, parser.raw_buffer[parser.raw_buffer_pos:]) - } - parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)-parser.raw_buffer_pos] - parser.raw_buffer_pos = 0 - - // Call the read handler to fill the buffer. - size_read, err := parser.read_handler(parser, parser.raw_buffer[len(parser.raw_buffer):cap(parser.raw_buffer)]) - parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)+size_read] - if err == io.EOF { - parser.eof = true - } else if err != nil { - return yaml_parser_set_reader_error(parser, "input error: "+err.Error(), parser.offset, -1) - } - return true -} - -// Ensure that the buffer contains at least `length` characters. -// Return true on success, false on failure. -// -// The length is supposed to be significantly less that the buffer size. -func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool { - if parser.read_handler == nil { - panic("read handler must be set") - } - - // [Go] This function was changed to guarantee the requested length size at EOF. - // The fact we need to do this is pretty awful, but the description above implies - // for that to be the case, and there are tests - - // If the EOF flag is set and the raw buffer is empty, do nothing. - if parser.eof && parser.raw_buffer_pos == len(parser.raw_buffer) { - // [Go] ACTUALLY! Read the documentation of this function above. - // This is just broken. To return true, we need to have the - // given length in the buffer. Not doing that means every single - // check that calls this function to make sure the buffer has a - // given length is Go) panicking; or C) accessing invalid memory. - //return true - } - - // Return if the buffer contains enough characters. - if parser.unread >= length { - return true - } - - // Determine the input encoding if it is not known yet. - if parser.encoding == yaml_ANY_ENCODING { - if !yaml_parser_determine_encoding(parser) { - return false - } - } - - // Move the unread characters to the beginning of the buffer. - buffer_len := len(parser.buffer) - if parser.buffer_pos > 0 && parser.buffer_pos < buffer_len { - copy(parser.buffer, parser.buffer[parser.buffer_pos:]) - buffer_len -= parser.buffer_pos - parser.buffer_pos = 0 - } else if parser.buffer_pos == buffer_len { - buffer_len = 0 - parser.buffer_pos = 0 - } - - // Open the whole buffer for writing, and cut it before returning. - parser.buffer = parser.buffer[:cap(parser.buffer)] - - // Fill the buffer until it has enough characters. - first := true - for parser.unread < length { - - // Fill the raw buffer if necessary. - if !first || parser.raw_buffer_pos == len(parser.raw_buffer) { - if !yaml_parser_update_raw_buffer(parser) { - parser.buffer = parser.buffer[:buffer_len] - return false - } - } - first = false - - // Decode the raw buffer. - inner: - for parser.raw_buffer_pos != len(parser.raw_buffer) { - var value rune - var width int - - raw_unread := len(parser.raw_buffer) - parser.raw_buffer_pos - - // Decode the next character. - switch parser.encoding { - case yaml_UTF8_ENCODING: - // Decode a UTF-8 character. Check RFC 3629 - // (http://www.ietf.org/rfc/rfc3629.txt) for more details. - // - // The following table (taken from the RFC) is used for - // decoding. - // - // Char. number range | UTF-8 octet sequence - // (hexadecimal) | (binary) - // --------------------+------------------------------------ - // 0000 0000-0000 007F | 0xxxxxxx - // 0000 0080-0000 07FF | 110xxxxx 10xxxxxx - // 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx - // 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - // - // Additionally, the characters in the range 0xD800-0xDFFF - // are prohibited as they are reserved for use with UTF-16 - // surrogate pairs. - - // Determine the length of the UTF-8 sequence. - octet := parser.raw_buffer[parser.raw_buffer_pos] - switch { - case octet&0x80 == 0x00: - width = 1 - case octet&0xE0 == 0xC0: - width = 2 - case octet&0xF0 == 0xE0: - width = 3 - case octet&0xF8 == 0xF0: - width = 4 - default: - // The leading octet is invalid. - return yaml_parser_set_reader_error(parser, - "invalid leading UTF-8 octet", - parser.offset, int(octet)) - } - - // Check if the raw buffer contains an incomplete character. - if width > raw_unread { - if parser.eof { - return yaml_parser_set_reader_error(parser, - "incomplete UTF-8 octet sequence", - parser.offset, -1) - } - break inner - } - - // Decode the leading octet. - switch { - case octet&0x80 == 0x00: - value = rune(octet & 0x7F) - case octet&0xE0 == 0xC0: - value = rune(octet & 0x1F) - case octet&0xF0 == 0xE0: - value = rune(octet & 0x0F) - case octet&0xF8 == 0xF0: - value = rune(octet & 0x07) - default: - value = 0 - } - - // Check and decode the trailing octets. - for k := 1; k < width; k++ { - octet = parser.raw_buffer[parser.raw_buffer_pos+k] - - // Check if the octet is valid. - if (octet & 0xC0) != 0x80 { - return yaml_parser_set_reader_error(parser, - "invalid trailing UTF-8 octet", - parser.offset+k, int(octet)) - } - - // Decode the octet. - value = (value << 6) + rune(octet&0x3F) - } - - // Check the length of the sequence against the value. - switch { - case width == 1: - case width == 2 && value >= 0x80: - case width == 3 && value >= 0x800: - case width == 4 && value >= 0x10000: - default: - return yaml_parser_set_reader_error(parser, - "invalid length of a UTF-8 sequence", - parser.offset, -1) - } - - // Check the range of the value. - if value >= 0xD800 && value <= 0xDFFF || value > 0x10FFFF { - return yaml_parser_set_reader_error(parser, - "invalid Unicode character", - parser.offset, int(value)) - } - - case yaml_UTF16LE_ENCODING, yaml_UTF16BE_ENCODING: - var low, high int - if parser.encoding == yaml_UTF16LE_ENCODING { - low, high = 0, 1 - } else { - low, high = 1, 0 - } - - // The UTF-16 encoding is not as simple as one might - // naively think. Check RFC 2781 - // (http://www.ietf.org/rfc/rfc2781.txt). - // - // Normally, two subsequent bytes describe a Unicode - // character. However a special technique (called a - // surrogate pair) is used for specifying character - // values larger than 0xFFFF. - // - // A surrogate pair consists of two pseudo-characters: - // high surrogate area (0xD800-0xDBFF) - // low surrogate area (0xDC00-0xDFFF) - // - // The following formulas are used for decoding - // and encoding characters using surrogate pairs: - // - // U = U' + 0x10000 (0x01 00 00 <= U <= 0x10 FF FF) - // U' = yyyyyyyyyyxxxxxxxxxx (0 <= U' <= 0x0F FF FF) - // W1 = 110110yyyyyyyyyy - // W2 = 110111xxxxxxxxxx - // - // where U is the character value, W1 is the high surrogate - // area, W2 is the low surrogate area. - - // Check for incomplete UTF-16 character. - if raw_unread < 2 { - if parser.eof { - return yaml_parser_set_reader_error(parser, - "incomplete UTF-16 character", - parser.offset, -1) - } - break inner - } - - // Get the character. - value = rune(parser.raw_buffer[parser.raw_buffer_pos+low]) + - (rune(parser.raw_buffer[parser.raw_buffer_pos+high]) << 8) - - // Check for unexpected low surrogate area. - if value&0xFC00 == 0xDC00 { - return yaml_parser_set_reader_error(parser, - "unexpected low surrogate area", - parser.offset, int(value)) - } - - // Check for a high surrogate area. - if value&0xFC00 == 0xD800 { - width = 4 - - // Check for incomplete surrogate pair. - if raw_unread < 4 { - if parser.eof { - return yaml_parser_set_reader_error(parser, - "incomplete UTF-16 surrogate pair", - parser.offset, -1) - } - break inner - } - - // Get the next character. - value2 := rune(parser.raw_buffer[parser.raw_buffer_pos+low+2]) + - (rune(parser.raw_buffer[parser.raw_buffer_pos+high+2]) << 8) - - // Check for a low surrogate area. - if value2&0xFC00 != 0xDC00 { - return yaml_parser_set_reader_error(parser, - "expected low surrogate area", - parser.offset+2, int(value2)) - } - - // Generate the value of the surrogate pair. - value = 0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x3FF) - } else { - width = 2 - } - - default: - panic("impossible") - } - - // Check if the character is in the allowed range: - // #x9 | #xA | #xD | [#x20-#x7E] (8 bit) - // | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] (16 bit) - // | [#x10000-#x10FFFF] (32 bit) - switch { - case value == 0x09: - case value == 0x0A: - case value == 0x0D: - case value >= 0x20 && value <= 0x7E: - case value == 0x85: - case value >= 0xA0 && value <= 0xD7FF: - case value >= 0xE000 && value <= 0xFFFD: - case value >= 0x10000 && value <= 0x10FFFF: - default: - return yaml_parser_set_reader_error(parser, - "control characters are not allowed", - parser.offset, int(value)) - } - - // Move the raw pointers. - parser.raw_buffer_pos += width - parser.offset += width - - // Finally put the character into the buffer. - if value <= 0x7F { - // 0000 0000-0000 007F . 0xxxxxxx - parser.buffer[buffer_len+0] = byte(value) - buffer_len += 1 - } else if value <= 0x7FF { - // 0000 0080-0000 07FF . 110xxxxx 10xxxxxx - parser.buffer[buffer_len+0] = byte(0xC0 + (value >> 6)) - parser.buffer[buffer_len+1] = byte(0x80 + (value & 0x3F)) - buffer_len += 2 - } else if value <= 0xFFFF { - // 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx - parser.buffer[buffer_len+0] = byte(0xE0 + (value >> 12)) - parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 6) & 0x3F)) - parser.buffer[buffer_len+2] = byte(0x80 + (value & 0x3F)) - buffer_len += 3 - } else { - // 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - parser.buffer[buffer_len+0] = byte(0xF0 + (value >> 18)) - parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 12) & 0x3F)) - parser.buffer[buffer_len+2] = byte(0x80 + ((value >> 6) & 0x3F)) - parser.buffer[buffer_len+3] = byte(0x80 + (value & 0x3F)) - buffer_len += 4 - } - - parser.unread++ - } - - // On EOF, put NUL into the buffer and return. - if parser.eof { - parser.buffer[buffer_len] = 0 - buffer_len++ - parser.unread++ - break - } - } - // [Go] Read the documentation of this function above. To return true, - // we need to have the given length in the buffer. Not doing that means - // every single check that calls this function to make sure the buffer - // has a given length is Go) panicking; or C) accessing invalid memory. - // This happens here due to the EOF above breaking early. - for buffer_len < length { - parser.buffer[buffer_len] = 0 - buffer_len++ - } - parser.buffer = parser.buffer[:buffer_len] - return true -} diff --git a/vendor/gopkg.in/yaml.v2/resolve.go b/vendor/gopkg.in/yaml.v2/resolve.go deleted file mode 100644 index 4120e0c9..00000000 --- a/vendor/gopkg.in/yaml.v2/resolve.go +++ /dev/null @@ -1,258 +0,0 @@ -package yaml - -import ( - "encoding/base64" - "math" - "regexp" - "strconv" - "strings" - "time" -) - -type resolveMapItem struct { - value interface{} - tag string -} - -var resolveTable = make([]byte, 256) -var resolveMap = make(map[string]resolveMapItem) - -func init() { - t := resolveTable - t[int('+')] = 'S' // Sign - t[int('-')] = 'S' - for _, c := range "0123456789" { - t[int(c)] = 'D' // Digit - } - for _, c := range "yYnNtTfFoO~" { - t[int(c)] = 'M' // In map - } - t[int('.')] = '.' // Float (potentially in map) - - var resolveMapList = []struct { - v interface{} - tag string - l []string - }{ - {true, yaml_BOOL_TAG, []string{"y", "Y", "yes", "Yes", "YES"}}, - {true, yaml_BOOL_TAG, []string{"true", "True", "TRUE"}}, - {true, yaml_BOOL_TAG, []string{"on", "On", "ON"}}, - {false, yaml_BOOL_TAG, []string{"n", "N", "no", "No", "NO"}}, - {false, yaml_BOOL_TAG, []string{"false", "False", "FALSE"}}, - {false, yaml_BOOL_TAG, []string{"off", "Off", "OFF"}}, - {nil, yaml_NULL_TAG, []string{"", "~", "null", "Null", "NULL"}}, - {math.NaN(), yaml_FLOAT_TAG, []string{".nan", ".NaN", ".NAN"}}, - {math.Inf(+1), yaml_FLOAT_TAG, []string{".inf", ".Inf", ".INF"}}, - {math.Inf(+1), yaml_FLOAT_TAG, []string{"+.inf", "+.Inf", "+.INF"}}, - {math.Inf(-1), yaml_FLOAT_TAG, []string{"-.inf", "-.Inf", "-.INF"}}, - {"<<", yaml_MERGE_TAG, []string{"<<"}}, - } - - m := resolveMap - for _, item := range resolveMapList { - for _, s := range item.l { - m[s] = resolveMapItem{item.v, item.tag} - } - } -} - -const longTagPrefix = "tag:yaml.org,2002:" - -func shortTag(tag string) string { - // TODO This can easily be made faster and produce less garbage. - if strings.HasPrefix(tag, longTagPrefix) { - return "!!" + tag[len(longTagPrefix):] - } - return tag -} - -func longTag(tag string) string { - if strings.HasPrefix(tag, "!!") { - return longTagPrefix + tag[2:] - } - return tag -} - -func resolvableTag(tag string) bool { - switch tag { - case "", yaml_STR_TAG, yaml_BOOL_TAG, yaml_INT_TAG, yaml_FLOAT_TAG, yaml_NULL_TAG, yaml_TIMESTAMP_TAG: - return true - } - return false -} - -var yamlStyleFloat = regexp.MustCompile(`^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$`) - -func resolve(tag string, in string) (rtag string, out interface{}) { - if !resolvableTag(tag) { - return tag, in - } - - defer func() { - switch tag { - case "", rtag, yaml_STR_TAG, yaml_BINARY_TAG: - return - case yaml_FLOAT_TAG: - if rtag == yaml_INT_TAG { - switch v := out.(type) { - case int64: - rtag = yaml_FLOAT_TAG - out = float64(v) - return - case int: - rtag = yaml_FLOAT_TAG - out = float64(v) - return - } - } - } - failf("cannot decode %s `%s` as a %s", shortTag(rtag), in, shortTag(tag)) - }() - - // Any data is accepted as a !!str or !!binary. - // Otherwise, the prefix is enough of a hint about what it might be. - hint := byte('N') - if in != "" { - hint = resolveTable[in[0]] - } - if hint != 0 && tag != yaml_STR_TAG && tag != yaml_BINARY_TAG { - // Handle things we can lookup in a map. - if item, ok := resolveMap[in]; ok { - return item.tag, item.value - } - - // Base 60 floats are a bad idea, were dropped in YAML 1.2, and - // are purposefully unsupported here. They're still quoted on - // the way out for compatibility with other parser, though. - - switch hint { - case 'M': - // We've already checked the map above. - - case '.': - // Not in the map, so maybe a normal float. - floatv, err := strconv.ParseFloat(in, 64) - if err == nil { - return yaml_FLOAT_TAG, floatv - } - - case 'D', 'S': - // Int, float, or timestamp. - // Only try values as a timestamp if the value is unquoted or there's an explicit - // !!timestamp tag. - if tag == "" || tag == yaml_TIMESTAMP_TAG { - t, ok := parseTimestamp(in) - if ok { - return yaml_TIMESTAMP_TAG, t - } - } - - plain := strings.Replace(in, "_", "", -1) - intv, err := strconv.ParseInt(plain, 0, 64) - if err == nil { - if intv == int64(int(intv)) { - return yaml_INT_TAG, int(intv) - } else { - return yaml_INT_TAG, intv - } - } - uintv, err := strconv.ParseUint(plain, 0, 64) - if err == nil { - return yaml_INT_TAG, uintv - } - if yamlStyleFloat.MatchString(plain) { - floatv, err := strconv.ParseFloat(plain, 64) - if err == nil { - return yaml_FLOAT_TAG, floatv - } - } - if strings.HasPrefix(plain, "0b") { - intv, err := strconv.ParseInt(plain[2:], 2, 64) - if err == nil { - if intv == int64(int(intv)) { - return yaml_INT_TAG, int(intv) - } else { - return yaml_INT_TAG, intv - } - } - uintv, err := strconv.ParseUint(plain[2:], 2, 64) - if err == nil { - return yaml_INT_TAG, uintv - } - } else if strings.HasPrefix(plain, "-0b") { - intv, err := strconv.ParseInt("-" + plain[3:], 2, 64) - if err == nil { - if true || intv == int64(int(intv)) { - return yaml_INT_TAG, int(intv) - } else { - return yaml_INT_TAG, intv - } - } - } - default: - panic("resolveTable item not yet handled: " + string(rune(hint)) + " (with " + in + ")") - } - } - return yaml_STR_TAG, in -} - -// encodeBase64 encodes s as base64 that is broken up into multiple lines -// as appropriate for the resulting length. -func encodeBase64(s string) string { - const lineLen = 70 - encLen := base64.StdEncoding.EncodedLen(len(s)) - lines := encLen/lineLen + 1 - buf := make([]byte, encLen*2+lines) - in := buf[0:encLen] - out := buf[encLen:] - base64.StdEncoding.Encode(in, []byte(s)) - k := 0 - for i := 0; i < len(in); i += lineLen { - j := i + lineLen - if j > len(in) { - j = len(in) - } - k += copy(out[k:], in[i:j]) - if lines > 1 { - out[k] = '\n' - k++ - } - } - return string(out[:k]) -} - -// This is a subset of the formats allowed by the regular expression -// defined at http://yaml.org/type/timestamp.html. -var allowedTimestampFormats = []string{ - "2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields. - "2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t". - "2006-1-2 15:4:5.999999999", // space separated with no time zone - "2006-1-2", // date only - // Notable exception: time.Parse cannot handle: "2001-12-14 21:59:43.10 -5" - // from the set of examples. -} - -// parseTimestamp parses s as a timestamp string and -// returns the timestamp and reports whether it succeeded. -// Timestamp formats are defined at http://yaml.org/type/timestamp.html -func parseTimestamp(s string) (time.Time, bool) { - // TODO write code to check all the formats supported by - // http://yaml.org/type/timestamp.html instead of using time.Parse. - - // Quick check: all date formats start with YYYY-. - i := 0 - for ; i < len(s); i++ { - if c := s[i]; c < '0' || c > '9' { - break - } - } - if i != 4 || i == len(s) || s[i] != '-' { - return time.Time{}, false - } - for _, format := range allowedTimestampFormats { - if t, err := time.Parse(format, s); err == nil { - return t, true - } - } - return time.Time{}, false -} diff --git a/vendor/gopkg.in/yaml.v2/scannerc.go b/vendor/gopkg.in/yaml.v2/scannerc.go deleted file mode 100644 index 0b9bb603..00000000 --- a/vendor/gopkg.in/yaml.v2/scannerc.go +++ /dev/null @@ -1,2711 +0,0 @@ -package yaml - -import ( - "bytes" - "fmt" -) - -// Introduction -// ************ -// -// The following notes assume that you are familiar with the YAML specification -// (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in -// some cases we are less restrictive that it requires. -// -// The process of transforming a YAML stream into a sequence of events is -// divided on two steps: Scanning and Parsing. -// -// The Scanner transforms the input stream into a sequence of tokens, while the -// parser transform the sequence of tokens produced by the Scanner into a -// sequence of parsing events. -// -// The Scanner is rather clever and complicated. The Parser, on the contrary, -// is a straightforward implementation of a recursive-descendant parser (or, -// LL(1) parser, as it is usually called). -// -// Actually there are two issues of Scanning that might be called "clever", the -// rest is quite straightforward. The issues are "block collection start" and -// "simple keys". Both issues are explained below in details. -// -// Here the Scanning step is explained and implemented. We start with the list -// of all the tokens produced by the Scanner together with short descriptions. -// -// Now, tokens: -// -// STREAM-START(encoding) # The stream start. -// STREAM-END # The stream end. -// VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. -// TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. -// DOCUMENT-START # '---' -// DOCUMENT-END # '...' -// BLOCK-SEQUENCE-START # Indentation increase denoting a block -// BLOCK-MAPPING-START # sequence or a block mapping. -// BLOCK-END # Indentation decrease. -// FLOW-SEQUENCE-START # '[' -// FLOW-SEQUENCE-END # ']' -// BLOCK-SEQUENCE-START # '{' -// BLOCK-SEQUENCE-END # '}' -// BLOCK-ENTRY # '-' -// FLOW-ENTRY # ',' -// KEY # '?' or nothing (simple keys). -// VALUE # ':' -// ALIAS(anchor) # '*anchor' -// ANCHOR(anchor) # '&anchor' -// TAG(handle,suffix) # '!handle!suffix' -// SCALAR(value,style) # A scalar. -// -// The following two tokens are "virtual" tokens denoting the beginning and the -// end of the stream: -// -// STREAM-START(encoding) -// STREAM-END -// -// We pass the information about the input stream encoding with the -// STREAM-START token. -// -// The next two tokens are responsible for tags: -// -// VERSION-DIRECTIVE(major,minor) -// TAG-DIRECTIVE(handle,prefix) -// -// Example: -// -// %YAML 1.1 -// %TAG ! !foo -// %TAG !yaml! tag:yaml.org,2002: -// --- -// -// The correspoding sequence of tokens: -// -// STREAM-START(utf-8) -// VERSION-DIRECTIVE(1,1) -// TAG-DIRECTIVE("!","!foo") -// TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:") -// DOCUMENT-START -// STREAM-END -// -// Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole -// line. -// -// The document start and end indicators are represented by: -// -// DOCUMENT-START -// DOCUMENT-END -// -// Note that if a YAML stream contains an implicit document (without '---' -// and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be -// produced. -// -// In the following examples, we present whole documents together with the -// produced tokens. -// -// 1. An implicit document: -// -// 'a scalar' -// -// Tokens: -// -// STREAM-START(utf-8) -// SCALAR("a scalar",single-quoted) -// STREAM-END -// -// 2. An explicit document: -// -// --- -// 'a scalar' -// ... -// -// Tokens: -// -// STREAM-START(utf-8) -// DOCUMENT-START -// SCALAR("a scalar",single-quoted) -// DOCUMENT-END -// STREAM-END -// -// 3. Several documents in a stream: -// -// 'a scalar' -// --- -// 'another scalar' -// --- -// 'yet another scalar' -// -// Tokens: -// -// STREAM-START(utf-8) -// SCALAR("a scalar",single-quoted) -// DOCUMENT-START -// SCALAR("another scalar",single-quoted) -// DOCUMENT-START -// SCALAR("yet another scalar",single-quoted) -// STREAM-END -// -// We have already introduced the SCALAR token above. The following tokens are -// used to describe aliases, anchors, tag, and scalars: -// -// ALIAS(anchor) -// ANCHOR(anchor) -// TAG(handle,suffix) -// SCALAR(value,style) -// -// The following series of examples illustrate the usage of these tokens: -// -// 1. A recursive sequence: -// -// &A [ *A ] -// -// Tokens: -// -// STREAM-START(utf-8) -// ANCHOR("A") -// FLOW-SEQUENCE-START -// ALIAS("A") -// FLOW-SEQUENCE-END -// STREAM-END -// -// 2. A tagged scalar: -// -// !!float "3.14" # A good approximation. -// -// Tokens: -// -// STREAM-START(utf-8) -// TAG("!!","float") -// SCALAR("3.14",double-quoted) -// STREAM-END -// -// 3. Various scalar styles: -// -// --- # Implicit empty plain scalars do not produce tokens. -// --- a plain scalar -// --- 'a single-quoted scalar' -// --- "a double-quoted scalar" -// --- |- -// a literal scalar -// --- >- -// a folded -// scalar -// -// Tokens: -// -// STREAM-START(utf-8) -// DOCUMENT-START -// DOCUMENT-START -// SCALAR("a plain scalar",plain) -// DOCUMENT-START -// SCALAR("a single-quoted scalar",single-quoted) -// DOCUMENT-START -// SCALAR("a double-quoted scalar",double-quoted) -// DOCUMENT-START -// SCALAR("a literal scalar",literal) -// DOCUMENT-START -// SCALAR("a folded scalar",folded) -// STREAM-END -// -// Now it's time to review collection-related tokens. We will start with -// flow collections: -// -// FLOW-SEQUENCE-START -// FLOW-SEQUENCE-END -// FLOW-MAPPING-START -// FLOW-MAPPING-END -// FLOW-ENTRY -// KEY -// VALUE -// -// The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and -// FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' -// correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the -// indicators '?' and ':', which are used for denoting mapping keys and values, -// are represented by the KEY and VALUE tokens. -// -// The following examples show flow collections: -// -// 1. A flow sequence: -// -// [item 1, item 2, item 3] -// -// Tokens: -// -// STREAM-START(utf-8) -// FLOW-SEQUENCE-START -// SCALAR("item 1",plain) -// FLOW-ENTRY -// SCALAR("item 2",plain) -// FLOW-ENTRY -// SCALAR("item 3",plain) -// FLOW-SEQUENCE-END -// STREAM-END -// -// 2. A flow mapping: -// -// { -// a simple key: a value, # Note that the KEY token is produced. -// ? a complex key: another value, -// } -// -// Tokens: -// -// STREAM-START(utf-8) -// FLOW-MAPPING-START -// KEY -// SCALAR("a simple key",plain) -// VALUE -// SCALAR("a value",plain) -// FLOW-ENTRY -// KEY -// SCALAR("a complex key",plain) -// VALUE -// SCALAR("another value",plain) -// FLOW-ENTRY -// FLOW-MAPPING-END -// STREAM-END -// -// A simple key is a key which is not denoted by the '?' indicator. Note that -// the Scanner still produce the KEY token whenever it encounters a simple key. -// -// For scanning block collections, the following tokens are used (note that we -// repeat KEY and VALUE here): -// -// BLOCK-SEQUENCE-START -// BLOCK-MAPPING-START -// BLOCK-END -// BLOCK-ENTRY -// KEY -// VALUE -// -// The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation -// increase that precedes a block collection (cf. the INDENT token in Python). -// The token BLOCK-END denote indentation decrease that ends a block collection -// (cf. the DEDENT token in Python). However YAML has some syntax pecularities -// that makes detections of these tokens more complex. -// -// The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators -// '-', '?', and ':' correspondingly. -// -// The following examples show how the tokens BLOCK-SEQUENCE-START, -// BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: -// -// 1. Block sequences: -// -// - item 1 -// - item 2 -// - -// - item 3.1 -// - item 3.2 -// - -// key 1: value 1 -// key 2: value 2 -// -// Tokens: -// -// STREAM-START(utf-8) -// BLOCK-SEQUENCE-START -// BLOCK-ENTRY -// SCALAR("item 1",plain) -// BLOCK-ENTRY -// SCALAR("item 2",plain) -// BLOCK-ENTRY -// BLOCK-SEQUENCE-START -// BLOCK-ENTRY -// SCALAR("item 3.1",plain) -// BLOCK-ENTRY -// SCALAR("item 3.2",plain) -// BLOCK-END -// BLOCK-ENTRY -// BLOCK-MAPPING-START -// KEY -// SCALAR("key 1",plain) -// VALUE -// SCALAR("value 1",plain) -// KEY -// SCALAR("key 2",plain) -// VALUE -// SCALAR("value 2",plain) -// BLOCK-END -// BLOCK-END -// STREAM-END -// -// 2. Block mappings: -// -// a simple key: a value # The KEY token is produced here. -// ? a complex key -// : another value -// a mapping: -// key 1: value 1 -// key 2: value 2 -// a sequence: -// - item 1 -// - item 2 -// -// Tokens: -// -// STREAM-START(utf-8) -// BLOCK-MAPPING-START -// KEY -// SCALAR("a simple key",plain) -// VALUE -// SCALAR("a value",plain) -// KEY -// SCALAR("a complex key",plain) -// VALUE -// SCALAR("another value",plain) -// KEY -// SCALAR("a mapping",plain) -// BLOCK-MAPPING-START -// KEY -// SCALAR("key 1",plain) -// VALUE -// SCALAR("value 1",plain) -// KEY -// SCALAR("key 2",plain) -// VALUE -// SCALAR("value 2",plain) -// BLOCK-END -// KEY -// SCALAR("a sequence",plain) -// VALUE -// BLOCK-SEQUENCE-START -// BLOCK-ENTRY -// SCALAR("item 1",plain) -// BLOCK-ENTRY -// SCALAR("item 2",plain) -// BLOCK-END -// BLOCK-END -// STREAM-END -// -// YAML does not always require to start a new block collection from a new -// line. If the current line contains only '-', '?', and ':' indicators, a new -// block collection may start at the current line. The following examples -// illustrate this case: -// -// 1. Collections in a sequence: -// -// - - item 1 -// - item 2 -// - key 1: value 1 -// key 2: value 2 -// - ? complex key -// : complex value -// -// Tokens: -// -// STREAM-START(utf-8) -// BLOCK-SEQUENCE-START -// BLOCK-ENTRY -// BLOCK-SEQUENCE-START -// BLOCK-ENTRY -// SCALAR("item 1",plain) -// BLOCK-ENTRY -// SCALAR("item 2",plain) -// BLOCK-END -// BLOCK-ENTRY -// BLOCK-MAPPING-START -// KEY -// SCALAR("key 1",plain) -// VALUE -// SCALAR("value 1",plain) -// KEY -// SCALAR("key 2",plain) -// VALUE -// SCALAR("value 2",plain) -// BLOCK-END -// BLOCK-ENTRY -// BLOCK-MAPPING-START -// KEY -// SCALAR("complex key") -// VALUE -// SCALAR("complex value") -// BLOCK-END -// BLOCK-END -// STREAM-END -// -// 2. Collections in a mapping: -// -// ? a sequence -// : - item 1 -// - item 2 -// ? a mapping -// : key 1: value 1 -// key 2: value 2 -// -// Tokens: -// -// STREAM-START(utf-8) -// BLOCK-MAPPING-START -// KEY -// SCALAR("a sequence",plain) -// VALUE -// BLOCK-SEQUENCE-START -// BLOCK-ENTRY -// SCALAR("item 1",plain) -// BLOCK-ENTRY -// SCALAR("item 2",plain) -// BLOCK-END -// KEY -// SCALAR("a mapping",plain) -// VALUE -// BLOCK-MAPPING-START -// KEY -// SCALAR("key 1",plain) -// VALUE -// SCALAR("value 1",plain) -// KEY -// SCALAR("key 2",plain) -// VALUE -// SCALAR("value 2",plain) -// BLOCK-END -// BLOCK-END -// STREAM-END -// -// YAML also permits non-indented sequences if they are included into a block -// mapping. In this case, the token BLOCK-SEQUENCE-START is not produced: -// -// key: -// - item 1 # BLOCK-SEQUENCE-START is NOT produced here. -// - item 2 -// -// Tokens: -// -// STREAM-START(utf-8) -// BLOCK-MAPPING-START -// KEY -// SCALAR("key",plain) -// VALUE -// BLOCK-ENTRY -// SCALAR("item 1",plain) -// BLOCK-ENTRY -// SCALAR("item 2",plain) -// BLOCK-END -// - -// Ensure that the buffer contains the required number of characters. -// Return true on success, false on failure (reader error or memory error). -func cache(parser *yaml_parser_t, length int) bool { - // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B) - return parser.unread >= length || yaml_parser_update_buffer(parser, length) -} - -// Advance the buffer pointer. -func skip(parser *yaml_parser_t) { - parser.mark.index++ - parser.mark.column++ - parser.unread-- - parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) -} - -func skip_line(parser *yaml_parser_t) { - if is_crlf(parser.buffer, parser.buffer_pos) { - parser.mark.index += 2 - parser.mark.column = 0 - parser.mark.line++ - parser.unread -= 2 - parser.buffer_pos += 2 - } else if is_break(parser.buffer, parser.buffer_pos) { - parser.mark.index++ - parser.mark.column = 0 - parser.mark.line++ - parser.unread-- - parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) - } -} - -// Copy a character to a string buffer and advance pointers. -func read(parser *yaml_parser_t, s []byte) []byte { - w := width(parser.buffer[parser.buffer_pos]) - if w == 0 { - panic("invalid character sequence") - } - if len(s) == 0 { - s = make([]byte, 0, 32) - } - if w == 1 && len(s)+w <= cap(s) { - s = s[:len(s)+1] - s[len(s)-1] = parser.buffer[parser.buffer_pos] - parser.buffer_pos++ - } else { - s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...) - parser.buffer_pos += w - } - parser.mark.index++ - parser.mark.column++ - parser.unread-- - return s -} - -// Copy a line break character to a string buffer and advance pointers. -func read_line(parser *yaml_parser_t, s []byte) []byte { - buf := parser.buffer - pos := parser.buffer_pos - switch { - case buf[pos] == '\r' && buf[pos+1] == '\n': - // CR LF . LF - s = append(s, '\n') - parser.buffer_pos += 2 - parser.mark.index++ - parser.unread-- - case buf[pos] == '\r' || buf[pos] == '\n': - // CR|LF . LF - s = append(s, '\n') - parser.buffer_pos += 1 - case buf[pos] == '\xC2' && buf[pos+1] == '\x85': - // NEL . LF - s = append(s, '\n') - parser.buffer_pos += 2 - case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'): - // LS|PS . LS|PS - s = append(s, buf[parser.buffer_pos:pos+3]...) - parser.buffer_pos += 3 - default: - return s - } - parser.mark.index++ - parser.mark.column = 0 - parser.mark.line++ - parser.unread-- - return s -} - -// Get the next token. -func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool { - // Erase the token object. - *token = yaml_token_t{} // [Go] Is this necessary? - - // No tokens after STREAM-END or error. - if parser.stream_end_produced || parser.error != yaml_NO_ERROR { - return true - } - - // Ensure that the tokens queue contains enough tokens. - if !parser.token_available { - if !yaml_parser_fetch_more_tokens(parser) { - return false - } - } - - // Fetch the next token from the queue. - *token = parser.tokens[parser.tokens_head] - parser.tokens_head++ - parser.tokens_parsed++ - parser.token_available = false - - if token.typ == yaml_STREAM_END_TOKEN { - parser.stream_end_produced = true - } - return true -} - -// Set the scanner error and return false. -func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool { - parser.error = yaml_SCANNER_ERROR - parser.context = context - parser.context_mark = context_mark - parser.problem = problem - parser.problem_mark = parser.mark - return false -} - -func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool { - context := "while parsing a tag" - if directive { - context = "while parsing a %TAG directive" - } - return yaml_parser_set_scanner_error(parser, context, context_mark, problem) -} - -func trace(args ...interface{}) func() { - pargs := append([]interface{}{"+++"}, args...) - fmt.Println(pargs...) - pargs = append([]interface{}{"---"}, args...) - return func() { fmt.Println(pargs...) } -} - -// Ensure that the tokens queue contains at least one token which can be -// returned to the Parser. -func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { - // While we need more tokens to fetch, do it. - for { - if parser.tokens_head != len(parser.tokens) { - // If queue is non-empty, check if any potential simple key may - // occupy the head position. - head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed] - if !ok { - break - } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok { - return false - } else if !valid { - break - } - } - // Fetch the next token. - if !yaml_parser_fetch_next_token(parser) { - return false - } - } - - parser.token_available = true - return true -} - -// The dispatcher for token fetchers. -func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { - // Ensure that the buffer is initialized. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - - // Check if we just started scanning. Fetch STREAM-START then. - if !parser.stream_start_produced { - return yaml_parser_fetch_stream_start(parser) - } - - // Eat whitespaces and comments until we reach the next token. - if !yaml_parser_scan_to_next_token(parser) { - return false - } - - // Check the indentation level against the current column. - if !yaml_parser_unroll_indent(parser, parser.mark.column) { - return false - } - - // Ensure that the buffer contains at least 4 characters. 4 is the length - // of the longest indicators ('--- ' and '... '). - if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { - return false - } - - // Is it the end of the stream? - if is_z(parser.buffer, parser.buffer_pos) { - return yaml_parser_fetch_stream_end(parser) - } - - // Is it a directive? - if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' { - return yaml_parser_fetch_directive(parser) - } - - buf := parser.buffer - pos := parser.buffer_pos - - // Is it the document start indicator? - if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) { - return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN) - } - - // Is it the document end indicator? - if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) { - return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN) - } - - // Is it the flow sequence start indicator? - if buf[pos] == '[' { - return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN) - } - - // Is it the flow mapping start indicator? - if parser.buffer[parser.buffer_pos] == '{' { - return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN) - } - - // Is it the flow sequence end indicator? - if parser.buffer[parser.buffer_pos] == ']' { - return yaml_parser_fetch_flow_collection_end(parser, - yaml_FLOW_SEQUENCE_END_TOKEN) - } - - // Is it the flow mapping end indicator? - if parser.buffer[parser.buffer_pos] == '}' { - return yaml_parser_fetch_flow_collection_end(parser, - yaml_FLOW_MAPPING_END_TOKEN) - } - - // Is it the flow entry indicator? - if parser.buffer[parser.buffer_pos] == ',' { - return yaml_parser_fetch_flow_entry(parser) - } - - // Is it the block entry indicator? - if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) { - return yaml_parser_fetch_block_entry(parser) - } - - // Is it the key indicator? - if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { - return yaml_parser_fetch_key(parser) - } - - // Is it the value indicator? - if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { - return yaml_parser_fetch_value(parser) - } - - // Is it an alias? - if parser.buffer[parser.buffer_pos] == '*' { - return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN) - } - - // Is it an anchor? - if parser.buffer[parser.buffer_pos] == '&' { - return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN) - } - - // Is it a tag? - if parser.buffer[parser.buffer_pos] == '!' { - return yaml_parser_fetch_tag(parser) - } - - // Is it a literal scalar? - if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 { - return yaml_parser_fetch_block_scalar(parser, true) - } - - // Is it a folded scalar? - if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 { - return yaml_parser_fetch_block_scalar(parser, false) - } - - // Is it a single-quoted scalar? - if parser.buffer[parser.buffer_pos] == '\'' { - return yaml_parser_fetch_flow_scalar(parser, true) - } - - // Is it a double-quoted scalar? - if parser.buffer[parser.buffer_pos] == '"' { - return yaml_parser_fetch_flow_scalar(parser, false) - } - - // Is it a plain scalar? - // - // A plain scalar may start with any non-blank characters except - // - // '-', '?', ':', ',', '[', ']', '{', '}', - // '#', '&', '*', '!', '|', '>', '\'', '\"', - // '%', '@', '`'. - // - // In the block context (and, for the '-' indicator, in the flow context - // too), it may also start with the characters - // - // '-', '?', ':' - // - // if it is followed by a non-space character. - // - // The last rule is more restrictive than the specification requires. - // [Go] Make this logic more reasonable. - //switch parser.buffer[parser.buffer_pos] { - //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`': - //} - if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' || - parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' || - parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' || - parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || - parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' || - parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' || - parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' || - parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' || - parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' || - parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') || - (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) || - (parser.flow_level == 0 && - (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') && - !is_blankz(parser.buffer, parser.buffer_pos+1)) { - return yaml_parser_fetch_plain_scalar(parser) - } - - // If we don't determine the token type so far, it is an error. - return yaml_parser_set_scanner_error(parser, - "while scanning for the next token", parser.mark, - "found character that cannot start any token") -} - -func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) { - if !simple_key.possible { - return false, true - } - - // The 1.2 specification says: - // - // "If the ? indicator is omitted, parsing needs to see past the - // implicit key to recognize it as such. To limit the amount of - // lookahead required, the “:” indicator must appear at most 1024 - // Unicode characters beyond the start of the key. In addition, the key - // is restricted to a single line." - // - if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index { - // Check if the potential simple key to be removed is required. - if simple_key.required { - return false, yaml_parser_set_scanner_error(parser, - "while scanning a simple key", simple_key.mark, - "could not find expected ':'") - } - simple_key.possible = false - return false, true - } - return true, true -} - -// Check if a simple key may start at the current position and add it if -// needed. -func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { - // A simple key is required at the current position if the scanner is in - // the block context and the current column coincides with the indentation - // level. - - required := parser.flow_level == 0 && parser.indent == parser.mark.column - - // - // If the current position may start a simple key, save it. - // - if parser.simple_key_allowed { - simple_key := yaml_simple_key_t{ - possible: true, - required: required, - token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), - mark: parser.mark, - } - - if !yaml_parser_remove_simple_key(parser) { - return false - } - parser.simple_keys[len(parser.simple_keys)-1] = simple_key - parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1 - } - return true -} - -// Remove a potential simple key at the current flow level. -func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { - i := len(parser.simple_keys) - 1 - if parser.simple_keys[i].possible { - // If the key is required, it is an error. - if parser.simple_keys[i].required { - return yaml_parser_set_scanner_error(parser, - "while scanning a simple key", parser.simple_keys[i].mark, - "could not find expected ':'") - } - // Remove the key from the stack. - parser.simple_keys[i].possible = false - delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number) - } - return true -} - -// max_flow_level limits the flow_level -const max_flow_level = 10000 - -// Increase the flow level and resize the simple key list if needed. -func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { - // Reset the simple key on the next level. - parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{ - possible: false, - required: false, - token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), - mark: parser.mark, - }) - - // Increase the flow level. - parser.flow_level++ - if parser.flow_level > max_flow_level { - return yaml_parser_set_scanner_error(parser, - "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark, - fmt.Sprintf("exceeded max depth of %d", max_flow_level)) - } - return true -} - -// Decrease the flow level. -func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { - if parser.flow_level > 0 { - parser.flow_level-- - last := len(parser.simple_keys) - 1 - delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number) - parser.simple_keys = parser.simple_keys[:last] - } - return true -} - -// max_indents limits the indents stack size -const max_indents = 10000 - -// Push the current indentation level to the stack and set the new level -// the current column is greater than the indentation level. In this case, -// append or insert the specified token into the token queue. -func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool { - // In the flow context, do nothing. - if parser.flow_level > 0 { - return true - } - - if parser.indent < column { - // Push the current indentation level to the stack and set the new - // indentation level. - parser.indents = append(parser.indents, parser.indent) - parser.indent = column - if len(parser.indents) > max_indents { - return yaml_parser_set_scanner_error(parser, - "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark, - fmt.Sprintf("exceeded max depth of %d", max_indents)) - } - - // Create a token and insert it into the queue. - token := yaml_token_t{ - typ: typ, - start_mark: mark, - end_mark: mark, - } - if number > -1 { - number -= parser.tokens_parsed - } - yaml_insert_token(parser, number, &token) - } - return true -} - -// Pop indentation levels from the indents stack until the current level -// becomes less or equal to the column. For each indentation level, append -// the BLOCK-END token. -func yaml_parser_unroll_indent(parser *yaml_parser_t, column int) bool { - // In the flow context, do nothing. - if parser.flow_level > 0 { - return true - } - - // Loop through the indentation levels in the stack. - for parser.indent > column { - // Create a token and append it to the queue. - token := yaml_token_t{ - typ: yaml_BLOCK_END_TOKEN, - start_mark: parser.mark, - end_mark: parser.mark, - } - yaml_insert_token(parser, -1, &token) - - // Pop the indentation level. - parser.indent = parser.indents[len(parser.indents)-1] - parser.indents = parser.indents[:len(parser.indents)-1] - } - return true -} - -// Initialize the scanner and produce the STREAM-START token. -func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { - - // Set the initial indentation. - parser.indent = -1 - - // Initialize the simple key stack. - parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) - - parser.simple_keys_by_tok = make(map[int]int) - - // A simple key is allowed at the beginning of the stream. - parser.simple_key_allowed = true - - // We have started. - parser.stream_start_produced = true - - // Create the STREAM-START token and append it to the queue. - token := yaml_token_t{ - typ: yaml_STREAM_START_TOKEN, - start_mark: parser.mark, - end_mark: parser.mark, - encoding: parser.encoding, - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the STREAM-END token and shut down the scanner. -func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool { - - // Force new line. - if parser.mark.column != 0 { - parser.mark.column = 0 - parser.mark.line++ - } - - // Reset the indentation level. - if !yaml_parser_unroll_indent(parser, -1) { - return false - } - - // Reset simple keys. - if !yaml_parser_remove_simple_key(parser) { - return false - } - - parser.simple_key_allowed = false - - // Create the STREAM-END token and append it to the queue. - token := yaml_token_t{ - typ: yaml_STREAM_END_TOKEN, - start_mark: parser.mark, - end_mark: parser.mark, - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. -func yaml_parser_fetch_directive(parser *yaml_parser_t) bool { - // Reset the indentation level. - if !yaml_parser_unroll_indent(parser, -1) { - return false - } - - // Reset simple keys. - if !yaml_parser_remove_simple_key(parser) { - return false - } - - parser.simple_key_allowed = false - - // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. - token := yaml_token_t{} - if !yaml_parser_scan_directive(parser, &token) { - return false - } - // Append the token to the queue. - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the DOCUMENT-START or DOCUMENT-END token. -func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool { - // Reset the indentation level. - if !yaml_parser_unroll_indent(parser, -1) { - return false - } - - // Reset simple keys. - if !yaml_parser_remove_simple_key(parser) { - return false - } - - parser.simple_key_allowed = false - - // Consume the token. - start_mark := parser.mark - - skip(parser) - skip(parser) - skip(parser) - - end_mark := parser.mark - - // Create the DOCUMENT-START or DOCUMENT-END token. - token := yaml_token_t{ - typ: typ, - start_mark: start_mark, - end_mark: end_mark, - } - // Append the token to the queue. - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. -func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool { - // The indicators '[' and '{' may start a simple key. - if !yaml_parser_save_simple_key(parser) { - return false - } - - // Increase the flow level. - if !yaml_parser_increase_flow_level(parser) { - return false - } - - // A simple key may follow the indicators '[' and '{'. - parser.simple_key_allowed = true - - // Consume the token. - start_mark := parser.mark - skip(parser) - end_mark := parser.mark - - // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. - token := yaml_token_t{ - typ: typ, - start_mark: start_mark, - end_mark: end_mark, - } - // Append the token to the queue. - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. -func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool { - // Reset any potential simple key on the current flow level. - if !yaml_parser_remove_simple_key(parser) { - return false - } - - // Decrease the flow level. - if !yaml_parser_decrease_flow_level(parser) { - return false - } - - // No simple keys after the indicators ']' and '}'. - parser.simple_key_allowed = false - - // Consume the token. - - start_mark := parser.mark - skip(parser) - end_mark := parser.mark - - // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. - token := yaml_token_t{ - typ: typ, - start_mark: start_mark, - end_mark: end_mark, - } - // Append the token to the queue. - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the FLOW-ENTRY token. -func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool { - // Reset any potential simple keys on the current flow level. - if !yaml_parser_remove_simple_key(parser) { - return false - } - - // Simple keys are allowed after ','. - parser.simple_key_allowed = true - - // Consume the token. - start_mark := parser.mark - skip(parser) - end_mark := parser.mark - - // Create the FLOW-ENTRY token and append it to the queue. - token := yaml_token_t{ - typ: yaml_FLOW_ENTRY_TOKEN, - start_mark: start_mark, - end_mark: end_mark, - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the BLOCK-ENTRY token. -func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool { - // Check if the scanner is in the block context. - if parser.flow_level == 0 { - // Check if we are allowed to start a new entry. - if !parser.simple_key_allowed { - return yaml_parser_set_scanner_error(parser, "", parser.mark, - "block sequence entries are not allowed in this context") - } - // Add the BLOCK-SEQUENCE-START token if needed. - if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) { - return false - } - } else { - // It is an error for the '-' indicator to occur in the flow context, - // but we let the Parser detect and report about it because the Parser - // is able to point to the context. - } - - // Reset any potential simple keys on the current flow level. - if !yaml_parser_remove_simple_key(parser) { - return false - } - - // Simple keys are allowed after '-'. - parser.simple_key_allowed = true - - // Consume the token. - start_mark := parser.mark - skip(parser) - end_mark := parser.mark - - // Create the BLOCK-ENTRY token and append it to the queue. - token := yaml_token_t{ - typ: yaml_BLOCK_ENTRY_TOKEN, - start_mark: start_mark, - end_mark: end_mark, - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the KEY token. -func yaml_parser_fetch_key(parser *yaml_parser_t) bool { - - // In the block context, additional checks are required. - if parser.flow_level == 0 { - // Check if we are allowed to start a new key (not nessesary simple). - if !parser.simple_key_allowed { - return yaml_parser_set_scanner_error(parser, "", parser.mark, - "mapping keys are not allowed in this context") - } - // Add the BLOCK-MAPPING-START token if needed. - if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { - return false - } - } - - // Reset any potential simple keys on the current flow level. - if !yaml_parser_remove_simple_key(parser) { - return false - } - - // Simple keys are allowed after '?' in the block context. - parser.simple_key_allowed = parser.flow_level == 0 - - // Consume the token. - start_mark := parser.mark - skip(parser) - end_mark := parser.mark - - // Create the KEY token and append it to the queue. - token := yaml_token_t{ - typ: yaml_KEY_TOKEN, - start_mark: start_mark, - end_mark: end_mark, - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the VALUE token. -func yaml_parser_fetch_value(parser *yaml_parser_t) bool { - - simple_key := &parser.simple_keys[len(parser.simple_keys)-1] - - // Have we found a simple key? - if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { - return false - - } else if valid { - - // Create the KEY token and insert it into the queue. - token := yaml_token_t{ - typ: yaml_KEY_TOKEN, - start_mark: simple_key.mark, - end_mark: simple_key.mark, - } - yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token) - - // In the block context, we may need to add the BLOCK-MAPPING-START token. - if !yaml_parser_roll_indent(parser, simple_key.mark.column, - simple_key.token_number, - yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) { - return false - } - - // Remove the simple key. - simple_key.possible = false - delete(parser.simple_keys_by_tok, simple_key.token_number) - - // A simple key cannot follow another simple key. - parser.simple_key_allowed = false - - } else { - // The ':' indicator follows a complex key. - - // In the block context, extra checks are required. - if parser.flow_level == 0 { - - // Check if we are allowed to start a complex value. - if !parser.simple_key_allowed { - return yaml_parser_set_scanner_error(parser, "", parser.mark, - "mapping values are not allowed in this context") - } - - // Add the BLOCK-MAPPING-START token if needed. - if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { - return false - } - } - - // Simple keys after ':' are allowed in the block context. - parser.simple_key_allowed = parser.flow_level == 0 - } - - // Consume the token. - start_mark := parser.mark - skip(parser) - end_mark := parser.mark - - // Create the VALUE token and append it to the queue. - token := yaml_token_t{ - typ: yaml_VALUE_TOKEN, - start_mark: start_mark, - end_mark: end_mark, - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the ALIAS or ANCHOR token. -func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool { - // An anchor or an alias could be a simple key. - if !yaml_parser_save_simple_key(parser) { - return false - } - - // A simple key cannot follow an anchor or an alias. - parser.simple_key_allowed = false - - // Create the ALIAS or ANCHOR token and append it to the queue. - var token yaml_token_t - if !yaml_parser_scan_anchor(parser, &token, typ) { - return false - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the TAG token. -func yaml_parser_fetch_tag(parser *yaml_parser_t) bool { - // A tag could be a simple key. - if !yaml_parser_save_simple_key(parser) { - return false - } - - // A simple key cannot follow a tag. - parser.simple_key_allowed = false - - // Create the TAG token and append it to the queue. - var token yaml_token_t - if !yaml_parser_scan_tag(parser, &token) { - return false - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. -func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool { - // Remove any potential simple keys. - if !yaml_parser_remove_simple_key(parser) { - return false - } - - // A simple key may follow a block scalar. - parser.simple_key_allowed = true - - // Create the SCALAR token and append it to the queue. - var token yaml_token_t - if !yaml_parser_scan_block_scalar(parser, &token, literal) { - return false - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. -func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool { - // A plain scalar could be a simple key. - if !yaml_parser_save_simple_key(parser) { - return false - } - - // A simple key cannot follow a flow scalar. - parser.simple_key_allowed = false - - // Create the SCALAR token and append it to the queue. - var token yaml_token_t - if !yaml_parser_scan_flow_scalar(parser, &token, single) { - return false - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Produce the SCALAR(...,plain) token. -func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { - // A plain scalar could be a simple key. - if !yaml_parser_save_simple_key(parser) { - return false - } - - // A simple key cannot follow a flow scalar. - parser.simple_key_allowed = false - - // Create the SCALAR token and append it to the queue. - var token yaml_token_t - if !yaml_parser_scan_plain_scalar(parser, &token) { - return false - } - yaml_insert_token(parser, -1, &token) - return true -} - -// Eat whitespaces and comments until the next token is found. -func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { - - // Until the next token is not found. - for { - // Allow the BOM mark to start a line. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) { - skip(parser) - } - - // Eat whitespaces. - // Tabs are allowed: - // - in the flow context - // - in the block context, but not at the beginning of the line or - // after '-', '?', or ':' (complex value). - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - - for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - // Eat a comment until a line break. - if parser.buffer[parser.buffer_pos] == '#' { - for !is_breakz(parser.buffer, parser.buffer_pos) { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - } - - // If it is a line break, eat it. - if is_break(parser.buffer, parser.buffer_pos) { - if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { - return false - } - skip_line(parser) - - // In the block context, a new line may start a simple key. - if parser.flow_level == 0 { - parser.simple_key_allowed = true - } - } else { - break // We have found a token. - } - } - - return true -} - -// Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. -// -// Scope: -// %YAML 1.1 # a comment \n -// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -// %TAG !yaml! tag:yaml.org,2002: \n -// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -// -func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { - // Eat '%'. - start_mark := parser.mark - skip(parser) - - // Scan the directive name. - var name []byte - if !yaml_parser_scan_directive_name(parser, start_mark, &name) { - return false - } - - // Is it a YAML directive? - if bytes.Equal(name, []byte("YAML")) { - // Scan the VERSION directive value. - var major, minor int8 - if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) { - return false - } - end_mark := parser.mark - - // Create a VERSION-DIRECTIVE token. - *token = yaml_token_t{ - typ: yaml_VERSION_DIRECTIVE_TOKEN, - start_mark: start_mark, - end_mark: end_mark, - major: major, - minor: minor, - } - - // Is it a TAG directive? - } else if bytes.Equal(name, []byte("TAG")) { - // Scan the TAG directive value. - var handle, prefix []byte - if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) { - return false - } - end_mark := parser.mark - - // Create a TAG-DIRECTIVE token. - *token = yaml_token_t{ - typ: yaml_TAG_DIRECTIVE_TOKEN, - start_mark: start_mark, - end_mark: end_mark, - value: handle, - prefix: prefix, - } - - // Unknown directive. - } else { - yaml_parser_set_scanner_error(parser, "while scanning a directive", - start_mark, "found unknown directive name") - return false - } - - // Eat the rest of the line including any comments. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - - for is_blank(parser.buffer, parser.buffer_pos) { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - if parser.buffer[parser.buffer_pos] == '#' { - for !is_breakz(parser.buffer, parser.buffer_pos) { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - } - - // Check if we are at the end of the line. - if !is_breakz(parser.buffer, parser.buffer_pos) { - yaml_parser_set_scanner_error(parser, "while scanning a directive", - start_mark, "did not find expected comment or line break") - return false - } - - // Eat a line break. - if is_break(parser.buffer, parser.buffer_pos) { - if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { - return false - } - skip_line(parser) - } - - return true -} - -// Scan the directive name. -// -// Scope: -// %YAML 1.1 # a comment \n -// ^^^^ -// %TAG !yaml! tag:yaml.org,2002: \n -// ^^^ -// -func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { - // Consume the directive name. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - - var s []byte - for is_alpha(parser.buffer, parser.buffer_pos) { - s = read(parser, s) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - // Check if the name is empty. - if len(s) == 0 { - yaml_parser_set_scanner_error(parser, "while scanning a directive", - start_mark, "could not find expected directive name") - return false - } - - // Check for an blank character after the name. - if !is_blankz(parser.buffer, parser.buffer_pos) { - yaml_parser_set_scanner_error(parser, "while scanning a directive", - start_mark, "found unexpected non-alphabetical character") - return false - } - *name = s - return true -} - -// Scan the value of VERSION-DIRECTIVE. -// -// Scope: -// %YAML 1.1 # a comment \n -// ^^^^^^ -func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool { - // Eat whitespaces. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - for is_blank(parser.buffer, parser.buffer_pos) { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - // Consume the major version number. - if !yaml_parser_scan_version_directive_number(parser, start_mark, major) { - return false - } - - // Eat '.'. - if parser.buffer[parser.buffer_pos] != '.' { - return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", - start_mark, "did not find expected digit or '.' character") - } - - skip(parser) - - // Consume the minor version number. - if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) { - return false - } - return true -} - -const max_number_length = 2 - -// Scan the version number of VERSION-DIRECTIVE. -// -// Scope: -// %YAML 1.1 # a comment \n -// ^ -// %YAML 1.1 # a comment \n -// ^ -func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool { - - // Repeat while the next character is digit. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - var value, length int8 - for is_digit(parser.buffer, parser.buffer_pos) { - // Check if the number is too long. - length++ - if length > max_number_length { - return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", - start_mark, "found extremely long version number") - } - value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos)) - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - // Check if the number was present. - if length == 0 { - return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", - start_mark, "did not find expected version number") - } - *number = value - return true -} - -// Scan the value of a TAG-DIRECTIVE token. -// -// Scope: -// %TAG !yaml! tag:yaml.org,2002: \n -// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -// -func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { - var handle_value, prefix_value []byte - - // Eat whitespaces. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - - for is_blank(parser.buffer, parser.buffer_pos) { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - // Scan a handle. - if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) { - return false - } - - // Expect a whitespace. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - if !is_blank(parser.buffer, parser.buffer_pos) { - yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", - start_mark, "did not find expected whitespace") - return false - } - - // Eat whitespaces. - for is_blank(parser.buffer, parser.buffer_pos) { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - // Scan a prefix. - if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) { - return false - } - - // Expect a whitespace or line break. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - if !is_blankz(parser.buffer, parser.buffer_pos) { - yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", - start_mark, "did not find expected whitespace or line break") - return false - } - - *handle = handle_value - *prefix = prefix_value - return true -} - -func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool { - var s []byte - - // Eat the indicator character. - start_mark := parser.mark - skip(parser) - - // Consume the value. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - - for is_alpha(parser.buffer, parser.buffer_pos) { - s = read(parser, s) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - end_mark := parser.mark - - /* - * Check if length of the anchor is greater than 0 and it is followed by - * a whitespace character or one of the indicators: - * - * '?', ':', ',', ']', '}', '%', '@', '`'. - */ - - if len(s) == 0 || - !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' || - parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' || - parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' || - parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' || - parser.buffer[parser.buffer_pos] == '`') { - context := "while scanning an alias" - if typ == yaml_ANCHOR_TOKEN { - context = "while scanning an anchor" - } - yaml_parser_set_scanner_error(parser, context, start_mark, - "did not find expected alphabetic or numeric character") - return false - } - - // Create a token. - *token = yaml_token_t{ - typ: typ, - start_mark: start_mark, - end_mark: end_mark, - value: s, - } - - return true -} - -/* - * Scan a TAG token. - */ - -func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool { - var handle, suffix []byte - - start_mark := parser.mark - - // Check if the tag is in the canonical form. - if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { - return false - } - - if parser.buffer[parser.buffer_pos+1] == '<' { - // Keep the handle as '' - - // Eat '!<' - skip(parser) - skip(parser) - - // Consume the tag value. - if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { - return false - } - - // Check for '>' and eat it. - if parser.buffer[parser.buffer_pos] != '>' { - yaml_parser_set_scanner_error(parser, "while scanning a tag", - start_mark, "did not find the expected '>'") - return false - } - - skip(parser) - } else { - // The tag has either the '!suffix' or the '!handle!suffix' form. - - // First, try to scan a handle. - if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) { - return false - } - - // Check if it is, indeed, handle. - if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' { - // Scan the suffix now. - if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { - return false - } - } else { - // It wasn't a handle after all. Scan the rest of the tag. - if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) { - return false - } - - // Set the handle to '!'. - handle = []byte{'!'} - - // A special case: the '!' tag. Set the handle to '' and the - // suffix to '!'. - if len(suffix) == 0 { - handle, suffix = suffix, handle - } - } - } - - // Check the character which ends the tag. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - if !is_blankz(parser.buffer, parser.buffer_pos) { - yaml_parser_set_scanner_error(parser, "while scanning a tag", - start_mark, "did not find expected whitespace or line break") - return false - } - - end_mark := parser.mark - - // Create a token. - *token = yaml_token_t{ - typ: yaml_TAG_TOKEN, - start_mark: start_mark, - end_mark: end_mark, - value: handle, - suffix: suffix, - } - return true -} - -// Scan a tag handle. -func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool { - // Check the initial '!' character. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - if parser.buffer[parser.buffer_pos] != '!' { - yaml_parser_set_scanner_tag_error(parser, directive, - start_mark, "did not find expected '!'") - return false - } - - var s []byte - - // Copy the '!' character. - s = read(parser, s) - - // Copy all subsequent alphabetical and numerical characters. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - for is_alpha(parser.buffer, parser.buffer_pos) { - s = read(parser, s) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - // Check if the trailing character is '!' and copy it. - if parser.buffer[parser.buffer_pos] == '!' { - s = read(parser, s) - } else { - // It's either the '!' tag or not really a tag handle. If it's a %TAG - // directive, it's an error. If it's a tag token, it must be a part of URI. - if directive && string(s) != "!" { - yaml_parser_set_scanner_tag_error(parser, directive, - start_mark, "did not find expected '!'") - return false - } - } - - *handle = s - return true -} - -// Scan a tag. -func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool { - //size_t length = head ? strlen((char *)head) : 0 - var s []byte - hasTag := len(head) > 0 - - // Copy the head if needed. - // - // Note that we don't copy the leading '!' character. - if len(head) > 1 { - s = append(s, head[1:]...) - } - - // Scan the tag. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - - // The set of characters that may appear in URI is as follows: - // - // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', - // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', - // '%'. - // [Go] Convert this into more reasonable logic. - for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' || - parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' || - parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' || - parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' || - parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' || - parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' || - parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' || - parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' || - parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' || - parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' || - parser.buffer[parser.buffer_pos] == '%' { - // Check if it is a URI-escape sequence. - if parser.buffer[parser.buffer_pos] == '%' { - if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) { - return false - } - } else { - s = read(parser, s) - } - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - hasTag = true - } - - if !hasTag { - yaml_parser_set_scanner_tag_error(parser, directive, - start_mark, "did not find expected tag URI") - return false - } - *uri = s - return true -} - -// Decode an URI-escape sequence corresponding to a single UTF-8 character. -func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool { - - // Decode the required number of characters. - w := 1024 - for w > 0 { - // Check for a URI-escaped octet. - if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { - return false - } - - if !(parser.buffer[parser.buffer_pos] == '%' && - is_hex(parser.buffer, parser.buffer_pos+1) && - is_hex(parser.buffer, parser.buffer_pos+2)) { - return yaml_parser_set_scanner_tag_error(parser, directive, - start_mark, "did not find URI escaped octet") - } - - // Get the octet. - octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2)) - - // If it is the leading octet, determine the length of the UTF-8 sequence. - if w == 1024 { - w = width(octet) - if w == 0 { - return yaml_parser_set_scanner_tag_error(parser, directive, - start_mark, "found an incorrect leading UTF-8 octet") - } - } else { - // Check if the trailing octet is correct. - if octet&0xC0 != 0x80 { - return yaml_parser_set_scanner_tag_error(parser, directive, - start_mark, "found an incorrect trailing UTF-8 octet") - } - } - - // Copy the octet and move the pointers. - *s = append(*s, octet) - skip(parser) - skip(parser) - skip(parser) - w-- - } - return true -} - -// Scan a block scalar. -func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool { - // Eat the indicator '|' or '>'. - start_mark := parser.mark - skip(parser) - - // Scan the additional block scalar indicators. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - - // Check for a chomping indicator. - var chomping, increment int - if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { - // Set the chomping method and eat the indicator. - if parser.buffer[parser.buffer_pos] == '+' { - chomping = +1 - } else { - chomping = -1 - } - skip(parser) - - // Check for an indentation indicator. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - if is_digit(parser.buffer, parser.buffer_pos) { - // Check that the indentation is greater than 0. - if parser.buffer[parser.buffer_pos] == '0' { - yaml_parser_set_scanner_error(parser, "while scanning a block scalar", - start_mark, "found an indentation indicator equal to 0") - return false - } - - // Get the indentation level and eat the indicator. - increment = as_digit(parser.buffer, parser.buffer_pos) - skip(parser) - } - - } else if is_digit(parser.buffer, parser.buffer_pos) { - // Do the same as above, but in the opposite order. - - if parser.buffer[parser.buffer_pos] == '0' { - yaml_parser_set_scanner_error(parser, "while scanning a block scalar", - start_mark, "found an indentation indicator equal to 0") - return false - } - increment = as_digit(parser.buffer, parser.buffer_pos) - skip(parser) - - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { - if parser.buffer[parser.buffer_pos] == '+' { - chomping = +1 - } else { - chomping = -1 - } - skip(parser) - } - } - - // Eat whitespaces and comments to the end of the line. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - for is_blank(parser.buffer, parser.buffer_pos) { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - if parser.buffer[parser.buffer_pos] == '#' { - for !is_breakz(parser.buffer, parser.buffer_pos) { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - } - - // Check if we are at the end of the line. - if !is_breakz(parser.buffer, parser.buffer_pos) { - yaml_parser_set_scanner_error(parser, "while scanning a block scalar", - start_mark, "did not find expected comment or line break") - return false - } - - // Eat a line break. - if is_break(parser.buffer, parser.buffer_pos) { - if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { - return false - } - skip_line(parser) - } - - end_mark := parser.mark - - // Set the indentation level if it was specified. - var indent int - if increment > 0 { - if parser.indent >= 0 { - indent = parser.indent + increment - } else { - indent = increment - } - } - - // Scan the leading line breaks and determine the indentation level if needed. - var s, leading_break, trailing_breaks []byte - if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { - return false - } - - // Scan the block scalar content. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - var leading_blank, trailing_blank bool - for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) { - // We are at the beginning of a non-empty line. - - // Is it a trailing whitespace? - trailing_blank = is_blank(parser.buffer, parser.buffer_pos) - - // Check if we need to fold the leading line break. - if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' { - // Do we need to join the lines by space? - if len(trailing_breaks) == 0 { - s = append(s, ' ') - } - } else { - s = append(s, leading_break...) - } - leading_break = leading_break[:0] - - // Append the remaining line breaks. - s = append(s, trailing_breaks...) - trailing_breaks = trailing_breaks[:0] - - // Is it a leading whitespace? - leading_blank = is_blank(parser.buffer, parser.buffer_pos) - - // Consume the current line. - for !is_breakz(parser.buffer, parser.buffer_pos) { - s = read(parser, s) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - // Consume the line break. - if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { - return false - } - - leading_break = read_line(parser, leading_break) - - // Eat the following indentation spaces and line breaks. - if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { - return false - } - } - - // Chomp the tail. - if chomping != -1 { - s = append(s, leading_break...) - } - if chomping == 1 { - s = append(s, trailing_breaks...) - } - - // Create a token. - *token = yaml_token_t{ - typ: yaml_SCALAR_TOKEN, - start_mark: start_mark, - end_mark: end_mark, - value: s, - style: yaml_LITERAL_SCALAR_STYLE, - } - if !literal { - token.style = yaml_FOLDED_SCALAR_STYLE - } - return true -} - -// Scan indentation spaces and line breaks for a block scalar. Determine the -// indentation level if needed. -func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool { - *end_mark = parser.mark - - // Eat the indentation spaces and line breaks. - max_indent := 0 - for { - // Eat the indentation spaces. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) { - skip(parser) - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - if parser.mark.column > max_indent { - max_indent = parser.mark.column - } - - // Check for a tab character messing the indentation. - if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) { - return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", - start_mark, "found a tab character where an indentation space is expected") - } - - // Have we found a non-empty line? - if !is_break(parser.buffer, parser.buffer_pos) { - break - } - - // Consume the line break. - if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { - return false - } - // [Go] Should really be returning breaks instead. - *breaks = read_line(parser, *breaks) - *end_mark = parser.mark - } - - // Determine the indentation level if needed. - if *indent == 0 { - *indent = max_indent - if *indent < parser.indent+1 { - *indent = parser.indent + 1 - } - if *indent < 1 { - *indent = 1 - } - } - return true -} - -// Scan a quoted scalar. -func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool { - // Eat the left quote. - start_mark := parser.mark - skip(parser) - - // Consume the content of the quoted scalar. - var s, leading_break, trailing_breaks, whitespaces []byte - for { - // Check that there are no document indicators at the beginning of the line. - if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { - return false - } - - if parser.mark.column == 0 && - ((parser.buffer[parser.buffer_pos+0] == '-' && - parser.buffer[parser.buffer_pos+1] == '-' && - parser.buffer[parser.buffer_pos+2] == '-') || - (parser.buffer[parser.buffer_pos+0] == '.' && - parser.buffer[parser.buffer_pos+1] == '.' && - parser.buffer[parser.buffer_pos+2] == '.')) && - is_blankz(parser.buffer, parser.buffer_pos+3) { - yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", - start_mark, "found unexpected document indicator") - return false - } - - // Check for EOF. - if is_z(parser.buffer, parser.buffer_pos) { - yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", - start_mark, "found unexpected end of stream") - return false - } - - // Consume non-blank characters. - leading_blanks := false - for !is_blankz(parser.buffer, parser.buffer_pos) { - if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' { - // Is is an escaped single quote. - s = append(s, '\'') - skip(parser) - skip(parser) - - } else if single && parser.buffer[parser.buffer_pos] == '\'' { - // It is a right single quote. - break - } else if !single && parser.buffer[parser.buffer_pos] == '"' { - // It is a right double quote. - break - - } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) { - // It is an escaped line break. - if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) { - return false - } - skip(parser) - skip_line(parser) - leading_blanks = true - break - - } else if !single && parser.buffer[parser.buffer_pos] == '\\' { - // It is an escape sequence. - code_length := 0 - - // Check the escape character. - switch parser.buffer[parser.buffer_pos+1] { - case '0': - s = append(s, 0) - case 'a': - s = append(s, '\x07') - case 'b': - s = append(s, '\x08') - case 't', '\t': - s = append(s, '\x09') - case 'n': - s = append(s, '\x0A') - case 'v': - s = append(s, '\x0B') - case 'f': - s = append(s, '\x0C') - case 'r': - s = append(s, '\x0D') - case 'e': - s = append(s, '\x1B') - case ' ': - s = append(s, '\x20') - case '"': - s = append(s, '"') - case '\'': - s = append(s, '\'') - case '\\': - s = append(s, '\\') - case 'N': // NEL (#x85) - s = append(s, '\xC2') - s = append(s, '\x85') - case '_': // #xA0 - s = append(s, '\xC2') - s = append(s, '\xA0') - case 'L': // LS (#x2028) - s = append(s, '\xE2') - s = append(s, '\x80') - s = append(s, '\xA8') - case 'P': // PS (#x2029) - s = append(s, '\xE2') - s = append(s, '\x80') - s = append(s, '\xA9') - case 'x': - code_length = 2 - case 'u': - code_length = 4 - case 'U': - code_length = 8 - default: - yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", - start_mark, "found unknown escape character") - return false - } - - skip(parser) - skip(parser) - - // Consume an arbitrary escape code. - if code_length > 0 { - var value int - - // Scan the character value. - if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) { - return false - } - for k := 0; k < code_length; k++ { - if !is_hex(parser.buffer, parser.buffer_pos+k) { - yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", - start_mark, "did not find expected hexdecimal number") - return false - } - value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k) - } - - // Check the value and write the character. - if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF { - yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", - start_mark, "found invalid Unicode character escape code") - return false - } - if value <= 0x7F { - s = append(s, byte(value)) - } else if value <= 0x7FF { - s = append(s, byte(0xC0+(value>>6))) - s = append(s, byte(0x80+(value&0x3F))) - } else if value <= 0xFFFF { - s = append(s, byte(0xE0+(value>>12))) - s = append(s, byte(0x80+((value>>6)&0x3F))) - s = append(s, byte(0x80+(value&0x3F))) - } else { - s = append(s, byte(0xF0+(value>>18))) - s = append(s, byte(0x80+((value>>12)&0x3F))) - s = append(s, byte(0x80+((value>>6)&0x3F))) - s = append(s, byte(0x80+(value&0x3F))) - } - - // Advance the pointer. - for k := 0; k < code_length; k++ { - skip(parser) - } - } - } else { - // It is a non-escaped non-blank character. - s = read(parser, s) - } - if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { - return false - } - } - - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - - // Check if we are at the end of the scalar. - if single { - if parser.buffer[parser.buffer_pos] == '\'' { - break - } - } else { - if parser.buffer[parser.buffer_pos] == '"' { - break - } - } - - // Consume blank characters. - for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { - if is_blank(parser.buffer, parser.buffer_pos) { - // Consume a space or a tab character. - if !leading_blanks { - whitespaces = read(parser, whitespaces) - } else { - skip(parser) - } - } else { - if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { - return false - } - - // Check if it is a first line break. - if !leading_blanks { - whitespaces = whitespaces[:0] - leading_break = read_line(parser, leading_break) - leading_blanks = true - } else { - trailing_breaks = read_line(parser, trailing_breaks) - } - } - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - // Join the whitespaces or fold line breaks. - if leading_blanks { - // Do we need to fold line breaks? - if len(leading_break) > 0 && leading_break[0] == '\n' { - if len(trailing_breaks) == 0 { - s = append(s, ' ') - } else { - s = append(s, trailing_breaks...) - } - } else { - s = append(s, leading_break...) - s = append(s, trailing_breaks...) - } - trailing_breaks = trailing_breaks[:0] - leading_break = leading_break[:0] - } else { - s = append(s, whitespaces...) - whitespaces = whitespaces[:0] - } - } - - // Eat the right quote. - skip(parser) - end_mark := parser.mark - - // Create a token. - *token = yaml_token_t{ - typ: yaml_SCALAR_TOKEN, - start_mark: start_mark, - end_mark: end_mark, - value: s, - style: yaml_SINGLE_QUOTED_SCALAR_STYLE, - } - if !single { - token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE - } - return true -} - -// Scan a plain scalar. -func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool { - - var s, leading_break, trailing_breaks, whitespaces []byte - var leading_blanks bool - var indent = parser.indent + 1 - - start_mark := parser.mark - end_mark := parser.mark - - // Consume the content of the plain scalar. - for { - // Check for a document indicator. - if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) { - return false - } - if parser.mark.column == 0 && - ((parser.buffer[parser.buffer_pos+0] == '-' && - parser.buffer[parser.buffer_pos+1] == '-' && - parser.buffer[parser.buffer_pos+2] == '-') || - (parser.buffer[parser.buffer_pos+0] == '.' && - parser.buffer[parser.buffer_pos+1] == '.' && - parser.buffer[parser.buffer_pos+2] == '.')) && - is_blankz(parser.buffer, parser.buffer_pos+3) { - break - } - - // Check for a comment. - if parser.buffer[parser.buffer_pos] == '#' { - break - } - - // Consume non-blank characters. - for !is_blankz(parser.buffer, parser.buffer_pos) { - - // Check for indicators that may end a plain scalar. - if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) || - (parser.flow_level > 0 && - (parser.buffer[parser.buffer_pos] == ',' || - parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' || - parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || - parser.buffer[parser.buffer_pos] == '}')) { - break - } - - // Check if we need to join whitespaces and breaks. - if leading_blanks || len(whitespaces) > 0 { - if leading_blanks { - // Do we need to fold line breaks? - if leading_break[0] == '\n' { - if len(trailing_breaks) == 0 { - s = append(s, ' ') - } else { - s = append(s, trailing_breaks...) - } - } else { - s = append(s, leading_break...) - s = append(s, trailing_breaks...) - } - trailing_breaks = trailing_breaks[:0] - leading_break = leading_break[:0] - leading_blanks = false - } else { - s = append(s, whitespaces...) - whitespaces = whitespaces[:0] - } - } - - // Copy the character. - s = read(parser, s) - - end_mark = parser.mark - if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { - return false - } - } - - // Is it the end? - if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) { - break - } - - // Consume blank characters. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - - for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { - if is_blank(parser.buffer, parser.buffer_pos) { - - // Check for tab characters that abuse indentation. - if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) { - yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", - start_mark, "found a tab character that violates indentation") - return false - } - - // Consume a space or a tab character. - if !leading_blanks { - whitespaces = read(parser, whitespaces) - } else { - skip(parser) - } - } else { - if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) { - return false - } - - // Check if it is a first line break. - if !leading_blanks { - whitespaces = whitespaces[:0] - leading_break = read_line(parser, leading_break) - leading_blanks = true - } else { - trailing_breaks = read_line(parser, trailing_breaks) - } - } - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - } - - // Check indentation level. - if parser.flow_level == 0 && parser.mark.column < indent { - break - } - } - - // Create a token. - *token = yaml_token_t{ - typ: yaml_SCALAR_TOKEN, - start_mark: start_mark, - end_mark: end_mark, - value: s, - style: yaml_PLAIN_SCALAR_STYLE, - } - - // Note that we change the 'simple_key_allowed' flag. - if leading_blanks { - parser.simple_key_allowed = true - } - return true -} diff --git a/vendor/gopkg.in/yaml.v2/sorter.go b/vendor/gopkg.in/yaml.v2/sorter.go deleted file mode 100644 index 4c45e660..00000000 --- a/vendor/gopkg.in/yaml.v2/sorter.go +++ /dev/null @@ -1,113 +0,0 @@ -package yaml - -import ( - "reflect" - "unicode" -) - -type keyList []reflect.Value - -func (l keyList) Len() int { return len(l) } -func (l keyList) Swap(i, j int) { l[i], l[j] = l[j], l[i] } -func (l keyList) Less(i, j int) bool { - a := l[i] - b := l[j] - ak := a.Kind() - bk := b.Kind() - for (ak == reflect.Interface || ak == reflect.Ptr) && !a.IsNil() { - a = a.Elem() - ak = a.Kind() - } - for (bk == reflect.Interface || bk == reflect.Ptr) && !b.IsNil() { - b = b.Elem() - bk = b.Kind() - } - af, aok := keyFloat(a) - bf, bok := keyFloat(b) - if aok && bok { - if af != bf { - return af < bf - } - if ak != bk { - return ak < bk - } - return numLess(a, b) - } - if ak != reflect.String || bk != reflect.String { - return ak < bk - } - ar, br := []rune(a.String()), []rune(b.String()) - for i := 0; i < len(ar) && i < len(br); i++ { - if ar[i] == br[i] { - continue - } - al := unicode.IsLetter(ar[i]) - bl := unicode.IsLetter(br[i]) - if al && bl { - return ar[i] < br[i] - } - if al || bl { - return bl - } - var ai, bi int - var an, bn int64 - if ar[i] == '0' || br[i] == '0' { - for j := i-1; j >= 0 && unicode.IsDigit(ar[j]); j-- { - if ar[j] != '0' { - an = 1 - bn = 1 - break - } - } - } - for ai = i; ai < len(ar) && unicode.IsDigit(ar[ai]); ai++ { - an = an*10 + int64(ar[ai]-'0') - } - for bi = i; bi < len(br) && unicode.IsDigit(br[bi]); bi++ { - bn = bn*10 + int64(br[bi]-'0') - } - if an != bn { - return an < bn - } - if ai != bi { - return ai < bi - } - return ar[i] < br[i] - } - return len(ar) < len(br) -} - -// keyFloat returns a float value for v if it is a number/bool -// and whether it is a number/bool or not. -func keyFloat(v reflect.Value) (f float64, ok bool) { - switch v.Kind() { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return float64(v.Int()), true - case reflect.Float32, reflect.Float64: - return v.Float(), true - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - return float64(v.Uint()), true - case reflect.Bool: - if v.Bool() { - return 1, true - } - return 0, true - } - return 0, false -} - -// numLess returns whether a < b. -// a and b must necessarily have the same kind. -func numLess(a, b reflect.Value) bool { - switch a.Kind() { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return a.Int() < b.Int() - case reflect.Float32, reflect.Float64: - return a.Float() < b.Float() - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - return a.Uint() < b.Uint() - case reflect.Bool: - return !a.Bool() && b.Bool() - } - panic("not a number") -} diff --git a/vendor/gopkg.in/yaml.v2/writerc.go b/vendor/gopkg.in/yaml.v2/writerc.go deleted file mode 100644 index a2dde608..00000000 --- a/vendor/gopkg.in/yaml.v2/writerc.go +++ /dev/null @@ -1,26 +0,0 @@ -package yaml - -// Set the writer error and return false. -func yaml_emitter_set_writer_error(emitter *yaml_emitter_t, problem string) bool { - emitter.error = yaml_WRITER_ERROR - emitter.problem = problem - return false -} - -// Flush the output buffer. -func yaml_emitter_flush(emitter *yaml_emitter_t) bool { - if emitter.write_handler == nil { - panic("write handler not set") - } - - // Check if the buffer is empty. - if emitter.buffer_pos == 0 { - return true - } - - if err := emitter.write_handler(emitter, emitter.buffer[:emitter.buffer_pos]); err != nil { - return yaml_emitter_set_writer_error(emitter, "write error: "+err.Error()) - } - emitter.buffer_pos = 0 - return true -} diff --git a/vendor/gopkg.in/yaml.v2/yaml.go b/vendor/gopkg.in/yaml.v2/yaml.go deleted file mode 100644 index 30813884..00000000 --- a/vendor/gopkg.in/yaml.v2/yaml.go +++ /dev/null @@ -1,478 +0,0 @@ -// Package yaml implements YAML support for the Go language. -// -// Source code and other details for the project are available at GitHub: -// -// https://github.com/go-yaml/yaml -// -package yaml - -import ( - "errors" - "fmt" - "io" - "reflect" - "strings" - "sync" -) - -// MapSlice encodes and decodes as a YAML map. -// The order of keys is preserved when encoding and decoding. -type MapSlice []MapItem - -// MapItem is an item in a MapSlice. -type MapItem struct { - Key, Value interface{} -} - -// The Unmarshaler interface may be implemented by types to customize their -// behavior when being unmarshaled from a YAML document. The UnmarshalYAML -// method receives a function that may be called to unmarshal the original -// YAML value into a field or variable. It is safe to call the unmarshal -// function parameter more than once if necessary. -type Unmarshaler interface { - UnmarshalYAML(unmarshal func(interface{}) error) error -} - -// The Marshaler interface may be implemented by types to customize their -// behavior when being marshaled into a YAML document. The returned value -// is marshaled in place of the original value implementing Marshaler. -// -// If an error is returned by MarshalYAML, the marshaling procedure stops -// and returns with the provided error. -type Marshaler interface { - MarshalYAML() (interface{}, error) -} - -// Unmarshal decodes the first document found within the in byte slice -// and assigns decoded values into the out value. -// -// Maps and pointers (to a struct, string, int, etc) are accepted as out -// values. If an internal pointer within a struct is not initialized, -// the yaml package will initialize it if necessary for unmarshalling -// the provided data. The out parameter must not be nil. -// -// The type of the decoded values should be compatible with the respective -// values in out. If one or more values cannot be decoded due to a type -// mismatches, decoding continues partially until the end of the YAML -// content, and a *yaml.TypeError is returned with details for all -// missed values. -// -// Struct fields are only unmarshalled if they are exported (have an -// upper case first letter), and are unmarshalled using the field name -// lowercased as the default key. Custom keys may be defined via the -// "yaml" name in the field tag: the content preceding the first comma -// is used as the key, and the following comma-separated options are -// used to tweak the marshalling process (see Marshal). -// Conflicting names result in a runtime error. -// -// For example: -// -// type T struct { -// F int `yaml:"a,omitempty"` -// B int -// } -// var t T -// yaml.Unmarshal([]byte("a: 1\nb: 2"), &t) -// -// See the documentation of Marshal for the format of tags and a list of -// supported tag options. -// -func Unmarshal(in []byte, out interface{}) (err error) { - return unmarshal(in, out, false) -} - -// UnmarshalStrict is like Unmarshal except that any fields that are found -// in the data that do not have corresponding struct members, or mapping -// keys that are duplicates, will result in -// an error. -func UnmarshalStrict(in []byte, out interface{}) (err error) { - return unmarshal(in, out, true) -} - -// A Decoder reads and decodes YAML values from an input stream. -type Decoder struct { - strict bool - parser *parser -} - -// NewDecoder returns a new decoder that reads from r. -// -// The decoder introduces its own buffering and may read -// data from r beyond the YAML values requested. -func NewDecoder(r io.Reader) *Decoder { - return &Decoder{ - parser: newParserFromReader(r), - } -} - -// SetStrict sets whether strict decoding behaviour is enabled when -// decoding items in the data (see UnmarshalStrict). By default, decoding is not strict. -func (dec *Decoder) SetStrict(strict bool) { - dec.strict = strict -} - -// Decode reads the next YAML-encoded value from its input -// and stores it in the value pointed to by v. -// -// See the documentation for Unmarshal for details about the -// conversion of YAML into a Go value. -func (dec *Decoder) Decode(v interface{}) (err error) { - d := newDecoder(dec.strict) - defer handleErr(&err) - node := dec.parser.parse() - if node == nil { - return io.EOF - } - out := reflect.ValueOf(v) - if out.Kind() == reflect.Ptr && !out.IsNil() { - out = out.Elem() - } - d.unmarshal(node, out) - if len(d.terrors) > 0 { - return &TypeError{d.terrors} - } - return nil -} - -func unmarshal(in []byte, out interface{}, strict bool) (err error) { - defer handleErr(&err) - d := newDecoder(strict) - p := newParser(in) - defer p.destroy() - node := p.parse() - if node != nil { - v := reflect.ValueOf(out) - if v.Kind() == reflect.Ptr && !v.IsNil() { - v = v.Elem() - } - d.unmarshal(node, v) - } - if len(d.terrors) > 0 { - return &TypeError{d.terrors} - } - return nil -} - -// Marshal serializes the value provided into a YAML document. The structure -// of the generated document will reflect the structure of the value itself. -// Maps and pointers (to struct, string, int, etc) are accepted as the in value. -// -// Struct fields are only marshalled if they are exported (have an upper case -// first letter), and are marshalled using the field name lowercased as the -// default key. Custom keys may be defined via the "yaml" name in the field -// tag: the content preceding the first comma is used as the key, and the -// following comma-separated options are used to tweak the marshalling process. -// Conflicting names result in a runtime error. -// -// The field tag format accepted is: -// -// `(...) yaml:"[][,[,]]" (...)` -// -// The following flags are currently supported: -// -// omitempty Only include the field if it's not set to the zero -// value for the type or to empty slices or maps. -// Zero valued structs will be omitted if all their public -// fields are zero, unless they implement an IsZero -// method (see the IsZeroer interface type), in which -// case the field will be excluded if IsZero returns true. -// -// flow Marshal using a flow style (useful for structs, -// sequences and maps). -// -// inline Inline the field, which must be a struct or a map, -// causing all of its fields or keys to be processed as if -// they were part of the outer struct. For maps, keys must -// not conflict with the yaml keys of other struct fields. -// -// In addition, if the key is "-", the field is ignored. -// -// For example: -// -// type T struct { -// F int `yaml:"a,omitempty"` -// B int -// } -// yaml.Marshal(&T{B: 2}) // Returns "b: 2\n" -// yaml.Marshal(&T{F: 1}} // Returns "a: 1\nb: 0\n" -// -func Marshal(in interface{}) (out []byte, err error) { - defer handleErr(&err) - e := newEncoder() - defer e.destroy() - e.marshalDoc("", reflect.ValueOf(in)) - e.finish() - out = e.out - return -} - -// An Encoder writes YAML values to an output stream. -type Encoder struct { - encoder *encoder -} - -// NewEncoder returns a new encoder that writes to w. -// The Encoder should be closed after use to flush all data -// to w. -func NewEncoder(w io.Writer) *Encoder { - return &Encoder{ - encoder: newEncoderWithWriter(w), - } -} - -// Encode writes the YAML encoding of v to the stream. -// If multiple items are encoded to the stream, the -// second and subsequent document will be preceded -// with a "---" document separator, but the first will not. -// -// See the documentation for Marshal for details about the conversion of Go -// values to YAML. -func (e *Encoder) Encode(v interface{}) (err error) { - defer handleErr(&err) - e.encoder.marshalDoc("", reflect.ValueOf(v)) - return nil -} - -// Close closes the encoder by writing any remaining data. -// It does not write a stream terminating string "...". -func (e *Encoder) Close() (err error) { - defer handleErr(&err) - e.encoder.finish() - return nil -} - -func handleErr(err *error) { - if v := recover(); v != nil { - if e, ok := v.(yamlError); ok { - *err = e.err - } else { - panic(v) - } - } -} - -type yamlError struct { - err error -} - -func fail(err error) { - panic(yamlError{err}) -} - -func failf(format string, args ...interface{}) { - panic(yamlError{fmt.Errorf("yaml: "+format, args...)}) -} - -// A TypeError is returned by Unmarshal when one or more fields in -// the YAML document cannot be properly decoded into the requested -// types. When this error is returned, the value is still -// unmarshaled partially. -type TypeError struct { - Errors []string -} - -func (e *TypeError) Error() string { - return fmt.Sprintf("yaml: unmarshal errors:\n %s", strings.Join(e.Errors, "\n ")) -} - -// -------------------------------------------------------------------------- -// Maintain a mapping of keys to structure field indexes - -// The code in this section was copied from mgo/bson. - -// structInfo holds details for the serialization of fields of -// a given struct. -type structInfo struct { - FieldsMap map[string]fieldInfo - FieldsList []fieldInfo - - // InlineMap is the number of the field in the struct that - // contains an ,inline map, or -1 if there's none. - InlineMap int -} - -type fieldInfo struct { - Key string - Num int - OmitEmpty bool - Flow bool - // Id holds the unique field identifier, so we can cheaply - // check for field duplicates without maintaining an extra map. - Id int - - // Inline holds the field index if the field is part of an inlined struct. - Inline []int -} - -var structMap = make(map[reflect.Type]*structInfo) -var fieldMapMutex sync.RWMutex - -func getStructInfo(st reflect.Type) (*structInfo, error) { - fieldMapMutex.RLock() - sinfo, found := structMap[st] - fieldMapMutex.RUnlock() - if found { - return sinfo, nil - } - - n := st.NumField() - fieldsMap := make(map[string]fieldInfo) - fieldsList := make([]fieldInfo, 0, n) - inlineMap := -1 - for i := 0; i != n; i++ { - field := st.Field(i) - if field.PkgPath != "" && !field.Anonymous { - continue // Private field - } - - info := fieldInfo{Num: i} - - tag := field.Tag.Get("yaml") - if tag == "" && strings.Index(string(field.Tag), ":") < 0 { - tag = string(field.Tag) - } - if tag == "-" { - continue - } - - inline := false - fields := strings.Split(tag, ",") - if len(fields) > 1 { - for _, flag := range fields[1:] { - switch flag { - case "omitempty": - info.OmitEmpty = true - case "flow": - info.Flow = true - case "inline": - inline = true - default: - return nil, errors.New(fmt.Sprintf("Unsupported flag %q in tag %q of type %s", flag, tag, st)) - } - } - tag = fields[0] - } - - if inline { - switch field.Type.Kind() { - case reflect.Map: - if inlineMap >= 0 { - return nil, errors.New("Multiple ,inline maps in struct " + st.String()) - } - if field.Type.Key() != reflect.TypeOf("") { - return nil, errors.New("Option ,inline needs a map with string keys in struct " + st.String()) - } - inlineMap = info.Num - case reflect.Struct: - sinfo, err := getStructInfo(field.Type) - if err != nil { - return nil, err - } - for _, finfo := range sinfo.FieldsList { - if _, found := fieldsMap[finfo.Key]; found { - msg := "Duplicated key '" + finfo.Key + "' in struct " + st.String() - return nil, errors.New(msg) - } - if finfo.Inline == nil { - finfo.Inline = []int{i, finfo.Num} - } else { - finfo.Inline = append([]int{i}, finfo.Inline...) - } - finfo.Id = len(fieldsList) - fieldsMap[finfo.Key] = finfo - fieldsList = append(fieldsList, finfo) - } - default: - //return nil, errors.New("Option ,inline needs a struct value or map field") - return nil, errors.New("Option ,inline needs a struct value field") - } - continue - } - - if tag != "" { - info.Key = tag - } else { - info.Key = strings.ToLower(field.Name) - } - - if _, found = fieldsMap[info.Key]; found { - msg := "Duplicated key '" + info.Key + "' in struct " + st.String() - return nil, errors.New(msg) - } - - info.Id = len(fieldsList) - fieldsList = append(fieldsList, info) - fieldsMap[info.Key] = info - } - - sinfo = &structInfo{ - FieldsMap: fieldsMap, - FieldsList: fieldsList, - InlineMap: inlineMap, - } - - fieldMapMutex.Lock() - structMap[st] = sinfo - fieldMapMutex.Unlock() - return sinfo, nil -} - -// IsZeroer is used to check whether an object is zero to -// determine whether it should be omitted when marshaling -// with the omitempty flag. One notable implementation -// is time.Time. -type IsZeroer interface { - IsZero() bool -} - -func isZero(v reflect.Value) bool { - kind := v.Kind() - if z, ok := v.Interface().(IsZeroer); ok { - if (kind == reflect.Ptr || kind == reflect.Interface) && v.IsNil() { - return true - } - return z.IsZero() - } - switch kind { - case reflect.String: - return len(v.String()) == 0 - case reflect.Interface, reflect.Ptr: - return v.IsNil() - case reflect.Slice: - return v.Len() == 0 - case reflect.Map: - return v.Len() == 0 - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return v.Int() == 0 - case reflect.Float32, reflect.Float64: - return v.Float() == 0 - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - return v.Uint() == 0 - case reflect.Bool: - return !v.Bool() - case reflect.Struct: - vt := v.Type() - for i := v.NumField() - 1; i >= 0; i-- { - if vt.Field(i).PkgPath != "" { - continue // Private field - } - if !isZero(v.Field(i)) { - return false - } - } - return true - } - return false -} - -// FutureLineWrap globally disables line wrapping when encoding long strings. -// This is a temporary and thus deprecated method introduced to faciliate -// migration towards v3, which offers more control of line lengths on -// individual encodings, and has a default matching the behavior introduced -// by this function. -// -// The default formatting of v2 was erroneously changed in v2.3.0 and reverted -// in v2.4.0, at which point this function was introduced to help migration. -func FutureLineWrap() { - disableLineWrapping = true -} diff --git a/vendor/gopkg.in/yaml.v2/yamlh.go b/vendor/gopkg.in/yaml.v2/yamlh.go deleted file mode 100644 index f6a9c8e3..00000000 --- a/vendor/gopkg.in/yaml.v2/yamlh.go +++ /dev/null @@ -1,739 +0,0 @@ -package yaml - -import ( - "fmt" - "io" -) - -// The version directive data. -type yaml_version_directive_t struct { - major int8 // The major version number. - minor int8 // The minor version number. -} - -// The tag directive data. -type yaml_tag_directive_t struct { - handle []byte // The tag handle. - prefix []byte // The tag prefix. -} - -type yaml_encoding_t int - -// The stream encoding. -const ( - // Let the parser choose the encoding. - yaml_ANY_ENCODING yaml_encoding_t = iota - - yaml_UTF8_ENCODING // The default UTF-8 encoding. - yaml_UTF16LE_ENCODING // The UTF-16-LE encoding with BOM. - yaml_UTF16BE_ENCODING // The UTF-16-BE encoding with BOM. -) - -type yaml_break_t int - -// Line break types. -const ( - // Let the parser choose the break type. - yaml_ANY_BREAK yaml_break_t = iota - - yaml_CR_BREAK // Use CR for line breaks (Mac style). - yaml_LN_BREAK // Use LN for line breaks (Unix style). - yaml_CRLN_BREAK // Use CR LN for line breaks (DOS style). -) - -type yaml_error_type_t int - -// Many bad things could happen with the parser and emitter. -const ( - // No error is produced. - yaml_NO_ERROR yaml_error_type_t = iota - - yaml_MEMORY_ERROR // Cannot allocate or reallocate a block of memory. - yaml_READER_ERROR // Cannot read or decode the input stream. - yaml_SCANNER_ERROR // Cannot scan the input stream. - yaml_PARSER_ERROR // Cannot parse the input stream. - yaml_COMPOSER_ERROR // Cannot compose a YAML document. - yaml_WRITER_ERROR // Cannot write to the output stream. - yaml_EMITTER_ERROR // Cannot emit a YAML stream. -) - -// The pointer position. -type yaml_mark_t struct { - index int // The position index. - line int // The position line. - column int // The position column. -} - -// Node Styles - -type yaml_style_t int8 - -type yaml_scalar_style_t yaml_style_t - -// Scalar styles. -const ( - // Let the emitter choose the style. - yaml_ANY_SCALAR_STYLE yaml_scalar_style_t = iota - - yaml_PLAIN_SCALAR_STYLE // The plain scalar style. - yaml_SINGLE_QUOTED_SCALAR_STYLE // The single-quoted scalar style. - yaml_DOUBLE_QUOTED_SCALAR_STYLE // The double-quoted scalar style. - yaml_LITERAL_SCALAR_STYLE // The literal scalar style. - yaml_FOLDED_SCALAR_STYLE // The folded scalar style. -) - -type yaml_sequence_style_t yaml_style_t - -// Sequence styles. -const ( - // Let the emitter choose the style. - yaml_ANY_SEQUENCE_STYLE yaml_sequence_style_t = iota - - yaml_BLOCK_SEQUENCE_STYLE // The block sequence style. - yaml_FLOW_SEQUENCE_STYLE // The flow sequence style. -) - -type yaml_mapping_style_t yaml_style_t - -// Mapping styles. -const ( - // Let the emitter choose the style. - yaml_ANY_MAPPING_STYLE yaml_mapping_style_t = iota - - yaml_BLOCK_MAPPING_STYLE // The block mapping style. - yaml_FLOW_MAPPING_STYLE // The flow mapping style. -) - -// Tokens - -type yaml_token_type_t int - -// Token types. -const ( - // An empty token. - yaml_NO_TOKEN yaml_token_type_t = iota - - yaml_STREAM_START_TOKEN // A STREAM-START token. - yaml_STREAM_END_TOKEN // A STREAM-END token. - - yaml_VERSION_DIRECTIVE_TOKEN // A VERSION-DIRECTIVE token. - yaml_TAG_DIRECTIVE_TOKEN // A TAG-DIRECTIVE token. - yaml_DOCUMENT_START_TOKEN // A DOCUMENT-START token. - yaml_DOCUMENT_END_TOKEN // A DOCUMENT-END token. - - yaml_BLOCK_SEQUENCE_START_TOKEN // A BLOCK-SEQUENCE-START token. - yaml_BLOCK_MAPPING_START_TOKEN // A BLOCK-SEQUENCE-END token. - yaml_BLOCK_END_TOKEN // A BLOCK-END token. - - yaml_FLOW_SEQUENCE_START_TOKEN // A FLOW-SEQUENCE-START token. - yaml_FLOW_SEQUENCE_END_TOKEN // A FLOW-SEQUENCE-END token. - yaml_FLOW_MAPPING_START_TOKEN // A FLOW-MAPPING-START token. - yaml_FLOW_MAPPING_END_TOKEN // A FLOW-MAPPING-END token. - - yaml_BLOCK_ENTRY_TOKEN // A BLOCK-ENTRY token. - yaml_FLOW_ENTRY_TOKEN // A FLOW-ENTRY token. - yaml_KEY_TOKEN // A KEY token. - yaml_VALUE_TOKEN // A VALUE token. - - yaml_ALIAS_TOKEN // An ALIAS token. - yaml_ANCHOR_TOKEN // An ANCHOR token. - yaml_TAG_TOKEN // A TAG token. - yaml_SCALAR_TOKEN // A SCALAR token. -) - -func (tt yaml_token_type_t) String() string { - switch tt { - case yaml_NO_TOKEN: - return "yaml_NO_TOKEN" - case yaml_STREAM_START_TOKEN: - return "yaml_STREAM_START_TOKEN" - case yaml_STREAM_END_TOKEN: - return "yaml_STREAM_END_TOKEN" - case yaml_VERSION_DIRECTIVE_TOKEN: - return "yaml_VERSION_DIRECTIVE_TOKEN" - case yaml_TAG_DIRECTIVE_TOKEN: - return "yaml_TAG_DIRECTIVE_TOKEN" - case yaml_DOCUMENT_START_TOKEN: - return "yaml_DOCUMENT_START_TOKEN" - case yaml_DOCUMENT_END_TOKEN: - return "yaml_DOCUMENT_END_TOKEN" - case yaml_BLOCK_SEQUENCE_START_TOKEN: - return "yaml_BLOCK_SEQUENCE_START_TOKEN" - case yaml_BLOCK_MAPPING_START_TOKEN: - return "yaml_BLOCK_MAPPING_START_TOKEN" - case yaml_BLOCK_END_TOKEN: - return "yaml_BLOCK_END_TOKEN" - case yaml_FLOW_SEQUENCE_START_TOKEN: - return "yaml_FLOW_SEQUENCE_START_TOKEN" - case yaml_FLOW_SEQUENCE_END_TOKEN: - return "yaml_FLOW_SEQUENCE_END_TOKEN" - case yaml_FLOW_MAPPING_START_TOKEN: - return "yaml_FLOW_MAPPING_START_TOKEN" - case yaml_FLOW_MAPPING_END_TOKEN: - return "yaml_FLOW_MAPPING_END_TOKEN" - case yaml_BLOCK_ENTRY_TOKEN: - return "yaml_BLOCK_ENTRY_TOKEN" - case yaml_FLOW_ENTRY_TOKEN: - return "yaml_FLOW_ENTRY_TOKEN" - case yaml_KEY_TOKEN: - return "yaml_KEY_TOKEN" - case yaml_VALUE_TOKEN: - return "yaml_VALUE_TOKEN" - case yaml_ALIAS_TOKEN: - return "yaml_ALIAS_TOKEN" - case yaml_ANCHOR_TOKEN: - return "yaml_ANCHOR_TOKEN" - case yaml_TAG_TOKEN: - return "yaml_TAG_TOKEN" - case yaml_SCALAR_TOKEN: - return "yaml_SCALAR_TOKEN" - } - return "" -} - -// The token structure. -type yaml_token_t struct { - // The token type. - typ yaml_token_type_t - - // The start/end of the token. - start_mark, end_mark yaml_mark_t - - // The stream encoding (for yaml_STREAM_START_TOKEN). - encoding yaml_encoding_t - - // The alias/anchor/scalar value or tag/tag directive handle - // (for yaml_ALIAS_TOKEN, yaml_ANCHOR_TOKEN, yaml_SCALAR_TOKEN, yaml_TAG_TOKEN, yaml_TAG_DIRECTIVE_TOKEN). - value []byte - - // The tag suffix (for yaml_TAG_TOKEN). - suffix []byte - - // The tag directive prefix (for yaml_TAG_DIRECTIVE_TOKEN). - prefix []byte - - // The scalar style (for yaml_SCALAR_TOKEN). - style yaml_scalar_style_t - - // The version directive major/minor (for yaml_VERSION_DIRECTIVE_TOKEN). - major, minor int8 -} - -// Events - -type yaml_event_type_t int8 - -// Event types. -const ( - // An empty event. - yaml_NO_EVENT yaml_event_type_t = iota - - yaml_STREAM_START_EVENT // A STREAM-START event. - yaml_STREAM_END_EVENT // A STREAM-END event. - yaml_DOCUMENT_START_EVENT // A DOCUMENT-START event. - yaml_DOCUMENT_END_EVENT // A DOCUMENT-END event. - yaml_ALIAS_EVENT // An ALIAS event. - yaml_SCALAR_EVENT // A SCALAR event. - yaml_SEQUENCE_START_EVENT // A SEQUENCE-START event. - yaml_SEQUENCE_END_EVENT // A SEQUENCE-END event. - yaml_MAPPING_START_EVENT // A MAPPING-START event. - yaml_MAPPING_END_EVENT // A MAPPING-END event. -) - -var eventStrings = []string{ - yaml_NO_EVENT: "none", - yaml_STREAM_START_EVENT: "stream start", - yaml_STREAM_END_EVENT: "stream end", - yaml_DOCUMENT_START_EVENT: "document start", - yaml_DOCUMENT_END_EVENT: "document end", - yaml_ALIAS_EVENT: "alias", - yaml_SCALAR_EVENT: "scalar", - yaml_SEQUENCE_START_EVENT: "sequence start", - yaml_SEQUENCE_END_EVENT: "sequence end", - yaml_MAPPING_START_EVENT: "mapping start", - yaml_MAPPING_END_EVENT: "mapping end", -} - -func (e yaml_event_type_t) String() string { - if e < 0 || int(e) >= len(eventStrings) { - return fmt.Sprintf("unknown event %d", e) - } - return eventStrings[e] -} - -// The event structure. -type yaml_event_t struct { - - // The event type. - typ yaml_event_type_t - - // The start and end of the event. - start_mark, end_mark yaml_mark_t - - // The document encoding (for yaml_STREAM_START_EVENT). - encoding yaml_encoding_t - - // The version directive (for yaml_DOCUMENT_START_EVENT). - version_directive *yaml_version_directive_t - - // The list of tag directives (for yaml_DOCUMENT_START_EVENT). - tag_directives []yaml_tag_directive_t - - // The anchor (for yaml_SCALAR_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT, yaml_ALIAS_EVENT). - anchor []byte - - // The tag (for yaml_SCALAR_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT). - tag []byte - - // The scalar value (for yaml_SCALAR_EVENT). - value []byte - - // Is the document start/end indicator implicit, or the tag optional? - // (for yaml_DOCUMENT_START_EVENT, yaml_DOCUMENT_END_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT, yaml_SCALAR_EVENT). - implicit bool - - // Is the tag optional for any non-plain style? (for yaml_SCALAR_EVENT). - quoted_implicit bool - - // The style (for yaml_SCALAR_EVENT, yaml_SEQUENCE_START_EVENT, yaml_MAPPING_START_EVENT). - style yaml_style_t -} - -func (e *yaml_event_t) scalar_style() yaml_scalar_style_t { return yaml_scalar_style_t(e.style) } -func (e *yaml_event_t) sequence_style() yaml_sequence_style_t { return yaml_sequence_style_t(e.style) } -func (e *yaml_event_t) mapping_style() yaml_mapping_style_t { return yaml_mapping_style_t(e.style) } - -// Nodes - -const ( - yaml_NULL_TAG = "tag:yaml.org,2002:null" // The tag !!null with the only possible value: null. - yaml_BOOL_TAG = "tag:yaml.org,2002:bool" // The tag !!bool with the values: true and false. - yaml_STR_TAG = "tag:yaml.org,2002:str" // The tag !!str for string values. - yaml_INT_TAG = "tag:yaml.org,2002:int" // The tag !!int for integer values. - yaml_FLOAT_TAG = "tag:yaml.org,2002:float" // The tag !!float for float values. - yaml_TIMESTAMP_TAG = "tag:yaml.org,2002:timestamp" // The tag !!timestamp for date and time values. - - yaml_SEQ_TAG = "tag:yaml.org,2002:seq" // The tag !!seq is used to denote sequences. - yaml_MAP_TAG = "tag:yaml.org,2002:map" // The tag !!map is used to denote mapping. - - // Not in original libyaml. - yaml_BINARY_TAG = "tag:yaml.org,2002:binary" - yaml_MERGE_TAG = "tag:yaml.org,2002:merge" - - yaml_DEFAULT_SCALAR_TAG = yaml_STR_TAG // The default scalar tag is !!str. - yaml_DEFAULT_SEQUENCE_TAG = yaml_SEQ_TAG // The default sequence tag is !!seq. - yaml_DEFAULT_MAPPING_TAG = yaml_MAP_TAG // The default mapping tag is !!map. -) - -type yaml_node_type_t int - -// Node types. -const ( - // An empty node. - yaml_NO_NODE yaml_node_type_t = iota - - yaml_SCALAR_NODE // A scalar node. - yaml_SEQUENCE_NODE // A sequence node. - yaml_MAPPING_NODE // A mapping node. -) - -// An element of a sequence node. -type yaml_node_item_t int - -// An element of a mapping node. -type yaml_node_pair_t struct { - key int // The key of the element. - value int // The value of the element. -} - -// The node structure. -type yaml_node_t struct { - typ yaml_node_type_t // The node type. - tag []byte // The node tag. - - // The node data. - - // The scalar parameters (for yaml_SCALAR_NODE). - scalar struct { - value []byte // The scalar value. - length int // The length of the scalar value. - style yaml_scalar_style_t // The scalar style. - } - - // The sequence parameters (for YAML_SEQUENCE_NODE). - sequence struct { - items_data []yaml_node_item_t // The stack of sequence items. - style yaml_sequence_style_t // The sequence style. - } - - // The mapping parameters (for yaml_MAPPING_NODE). - mapping struct { - pairs_data []yaml_node_pair_t // The stack of mapping pairs (key, value). - pairs_start *yaml_node_pair_t // The beginning of the stack. - pairs_end *yaml_node_pair_t // The end of the stack. - pairs_top *yaml_node_pair_t // The top of the stack. - style yaml_mapping_style_t // The mapping style. - } - - start_mark yaml_mark_t // The beginning of the node. - end_mark yaml_mark_t // The end of the node. - -} - -// The document structure. -type yaml_document_t struct { - - // The document nodes. - nodes []yaml_node_t - - // The version directive. - version_directive *yaml_version_directive_t - - // The list of tag directives. - tag_directives_data []yaml_tag_directive_t - tag_directives_start int // The beginning of the tag directives list. - tag_directives_end int // The end of the tag directives list. - - start_implicit int // Is the document start indicator implicit? - end_implicit int // Is the document end indicator implicit? - - // The start/end of the document. - start_mark, end_mark yaml_mark_t -} - -// The prototype of a read handler. -// -// The read handler is called when the parser needs to read more bytes from the -// source. The handler should write not more than size bytes to the buffer. -// The number of written bytes should be set to the size_read variable. -// -// [in,out] data A pointer to an application data specified by -// yaml_parser_set_input(). -// [out] buffer The buffer to write the data from the source. -// [in] size The size of the buffer. -// [out] size_read The actual number of bytes read from the source. -// -// On success, the handler should return 1. If the handler failed, -// the returned value should be 0. On EOF, the handler should set the -// size_read to 0 and return 1. -type yaml_read_handler_t func(parser *yaml_parser_t, buffer []byte) (n int, err error) - -// This structure holds information about a potential simple key. -type yaml_simple_key_t struct { - possible bool // Is a simple key possible? - required bool // Is a simple key required? - token_number int // The number of the token. - mark yaml_mark_t // The position mark. -} - -// The states of the parser. -type yaml_parser_state_t int - -const ( - yaml_PARSE_STREAM_START_STATE yaml_parser_state_t = iota - - yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE // Expect the beginning of an implicit document. - yaml_PARSE_DOCUMENT_START_STATE // Expect DOCUMENT-START. - yaml_PARSE_DOCUMENT_CONTENT_STATE // Expect the content of a document. - yaml_PARSE_DOCUMENT_END_STATE // Expect DOCUMENT-END. - yaml_PARSE_BLOCK_NODE_STATE // Expect a block node. - yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE // Expect a block node or indentless sequence. - yaml_PARSE_FLOW_NODE_STATE // Expect a flow node. - yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE // Expect the first entry of a block sequence. - yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE // Expect an entry of a block sequence. - yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE // Expect an entry of an indentless sequence. - yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE // Expect the first key of a block mapping. - yaml_PARSE_BLOCK_MAPPING_KEY_STATE // Expect a block mapping key. - yaml_PARSE_BLOCK_MAPPING_VALUE_STATE // Expect a block mapping value. - yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE // Expect the first entry of a flow sequence. - yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE // Expect an entry of a flow sequence. - yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE // Expect a key of an ordered mapping. - yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE // Expect a value of an ordered mapping. - yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE // Expect the and of an ordered mapping entry. - yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE // Expect the first key of a flow mapping. - yaml_PARSE_FLOW_MAPPING_KEY_STATE // Expect a key of a flow mapping. - yaml_PARSE_FLOW_MAPPING_VALUE_STATE // Expect a value of a flow mapping. - yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE // Expect an empty value of a flow mapping. - yaml_PARSE_END_STATE // Expect nothing. -) - -func (ps yaml_parser_state_t) String() string { - switch ps { - case yaml_PARSE_STREAM_START_STATE: - return "yaml_PARSE_STREAM_START_STATE" - case yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE: - return "yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE" - case yaml_PARSE_DOCUMENT_START_STATE: - return "yaml_PARSE_DOCUMENT_START_STATE" - case yaml_PARSE_DOCUMENT_CONTENT_STATE: - return "yaml_PARSE_DOCUMENT_CONTENT_STATE" - case yaml_PARSE_DOCUMENT_END_STATE: - return "yaml_PARSE_DOCUMENT_END_STATE" - case yaml_PARSE_BLOCK_NODE_STATE: - return "yaml_PARSE_BLOCK_NODE_STATE" - case yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE: - return "yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE" - case yaml_PARSE_FLOW_NODE_STATE: - return "yaml_PARSE_FLOW_NODE_STATE" - case yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE: - return "yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE" - case yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE: - return "yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE" - case yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE: - return "yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE" - case yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE: - return "yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE" - case yaml_PARSE_BLOCK_MAPPING_KEY_STATE: - return "yaml_PARSE_BLOCK_MAPPING_KEY_STATE" - case yaml_PARSE_BLOCK_MAPPING_VALUE_STATE: - return "yaml_PARSE_BLOCK_MAPPING_VALUE_STATE" - case yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE: - return "yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE" - case yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE: - return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE" - case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE: - return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE" - case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE: - return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE" - case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE: - return "yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE" - case yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE: - return "yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE" - case yaml_PARSE_FLOW_MAPPING_KEY_STATE: - return "yaml_PARSE_FLOW_MAPPING_KEY_STATE" - case yaml_PARSE_FLOW_MAPPING_VALUE_STATE: - return "yaml_PARSE_FLOW_MAPPING_VALUE_STATE" - case yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE: - return "yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE" - case yaml_PARSE_END_STATE: - return "yaml_PARSE_END_STATE" - } - return "" -} - -// This structure holds aliases data. -type yaml_alias_data_t struct { - anchor []byte // The anchor. - index int // The node id. - mark yaml_mark_t // The anchor mark. -} - -// The parser structure. -// -// All members are internal. Manage the structure using the -// yaml_parser_ family of functions. -type yaml_parser_t struct { - - // Error handling - - error yaml_error_type_t // Error type. - - problem string // Error description. - - // The byte about which the problem occurred. - problem_offset int - problem_value int - problem_mark yaml_mark_t - - // The error context. - context string - context_mark yaml_mark_t - - // Reader stuff - - read_handler yaml_read_handler_t // Read handler. - - input_reader io.Reader // File input data. - input []byte // String input data. - input_pos int - - eof bool // EOF flag - - buffer []byte // The working buffer. - buffer_pos int // The current position of the buffer. - - unread int // The number of unread characters in the buffer. - - raw_buffer []byte // The raw buffer. - raw_buffer_pos int // The current position of the buffer. - - encoding yaml_encoding_t // The input encoding. - - offset int // The offset of the current position (in bytes). - mark yaml_mark_t // The mark of the current position. - - // Scanner stuff - - stream_start_produced bool // Have we started to scan the input stream? - stream_end_produced bool // Have we reached the end of the input stream? - - flow_level int // The number of unclosed '[' and '{' indicators. - - tokens []yaml_token_t // The tokens queue. - tokens_head int // The head of the tokens queue. - tokens_parsed int // The number of tokens fetched from the queue. - token_available bool // Does the tokens queue contain a token ready for dequeueing. - - indent int // The current indentation level. - indents []int // The indentation levels stack. - - simple_key_allowed bool // May a simple key occur at the current position? - simple_keys []yaml_simple_key_t // The stack of simple keys. - simple_keys_by_tok map[int]int // possible simple_key indexes indexed by token_number - - // Parser stuff - - state yaml_parser_state_t // The current parser state. - states []yaml_parser_state_t // The parser states stack. - marks []yaml_mark_t // The stack of marks. - tag_directives []yaml_tag_directive_t // The list of TAG directives. - - // Dumper stuff - - aliases []yaml_alias_data_t // The alias data. - - document *yaml_document_t // The currently parsed document. -} - -// Emitter Definitions - -// The prototype of a write handler. -// -// The write handler is called when the emitter needs to flush the accumulated -// characters to the output. The handler should write @a size bytes of the -// @a buffer to the output. -// -// @param[in,out] data A pointer to an application data specified by -// yaml_emitter_set_output(). -// @param[in] buffer The buffer with bytes to be written. -// @param[in] size The size of the buffer. -// -// @returns On success, the handler should return @c 1. If the handler failed, -// the returned value should be @c 0. -// -type yaml_write_handler_t func(emitter *yaml_emitter_t, buffer []byte) error - -type yaml_emitter_state_t int - -// The emitter states. -const ( - // Expect STREAM-START. - yaml_EMIT_STREAM_START_STATE yaml_emitter_state_t = iota - - yaml_EMIT_FIRST_DOCUMENT_START_STATE // Expect the first DOCUMENT-START or STREAM-END. - yaml_EMIT_DOCUMENT_START_STATE // Expect DOCUMENT-START or STREAM-END. - yaml_EMIT_DOCUMENT_CONTENT_STATE // Expect the content of a document. - yaml_EMIT_DOCUMENT_END_STATE // Expect DOCUMENT-END. - yaml_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE // Expect the first item of a flow sequence. - yaml_EMIT_FLOW_SEQUENCE_ITEM_STATE // Expect an item of a flow sequence. - yaml_EMIT_FLOW_MAPPING_FIRST_KEY_STATE // Expect the first key of a flow mapping. - yaml_EMIT_FLOW_MAPPING_KEY_STATE // Expect a key of a flow mapping. - yaml_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE // Expect a value for a simple key of a flow mapping. - yaml_EMIT_FLOW_MAPPING_VALUE_STATE // Expect a value of a flow mapping. - yaml_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE // Expect the first item of a block sequence. - yaml_EMIT_BLOCK_SEQUENCE_ITEM_STATE // Expect an item of a block sequence. - yaml_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE // Expect the first key of a block mapping. - yaml_EMIT_BLOCK_MAPPING_KEY_STATE // Expect the key of a block mapping. - yaml_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE // Expect a value for a simple key of a block mapping. - yaml_EMIT_BLOCK_MAPPING_VALUE_STATE // Expect a value of a block mapping. - yaml_EMIT_END_STATE // Expect nothing. -) - -// The emitter structure. -// -// All members are internal. Manage the structure using the @c yaml_emitter_ -// family of functions. -type yaml_emitter_t struct { - - // Error handling - - error yaml_error_type_t // Error type. - problem string // Error description. - - // Writer stuff - - write_handler yaml_write_handler_t // Write handler. - - output_buffer *[]byte // String output data. - output_writer io.Writer // File output data. - - buffer []byte // The working buffer. - buffer_pos int // The current position of the buffer. - - raw_buffer []byte // The raw buffer. - raw_buffer_pos int // The current position of the buffer. - - encoding yaml_encoding_t // The stream encoding. - - // Emitter stuff - - canonical bool // If the output is in the canonical style? - best_indent int // The number of indentation spaces. - best_width int // The preferred width of the output lines. - unicode bool // Allow unescaped non-ASCII characters? - line_break yaml_break_t // The preferred line break. - - state yaml_emitter_state_t // The current emitter state. - states []yaml_emitter_state_t // The stack of states. - - events []yaml_event_t // The event queue. - events_head int // The head of the event queue. - - indents []int // The stack of indentation levels. - - tag_directives []yaml_tag_directive_t // The list of tag directives. - - indent int // The current indentation level. - - flow_level int // The current flow level. - - root_context bool // Is it the document root context? - sequence_context bool // Is it a sequence context? - mapping_context bool // Is it a mapping context? - simple_key_context bool // Is it a simple mapping key context? - - line int // The current line. - column int // The current column. - whitespace bool // If the last character was a whitespace? - indention bool // If the last character was an indentation character (' ', '-', '?', ':')? - open_ended bool // If an explicit document end is required? - - // Anchor analysis. - anchor_data struct { - anchor []byte // The anchor value. - alias bool // Is it an alias? - } - - // Tag analysis. - tag_data struct { - handle []byte // The tag handle. - suffix []byte // The tag suffix. - } - - // Scalar analysis. - scalar_data struct { - value []byte // The scalar value. - multiline bool // Does the scalar contain line breaks? - flow_plain_allowed bool // Can the scalar be expessed in the flow plain style? - block_plain_allowed bool // Can the scalar be expressed in the block plain style? - single_quoted_allowed bool // Can the scalar be expressed in the single quoted style? - block_allowed bool // Can the scalar be expressed in the literal or folded styles? - style yaml_scalar_style_t // The output style. - } - - // Dumper stuff - - opened bool // If the stream was already opened? - closed bool // If the stream was already closed? - - // The information associated with the document nodes. - anchors *struct { - references int // The number of references. - anchor int // The anchor id. - serialized bool // If the node has been emitted? - } - - last_anchor_id int // The last assigned anchor id. - - document *yaml_document_t // The currently emitted document. -} diff --git a/vendor/gopkg.in/yaml.v2/yamlprivateh.go b/vendor/gopkg.in/yaml.v2/yamlprivateh.go deleted file mode 100644 index 8110ce3c..00000000 --- a/vendor/gopkg.in/yaml.v2/yamlprivateh.go +++ /dev/null @@ -1,173 +0,0 @@ -package yaml - -const ( - // The size of the input raw buffer. - input_raw_buffer_size = 512 - - // The size of the input buffer. - // It should be possible to decode the whole raw buffer. - input_buffer_size = input_raw_buffer_size * 3 - - // The size of the output buffer. - output_buffer_size = 128 - - // The size of the output raw buffer. - // It should be possible to encode the whole output buffer. - output_raw_buffer_size = (output_buffer_size*2 + 2) - - // The size of other stacks and queues. - initial_stack_size = 16 - initial_queue_size = 16 - initial_string_size = 16 -) - -// Check if the character at the specified position is an alphabetical -// character, a digit, '_', or '-'. -func is_alpha(b []byte, i int) bool { - return b[i] >= '0' && b[i] <= '9' || b[i] >= 'A' && b[i] <= 'Z' || b[i] >= 'a' && b[i] <= 'z' || b[i] == '_' || b[i] == '-' -} - -// Check if the character at the specified position is a digit. -func is_digit(b []byte, i int) bool { - return b[i] >= '0' && b[i] <= '9' -} - -// Get the value of a digit. -func as_digit(b []byte, i int) int { - return int(b[i]) - '0' -} - -// Check if the character at the specified position is a hex-digit. -func is_hex(b []byte, i int) bool { - return b[i] >= '0' && b[i] <= '9' || b[i] >= 'A' && b[i] <= 'F' || b[i] >= 'a' && b[i] <= 'f' -} - -// Get the value of a hex-digit. -func as_hex(b []byte, i int) int { - bi := b[i] - if bi >= 'A' && bi <= 'F' { - return int(bi) - 'A' + 10 - } - if bi >= 'a' && bi <= 'f' { - return int(bi) - 'a' + 10 - } - return int(bi) - '0' -} - -// Check if the character is ASCII. -func is_ascii(b []byte, i int) bool { - return b[i] <= 0x7F -} - -// Check if the character at the start of the buffer can be printed unescaped. -func is_printable(b []byte, i int) bool { - return ((b[i] == 0x0A) || // . == #x0A - (b[i] >= 0x20 && b[i] <= 0x7E) || // #x20 <= . <= #x7E - (b[i] == 0xC2 && b[i+1] >= 0xA0) || // #0xA0 <= . <= #xD7FF - (b[i] > 0xC2 && b[i] < 0xED) || - (b[i] == 0xED && b[i+1] < 0xA0) || - (b[i] == 0xEE) || - (b[i] == 0xEF && // #xE000 <= . <= #xFFFD - !(b[i+1] == 0xBB && b[i+2] == 0xBF) && // && . != #xFEFF - !(b[i+1] == 0xBF && (b[i+2] == 0xBE || b[i+2] == 0xBF)))) -} - -// Check if the character at the specified position is NUL. -func is_z(b []byte, i int) bool { - return b[i] == 0x00 -} - -// Check if the beginning of the buffer is a BOM. -func is_bom(b []byte, i int) bool { - return b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF -} - -// Check if the character at the specified position is space. -func is_space(b []byte, i int) bool { - return b[i] == ' ' -} - -// Check if the character at the specified position is tab. -func is_tab(b []byte, i int) bool { - return b[i] == '\t' -} - -// Check if the character at the specified position is blank (space or tab). -func is_blank(b []byte, i int) bool { - //return is_space(b, i) || is_tab(b, i) - return b[i] == ' ' || b[i] == '\t' -} - -// Check if the character at the specified position is a line break. -func is_break(b []byte, i int) bool { - return (b[i] == '\r' || // CR (#xD) - b[i] == '\n' || // LF (#xA) - b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) - b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) - b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9) // PS (#x2029) -} - -func is_crlf(b []byte, i int) bool { - return b[i] == '\r' && b[i+1] == '\n' -} - -// Check if the character is a line break or NUL. -func is_breakz(b []byte, i int) bool { - //return is_break(b, i) || is_z(b, i) - return ( // is_break: - b[i] == '\r' || // CR (#xD) - b[i] == '\n' || // LF (#xA) - b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) - b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) - b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9 || // PS (#x2029) - // is_z: - b[i] == 0) -} - -// Check if the character is a line break, space, or NUL. -func is_spacez(b []byte, i int) bool { - //return is_space(b, i) || is_breakz(b, i) - return ( // is_space: - b[i] == ' ' || - // is_breakz: - b[i] == '\r' || // CR (#xD) - b[i] == '\n' || // LF (#xA) - b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) - b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) - b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9 || // PS (#x2029) - b[i] == 0) -} - -// Check if the character is a line break, space, tab, or NUL. -func is_blankz(b []byte, i int) bool { - //return is_blank(b, i) || is_breakz(b, i) - return ( // is_blank: - b[i] == ' ' || b[i] == '\t' || - // is_breakz: - b[i] == '\r' || // CR (#xD) - b[i] == '\n' || // LF (#xA) - b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85) - b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028) - b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9 || // PS (#x2029) - b[i] == 0) -} - -// Determine the width of the character. -func width(b byte) int { - // Don't replace these by a switch without first - // confirming that it is being inlined. - if b&0x80 == 0x00 { - return 1 - } - if b&0xE0 == 0xC0 { - return 2 - } - if b&0xF0 == 0xE0 { - return 3 - } - if b&0xF8 == 0xF0 { - return 4 - } - return 0 - -} diff --git a/vendor/gorm.io/driver/postgres/.gitignore b/vendor/gorm.io/driver/postgres/.gitignore new file mode 100644 index 00000000..485dee64 --- /dev/null +++ b/vendor/gorm.io/driver/postgres/.gitignore @@ -0,0 +1 @@ +.idea diff --git a/vendor/gorm.io/driver/postgres/error_translator.go b/vendor/gorm.io/driver/postgres/error_translator.go new file mode 100644 index 00000000..5f813501 --- /dev/null +++ b/vendor/gorm.io/driver/postgres/error_translator.go @@ -0,0 +1,50 @@ +package postgres + +import ( + "encoding/json" + + "gorm.io/gorm" + + "github.com/jackc/pgx/v5/pgconn" +) + +// The error codes to map PostgreSQL errors to gorm errors, here is the PostgreSQL error codes reference https://www.postgresql.org/docs/current/errcodes-appendix.html. +var errCodes = map[string]error{ + "23505": gorm.ErrDuplicatedKey, + "23503": gorm.ErrForeignKeyViolated, + "42703": gorm.ErrInvalidField, + "23514": gorm.ErrCheckConstraintViolated, +} + +type ErrMessage struct { + Code string + Severity string + Message string +} + +// Translate it will translate the error to native gorm errors. +// Since currently gorm supporting both pgx and pg drivers, only checking for pgx PgError types is not enough for translating errors, so we have additional error json marshal fallback. +func (dialector Dialector) Translate(err error) error { + if pgErr, ok := err.(*pgconn.PgError); ok { + if translatedErr, found := errCodes[pgErr.Code]; found { + return translatedErr + } + return err + } + + parsedErr, marshalErr := json.Marshal(err) + if marshalErr != nil { + return err + } + + var errMsg ErrMessage + unmarshalErr := json.Unmarshal(parsedErr, &errMsg) + if unmarshalErr != nil { + return err + } + + if translatedErr, found := errCodes[errMsg.Code]; found { + return translatedErr + } + return err +} diff --git a/vendor/gorm.io/driver/postgres/migrator.go b/vendor/gorm.io/driver/postgres/migrator.go index 5eb0accd..df18db1b 100644 --- a/vendor/gorm.io/driver/postgres/migrator.go +++ b/vendor/gorm.io/driver/postgres/migrator.go @@ -6,40 +6,78 @@ import ( "regexp" "strings" - "github.com/jackc/pgx/v4" + "github.com/jackc/pgx/v5" "gorm.io/gorm" "gorm.io/gorm/clause" "gorm.io/gorm/migrator" "gorm.io/gorm/schema" ) +// See https://stackoverflow.com/questions/2204058/list-columns-with-indexes-in-postgresql +// Here are some changes: +// - use `LEFT JOIN` instead of `CROSS JOIN` +// - exclude indexes used to support constraints (they are auto-generated) const indexSql = ` -select - t.relname as table_name, - i.relname as index_name, - a.attname as column_name, - ix.indisunique as non_unique, - ix.indisprimary as primary -from - pg_class t, - pg_class i, - pg_index ix, - pg_attribute a -where - t.oid = ix.indrelid - and i.oid = ix.indexrelid - and a.attrelid = t.oid - and a.attnum = ANY(ix.indkey) - and t.relkind = 'r' - and t.relname = ? +SELECT + ct.relname AS table_name, + ci.relname AS index_name, + i.indisunique AS non_unique, + i.indisprimary AS primary, + a.attname AS column_name +FROM + pg_index i + LEFT JOIN pg_class ct ON ct.oid = i.indrelid + LEFT JOIN pg_class ci ON ci.oid = i.indexrelid + LEFT JOIN pg_attribute a ON a.attrelid = ct.oid + LEFT JOIN pg_constraint con ON con.conindid = i.indexrelid +WHERE + a.attnum = ANY(i.indkey) + AND con.oid IS NULL + AND ct.relkind = 'r' + AND ct.relname = ? ` +var typeAliasMap = map[string][]string{ + "int": {"integer"}, + "int2": {"smallint"}, + "int4": {"integer"}, + "int8": {"bigint"}, + "smallint": {"int2"}, + "integer": {"int4"}, + "bigint": {"int8"}, + "decimal": {"numeric"}, + "numeric": {"decimal"}, + "timestamptz": {"timestamp with time zone"}, + "timestamp with time zone": {"timestamptz"}, + "bool": {"boolean"}, + "boolean": {"bool"}, + "serial2": {"smallserial"}, + "serial4": {"serial"}, + "serial8": {"bigserial"}, + "varbit": {"bit varying"}, + "char": {"character"}, + "varchar": {"character varying"}, + "float4": {"real"}, + "float8": {"double precision"}, + "timetz": {"time with time zone"}, +} + type Migrator struct { migrator.Migrator } +// select querys ignore dryrun +func (m Migrator) queryRaw(sql string, values ...interface{}) (tx *gorm.DB) { + queryTx := m.DB + if m.DB.DryRun { + queryTx = m.DB.Session(&gorm.Session{}) + queryTx.DryRun = false + } + return queryTx.Raw(sql, values...) +} + func (m Migrator) CurrentDatabase() (name string) { - m.DB.Raw("SELECT CURRENT_DATABASE()").Scan(&name) + m.queryRaw("SELECT CURRENT_DATABASE()").Scan(&name) return } @@ -65,11 +103,13 @@ func (m Migrator) BuildIndexOptions(opts []schema.IndexOption, stmt *gorm.Statem func (m Migrator) HasIndex(value interface{}, name string) bool { var count int64 m.RunWithValue(value, func(stmt *gorm.Statement) error { - if idx := stmt.Schema.LookIndex(name); idx != nil { - name = idx.Name + if stmt.Schema != nil { + if idx := stmt.Schema.LookIndex(name); idx != nil { + name = idx.Name + } } currentSchema, curTable := m.CurrentSchema(stmt, stmt.Table) - return m.DB.Raw( + return m.queryRaw( "SELECT count(*) FROM pg_indexes WHERE tablename = ? AND indexname = ? AND schemaname = ?", curTable, name, currentSchema, ).Scan(&count).Error }) @@ -79,33 +119,35 @@ func (m Migrator) HasIndex(value interface{}, name string) bool { func (m Migrator) CreateIndex(value interface{}, name string) error { return m.RunWithValue(value, func(stmt *gorm.Statement) error { - if idx := stmt.Schema.LookIndex(name); idx != nil { - opts := m.BuildIndexOptions(idx.Fields, stmt) - values := []interface{}{clause.Column{Name: idx.Name}, m.CurrentTable(stmt), opts} + if stmt.Schema != nil { + if idx := stmt.Schema.LookIndex(name); idx != nil { + opts := m.BuildIndexOptions(idx.Fields, stmt) + values := []interface{}{clause.Column{Name: idx.Name}, m.CurrentTable(stmt), opts} - createIndexSQL := "CREATE " - if idx.Class != "" { - createIndexSQL += idx.Class + " " - } - createIndexSQL += "INDEX " + createIndexSQL := "CREATE " + if idx.Class != "" { + createIndexSQL += idx.Class + " " + } + createIndexSQL += "INDEX " - if strings.TrimSpace(strings.ToUpper(idx.Option)) == "CONCURRENTLY" { - createIndexSQL += "CONCURRENTLY " - } + if strings.TrimSpace(strings.ToUpper(idx.Option)) == "CONCURRENTLY" { + createIndexSQL += "CONCURRENTLY " + } - createIndexSQL += "IF NOT EXISTS ? ON ?" + createIndexSQL += "IF NOT EXISTS ? ON ?" - if idx.Type != "" { - createIndexSQL += " USING " + idx.Type + "(?)" - } else { - createIndexSQL += " ?" - } + if idx.Type != "" { + createIndexSQL += " USING " + idx.Type + "(?)" + } else { + createIndexSQL += " ?" + } - if idx.Where != "" { - createIndexSQL += " WHERE " + idx.Where - } + if idx.Where != "" { + createIndexSQL += " WHERE " + idx.Where + } - return m.DB.Exec(createIndexSQL, values...).Error + return m.DB.Exec(createIndexSQL, values...).Error + } } return fmt.Errorf("failed to create index with name %v", name) @@ -123,8 +165,10 @@ func (m Migrator) RenameIndex(value interface{}, oldName, newName string) error func (m Migrator) DropIndex(value interface{}, name string) error { return m.RunWithValue(value, func(stmt *gorm.Statement) error { - if idx := stmt.Schema.LookIndex(name); idx != nil { - name = idx.Name + if stmt.Schema != nil { + if idx := stmt.Schema.LookIndex(name); idx != nil { + name = idx.Name + } } return m.DB.Exec("DROP INDEX ?", clause.Column{Name: name}).Error @@ -133,7 +177,7 @@ func (m Migrator) DropIndex(value interface{}, name string) error { func (m Migrator) GetTables() (tableList []string, err error) { currentSchema, _ := m.CurrentSchema(m.DB.Statement, "") - return tableList, m.DB.Raw("SELECT table_name FROM information_schema.tables WHERE table_schema = ? AND table_type = ?", currentSchema, "BASE TABLE").Scan(&tableList).Error + return tableList, m.queryRaw("SELECT table_name FROM information_schema.tables WHERE table_schema = ? AND table_type = ?", currentSchema, "BASE TABLE").Scan(&tableList).Error } func (m Migrator) CreateTable(values ...interface{}) (err error) { @@ -142,13 +186,16 @@ func (m Migrator) CreateTable(values ...interface{}) (err error) { } for _, value := range m.ReorderModels(values, false) { if err = m.RunWithValue(value, func(stmt *gorm.Statement) error { - for _, field := range stmt.Schema.FieldsByDBName { - if field.Comment != "" { - if err := m.DB.Exec( - "COMMENT ON COLUMN ?.? IS ?", - m.CurrentTable(stmt), clause.Column{Name: field.DBName}, gorm.Expr(m.Migrator.Dialector.Explain("$1", field.Comment)), - ).Error; err != nil { - return err + if stmt.Schema != nil { + for _, fieldName := range stmt.Schema.DBNames { + field := stmt.Schema.FieldsByDBName[fieldName] + if field.Comment != "" { + if err := m.DB.Exec( + "COMMENT ON COLUMN ?.? IS ?", + m.CurrentTable(stmt), clause.Column{Name: field.DBName}, gorm.Expr(m.Migrator.Dialector.Explain("$1", field.Comment)), + ).Error; err != nil { + return err + } } } } @@ -164,7 +211,7 @@ func (m Migrator) HasTable(value interface{}) bool { var count int64 m.RunWithValue(value, func(stmt *gorm.Statement) error { currentSchema, curTable := m.CurrentSchema(stmt, stmt.Table) - return m.DB.Raw("SELECT count(*) FROM information_schema.tables WHERE table_schema = ? AND table_name = ? AND table_type = ?", currentSchema, curTable, "BASE TABLE").Scan(&count).Error + return m.queryRaw("SELECT count(*) FROM information_schema.tables WHERE table_schema = ? AND table_name = ? AND table_type = ?", currentSchema, curTable, "BASE TABLE").Scan(&count).Error }) return count > 0 } @@ -186,14 +233,18 @@ func (m Migrator) AddColumn(value interface{}, field string) error { if err := m.Migrator.AddColumn(value, field); err != nil { return err } + m.resetPreparedStmts() + return m.RunWithValue(value, func(stmt *gorm.Statement) error { - if field := stmt.Schema.LookUpField(field); field != nil { - if field.Comment != "" { - if err := m.DB.Exec( - "COMMENT ON COLUMN ?.? IS ?", - m.CurrentTable(stmt), clause.Column{Name: field.DBName}, gorm.Expr(m.Migrator.Dialector.Explain("$1", field.Comment)), - ).Error; err != nil { - return err + if stmt.Schema != nil { + if field := stmt.Schema.LookUpField(field); field != nil { + if field.Comment != "" { + if err := m.DB.Exec( + "COMMENT ON COLUMN ?.? IS ?", + m.CurrentTable(stmt), clause.Column{Name: field.DBName}, gorm.Expr(m.Migrator.Dialector.Explain("$1", field.Comment)), + ).Error; err != nil { + return err + } } } } @@ -212,7 +263,7 @@ func (m Migrator) HasColumn(value interface{}, field string) bool { } currentSchema, curTable := m.CurrentSchema(stmt, stmt.Table) - return m.DB.Raw( + return m.queryRaw( "SELECT count(*) FROM INFORMATION_SCHEMA.columns WHERE table_schema = ? AND table_name = ? AND column_name = ?", currentSchema, curTable, name, ).Scan(&count).Error @@ -237,11 +288,10 @@ func (m Migrator) MigrateColumn(value interface{}, field *schema.Field, columnTy checkSQL += "WHERE objsubid = (SELECT ordinal_position FROM information_schema.columns WHERE table_schema = ? AND table_name = ? AND column_name = ?) " checkSQL += "AND objoid = (SELECT oid FROM pg_catalog.pg_class WHERE relname = ? AND relnamespace = " checkSQL += "(SELECT oid FROM pg_catalog.pg_namespace WHERE nspname = ?))" - m.DB.Raw(checkSQL, values...).Scan(&description) - comment := field.Comment - if comment != "" { - comment = comment[1 : len(comment)-1] - } + m.queryRaw(checkSQL, values...).Scan(&description) + + comment := strings.Trim(field.Comment, "'") + comment = strings.Trim(comment, `"`) if field.Comment != "" && comment != description { if err := m.DB.Exec( "COMMENT ON COLUMN ?.? IS ?", @@ -256,100 +306,137 @@ func (m Migrator) MigrateColumn(value interface{}, field *schema.Field, columnTy // AlterColumn alter value's `field` column' type based on schema definition func (m Migrator) AlterColumn(value interface{}, field string) error { - return m.RunWithValue(value, func(stmt *gorm.Statement) error { - if field := stmt.Schema.LookUpField(field); field != nil { - var ( - columnTypes, _ = m.DB.Migrator().ColumnTypes(value) - fieldColumnType *migrator.ColumnType - ) - for _, columnType := range columnTypes { - if columnType.Name() == field.DBName { - fieldColumnType, _ = columnType.(*migrator.ColumnType) + err := m.RunWithValue(value, func(stmt *gorm.Statement) error { + if stmt.Schema != nil { + if field := stmt.Schema.LookUpField(field); field != nil { + var ( + columnTypes, _ = m.DB.Migrator().ColumnTypes(value) + fieldColumnType *migrator.ColumnType + ) + for _, columnType := range columnTypes { + if columnType.Name() == field.DBName { + fieldColumnType, _ = columnType.(*migrator.ColumnType) + } } - } - fileType := clause.Expr{SQL: m.DataTypeOf(field)} - if fieldColumnType.DatabaseTypeName() != fileType.SQL { - filedColumnAutoIncrement, _ := fieldColumnType.AutoIncrement() - if field.AutoIncrement && filedColumnAutoIncrement { // update - serialDatabaseType, _ := getSerialDatabaseType(fileType.SQL) - if t, _ := fieldColumnType.ColumnType(); t != serialDatabaseType { - if err := m.UpdateSequence(m.DB, stmt, field, serialDatabaseType); err != nil { - return err + fileType := clause.Expr{SQL: m.DataTypeOf(field)} + // check for typeName and SQL name + isSameType := true + if !strings.EqualFold(fieldColumnType.DatabaseTypeName(), fileType.SQL) { + isSameType = false + // if different, also check for aliases + aliases := m.GetTypeAliases(fieldColumnType.DatabaseTypeName()) + for _, alias := range aliases { + if strings.HasPrefix(fileType.SQL, alias) { + isSameType = true + break } } - } else if field.AutoIncrement && !filedColumnAutoIncrement { // create - serialDatabaseType, _ := getSerialDatabaseType(fileType.SQL) - if err := m.CreateSequence(m.DB, stmt, field, serialDatabaseType); err != nil { - return err - } - } else if !field.AutoIncrement && filedColumnAutoIncrement { // delete - if err := m.DeleteSequence(m.DB, stmt, field, fileType); err != nil { - return err - } - } else { - if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? TYPE ?", m.CurrentTable(stmt), clause.Column{Name: field.DBName}, fileType).Error; err != nil { - return err - } } - } - if null, _ := fieldColumnType.Nullable(); null == field.NotNull { - if field.NotNull { - if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? SET NOT NULL", m.CurrentTable(stmt), clause.Column{Name: field.DBName}).Error; err != nil { - return err - } - } else { - if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? DROP NOT NULL", m.CurrentTable(stmt), clause.Column{Name: field.DBName}).Error; err != nil { - return err + // not same, migrate + if !isSameType { + filedColumnAutoIncrement, _ := fieldColumnType.AutoIncrement() + if field.AutoIncrement && filedColumnAutoIncrement { // update + serialDatabaseType, _ := getSerialDatabaseType(fileType.SQL) + if t, _ := fieldColumnType.ColumnType(); t != serialDatabaseType { + if err := m.UpdateSequence(m.DB, stmt, field, serialDatabaseType); err != nil { + return err + } + } + } else if field.AutoIncrement && !filedColumnAutoIncrement { // create + serialDatabaseType, _ := getSerialDatabaseType(fileType.SQL) + if err := m.CreateSequence(m.DB, stmt, field, serialDatabaseType); err != nil { + return err + } + } else if !field.AutoIncrement && filedColumnAutoIncrement { // delete + if err := m.DeleteSequence(m.DB, stmt, field, fileType); err != nil { + return err + } + } else { + if err := m.modifyColumn(stmt, field, fileType, fieldColumnType); err != nil { + return err + } } } - } - - if uniq, _ := fieldColumnType.Unique(); uniq != field.Unique { - idxName := clause.Column{Name: m.DB.Config.NamingStrategy.IndexName(stmt.Table, field.DBName)} - if err := m.DB.Exec("ALTER TABLE ? ADD CONSTRAINT ? UNIQUE(?)", m.CurrentTable(stmt), idxName, clause.Column{Name: field.DBName}).Error; err != nil { - return err - } - } - if v, _ := fieldColumnType.DefaultValue(); v != field.DefaultValue { - if field.HasDefaultValue && (field.DefaultValueInterface != nil || field.DefaultValue != "") { - if field.DefaultValueInterface != nil { - defaultStmt := &gorm.Statement{Vars: []interface{}{field.DefaultValueInterface}} - m.Dialector.BindVarTo(defaultStmt, defaultStmt, field.DefaultValueInterface) - if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? SET DEFAULT ?", m.CurrentTable(stmt), clause.Column{Name: field.DBName}, clause.Expr{SQL: m.Dialector.Explain(defaultStmt.SQL.String(), field.DefaultValueInterface)}).Error; err != nil { - return err - } - } else if field.DefaultValue != "(-)" { - if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? SET DEFAULT ?", m.CurrentTable(stmt), clause.Column{Name: field.DBName}, clause.Expr{SQL: field.DefaultValue}).Error; err != nil { + if null, _ := fieldColumnType.Nullable(); null == field.NotNull { + if field.NotNull { + if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? SET NOT NULL", m.CurrentTable(stmt), clause.Column{Name: field.DBName}).Error; err != nil { return err } } else { - if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? DROP DEFAULT", m.CurrentTable(stmt), clause.Column{Name: field.DBName}, clause.Expr{SQL: field.DefaultValue}).Error; err != nil { + if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? DROP NOT NULL", m.CurrentTable(stmt), clause.Column{Name: field.DBName}).Error; err != nil { return err } } } + + if v, ok := fieldColumnType.DefaultValue(); (field.DefaultValueInterface == nil && ok) || v != field.DefaultValue { + if field.HasDefaultValue && (field.DefaultValueInterface != nil || field.DefaultValue != "") { + if field.DefaultValueInterface != nil { + defaultStmt := &gorm.Statement{Vars: []interface{}{field.DefaultValueInterface}} + m.Dialector.BindVarTo(defaultStmt, defaultStmt, field.DefaultValueInterface) + if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? SET DEFAULT ?", m.CurrentTable(stmt), clause.Column{Name: field.DBName}, clause.Expr{SQL: m.Dialector.Explain(defaultStmt.SQL.String(), field.DefaultValueInterface)}).Error; err != nil { + return err + } + } else if field.DefaultValue != "(-)" { + if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? SET DEFAULT ?", m.CurrentTable(stmt), clause.Column{Name: field.DBName}, clause.Expr{SQL: field.DefaultValue}).Error; err != nil { + return err + } + } else { + if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? DROP DEFAULT", m.CurrentTable(stmt), clause.Column{Name: field.DBName}, clause.Expr{SQL: field.DefaultValue}).Error; err != nil { + return err + } + } + } + } + return nil } - return nil } return fmt.Errorf("failed to look up field with name: %s", field) }) + + if err != nil { + return err + } + m.resetPreparedStmts() + return nil +} + +func (m Migrator) modifyColumn(stmt *gorm.Statement, field *schema.Field, targetType clause.Expr, existingColumn *migrator.ColumnType) error { + alterSQL := "ALTER TABLE ? ALTER COLUMN ? TYPE ? USING ?::?" + isUncastableDefaultValue := false + + if targetType.SQL == "boolean" { + switch existingColumn.DatabaseTypeName() { + case "int2", "int8", "numeric": + alterSQL = "ALTER TABLE ? ALTER COLUMN ? TYPE ? USING ?::int::?" + } + isUncastableDefaultValue = true + } + + if dv, _ := existingColumn.DefaultValue(); dv != "" && isUncastableDefaultValue { + if err := m.DB.Exec("ALTER TABLE ? ALTER COLUMN ? DROP DEFAULT", m.CurrentTable(stmt), clause.Column{Name: field.DBName}).Error; err != nil { + return err + } + } + if err := m.DB.Exec(alterSQL, m.CurrentTable(stmt), clause.Column{Name: field.DBName}, targetType, clause.Column{Name: field.DBName}, targetType).Error; err != nil { + return err + } + return nil } func (m Migrator) HasConstraint(value interface{}, name string) bool { var count int64 m.RunWithValue(value, func(stmt *gorm.Statement) error { - constraint, chk, table := m.GuessConstraintAndTable(stmt, name) - currentSchema, curTable := m.CurrentSchema(stmt, table) + constraint, table := m.GuessConstraintInterfaceAndTable(stmt, name) if constraint != nil { - name = constraint.Name - } else if chk != nil { - name = chk.Name + name = constraint.GetName() } + currentSchema, curTable := m.CurrentSchema(stmt, table) - return m.DB.Raw( + return m.queryRaw( "SELECT count(*) FROM INFORMATION_SCHEMA.table_constraints WHERE table_schema = ? AND table_name = ? AND constraint_name = ?", currentSchema, curTable, name, ).Scan(&count).Error @@ -364,8 +451,8 @@ func (m Migrator) ColumnTypes(value interface{}) (columnTypes []gorm.ColumnType, var ( currentDatabase = m.DB.Migrator().CurrentDatabase() currentSchema, table = m.CurrentSchema(stmt, stmt.Table) - columns, err = m.DB.Raw( - "SELECT c.column_name, c.is_nullable = 'YES', c.udt_name, c.character_maximum_length, c.numeric_precision, c.numeric_precision_radix, c.numeric_scale, c.datetime_precision, 8 * typlen, c.column_default, pd.description FROM information_schema.columns AS c JOIN pg_type AS pgt ON c.udt_name = pgt.typname LEFT JOIN pg_catalog.pg_description as pd ON pd.objsubid = c.ordinal_position AND pd.objoid = (SELECT oid FROM pg_catalog.pg_class WHERE relname = c.table_name AND relnamespace = (SELECT oid FROM pg_catalog.pg_namespace WHERE nspname = c.table_schema)) where table_catalog = ? AND table_schema = ? AND table_name = ?", + columns, err = m.queryRaw( + "SELECT c.column_name, c.is_nullable = 'YES', c.udt_name, c.character_maximum_length, c.numeric_precision, c.numeric_precision_radix, c.numeric_scale, c.datetime_precision, 8 * typlen, c.column_default, pd.description, c.identity_increment FROM information_schema.columns AS c JOIN pg_type AS pgt ON c.udt_name = pgt.typname LEFT JOIN pg_catalog.pg_description as pd ON pd.objsubid = c.ordinal_position AND pd.objoid = (SELECT oid FROM pg_catalog.pg_class WHERE relname = c.table_name AND relnamespace = (SELECT oid FROM pg_catalog.pg_namespace WHERE nspname = c.table_schema)) where table_catalog = ? AND table_schema = ? AND table_name = ?", currentDatabase, currentSchema, table).Rows() ) @@ -382,11 +469,12 @@ func (m Migrator) ColumnTypes(value interface{}) (columnTypes []gorm.ColumnType, datetimePrecision sql.NullInt64 radixValue sql.NullInt64 typeLenValue sql.NullInt64 + identityIncrement sql.NullString ) err = columns.Scan( &column.NameValue, &column.NullableValue, &column.DataTypeValue, &column.LengthValue, &column.DecimalSizeValue, - &radixValue, &column.ScaleValue, &datetimePrecision, &typeLenValue, &column.DefaultValueValue, &column.CommentValue, + &radixValue, &column.ScaleValue, &datetimePrecision, &typeLenValue, &column.DefaultValueValue, &column.CommentValue, &identityIncrement, ) if err != nil { return err @@ -396,13 +484,14 @@ func (m Migrator) ColumnTypes(value interface{}) (columnTypes []gorm.ColumnType, column.LengthValue = typeLenValue } - if strings.HasPrefix(column.DefaultValueValue.String, "nextval('") && strings.HasSuffix(column.DefaultValueValue.String, "seq'::regclass)") { + if (strings.HasPrefix(column.DefaultValueValue.String, "nextval('") && + strings.HasSuffix(column.DefaultValueValue.String, "seq'::regclass)")) || (identityIncrement.Valid && identityIncrement.String != "") { column.AutoIncrementValue = sql.NullBool{Bool: true, Valid: true} column.DefaultValueValue = sql.NullString{} } if column.DefaultValueValue.Valid { - column.DefaultValueValue.String = regexp.MustCompile(`'(.*)'::[\w]+$`).ReplaceAllString(column.DefaultValueValue.String, "$1") + column.DefaultValueValue.String = parseDefaultValueValue(column.DefaultValueValue.String) } if datetimePrecision.Valid { @@ -436,14 +525,25 @@ func (m Migrator) ColumnTypes(value interface{}) (columnTypes []gorm.ColumnType, // check primary, unique field { - columnTypeRows, err := m.DB.Raw("SELECT c.column_name, constraint_type FROM information_schema.table_constraints tc JOIN information_schema.constraint_column_usage AS ccu USING (constraint_schema, constraint_name) JOIN information_schema.columns AS c ON c.table_schema = tc.constraint_schema AND tc.table_name = c.table_name AND ccu.column_name = c.column_name WHERE constraint_type IN ('PRIMARY KEY', 'UNIQUE') AND c.table_catalog = ? AND c.table_schema = ? AND c.table_name = ?", currentDatabase, currentSchema, table).Rows() + columnTypeRows, err := m.queryRaw("SELECT constraint_name FROM information_schema.table_constraints tc JOIN information_schema.constraint_column_usage AS ccu USING (constraint_schema, constraint_catalog, table_name, constraint_name) JOIN information_schema.columns AS c ON c.table_schema = tc.constraint_schema AND tc.table_name = c.table_name AND ccu.column_name = c.column_name WHERE constraint_type IN ('PRIMARY KEY', 'UNIQUE') AND c.table_catalog = ? AND c.table_schema = ? AND c.table_name = ? AND constraint_type = ?", currentDatabase, currentSchema, table, "UNIQUE").Rows() if err != nil { return err } + uniqueContraints := map[string]int{} + for columnTypeRows.Next() { + var constraintName string + columnTypeRows.Scan(&constraintName) + uniqueContraints[constraintName]++ + } + columnTypeRows.Close() + columnTypeRows, err = m.queryRaw("SELECT c.column_name, constraint_name, constraint_type FROM information_schema.table_constraints tc JOIN information_schema.constraint_column_usage AS ccu USING (constraint_schema, constraint_catalog, table_name, constraint_name) JOIN information_schema.columns AS c ON c.table_schema = tc.constraint_schema AND tc.table_name = c.table_name AND ccu.column_name = c.column_name WHERE constraint_type IN ('PRIMARY KEY', 'UNIQUE') AND c.table_catalog = ? AND c.table_schema = ? AND c.table_name = ?", currentDatabase, currentSchema, table).Rows() + if err != nil { + return err + } for columnTypeRows.Next() { - var name, columnType string - columnTypeRows.Scan(&name, &columnType) + var name, constraintName, columnType string + columnTypeRows.Scan(&name, &constraintName, &columnType) for _, c := range columnTypes { mc := c.(*migrator.ColumnType) if mc.NameValue.String == name { @@ -451,7 +551,9 @@ func (m Migrator) ColumnTypes(value interface{}) (columnTypes []gorm.ColumnType, case "PRIMARY KEY": mc.PrimaryKeyValue = sql.NullBool{Bool: true, Valid: true} case "UNIQUE": - mc.UniqueValue = sql.NullBool{Bool: true, Valid: true} + if uniqueContraints[constraintName] == 1 { + mc.UniqueValue = sql.NullBool{Bool: true, Valid: true} + } } break } @@ -462,8 +564,8 @@ func (m Migrator) ColumnTypes(value interface{}) (columnTypes []gorm.ColumnType, // check column type { - dataTypeRows, err := m.DB.Raw(`SELECT a.attname as column_name, format_type(a.atttypid, a.atttypmod) AS data_type - FROM pg_attribute a JOIN pg_class b ON a.attrelid = b.relfilenode AND relnamespace = (SELECT oid FROM pg_catalog.pg_namespace WHERE nspname = ?) + dataTypeRows, err := m.queryRaw(`SELECT a.attname as column_name, format_type(a.atttypid, a.atttypmod) AS data_type + FROM pg_attribute a JOIN pg_class b ON a.attrelid = b.oid AND relnamespace = (SELECT oid FROM pg_catalog.pg_namespace WHERE nspname = ?) WHERE a.attnum > 0 -- hide internal columns AND NOT a.attisdropped -- hide deleted columns AND b.relname = ?`, currentSchema, table).Rows() @@ -503,9 +605,10 @@ func (m Migrator) GetRows(currentSchema interface{}, table interface{}) (*sql.Ro } return m.DB.Session(&gorm.Session{}).Table(name).Limit(1).Scopes(func(d *gorm.DB) *gorm.DB { + dialector, _ := m.Dialector.(Dialector) // use simple protocol - if !m.DB.PrepareStmt { - d.Statement.Vars = append(d.Statement.Vars, pgx.QuerySimpleProtocol(true)) + if !m.DB.PrepareStmt && (dialector.Config != nil && (dialector.Config.DriverName == "" || dialector.Config.DriverName == "pgx")) { + d.Statement.Vars = append([]interface{}{pgx.QueryExecModeSimpleProtocol}, d.Statement.Vars...) } return d }).Rows() @@ -619,7 +722,7 @@ func (m Migrator) GetIndexes(value interface{}) ([]gorm.Index, error) { err := m.RunWithValue(value, func(stmt *gorm.Statement) error { result := make([]*Index, 0) - scanErr := m.DB.Raw(indexSql, stmt.Table).Scan(&result).Error + scanErr := m.queryRaw(indexSql, stmt.Table).Scan(&result).Error if scanErr != nil { return scanErr } @@ -663,3 +766,39 @@ func groupByIndexName(indexList []*Index) map[string][]*Index { } return columnIndexMap } + +func (m Migrator) GetTypeAliases(databaseTypeName string) []string { + return typeAliasMap[databaseTypeName] +} + +// should reset prepared stmts when table changed +func (m Migrator) resetPreparedStmts() { + if m.DB.PrepareStmt { + if pdb, ok := m.DB.ConnPool.(*gorm.PreparedStmtDB); ok { + pdb.Reset() + } + } +} + +func (m Migrator) DropColumn(dst interface{}, field string) error { + if err := m.Migrator.DropColumn(dst, field); err != nil { + return err + } + + m.resetPreparedStmts() + return nil +} + +func (m Migrator) RenameColumn(dst interface{}, oldName, field string) error { + if err := m.Migrator.RenameColumn(dst, oldName, field); err != nil { + return err + } + + m.resetPreparedStmts() + return nil +} + +func parseDefaultValueValue(defaultValue string) string { + value := regexp.MustCompile(`^(.*?)(?:::.*)?$`).ReplaceAllString(defaultValue, "$1") + return strings.Trim(value, "'") +} diff --git a/vendor/gorm.io/driver/postgres/postgres.go b/vendor/gorm.io/driver/postgres/postgres.go index c0fdea06..e865b0f8 100644 --- a/vendor/gorm.io/driver/postgres/postgres.go +++ b/vendor/gorm.io/driver/postgres/postgres.go @@ -5,9 +5,10 @@ import ( "fmt" "regexp" "strconv" + "strings" - "github.com/jackc/pgx/v4" - "github.com/jackc/pgx/v4/stdlib" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/stdlib" "gorm.io/gorm" "gorm.io/gorm/callbacks" "gorm.io/gorm/clause" @@ -23,11 +24,17 @@ type Dialector struct { type Config struct { DriverName string DSN string + WithoutQuotingCheck bool PreferSimpleProtocol bool WithoutReturning bool Conn gorm.ConnPool } +var ( + timeZoneMatcher = regexp.MustCompile("(time_zone|TimeZone)=(.*?)($|&| )") + defaultIdentifierLength = 63 //maximum identifier length for postgres +) + func Open(dsn string) gorm.Dialector { return &Dialector{&Config{DSN: dsn}} } @@ -40,15 +47,42 @@ func (dialector Dialector) Name() string { return "postgres" } -var timeZoneMatcher = regexp.MustCompile("(time_zone|TimeZone)=(.*?)($|&| )") +func (dialector Dialector) Apply(config *gorm.Config) error { + if config.NamingStrategy == nil { + config.NamingStrategy = schema.NamingStrategy{ + IdentifierMaxLength: defaultIdentifierLength, + } + return nil + } + + switch v := config.NamingStrategy.(type) { + case *schema.NamingStrategy: + if v.IdentifierMaxLength <= 0 { + v.IdentifierMaxLength = defaultIdentifierLength + } + case schema.NamingStrategy: + if v.IdentifierMaxLength <= 0 { + v.IdentifierMaxLength = defaultIdentifierLength + config.NamingStrategy = v + } + } + + return nil +} func (dialector Dialector) Initialize(db *gorm.DB) (err error) { + callbackConfig := &callbacks.Config{ + CreateClauses: []string{"INSERT", "VALUES", "ON CONFLICT"}, + UpdateClauses: []string{"UPDATE", "SET", "FROM", "WHERE"}, + DeleteClauses: []string{"DELETE", "FROM", "WHERE"}, + } // register callbacks - callbacks.RegisterDefaultCallbacks(db, &callbacks.Config{ - CreateClauses: []string{"INSERT", "VALUES", "ON CONFLICT", "RETURNING"}, - UpdateClauses: []string{"UPDATE", "SET", "WHERE", "RETURNING"}, - DeleteClauses: []string{"DELETE", "FROM", "WHERE", "RETURNING"}, - }) + if !dialector.WithoutReturning { + callbackConfig.CreateClauses = append(callbackConfig.CreateClauses, "RETURNING") + callbackConfig.UpdateClauses = append(callbackConfig.UpdateClauses, "RETURNING") + callbackConfig.DeleteClauses = append(callbackConfig.DeleteClauses, "RETURNING") + } + callbacks.RegisterDefaultCallbacks(db, callbackConfig) if dialector.Conn != nil { db.ConnPool = dialector.Conn @@ -62,7 +96,7 @@ func (dialector Dialector) Initialize(db *gorm.DB) (err error) { return } if dialector.Config.PreferSimpleProtocol { - config.PreferSimpleProtocol = true + config.DefaultQueryExecMode = pgx.QueryExecModeSimpleProtocol } result := timeZoneMatcher.FindStringSubmatch(dialector.Config.DSN) if len(result) > 2 { @@ -87,10 +121,23 @@ func (dialector Dialector) DefaultValueOf(field *schema.Field) clause.Expression func (dialector Dialector) BindVarTo(writer clause.Writer, stmt *gorm.Statement, v interface{}) { writer.WriteByte('$') - writer.WriteString(strconv.Itoa(len(stmt.Vars))) + index := 0 + varLen := len(stmt.Vars) + if varLen > 0 { + switch stmt.Vars[0].(type) { + case pgx.QueryExecMode: + index++ + } + } + writer.WriteString(strconv.Itoa(varLen - index)) } func (dialector Dialector) QuoteTo(writer clause.Writer, str string) { + if dialector.WithoutQuotingCheck { + writer.WriteString(str) + return + } + var ( underQuoted, selfQuoted bool continuousBacktick int8 @@ -192,17 +239,38 @@ func (dialector Dialector) DataTypeOf(field *schema.Field) string { return "timestamptz" case schema.Bytes: return "bytea" + default: + return dialector.getSchemaCustomType(field) + } +} + +func (dialector Dialector) getSchemaCustomType(field *schema.Field) string { + sqlType := string(field.DataType) + + if field.AutoIncrement && !strings.Contains(strings.ToLower(sqlType), "serial") { + size := field.Size + if field.GORMDataType == schema.Uint { + size++ + } + switch { + case size <= 16: + sqlType = "smallserial" + case size <= 32: + sqlType = "serial" + default: + sqlType = "bigserial" + } } - return string(field.DataType) + return sqlType } -func (dialectopr Dialector) SavePoint(tx *gorm.DB, name string) error { +func (dialector Dialector) SavePoint(tx *gorm.DB, name string) error { tx.Exec("SAVEPOINT " + name) return nil } -func (dialectopr Dialector) RollbackTo(tx *gorm.DB, name string) error { +func (dialector Dialector) RollbackTo(tx *gorm.DB, name string) error { tx.Exec("ROLLBACK TO SAVEPOINT " + name) return nil } diff --git a/vendor/gorm.io/gorm/.gitignore b/vendor/gorm.io/gorm/.gitignore index 45505cc9..72733326 100644 --- a/vendor/gorm.io/gorm/.gitignore +++ b/vendor/gorm.io/gorm/.gitignore @@ -3,4 +3,5 @@ documents coverage.txt _book .idea -vendor \ No newline at end of file +vendor +.vscode diff --git a/vendor/gorm.io/gorm/.golangci.yml b/vendor/gorm.io/gorm/.golangci.yml index 16903ed6..b88bf672 100644 --- a/vendor/gorm.io/gorm/.golangci.yml +++ b/vendor/gorm.io/gorm/.golangci.yml @@ -9,3 +9,12 @@ linters: - prealloc - unconvert - unparam + - goimports + - whitespace + +linters-settings: + whitespace: + multi-func: true + goimports: + local-prefixes: gorm.io/gorm + diff --git a/vendor/gorm.io/gorm/License b/vendor/gorm.io/gorm/LICENSE similarity index 100% rename from vendor/gorm.io/gorm/License rename to vendor/gorm.io/gorm/LICENSE diff --git a/vendor/gorm.io/gorm/README.md b/vendor/gorm.io/gorm/README.md index 312a3a59..745dad60 100644 --- a/vendor/gorm.io/gorm/README.md +++ b/vendor/gorm.io/gorm/README.md @@ -4,9 +4,6 @@ The fantastic ORM library for Golang, aims to be developer friendly. [![go report card](https://goreportcard.com/badge/github.com/go-gorm/gorm "go report card")](https://goreportcard.com/report/github.com/go-gorm/gorm) [![test status](https://github.com/go-gorm/gorm/workflows/tests/badge.svg?branch=master "test status")](https://github.com/go-gorm/gorm/actions) -[![Join the chat at https://gitter.im/jinzhu/gorm](https://img.shields.io/gitter/room/jinzhu/gorm.svg)](https://gitter.im/jinzhu/gorm?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -[![Open Collective Backer](https://opencollective.com/gorm/tiers/backer/badge.svg?label=backer&color=brightgreen "Open Collective Backer")](https://opencollective.com/gorm) -[![Open Collective Sponsor](https://opencollective.com/gorm/tiers/sponsor/badge.svg?label=sponsor&color=brightgreen "Open Collective Sponsor")](https://opencollective.com/gorm) [![MIT license](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://opensource.org/licenses/MIT) [![Go.Dev reference](https://img.shields.io/badge/go.dev-reference-blue?logo=go&logoColor=white)](https://pkg.go.dev/gorm.io/gorm?tab=doc) @@ -30,14 +27,18 @@ The fantastic ORM library for Golang, aims to be developer friendly. ## Getting Started * GORM Guides [https://gorm.io](https://gorm.io) -* GORM Gen [gorm/gen](https://github.com/go-gorm/gen#gormgen) +* Gen Guides [https://gorm.io/gen/index.html](https://gorm.io/gen/index.html) ## Contributing [You can help to deliver a better GORM, check out things you can do](https://gorm.io/contribute.html) +## Contributors + +[Thank you](https://github.com/go-gorm/gorm/graphs/contributors) for contributing to the GORM framework! + ## License © Jinzhu, 2013~time.Now -Released under the [MIT License](https://github.com/go-gorm/gorm/blob/master/License) +Released under the [MIT License](https://github.com/go-gorm/gorm/blob/master/LICENSE) diff --git a/vendor/gorm.io/gorm/association.go b/vendor/gorm.io/gorm/association.go index 35e10ddd..e3f51d17 100644 --- a/vendor/gorm.io/gorm/association.go +++ b/vendor/gorm.io/gorm/association.go @@ -14,6 +14,7 @@ import ( type Association struct { DB *DB Relationship *schema.Relationship + Unscope bool Error error } @@ -40,6 +41,15 @@ func (db *DB) Association(column string) *Association { return association } +func (association *Association) Unscoped() *Association { + return &Association{ + DB: association.DB, + Relationship: association.Relationship, + Error: association.Error, + Unscope: true, + } +} + func (association *Association) Find(out interface{}, conds ...interface{}) error { if association.Error == nil { association.Error = association.buildCondition().Find(out, conds...).Error @@ -64,14 +74,30 @@ func (association *Association) Append(values ...interface{}) error { func (association *Association) Replace(values ...interface{}) error { if association.Error == nil { + reflectValue := association.DB.Statement.ReflectValue + rel := association.Relationship + + var oldBelongsToExpr clause.Expression + // we have to record the old BelongsTo value + if association.Unscope && rel.Type == schema.BelongsTo { + var foreignFields []*schema.Field + for _, ref := range rel.References { + if !ref.OwnPrimaryKey { + foreignFields = append(foreignFields, ref.ForeignKey) + } + } + if _, fvs := schema.GetIdentityFieldValuesMap(association.DB.Statement.Context, reflectValue, foreignFields); len(fvs) > 0 { + column, values := schema.ToQueryValues(rel.FieldSchema.Table, rel.FieldSchema.PrimaryFieldDBNames, fvs) + oldBelongsToExpr = clause.IN{Column: column, Values: values} + } + } + // save associations if association.saveAssociation( /*clear*/ true, values...); association.Error != nil { return association.Error } // set old associations's foreign key to null - reflectValue := association.DB.Statement.ReflectValue - rel := association.Relationship switch rel.Type { case schema.BelongsTo: if len(values) == 0 { @@ -91,6 +117,9 @@ func (association *Association) Replace(values ...interface{}) error { association.Error = association.DB.UpdateColumns(updateMap).Error } + if association.Unscope && oldBelongsToExpr != nil { + association.Error = association.DB.Model(nil).Where(oldBelongsToExpr).Delete(reflect.New(rel.FieldSchema.ModelType).Interface()).Error + } case schema.HasOne, schema.HasMany: var ( primaryFields []*schema.Field @@ -119,7 +148,11 @@ func (association *Association) Replace(values ...interface{}) error { if _, pvs := schema.GetIdentityFieldValuesMap(association.DB.Statement.Context, reflectValue, primaryFields); len(pvs) > 0 { column, values := schema.ToQueryValues(rel.FieldSchema.Table, foreignKeys, pvs) - association.Error = tx.Where(clause.IN{Column: column, Values: values}).UpdateColumns(updateMap).Error + if association.Unscope { + association.Error = tx.Where(clause.IN{Column: column, Values: values}).Delete(modelValue).Error + } else { + association.Error = tx.Where(clause.IN{Column: column, Values: values}).UpdateColumns(updateMap).Error + } } case schema.Many2Many: var ( @@ -184,7 +217,8 @@ func (association *Association) Delete(values ...interface{}) error { switch rel.Type { case schema.BelongsTo: - tx := association.DB.Model(reflect.New(rel.Schema.ModelType).Interface()) + associationDB := association.DB.Session(&Session{}) + tx := associationDB.Model(reflect.New(rel.Schema.ModelType).Interface()) _, pvs := schema.GetIdentityFieldValuesMap(association.DB.Statement.Context, reflectValue, rel.Schema.PrimaryFields) if pcolumn, pvalues := schema.ToQueryValues(rel.Schema.Table, rel.Schema.PrimaryFieldDBNames, pvs); len(pvalues) > 0 { @@ -198,8 +232,21 @@ func (association *Association) Delete(values ...interface{}) error { conds = append(conds, clause.IN{Column: relColumn, Values: relValues}) association.Error = tx.Clauses(conds...).UpdateColumns(updateAttrs).Error + if association.Unscope { + var foreignFields []*schema.Field + for _, ref := range rel.References { + if !ref.OwnPrimaryKey { + foreignFields = append(foreignFields, ref.ForeignKey) + } + } + if _, fvs := schema.GetIdentityFieldValuesMap(association.DB.Statement.Context, reflectValue, foreignFields); len(fvs) > 0 { + column, values := schema.ToQueryValues(rel.FieldSchema.Table, rel.FieldSchema.PrimaryFieldDBNames, fvs) + association.Error = associationDB.Model(nil).Where(clause.IN{Column: column, Values: values}).Delete(reflect.New(rel.FieldSchema.ModelType).Interface()).Error + } + } case schema.HasOne, schema.HasMany: - tx := association.DB.Model(reflect.New(rel.FieldSchema.ModelType).Interface()) + model := reflect.New(rel.FieldSchema.ModelType).Interface() + tx := association.DB.Model(model) _, pvs := schema.GetIdentityFieldValuesMap(association.DB.Statement.Context, reflectValue, primaryFields) if pcolumn, pvalues := schema.ToQueryValues(rel.FieldSchema.Table, foreignKeys, pvs); len(pvalues) > 0 { @@ -212,7 +259,11 @@ func (association *Association) Delete(values ...interface{}) error { relColumn, relValues := schema.ToQueryValues(rel.FieldSchema.Table, rel.FieldSchema.PrimaryFieldDBNames, rvs) conds = append(conds, clause.IN{Column: relColumn, Values: relValues}) - association.Error = tx.Clauses(conds...).UpdateColumns(updateAttrs).Error + if association.Unscope { + association.Error = tx.Clauses(conds...).Delete(model).Error + } else { + association.Error = tx.Clauses(conds...).UpdateColumns(updateAttrs).Error + } case schema.Many2Many: var ( primaryFields, relPrimaryFields []*schema.Field @@ -345,6 +396,10 @@ func (association *Association) saveAssociation(clear bool, values ...interface{ } } case reflect.Struct: + if !rv.CanAddr() { + association.Error = ErrInvalidValue + return + } association.Error = association.Relationship.Field.Set(association.DB.Statement.Context, source, rv.Addr().Interface()) if association.Relationship.Field.FieldType.Kind() == reflect.Struct { @@ -353,9 +408,13 @@ func (association *Association) saveAssociation(clear bool, values ...interface{ } case schema.HasMany, schema.Many2Many: elemType := association.Relationship.Field.IndirectFieldType.Elem() - fieldValue := reflect.Indirect(association.Relationship.Field.ReflectValueOf(association.DB.Statement.Context, source)) + oldFieldValue := reflect.Indirect(association.Relationship.Field.ReflectValueOf(association.DB.Statement.Context, source)) + var fieldValue reflect.Value if clear { - fieldValue = reflect.New(association.Relationship.Field.IndirectFieldType).Elem() + fieldValue = reflect.MakeSlice(oldFieldValue.Type(), 0, oldFieldValue.Cap()) + } else { + fieldValue = reflect.MakeSlice(oldFieldValue.Type(), oldFieldValue.Len(), oldFieldValue.Cap()) + reflect.Copy(fieldValue, oldFieldValue) } appendToFieldValues := func(ev reflect.Value) { @@ -378,6 +437,10 @@ func (association *Association) saveAssociation(clear bool, values ...interface{ appendToFieldValues(reflect.Indirect(rv.Index(i)).Addr()) } case reflect.Struct: + if !rv.CanAddr() { + association.Error = ErrInvalidValue + return + } appendToFieldValues(rv.Addr()) } @@ -455,6 +518,9 @@ func (association *Association) saveAssociation(clear bool, values ...interface{ for i := 0; i < reflectValue.Len(); i++ { appendToRelations(reflectValue.Index(i), reflect.Indirect(reflect.ValueOf(values[i])), clear) + if association.Error != nil { + return + } // TODO support save slice data, sql with case? association.Error = associationDB.Updates(reflectValue.Index(i).Addr().Interface()).Error @@ -476,6 +542,9 @@ func (association *Association) saveAssociation(clear bool, values ...interface{ for idx, value := range values { rv := reflect.Indirect(reflect.ValueOf(value)) appendToRelations(reflectValue, rv, clear && idx == 0) + if association.Error != nil { + return + } } if len(values) > 0 { @@ -507,7 +576,9 @@ func (association *Association) buildCondition() *DB { joinStmt.AddClause(queryClause) } joinStmt.Build("WHERE") - tx.Clauses(clause.Expr{SQL: strings.Replace(joinStmt.SQL.String(), "WHERE ", "", 1), Vars: joinStmt.Vars}) + if len(joinStmt.SQL.String()) > 0 { + tx.Clauses(clause.Expr{SQL: strings.Replace(joinStmt.SQL.String(), "WHERE ", "", 1), Vars: joinStmt.Vars}) + } } tx = tx.Session(&Session{QueryFields: true}).Clauses(clause.From{Joins: []clause.Join{{ diff --git a/vendor/gorm.io/gorm/callbacks.go b/vendor/gorm.io/gorm/callbacks.go index c060ea70..50b5b0e9 100644 --- a/vendor/gorm.io/gorm/callbacks.go +++ b/vendor/gorm.io/gorm/callbacks.go @@ -75,11 +75,7 @@ func (cs *callbacks) Raw() *processor { func (p *processor) Execute(db *DB) *DB { // call scopes for len(db.Statement.scopes) > 0 { - scopes := db.Statement.scopes - db.Statement.scopes = nil - for _, scope := range scopes { - db = scope(db) - } + db = db.executeScopes() } var ( @@ -93,6 +89,10 @@ func (p *processor) Execute(db *DB) *DB { resetBuildClauses = true } + if optimizer, ok := db.Statement.Dest.(StatementModifier); ok { + optimizer.ModifyStatement(stmt) + } + // assign model values if stmt.Model == nil { stmt.Model = stmt.Dest @@ -132,7 +132,11 @@ func (p *processor) Execute(db *DB) *DB { if stmt.SQL.Len() > 0 { db.Logger.Trace(stmt.Context, curTime, func() (string, int64) { - return db.Dialector.Explain(stmt.SQL.String(), stmt.Vars...), db.RowsAffected + sql, vars := stmt.SQL.String(), stmt.Vars + if filter, ok := db.Logger.(ParamsFilter); ok { + sql, vars = filter.ParamsFilter(stmt.Context, stmt.SQL.String(), stmt.Vars...) + } + return db.Dialector.Explain(sql, vars...), db.RowsAffected }, db.Error) } @@ -183,10 +187,18 @@ func (p *processor) Replace(name string, fn func(*DB)) error { func (p *processor) compile() (err error) { var callbacks []*callback + removedMap := map[string]bool{} for _, callback := range p.callbacks { if callback.match == nil || callback.match(p.db) { callbacks = append(callbacks, callback) } + if callback.remove { + removedMap[callback.name] = true + } + } + + if len(removedMap) > 0 { + callbacks = removeCallbacks(callbacks, removedMap) } p.callbacks = callbacks @@ -245,7 +257,7 @@ func sortCallbacks(cs []*callback) (fns []func(*DB), err error) { names, sorted []string sortCallback func(*callback) error ) - sort.Slice(cs, func(i, j int) bool { + sort.SliceStable(cs, func(i, j int) bool { if cs[j].before == "*" && cs[i].before != "*" { return true } @@ -335,3 +347,14 @@ func sortCallbacks(cs []*callback) (fns []func(*DB), err error) { return } + +func removeCallbacks(cs []*callback, nameMap map[string]bool) []*callback { + callbacks := make([]*callback, 0, len(cs)) + for _, callback := range cs { + if nameMap[callback.name] { + continue + } + callbacks = append(callbacks, callback) + } + return callbacks +} diff --git a/vendor/gorm.io/gorm/callbacks/associations.go b/vendor/gorm.io/gorm/callbacks/associations.go index 4a50e6c2..f3cd464a 100644 --- a/vendor/gorm.io/gorm/callbacks/associations.go +++ b/vendor/gorm.io/gorm/callbacks/associations.go @@ -51,25 +51,40 @@ func SaveBeforeAssociations(create bool) func(db *gorm.DB) { } elems := reflect.MakeSlice(reflect.SliceOf(fieldType), 0, 10) + distinctElems := reflect.MakeSlice(reflect.SliceOf(fieldType), 0, 10) + identityMap := map[string]bool{} for i := 0; i < rValLen; i++ { obj := db.Statement.ReflectValue.Index(i) if reflect.Indirect(obj).Kind() != reflect.Struct { break } - if _, zero := rel.Field.ValueOf(db.Statement.Context, obj); !zero { // check belongs to relation value rv := rel.Field.ReflectValueOf(db.Statement.Context, obj) // relation reflect value + if !isPtr { + rv = rv.Addr() + } objs = append(objs, obj) - if isPtr { - elems = reflect.Append(elems, rv) - } else { - elems = reflect.Append(elems, rv.Addr()) + elems = reflect.Append(elems, rv) + + relPrimaryValues := make([]interface{}, 0, len(rel.FieldSchema.PrimaryFields)) + for _, pf := range rel.FieldSchema.PrimaryFields { + if pfv, ok := pf.ValueOf(db.Statement.Context, rv); !ok { + relPrimaryValues = append(relPrimaryValues, pfv) + } + } + cacheKey := utils.ToStringKey(relPrimaryValues...) + if len(relPrimaryValues) != len(rel.FieldSchema.PrimaryFields) || !identityMap[cacheKey] { + if cacheKey != "" { // has primary fields + identityMap[cacheKey] = true + } + + distinctElems = reflect.Append(distinctElems, rv) } } } if elems.Len() > 0 { - if saveAssociations(db, rel, elems, selectColumns, restricted, nil) == nil { + if saveAssociations(db, rel, distinctElems, selectColumns, restricted, nil) == nil { for i := 0; i < elems.Len(); i++ { setupReferences(objs[i], elems.Index(i)) } @@ -206,9 +221,12 @@ func SaveAfterAssociations(create bool) func(db *gorm.DB) { } } - cacheKey := utils.ToStringKey(relPrimaryValues) + cacheKey := utils.ToStringKey(relPrimaryValues...) if len(relPrimaryValues) != len(rel.FieldSchema.PrimaryFields) || !identityMap[cacheKey] { - identityMap[cacheKey] = true + if cacheKey != "" { // has primary fields + identityMap[cacheKey] = true + } + if isPtr { elems = reflect.Append(elems, elem) } else { @@ -292,9 +310,12 @@ func SaveAfterAssociations(create bool) func(db *gorm.DB) { } } - cacheKey := utils.ToStringKey(relPrimaryValues) + cacheKey := utils.ToStringKey(relPrimaryValues...) if len(relPrimaryValues) != len(rel.FieldSchema.PrimaryFields) || !identityMap[cacheKey] { - identityMap[cacheKey] = true + if cacheKey != "" { // has primary fields + identityMap[cacheKey] = true + } + distinctElems = reflect.Append(distinctElems, elem) } diff --git a/vendor/gorm.io/gorm/callbacks/callmethod.go b/vendor/gorm.io/gorm/callbacks/callmethod.go index bcaa03f3..fb900037 100644 --- a/vendor/gorm.io/gorm/callbacks/callmethod.go +++ b/vendor/gorm.io/gorm/callbacks/callmethod.go @@ -13,11 +13,20 @@ func callMethod(db *gorm.DB, fc func(value interface{}, tx *gorm.DB) bool) { case reflect.Slice, reflect.Array: db.Statement.CurDestIndex = 0 for i := 0; i < db.Statement.ReflectValue.Len(); i++ { - fc(reflect.Indirect(db.Statement.ReflectValue.Index(i)).Addr().Interface(), tx) + if value := reflect.Indirect(db.Statement.ReflectValue.Index(i)); value.CanAddr() { + fc(value.Addr().Interface(), tx) + } else { + db.AddError(gorm.ErrInvalidValue) + return + } db.Statement.CurDestIndex++ } case reflect.Struct: - fc(db.Statement.ReflectValue.Addr().Interface(), tx) + if db.Statement.ReflectValue.CanAddr() { + fc(db.Statement.ReflectValue.Addr().Interface(), tx) + } else { + db.AddError(gorm.ErrInvalidValue) + } } } } diff --git a/vendor/gorm.io/gorm/callbacks/create.go b/vendor/gorm.io/gorm/callbacks/create.go index 0fe1dc93..8b7846b6 100644 --- a/vendor/gorm.io/gorm/callbacks/create.go +++ b/vendor/gorm.io/gorm/callbacks/create.go @@ -3,6 +3,7 @@ package callbacks import ( "fmt" "reflect" + "strings" "gorm.io/gorm" "gorm.io/gorm/clause" @@ -102,13 +103,62 @@ func Create(config *Config) func(db *gorm.DB) { } db.RowsAffected, _ = result.RowsAffected() - if db.RowsAffected != 0 && db.Statement.Schema != nil && - db.Statement.Schema.PrioritizedPrimaryField != nil && - db.Statement.Schema.PrioritizedPrimaryField.HasDefaultValue { - insertID, err := result.LastInsertId() - insertOk := err == nil && insertID > 0 - if !insertOk { + if db.RowsAffected == 0 { + return + } + + var ( + pkField *schema.Field + pkFieldName = "@id" + ) + + insertID, err := result.LastInsertId() + insertOk := err == nil && insertID > 0 + + if !insertOk { + if !supportReturning { db.AddError(err) + } + return + } + + if db.Statement.Schema != nil { + if db.Statement.Schema.PrioritizedPrimaryField == nil || !db.Statement.Schema.PrioritizedPrimaryField.HasDefaultValue { + return + } + pkField = db.Statement.Schema.PrioritizedPrimaryField + pkFieldName = db.Statement.Schema.PrioritizedPrimaryField.DBName + } + + // append @id column with value for auto-increment primary key + // the @id value is correct, when: 1. without setting auto-increment primary key, 2. database AutoIncrementIncrement = 1 + switch values := db.Statement.Dest.(type) { + case map[string]interface{}: + values[pkFieldName] = insertID + case *map[string]interface{}: + (*values)[pkFieldName] = insertID + case []map[string]interface{}, *[]map[string]interface{}: + mapValues, ok := values.([]map[string]interface{}) + if !ok { + if v, ok := values.(*[]map[string]interface{}); ok { + if *v != nil { + mapValues = *v + } + } + } + + if config.LastInsertIDReversed { + insertID -= int64(len(mapValues)-1) * schema.DefaultAutoIncrementIncrement + } + + for _, mapValue := range mapValues { + if mapValue != nil { + mapValue[pkFieldName] = insertID + } + insertID += schema.DefaultAutoIncrementIncrement + } + default: + if pkField == nil { return } @@ -121,10 +171,10 @@ func Create(config *Config) func(db *gorm.DB) { break } - _, isZero := db.Statement.Schema.PrioritizedPrimaryField.ValueOf(db.Statement.Context, rv) + _, isZero := pkField.ValueOf(db.Statement.Context, rv) if isZero { - db.AddError(db.Statement.Schema.PrioritizedPrimaryField.Set(db.Statement.Context, rv, insertID)) - insertID -= db.Statement.Schema.PrioritizedPrimaryField.AutoIncrementIncrement + db.AddError(pkField.Set(db.Statement.Context, rv, insertID)) + insertID -= pkField.AutoIncrementIncrement } } } else { @@ -134,16 +184,16 @@ func Create(config *Config) func(db *gorm.DB) { break } - if _, isZero := db.Statement.Schema.PrioritizedPrimaryField.ValueOf(db.Statement.Context, rv); isZero { - db.AddError(db.Statement.Schema.PrioritizedPrimaryField.Set(db.Statement.Context, rv, insertID)) - insertID += db.Statement.Schema.PrioritizedPrimaryField.AutoIncrementIncrement + if _, isZero := pkField.ValueOf(db.Statement.Context, rv); isZero { + db.AddError(pkField.Set(db.Statement.Context, rv, insertID)) + insertID += pkField.AutoIncrementIncrement } } } case reflect.Struct: - _, isZero := db.Statement.Schema.PrioritizedPrimaryField.ValueOf(db.Statement.Context, db.Statement.ReflectValue) + _, isZero := pkField.ValueOf(db.Statement.Context, db.Statement.ReflectValue) if isZero { - db.AddError(db.Statement.Schema.PrioritizedPrimaryField.Set(db.Statement.Context, db.Statement.ReflectValue, insertID)) + db.AddError(pkField.Set(db.Statement.Context, db.Statement.ReflectValue, insertID)) } } } @@ -252,13 +302,15 @@ func ConvertToCreateValues(stmt *gorm.Statement) (values clause.Values) { } } - for field, vs := range defaultValueFieldsHavingValue { - values.Columns = append(values.Columns, clause.Column{Name: field.DBName}) - for idx := range values.Values { - if vs[idx] == nil { - values.Values[idx] = append(values.Values[idx], stmt.Dialector.DefaultValueOf(field)) - } else { - values.Values[idx] = append(values.Values[idx], vs[idx]) + for _, field := range stmt.Schema.FieldsWithDefaultDBValue { + if vs, ok := defaultValueFieldsHavingValue[field]; ok { + values.Columns = append(values.Columns, clause.Column{Name: field.DBName}) + for idx := range values.Values { + if vs[idx] == nil { + values.Values[idx] = append(values.Values[idx], stmt.DefaultValueOf(field)) + } else { + values.Values[idx] = append(values.Values[idx], vs[idx]) + } } } } @@ -281,7 +333,7 @@ func ConvertToCreateValues(stmt *gorm.Statement) (values clause.Values) { } for _, field := range stmt.Schema.FieldsWithDefaultDBValue { - if v, ok := selectColumns[field.DBName]; (ok && v) || (!ok && !restricted) { + if v, ok := selectColumns[field.DBName]; (ok && v) || (!ok && !restricted) && field.DefaultValueInterface == nil { if rvOfvalue, isZero := field.ValueOf(stmt.Context, stmt.ReflectValue); !isZero { values.Columns = append(values.Columns, clause.Column{Name: field.DBName}) values.Values[0] = append(values.Values[0], rvOfvalue) @@ -302,14 +354,15 @@ func ConvertToCreateValues(stmt *gorm.Statement) (values clause.Values) { for _, column := range values.Columns { if field := stmt.Schema.LookUpField(column.Name); field != nil { if v, ok := selectColumns[field.DBName]; (ok && v) || (!ok && !restricted) { - if !field.PrimaryKey && (!field.HasDefaultValue || field.DefaultValueInterface != nil) && field.AutoCreateTime == 0 { + if !field.PrimaryKey && (!field.HasDefaultValue || field.DefaultValueInterface != nil || + strings.EqualFold(field.DefaultValue, "NULL")) && field.AutoCreateTime == 0 { if field.AutoUpdateTime > 0 { assignment := clause.Assignment{Column: clause.Column{Name: field.DBName}, Value: curTime} switch field.AutoUpdateTime { case schema.UnixNanosecond: assignment.Value = curTime.UnixNano() case schema.UnixMillisecond: - assignment.Value = curTime.UnixNano() / 1e6 + assignment.Value = curTime.UnixMilli() case schema.UnixSecond: assignment.Value = curTime.Unix() } diff --git a/vendor/gorm.io/gorm/callbacks/preload.go b/vendor/gorm.io/gorm/callbacks/preload.go index ea2570ba..fd8214bb 100644 --- a/vendor/gorm.io/gorm/callbacks/preload.go +++ b/vendor/gorm.io/gorm/callbacks/preload.go @@ -3,6 +3,8 @@ package callbacks import ( "fmt" "reflect" + "sort" + "strings" "gorm.io/gorm" "gorm.io/gorm/clause" @@ -10,6 +12,176 @@ import ( "gorm.io/gorm/utils" ) +// parsePreloadMap extracts nested preloads. e.g. +// +// // schema has a "k0" relation and a "k7.k8" embedded relation +// parsePreloadMap(schema, map[string][]interface{}{ +// clause.Associations: {"arg1"}, +// "k1": {"arg2"}, +// "k2.k3": {"arg3"}, +// "k4.k5.k6": {"arg4"}, +// }) +// // preloadMap is +// map[string]map[string][]interface{}{ +// "k0": {}, +// "k7": { +// "k8": {}, +// }, +// "k1": {}, +// "k2": { +// "k3": {"arg3"}, +// }, +// "k4": { +// "k5.k6": {"arg4"}, +// }, +// } +func parsePreloadMap(s *schema.Schema, preloads map[string][]interface{}) map[string]map[string][]interface{} { + preloadMap := map[string]map[string][]interface{}{} + setPreloadMap := func(name, value string, args []interface{}) { + if _, ok := preloadMap[name]; !ok { + preloadMap[name] = map[string][]interface{}{} + } + if value != "" { + preloadMap[name][value] = args + } + } + + for name, args := range preloads { + preloadFields := strings.Split(name, ".") + value := strings.TrimPrefix(strings.TrimPrefix(name, preloadFields[0]), ".") + if preloadFields[0] == clause.Associations { + for _, relation := range s.Relationships.Relations { + if relation.Schema == s { + setPreloadMap(relation.Name, value, args) + } + } + + for embedded, embeddedRelations := range s.Relationships.EmbeddedRelations { + for _, value := range embeddedValues(embeddedRelations) { + setPreloadMap(embedded, value, args) + } + } + } else { + setPreloadMap(preloadFields[0], value, args) + } + } + return preloadMap +} + +func embeddedValues(embeddedRelations *schema.Relationships) []string { + if embeddedRelations == nil { + return nil + } + names := make([]string, 0, len(embeddedRelations.Relations)+len(embeddedRelations.EmbeddedRelations)) + for _, relation := range embeddedRelations.Relations { + // skip first struct name + names = append(names, strings.Join(relation.Field.EmbeddedBindNames[1:], ".")) + } + for _, relations := range embeddedRelations.EmbeddedRelations { + names = append(names, embeddedValues(relations)...) + } + return names +} + +// preloadEntryPoint enters layer by layer. It will call real preload if it finds the right entry point. +// If the current relationship is embedded or joined, current query will be ignored. +// +//nolint:cyclop +func preloadEntryPoint(db *gorm.DB, joins []string, relationships *schema.Relationships, preloads map[string][]interface{}, associationsConds []interface{}) error { + preloadMap := parsePreloadMap(db.Statement.Schema, preloads) + + // avoid random traversal of the map + preloadNames := make([]string, 0, len(preloadMap)) + for key := range preloadMap { + preloadNames = append(preloadNames, key) + } + sort.Strings(preloadNames) + + isJoined := func(name string) (joined bool, nestedJoins []string) { + for _, join := range joins { + if _, ok := relationships.Relations[join]; ok && name == join { + joined = true + continue + } + joinNames := strings.SplitN(join, ".", 2) + if len(joinNames) == 2 { + if _, ok := relationships.Relations[joinNames[0]]; ok && name == joinNames[0] { + joined = true + nestedJoins = append(nestedJoins, joinNames[1]) + } + } + } + return joined, nestedJoins + } + + for _, name := range preloadNames { + if relations := relationships.EmbeddedRelations[name]; relations != nil { + if err := preloadEntryPoint(db, joins, relations, preloadMap[name], associationsConds); err != nil { + return err + } + } else if rel := relationships.Relations[name]; rel != nil { + if joined, nestedJoins := isJoined(name); joined { + switch rv := db.Statement.ReflectValue; rv.Kind() { + case reflect.Slice, reflect.Array: + if rv.Len() > 0 { + reflectValue := rel.FieldSchema.MakeSlice().Elem() + for i := 0; i < rv.Len(); i++ { + frv := rel.Field.ReflectValueOf(db.Statement.Context, rv.Index(i)) + if frv.Kind() != reflect.Ptr { + reflectValue = reflect.Append(reflectValue, frv.Addr()) + } else { + if frv.IsNil() { + continue + } + reflectValue = reflect.Append(reflectValue, frv) + } + } + + tx := preloadDB(db, reflectValue, reflectValue.Interface()) + if err := preloadEntryPoint(tx, nestedJoins, &tx.Statement.Schema.Relationships, preloadMap[name], associationsConds); err != nil { + return err + } + } + case reflect.Struct, reflect.Pointer: + reflectValue := rel.Field.ReflectValueOf(db.Statement.Context, rv) + tx := preloadDB(db, reflectValue, reflectValue.Interface()) + if err := preloadEntryPoint(tx, nestedJoins, &tx.Statement.Schema.Relationships, preloadMap[name], associationsConds); err != nil { + return err + } + default: + return gorm.ErrInvalidData + } + } else { + tx := db.Table("").Session(&gorm.Session{Context: db.Statement.Context, SkipHooks: db.Statement.SkipHooks}) + tx.Statement.ReflectValue = db.Statement.ReflectValue + tx.Statement.Unscoped = db.Statement.Unscoped + if err := preload(tx, rel, append(preloads[name], associationsConds...), preloadMap[name]); err != nil { + return err + } + } + } else { + return fmt.Errorf("%s: %w for schema %s", name, gorm.ErrUnsupportedRelation, db.Statement.Schema.Name) + } + } + return nil +} + +func preloadDB(db *gorm.DB, reflectValue reflect.Value, dest interface{}) *gorm.DB { + tx := db.Session(&gorm.Session{Context: db.Statement.Context, NewDB: true, SkipHooks: db.Statement.SkipHooks, Initialized: true}) + db.Statement.Settings.Range(func(k, v interface{}) bool { + tx.Statement.Settings.Store(k, v) + return true + }) + + if err := tx.Statement.Parse(dest); err != nil { + tx.AddError(err) + return tx + } + tx.Statement.ReflectValue = reflectValue + tx.Statement.Unscoped = db.Statement.Unscoped + return tx +} + func preload(tx *gorm.DB, rel *schema.Relationship, conds []interface{}, preloads map[string][]interface{}) error { var ( reflectValue = tx.Statement.ReflectValue diff --git a/vendor/gorm.io/gorm/callbacks/query.go b/vendor/gorm.io/gorm/callbacks/query.go index 26ee8c34..bbf238a9 100644 --- a/vendor/gorm.io/gorm/callbacks/query.go +++ b/vendor/gorm.io/gorm/callbacks/query.go @@ -3,11 +3,12 @@ package callbacks import ( "fmt" "reflect" - "sort" "strings" "gorm.io/gorm" "gorm.io/gorm/clause" + "gorm.io/gorm/schema" + "gorm.io/gorm/utils" ) func Query(db *gorm.DB) { @@ -109,78 +110,141 @@ func BuildQuerySQL(db *gorm.DB) { } } + specifiedRelationsName := make(map[string]interface{}) for _, join := range db.Statement.Joins { - if db.Statement.Schema == nil { - fromClause.Joins = append(fromClause.Joins, clause.Join{ - Expression: clause.NamedExpr{SQL: join.Name, Vars: join.Conds}, - }) - } else if relation, ok := db.Statement.Schema.Relationships.Relations[join.Name]; ok { - tableAliasName := relation.Name - - for _, s := range relation.FieldSchema.DBNames { - clauseSelect.Columns = append(clauseSelect.Columns, clause.Column{ - Table: tableAliasName, - Name: s, - Alias: tableAliasName + "__" + s, - }) + if db.Statement.Schema != nil { + var isRelations bool // is relations or raw sql + var relations []*schema.Relationship + relation, ok := db.Statement.Schema.Relationships.Relations[join.Name] + if ok { + isRelations = true + relations = append(relations, relation) + } else { + // handle nested join like "Manager.Company" + nestedJoinNames := strings.Split(join.Name, ".") + if len(nestedJoinNames) > 1 { + isNestedJoin := true + gussNestedRelations := make([]*schema.Relationship, 0, len(nestedJoinNames)) + currentRelations := db.Statement.Schema.Relationships.Relations + for _, relname := range nestedJoinNames { + // incomplete match, only treated as raw sql + if relation, ok = currentRelations[relname]; ok { + gussNestedRelations = append(gussNestedRelations, relation) + currentRelations = relation.FieldSchema.Relationships.Relations + } else { + isNestedJoin = false + break + } + } + + if isNestedJoin { + isRelations = true + relations = gussNestedRelations + } + } } - exprs := make([]clause.Expression, len(relation.References)) - for idx, ref := range relation.References { - if ref.OwnPrimaryKey { - exprs[idx] = clause.Eq{ - Column: clause.Column{Table: clause.CurrentTable, Name: ref.PrimaryKey.DBName}, - Value: clause.Column{Table: tableAliasName, Name: ref.ForeignKey.DBName}, + if isRelations { + genJoinClause := func(joinType clause.JoinType, parentTableName string, relation *schema.Relationship) clause.Join { + tableAliasName := relation.Name + if parentTableName != clause.CurrentTable { + tableAliasName = utils.NestedRelationName(parentTableName, tableAliasName) } - } else { - if ref.PrimaryValue == "" { - exprs[idx] = clause.Eq{ - Column: clause.Column{Table: clause.CurrentTable, Name: ref.ForeignKey.DBName}, - Value: clause.Column{Table: tableAliasName, Name: ref.PrimaryKey.DBName}, + + columnStmt := gorm.Statement{ + Table: tableAliasName, DB: db, Schema: relation.FieldSchema, + Selects: join.Selects, Omits: join.Omits, + } + + selectColumns, restricted := columnStmt.SelectAndOmitColumns(false, false) + for _, s := range relation.FieldSchema.DBNames { + if v, ok := selectColumns[s]; (ok && v) || (!ok && !restricted) { + clauseSelect.Columns = append(clauseSelect.Columns, clause.Column{ + Table: tableAliasName, + Name: s, + Alias: utils.NestedRelationName(tableAliasName, s), + }) } - } else { - exprs[idx] = clause.Eq{ - Column: clause.Column{Table: tableAliasName, Name: ref.ForeignKey.DBName}, - Value: ref.PrimaryValue, + } + + exprs := make([]clause.Expression, len(relation.References)) + for idx, ref := range relation.References { + if ref.OwnPrimaryKey { + exprs[idx] = clause.Eq{ + Column: clause.Column{Table: parentTableName, Name: ref.PrimaryKey.DBName}, + Value: clause.Column{Table: tableAliasName, Name: ref.ForeignKey.DBName}, + } + } else { + if ref.PrimaryValue == "" { + exprs[idx] = clause.Eq{ + Column: clause.Column{Table: parentTableName, Name: ref.ForeignKey.DBName}, + Value: clause.Column{Table: tableAliasName, Name: ref.PrimaryKey.DBName}, + } + } else { + exprs[idx] = clause.Eq{ + Column: clause.Column{Table: tableAliasName, Name: ref.ForeignKey.DBName}, + Value: ref.PrimaryValue, + } + } } } - } - } - { - onStmt := gorm.Statement{Table: tableAliasName, DB: db, Clauses: map[string]clause.Clause{}} - for _, c := range relation.FieldSchema.QueryClauses { - onStmt.AddClause(c) - } + { + onStmt := gorm.Statement{Table: tableAliasName, DB: db, Clauses: map[string]clause.Clause{}} + for _, c := range relation.FieldSchema.QueryClauses { + onStmt.AddClause(c) + } - if join.On != nil { - onStmt.AddClause(join.On) - } + if join.On != nil { + onStmt.AddClause(join.On) + } - if cs, ok := onStmt.Clauses["WHERE"]; ok { - if where, ok := cs.Expression.(clause.Where); ok { - where.Build(&onStmt) - - if onSQL := onStmt.SQL.String(); onSQL != "" { - vars := onStmt.Vars - for idx, v := range vars { - bindvar := strings.Builder{} - onStmt.Vars = vars[0 : idx+1] - db.Dialector.BindVarTo(&bindvar, &onStmt, v) - onSQL = strings.Replace(onSQL, bindvar.String(), "?", 1) + if cs, ok := onStmt.Clauses["WHERE"]; ok { + if where, ok := cs.Expression.(clause.Where); ok { + where.Build(&onStmt) + + if onSQL := onStmt.SQL.String(); onSQL != "" { + vars := onStmt.Vars + for idx, v := range vars { + bindvar := strings.Builder{} + onStmt.Vars = vars[0 : idx+1] + db.Dialector.BindVarTo(&bindvar, &onStmt, v) + onSQL = strings.Replace(onSQL, bindvar.String(), "?", 1) + } + + exprs = append(exprs, clause.Expr{SQL: onSQL, Vars: vars}) + } } - - exprs = append(exprs, clause.Expr{SQL: onSQL, Vars: vars}) } } + + return clause.Join{ + Type: joinType, + Table: clause.Table{Name: relation.FieldSchema.Table, Alias: tableAliasName}, + ON: clause.Where{Exprs: exprs}, + } } - } - fromClause.Joins = append(fromClause.Joins, clause.Join{ - Type: clause.LeftJoin, - Table: clause.Table{Name: relation.FieldSchema.Table, Alias: tableAliasName}, - ON: clause.Where{Exprs: exprs}, - }) + parentTableName := clause.CurrentTable + for _, rel := range relations { + // joins table alias like "Manager, Company, Manager__Company" + nestedAlias := utils.NestedRelationName(parentTableName, rel.Name) + if _, ok := specifiedRelationsName[nestedAlias]; !ok { + fromClause.Joins = append(fromClause.Joins, genJoinClause(join.JoinType, parentTableName, rel)) + specifiedRelationsName[nestedAlias] = nil + } + + if parentTableName != clause.CurrentTable { + parentTableName = utils.NestedRelationName(parentTableName, rel.Name) + } else { + parentTableName = rel.Name + } + } + } else { + fromClause.Joins = append(fromClause.Joins, clause.Join{ + Expression: clause.NamedExpr{SQL: join.Name, Vars: join.Conds}, + }) + } } else { fromClause.Joins = append(fromClause.Joins, clause.Join{ Expression: clause.NamedExpr{SQL: join.Name, Vars: join.Conds}, @@ -189,7 +253,6 @@ func BuildQuerySQL(db *gorm.DB) { } db.Statement.AddClause(fromClause) - db.Statement.Joins = nil } else { db.Statement.AddClauseIfNotExists(clause.From{}) } @@ -207,60 +270,27 @@ func Preload(db *gorm.DB) { return } - preloadMap := map[string]map[string][]interface{}{} - for name := range db.Statement.Preloads { - preloadFields := strings.Split(name, ".") - if preloadFields[0] == clause.Associations { - for _, rel := range db.Statement.Schema.Relationships.Relations { - if rel.Schema == db.Statement.Schema { - if _, ok := preloadMap[rel.Name]; !ok { - preloadMap[rel.Name] = map[string][]interface{}{} - } - - if value := strings.TrimPrefix(strings.TrimPrefix(name, preloadFields[0]), "."); value != "" { - preloadMap[rel.Name][value] = db.Statement.Preloads[name] - } - } - } - } else { - if _, ok := preloadMap[preloadFields[0]]; !ok { - preloadMap[preloadFields[0]] = map[string][]interface{}{} - } - - if value := strings.TrimPrefix(strings.TrimPrefix(name, preloadFields[0]), "."); value != "" { - preloadMap[preloadFields[0]][value] = db.Statement.Preloads[name] - } - } - } - - preloadNames := make([]string, 0, len(preloadMap)) - for key := range preloadMap { - preloadNames = append(preloadNames, key) + joins := make([]string, 0, len(db.Statement.Joins)) + for _, join := range db.Statement.Joins { + joins = append(joins, join.Name) } - sort.Strings(preloadNames) - - preloadDB := db.Session(&gorm.Session{Context: db.Statement.Context, NewDB: true, SkipHooks: db.Statement.SkipHooks, Initialized: true}) - db.Statement.Settings.Range(func(k, v interface{}) bool { - preloadDB.Statement.Settings.Store(k, v) - return true - }) - if err := preloadDB.Statement.Parse(db.Statement.Dest); err != nil { + tx := preloadDB(db, db.Statement.ReflectValue, db.Statement.Dest) + if tx.Error != nil { return } - preloadDB.Statement.ReflectValue = db.Statement.ReflectValue - for _, name := range preloadNames { - if rel := preloadDB.Statement.Schema.Relationships.Relations[name]; rel != nil { - db.AddError(preload(preloadDB.Table("").Session(&gorm.Session{Context: db.Statement.Context, SkipHooks: db.Statement.SkipHooks}), rel, append(db.Statement.Preloads[name], db.Statement.Preloads[clause.Associations]...), preloadMap[name])) - } else { - db.AddError(fmt.Errorf("%s: %w for schema %s", name, gorm.ErrUnsupportedRelation, db.Statement.Schema.Name)) - } - } + db.AddError(preloadEntryPoint(tx, joins, &tx.Statement.Schema.Relationships, db.Statement.Preloads, db.Statement.Preloads[clause.Associations])) } } func AfterQuery(db *gorm.DB) { + // clear the joins after query because preload need it + if v, ok := db.Statement.Clauses["FROM"].Expression.(clause.From); ok { + fromClause := db.Statement.Clauses["FROM"] + fromClause.Expression = clause.From{Tables: v.Tables, Joins: utils.RTrimSlice(v.Joins, len(db.Statement.Joins))} // keep the original From Joins + db.Statement.Clauses["FROM"] = fromClause + } if db.Error == nil && db.Statement.Schema != nil && !db.Statement.SkipHooks && db.Statement.Schema.AfterFind && db.RowsAffected > 0 { callMethod(db, func(value interface{}, tx *gorm.DB) bool { if i, ok := value.(AfterFindInterface); ok { diff --git a/vendor/gorm.io/gorm/callbacks/row.go b/vendor/gorm.io/gorm/callbacks/row.go index 56be742e..beaa189e 100644 --- a/vendor/gorm.io/gorm/callbacks/row.go +++ b/vendor/gorm.io/gorm/callbacks/row.go @@ -7,7 +7,7 @@ import ( func RowQuery(db *gorm.DB) { if db.Error == nil { BuildQuerySQL(db) - if db.DryRun { + if db.DryRun || db.Error != nil { return } diff --git a/vendor/gorm.io/gorm/callbacks/update.go b/vendor/gorm.io/gorm/callbacks/update.go index 42ffe2f6..7cde7f61 100644 --- a/vendor/gorm.io/gorm/callbacks/update.go +++ b/vendor/gorm.io/gorm/callbacks/update.go @@ -70,10 +70,13 @@ func Update(config *Config) func(db *gorm.DB) { if db.Statement.SQL.Len() == 0 { db.Statement.SQL.Grow(180) db.Statement.AddClauseIfNotExists(clause.Update{}) - if set := ConvertToAssignments(db.Statement); len(set) != 0 { - db.Statement.AddClause(set) - } else if _, ok := db.Statement.Clauses["SET"]; !ok { - return + if _, ok := db.Statement.Clauses["SET"]; !ok { + if set := ConvertToAssignments(db.Statement); len(set) != 0 { + defer delete(db.Statement.Clauses, "SET") + db.Statement.AddClause(set) + } else { + return + } } db.Statement.Build(db.Statement.BuildClauses...) @@ -135,7 +138,9 @@ func ConvertToAssignments(stmt *gorm.Statement) (set clause.Set) { case reflect.Slice, reflect.Array: assignValue = func(field *schema.Field, value interface{}) { for i := 0; i < stmt.ReflectValue.Len(); i++ { - field.Set(stmt.Context, stmt.ReflectValue.Index(i), value) + if stmt.ReflectValue.CanAddr() { + field.Set(stmt.Context, stmt.ReflectValue.Index(i), value) + } } } case reflect.Struct: @@ -158,21 +163,21 @@ func ConvertToAssignments(stmt *gorm.Statement) (set clause.Set) { switch stmt.ReflectValue.Kind() { case reflect.Slice, reflect.Array: if size := stmt.ReflectValue.Len(); size > 0 { - var primaryKeyExprs []clause.Expression + var isZero bool for i := 0; i < size; i++ { - exprs := make([]clause.Expression, len(stmt.Schema.PrimaryFields)) - var notZero bool - for idx, field := range stmt.Schema.PrimaryFields { - value, isZero := field.ValueOf(stmt.Context, stmt.ReflectValue.Index(i)) - exprs[idx] = clause.Eq{Column: field.DBName, Value: value} - notZero = notZero || !isZero - } - if notZero { - primaryKeyExprs = append(primaryKeyExprs, clause.And(exprs...)) + for _, field := range stmt.Schema.PrimaryFields { + _, isZero = field.ValueOf(stmt.Context, stmt.ReflectValue.Index(i)) + if !isZero { + break + } } } - stmt.AddClause(clause.Where{Exprs: []clause.Expression{clause.And(clause.Or(primaryKeyExprs...))}}) + if !isZero { + _, primaryValues := schema.GetIdentityFieldValuesMap(stmt.Context, stmt.ReflectValue, stmt.Schema.PrimaryFields) + column, values := schema.ToQueryValues("", stmt.Schema.PrimaryFieldDBNames, primaryValues) + stmt.AddClause(clause.Where{Exprs: []clause.Expression{clause.IN{Column: column, Values: values}}}) + } } case reflect.Struct: for _, field := range stmt.Schema.PrimaryFields { @@ -229,7 +234,7 @@ func ConvertToAssignments(stmt *gorm.Statement) (set clause.Set) { if field.AutoUpdateTime == schema.UnixNanosecond { set = append(set, clause.Assignment{Column: clause.Column{Name: field.DBName}, Value: now.UnixNano()}) } else if field.AutoUpdateTime == schema.UnixMillisecond { - set = append(set, clause.Assignment{Column: clause.Column{Name: field.DBName}, Value: now.UnixNano() / 1e6}) + set = append(set, clause.Assignment{Column: clause.Column{Name: field.DBName}, Value: now.UnixMilli()}) } else if field.AutoUpdateTime == schema.UnixSecond { set = append(set, clause.Assignment{Column: clause.Column{Name: field.DBName}, Value: now.Unix()}) } else { @@ -241,11 +246,13 @@ func ConvertToAssignments(stmt *gorm.Statement) (set clause.Set) { } default: updatingSchema := stmt.Schema + var isDiffSchema bool if !updatingValue.CanAddr() || stmt.Dest != stmt.Model { // different schema updatingStmt := &gorm.Statement{DB: stmt.DB} if err := updatingStmt.Parse(stmt.Dest); err == nil { updatingSchema = updatingStmt.Schema + isDiffSchema = true } } @@ -261,7 +268,7 @@ func ConvertToAssignments(stmt *gorm.Statement) (set clause.Set) { if field.AutoUpdateTime == schema.UnixNanosecond { value = stmt.DB.NowFunc().UnixNano() } else if field.AutoUpdateTime == schema.UnixMillisecond { - value = stmt.DB.NowFunc().UnixNano() / 1e6 + value = stmt.DB.NowFunc().UnixMilli() } else if field.AutoUpdateTime == schema.UnixSecond { value = stmt.DB.NowFunc().Unix() } else { @@ -272,7 +279,13 @@ func ConvertToAssignments(stmt *gorm.Statement) (set clause.Set) { if (ok || !isZero) && field.Updatable { set = append(set, clause.Assignment{Column: clause.Column{Name: field.DBName}, Value: value}) - assignValue(field, value) + assignField := field + if isDiffSchema { + if originField := stmt.Schema.LookUpField(dbName); originField != nil { + assignField = originField + } + } + assignValue(assignField, value) } } } else { diff --git a/vendor/gorm.io/gorm/chainable_api.go b/vendor/gorm.io/gorm/chainable_api.go index 68b4d1aa..8953413d 100644 --- a/vendor/gorm.io/gorm/chainable_api.go +++ b/vendor/gorm.io/gorm/chainable_api.go @@ -10,10 +10,11 @@ import ( ) // Model specify the model you would like to run db operations -// // update all users's name to `hello` -// db.Model(&User{}).Update("name", "hello") -// // if user's primary key is non-blank, will use it as condition, then will only update the user's name to `hello` -// db.Model(&user).Update("name", "hello") +// +// // update all users's name to `hello` +// db.Model(&User{}).Update("name", "hello") +// // if user's primary key is non-blank, will use it as condition, then will only update that user's name to `hello` +// db.Model(&user).Update("name", "hello") func (db *DB) Model(value interface{}) (tx *DB) { tx = db.getInstance() tx.Statement.Model = value @@ -21,6 +22,19 @@ func (db *DB) Model(value interface{}) (tx *DB) { } // Clauses Add clauses +// +// This supports both standard clauses (clause.OrderBy, clause.Limit, clause.Where) and more +// advanced techniques like specifying lock strength and optimizer hints. See the +// [docs] for more depth. +// +// // add a simple limit clause +// db.Clauses(clause.Limit{Limit: 1}).Find(&User{}) +// // tell the optimizer to use the `idx_user_name` index +// db.Clauses(hints.UseIndex("idx_user_name")).Find(&User{}) +// // specify the lock strength to UPDATE +// db.Clauses(clause.Locking{Strength: "UPDATE"}).Find(&users) +// +// [docs]: https://gorm.io/docs/sql_builder.html#Clauses func (db *DB) Clauses(conds ...clause.Expression) (tx *DB) { tx = db.getInstance() var whereConds []interface{} @@ -41,15 +55,22 @@ func (db *DB) Clauses(conds ...clause.Expression) (tx *DB) { return } -var tableRegexp = regexp.MustCompile(`(?i).+? AS (\w+)\s*(?:$|,)`) +var tableRegexp = regexp.MustCompile(`(?i)(?:.+? AS (\w+)\s*(?:$|,)|^\w+\s+(\w+)$)`) // Table specify the table you would like to run db operations +// +// // Get a user +// db.Table("users").Take(&result) func (db *DB) Table(name string, args ...interface{}) (tx *DB) { tx = db.getInstance() if strings.Contains(name, " ") || strings.Contains(name, "`") || len(args) > 0 { tx.Statement.TableExpr = &clause.Expr{SQL: name, Vars: args} - if results := tableRegexp.FindStringSubmatch(name); len(results) == 2 { - tx.Statement.Table = results[1] + if results := tableRegexp.FindStringSubmatch(name); len(results) == 3 { + if results[1] != "" { + tx.Statement.Table = results[1] + } else { + tx.Statement.Table = results[2] + } } } else if tables := strings.Split(name, "."); len(tables) == 2 { tx.Statement.TableExpr = &clause.Expr{SQL: tx.Statement.Quote(name)} @@ -65,6 +86,11 @@ func (db *DB) Table(name string, args ...interface{}) (tx *DB) { } // Distinct specify distinct fields that you want querying +// +// // Select distinct names of users +// db.Distinct("name").Find(&results) +// // Select distinct name/age pairs from users +// db.Distinct("name", "age").Find(&results) func (db *DB) Distinct(args ...interface{}) (tx *DB) { tx = db.getInstance() tx.Statement.Distinct = true @@ -75,6 +101,14 @@ func (db *DB) Distinct(args ...interface{}) (tx *DB) { } // Select specify fields that you want when querying, creating, updating +// +// Use Select when you only want a subset of the fields. By default, GORM will select all fields. +// Select accepts both string arguments and arrays. +// +// // Select name and age of user using multiple arguments +// db.Select("name", "age").Find(&users) +// // Select name and age of user using an array +// db.Select([]string{"name", "age"}).Find(&users) func (db *DB) Select(query interface{}, args ...interface{}) (tx *DB) { tx = db.getInstance() @@ -151,7 +185,25 @@ func (db *DB) Omit(columns ...string) (tx *DB) { return } +// MapColumns modify the column names in the query results to facilitate align to the corresponding structural fields +func (db *DB) MapColumns(m map[string]string) (tx *DB) { + tx = db.getInstance() + tx.Statement.ColumnMapping = m + return +} + // Where add conditions +// +// See the [docs] for details on the various formats that where clauses can take. By default, where clauses chain with AND. +// +// // Find the first user with name jinzhu +// db.Where("name = ?", "jinzhu").First(&user) +// // Find the first user with name jinzhu and age 20 +// db.Where(&User{Name: "jinzhu", Age: 20}).First(&user) +// // Find the first user with name jinzhu and age not equal to 20 +// db.Where("name = ?", "jinzhu").Where("age <> ?", "20").First(&user) +// +// [docs]: https://gorm.io/docs/query.html#Conditions func (db *DB) Where(query interface{}, args ...interface{}) (tx *DB) { tx = db.getInstance() if conds := tx.Statement.BuildCondition(query, args...); len(conds) > 0 { @@ -161,6 +213,11 @@ func (db *DB) Where(query interface{}, args ...interface{}) (tx *DB) { } // Not add NOT conditions +// +// Not works similarly to where, and has the same syntax. +// +// // Find the first user with name not equal to jinzhu +// db.Not("name = ?", "jinzhu").First(&user) func (db *DB) Not(query interface{}, args ...interface{}) (tx *DB) { tx = db.getInstance() if conds := tx.Statement.BuildCondition(query, args...); len(conds) > 0 { @@ -170,6 +227,11 @@ func (db *DB) Not(query interface{}, args ...interface{}) (tx *DB) { } // Or add OR conditions +// +// Or is used to chain together queries with an OR. +// +// // Find the first user with name equal to jinzhu or john +// db.Where("name = ?", "jinzhu").Or("name = ?", "john").First(&user) func (db *DB) Or(query interface{}, args ...interface{}) (tx *DB) { tx = db.getInstance() if conds := tx.Statement.BuildCondition(query, args...); len(conds) > 0 { @@ -179,26 +241,45 @@ func (db *DB) Or(query interface{}, args ...interface{}) (tx *DB) { } // Joins specify Joins conditions -// db.Joins("Account").Find(&user) -// db.Joins("JOIN emails ON emails.user_id = users.id AND emails.email = ?", "jinzhu@example.org").Find(&user) -// db.Joins("Account", DB.Select("id").Where("user_id = users.id AND name = ?", "someName").Model(&Account{})) +// +// db.Joins("Account").Find(&user) +// db.Joins("JOIN emails ON emails.user_id = users.id AND emails.email = ?", "jinzhu@example.org").Find(&user) +// db.Joins("Account", DB.Select("id").Where("user_id = users.id AND name = ?", "someName").Model(&Account{})) func (db *DB) Joins(query string, args ...interface{}) (tx *DB) { + return joins(db, clause.LeftJoin, query, args...) +} + +// InnerJoins specify inner joins conditions +// db.InnerJoins("Account").Find(&user) +func (db *DB) InnerJoins(query string, args ...interface{}) (tx *DB) { + return joins(db, clause.InnerJoin, query, args...) +} + +func joins(db *DB, joinType clause.JoinType, query string, args ...interface{}) (tx *DB) { tx = db.getInstance() if len(args) == 1 { if db, ok := args[0].(*DB); ok { + j := join{ + Name: query, Conds: args, Selects: db.Statement.Selects, + Omits: db.Statement.Omits, JoinType: joinType, + } if where, ok := db.Statement.Clauses["WHERE"].Expression.(clause.Where); ok { - tx.Statement.Joins = append(tx.Statement.Joins, join{Name: query, Conds: args, On: &where}) - return + j.On = &where } + tx.Statement.Joins = append(tx.Statement.Joins, j) + return } } - tx.Statement.Joins = append(tx.Statement.Joins, join{Name: query, Conds: args}) + tx.Statement.Joins = append(tx.Statement.Joins, join{Name: query, Conds: args, JoinType: joinType}) return } // Group specify the group method on the find +// +// // Select the sum age of users with given names +// db.Model(&User{}).Select("name, sum(age) as total").Group("name").Find(&results) func (db *DB) Group(name string) (tx *DB) { tx = db.getInstance() @@ -210,6 +291,9 @@ func (db *DB) Group(name string) (tx *DB) { } // Having specify HAVING conditions for GROUP BY +// +// // Select the sum age of users with name jinzhu +// db.Model(&User{}).Select("name, sum(age) as total").Group("name").Having("name = ?", "jinzhu").Find(&result) func (db *DB) Having(query interface{}, args ...interface{}) (tx *DB) { tx = db.getInstance() tx.Statement.AddClause(clause.GroupBy{ @@ -218,13 +302,20 @@ func (db *DB) Having(query interface{}, args ...interface{}) (tx *DB) { return } -// Order specify order when retrieve records from database -// db.Order("name DESC") -// db.Order(clause.OrderByColumn{Column: clause.Column{Name: "name"}, Desc: true}) +// Order specify order when retrieving records from database +// +// db.Order("name DESC") +// db.Order(clause.OrderByColumn{Column: clause.Column{Name: "name"}, Desc: true}) +// db.Order(clause.OrderBy{Columns: []clause.OrderByColumn{ +// {Column: clause.Column{Name: "name"}, Desc: true}, +// {Column: clause.Column{Name: "age"}, Desc: true}, +// }}) func (db *DB) Order(value interface{}) (tx *DB) { tx = db.getInstance() switch v := value.(type) { + case clause.OrderBy: + tx.Statement.AddClause(v) case clause.OrderByColumn: tx.Statement.AddClause(clause.OrderBy{ Columns: []clause.OrderByColumn{v}, @@ -242,13 +333,27 @@ func (db *DB) Order(value interface{}) (tx *DB) { } // Limit specify the number of records to be retrieved +// +// Limit conditions can be cancelled by using `Limit(-1)`. +// +// // retrieve 3 users +// db.Limit(3).Find(&users) +// // retrieve 3 users into users1, and all users into users2 +// db.Limit(3).Find(&users1).Limit(-1).Find(&users2) func (db *DB) Limit(limit int) (tx *DB) { tx = db.getInstance() - tx.Statement.AddClause(clause.Limit{Limit: limit}) + tx.Statement.AddClause(clause.Limit{Limit: &limit}) return } // Offset specify the number of records to skip before starting to return the records +// +// Offset conditions can be cancelled by using `Offset(-1)`. +// +// // select the third user +// db.Offset(2).First(&user) +// // select the first user by cancelling an earlier chained offset +// db.Offset(5).Offset(-1).First(&user) func (db *DB) Offset(offset int) (tx *DB) { tx = db.getInstance() tx.Statement.AddClause(clause.Limit{Offset: offset}) @@ -256,25 +361,37 @@ func (db *DB) Offset(offset int) (tx *DB) { } // Scopes pass current database connection to arguments `func(DB) DB`, which could be used to add conditions dynamically -// func AmountGreaterThan1000(db *gorm.DB) *gorm.DB { -// return db.Where("amount > ?", 1000) -// } // -// func OrderStatus(status []string) func (db *gorm.DB) *gorm.DB { -// return func (db *gorm.DB) *gorm.DB { -// return db.Scopes(AmountGreaterThan1000).Where("status in (?)", status) -// } -// } +// func AmountGreaterThan1000(db *gorm.DB) *gorm.DB { +// return db.Where("amount > ?", 1000) +// } +// +// func OrderStatus(status []string) func (db *gorm.DB) *gorm.DB { +// return func (db *gorm.DB) *gorm.DB { +// return db.Scopes(AmountGreaterThan1000).Where("status in (?)", status) +// } +// } // -// db.Scopes(AmountGreaterThan1000, OrderStatus([]string{"paid", "shipped"})).Find(&orders) +// db.Scopes(AmountGreaterThan1000, OrderStatus([]string{"paid", "shipped"})).Find(&orders) func (db *DB) Scopes(funcs ...func(*DB) *DB) (tx *DB) { tx = db.getInstance() tx.Statement.scopes = append(tx.Statement.scopes, funcs...) return tx } +func (db *DB) executeScopes() (tx *DB) { + scopes := db.Statement.scopes + db.Statement.scopes = nil + for _, scope := range scopes { + db = scope(db) + } + return db +} + // Preload preload associations with given conditions -// db.Preload("Orders", "state NOT IN (?)", "cancelled").Find(&users) +// +// // get all users, and preload all non-cancelled orders +// db.Preload("Orders", "state NOT IN (?)", "cancelled").Find(&users) func (db *DB) Preload(query string, args ...interface{}) (tx *DB) { tx = db.getInstance() if tx.Statement.Preloads == nil { @@ -284,18 +401,56 @@ func (db *DB) Preload(query string, args ...interface{}) (tx *DB) { return } +// Attrs provide attributes used in [FirstOrCreate] or [FirstOrInit] +// +// Attrs only adds attributes if the record is not found. +// +// // assign an email if the record is not found +// db.Where(User{Name: "non_existing"}).Attrs(User{Email: "fake@fake.org"}).FirstOrInit(&user) +// // user -> User{Name: "non_existing", Email: "fake@fake.org"} +// +// // assign an email if the record is not found, otherwise ignore provided email +// db.Where(User{Name: "jinzhu"}).Attrs(User{Email: "fake@fake.org"}).FirstOrInit(&user) +// // user -> User{Name: "jinzhu", Age: 20} +// +// [FirstOrCreate]: https://gorm.io/docs/advanced_query.html#FirstOrCreate +// [FirstOrInit]: https://gorm.io/docs/advanced_query.html#FirstOrInit func (db *DB) Attrs(attrs ...interface{}) (tx *DB) { tx = db.getInstance() tx.Statement.attrs = attrs return } +// Assign provide attributes used in [FirstOrCreate] or [FirstOrInit] +// +// Assign adds attributes even if the record is found. If using FirstOrCreate, this means that +// records will be updated even if they are found. +// +// // assign an email regardless of if the record is not found +// db.Where(User{Name: "non_existing"}).Assign(User{Email: "fake@fake.org"}).FirstOrInit(&user) +// // user -> User{Name: "non_existing", Email: "fake@fake.org"} +// +// // assign email regardless of if record is found +// db.Where(User{Name: "jinzhu"}).Assign(User{Email: "fake@fake.org"}).FirstOrInit(&user) +// // user -> User{Name: "jinzhu", Age: 20, Email: "fake@fake.org"} +// +// [FirstOrCreate]: https://gorm.io/docs/advanced_query.html#FirstOrCreate +// [FirstOrInit]: https://gorm.io/docs/advanced_query.html#FirstOrInit func (db *DB) Assign(attrs ...interface{}) (tx *DB) { tx = db.getInstance() tx.Statement.assigns = attrs return } +// Unscoped disables the global scope of soft deletion in a query. +// By default, GORM uses soft deletion, marking records as "deleted" +// by setting a timestamp on a specific field (e.g., `deleted_at`). +// Unscoped allows queries to include records marked as deleted, +// overriding the soft deletion behavior. +// Example: +// var users []User +// db.Unscoped().Find(&users) +// // Retrieves all users, including deleted ones. func (db *DB) Unscoped() (tx *DB) { tx = db.getInstance() tx.Statement.Unscoped = true diff --git a/vendor/gorm.io/gorm/clause/clause.go b/vendor/gorm.io/gorm/clause/clause.go index de19f2e3..1354fc05 100644 --- a/vendor/gorm.io/gorm/clause/clause.go +++ b/vendor/gorm.io/gorm/clause/clause.go @@ -20,6 +20,7 @@ type Builder interface { Writer WriteQuoted(field interface{}) AddVar(Writer, ...interface{}) + AddError(error) error } // Clause diff --git a/vendor/gorm.io/gorm/clause/expression.go b/vendor/gorm.io/gorm/clause/expression.go index 92ac7f22..3140846e 100644 --- a/vendor/gorm.io/gorm/clause/expression.go +++ b/vendor/gorm.io/gorm/clause/expression.go @@ -126,7 +126,7 @@ func (expr NamedExpr) Build(builder Builder) { for _, v := range []byte(expr.SQL) { if v == '@' && !inName { inName = true - name = []byte{} + name = name[:0] } else if v == ' ' || v == ',' || v == ')' || v == '"' || v == '\'' || v == '`' || v == '\r' || v == '\n' || v == ';' { if inName { if nv, ok := namedMap[string(name)]; ok { @@ -246,15 +246,19 @@ func (eq Eq) Build(builder Builder) { switch eq.Value.(type) { case []string, []int, []int32, []int64, []uint, []uint32, []uint64, []interface{}: - builder.WriteString(" IN (") rv := reflect.ValueOf(eq.Value) - for i := 0; i < rv.Len(); i++ { - if i > 0 { - builder.WriteByte(',') + if rv.Len() == 0 { + builder.WriteString(" IN (NULL)") + } else { + builder.WriteString(" IN (") + for i := 0; i < rv.Len(); i++ { + if i > 0 { + builder.WriteByte(',') + } + builder.AddVar(builder, rv.Index(i).Interface()) } - builder.AddVar(builder, rv.Index(i).Interface()) + builder.WriteByte(')') } - builder.WriteByte(')') default: if eqNil(eq.Value) { builder.WriteString(" IS NULL") diff --git a/vendor/gorm.io/gorm/clause/joins.go b/vendor/gorm.io/gorm/clause/joins.go index f3e373f2..879892be 100644 --- a/vendor/gorm.io/gorm/clause/joins.go +++ b/vendor/gorm.io/gorm/clause/joins.go @@ -9,7 +9,7 @@ const ( RightJoin JoinType = "RIGHT" ) -// Join join clause for from +// Join clause for from type Join struct { Type JoinType Table Table diff --git a/vendor/gorm.io/gorm/clause/limit.go b/vendor/gorm.io/gorm/clause/limit.go index 184f6025..3edde434 100644 --- a/vendor/gorm.io/gorm/clause/limit.go +++ b/vendor/gorm.io/gorm/clause/limit.go @@ -1,10 +1,8 @@ package clause -import "strconv" - // Limit limit clause type Limit struct { - Limit int + Limit *int Offset int } @@ -15,16 +13,16 @@ func (limit Limit) Name() string { // Build build where clause func (limit Limit) Build(builder Builder) { - if limit.Limit > 0 { + if limit.Limit != nil && *limit.Limit >= 0 { builder.WriteString("LIMIT ") - builder.WriteString(strconv.Itoa(limit.Limit)) + builder.AddVar(builder, *limit.Limit) } if limit.Offset > 0 { - if limit.Limit > 0 { + if limit.Limit != nil && *limit.Limit >= 0 { builder.WriteByte(' ') } builder.WriteString("OFFSET ") - builder.WriteString(strconv.Itoa(limit.Offset)) + builder.AddVar(builder, limit.Offset) } } @@ -33,7 +31,7 @@ func (limit Limit) MergeClause(clause *Clause) { clause.Name = "" if v, ok := clause.Expression.(Limit); ok { - if limit.Limit == 0 && v.Limit != 0 { + if (limit.Limit == nil || *limit.Limit == 0) && v.Limit != nil { limit.Limit = v.Limit } diff --git a/vendor/gorm.io/gorm/clause/locking.go b/vendor/gorm.io/gorm/clause/locking.go index 290aac92..2bc48ceb 100644 --- a/vendor/gorm.io/gorm/clause/locking.go +++ b/vendor/gorm.io/gorm/clause/locking.go @@ -1,5 +1,12 @@ package clause +const ( + LockingStrengthUpdate = "UPDATE" + LockingStrengthShare = "SHARE" + LockingOptionsSkipLocked = "SKIP LOCKED" + LockingOptionsNoWait = "NOWAIT" +) + type Locking struct { Strength string Table Table diff --git a/vendor/gorm.io/gorm/clause/on_conflict.go b/vendor/gorm.io/gorm/clause/on_conflict.go index 309c5fcd..032bf4a1 100644 --- a/vendor/gorm.io/gorm/clause/on_conflict.go +++ b/vendor/gorm.io/gorm/clause/on_conflict.go @@ -16,27 +16,27 @@ func (OnConflict) Name() string { // Build build onConflict clause func (onConflict OnConflict) Build(builder Builder) { - if len(onConflict.Columns) > 0 { - builder.WriteByte('(') - for idx, column := range onConflict.Columns { - if idx > 0 { - builder.WriteByte(',') - } - builder.WriteQuoted(column) - } - builder.WriteString(`) `) - } - - if len(onConflict.TargetWhere.Exprs) > 0 { - builder.WriteString(" WHERE ") - onConflict.TargetWhere.Build(builder) - builder.WriteByte(' ') - } - if onConflict.OnConstraint != "" { builder.WriteString("ON CONSTRAINT ") builder.WriteString(onConflict.OnConstraint) builder.WriteByte(' ') + } else { + if len(onConflict.Columns) > 0 { + builder.WriteByte('(') + for idx, column := range onConflict.Columns { + if idx > 0 { + builder.WriteByte(',') + } + builder.WriteQuoted(column) + } + builder.WriteString(`) `) + } + + if len(onConflict.TargetWhere.Exprs) > 0 { + builder.WriteString(" WHERE ") + onConflict.TargetWhere.Build(builder) + builder.WriteByte(' ') + } } if onConflict.DoNothing { diff --git a/vendor/gorm.io/gorm/clause/where.go b/vendor/gorm.io/gorm/clause/where.go index a29401cf..2c3c90f1 100644 --- a/vendor/gorm.io/gorm/clause/where.go +++ b/vendor/gorm.io/gorm/clause/where.go @@ -21,6 +21,12 @@ func (where Where) Name() string { // Build build where clause func (where Where) Build(builder Builder) { + if len(where.Exprs) == 1 { + if andCondition, ok := where.Exprs[0].(AndConditions); ok { + where.Exprs = andCondition.Exprs + } + } + // Switch position if the first query expression is a single Or condition for idx, expr := range where.Exprs { if v, ok := expr.(OrConditions); !ok || len(v.Exprs) > 1 { @@ -147,6 +153,11 @@ func Not(exprs ...Expression) Expression { if len(exprs) == 0 { return nil } + if len(exprs) == 1 { + if andCondition, ok := exprs[0].(AndConditions); ok { + exprs = andCondition.Exprs + } + } return NotConditions{Exprs: exprs} } @@ -155,19 +166,63 @@ type NotConditions struct { } func (not NotConditions) Build(builder Builder) { - if len(not.Exprs) > 1 { - builder.WriteByte('(') + anyNegationBuilder := false + for _, c := range not.Exprs { + if _, ok := c.(NegationExpressionBuilder); ok { + anyNegationBuilder = true + break + } } - for idx, c := range not.Exprs { - if idx > 0 { - builder.WriteString(AndWithSpace) + if anyNegationBuilder { + if len(not.Exprs) > 1 { + builder.WriteByte('(') } - if negationBuilder, ok := c.(NegationExpressionBuilder); ok { - negationBuilder.NegationBuild(builder) - } else { - builder.WriteString("NOT ") + for idx, c := range not.Exprs { + if idx > 0 { + builder.WriteString(AndWithSpace) + } + + if negationBuilder, ok := c.(NegationExpressionBuilder); ok { + negationBuilder.NegationBuild(builder) + } else { + builder.WriteString("NOT ") + e, wrapInParentheses := c.(Expr) + if wrapInParentheses { + sql := strings.ToUpper(e.SQL) + if wrapInParentheses = strings.Contains(sql, AndWithSpace) || strings.Contains(sql, OrWithSpace); wrapInParentheses { + builder.WriteByte('(') + } + } + + c.Build(builder) + + if wrapInParentheses { + builder.WriteByte(')') + } + } + } + + if len(not.Exprs) > 1 { + builder.WriteByte(')') + } + } else { + builder.WriteString("NOT ") + if len(not.Exprs) > 1 { + builder.WriteByte('(') + } + + for idx, c := range not.Exprs { + if idx > 0 { + switch c.(type) { + case OrConditions: + builder.WriteString(OrWithSpace) + default: + builder.WriteString(AndWithSpace) + } + } + e, wrapInParentheses := c.(Expr) if wrapInParentheses { sql := strings.ToUpper(e.SQL) @@ -182,9 +237,9 @@ func (not NotConditions) Build(builder Builder) { builder.WriteByte(')') } } - } - if len(not.Exprs) > 1 { - builder.WriteByte(')') + if len(not.Exprs) > 1 { + builder.WriteByte(')') + } } } diff --git a/vendor/gorm.io/gorm/errors.go b/vendor/gorm.io/gorm/errors.go index 49cbfe64..025f5d64 100644 --- a/vendor/gorm.io/gorm/errors.go +++ b/vendor/gorm.io/gorm/errors.go @@ -21,6 +21,10 @@ var ( ErrPrimaryKeyRequired = errors.New("primary key required") // ErrModelValueRequired model value required ErrModelValueRequired = errors.New("model value required") + // ErrModelAccessibleFieldsRequired model accessible fields required + ErrModelAccessibleFieldsRequired = errors.New("model accessible fields required") + // ErrSubQueryRequired sub query required + ErrSubQueryRequired = errors.New("sub query required") // ErrInvalidData unsupported data ErrInvalidData = errors.New("unsupported data") // ErrUnsupportedDriver unsupported driver @@ -41,4 +45,10 @@ var ( ErrInvalidValueOfLength = errors.New("invalid association values, length doesn't match") // ErrPreloadNotAllowed preload is not allowed when count is used ErrPreloadNotAllowed = errors.New("preload is not allowed when count is used") + // ErrDuplicatedKey occurs when there is a unique key constraint violation + ErrDuplicatedKey = errors.New("duplicated key not allowed") + // ErrForeignKeyViolated occurs when there is a foreign key constraint violation + ErrForeignKeyViolated = errors.New("violates foreign key constraint") + // ErrCheckConstraintViolated occurs when there is a check constraint violation + ErrCheckConstraintViolated = errors.New("violates check constraint") ) diff --git a/vendor/gorm.io/gorm/finisher_api.go b/vendor/gorm.io/gorm/finisher_api.go index 7a3f27ba..f97571ed 100644 --- a/vendor/gorm.io/gorm/finisher_api.go +++ b/vendor/gorm.io/gorm/finisher_api.go @@ -13,7 +13,7 @@ import ( "gorm.io/gorm/utils" ) -// Create insert the value into database +// Create inserts value, returning the inserted data's primary key in value's id func (db *DB) Create(value interface{}) (tx *DB) { if db.CreateBatchSize > 0 { return db.CreateInBatches(value, db.CreateBatchSize) @@ -24,7 +24,7 @@ func (db *DB) Create(value interface{}) (tx *DB) { return tx.callbacks.Create().Execute(tx) } -// CreateInBatches insert the value in batches into database +// CreateInBatches inserts value in batches of batchSize func (db *DB) CreateInBatches(value interface{}, batchSize int) (tx *DB) { reflectValue := reflect.Indirect(reflect.ValueOf(value)) @@ -33,9 +33,10 @@ func (db *DB) CreateInBatches(value interface{}, batchSize int) (tx *DB) { var rowsAffected int64 tx = db.getInstance() + // the reflection length judgment of the optimized value + reflectLen := reflectValue.Len() + callFc := func(tx *DB) error { - // the reflection length judgment of the optimized value - reflectLen := reflectValue.Len() for i := 0; i < reflectLen; i += batchSize { ends := i + batchSize if ends > reflectLen { @@ -53,7 +54,7 @@ func (db *DB) CreateInBatches(value interface{}, batchSize int) (tx *DB) { return nil } - if tx.SkipDefaultTransaction { + if tx.SkipDefaultTransaction || reflectLen <= batchSize { tx.AddError(callFc(tx.Session(&Session{}))) } else { tx.AddError(tx.Transaction(callFc)) @@ -68,7 +69,7 @@ func (db *DB) CreateInBatches(value interface{}, batchSize int) (tx *DB) { return } -// Save update value in database, if the value doesn't have primary key, will insert it +// Save updates value in database. If value doesn't contain a matching primary key, value is inserted. func (db *DB) Save(value interface{}) (tx *DB) { tx = db.getInstance() tx.Statement.Dest = value @@ -101,20 +102,19 @@ func (db *DB) Save(value interface{}) (tx *DB) { tx.Statement.Selects = append(tx.Statement.Selects, "*") } - tx = tx.callbacks.Update().Execute(tx) + updateTx := tx.callbacks.Update().Execute(tx.Session(&Session{Initialized: true})) - if tx.Error == nil && tx.RowsAffected == 0 && !tx.DryRun && !selectedUpdate { - result := reflect.New(tx.Statement.Schema.ModelType).Interface() - if result := tx.Session(&Session{}).Limit(1).Find(result); result.RowsAffected == 0 { - return tx.Create(value) - } + if updateTx.Error == nil && updateTx.RowsAffected == 0 && !updateTx.DryRun && !selectedUpdate { + return tx.Session(&Session{SkipHooks: true}).Clauses(clause.OnConflict{UpdateAll: true}).Create(value) } + + return updateTx } return } -// First find first record that match given conditions, order by primary key +// First finds the first record ordered by primary key, matching given conditions conds func (db *DB) First(dest interface{}, conds ...interface{}) (tx *DB) { tx = db.Limit(1).Order(clause.OrderByColumn{ Column: clause.Column{Table: clause.CurrentTable, Name: clause.PrimaryKey}, @@ -129,7 +129,7 @@ func (db *DB) First(dest interface{}, conds ...interface{}) (tx *DB) { return tx.callbacks.Query().Execute(tx) } -// Take return a record that match given conditions, the order will depend on the database implementation +// Take finds the first record returned by the database in no specified order, matching given conditions conds func (db *DB) Take(dest interface{}, conds ...interface{}) (tx *DB) { tx = db.Limit(1) if len(conds) > 0 { @@ -142,7 +142,7 @@ func (db *DB) Take(dest interface{}, conds ...interface{}) (tx *DB) { return tx.callbacks.Query().Execute(tx) } -// Last find last record that match given conditions, order by primary key +// Last finds the last record ordered by primary key, matching given conditions conds func (db *DB) Last(dest interface{}, conds ...interface{}) (tx *DB) { tx = db.Limit(1).Order(clause.OrderByColumn{ Column: clause.Column{Table: clause.CurrentTable, Name: clause.PrimaryKey}, @@ -158,7 +158,7 @@ func (db *DB) Last(dest interface{}, conds ...interface{}) (tx *DB) { return tx.callbacks.Query().Execute(tx) } -// Find find records that match given conditions +// Find finds all records matching given conditions conds func (db *DB) Find(dest interface{}, conds ...interface{}) (tx *DB) { tx = db.getInstance() if len(conds) > 0 { @@ -170,7 +170,7 @@ func (db *DB) Find(dest interface{}, conds ...interface{}) (tx *DB) { return tx.callbacks.Query().Execute(tx) } -// FindInBatches find records in batches +// FindInBatches finds all records in batches of batchSize func (db *DB) FindInBatches(dest interface{}, batchSize int, fc func(tx *DB, batch int) error) *DB { var ( tx = db.Order(clause.OrderByColumn{ @@ -185,7 +185,9 @@ func (db *DB) FindInBatches(dest interface{}, batchSize int, fc func(tx *DB, bat var totalSize int if c, ok := tx.Statement.Clauses["LIMIT"]; ok { if limit, ok := c.Expression.(clause.Limit); ok { - totalSize = limit.Limit + if limit.Limit != nil { + totalSize = *limit.Limit + } if totalSize > 0 && batchSize > totalSize { batchSize = totalSize @@ -202,7 +204,9 @@ func (db *DB) FindInBatches(dest interface{}, batchSize int, fc func(tx *DB, bat batch++ if result.Error == nil && result.RowsAffected != 0 { - tx.AddError(fc(result, batch)) + fcTx := result.Session(&Session{NewDB: true}) + fcTx.RowsAffected = result.RowsAffected + tx.AddError(fc(fcTx, batch)) } else if result.Error != nil { tx.AddError(result.Error) } @@ -227,7 +231,11 @@ func (db *DB) FindInBatches(dest interface{}, batchSize int, fc func(tx *DB, bat break } - primaryValue, _ := result.Statement.Schema.PrioritizedPrimaryField.ValueOf(tx.Statement.Context, resultsValue.Index(resultsValue.Len()-1)) + primaryValue, zero := result.Statement.Schema.PrioritizedPrimaryField.ValueOf(tx.Statement.Context, resultsValue.Index(resultsValue.Len()-1)) + if zero { + tx.AddError(ErrPrimaryKeyRequired) + break + } queryDB = tx.Clauses(clause.Gt{Column: clause.Column{Table: clause.CurrentTable, Name: clause.PrimaryKey}, Value: primaryValue}) } @@ -284,7 +292,18 @@ func (db *DB) assignInterfacesToValue(values ...interface{}) { } } -// FirstOrInit gets the first matched record or initialize a new instance with given conditions (only works with struct or map conditions) +// FirstOrInit finds the first matching record, otherwise if not found initializes a new instance with given conds. +// Each conds must be a struct or map. +// +// FirstOrInit never modifies the database. It is often used with Assign and Attrs. +// +// // assign an email if the record is not found +// db.Where(User{Name: "non_existing"}).Attrs(User{Email: "fake@fake.org"}).FirstOrInit(&user) +// // user -> User{Name: "non_existing", Email: "fake@fake.org"} +// +// // assign email regardless of if record is found +// db.Where(User{Name: "jinzhu"}).Assign(User{Email: "fake@fake.org"}).FirstOrInit(&user) +// // user -> User{Name: "jinzhu", Age: 20, Email: "fake@fake.org"} func (db *DB) FirstOrInit(dest interface{}, conds ...interface{}) (tx *DB) { queryTx := db.Limit(1).Order(clause.OrderByColumn{ Column: clause.Column{Table: clause.CurrentTable, Name: clause.PrimaryKey}, @@ -310,62 +329,82 @@ func (db *DB) FirstOrInit(dest interface{}, conds ...interface{}) (tx *DB) { return } -// FirstOrCreate gets the first matched record or create a new one with given conditions (only works with struct, map conditions) +// FirstOrCreate finds the first matching record, otherwise if not found creates a new instance with given conds. +// Each conds must be a struct or map. +// +// Using FirstOrCreate in conjunction with Assign will result in an update to the database even if the record exists. +// +// // assign an email if the record is not found +// result := db.Where(User{Name: "non_existing"}).Attrs(User{Email: "fake@fake.org"}).FirstOrCreate(&user) +// // user -> User{Name: "non_existing", Email: "fake@fake.org"} +// // result.RowsAffected -> 1 +// +// // assign email regardless of if record is found +// result := db.Where(User{Name: "jinzhu"}).Assign(User{Email: "fake@fake.org"}).FirstOrCreate(&user) +// // user -> User{Name: "jinzhu", Age: 20, Email: "fake@fake.org"} +// // result.RowsAffected -> 1 func (db *DB) FirstOrCreate(dest interface{}, conds ...interface{}) (tx *DB) { tx = db.getInstance() queryTx := db.Session(&Session{}).Limit(1).Order(clause.OrderByColumn{ Column: clause.Column{Table: clause.CurrentTable, Name: clause.PrimaryKey}, }) - if result := queryTx.Find(dest, conds...); result.Error == nil { - if result.RowsAffected == 0 { - if c, ok := result.Statement.Clauses["WHERE"]; ok { - if where, ok := c.Expression.(clause.Where); ok { - result.assignInterfacesToValue(where.Exprs) - } - } - // initialize with attrs, conds - if len(db.Statement.attrs) > 0 { - result.assignInterfacesToValue(db.Statement.attrs...) - } + result := queryTx.Find(dest, conds...) + if result.Error != nil { + tx.Error = result.Error + return tx + } - // initialize with attrs, conds - if len(db.Statement.assigns) > 0 { - result.assignInterfacesToValue(db.Statement.assigns...) + if result.RowsAffected == 0 { + if c, ok := result.Statement.Clauses["WHERE"]; ok { + if where, ok := c.Expression.(clause.Where); ok { + result.assignInterfacesToValue(where.Exprs) } + } - return tx.Create(dest) - } else if len(db.Statement.assigns) > 0 { - exprs := tx.Statement.BuildCondition(db.Statement.assigns[0], db.Statement.assigns[1:]...) - assigns := map[string]interface{}{} - for _, expr := range exprs { - if eq, ok := expr.(clause.Eq); ok { - switch column := eq.Column.(type) { - case string: - assigns[column] = eq.Value - case clause.Column: - assigns[column.Name] = eq.Value - default: - } + // initialize with attrs, conds + if len(db.Statement.attrs) > 0 { + result.assignInterfacesToValue(db.Statement.attrs...) + } + + // initialize with attrs, conds + if len(db.Statement.assigns) > 0 { + result.assignInterfacesToValue(db.Statement.assigns...) + } + + return tx.Create(dest) + } else if len(db.Statement.assigns) > 0 { + exprs := tx.Statement.BuildCondition(db.Statement.assigns[0], db.Statement.assigns[1:]...) + assigns := map[string]interface{}{} + for i := 0; i < len(exprs); i++ { + expr := exprs[i] + + if eq, ok := expr.(clause.AndConditions); ok { + exprs = append(exprs, eq.Exprs...) + } else if eq, ok := expr.(clause.Eq); ok { + switch column := eq.Column.(type) { + case string: + assigns[column] = eq.Value + case clause.Column: + assigns[column.Name] = eq.Value } } - - return tx.Model(dest).Updates(assigns) } - } else { - tx.Error = result.Error + + return tx.Model(dest).Updates(assigns) } + return tx } -// Update update attributes with callbacks, refer: https://gorm.io/docs/update.html#Update-Changed-Fields +// Update updates column with value using callbacks. Reference: https://gorm.io/docs/update.html#Update-Changed-Fields func (db *DB) Update(column string, value interface{}) (tx *DB) { tx = db.getInstance() tx.Statement.Dest = map[string]interface{}{column: value} return tx.callbacks.Update().Execute(tx) } -// Updates update attributes with callbacks, refer: https://gorm.io/docs/update.html#Update-Changed-Fields +// Updates updates attributes using callbacks. values must be a struct or map. Reference: https://gorm.io/docs/update.html#Update-Changed-Fields func (db *DB) Updates(values interface{}) (tx *DB) { tx = db.getInstance() tx.Statement.Dest = values @@ -386,7 +425,9 @@ func (db *DB) UpdateColumns(values interface{}) (tx *DB) { return tx.callbacks.Update().Execute(tx) } -// Delete delete value match given conditions, if the value has primary key, then will including the primary key as condition +// Delete deletes value matching given conditions. If value contains primary key it is included in the conditions. If +// value includes a deleted_at field, then Delete performs a soft delete instead by setting deleted_at with the current +// time if null. func (db *DB) Delete(value interface{}, conds ...interface{}) (tx *DB) { tx = db.getInstance() if len(conds) > 0 { @@ -480,7 +521,7 @@ func (db *DB) Rows() (*sql.Rows, error) { return rows, tx.Error } -// Scan scan value to a struct +// Scan scans selected value to the struct dest func (db *DB) Scan(dest interface{}) (tx *DB) { config := *db.Config currentLogger, newLogger := config.Logger, logger.Recorder.New() @@ -494,6 +535,7 @@ func (db *DB) Scan(dest interface{}) (tx *DB) { tx.ScanRows(rows, dest) } else { tx.RowsAffected = 0 + tx.AddError(rows.Err()) } tx.AddError(rows.Close()) } @@ -505,9 +547,10 @@ func (db *DB) Scan(dest interface{}) (tx *DB) { return } -// Pluck used to query single column from a model as a map -// var ages []int64 -// db.Model(&users).Pluck("age", &ages) +// Pluck queries a single column from a model, returning in the slice dest. E.g.: +// +// var ages []int64 +// db.Model(&users).Pluck("age", &ages) func (db *DB) Pluck(column string, dest interface{}) (tx *DB) { tx = db.getInstance() if tx.Statement.Model != nil { @@ -548,7 +591,8 @@ func (db *DB) ScanRows(rows *sql.Rows, dest interface{}) error { return tx.Error } -// Connection use a db conn to execute Multiple commands,this conn will put conn pool after it is executed. +// Connection uses a db connection to execute an arbitrary number of commands in fc. When finished, the connection is +// returned to the connection pool. func (db *DB) Connection(fc func(tx *DB) error) (err error) { if db.Error != nil { return db.Error @@ -570,7 +614,9 @@ func (db *DB) Connection(fc func(tx *DB) error) (err error) { return fc(tx) } -// Transaction start a transaction as a block, return error will rollback, otherwise to commit. +// Transaction start a transaction as a block, return error will rollback, otherwise to commit. Transaction executes an +// arbitrary number of commands in fc within a transaction. On success the changes are committed; if an error occurs +// they are rolled back. func (db *DB) Transaction(fc func(tx *DB) error, opts ...*sql.TxOptions) (err error) { panicked := true @@ -581,7 +627,6 @@ func (db *DB) Transaction(fc func(tx *DB) error, opts ...*sql.TxOptions) (err er if err != nil { return } - defer func() { // Make sure to rollback when panic, Block error or Commit error if panicked || err != nil { @@ -613,7 +658,7 @@ func (db *DB) Transaction(fc func(tx *DB) error, opts ...*sql.TxOptions) (err er return } -// Begin begins a transaction +// Begin begins a transaction with any transaction options opts func (db *DB) Begin(opts ...*sql.TxOptions) *DB { var ( // clone statement @@ -642,7 +687,7 @@ func (db *DB) Begin(opts ...*sql.TxOptions) *DB { return tx } -// Commit commit a transaction +// Commit commits the changes in a transaction func (db *DB) Commit() *DB { if committer, ok := db.Statement.ConnPool.(TxCommitter); ok && committer != nil && !reflect.ValueOf(committer).IsNil() { db.AddError(committer.Commit()) @@ -652,7 +697,7 @@ func (db *DB) Commit() *DB { return db } -// Rollback rollback a transaction +// Rollback rollbacks the changes in a transaction func (db *DB) Rollback() *DB { if committer, ok := db.Statement.ConnPool.(TxCommitter); ok && committer != nil { if !reflect.ValueOf(committer).IsNil() { @@ -666,7 +711,21 @@ func (db *DB) Rollback() *DB { func (db *DB) SavePoint(name string) *DB { if savePointer, ok := db.Dialector.(SavePointerDialectorInterface); ok { + // close prepared statement, because SavePoint not support prepared statement. + // e.g. mysql8.0 doc: https://dev.mysql.com/doc/refman/8.0/en/sql-prepared-statements.html + var ( + preparedStmtTx *PreparedStmtTX + isPreparedStmtTx bool + ) + // close prepared statement, because SavePoint not support prepared statement. + if preparedStmtTx, isPreparedStmtTx = db.Statement.ConnPool.(*PreparedStmtTX); isPreparedStmtTx { + db.Statement.ConnPool = preparedStmtTx.Tx + } db.AddError(savePointer.SavePoint(db, name)) + // restore prepared statement + if isPreparedStmtTx { + db.Statement.ConnPool = preparedStmtTx + } } else { db.AddError(ErrUnsupportedDriver) } @@ -675,14 +734,28 @@ func (db *DB) SavePoint(name string) *DB { func (db *DB) RollbackTo(name string) *DB { if savePointer, ok := db.Dialector.(SavePointerDialectorInterface); ok { + // close prepared statement, because RollbackTo not support prepared statement. + // e.g. mysql8.0 doc: https://dev.mysql.com/doc/refman/8.0/en/sql-prepared-statements.html + var ( + preparedStmtTx *PreparedStmtTX + isPreparedStmtTx bool + ) + // close prepared statement, because SavePoint not support prepared statement. + if preparedStmtTx, isPreparedStmtTx = db.Statement.ConnPool.(*PreparedStmtTX); isPreparedStmtTx { + db.Statement.ConnPool = preparedStmtTx.Tx + } db.AddError(savePointer.RollbackTo(db, name)) + // restore prepared statement + if isPreparedStmtTx { + db.Statement.ConnPool = preparedStmtTx + } } else { db.AddError(ErrUnsupportedDriver) } return db } -// Exec execute raw sql +// Exec executes raw sql func (db *DB) Exec(sql string, values ...interface{}) (tx *DB) { tx = db.getInstance() tx.Statement.SQL = strings.Builder{} diff --git a/vendor/gorm.io/gorm/gorm.go b/vendor/gorm.io/gorm/gorm.go index 6a6bb032..117d2fd0 100644 --- a/vendor/gorm.io/gorm/gorm.go +++ b/vendor/gorm.io/gorm/gorm.go @@ -4,6 +4,7 @@ import ( "context" "database/sql" "fmt" + "reflect" "sort" "sync" "time" @@ -37,6 +38,8 @@ type Config struct { DisableAutomaticPing bool // DisableForeignKeyConstraintWhenMigrating DisableForeignKeyConstraintWhenMigrating bool + // IgnoreRelationshipsWhenMigrating + IgnoreRelationshipsWhenMigrating bool // DisableNestedTransaction disable nested transaction DisableNestedTransaction bool // AllowGlobalUpdate allow global update @@ -45,6 +48,10 @@ type Config struct { QueryFields bool // CreateBatchSize default create batch size CreateBatchSize int + // TranslateError enabling error translation + TranslateError bool + // PropagateUnscoped propagate Unscoped to every other nested statement + PropagateUnscoped bool // ClauseBuilders clause builder ClauseBuilders map[string]clause.ClauseBuilder @@ -105,6 +112,7 @@ type Session struct { DisableNestedTransaction bool AllowGlobalUpdate bool FullSaveAssociations bool + PropagateUnscoped bool QueryFields bool Context context.Context Logger logger.Interface @@ -142,7 +150,7 @@ func Open(dialector Dialector, opts ...Option) (db *DB, err error) { } if config.NamingStrategy == nil { - config.NamingStrategy = schema.NamingStrategy{} + config.NamingStrategy = schema.NamingStrategy{IdentifierMaxLength: 64} // Default Identifier length is 64 } if config.Logger == nil { @@ -175,17 +183,17 @@ func Open(dialector Dialector, opts ...Option) (db *DB, err error) { if config.Dialector != nil { err = config.Dialector.Initialize(db) - } - preparedStmt := &PreparedStmtDB{ - ConnPool: db.ConnPool, - Stmts: map[string]Stmt{}, - Mux: &sync.RWMutex{}, - PreparedSQL: make([]string, 0, 100), + if err != nil { + if db, _ := db.DB(); db != nil { + _ = db.Close() + } + } } - db.cacheStore.Store(preparedStmtDBKey, preparedStmt) if config.PrepareStmt { + preparedStmt := NewPreparedStmtDB(db.ConnPool) + db.cacheStore.Store(preparedStmtDBKey, preparedStmt) db.ConnPool = preparedStmt } @@ -236,6 +244,10 @@ func (db *DB) Session(config *Session) *DB { txConfig.FullSaveAssociations = true } + if config.PropagateUnscoped { + txConfig.PropagateUnscoped = true + } + if config.Context != nil || config.PrepareStmt || config.SkipHooks { tx.Statement = tx.Statement.clone() tx.Statement.DB = tx @@ -246,16 +258,30 @@ func (db *DB) Session(config *Session) *DB { } if config.PrepareStmt { + var preparedStmt *PreparedStmtDB + if v, ok := db.cacheStore.Load(preparedStmtDBKey); ok { - preparedStmt := v.(*PreparedStmtDB) + preparedStmt = v.(*PreparedStmtDB) + } else { + preparedStmt = NewPreparedStmtDB(db.ConnPool) + db.cacheStore.Store(preparedStmtDBKey, preparedStmt) + } + + switch t := tx.Statement.ConnPool.(type) { + case Tx: + tx.Statement.ConnPool = &PreparedStmtTX{ + Tx: t, + PreparedStmtDB: preparedStmt, + } + default: tx.Statement.ConnPool = &PreparedStmtDB{ ConnPool: db.Config.ConnPool, Mux: preparedStmt.Mux, Stmts: preparedStmt.Stmts, } - txConfig.ConnPool = tx.Statement.ConnPool - txConfig.PrepareStmt = true } + txConfig.ConnPool = tx.Statement.ConnPool + txConfig.PrepareStmt = true } if config.SkipHooks { @@ -300,7 +326,8 @@ func (db *DB) WithContext(ctx context.Context) *DB { // Debug start debug mode func (db *DB) Debug() (tx *DB) { - return db.Session(&Session{ + tx = db.getInstance() + return tx.Session(&Session{ Logger: db.Logger.LogMode(logger.Info), }) } @@ -336,10 +363,18 @@ func (db *DB) Callback() *callbacks { // AddError add error to db func (db *DB) AddError(err error) error { - if db.Error == nil { - db.Error = err - } else if err != nil { - db.Error = fmt.Errorf("%v; %w", db.Error, err) + if err != nil { + if db.Config.TranslateError { + if errTranslator, ok := db.Dialector.(ErrorTranslator); ok { + err = errTranslator.Translate(err) + } + } + + if db.Error == nil { + db.Error = err + } else { + db.Error = fmt.Errorf("%v; %w", db.Error, err) + } } return db.Error } @@ -347,12 +382,20 @@ func (db *DB) AddError(err error) error { // DB returns `*sql.DB` func (db *DB) DB() (*sql.DB, error) { connPool := db.ConnPool + if db.Statement != nil && db.Statement.ConnPool != nil { + connPool = db.Statement.ConnPool + } + if tx, ok := connPool.(*sql.Tx); ok && tx != nil { + return (*sql.DB)(reflect.ValueOf(tx).Elem().FieldByName("db").UnsafePointer()), nil + } if dbConnector, ok := connPool.(GetDBConnector); ok && dbConnector != nil { - return dbConnector.GetDBConn() + if sqldb, err := dbConnector.GetDBConn(); sqldb != nil || err != nil { + return sqldb, err + } } - if sqldb, ok := connPool.(*sql.DB); ok { + if sqldb, ok := connPool.(*sql.DB); ok && sqldb != nil { return sqldb, nil } @@ -366,11 +409,15 @@ func (db *DB) getInstance() *DB { if db.clone == 1 { // clone with new statement tx.Statement = &Statement{ - DB: tx, - ConnPool: db.Statement.ConnPool, - Context: db.Statement.Context, - Clauses: map[string]clause.Clause{}, - Vars: make([]interface{}, 0, 8), + DB: tx, + ConnPool: db.Statement.ConnPool, + Context: db.Statement.Context, + Clauses: map[string]clause.Clause{}, + Vars: make([]interface{}, 0, 8), + SkipHooks: db.Statement.SkipHooks, + } + if db.Config.PropagateUnscoped { + tx.Statement.Unscoped = db.Statement.Unscoped } } else { // with clone statement @@ -412,7 +459,7 @@ func (db *DB) SetupJoinTable(model interface{}, field string, joinTable interfac relation, ok := modelSchema.Relationships.Relations[field] isRelation := ok && relation.JoinTable != nil if !isRelation { - return fmt.Errorf("failed to found relation: %s", field) + return fmt.Errorf("failed to find relation: %s", field) } for _, ref := range relation.References { @@ -455,12 +502,12 @@ func (db *DB) Use(plugin Plugin) error { // ToSQL for generate SQL string. // -// db.ToSQL(func(tx *gorm.DB) *gorm.DB { -// return tx.Model(&User{}).Where(&User{Name: "foo", Age: 20}) -// .Limit(10).Offset(5) -// .Order("name ASC") -// .First(&User{}) -// }) +// db.ToSQL(func(tx *gorm.DB) *gorm.DB { +// return tx.Model(&User{}).Where(&User{Name: "foo", Age: 20}) +// .Limit(10).Offset(5) +// .Order("name ASC") +// .First(&User{}) +// }) func (db *DB) ToSQL(queryFn func(tx *DB) *DB) string { tx := queryFn(db.Session(&Session{DryRun: true, SkipDefaultTransaction: true})) stmt := tx.Statement diff --git a/vendor/gorm.io/gorm/interfaces.go b/vendor/gorm.io/gorm/interfaces.go index 32d49605..3bcc3d57 100644 --- a/vendor/gorm.io/gorm/interfaces.go +++ b/vendor/gorm.io/gorm/interfaces.go @@ -26,6 +26,10 @@ type Plugin interface { Initialize(*DB) error } +type ParamsFilter interface { + ParamsFilter(ctx context.Context, sql string, params ...interface{}) (string, []interface{}) +} + // ConnPool db conns pool interface type ConnPool interface { PrepareContext(ctx context.Context, query string) (*sql.Stmt, error) @@ -82,3 +86,7 @@ type Rows interface { Err() error Close() error } + +type ErrorTranslator interface { + Translate(err error) error +} diff --git a/vendor/gorm.io/gorm/logger/logger.go b/vendor/gorm.io/gorm/logger/logger.go index 2ffd28d5..253f0325 100644 --- a/vendor/gorm.io/gorm/logger/logger.go +++ b/vendor/gorm.io/gorm/logger/logger.go @@ -4,7 +4,7 @@ import ( "context" "errors" "fmt" - "io/ioutil" + "io" "log" "os" "time" @@ -55,6 +55,7 @@ type Config struct { SlowThreshold time.Duration Colorful bool IgnoreRecordNotFoundError bool + ParameterizedQueries bool LogLevel LogLevel } @@ -68,8 +69,8 @@ type Interface interface { } var ( - // Discard Discard logger will print any log to ioutil.Discard - Discard = New(log.New(ioutil.Discard, "", log.LstdFlags), Config{}) + // Discard logger will print any log to io.Discard + Discard = New(log.New(io.Discard, "", log.LstdFlags), Config{}) // Default Default logger Default = New(log.New(os.Stdout, "\r\n", log.LstdFlags), Config{ SlowThreshold: 200 * time.Millisecond, @@ -77,7 +78,7 @@ var ( IgnoreRecordNotFoundError: false, Colorful: true, }) - // Recorder Recorder logger records running SQL into a recorder instance + // Recorder logger records running SQL into a recorder instance Recorder = traceRecorder{Interface: Default, BeginAt: time.Now()} ) @@ -128,28 +129,30 @@ func (l *logger) LogMode(level LogLevel) Interface { } // Info print info -func (l logger) Info(ctx context.Context, msg string, data ...interface{}) { +func (l *logger) Info(ctx context.Context, msg string, data ...interface{}) { if l.LogLevel >= Info { l.Printf(l.infoStr+msg, append([]interface{}{utils.FileWithLineNum()}, data...)...) } } // Warn print warn messages -func (l logger) Warn(ctx context.Context, msg string, data ...interface{}) { +func (l *logger) Warn(ctx context.Context, msg string, data ...interface{}) { if l.LogLevel >= Warn { l.Printf(l.warnStr+msg, append([]interface{}{utils.FileWithLineNum()}, data...)...) } } // Error print error messages -func (l logger) Error(ctx context.Context, msg string, data ...interface{}) { +func (l *logger) Error(ctx context.Context, msg string, data ...interface{}) { if l.LogLevel >= Error { l.Printf(l.errStr+msg, append([]interface{}{utils.FileWithLineNum()}, data...)...) } } // Trace print sql message -func (l logger) Trace(ctx context.Context, begin time.Time, fc func() (string, int64), err error) { +// +//nolint:cyclop +func (l *logger) Trace(ctx context.Context, begin time.Time, fc func() (string, int64), err error) { if l.LogLevel <= Silent { return } @@ -181,6 +184,14 @@ func (l logger) Trace(ctx context.Context, begin time.Time, fc func() (string, i } } +// ParamsFilter filter params +func (l *logger) ParamsFilter(ctx context.Context, sql string, params ...interface{}) (string, []interface{}) { + if l.Config.ParameterizedQueries { + return sql, nil + } + return sql, params +} + type traceRecorder struct { Interface BeginAt time.Time @@ -189,8 +200,8 @@ type traceRecorder struct { Err error } -// New new trace recorder -func (l traceRecorder) New() *traceRecorder { +// New trace recorder +func (l *traceRecorder) New() *traceRecorder { return &traceRecorder{Interface: l.Interface, BeginAt: time.Now()} } diff --git a/vendor/gorm.io/gorm/logger/sql.go b/vendor/gorm.io/gorm/logger/sql.go index c8b194c3..ad478795 100644 --- a/vendor/gorm.io/gorm/logger/sql.go +++ b/vendor/gorm.io/gorm/logger/sql.go @@ -28,8 +28,25 @@ func isPrintable(s string) bool { return true } +// A list of Go types that should be converted to SQL primitives var convertibleTypes = []reflect.Type{reflect.TypeOf(time.Time{}), reflect.TypeOf(false), reflect.TypeOf([]byte{})} +// RegEx matches only numeric values +var numericPlaceholderRe = regexp.MustCompile(`\$\d+\$`) + +func isNumeric(k reflect.Kind) bool { + switch k { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return true + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + return true + case reflect.Float32, reflect.Float64: + return true + default: + return false + } +} + // ExplainSQL generate SQL string with given parameters, the generated SQL is expected to be used in logger, execute it might introduce a SQL injection vulnerability func ExplainSQL(sql string, numericPlaceholder *regexp.Regexp, escaper string, avars ...interface{}) string { var ( @@ -75,26 +92,28 @@ func ExplainSQL(sql string, numericPlaceholder *regexp.Regexp, escaper string, a case reflect.Bool: vars[idx] = fmt.Sprintf("%t", reflectValue.Interface()) case reflect.String: - vars[idx] = escaper + strings.ReplaceAll(fmt.Sprintf("%v", v), escaper, "\\"+escaper) + escaper + vars[idx] = escaper + strings.ReplaceAll(fmt.Sprintf("%v", v), escaper, escaper+escaper) + escaper default: if v != nil && reflectValue.IsValid() && ((reflectValue.Kind() == reflect.Ptr && !reflectValue.IsNil()) || reflectValue.Kind() != reflect.Ptr) { - vars[idx] = escaper + strings.ReplaceAll(fmt.Sprintf("%v", v), escaper, "\\"+escaper) + escaper + vars[idx] = escaper + strings.ReplaceAll(fmt.Sprintf("%v", v), escaper, escaper+escaper) + escaper } else { vars[idx] = nullStr } } case []byte: if s := string(v); isPrintable(s) { - vars[idx] = escaper + strings.ReplaceAll(s, escaper, "\\"+escaper) + escaper + vars[idx] = escaper + strings.ReplaceAll(s, escaper, escaper+escaper) + escaper } else { vars[idx] = escaper + "" + escaper } case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64: vars[idx] = utils.ToString(v) - case float64, float32: - vars[idx] = fmt.Sprintf("%.6f", v) + case float32: + vars[idx] = strconv.FormatFloat(float64(v), 'f', -1, 32) + case float64: + vars[idx] = strconv.FormatFloat(v, 'f', -1, 64) case string: - vars[idx] = escaper + strings.ReplaceAll(v, escaper, "\\"+escaper) + escaper + vars[idx] = escaper + strings.ReplaceAll(v, escaper, escaper+escaper) + escaper default: rv := reflect.ValueOf(v) if v == nil || !rv.IsValid() || rv.Kind() == reflect.Ptr && rv.IsNil() { @@ -104,6 +123,12 @@ func ExplainSQL(sql string, numericPlaceholder *regexp.Regexp, escaper string, a convertParams(v, idx) } else if rv.Kind() == reflect.Ptr && !rv.IsZero() { convertParams(reflect.Indirect(rv).Interface(), idx) + } else if isNumeric(rv.Kind()) { + if rv.CanInt() || rv.CanUint() { + vars[idx] = fmt.Sprintf("%d", rv.Interface()) + } else { + vars[idx] = fmt.Sprintf("%.6f", rv.Interface()) + } } else { for _, t := range convertibleTypes { if rv.Type().ConvertibleTo(t) { @@ -111,7 +136,7 @@ func ExplainSQL(sql string, numericPlaceholder *regexp.Regexp, escaper string, a return } } - vars[idx] = escaper + strings.ReplaceAll(fmt.Sprint(v), escaper, "\\"+escaper) + escaper + vars[idx] = escaper + strings.ReplaceAll(fmt.Sprint(v), escaper, escaper+escaper) + escaper } } } @@ -138,9 +163,18 @@ func ExplainSQL(sql string, numericPlaceholder *regexp.Regexp, escaper string, a sql = newSQL.String() } else { sql = numericPlaceholder.ReplaceAllString(sql, "$$$1$$") - for idx, v := range vars { - sql = strings.Replace(sql, "$"+strconv.Itoa(idx+1)+"$", v, 1) - } + + sql = numericPlaceholderRe.ReplaceAllStringFunc(sql, func(v string) string { + num := v[1 : len(v)-1] + n, _ := strconv.Atoi(num) + + // position var start from 1 ($1, $2) + n -= 1 + if n >= 0 && n <= len(vars)-1 { + return vars[n] + } + return v + }) } return sql diff --git a/vendor/gorm.io/gorm/migrator.go b/vendor/gorm.io/gorm/migrator.go index 34e888f2..3d2b032b 100644 --- a/vendor/gorm.io/gorm/migrator.go +++ b/vendor/gorm.io/gorm/migrator.go @@ -13,11 +13,7 @@ func (db *DB) Migrator() Migrator { // apply scopes to migrator for len(tx.Statement.scopes) > 0 { - scopes := tx.Statement.scopes - tx.Statement.scopes = nil - for _, scope := range scopes { - tx = scope(tx) - } + tx = tx.executeScopes() } return tx.Dialector.Migrator(tx.Session(&Session{})) @@ -30,9 +26,9 @@ func (db *DB) AutoMigrate(dst ...interface{}) error { // ViewOption view option type ViewOption struct { - Replace bool - CheckOption string - Query *DB + Replace bool // If true, exec `CREATE`. If false, exec `CREATE OR REPLACE` + CheckOption string // optional. e.g. `WITH [ CASCADED | LOCAL ] CHECK OPTION` + Query *DB // required subquery. } // ColumnType column type interface @@ -60,6 +56,14 @@ type Index interface { Option() string } +// TableType table type interface +type TableType interface { + Schema() string + Name() string + Type() string + Comment() (comment string, ok bool) +} + // Migrator migrator interface type Migrator interface { // AutoMigrate @@ -68,6 +72,7 @@ type Migrator interface { // Database CurrentDatabase() string FullDataTypeOf(*schema.Field) clause.Expr + GetTypeAliases(databaseTypeName string) []string // Tables CreateTable(dst ...interface{}) error @@ -75,12 +80,15 @@ type Migrator interface { HasTable(dst interface{}) bool RenameTable(oldName, newName interface{}) error GetTables() (tableList []string, err error) + TableType(dst interface{}) (TableType, error) // Columns AddColumn(dst interface{}, field string) error DropColumn(dst interface{}, field string) error AlterColumn(dst interface{}, field string) error MigrateColumn(dst interface{}, field *schema.Field, columnType ColumnType) error + // MigrateColumnUnique migrate column's UNIQUE constraint, it's part of MigrateColumn. + MigrateColumnUnique(dst interface{}, field *schema.Field, columnType ColumnType) error HasColumn(dst interface{}, field string) bool RenameColumn(dst interface{}, oldName, field string) error ColumnTypes(dst interface{}) ([]ColumnType, error) diff --git a/vendor/gorm.io/gorm/migrator/index.go b/vendor/gorm.io/gorm/migrator/index.go index fe686e5a..8845da95 100644 --- a/vendor/gorm.io/gorm/migrator/index.go +++ b/vendor/gorm.io/gorm/migrator/index.go @@ -17,12 +17,12 @@ func (idx Index) Table() string { return idx.TableName } -// Name return the name of the index. +// Name return the name of the index. func (idx Index) Name() string { return idx.NameValue } -// Columns return the columns fo the index +// Columns return the columns of the index func (idx Index) Columns() []string { return idx.ColumnList } @@ -37,7 +37,7 @@ func (idx Index) Unique() (unique bool, ok bool) { return idx.UniqueValue.Bool, idx.UniqueValue.Valid } -// Option return the optional attribute fo the index +// Option return the optional attribute of the index func (idx Index) Option() string { return idx.OptionValue } diff --git a/vendor/gorm.io/gorm/migrator/migrator.go b/vendor/gorm.io/gorm/migrator/migrator.go index 87ac7745..189a141f 100644 --- a/vendor/gorm.io/gorm/migrator/migrator.go +++ b/vendor/gorm.io/gorm/migrator/migrator.go @@ -7,16 +7,28 @@ import ( "fmt" "reflect" "regexp" + "strconv" "strings" + "time" "gorm.io/gorm" "gorm.io/gorm/clause" + "gorm.io/gorm/logger" "gorm.io/gorm/schema" ) -var ( - regFullDataType = regexp.MustCompile(`[^\d]*(\d+)[^\d]?`) -) +// This regular expression seeks to find a sequence of digits (\d+) among zero or more non-digit characters (\D*), +// with a possible trailing non-digit character (\D?). + +// For example, values that can pass this regular expression are: +// - "123" +// - "abc456" +// -"%$#@789" +var regFullDataType = regexp.MustCompile(`\D*(\d+)\D?`) + +// TODO:? Create const vars for raw sql queries ? + +var _ gorm.Migrator = (*Migrator)(nil) // Migrator m struct type Migrator struct { @@ -30,6 +42,16 @@ type Config struct { gorm.Dialector } +type printSQLLogger struct { + logger.Interface +} + +func (l *printSQLLogger) Trace(ctx context.Context, begin time.Time, fc func() (sql string, rowsAffected int64), err error) { + sql, _ := fc() + fmt.Println(sql + ";") + l.Interface.Trace(ctx, begin, fc, err) +} + // GormDataTypeInterface gorm data type interface type GormDataTypeInterface interface { GormDBDataType(*gorm.DB, *schema.Field) string @@ -72,10 +94,6 @@ func (m Migrator) FullDataTypeOf(field *schema.Field) (expr clause.Expr) { expr.SQL += " NOT NULL" } - if field.Unique { - expr.SQL += " UNIQUE" - } - if field.HasDefaultValue && (field.DefaultValueInterface != nil || field.DefaultValue != "") { if field.DefaultValueInterface != nil { defaultStmt := &gorm.Statement{Vars: []interface{}{field.DefaultValueInterface}} @@ -89,23 +107,40 @@ func (m Migrator) FullDataTypeOf(field *schema.Field) (expr clause.Expr) { return } +func (m Migrator) GetQueryAndExecTx() (queryTx, execTx *gorm.DB) { + queryTx = m.DB.Session(&gorm.Session{}) + execTx = queryTx + if m.DB.DryRun { + queryTx.DryRun = false + execTx = m.DB.Session(&gorm.Session{Logger: &printSQLLogger{Interface: m.DB.Logger}}) + } + return queryTx, execTx +} + // AutoMigrate auto migrate values func (m Migrator) AutoMigrate(values ...interface{}) error { for _, value := range m.ReorderModels(values, true) { - tx := m.DB.Session(&gorm.Session{}) - if !tx.Migrator().HasTable(value) { - if err := tx.Migrator().CreateTable(value); err != nil { + queryTx, execTx := m.GetQueryAndExecTx() + if !queryTx.Migrator().HasTable(value) { + if err := execTx.Migrator().CreateTable(value); err != nil { return err } } else { - if err := m.RunWithValue(value, func(stmt *gorm.Statement) (errr error) { - columnTypes, err := m.DB.Migrator().ColumnTypes(value) + if err := m.RunWithValue(value, func(stmt *gorm.Statement) error { + + if stmt.Schema == nil { + return errors.New("failed to get schema") + } + + columnTypes, err := queryTx.Migrator().ColumnTypes(value) if err != nil { return err } - + var ( + parseIndexes = stmt.Schema.ParseIndexes() + parseCheckConstraints = stmt.Schema.ParseCheckConstraints() + ) for _, dbName := range stmt.Schema.DBNames { - field := stmt.Schema.FieldsByDBName[dbName] var foundColumn gorm.ColumnType for _, columnType := range columnTypes { @@ -117,37 +152,43 @@ func (m Migrator) AutoMigrate(values ...interface{}) error { if foundColumn == nil { // not found, add column - if err := tx.Migrator().AddColumn(value, dbName); err != nil { + if err = execTx.Migrator().AddColumn(value, dbName); err != nil { + return err + } + } else { + // found, smartly migrate + field := stmt.Schema.FieldsByDBName[dbName] + if err = execTx.Migrator().MigrateColumn(value, field, foundColumn); err != nil { return err } - } else if err := m.DB.Migrator().MigrateColumn(value, field, foundColumn); err != nil { - // found, smart migrate - return err } } - for _, rel := range stmt.Schema.Relationships.Relations { - if !m.DB.Config.DisableForeignKeyConstraintWhenMigrating { + if !m.DB.DisableForeignKeyConstraintWhenMigrating && !m.DB.IgnoreRelationshipsWhenMigrating { + for _, rel := range stmt.Schema.Relationships.Relations { + if rel.Field.IgnoreMigration { + continue + } if constraint := rel.ParseConstraint(); constraint != nil && - constraint.Schema == stmt.Schema && !tx.Migrator().HasConstraint(value, constraint.Name) { - if err := tx.Migrator().CreateConstraint(value, constraint.Name); err != nil { + constraint.Schema == stmt.Schema && !queryTx.Migrator().HasConstraint(value, constraint.Name) { + if err := execTx.Migrator().CreateConstraint(value, constraint.Name); err != nil { return err } } } + } - for _, chk := range stmt.Schema.ParseCheckConstraints() { - if !tx.Migrator().HasConstraint(value, chk.Name) { - if err := tx.Migrator().CreateConstraint(value, chk.Name); err != nil { - return err - } + for _, chk := range parseCheckConstraints { + if !queryTx.Migrator().HasConstraint(value, chk.Name) { + if err := execTx.Migrator().CreateConstraint(value, chk.Name); err != nil { + return err } } } - for _, idx := range stmt.Schema.ParseIndexes() { - if !tx.Migrator().HasIndex(value, idx.Name) { - if err := tx.Migrator().CreateIndex(value, idx.Name); err != nil { + for _, idx := range parseIndexes { + if !queryTx.Migrator().HasIndex(value, idx.Name) { + if err := execTx.Migrator().CreateIndex(value, idx.Name); err != nil { return err } } @@ -174,7 +215,12 @@ func (m Migrator) GetTables() (tableList []string, err error) { func (m Migrator) CreateTable(values ...interface{}) error { for _, value := range m.ReorderModels(values, false) { tx := m.DB.Session(&gorm.Session{}) - if err := m.RunWithValue(value, func(stmt *gorm.Statement) (errr error) { + if err := m.RunWithValue(value, func(stmt *gorm.Statement) (err error) { + + if stmt.Schema == nil { + return errors.New("failed to get schema") + } + var ( createTableSQL = "CREATE TABLE ? (" values = []interface{}{m.CurrentTable(stmt)} @@ -185,7 +231,7 @@ func (m Migrator) CreateTable(values ...interface{}) error { field := stmt.Schema.FieldsByDBName[dbName] if !field.IgnoreMigration { createTableSQL += "? ?" - hasPrimaryKeyInDataType = hasPrimaryKeyInDataType || strings.Contains(strings.ToUpper(string(field.DataType)), "PRIMARY KEY") + hasPrimaryKeyInDataType = hasPrimaryKeyInDataType || strings.Contains(strings.ToUpper(m.DataTypeOf(field)), "PRIMARY KEY") values = append(values, clause.Column{Name: dbName}, m.DB.Migrator().FullDataTypeOf(field)) createTableSQL += "," } @@ -193,7 +239,7 @@ func (m Migrator) CreateTable(values ...interface{}) error { if !hasPrimaryKeyInDataType && len(stmt.Schema.PrimaryFields) > 0 { createTableSQL += "PRIMARY KEY ?," - primaryKeys := []interface{}{} + primaryKeys := make([]interface{}, 0, len(stmt.Schema.PrimaryFields)) for _, field := range stmt.Schema.PrimaryFields { primaryKeys = append(primaryKeys, clause.Column{Name: field.DBName}) } @@ -204,8 +250,8 @@ func (m Migrator) CreateTable(values ...interface{}) error { for _, idx := range stmt.Schema.ParseIndexes() { if m.CreateIndexAfterCreateTable { defer func(value interface{}, name string) { - if errr == nil { - errr = tx.Migrator().CreateIndex(value, name) + if err == nil { + err = tx.Migrator().CreateIndex(value, name) } }(value, idx.Name) } else { @@ -227,11 +273,14 @@ func (m Migrator) CreateTable(values ...interface{}) error { } } - for _, rel := range stmt.Schema.Relationships.Relations { - if !m.DB.DisableForeignKeyConstraintWhenMigrating { + if !m.DB.DisableForeignKeyConstraintWhenMigrating && !m.DB.IgnoreRelationshipsWhenMigrating { + for _, rel := range stmt.Schema.Relationships.Relations { + if rel.Field.IgnoreMigration { + continue + } if constraint := rel.ParseConstraint(); constraint != nil { if constraint.Schema == stmt.Schema { - sql, vars := buildConstraint(constraint) + sql, vars := constraint.Build() createTableSQL += sql + "," values = append(values, vars...) } @@ -239,6 +288,11 @@ func (m Migrator) CreateTable(values ...interface{}) error { } } + for _, uni := range stmt.Schema.ParseUniqueConstraints() { + createTableSQL += "CONSTRAINT ? UNIQUE (?)," + values = append(values, clause.Column{Name: uni.Name}, clause.Expr{SQL: stmt.Quote(uni.Field.DBName)}) + } + for _, chk := range stmt.Schema.ParseCheckConstraints() { createTableSQL += "CONSTRAINT ? CHECK (?)," values = append(values, clause.Column{Name: chk.Name}, clause.Expr{SQL: chk.Constraint}) @@ -252,8 +306,8 @@ func (m Migrator) CreateTable(values ...interface{}) error { createTableSQL += fmt.Sprint(tableOption) } - errr = tx.Exec(createTableSQL, values...).Error - return errr + err = tx.Exec(createTableSQL, values...).Error + return err }); err != nil { return err } @@ -319,6 +373,9 @@ func (m Migrator) RenameTable(oldName, newName interface{}) error { func (m Migrator) AddColumn(value interface{}, name string) error { return m.RunWithValue(value, func(stmt *gorm.Statement) error { // avoid using the same name field + if stmt.Schema == nil { + return errors.New("failed to get schema") + } f := stmt.Schema.LookUpField(name) if f == nil { return fmt.Errorf("failed to look up field with name: %s", name) @@ -338,8 +395,10 @@ func (m Migrator) AddColumn(value interface{}, name string) error { // DropColumn drop value's `name` column func (m Migrator) DropColumn(value interface{}, name string) error { return m.RunWithValue(value, func(stmt *gorm.Statement) error { - if field := stmt.Schema.LookUpField(name); field != nil { - name = field.DBName + if stmt.Schema != nil { + if field := stmt.Schema.LookUpField(name); field != nil { + name = field.DBName + } } return m.DB.Exec( @@ -351,13 +410,15 @@ func (m Migrator) DropColumn(value interface{}, name string) error { // AlterColumn alter value's `field` column' type based on schema definition func (m Migrator) AlterColumn(value interface{}, field string) error { return m.RunWithValue(value, func(stmt *gorm.Statement) error { - if field := stmt.Schema.LookUpField(field); field != nil { - fileType := m.FullDataTypeOf(field) - return m.DB.Exec( - "ALTER TABLE ? ALTER COLUMN ? TYPE ?", - m.CurrentTable(stmt), clause.Column{Name: field.DBName}, fileType, - ).Error + if stmt.Schema != nil { + if field := stmt.Schema.LookUpField(field); field != nil { + fileType := m.FullDataTypeOf(field) + return m.DB.Exec( + "ALTER TABLE ? ALTER COLUMN ? TYPE ?", + m.CurrentTable(stmt), clause.Column{Name: field.DBName}, fileType, + ).Error + } } return fmt.Errorf("failed to look up field with name: %s", field) }) @@ -369,8 +430,10 @@ func (m Migrator) HasColumn(value interface{}, field string) bool { m.RunWithValue(value, func(stmt *gorm.Statement) error { currentDatabase := m.DB.Migrator().CurrentDatabase() name := field - if field := stmt.Schema.LookUpField(field); field != nil { - name = field.DBName + if stmt.Schema != nil { + if field := stmt.Schema.LookUpField(field); field != nil { + name = field.DBName + } } return m.DB.Raw( @@ -385,12 +448,14 @@ func (m Migrator) HasColumn(value interface{}, field string) bool { // RenameColumn rename value's field name from oldName to newName func (m Migrator) RenameColumn(value interface{}, oldName, newName string) error { return m.RunWithValue(value, func(stmt *gorm.Statement) error { - if field := stmt.Schema.LookUpField(oldName); field != nil { - oldName = field.DBName - } + if stmt.Schema != nil { + if field := stmt.Schema.LookUpField(oldName); field != nil { + oldName = field.DBName + } - if field := stmt.Schema.LookUpField(newName); field != nil { - newName = field.DBName + if field := stmt.Schema.LookUpField(newName); field != nil { + newName = field.DBName + } } return m.DB.Exec( @@ -402,36 +467,58 @@ func (m Migrator) RenameColumn(value interface{}, oldName, newName string) error // MigrateColumn migrate column func (m Migrator) MigrateColumn(value interface{}, field *schema.Field, columnType gorm.ColumnType) error { + if field.IgnoreMigration { + return nil + } + // found, smart migrate fullDataType := strings.TrimSpace(strings.ToLower(m.DB.Migrator().FullDataTypeOf(field).SQL)) realDataType := strings.ToLower(columnType.DatabaseTypeName()) - alterColumn := false + var ( + alterColumn bool + isSameType = fullDataType == realDataType + ) + + if !field.PrimaryKey { + // check type + if !strings.HasPrefix(fullDataType, realDataType) { + // check type aliases + aliases := m.DB.Migrator().GetTypeAliases(realDataType) + for _, alias := range aliases { + if strings.HasPrefix(fullDataType, alias) { + isSameType = true + break + } + } - // check type - if !field.PrimaryKey && !strings.HasPrefix(fullDataType, realDataType) { - alterColumn = true + if !isSameType { + alterColumn = true + } + } } - // check size - if length, ok := columnType.Length(); length != int64(field.Size) { - if length > 0 && field.Size > 0 { - alterColumn = true - } else { - // has size in data type and not equal - // Since the following code is frequently called in the for loop, reg optimization is needed here - matches2 := regFullDataType.FindAllStringSubmatch(fullDataType, -1) - if !field.PrimaryKey && - (len(matches2) == 1 && matches2[0][1] != fmt.Sprint(length) && ok) { + if !isSameType { + // check size + if length, ok := columnType.Length(); length != int64(field.Size) { + if length > 0 && field.Size > 0 { alterColumn = true + } else { + // has size in data type and not equal + // Since the following code is frequently called in the for loop, reg optimization is needed here + matches2 := regFullDataType.FindAllStringSubmatch(fullDataType, -1) + if !field.PrimaryKey && + (len(matches2) == 1 && matches2[0][1] != fmt.Sprint(length) && ok) { + alterColumn = true + } } } - } - // check precision - if precision, _, ok := columnType.DecimalSize(); ok && int64(field.Precision) != precision { - if regexp.MustCompile(fmt.Sprintf("[^0-9]%d[^0-9]", field.Precision)).MatchString(m.DataTypeOf(field)) { - alterColumn = true + // check precision + if precision, _, ok := columnType.DecimalSize(); ok && int64(field.Precision) != precision { + if regexp.MustCompile(fmt.Sprintf("[^0-9]%d[^0-9]", field.Precision)).MatchString(m.DataTypeOf(field)) { + alterColumn = true + } } } @@ -443,28 +530,28 @@ func (m Migrator) MigrateColumn(value interface{}, field *schema.Field, columnTy } } - // check unique - if unique, ok := columnType.Unique(); ok && unique != field.Unique { - // not primary key - if !field.PrimaryKey { - alterColumn = true - } - } - // check default value if !field.PrimaryKey { + currentDefaultNotNull := field.HasDefaultValue && (field.DefaultValueInterface != nil || !strings.EqualFold(field.DefaultValue, "NULL")) dv, dvNotNull := columnType.DefaultValue() - if dvNotNull && field.DefaultValueInterface == nil { - // defalut value -> null + if dvNotNull && !currentDefaultNotNull { + // default value -> null alterColumn = true - } else if !dvNotNull && field.DefaultValueInterface != nil { + } else if !dvNotNull && currentDefaultNotNull { // null -> default value alterColumn = true - } else if dv != field.DefaultValue { - // default value not equal - // not both null - if !(field.DefaultValueInterface == nil && !dvNotNull) { - alterColumn = true + } else if currentDefaultNotNull || dvNotNull { + switch field.GORMDataType { + case schema.Time: + if !strings.EqualFold(strings.TrimSuffix(dv, "()"), strings.TrimSuffix(field.DefaultValue, "()")) { + alterColumn = true + } + case schema.Bool: + v1, _ := strconv.ParseBool(dv) + v2, _ := strconv.ParseBool(field.DefaultValue) + alterColumn = v1 != v2 + default: + alterColumn = dv != field.DefaultValue } } } @@ -477,13 +564,39 @@ func (m Migrator) MigrateColumn(value interface{}, field *schema.Field, columnTy } } - if alterColumn && !field.IgnoreMigration { - return m.DB.Migrator().AlterColumn(value, field.Name) + if alterColumn { + if err := m.DB.Migrator().AlterColumn(value, field.DBName); err != nil { + return err + } + } + + if err := m.DB.Migrator().MigrateColumnUnique(value, field, columnType); err != nil { + return err } return nil } +func (m Migrator) MigrateColumnUnique(value interface{}, field *schema.Field, columnType gorm.ColumnType) error { + unique, ok := columnType.Unique() + if !ok || field.PrimaryKey { + return nil // skip primary key + } + // By default, ColumnType's Unique is not affected by UniqueIndex, so we don't care about UniqueIndex. + return m.RunWithValue(value, func(stmt *gorm.Statement) error { + // We're currently only receiving boolean values on `Unique` tag, + // so the UniqueConstraint name is fixed + constraint := m.DB.NamingStrategy.UniqueName(stmt.Table, field.DBName) + if unique && !field.Unique { + return m.DB.Migrator().DropConstraint(value, constraint) + } + if !unique && field.Unique { + return m.DB.Migrator().CreateConstraint(value, constraint) + } + return nil + }) +} + // ColumnTypes return columnTypes []gorm.ColumnType and execErr error func (m Migrator) ColumnTypes(value interface{}) ([]gorm.ColumnType, error) { columnTypes := make([]gorm.ColumnType, 0) @@ -513,47 +626,76 @@ func (m Migrator) ColumnTypes(value interface{}) ([]gorm.ColumnType, error) { return columnTypes, execErr } -// CreateView create view +// CreateView create view from Query in gorm.ViewOption. +// Query in gorm.ViewOption is a [subquery] +// +// // CREATE VIEW `user_view` AS SELECT * FROM `users` WHERE age > 20 +// q := DB.Model(&User{}).Where("age > ?", 20) +// DB.Debug().Migrator().CreateView("user_view", gorm.ViewOption{Query: q}) +// +// // CREATE OR REPLACE VIEW `users_view` AS SELECT * FROM `users` WITH CHECK OPTION +// q := DB.Model(&User{}) +// DB.Debug().Migrator().CreateView("user_view", gorm.ViewOption{Query: q, Replace: true, CheckOption: "WITH CHECK OPTION"}) +// +// [subquery]: https://gorm.io/docs/advanced_query.html#SubQuery func (m Migrator) CreateView(name string, option gorm.ViewOption) error { - return gorm.ErrNotImplemented -} - -// DropView drop view -func (m Migrator) DropView(name string) error { - return gorm.ErrNotImplemented -} - -func buildConstraint(constraint *schema.Constraint) (sql string, results []interface{}) { - sql = "CONSTRAINT ? FOREIGN KEY ? REFERENCES ??" - if constraint.OnDelete != "" { - sql += " ON DELETE " + constraint.OnDelete + if option.Query == nil { + return gorm.ErrSubQueryRequired } - if constraint.OnUpdate != "" { - sql += " ON UPDATE " + constraint.OnUpdate + sql := new(strings.Builder) + sql.WriteString("CREATE ") + if option.Replace { + sql.WriteString("OR REPLACE ") } + sql.WriteString("VIEW ") + m.QuoteTo(sql, name) + sql.WriteString(" AS ") - var foreignKeys, references []interface{} - for _, field := range constraint.ForeignKeys { - foreignKeys = append(foreignKeys, clause.Column{Name: field.DBName}) - } + m.DB.Statement.AddVar(sql, option.Query) - for _, field := range constraint.References { - references = append(references, clause.Column{Name: field.DBName}) + if option.CheckOption != "" { + sql.WriteString(" ") + sql.WriteString(option.CheckOption) } - results = append(results, clause.Table{Name: constraint.Name}, foreignKeys, clause.Table{Name: constraint.ReferenceSchema.Table}, references) - return + return m.DB.Exec(m.Explain(sql.String(), m.DB.Statement.Vars...)).Error +} + +// DropView drop view +func (m Migrator) DropView(name string) error { + return m.DB.Exec("DROP VIEW IF EXISTS ?", clause.Table{Name: name}).Error } // GuessConstraintAndTable guess statement's constraint and it's table based on name -func (m Migrator) GuessConstraintAndTable(stmt *gorm.Statement, name string) (_ *schema.Constraint, _ *schema.Check, table string) { +// +// Deprecated: use GuessConstraintInterfaceAndTable instead. +func (m Migrator) GuessConstraintAndTable(stmt *gorm.Statement, name string) (*schema.Constraint, *schema.CheckConstraint, string) { + constraint, table := m.GuessConstraintInterfaceAndTable(stmt, name) + switch c := constraint.(type) { + case *schema.Constraint: + return c, nil, table + case *schema.CheckConstraint: + return nil, c, table + default: + return nil, nil, table + } +} + +// GuessConstraintInterfaceAndTable guess statement's constraint and it's table based on name +// nolint:cyclop +func (m Migrator) GuessConstraintInterfaceAndTable(stmt *gorm.Statement, name string) (_ schema.ConstraintInterface, table string) { if stmt.Schema == nil { - return nil, nil, stmt.Table + return nil, stmt.Table } checkConstraints := stmt.Schema.ParseCheckConstraints() if chk, ok := checkConstraints[name]; ok { - return nil, &chk, stmt.Table + return &chk, stmt.Table + } + + uniqueConstraints := stmt.Schema.ParseUniqueConstraints() + if uni, ok := uniqueConstraints[name]; ok { + return &uni, stmt.Table } getTable := func(rel *schema.Relationship) string { @@ -568,7 +710,7 @@ func (m Migrator) GuessConstraintAndTable(stmt *gorm.Statement, name string) (_ for _, rel := range stmt.Schema.Relationships.Relations { if constraint := rel.ParseConstraint(); constraint != nil && constraint.Name == name { - return constraint, nil, getTable(rel) + return constraint, getTable(rel) } } @@ -576,40 +718,39 @@ func (m Migrator) GuessConstraintAndTable(stmt *gorm.Statement, name string) (_ for k := range checkConstraints { if checkConstraints[k].Field == field { v := checkConstraints[k] - return nil, &v, stmt.Table + return &v, stmt.Table + } + } + + for k := range uniqueConstraints { + if uniqueConstraints[k].Field == field { + v := uniqueConstraints[k] + return &v, stmt.Table } } for _, rel := range stmt.Schema.Relationships.Relations { if constraint := rel.ParseConstraint(); constraint != nil && rel.Field == field { - return constraint, nil, getTable(rel) + return constraint, getTable(rel) } } } - return nil, nil, stmt.Schema.Table + return nil, stmt.Schema.Table } // CreateConstraint create constraint func (m Migrator) CreateConstraint(value interface{}, name string) error { return m.RunWithValue(value, func(stmt *gorm.Statement) error { - constraint, chk, table := m.GuessConstraintAndTable(stmt, name) - if chk != nil { - return m.DB.Exec( - "ALTER TABLE ? ADD CONSTRAINT ? CHECK (?)", - m.CurrentTable(stmt), clause.Column{Name: chk.Name}, clause.Expr{SQL: chk.Constraint}, - ).Error - } - + constraint, table := m.GuessConstraintInterfaceAndTable(stmt, name) if constraint != nil { vars := []interface{}{clause.Table{Name: table}} if stmt.TableExpr != nil { vars[0] = stmt.TableExpr } - sql, values := buildConstraint(constraint) + sql, values := constraint.Build() return m.DB.Exec("ALTER TABLE ? ADD "+sql, append(vars, values...)...).Error } - return nil }) } @@ -617,11 +758,9 @@ func (m Migrator) CreateConstraint(value interface{}, name string) error { // DropConstraint drop constraint func (m Migrator) DropConstraint(value interface{}, name string) error { return m.RunWithValue(value, func(stmt *gorm.Statement) error { - constraint, chk, table := m.GuessConstraintAndTable(stmt, name) + constraint, table := m.GuessConstraintInterfaceAndTable(stmt, name) if constraint != nil { - name = constraint.Name - } else if chk != nil { - name = chk.Name + name = constraint.GetName() } return m.DB.Exec("ALTER TABLE ? DROP CONSTRAINT ?", clause.Table{Name: table}, clause.Column{Name: name}).Error }) @@ -632,11 +771,9 @@ func (m Migrator) HasConstraint(value interface{}, name string) bool { var count int64 m.RunWithValue(value, func(stmt *gorm.Statement) error { currentDatabase := m.DB.Migrator().CurrentDatabase() - constraint, chk, table := m.GuessConstraintAndTable(stmt, name) + constraint, table := m.GuessConstraintInterfaceAndTable(stmt, name) if constraint != nil { - name = constraint.Name - } else if chk != nil { - name = chk.Name + name = constraint.GetName() } return m.DB.Raw( @@ -678,6 +815,9 @@ type BuildIndexOptionsInterface interface { // CreateIndex create index `name` func (m Migrator) CreateIndex(value interface{}, name string) error { return m.RunWithValue(value, func(stmt *gorm.Statement) error { + if stmt.Schema == nil { + return errors.New("failed to get schema") + } if idx := stmt.Schema.LookIndex(name); idx != nil { opts := m.DB.Migrator().(BuildIndexOptionsInterface).BuildIndexOptions(idx.Fields, stmt) values := []interface{}{clause.Column{Name: idx.Name}, m.CurrentTable(stmt), opts} @@ -710,8 +850,10 @@ func (m Migrator) CreateIndex(value interface{}, name string) error { // DropIndex drop index `name` func (m Migrator) DropIndex(value interface{}, name string) error { return m.RunWithValue(value, func(stmt *gorm.Statement) error { - if idx := stmt.Schema.LookIndex(name); idx != nil { - name = idx.Name + if stmt.Schema != nil { + if idx := stmt.Schema.LookIndex(name); idx != nil { + name = idx.Name + } } return m.DB.Exec("DROP INDEX ? ON ?", clause.Column{Name: name}, m.CurrentTable(stmt)).Error @@ -723,8 +865,10 @@ func (m Migrator) HasIndex(value interface{}, name string) bool { var count int64 m.RunWithValue(value, func(stmt *gorm.Statement) error { currentDatabase := m.DB.Migrator().CurrentDatabase() - if idx := stmt.Schema.LookIndex(name); idx != nil { - name = idx.Name + if stmt.Schema != nil { + if idx := stmt.Schema.LookIndex(name); idx != nil { + name = idx.Name + } } return m.DB.Raw( @@ -782,26 +926,31 @@ func (m Migrator) ReorderModels(values []interface{}, autoAdd bool) (results []i } parsedSchemas[dep.Statement.Schema] = true - for _, rel := range dep.Schema.Relationships.Relations { - if c := rel.ParseConstraint(); c != nil && c.Schema == dep.Statement.Schema && c.Schema != c.ReferenceSchema { - dep.Depends = append(dep.Depends, c.ReferenceSchema) - } + if !m.DB.IgnoreRelationshipsWhenMigrating { + for _, rel := range dep.Schema.Relationships.Relations { + if rel.Field.IgnoreMigration { + continue + } + if c := rel.ParseConstraint(); c != nil && c.Schema == dep.Statement.Schema && c.Schema != c.ReferenceSchema { + dep.Depends = append(dep.Depends, c.ReferenceSchema) + } - if rel.Type == schema.HasOne || rel.Type == schema.HasMany { - beDependedOn[rel.FieldSchema] = true - } + if rel.Type == schema.HasOne || rel.Type == schema.HasMany { + beDependedOn[rel.FieldSchema] = true + } - if rel.JoinTable != nil { - // append join value - defer func(rel *schema.Relationship, joinValue interface{}) { - if !beDependedOn[rel.FieldSchema] { - dep.Depends = append(dep.Depends, rel.FieldSchema) - } else { - fieldValue := reflect.New(rel.FieldSchema.ModelType).Interface() - parseDependence(fieldValue, autoAdd) - } - parseDependence(joinValue, autoAdd) - }(rel, reflect.New(rel.JoinTable.ModelType).Interface()) + if rel.JoinTable != nil { + // append join value + defer func(rel *schema.Relationship, joinValue interface{}) { + if !beDependedOn[rel.FieldSchema] { + dep.Depends = append(dep.Depends, rel.FieldSchema) + } else { + fieldValue := reflect.New(rel.FieldSchema.ModelType).Interface() + parseDependence(fieldValue, autoAdd) + } + parseDependence(joinValue, autoAdd) + }(rel, reflect.New(rel.JoinTable.ModelType).Interface()) + } } } @@ -863,3 +1012,13 @@ func (m Migrator) CurrentTable(stmt *gorm.Statement) interface{} { func (m Migrator) GetIndexes(dst interface{}) ([]gorm.Index, error) { return nil, errors.New("not support") } + +// GetTypeAliases return database type aliases +func (m Migrator) GetTypeAliases(databaseTypeName string) []string { + return nil +} + +// TableType return tableType gorm.TableType and execErr error +func (m Migrator) TableType(dst interface{}) (gorm.TableType, error) { + return nil, errors.New("not support") +} diff --git a/vendor/gorm.io/gorm/migrator/table_type.go b/vendor/gorm.io/gorm/migrator/table_type.go new file mode 100644 index 00000000..ed6e42a0 --- /dev/null +++ b/vendor/gorm.io/gorm/migrator/table_type.go @@ -0,0 +1,33 @@ +package migrator + +import ( + "database/sql" +) + +// TableType table type implements TableType interface +type TableType struct { + SchemaValue string + NameValue string + TypeValue string + CommentValue sql.NullString +} + +// Schema returns the schema of the table. +func (ct TableType) Schema() string { + return ct.SchemaValue +} + +// Name returns the name of the table. +func (ct TableType) Name() string { + return ct.NameValue +} + +// Type returns the type of the table. +func (ct TableType) Type() string { + return ct.TypeValue +} + +// Comment returns the comment of current table. +func (ct TableType) Comment() (comment string, ok bool) { + return ct.CommentValue.String, ct.CommentValue.Valid +} diff --git a/vendor/gorm.io/gorm/model.go b/vendor/gorm.io/gorm/model.go index 3334d17c..fa705df1 100644 --- a/vendor/gorm.io/gorm/model.go +++ b/vendor/gorm.io/gorm/model.go @@ -4,9 +4,10 @@ import "time" // Model a basic GoLang struct which includes the following fields: ID, CreatedAt, UpdatedAt, DeletedAt // It may be embedded into your model or you may build your own model without it -// type User struct { -// gorm.Model -// } +// +// type User struct { +// gorm.Model +// } type Model struct { ID uint `gorm:"primarykey"` CreatedAt time.Time diff --git a/vendor/gorm.io/gorm/prepare_stmt.go b/vendor/gorm.io/gorm/prepare_stmt.go index b062b0d6..094bb477 100644 --- a/vendor/gorm.io/gorm/prepare_stmt.go +++ b/vendor/gorm.io/gorm/prepare_stmt.go @@ -3,30 +3,42 @@ package gorm import ( "context" "database/sql" + "database/sql/driver" + "errors" + "reflect" "sync" ) type Stmt struct { *sql.Stmt Transaction bool + prepared chan struct{} + prepareErr error } type PreparedStmtDB struct { - Stmts map[string]Stmt - PreparedSQL []string - Mux *sync.RWMutex + Stmts map[string]*Stmt + Mux *sync.RWMutex ConnPool } -func (db *PreparedStmtDB) GetDBConn() (*sql.DB, error) { - if dbConnector, ok := db.ConnPool.(GetDBConnector); ok && dbConnector != nil { - return dbConnector.GetDBConn() +func NewPreparedStmtDB(connPool ConnPool) *PreparedStmtDB { + return &PreparedStmtDB{ + ConnPool: connPool, + Stmts: make(map[string]*Stmt), + Mux: &sync.RWMutex{}, } +} +func (db *PreparedStmtDB) GetDBConn() (*sql.DB, error) { if sqldb, ok := db.ConnPool.(*sql.DB); ok { return sqldb, nil } + if dbConnector, ok := db.ConnPool.(GetDBConnector); ok && dbConnector != nil { + return dbConnector.GetDBConn() + } + return nil, ErrInvalidDB } @@ -34,39 +46,94 @@ func (db *PreparedStmtDB) Close() { db.Mux.Lock() defer db.Mux.Unlock() - for _, query := range db.PreparedSQL { - if stmt, ok := db.Stmts[query]; ok { - delete(db.Stmts, query) - go stmt.Close() - } + for _, stmt := range db.Stmts { + go func(s *Stmt) { + // make sure the stmt must finish preparation first + <-s.prepared + if s.Stmt != nil { + _ = s.Close() + } + }(stmt) + } + // setting db.Stmts to nil to avoid further using + db.Stmts = nil +} + +func (sdb *PreparedStmtDB) Reset() { + sdb.Mux.Lock() + defer sdb.Mux.Unlock() + + for _, stmt := range sdb.Stmts { + go func(s *Stmt) { + // make sure the stmt must finish preparation first + <-s.prepared + if s.Stmt != nil { + _ = s.Close() + } + }(stmt) } + sdb.Stmts = make(map[string]*Stmt) } func (db *PreparedStmtDB) prepare(ctx context.Context, conn ConnPool, isTransaction bool, query string) (Stmt, error) { db.Mux.RLock() if stmt, ok := db.Stmts[query]; ok && (!stmt.Transaction || isTransaction) { db.Mux.RUnlock() - return stmt, nil + // wait for other goroutines prepared + <-stmt.prepared + if stmt.prepareErr != nil { + return Stmt{}, stmt.prepareErr + } + + return *stmt, nil } db.Mux.RUnlock() db.Mux.Lock() - defer db.Mux.Unlock() - // double check if stmt, ok := db.Stmts[query]; ok && (!stmt.Transaction || isTransaction) { - return stmt, nil - } else if ok { - go stmt.Close() + db.Mux.Unlock() + // wait for other goroutines prepared + <-stmt.prepared + if stmt.prepareErr != nil { + return Stmt{}, stmt.prepareErr + } + + return *stmt, nil + } + // check db.Stmts first to avoid Segmentation Fault(setting value to nil map) + // which cause by calling Close and executing SQL concurrently + if db.Stmts == nil { + db.Mux.Unlock() + return Stmt{}, ErrInvalidDB } + // cache preparing stmt first + cacheStmt := Stmt{Transaction: isTransaction, prepared: make(chan struct{})} + db.Stmts[query] = &cacheStmt + db.Mux.Unlock() + // prepare completed + defer close(cacheStmt.prepared) + + // Reason why cannot lock conn.PrepareContext + // suppose the maxopen is 1, g1 is creating record and g2 is querying record. + // 1. g1 begin tx, g1 is requeue because of waiting for the system call, now `db.ConnPool` db.numOpen == 1. + // 2. g2 select lock `conn.PrepareContext(ctx, query)`, now db.numOpen == db.maxOpen , wait for release. + // 3. g1 tx exec insert, wait for unlock `conn.PrepareContext(ctx, query)` to finish tx and release. stmt, err := conn.PrepareContext(ctx, query) - if err == nil { - db.Stmts[query] = Stmt{Stmt: stmt, Transaction: isTransaction} - db.PreparedSQL = append(db.PreparedSQL, query) + if err != nil { + cacheStmt.prepareErr = err + db.Mux.Lock() + delete(db.Stmts, query) + db.Mux.Unlock() + return Stmt{}, err } - return db.Stmts[query], err + db.Mux.Lock() + cacheStmt.Stmt = stmt + db.Mux.Unlock() + + return cacheStmt, nil } func (db *PreparedStmtDB) BeginTx(ctx context.Context, opt *sql.TxOptions) (ConnPool, error) { @@ -74,6 +141,19 @@ func (db *PreparedStmtDB) BeginTx(ctx context.Context, opt *sql.TxOptions) (Conn tx, err := beginner.BeginTx(ctx, opt) return &PreparedStmtTX{PreparedStmtDB: db, Tx: tx}, err } + + beginner, ok := db.ConnPool.(ConnPoolBeginner) + if !ok { + return nil, ErrInvalidTransaction + } + + connPool, err := beginner.BeginTx(ctx, opt) + if err != nil { + return nil, err + } + if tx, ok := connPool.(Tx); ok { + return &PreparedStmtTX{PreparedStmtDB: db, Tx: tx}, nil + } return nil, ErrInvalidTransaction } @@ -81,7 +161,7 @@ func (db *PreparedStmtDB) ExecContext(ctx context.Context, query string, args .. stmt, err := db.prepare(ctx, db.ConnPool, false, query) if err == nil { result, err = stmt.ExecContext(ctx, args...) - if err != nil { + if errors.Is(err, driver.ErrBadConn) { db.Mux.Lock() defer db.Mux.Unlock() go stmt.Close() @@ -95,7 +175,7 @@ func (db *PreparedStmtDB) QueryContext(ctx context.Context, query string, args . stmt, err := db.prepare(ctx, db.ConnPool, false, query) if err == nil { rows, err = stmt.QueryContext(ctx, args...) - if err != nil { + if errors.Is(err, driver.ErrBadConn) { db.Mux.Lock() defer db.Mux.Unlock() @@ -114,20 +194,32 @@ func (db *PreparedStmtDB) QueryRowContext(ctx context.Context, query string, arg return &sql.Row{} } +func (db *PreparedStmtDB) Ping() error { + conn, err := db.GetDBConn() + if err != nil { + return err + } + return conn.Ping() +} + type PreparedStmtTX struct { Tx PreparedStmtDB *PreparedStmtDB } +func (db *PreparedStmtTX) GetDBConn() (*sql.DB, error) { + return db.PreparedStmtDB.GetDBConn() +} + func (tx *PreparedStmtTX) Commit() error { - if tx.Tx != nil { + if tx.Tx != nil && !reflect.ValueOf(tx.Tx).IsNil() { return tx.Tx.Commit() } return ErrInvalidTransaction } func (tx *PreparedStmtTX) Rollback() error { - if tx.Tx != nil { + if tx.Tx != nil && !reflect.ValueOf(tx.Tx).IsNil() { return tx.Tx.Rollback() } return ErrInvalidTransaction @@ -137,7 +229,7 @@ func (tx *PreparedStmtTX) ExecContext(ctx context.Context, query string, args .. stmt, err := tx.PreparedStmtDB.prepare(ctx, tx.Tx, true, query) if err == nil { result, err = tx.Tx.StmtContext(ctx, stmt.Stmt).ExecContext(ctx, args...) - if err != nil { + if errors.Is(err, driver.ErrBadConn) { tx.PreparedStmtDB.Mux.Lock() defer tx.PreparedStmtDB.Mux.Unlock() @@ -152,7 +244,7 @@ func (tx *PreparedStmtTX) QueryContext(ctx context.Context, query string, args . stmt, err := tx.PreparedStmtDB.prepare(ctx, tx.Tx, true, query) if err == nil { rows, err = tx.Tx.StmtContext(ctx, stmt.Stmt).QueryContext(ctx, args...) - if err != nil { + if errors.Is(err, driver.ErrBadConn) { tx.PreparedStmtDB.Mux.Lock() defer tx.PreparedStmtDB.Mux.Unlock() @@ -170,3 +262,11 @@ func (tx *PreparedStmtTX) QueryRowContext(ctx context.Context, query string, arg } return &sql.Row{} } + +func (tx *PreparedStmtTX) Ping() error { + conn, err := tx.GetDBConn() + if err != nil { + return err + } + return conn.Ping() +} diff --git a/vendor/gorm.io/gorm/scan.go b/vendor/gorm.io/gorm/scan.go index 6250fb57..d852c2c9 100644 --- a/vendor/gorm.io/gorm/scan.go +++ b/vendor/gorm.io/gorm/scan.go @@ -4,10 +4,10 @@ import ( "database/sql" "database/sql/driver" "reflect" - "strings" "time" "gorm.io/gorm/schema" + "gorm.io/gorm/utils" ) // prepareValues prepare values slice @@ -50,7 +50,7 @@ func scanIntoMap(mapValue map[string]interface{}, values []interface{}, columns } } -func (db *DB) scanIntoStruct(rows Rows, reflectValue reflect.Value, values []interface{}, fields []*schema.Field, joinFields [][2]*schema.Field) { +func (db *DB) scanIntoStruct(rows Rows, reflectValue reflect.Value, values []interface{}, fields []*schema.Field, joinFields [][]*schema.Field) { for idx, field := range fields { if field != nil { values[idx] = field.NewValuePool.Get() @@ -65,31 +65,49 @@ func (db *DB) scanIntoStruct(rows Rows, reflectValue reflect.Value, values []int db.RowsAffected++ db.AddError(rows.Scan(values...)) - - joinedSchemaMap := make(map[*schema.Field]interface{}, 0) + joinedNestedSchemaMap := make(map[string]interface{}) for idx, field := range fields { - if field != nil { - if len(joinFields) == 0 || joinFields[idx][0] == nil { - db.AddError(field.Set(db.Statement.Context, reflectValue, values[idx])) - } else { - joinSchema := joinFields[idx][0] - relValue := joinSchema.ReflectValueOf(db.Statement.Context, reflectValue) + if field == nil { + continue + } + + if len(joinFields) == 0 || len(joinFields[idx]) == 0 { + db.AddError(field.Set(db.Statement.Context, reflectValue, values[idx])) + } else { // joinFields count is larger than 2 when using join + var isNilPtrValue bool + var relValue reflect.Value + // does not contain raw dbname + nestedJoinSchemas := joinFields[idx][:len(joinFields[idx])-1] + // current reflect value + currentReflectValue := reflectValue + fullRels := make([]string, 0, len(nestedJoinSchemas)) + for _, joinSchema := range nestedJoinSchemas { + fullRels = append(fullRels, joinSchema.Name) + relValue = joinSchema.ReflectValueOf(db.Statement.Context, currentReflectValue) if relValue.Kind() == reflect.Ptr { - if _, ok := joinedSchemaMap[joinSchema]; !ok { + fullRelsName := utils.JoinNestedRelationNames(fullRels) + // same nested structure + if _, ok := joinedNestedSchemaMap[fullRelsName]; !ok { if value := reflect.ValueOf(values[idx]).Elem(); value.Kind() == reflect.Ptr && value.IsNil() { - continue + isNilPtrValue = true + break } relValue.Set(reflect.New(relValue.Type().Elem())) - joinedSchemaMap[joinSchema] = nil + joinedNestedSchemaMap[fullRelsName] = nil } } - db.AddError(joinFields[idx][1].Set(db.Statement.Context, relValue, values[idx])) + currentReflectValue = relValue } - // release data to pool - field.NewValuePool.Put(values[idx]) + if !isNilPtrValue { // ignore if value is nil + f := joinFields[idx][len(joinFields[idx])-1] + db.AddError(f.Set(db.Statement.Context, relValue, values[idx])) + } } + + // release data to pool + field.NewValuePool.Put(values[idx]) } } @@ -113,6 +131,15 @@ func Scan(rows Rows, db *DB, mode ScanMode) { onConflictDonothing = mode&ScanOnConflictDoNothing != 0 ) + if len(db.Statement.ColumnMapping) > 0 { + for i, column := range columns { + v, ok := db.Statement.ColumnMapping[column] + if ok { + columns[i] = v + } + } + } + db.RowsAffected = 0 switch dest := db.Statement.Dest.(type) { @@ -161,11 +188,10 @@ func Scan(rows Rows, db *DB, mode ScanMode) { } default: var ( - fields = make([]*schema.Field, len(columns)) - selectedColumnsMap = make(map[string]int, len(columns)) - joinFields [][2]*schema.Field - sch = db.Statement.Schema - reflectValue = db.Statement.ReflectValue + fields = make([]*schema.Field, len(columns)) + joinFields [][]*schema.Field + sch = db.Statement.Schema + reflectValue = db.Statement.ReflectValue ) if reflectValue.Kind() == reflect.Interface { @@ -198,42 +224,53 @@ func Scan(rows Rows, db *DB, mode ScanMode) { // Not Pluck if sch != nil { - schFieldsCount := len(sch.Fields) + matchedFieldCount := make(map[string]int, len(columns)) for idx, column := range columns { if field := sch.LookUpField(column); field != nil && field.Readable { - if curIndex, ok := selectedColumnsMap[column]; ok { - fields[idx] = field // handle duplicate fields - offset := curIndex + 1 - // handle sch inconsistent with database - // like Raw(`...`).Scan - if schFieldsCount > offset { - for fieldIndex, selectField := range sch.Fields[offset:] { - if selectField.DBName == column && selectField.Readable { - selectedColumnsMap[column] = curIndex + fieldIndex + 1 + fields[idx] = field + if count, ok := matchedFieldCount[column]; ok { + // handle duplicate fields + for _, selectField := range sch.Fields { + if selectField.DBName == column && selectField.Readable { + if count == 0 { + matchedFieldCount[column]++ fields[idx] = selectField break } + count-- } } } else { - fields[idx] = field - selectedColumnsMap[column] = idx + matchedFieldCount[column] = 1 } - } else if names := strings.Split(column, "__"); len(names) > 1 { + } else if names := utils.SplitNestedRelationName(column); len(names) > 1 { // has nested relation if rel, ok := sch.Relationships.Relations[names[0]]; ok { - if field := rel.FieldSchema.LookUpField(strings.Join(names[1:], "__")); field != nil && field.Readable { + subNameCount := len(names) + // nested relation fields + relFields := make([]*schema.Field, 0, subNameCount-1) + relFields = append(relFields, rel.Field) + for _, name := range names[1 : subNameCount-1] { + rel = rel.FieldSchema.Relationships.Relations[name] + relFields = append(relFields, rel.Field) + } + // latest name is raw dbname + dbName := names[subNameCount-1] + if field := rel.FieldSchema.LookUpField(dbName); field != nil && field.Readable { fields[idx] = field if len(joinFields) == 0 { - joinFields = make([][2]*schema.Field, len(columns)) + joinFields = make([][]*schema.Field, len(columns)) } - joinFields[idx] = [2]*schema.Field{rel.Field, field} + relFields = append(relFields, field) + joinFields[idx] = relFields continue } } - values[idx] = &sql.RawBytes{} + var val interface{} + values[idx] = &val } else { - values[idx] = &sql.RawBytes{} + var val interface{} + values[idx] = &val } } } @@ -241,12 +278,24 @@ func Scan(rows Rows, db *DB, mode ScanMode) { switch reflectValue.Kind() { case reflect.Slice, reflect.Array: - var elem reflect.Value - recyclableStruct := reflect.New(reflectValueType) + var ( + elem reflect.Value + isArrayKind = reflectValue.Kind() == reflect.Array + ) if !update || reflectValue.Len() == 0 { update = false - db.Statement.ReflectValue.Set(reflect.MakeSlice(reflectValue.Type(), 0, 20)) + if isArrayKind { + db.Statement.ReflectValue.Set(reflect.Zero(reflectValue.Type())) + } else { + // if the slice cap is externally initialized, the externally initialized slice is directly used here + if reflectValue.Cap() == 0 { + db.Statement.ReflectValue.Set(reflect.MakeSlice(reflectValue.Type(), 0, 20)) + } else { + reflectValue.SetLen(0) + db.Statement.ReflectValue.Set(reflectValue) + } + } } for initialized || rows.Next() { @@ -267,20 +316,21 @@ func Scan(rows Rows, db *DB, mode ScanMode) { } } } else { - if isPtr && db.RowsAffected > 0 { - elem = reflect.New(reflectValueType) - } else { - elem = recyclableStruct - } + elem = reflect.New(reflectValueType) } db.scanIntoStruct(rows, elem, values, fields, joinFields) if !update { - if isPtr { - reflectValue = reflect.Append(reflectValue, elem) + if !isPtr { + elem = elem.Elem() + } + if isArrayKind { + if reflectValue.Len() >= int(db.RowsAffected) { + reflectValue.Index(int(db.RowsAffected - 1)).Set(elem) + } } else { - reflectValue = reflect.Append(reflectValue, elem.Elem()) + reflectValue = reflect.Append(reflectValue, elem) } } } @@ -290,6 +340,9 @@ func Scan(rows Rows, db *DB, mode ScanMode) { } case reflect.Struct, reflect.Ptr: if initialized || rows.Next() { + if mode == ScanInitialized && reflectValue.Kind() == reflect.Struct { + db.Statement.ReflectValue.Set(reflect.Zero(reflectValue.Type())) + } db.scanIntoStruct(rows, reflectValue, values, fields, joinFields) } default: diff --git a/vendor/gorm.io/gorm/schema/check.go b/vendor/gorm.io/gorm/schema/check.go deleted file mode 100644 index 89e732d3..00000000 --- a/vendor/gorm.io/gorm/schema/check.go +++ /dev/null @@ -1,35 +0,0 @@ -package schema - -import ( - "regexp" - "strings" -) - -// reg match english letters and midline -var regEnLetterAndMidline = regexp.MustCompile("^[A-Za-z-_]+$") - -type Check struct { - Name string - Constraint string // length(phone) >= 10 - *Field -} - -// ParseCheckConstraints parse schema check constraints -func (schema *Schema) ParseCheckConstraints() map[string]Check { - checks := map[string]Check{} - for _, field := range schema.FieldsByDBName { - if chk := field.TagSettings["CHECK"]; chk != "" { - names := strings.Split(chk, ",") - if len(names) > 1 && regEnLetterAndMidline.MatchString(names[0]) { - checks[names[0]] = Check{Name: names[0], Constraint: strings.Join(names[1:], ","), Field: field} - } else { - if names[0] == "" { - chk = strings.Join(names[1:], ",") - } - name := schema.namer.CheckerName(schema.Table, field.DBName) - checks[name] = Check{Name: name, Constraint: chk, Field: field} - } - } - } - return checks -} diff --git a/vendor/gorm.io/gorm/schema/constraint.go b/vendor/gorm.io/gorm/schema/constraint.go new file mode 100644 index 00000000..80a743a8 --- /dev/null +++ b/vendor/gorm.io/gorm/schema/constraint.go @@ -0,0 +1,66 @@ +package schema + +import ( + "regexp" + "strings" + + "gorm.io/gorm/clause" +) + +// reg match english letters and midline +var regEnLetterAndMidline = regexp.MustCompile(`^[\w-]+$`) + +type CheckConstraint struct { + Name string + Constraint string // length(phone) >= 10 + *Field +} + +func (chk *CheckConstraint) GetName() string { return chk.Name } + +func (chk *CheckConstraint) Build() (sql string, vars []interface{}) { + return "CONSTRAINT ? CHECK (?)", []interface{}{clause.Column{Name: chk.Name}, clause.Expr{SQL: chk.Constraint}} +} + +// ParseCheckConstraints parse schema check constraints +func (schema *Schema) ParseCheckConstraints() map[string]CheckConstraint { + checks := map[string]CheckConstraint{} + for _, field := range schema.FieldsByDBName { + if chk := field.TagSettings["CHECK"]; chk != "" { + names := strings.Split(chk, ",") + if len(names) > 1 && regEnLetterAndMidline.MatchString(names[0]) { + checks[names[0]] = CheckConstraint{Name: names[0], Constraint: strings.Join(names[1:], ","), Field: field} + } else { + if names[0] == "" { + chk = strings.Join(names[1:], ",") + } + name := schema.namer.CheckerName(schema.Table, field.DBName) + checks[name] = CheckConstraint{Name: name, Constraint: chk, Field: field} + } + } + } + return checks +} + +type UniqueConstraint struct { + Name string + Field *Field +} + +func (uni *UniqueConstraint) GetName() string { return uni.Name } + +func (uni *UniqueConstraint) Build() (sql string, vars []interface{}) { + return "CONSTRAINT ? UNIQUE (?)", []interface{}{clause.Column{Name: uni.Name}, clause.Column{Name: uni.Field.DBName}} +} + +// ParseUniqueConstraints parse schema unique constraints +func (schema *Schema) ParseUniqueConstraints() map[string]UniqueConstraint { + uniques := make(map[string]UniqueConstraint) + for _, field := range schema.Fields { + if field.Unique { + name := schema.namer.UniqueName(schema.Table, field.DBName) + uniques[name] = UniqueConstraint{Name: name, Field: field} + } + } + return uniques +} diff --git a/vendor/gorm.io/gorm/schema/field.go b/vendor/gorm.io/gorm/schema/field.go index d4dfbd6f..a16c98ab 100644 --- a/vendor/gorm.io/gorm/schema/field.go +++ b/vendor/gorm.io/gorm/schema/field.go @@ -49,11 +49,14 @@ const ( Bytes DataType = "bytes" ) +const DefaultAutoIncrementIncrement int64 = 1 + // Field is the representation of model schema's field type Field struct { Name string DBName string BindNames []string + EmbeddedBindNames []string DataType DataType GORMDataType DataType PrimaryKey bool @@ -87,6 +90,16 @@ type Field struct { Set func(context.Context, reflect.Value, interface{}) error Serializer SerializerInterface NewValuePool FieldNewValuePool + + // In some db (e.g. MySQL), Unique and UniqueIndex are indistinguishable. + // When a column has a (not Mul) UniqueIndex, Migrator always reports its gorm.ColumnType is Unique. + // It causes field unnecessarily migration. + // Therefore, we need to record the UniqueIndex on this column (exclude Mul UniqueIndex) for MigrateColumnUnique. + UniqueIndex string +} + +func (field *Field) BindName() string { + return strings.Join(field.BindNames, ".") } // ParseField parses reflect.StructField to Field @@ -100,6 +113,7 @@ func (schema *Schema) ParseField(fieldStruct reflect.StructField) *Field { Name: fieldStruct.Name, DBName: tagSetting["COLUMN"], BindNames: []string{fieldStruct.Name}, + EmbeddedBindNames: []string{fieldStruct.Name}, FieldType: fieldStruct.Type, IndirectFieldType: fieldStruct.Type, StructField: fieldStruct, @@ -115,7 +129,7 @@ func (schema *Schema) ParseField(fieldStruct reflect.StructField) *Field { NotNull: utils.CheckTruth(tagSetting["NOT NULL"], tagSetting["NOTNULL"]), Unique: utils.CheckTruth(tagSetting["UNIQUE"]), Comment: tagSetting["COMMENT"], - AutoIncrementIncrement: 1, + AutoIncrementIncrement: DefaultAutoIncrementIncrement, } for field.IndirectFieldType.Kind() == reflect.Ptr { @@ -174,7 +188,7 @@ func (schema *Schema) ParseField(fieldStruct reflect.StructField) *Field { field.DataType = String field.Serializer = v } else { - var serializerName = field.TagSettings["JSON"] + serializerName := field.TagSettings["JSON"] if serializerName == "" { serializerName = field.TagSettings["SERIALIZER"] } @@ -391,6 +405,9 @@ func (schema *Schema) ParseField(fieldStruct reflect.StructField) *Field { ef.Schema = schema ef.OwnerSchema = field.EmbeddedSchema ef.BindNames = append([]string{fieldStruct.Name}, ef.BindNames...) + if _, ok := field.TagSettings["EMBEDDED"]; ok || !fieldStruct.Anonymous { + ef.EmbeddedBindNames = append([]string{fieldStruct.Name}, ef.EmbeddedBindNames...) + } // index is negative means is pointer if field.FieldType.Kind() == reflect.Struct { ef.StructField.Index = append([]int{fieldStruct.Index[0]}, ef.StructField.Index...) @@ -403,18 +420,14 @@ func (schema *Schema) ParseField(fieldStruct reflect.StructField) *Field { } if ef.PrimaryKey { - if val, ok := ef.TagSettings["PRIMARYKEY"]; ok && utils.CheckTruth(val) { - ef.PrimaryKey = true - } else if val, ok := ef.TagSettings["PRIMARY_KEY"]; ok && utils.CheckTruth(val) { - ef.PrimaryKey = true - } else { + if !utils.CheckTruth(ef.TagSettings["PRIMARYKEY"], ef.TagSettings["PRIMARY_KEY"]) { ef.PrimaryKey = false if val, ok := ef.TagSettings["AUTOINCREMENT"]; !ok || !utils.CheckTruth(val) { ef.AutoIncrement = false } - if ef.DefaultValue == "" { + if !ef.AutoIncrement && ef.DefaultValue == "" { ef.HasDefaultValue = false } } @@ -472,9 +485,6 @@ func (field *Field) setupValuerAndSetter() { oldValuerOf := field.ValueOf field.ValueOf = func(ctx context.Context, v reflect.Value) (interface{}, bool) { value, zero := oldValuerOf(ctx, v) - if zero { - return value, zero - } s, ok := value.(SerializerValuerInterface) if !ok { @@ -487,7 +497,7 @@ func (field *Field) setupValuerAndSetter() { Destination: v, Context: ctx, fieldValue: value, - }, false + }, zero } } @@ -587,8 +597,6 @@ func (field *Field) setupValuerAndSetter() { case **bool: if data != nil && *data != nil { field.ReflectValueOf(ctx, value).SetBool(**data) - } else { - field.ReflectValueOf(ctx, value).SetBool(false) } case bool: field.ReflectValueOf(ctx, value).SetBool(data) @@ -608,8 +616,22 @@ func (field *Field) setupValuerAndSetter() { case **int64: if data != nil && *data != nil { field.ReflectValueOf(ctx, value).SetInt(**data) - } else { - field.ReflectValueOf(ctx, value).SetInt(0) + } + case **int: + if data != nil && *data != nil { + field.ReflectValueOf(ctx, value).SetInt(int64(**data)) + } + case **int8: + if data != nil && *data != nil { + field.ReflectValueOf(ctx, value).SetInt(int64(**data)) + } + case **int16: + if data != nil && *data != nil { + field.ReflectValueOf(ctx, value).SetInt(int64(**data)) + } + case **int32: + if data != nil && *data != nil { + field.ReflectValueOf(ctx, value).SetInt(int64(**data)) } case int64: field.ReflectValueOf(ctx, value).SetInt(data) @@ -647,7 +669,7 @@ func (field *Field) setupValuerAndSetter() { if field.AutoCreateTime == UnixNanosecond || field.AutoUpdateTime == UnixNanosecond { field.ReflectValueOf(ctx, value).SetInt(data.UnixNano()) } else if field.AutoCreateTime == UnixMillisecond || field.AutoUpdateTime == UnixMillisecond { - field.ReflectValueOf(ctx, value).SetInt(data.UnixNano() / 1e6) + field.ReflectValueOf(ctx, value).SetInt(data.UnixMilli()) } else { field.ReflectValueOf(ctx, value).SetInt(data.Unix()) } @@ -656,7 +678,7 @@ func (field *Field) setupValuerAndSetter() { if field.AutoCreateTime == UnixNanosecond || field.AutoUpdateTime == UnixNanosecond { field.ReflectValueOf(ctx, value).SetInt(data.UnixNano()) } else if field.AutoCreateTime == UnixMillisecond || field.AutoUpdateTime == UnixMillisecond { - field.ReflectValueOf(ctx, value).SetInt(data.UnixNano() / 1e6) + field.ReflectValueOf(ctx, value).SetInt(data.UnixMilli()) } else { field.ReflectValueOf(ctx, value).SetInt(data.Unix()) } @@ -674,8 +696,22 @@ func (field *Field) setupValuerAndSetter() { case **uint64: if data != nil && *data != nil { field.ReflectValueOf(ctx, value).SetUint(**data) - } else { - field.ReflectValueOf(ctx, value).SetUint(0) + } + case **uint: + if data != nil && *data != nil { + field.ReflectValueOf(ctx, value).SetUint(uint64(**data)) + } + case **uint8: + if data != nil && *data != nil { + field.ReflectValueOf(ctx, value).SetUint(uint64(**data)) + } + case **uint16: + if data != nil && *data != nil { + field.ReflectValueOf(ctx, value).SetUint(uint64(**data)) + } + case **uint32: + if data != nil && *data != nil { + field.ReflectValueOf(ctx, value).SetUint(uint64(**data)) } case uint64: field.ReflectValueOf(ctx, value).SetUint(data) @@ -707,7 +743,7 @@ func (field *Field) setupValuerAndSetter() { if field.AutoCreateTime == UnixNanosecond || field.AutoUpdateTime == UnixNanosecond { field.ReflectValueOf(ctx, value).SetUint(uint64(data.UnixNano())) } else if field.AutoCreateTime == UnixMillisecond || field.AutoUpdateTime == UnixMillisecond { - field.ReflectValueOf(ctx, value).SetUint(uint64(data.UnixNano() / 1e6)) + field.ReflectValueOf(ctx, value).SetUint(uint64(data.UnixMilli())) } else { field.ReflectValueOf(ctx, value).SetUint(uint64(data.Unix())) } @@ -728,8 +764,10 @@ func (field *Field) setupValuerAndSetter() { case **float64: if data != nil && *data != nil { field.ReflectValueOf(ctx, value).SetFloat(**data) - } else { - field.ReflectValueOf(ctx, value).SetFloat(0) + } + case **float32: + if data != nil && *data != nil { + field.ReflectValueOf(ctx, value).SetFloat(float64(**data)) } case float64: field.ReflectValueOf(ctx, value).SetFloat(data) @@ -774,8 +812,6 @@ func (field *Field) setupValuerAndSetter() { case **string: if data != nil && *data != nil { field.ReflectValueOf(ctx, value).SetString(**data) - } else { - field.ReflectValueOf(ctx, value).SetString("") } case string: field.ReflectValueOf(ctx, value).SetString(data) @@ -823,7 +859,7 @@ func (field *Field) setupValuerAndSetter() { field.Set = func(ctx context.Context, value reflect.Value, v interface{}) error { switch data := v.(type) { case **time.Time: - if data != nil { + if data != nil && *data != nil { field.ReflectValueOf(ctx, value).Set(reflect.ValueOf(*data)) } case time.Time: @@ -859,14 +895,12 @@ func (field *Field) setupValuerAndSetter() { reflectV := reflect.ValueOf(v) if !reflectV.IsValid() { field.ReflectValueOf(ctx, value).Set(reflect.New(field.FieldType).Elem()) + } else if reflectV.Kind() == reflect.Ptr && reflectV.IsNil() { + return } else if reflectV.Type().AssignableTo(field.FieldType) { field.ReflectValueOf(ctx, value).Set(reflectV) } else if reflectV.Kind() == reflect.Ptr { - if reflectV.IsNil() || !reflectV.IsValid() { - field.ReflectValueOf(ctx, value).Set(reflect.New(field.FieldType).Elem()) - } else { - return field.Set(ctx, value, reflectV.Elem().Interface()) - } + return field.Set(ctx, value, reflectV.Elem().Interface()) } else { fieldValue := field.ReflectValueOf(ctx, value) if fieldValue.IsNil() { @@ -887,14 +921,12 @@ func (field *Field) setupValuerAndSetter() { reflectV := reflect.ValueOf(v) if !reflectV.IsValid() { field.ReflectValueOf(ctx, value).Set(reflect.New(field.FieldType).Elem()) + } else if reflectV.Kind() == reflect.Ptr && reflectV.IsNil() { + return } else if reflectV.Type().AssignableTo(field.FieldType) { field.ReflectValueOf(ctx, value).Set(reflectV) } else if reflectV.Kind() == reflect.Ptr { - if reflectV.IsNil() || !reflectV.IsValid() { - field.ReflectValueOf(ctx, value).Set(reflect.New(field.FieldType).Elem()) - } else { - return field.Set(ctx, value, reflectV.Elem().Interface()) - } + return field.Set(ctx, value, reflectV.Elem().Interface()) } else { if valuer, ok := v.(driver.Valuer); ok { v, _ = valuer.Value() @@ -923,6 +955,8 @@ func (field *Field) setupValuerAndSetter() { sameElemType = field.FieldType == reflect.ValueOf(field.Serializer).Type().Elem() } + serializerValue := reflect.Indirect(reflect.ValueOf(field.Serializer)) + serializerType := serializerValue.Type() field.Set = func(ctx context.Context, value reflect.Value, v interface{}) (err error) { if s, ok := v.(*serializer); ok { if s.fieldValue != nil { @@ -930,11 +964,12 @@ func (field *Field) setupValuerAndSetter() { } else if err = s.Serializer.Scan(ctx, field, value, s.value); err == nil { if sameElemType { field.ReflectValueOf(ctx, value).Set(reflect.ValueOf(s.Serializer).Elem()) - s.Serializer = reflect.New(reflect.Indirect(reflect.ValueOf(field.Serializer)).Type()).Interface().(SerializerInterface) } else if sameType { field.ReflectValueOf(ctx, value).Set(reflect.ValueOf(s.Serializer)) - s.Serializer = reflect.New(reflect.Indirect(reflect.ValueOf(field.Serializer)).Type()).Interface().(SerializerInterface) } + si := reflect.New(serializerType) + si.Elem().Set(serializerValue) + s.Serializer = si.Interface().(SerializerInterface) } } else { err = oldFieldSetter(ctx, value, v) @@ -946,11 +981,15 @@ func (field *Field) setupValuerAndSetter() { func (field *Field) setupNewValuePool() { if field.Serializer != nil { + serializerValue := reflect.Indirect(reflect.ValueOf(field.Serializer)) + serializerType := serializerValue.Type() field.NewValuePool = &sync.Pool{ New: func() interface{} { + si := reflect.New(serializerType) + si.Elem().Set(serializerValue) return &serializer{ Field: field, - Serializer: field.Serializer, + Serializer: si.Interface().(SerializerInterface), } }, } diff --git a/vendor/gorm.io/gorm/schema/index.go b/vendor/gorm.io/gorm/schema/index.go index 5003c742..f4f36751 100644 --- a/vendor/gorm.io/gorm/schema/index.go +++ b/vendor/gorm.io/gorm/schema/index.go @@ -13,8 +13,8 @@ type Index struct { Type string // btree, hash, gist, spgist, gin, and brin Where string Comment string - Option string // WITH PARSER parser_name - Fields []IndexOption + Option string // WITH PARSER parser_name + Fields []IndexOption // Note: IndexOption's Field maybe the same } type IndexOption struct { @@ -65,7 +65,11 @@ func (schema *Schema) ParseIndexes() map[string]Index { } } } - + for _, index := range indexes { + if index.Class == "UNIQUE" && len(index.Fields) == 1 { + index.Fields[0].Field.UniqueIndex = index.Name + } + } return indexes } diff --git a/vendor/gorm.io/gorm/schema/interfaces.go b/vendor/gorm.io/gorm/schema/interfaces.go index a75a33c0..306d4f4e 100644 --- a/vendor/gorm.io/gorm/schema/interfaces.go +++ b/vendor/gorm.io/gorm/schema/interfaces.go @@ -4,6 +4,12 @@ import ( "gorm.io/gorm/clause" ) +// ConstraintInterface database constraint interface +type ConstraintInterface interface { + GetName() string + Build() (sql string, vars []interface{}) +} + // GormDataTypeInterface gorm data type interface type GormDataTypeInterface interface { GormDataType() string diff --git a/vendor/gorm.io/gorm/schema/naming.go b/vendor/gorm.io/gorm/schema/naming.go index a258beed..6248bde8 100644 --- a/vendor/gorm.io/gorm/schema/naming.go +++ b/vendor/gorm.io/gorm/schema/naming.go @@ -8,6 +8,8 @@ import ( "unicode/utf8" "github.com/jinzhu/inflection" + "golang.org/x/text/cases" + "golang.org/x/text/language" ) // Namer namer interface @@ -19,6 +21,7 @@ type Namer interface { RelationshipFKName(Relationship) string CheckerName(table, column string) string IndexName(table, column string) string + UniqueName(table, column string) string } // Replacer replacer interface like strings.Replacer @@ -26,12 +29,15 @@ type Replacer interface { Replace(name string) string } +var _ Namer = (*NamingStrategy)(nil) + // NamingStrategy tables, columns naming strategy type NamingStrategy struct { - TablePrefix string - SingularTable bool - NameReplacer Replacer - NoLowerCase bool + TablePrefix string + SingularTable bool + NameReplacer Replacer + NoLowerCase bool + IdentifierMaxLength int } // TableName convert string to table name @@ -84,17 +90,26 @@ func (ns NamingStrategy) IndexName(table, column string) string { return ns.formatName("idx", table, ns.toDBName(column)) } +// UniqueName generate unique constraint name +func (ns NamingStrategy) UniqueName(table, column string) string { + return ns.formatName("uni", table, ns.toDBName(column)) +} + func (ns NamingStrategy) formatName(prefix, table, name string) string { formattedName := strings.ReplaceAll(strings.Join([]string{ prefix, table, name, }, "_"), ".", "_") - if utf8.RuneCountInString(formattedName) > 64 { + if ns.IdentifierMaxLength == 0 { + ns.IdentifierMaxLength = 64 + } + + if utf8.RuneCountInString(formattedName) > ns.IdentifierMaxLength { h := sha1.New() h.Write([]byte(formattedName)) bs := h.Sum(nil) - formattedName = formattedName[0:56] + hex.EncodeToString(bs)[:8] + formattedName = formattedName[0:ns.IdentifierMaxLength-8] + hex.EncodeToString(bs)[:8] } return formattedName } @@ -108,7 +123,7 @@ var ( func init() { commonInitialismsForReplacer := make([]string, 0, len(commonInitialisms)) for _, initialism := range commonInitialisms { - commonInitialismsForReplacer = append(commonInitialismsForReplacer, initialism, strings.Title(strings.ToLower(initialism))) + commonInitialismsForReplacer = append(commonInitialismsForReplacer, initialism, cases.Title(language.Und).String(initialism)) } commonInitialismsReplacer = strings.NewReplacer(commonInitialismsForReplacer...) } @@ -173,9 +188,9 @@ func (ns NamingStrategy) toDBName(name string) string { } func (ns NamingStrategy) toSchemaName(name string) string { - result := strings.ReplaceAll(strings.Title(strings.ReplaceAll(name, "_", " ")), " ", "") + result := strings.ReplaceAll(cases.Title(language.Und, cases.NoLower).String(strings.ReplaceAll(name, "_", " ")), " ", "") for _, initialism := range commonInitialisms { - result = regexp.MustCompile(strings.Title(strings.ToLower(initialism))+"([A-Z]|$|_)").ReplaceAllString(result, initialism+"$1") + result = regexp.MustCompile(cases.Title(language.Und, cases.NoLower).String(strings.ToLower(initialism))+"([A-Z]|$|_)").ReplaceAllString(result, initialism+"$1") } return result } diff --git a/vendor/gorm.io/gorm/schema/relationship.go b/vendor/gorm.io/gorm/schema/relationship.go index 0aa33e51..32676b39 100644 --- a/vendor/gorm.io/gorm/schema/relationship.go +++ b/vendor/gorm.io/gorm/schema/relationship.go @@ -7,6 +7,9 @@ import ( "strings" "github.com/jinzhu/inflection" + "golang.org/x/text/cases" + "golang.org/x/text/language" + "gorm.io/gorm/clause" ) @@ -27,6 +30,8 @@ type Relationships struct { HasMany []*Relationship Many2Many []*Relationship Relations map[string]*Relationship + + EmbeddedRelations map[string]*Relationships } type Relationship struct { @@ -74,8 +79,8 @@ func (schema *Schema) parseRelation(field *Field) *Relationship { return nil } - if polymorphic := field.TagSettings["POLYMORPHIC"]; polymorphic != "" { - schema.buildPolymorphicRelation(relation, field, polymorphic) + if hasPolymorphicRelation(field.TagSettings) { + schema.buildPolymorphicRelation(relation, field) } else if many2many := field.TagSettings["MANY2MANY"]; many2many != "" { schema.buildMany2ManyRelation(relation, field, many2many) } else if belongsTo := field.TagSettings["BELONGSTO"]; belongsTo != "" { @@ -87,7 +92,8 @@ func (schema *Schema) parseRelation(field *Field) *Relationship { case reflect.Slice: schema.guessRelation(relation, field, guessHas) default: - schema.err = fmt.Errorf("unsupported data type %v for %v on field %s", relation.FieldSchema, schema, field.Name) + schema.err = fmt.Errorf("unsupported data type %v for %v on field %s", relation.FieldSchema, schema, + field.Name) } } @@ -106,7 +112,7 @@ func (schema *Schema) parseRelation(field *Field) *Relationship { } if schema.err == nil { - schema.Relationships.Relations[relation.Name] = relation + schema.setRelation(relation) switch relation.Type { case HasOne: schema.Relationships.HasOne = append(schema.Relationships.HasOne, relation) @@ -122,34 +128,100 @@ func (schema *Schema) parseRelation(field *Field) *Relationship { return relation } +// hasPolymorphicRelation check if has polymorphic relation +// 1. `POLYMORPHIC` tag +// 2. `POLYMORPHICTYPE` and `POLYMORPHICID` tag +func hasPolymorphicRelation(tagSettings map[string]string) bool { + if _, ok := tagSettings["POLYMORPHIC"]; ok { + return true + } + + _, hasType := tagSettings["POLYMORPHICTYPE"] + _, hasId := tagSettings["POLYMORPHICID"] + + return hasType && hasId +} + +func (schema *Schema) setRelation(relation *Relationship) { + // set non-embedded relation + if rel := schema.Relationships.Relations[relation.Name]; rel != nil { + if len(rel.Field.BindNames) > 1 { + schema.Relationships.Relations[relation.Name] = relation + } + } else { + schema.Relationships.Relations[relation.Name] = relation + } + + // set embedded relation + if len(relation.Field.EmbeddedBindNames) <= 1 { + return + } + relationships := &schema.Relationships + for i, name := range relation.Field.EmbeddedBindNames { + if i < len(relation.Field.EmbeddedBindNames)-1 { + if relationships.EmbeddedRelations == nil { + relationships.EmbeddedRelations = map[string]*Relationships{} + } + if r := relationships.EmbeddedRelations[name]; r == nil { + relationships.EmbeddedRelations[name] = &Relationships{} + } + relationships = relationships.EmbeddedRelations[name] + } else { + if relationships.Relations == nil { + relationships.Relations = map[string]*Relationship{} + } + relationships.Relations[relation.Name] = relation + } + } +} + // User has many Toys, its `Polymorphic` is `Owner`, Pet has one Toy, its `Polymorphic` is `Owner` -// type User struct { -// Toys []Toy `gorm:"polymorphic:Owner;"` -// } -// type Pet struct { -// Toy Toy `gorm:"polymorphic:Owner;"` -// } -// type Toy struct { -// OwnerID int -// OwnerType string -// } -func (schema *Schema) buildPolymorphicRelation(relation *Relationship, field *Field, polymorphic string) { +// +// type User struct { +// Toys []Toy `gorm:"polymorphic:Owner;"` +// } +// type Pet struct { +// Toy Toy `gorm:"polymorphic:Owner;"` +// } +// type Toy struct { +// OwnerID int +// OwnerType string +// } +func (schema *Schema) buildPolymorphicRelation(relation *Relationship, field *Field) { + polymorphic := field.TagSettings["POLYMORPHIC"] + relation.Polymorphic = &Polymorphic{ - Value: schema.Table, - PolymorphicType: relation.FieldSchema.FieldsByName[polymorphic+"Type"], - PolymorphicID: relation.FieldSchema.FieldsByName[polymorphic+"ID"], + Value: schema.Table, + } + + var ( + typeName = polymorphic + "Type" + typeId = polymorphic + "ID" + ) + + if value, ok := field.TagSettings["POLYMORPHICTYPE"]; ok { + typeName = strings.TrimSpace(value) } + if value, ok := field.TagSettings["POLYMORPHICID"]; ok { + typeId = strings.TrimSpace(value) + } + + relation.Polymorphic.PolymorphicType = relation.FieldSchema.FieldsByName[typeName] + relation.Polymorphic.PolymorphicID = relation.FieldSchema.FieldsByName[typeId] + if value, ok := field.TagSettings["POLYMORPHICVALUE"]; ok { relation.Polymorphic.Value = strings.TrimSpace(value) } if relation.Polymorphic.PolymorphicType == nil { - schema.err = fmt.Errorf("invalid polymorphic type %v for %v on field %s, missing field %s", relation.FieldSchema, schema, field.Name, polymorphic+"Type") + schema.err = fmt.Errorf("invalid polymorphic type %v for %v on field %s, missing field %s", + relation.FieldSchema, schema, field.Name, polymorphic+"Type") } if relation.Polymorphic.PolymorphicID == nil { - schema.err = fmt.Errorf("invalid polymorphic type %v for %v on field %s, missing field %s", relation.FieldSchema, schema, field.Name, polymorphic+"ID") + schema.err = fmt.Errorf("invalid polymorphic type %v for %v on field %s, missing field %s", + relation.FieldSchema, schema, field.Name, polymorphic+"ID") } if schema.err == nil { @@ -161,10 +233,17 @@ func (schema *Schema) buildPolymorphicRelation(relation *Relationship, field *Fi primaryKeyField := schema.PrioritizedPrimaryField if len(relation.foreignKeys) > 0 { if primaryKeyField = schema.LookUpField(relation.foreignKeys[0]); primaryKeyField == nil || len(relation.foreignKeys) > 1 { - schema.err = fmt.Errorf("invalid polymorphic foreign keys %+v for %v on field %s", relation.foreignKeys, schema, field.Name) + schema.err = fmt.Errorf("invalid polymorphic foreign keys %+v for %v on field %s", relation.foreignKeys, + schema, field.Name) } } + if primaryKeyField == nil { + schema.err = fmt.Errorf("invalid polymorphic type %v for %v on field %s, missing primaryKey field", + relation.FieldSchema, schema, field.Name) + return + } + // use same data type for foreign keys if copyableDataType(primaryKeyField.DataType) { relation.Polymorphic.PolymorphicID.DataType = primaryKeyField.DataType @@ -191,7 +270,8 @@ func (schema *Schema) buildMany2ManyRelation(relation *Relationship, field *Fiel err error joinTableFields []reflect.StructField fieldsMap = map[string]*Field{} - ownFieldsMap = map[string]bool{} // fix self join many2many + ownFieldsMap = map[string]*Field{} // fix self join many2many + referFieldsMap = map[string]*Field{} joinForeignKeys = toColumns(field.TagSettings["JOINFOREIGNKEY"]) joinReferences = toColumns(field.TagSettings["JOINREFERENCES"]) ) @@ -224,12 +304,12 @@ func (schema *Schema) buildMany2ManyRelation(relation *Relationship, field *Fiel } for idx, ownField := range ownForeignFields { - joinFieldName := strings.Title(schema.Name) + ownField.Name + joinFieldName := cases.Title(language.Und, cases.NoLower).String(schema.Name) + ownField.Name if len(joinForeignKeys) > idx { - joinFieldName = strings.Title(joinForeignKeys[idx]) + joinFieldName = cases.Title(language.Und, cases.NoLower).String(joinForeignKeys[idx]) } - ownFieldsMap[joinFieldName] = true + ownFieldsMap[joinFieldName] = ownField fieldsMap[joinFieldName] = ownField joinTableFields = append(joinTableFields, reflect.StructField{ Name: joinFieldName, @@ -241,10 +321,7 @@ func (schema *Schema) buildMany2ManyRelation(relation *Relationship, field *Fiel } for idx, relField := range refForeignFields { - joinFieldName := strings.Title(relation.FieldSchema.Name) + relField.Name - if len(joinReferences) > idx { - joinFieldName = strings.Title(joinReferences[idx]) - } + joinFieldName := cases.Title(language.Und, cases.NoLower).String(relation.FieldSchema.Name) + relField.Name if _, ok := ownFieldsMap[joinFieldName]; ok { if field.Name != relation.FieldSchema.Name { @@ -254,23 +331,32 @@ func (schema *Schema) buildMany2ManyRelation(relation *Relationship, field *Fiel } } - fieldsMap[joinFieldName] = relField - joinTableFields = append(joinTableFields, reflect.StructField{ - Name: joinFieldName, - PkgPath: relField.StructField.PkgPath, - Type: relField.StructField.Type, - Tag: removeSettingFromTag(appendSettingFromTag(relField.StructField.Tag, "primaryKey"), - "column", "autoincrement", "index", "unique", "uniqueindex"), - }) + if len(joinReferences) > idx { + joinFieldName = cases.Title(language.Und, cases.NoLower).String(joinReferences[idx]) + } + + referFieldsMap[joinFieldName] = relField + + if _, ok := fieldsMap[joinFieldName]; !ok { + fieldsMap[joinFieldName] = relField + joinTableFields = append(joinTableFields, reflect.StructField{ + Name: joinFieldName, + PkgPath: relField.StructField.PkgPath, + Type: relField.StructField.Type, + Tag: removeSettingFromTag(appendSettingFromTag(relField.StructField.Tag, "primaryKey"), + "column", "autoincrement", "index", "unique", "uniqueindex"), + }) + } } joinTableFields = append(joinTableFields, reflect.StructField{ - Name: strings.Title(schema.Name) + field.Name, + Name: cases.Title(language.Und, cases.NoLower).String(schema.Name) + field.Name, Type: schema.ModelType, Tag: `gorm:"-"`, }) - if relation.JoinTable, err = Parse(reflect.New(reflect.StructOf(joinTableFields)).Interface(), schema.cacheStore, schema.namer); err != nil { + if relation.JoinTable, err = Parse(reflect.New(reflect.StructOf(joinTableFields)).Interface(), schema.cacheStore, + schema.namer); err != nil { schema.err = err } relation.JoinTable.Name = many2many @@ -317,31 +403,37 @@ func (schema *Schema) buildMany2ManyRelation(relation *Relationship, field *Fiel f.Size = fieldsMap[f.Name].Size } relation.JoinTable.PrimaryFields = append(relation.JoinTable.PrimaryFields, f) - ownPrimaryField := schema == fieldsMap[f.Name].Schema && ownFieldsMap[f.Name] - if ownPrimaryField { + if of, ok := ownFieldsMap[f.Name]; ok { joinRel := relation.JoinTable.Relationships.Relations[relName] joinRel.Field = relation.Field joinRel.References = append(joinRel.References, &Reference{ - PrimaryKey: fieldsMap[f.Name], + PrimaryKey: of, ForeignKey: f, }) - } else { + + relation.References = append(relation.References, &Reference{ + PrimaryKey: of, + ForeignKey: f, + OwnPrimaryKey: true, + }) + } + + if rf, ok := referFieldsMap[f.Name]; ok { joinRefRel := relation.JoinTable.Relationships.Relations[relRefName] if joinRefRel.Field == nil { joinRefRel.Field = relation.Field } joinRefRel.References = append(joinRefRel.References, &Reference{ - PrimaryKey: fieldsMap[f.Name], + PrimaryKey: rf, ForeignKey: f, }) - } - relation.References = append(relation.References, &Reference{ - PrimaryKey: fieldsMap[f.Name], - ForeignKey: f, - OwnPrimaryKey: ownPrimaryField, - }) + relation.References = append(relation.References, &Reference{ + PrimaryKey: rf, + ForeignKey: f, + }) + } } } } @@ -383,7 +475,8 @@ func (schema *Schema) guessRelation(relation *Relationship, field *Field, cgl gu schema.guessRelation(relation, field, guessEmbeddedHas) // case guessEmbeddedHas: default: - schema.err = fmt.Errorf("invalid field found for struct %v's field %s: define a valid foreign key for relations or implement the Valuer/Scanner interface", schema, field.Name) + schema.err = fmt.Errorf("invalid field found for struct %v's field %s: define a valid foreign key for relations or implement the Valuer/Scanner interface", + schema, field.Name) } } @@ -391,34 +484,31 @@ func (schema *Schema) guessRelation(relation *Relationship, field *Field, cgl gu case guessBelongs: primarySchema, foreignSchema = relation.FieldSchema, schema case guessEmbeddedBelongs: - if field.OwnerSchema != nil { - primarySchema, foreignSchema = relation.FieldSchema, field.OwnerSchema - } else { + if field.OwnerSchema == nil { reguessOrErr() return } + primarySchema, foreignSchema = relation.FieldSchema, field.OwnerSchema case guessHas: case guessEmbeddedHas: - if field.OwnerSchema != nil { - primarySchema, foreignSchema = field.OwnerSchema, relation.FieldSchema - } else { + if field.OwnerSchema == nil { reguessOrErr() return } + primarySchema, foreignSchema = field.OwnerSchema, relation.FieldSchema } if len(relation.foreignKeys) > 0 { for _, foreignKey := range relation.foreignKeys { - if f := foreignSchema.LookUpField(foreignKey); f != nil { - foreignFields = append(foreignFields, f) - } else { + f := foreignSchema.LookUpField(foreignKey) + if f == nil { reguessOrErr() return } + foreignFields = append(foreignFields, f) } } else { - var primaryFields []*Field - var primarySchemaName = primarySchema.Name + primarySchemaName := primarySchema.Name if primarySchemaName == "" { primarySchemaName = relation.FieldSchema.Name } @@ -433,6 +523,7 @@ func (schema *Schema) guessRelation(relation *Relationship, field *Field, cgl gu primaryFields = primarySchema.PrimaryFields } + primaryFieldLoop: for _, primaryField := range primaryFields { lookUpName := primarySchemaName + primaryField.Name if gl == guessBelongs { @@ -441,23 +532,33 @@ func (schema *Schema) guessRelation(relation *Relationship, field *Field, cgl gu lookUpNames := []string{lookUpName} if len(primaryFields) == 1 { - lookUpNames = append(lookUpNames, strings.TrimSuffix(lookUpName, primaryField.Name)+"ID", strings.TrimSuffix(lookUpName, primaryField.Name)+"Id", schema.namer.ColumnName(foreignSchema.Table, strings.TrimSuffix(lookUpName, primaryField.Name)+"ID")) + lookUpNames = append(lookUpNames, strings.TrimSuffix(lookUpName, primaryField.Name)+"ID", + strings.TrimSuffix(lookUpName, primaryField.Name)+"Id", schema.namer.ColumnName(foreignSchema.Table, + strings.TrimSuffix(lookUpName, primaryField.Name)+"ID")) } + for _, name := range lookUpNames { + if f := foreignSchema.LookUpFieldByBindName(field.BindNames, name); f != nil { + foreignFields = append(foreignFields, f) + primaryFields = append(primaryFields, primaryField) + continue primaryFieldLoop + } + } for _, name := range lookUpNames { if f := foreignSchema.LookUpField(name); f != nil { foreignFields = append(foreignFields, f) primaryFields = append(primaryFields, primaryField) - break + continue primaryFieldLoop } } } } - if len(foreignFields) == 0 { + switch { + case len(foreignFields) == 0: reguessOrErr() return - } else if len(relation.primaryKeys) > 0 { + case len(relation.primaryKeys) > 0: for idx, primaryKey := range relation.primaryKeys { if f := primarySchema.LookUpField(primaryKey); f != nil { if len(primaryFields) < idx+1 { @@ -471,7 +572,7 @@ func (schema *Schema) guessRelation(relation *Relationship, field *Field, cgl gu return } } - } else if len(primaryFields) == 0 { + case len(primaryFields) == 0: if len(foreignFields) == 1 && primarySchema.PrioritizedPrimaryField != nil { primaryFields = append(primaryFields, primarySchema.PrioritizedPrimaryField) } else if len(primarySchema.PrimaryFields) == len(foreignFields) { @@ -507,6 +608,7 @@ func (schema *Schema) guessRelation(relation *Relationship, field *Field, cgl gu } } +// Constraint is ForeignKey Constraint type Constraint struct { Name string Field *Field @@ -518,6 +620,31 @@ type Constraint struct { OnUpdate string } +func (constraint *Constraint) GetName() string { return constraint.Name } + +func (constraint *Constraint) Build() (sql string, vars []interface{}) { + sql = "CONSTRAINT ? FOREIGN KEY ? REFERENCES ??" + if constraint.OnDelete != "" { + sql += " ON DELETE " + constraint.OnDelete + } + + if constraint.OnUpdate != "" { + sql += " ON UPDATE " + constraint.OnUpdate + } + + foreignKeys := make([]interface{}, 0, len(constraint.ForeignKeys)) + for _, field := range constraint.ForeignKeys { + foreignKeys = append(foreignKeys, clause.Column{Name: field.DBName}) + } + + references := make([]interface{}, 0, len(constraint.References)) + for _, field := range constraint.References { + references = append(references, clause.Column{Name: field.DBName}) + } + vars = append(vars, clause.Table{Name: constraint.Name}, foreignKeys, clause.Table{Name: constraint.ReferenceSchema.Table}, references) + return +} + func (rel *Relationship) ParseConstraint() *Constraint { str := rel.Field.TagSettings["CONSTRAINT"] if str == "-" { diff --git a/vendor/gorm.io/gorm/schema/schema.go b/vendor/gorm.io/gorm/schema/schema.go index eca113e9..db236797 100644 --- a/vendor/gorm.io/gorm/schema/schema.go +++ b/vendor/gorm.io/gorm/schema/schema.go @@ -6,12 +6,27 @@ import ( "fmt" "go/ast" "reflect" + "strings" "sync" "gorm.io/gorm/clause" "gorm.io/gorm/logger" ) +type callbackType string + +const ( + callbackTypeBeforeCreate callbackType = "BeforeCreate" + callbackTypeBeforeUpdate callbackType = "BeforeUpdate" + callbackTypeAfterCreate callbackType = "AfterCreate" + callbackTypeAfterUpdate callbackType = "AfterUpdate" + callbackTypeBeforeSave callbackType = "BeforeSave" + callbackTypeAfterSave callbackType = "AfterSave" + callbackTypeBeforeDelete callbackType = "BeforeDelete" + callbackTypeAfterDelete callbackType = "AfterDelete" + callbackTypeAfterFind callbackType = "AfterFind" +) + // ErrUnsupportedDataType unsupported data type var ErrUnsupportedDataType = errors.New("unsupported data type") @@ -25,6 +40,7 @@ type Schema struct { PrimaryFieldDBNames []string Fields []*Field FieldsByName map[string]*Field + FieldsByBindName map[string]*Field // embedded fields is 'Embed.Field' FieldsByDBName map[string]*Field FieldsWithDefaultDBValue []*Field // fields with default value assigned by database Relationships Relationships @@ -51,9 +67,10 @@ func (schema Schema) String() string { } func (schema Schema) MakeSlice() reflect.Value { - slice := reflect.MakeSlice(reflect.SliceOf(reflect.PtrTo(schema.ModelType)), 0, 20) + slice := reflect.MakeSlice(reflect.SliceOf(reflect.PointerTo(schema.ModelType)), 0, 20) results := reflect.New(slice.Type()) results.Elem().Set(slice) + return results } @@ -67,10 +84,35 @@ func (schema Schema) LookUpField(name string) *Field { return nil } +// LookUpFieldByBindName looks for the closest field in the embedded struct. +// +// type Struct struct { +// Embedded struct { +// ID string // is selected by LookUpFieldByBindName([]string{"Embedded", "ID"}, "ID") +// } +// ID string // is selected by LookUpFieldByBindName([]string{"ID"}, "ID") +// } +func (schema Schema) LookUpFieldByBindName(bindNames []string, name string) *Field { + if len(bindNames) == 0 { + return nil + } + for i := len(bindNames) - 1; i >= 0; i-- { + find := strings.Join(bindNames[:i], ".") + "." + name + if field, ok := schema.FieldsByBindName[find]; ok { + return field + } + } + return nil +} + type Tabler interface { TableName() string } +type TablerWithNamer interface { + TableName(Namer) string +} + // Parse get data type from dialector func Parse(dest interface{}, cacheStore *sync.Map, namer Namer) (*Schema, error) { return ParseWithSpecialTableName(dest, cacheStore, namer, "") @@ -112,7 +154,7 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam schemaCacheKey = modelType } - // Load exist schmema cache, return if exists + // Load exist schema cache, return if exists if v, ok := cacheStore.Load(schemaCacheKey); ok { s := v.(*Schema) // Wait for the initialization of other goroutines to complete @@ -125,6 +167,9 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam if tabler, ok := modelValue.Interface().(Tabler); ok { tableName = tabler.TableName() } + if tabler, ok := modelValue.Interface().(TablerWithNamer); ok { + tableName = tabler.TableName(namer) + } if en, ok := namer.(embeddedNamer); ok { tableName = en.Table } @@ -133,20 +178,21 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam } schema := &Schema{ - Name: modelType.Name(), - ModelType: modelType, - Table: tableName, - FieldsByName: map[string]*Field{}, - FieldsByDBName: map[string]*Field{}, - Relationships: Relationships{Relations: map[string]*Relationship{}}, - cacheStore: cacheStore, - namer: namer, - initialized: make(chan struct{}), + Name: modelType.Name(), + ModelType: modelType, + Table: tableName, + FieldsByName: map[string]*Field{}, + FieldsByBindName: map[string]*Field{}, + FieldsByDBName: map[string]*Field{}, + Relationships: Relationships{Relations: map[string]*Relationship{}}, + cacheStore: cacheStore, + namer: namer, + initialized: make(chan struct{}), } // When the schema initialization is completed, the channel will be closed defer close(schema.initialized) - // Load exist schmema cache, return if exists + // Load exist schema cache, return if exists if v, ok := cacheStore.Load(schemaCacheKey); ok { s := v.(*Schema) // Wait for the initialization of other goroutines to complete @@ -169,6 +215,7 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam field.DBName = namer.ColumnName(schema.Table, field.Name) } + bindName := field.BindName() if field.DBName != "" { // nonexistence or shortest path or first appear prioritized if has permission if v, ok := schema.FieldsByDBName[field.DBName]; !ok || ((field.Creatable || field.Updatable || field.Readable) && len(field.BindNames) < len(v.BindNames)) { @@ -177,6 +224,7 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam } schema.FieldsByDBName[field.DBName] = field schema.FieldsByName[field.Name] = field + schema.FieldsByBindName[bindName] = field if v != nil && v.PrimaryKey { for idx, f := range schema.PrimaryFields { @@ -195,6 +243,9 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam if of, ok := schema.FieldsByName[field.Name]; !ok || of.TagSettings["-"] == "-" { schema.FieldsByName[field.Name] = field } + if of, ok := schema.FieldsByBindName[bindName]; !ok || of.TagSettings["-"] == "-" { + schema.FieldsByBindName[bindName] = field + } field.setupValuerAndSetter() } @@ -214,8 +265,18 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam } } - if schema.PrioritizedPrimaryField == nil && len(schema.PrimaryFields) == 1 { - schema.PrioritizedPrimaryField = schema.PrimaryFields[0] + if schema.PrioritizedPrimaryField == nil { + if len(schema.PrimaryFields) == 1 { + schema.PrioritizedPrimaryField = schema.PrimaryFields[0] + } else if len(schema.PrimaryFields) > 1 { + // If there are multiple primary keys, the AUTOINCREMENT field is prioritized + for _, field := range schema.PrimaryFields { + if field.AutoIncrement { + schema.PrioritizedPrimaryField = field + break + } + } + } } for _, field := range schema.PrimaryFields { @@ -223,7 +284,7 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam } for _, field := range schema.Fields { - if field.HasDefaultValue && field.DefaultValueInterface == nil { + if field.DataType != "" && field.HasDefaultValue && field.DefaultValueInterface == nil { schema.FieldsWithDefaultDBValue = append(schema.FieldsWithDefaultDBValue, field) } } @@ -242,14 +303,20 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam } } - callbacks := []string{"BeforeCreate", "AfterCreate", "BeforeUpdate", "AfterUpdate", "BeforeSave", "AfterSave", "BeforeDelete", "AfterDelete", "AfterFind"} - for _, name := range callbacks { - if methodValue := modelValue.MethodByName(name); methodValue.IsValid() { + callbackTypes := []callbackType{ + callbackTypeBeforeCreate, callbackTypeAfterCreate, + callbackTypeBeforeUpdate, callbackTypeAfterUpdate, + callbackTypeBeforeSave, callbackTypeAfterSave, + callbackTypeBeforeDelete, callbackTypeAfterDelete, + callbackTypeAfterFind, + } + for _, cbName := range callbackTypes { + if methodValue := callBackToMethodValue(modelValue, cbName); methodValue.IsValid() { switch methodValue.Type().String() { case "func(*gorm.DB) error": // TODO hack - reflect.Indirect(reflect.ValueOf(schema)).FieldByName(name).SetBool(true) + reflect.Indirect(reflect.ValueOf(schema)).FieldByName(string(cbName)).SetBool(true) default: - logger.Default.Warn(context.Background(), "Model %v don't match %vInterface, should be `%v(*gorm.DB) error`. Please see https://gorm.io/docs/hooks.html", schema, name, name) + logger.Default.Warn(context.Background(), "Model %v don't match %vInterface, should be `%v(*gorm.DB) error`. Please see https://gorm.io/docs/hooks.html", schema, cbName, cbName) } } } @@ -271,11 +338,12 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam if _, embedded := schema.cacheStore.Load(embeddedCacheKey); !embedded { for _, field := range schema.Fields { - if field.DataType == "" && (field.Creatable || field.Updatable || field.Readable) { + if field.DataType == "" && field.GORMDataType == "" && (field.Creatable || field.Updatable || field.Readable) { if schema.parseRelation(field); schema.err != nil { return schema, schema.err } else { schema.FieldsByName[field.Name] = field + schema.FieldsByBindName[field.BindName()] = field } } @@ -302,6 +370,39 @@ func ParseWithSpecialTableName(dest interface{}, cacheStore *sync.Map, namer Nam return schema, schema.err } +// This unrolling is needed to show to the compiler the exact set of methods +// that can be used on the modelType. +// Prior to go1.22 any use of MethodByName would cause the linker to +// abandon dead code elimination for the entire binary. +// As of go1.22 the compiler supports one special case of a string constant +// being passed to MethodByName. For enterprise customers or those building +// large binaries, this gives a significant reduction in binary size. +// https://github.com/golang/go/issues/62257 +func callBackToMethodValue(modelType reflect.Value, cbType callbackType) reflect.Value { + switch cbType { + case callbackTypeBeforeCreate: + return modelType.MethodByName(string(callbackTypeBeforeCreate)) + case callbackTypeAfterCreate: + return modelType.MethodByName(string(callbackTypeAfterCreate)) + case callbackTypeBeforeUpdate: + return modelType.MethodByName(string(callbackTypeBeforeUpdate)) + case callbackTypeAfterUpdate: + return modelType.MethodByName(string(callbackTypeAfterUpdate)) + case callbackTypeBeforeSave: + return modelType.MethodByName(string(callbackTypeBeforeSave)) + case callbackTypeAfterSave: + return modelType.MethodByName(string(callbackTypeAfterSave)) + case callbackTypeBeforeDelete: + return modelType.MethodByName(string(callbackTypeBeforeDelete)) + case callbackTypeAfterDelete: + return modelType.MethodByName(string(callbackTypeAfterDelete)) + case callbackTypeAfterFind: + return modelType.MethodByName(string(callbackTypeAfterFind)) + default: + return reflect.ValueOf(nil) + } +} + func getOrParse(dest interface{}, cacheStore *sync.Map, namer Namer) (*Schema, error) { modelType := reflect.ValueOf(dest).Type() for modelType.Kind() == reflect.Slice || modelType.Kind() == reflect.Array || modelType.Kind() == reflect.Ptr { diff --git a/vendor/gorm.io/gorm/schema/serializer.go b/vendor/gorm.io/gorm/schema/serializer.go index 758a6421..0fafbcba 100644 --- a/vendor/gorm.io/gorm/schema/serializer.go +++ b/vendor/gorm.io/gorm/schema/serializer.go @@ -70,8 +70,7 @@ type SerializerValuerInterface interface { } // JSONSerializer json serializer -type JSONSerializer struct { -} +type JSONSerializer struct{} // Scan implements serializer interface func (JSONSerializer) Scan(ctx context.Context, field *Field, dst reflect.Value, dbValue interface{}) (err error) { @@ -85,10 +84,15 @@ func (JSONSerializer) Scan(ctx context.Context, field *Field, dst reflect.Value, case string: bytes = []byte(v) default: - return fmt.Errorf("failed to unmarshal JSONB value: %#v", dbValue) + bytes, err = json.Marshal(v) + if err != nil { + return err + } } - err = json.Unmarshal(bytes, fieldValue.Interface()) + if len(bytes) > 0 { + err = json.Unmarshal(bytes, fieldValue.Interface()) + } } field.ReflectValueOf(ctx, dst).Set(fieldValue.Elem()) @@ -98,12 +102,17 @@ func (JSONSerializer) Scan(ctx context.Context, field *Field, dst reflect.Value, // Value implements serializer interface func (JSONSerializer) Value(ctx context.Context, field *Field, dst reflect.Value, fieldValue interface{}) (interface{}, error) { result, err := json.Marshal(fieldValue) + if string(result) == "null" { + if field.TagSettings["NOT NULL"] != "" { + return "", nil + } + return nil, err + } return string(result), err } // UnixSecondSerializer json serializer -type UnixSecondSerializer struct { -} +type UnixSecondSerializer struct{} // Scan implements serializer interface func (UnixSecondSerializer) Scan(ctx context.Context, field *Field, dst reflect.Value, dbValue interface{}) (err error) { @@ -117,9 +126,15 @@ func (UnixSecondSerializer) Scan(ctx context.Context, field *Field, dst reflect. // Value implements serializer interface func (UnixSecondSerializer) Value(ctx context.Context, field *Field, dst reflect.Value, fieldValue interface{}) (result interface{}, err error) { + rv := reflect.ValueOf(fieldValue) switch v := fieldValue.(type) { - case int64, int, uint, uint64, int32, uint32, int16, uint16, *int64, *int, *uint, *uint64, *int32, *uint32, *int16, *uint16: - result = time.Unix(reflect.Indirect(reflect.ValueOf(v)).Int(), 0) + case int64, int, uint, uint64, int32, uint32, int16, uint16: + result = time.Unix(reflect.Indirect(rv).Int(), 0).UTC() + case *int64, *int, *uint, *uint64, *int32, *uint32, *int16, *uint16: + if rv.IsZero() { + return nil, nil + } + result = time.Unix(reflect.Indirect(rv).Int(), 0).UTC() default: err = fmt.Errorf("invalid field type %#v for UnixSecondSerializer, only int, uint supported", v) } @@ -127,8 +142,7 @@ func (UnixSecondSerializer) Value(ctx context.Context, field *Field, dst reflect } // GobSerializer gob serializer -type GobSerializer struct { -} +type GobSerializer struct{} // Scan implements serializer interface func (GobSerializer) Scan(ctx context.Context, field *Field, dst reflect.Value, dbValue interface{}) (err error) { @@ -142,8 +156,10 @@ func (GobSerializer) Scan(ctx context.Context, field *Field, dst reflect.Value, default: return fmt.Errorf("failed to unmarshal gob value: %#v", dbValue) } - decoder := gob.NewDecoder(bytes.NewBuffer(bytesValue)) - err = decoder.Decode(fieldValue.Interface()) + if len(bytesValue) > 0 { + decoder := gob.NewDecoder(bytes.NewBuffer(bytesValue)) + err = decoder.Decode(fieldValue.Interface()) + } } field.ReflectValueOf(ctx, dst).Set(fieldValue.Elem()) return diff --git a/vendor/gorm.io/gorm/schema/utils.go b/vendor/gorm.io/gorm/schema/utils.go index acf1a739..7fdda185 100644 --- a/vendor/gorm.io/gorm/schema/utils.go +++ b/vendor/gorm.io/gorm/schema/utils.go @@ -115,6 +115,11 @@ func GetIdentityFieldValuesMap(ctx context.Context, reflectValue reflect.Value, notZero, zero bool ) + if reflectValue.Kind() == reflect.Ptr || + reflectValue.Kind() == reflect.Interface { + reflectValue = reflectValue.Elem() + } + switch reflectValue.Kind() { case reflect.Struct: results = [][]interface{}{make([]interface{}, len(fields))} @@ -133,7 +138,7 @@ func GetIdentityFieldValuesMap(ctx context.Context, reflectValue reflect.Value, for i := 0; i < reflectValue.Len(); i++ { elem := reflectValue.Index(i) elemKey := elem.Interface() - if elem.Kind() != reflect.Ptr { + if elem.Kind() != reflect.Ptr && elem.CanAddr() { elemKey = elem.Addr().Interface() } diff --git a/vendor/gorm.io/gorm/soft_delete.go b/vendor/gorm.io/gorm/soft_delete.go index 6d646288..5673d3b8 100644 --- a/vendor/gorm.io/gorm/soft_delete.go +++ b/vendor/gorm.io/gorm/soft_delete.go @@ -6,6 +6,7 @@ import ( "encoding/json" "reflect" + "github.com/jinzhu/now" "gorm.io/gorm/clause" "gorm.io/gorm/schema" ) @@ -45,11 +46,21 @@ func (n *DeletedAt) UnmarshalJSON(b []byte) error { } func (DeletedAt) QueryClauses(f *schema.Field) []clause.Interface { - return []clause.Interface{SoftDeleteQueryClause{Field: f}} + return []clause.Interface{SoftDeleteQueryClause{Field: f, ZeroValue: parseZeroValueTag(f)}} +} + +func parseZeroValueTag(f *schema.Field) sql.NullString { + if v, ok := f.TagSettings["ZEROVALUE"]; ok { + if _, err := now.Parse(v); err == nil { + return sql.NullString{String: v, Valid: true} + } + } + return sql.NullString{Valid: false} } type SoftDeleteQueryClause struct { - Field *schema.Field + ZeroValue sql.NullString + Field *schema.Field } func (sd SoftDeleteQueryClause) Name() string { @@ -78,18 +89,19 @@ func (sd SoftDeleteQueryClause) ModifyStatement(stmt *Statement) { } stmt.AddClause(clause.Where{Exprs: []clause.Expression{ - clause.Eq{Column: clause.Column{Table: clause.CurrentTable, Name: sd.Field.DBName}, Value: nil}, + clause.Eq{Column: clause.Column{Table: clause.CurrentTable, Name: sd.Field.DBName}, Value: sd.ZeroValue}, }}) stmt.Clauses["soft_delete_enabled"] = clause.Clause{} } } func (DeletedAt) UpdateClauses(f *schema.Field) []clause.Interface { - return []clause.Interface{SoftDeleteUpdateClause{Field: f}} + return []clause.Interface{SoftDeleteUpdateClause{Field: f, ZeroValue: parseZeroValueTag(f)}} } type SoftDeleteUpdateClause struct { - Field *schema.Field + ZeroValue sql.NullString + Field *schema.Field } func (sd SoftDeleteUpdateClause) Name() string { @@ -109,11 +121,12 @@ func (sd SoftDeleteUpdateClause) ModifyStatement(stmt *Statement) { } func (DeletedAt) DeleteClauses(f *schema.Field) []clause.Interface { - return []clause.Interface{SoftDeleteDeleteClause{Field: f}} + return []clause.Interface{SoftDeleteDeleteClause{Field: f, ZeroValue: parseZeroValueTag(f)}} } type SoftDeleteDeleteClause struct { - Field *schema.Field + ZeroValue sql.NullString + Field *schema.Field } func (sd SoftDeleteDeleteClause) Name() string { diff --git a/vendor/gorm.io/gorm/statement.go b/vendor/gorm.io/gorm/statement.go index 850af6cb..39e05d09 100644 --- a/vendor/gorm.io/gorm/statement.go +++ b/vendor/gorm.io/gorm/statement.go @@ -30,8 +30,9 @@ type Statement struct { Clauses map[string]clause.Clause BuildClauses []string Distinct bool - Selects []string // selected columns - Omits []string // omit columns + Selects []string // selected columns + Omits []string // omit columns + ColumnMapping map[string]string // map columns Joins []join Preloads map[string][]interface{} Settings sync.Map @@ -49,9 +50,12 @@ type Statement struct { } type join struct { - Name string - Conds []interface{} - On *clause.Where + Name string + Conds []interface{} + On *clause.Where + Selects []string + Omits []string + JoinType clause.JoinType } // StatementModifier statement modifier interface @@ -117,6 +121,8 @@ func (stmt *Statement) QuoteTo(writer clause.Writer, field interface{}) { write(v.Raw, stmt.Schema.PrioritizedPrimaryField.DBName) } else if len(stmt.Schema.DBNames) > 0 { write(v.Raw, stmt.Schema.DBNames[0]) + } else { + stmt.DB.AddError(ErrModelAccessibleFieldsRequired) //nolint:typecheck,errcheck } } else { write(v.Raw, v.Name) @@ -179,6 +185,10 @@ func (stmt *Statement) AddVar(writer clause.Writer, vars ...interface{}) { } else { stmt.AddVar(writer, v.GormValue(stmt.Context, stmt.DB)) } + case clause.Interface: + c := clause.Clause{Name: v.Name()} + v.MergeClause(&c) + c.Build(stmt) case clause.Expression: v.Build(stmt) case driver.Valuer: @@ -304,6 +314,9 @@ func (stmt *Statement) BuildCondition(query interface{}, args ...interface{}) [] conds := make([]clause.Expression, 0, 4) args = append([]interface{}{query}, args...) for idx, arg := range args { + if arg == nil { + continue + } if valuer, ok := arg.(driver.Valuer); ok { arg, _ = valuer.Value() } @@ -312,9 +325,7 @@ func (stmt *Statement) BuildCondition(query interface{}, args ...interface{}) [] case clause.Expression: conds = append(conds, v) case *DB: - for _, scope := range v.Statement.scopes { - v = scope(v) - } + v.executeScopes() if cs, ok := v.Statement.Clauses["WHERE"]; ok { if where, ok := cs.Expression.(clause.Where); ok { @@ -437,8 +448,9 @@ func (stmt *Statement) BuildCondition(query interface{}, args ...interface{}) [] if len(values) > 0 { conds = append(conds, clause.IN{Column: clause.PrimaryColumn, Values: values}) + return []clause.Expression{clause.And(conds...)} } - return conds + return nil } } @@ -447,7 +459,10 @@ func (stmt *Statement) BuildCondition(query interface{}, args ...interface{}) [] } } - return conds + if len(conds) > 0 { + return []clause.Expression{clause.And(conds...)} + } + return nil } // Build build sql with clauses names @@ -499,6 +514,7 @@ func (stmt *Statement) clone() *Statement { Distinct: stmt.Distinct, Selects: stmt.Selects, Omits: stmt.Omits, + ColumnMapping: stmt.ColumnMapping, Preloads: map[string][]interface{}{}, ConnPool: stmt.ConnPool, Schema: stmt.Schema, @@ -540,8 +556,9 @@ func (stmt *Statement) clone() *Statement { } // SetColumn set column's value -// stmt.SetColumn("Name", "jinzhu") // Hooks Method -// stmt.SetColumn("Name", "jinzhu", true) // Callbacks Method +// +// stmt.SetColumn("Name", "jinzhu") // Hooks Method +// stmt.SetColumn("Name", "jinzhu", true) // Callbacks Method func (stmt *Statement) SetColumn(name string, value interface{}, fromCallbacks ...bool) { if v, ok := stmt.Dest.(map[string]interface{}); ok { v[name] = value @@ -650,54 +667,62 @@ func (stmt *Statement) Changed(fields ...string) bool { return false } -var nameMatcher = regexp.MustCompile(`^[\W]?(?:[a-z_0-9]+?)[\W]?\.[\W]?([a-z_0-9]+?)[\W]?$`) +var matchName = func() func(tableColumn string) (table, column string) { + nameMatcher := regexp.MustCompile(`^(?:\W?(\w+?)\W?\.)?(?:(\*)|\W?(\w+?)\W?)$`) + return func(tableColumn string) (table, column string) { + if matches := nameMatcher.FindStringSubmatch(tableColumn); len(matches) == 4 { + table = matches[1] + star := matches[2] + columnName := matches[3] + if star != "" { + return table, star + } + return table, columnName + } + return "", "" + } +}() // SelectAndOmitColumns get select and omit columns, select -> true, omit -> false func (stmt *Statement) SelectAndOmitColumns(requireCreate, requireUpdate bool) (map[string]bool, bool) { results := map[string]bool{} notRestricted := false - // select columns - for _, column := range stmt.Selects { + processColumn := func(column string, result bool) { if stmt.Schema == nil { - results[column] = true + results[column] = result } else if column == "*" { - notRestricted = true + notRestricted = result for _, dbName := range stmt.Schema.DBNames { - results[dbName] = true + results[dbName] = result } } else if column == clause.Associations { for _, rel := range stmt.Schema.Relationships.Relations { - results[rel.Name] = true + results[rel.Name] = result } } else if field := stmt.Schema.LookUpField(column); field != nil && field.DBName != "" { - results[field.DBName] = true - } else if matches := nameMatcher.FindStringSubmatch(column); len(matches) == 2 { - results[matches[1]] = true + results[field.DBName] = result + } else if table, col := matchName(column); col != "" && (table == stmt.Table || table == "") { + if col == "*" { + for _, dbName := range stmt.Schema.DBNames { + results[dbName] = result + } + } else { + results[col] = result + } } else { - results[column] = true + results[column] = result } } + // select columns + for _, column := range stmt.Selects { + processColumn(column, true) + } + // omit columns - for _, omit := range stmt.Omits { - if stmt.Schema == nil { - results[omit] = false - } else if omit == "*" { - for _, dbName := range stmt.Schema.DBNames { - results[dbName] = false - } - } else if omit == clause.Associations { - for _, rel := range stmt.Schema.Relationships.Relations { - results[rel.Name] = false - } - } else if field := stmt.Schema.LookUpField(omit); field != nil && field.DBName != "" { - results[field.DBName] = false - } else if matches := nameMatcher.FindStringSubmatch(omit); len(matches) == 2 { - results[matches[1]] = false - } else { - results[omit] = false - } + for _, column := range stmt.Omits { + processColumn(column, false) } if stmt.Schema != nil { diff --git a/vendor/gorm.io/gorm/utils/utils.go b/vendor/gorm.io/gorm/utils/utils.go index 296917b9..fc615d73 100644 --- a/vendor/gorm.io/gorm/utils/utils.go +++ b/vendor/gorm.io/gorm/utils/utils.go @@ -3,8 +3,8 @@ package utils import ( "database/sql/driver" "fmt" + "path/filepath" "reflect" - "regexp" "runtime" "strconv" "strings" @@ -16,16 +16,32 @@ var gormSourceDir string func init() { _, file, _, _ := runtime.Caller(0) // compatible solution to get gorm source directory with various operating systems - gormSourceDir = regexp.MustCompile(`utils.utils\.go`).ReplaceAllString(file, "") + gormSourceDir = sourceDir(file) +} + +func sourceDir(file string) string { + dir := filepath.Dir(file) + dir = filepath.Dir(dir) + + s := filepath.Dir(dir) + if filepath.Base(s) != "gorm.io" { + s = dir + } + return filepath.ToSlash(s) + "/" } // FileWithLineNum return the file name and line number of the current file func FileWithLineNum() string { - // the second caller usually from gorm internal, so set i start from 2 - for i := 2; i < 15; i++ { - _, file, line, ok := runtime.Caller(i) - if ok && (!strings.HasPrefix(file, gormSourceDir) || strings.HasSuffix(file, "_test.go")) { - return file + ":" + strconv.FormatInt(int64(line), 10) + pcs := [13]uintptr{} + // the third caller usually from gorm internal + len := runtime.Callers(3, pcs[:]) + frames := runtime.CallersFrames(pcs[:len]) + for i := 0; i < len; i++ { + // second return value is "more", not "ok" + frame, _ := frames.Next() + if (!strings.HasPrefix(frame.File, gormSourceDir) || + strings.HasSuffix(frame.File, "_test.go")) && !strings.HasSuffix(frame.File, ".gen.go") { + return string(strconv.AppendInt(append([]byte(frame.File), ':'), int64(frame.Line), 10)) } } @@ -62,7 +78,11 @@ func ToStringKey(values ...interface{}) string { case uint: results[idx] = strconv.FormatUint(uint64(v), 10) default: - results[idx] = fmt.Sprint(reflect.Indirect(reflect.ValueOf(v)).Interface()) + results[idx] = "nil" + vv := reflect.ValueOf(v) + if vv.IsValid() && !vv.IsZero() { + results[idx] = fmt.Sprint(reflect.Indirect(vv).Interface()) + } } } @@ -78,19 +98,28 @@ func Contains(elems []string, elem string) bool { return false } -func AssertEqual(src, dst interface{}) bool { - if !reflect.DeepEqual(src, dst) { - if valuer, ok := src.(driver.Valuer); ok { - src, _ = valuer.Value() - } +func AssertEqual(x, y interface{}) bool { + if reflect.DeepEqual(x, y) { + return true + } + if x == nil || y == nil { + return false + } - if valuer, ok := dst.(driver.Valuer); ok { - dst, _ = valuer.Value() - } + xval := reflect.ValueOf(x) + yval := reflect.ValueOf(y) + if xval.Kind() == reflect.Ptr && xval.IsNil() || + yval.Kind() == reflect.Ptr && yval.IsNil() { + return false + } - return reflect.DeepEqual(src, dst) + if valuer, ok := x.(driver.Valuer); ok { + x, _ = valuer.Value() } - return true + if valuer, ok := y.(driver.Valuer); ok { + y, _ = valuer.Value() + } + return reflect.DeepEqual(x, y) } func ToString(value interface{}) string { @@ -120,3 +149,31 @@ func ToString(value interface{}) string { } return "" } + +const nestedRelationSplit = "__" + +// NestedRelationName nested relationships like `Manager__Company` +func NestedRelationName(prefix, name string) string { + return prefix + nestedRelationSplit + name +} + +// SplitNestedRelationName Split nested relationships to `[]string{"Manager","Company"}` +func SplitNestedRelationName(name string) []string { + return strings.Split(name, nestedRelationSplit) +} + +// JoinNestedRelationNames nested relationships like `Manager__Company` +func JoinNestedRelationNames(relationNames []string) string { + return strings.Join(relationNames, nestedRelationSplit) +} + +// RTrimSlice Right trims the given slice by given length +func RTrimSlice[T any](v []T, trimLen int) []T { + if trimLen >= len(v) { // trimLen greater than slice len means fully sliced + return v[:0] + } + if trimLen < 0 { // negative trimLen is ignored + return v[:] + } + return v[:len(v)-trimLen] +} diff --git a/vendor/howett.net/plist/bplist_parser.go b/vendor/howett.net/plist/bplist_parser.go index 1825b570..8bb164e1 100644 --- a/vendor/howett.net/plist/bplist_parser.go +++ b/vendor/howett.net/plist/bplist_parser.go @@ -137,7 +137,10 @@ func (p *bplistParser) parseSizedInteger(off offset, nbytes int) (lo uint64, hi case 16: lo, hi = binary.BigEndian.Uint64(p.buffer[off+8:]), binary.BigEndian.Uint64(p.buffer[off:]) default: - panic(errors.New("illegal integer size")) + if nbytes > 8 { + panic(errors.New("illegal integer size")) + } + lo, hi = binary.BigEndian.Uint64(p.buffer[off-(8-offset(nbytes)):]) & ((1< 0 || cfg.NullGlob { - fields = append(fields, matches...) - continue - } + } else if len(matches) > 0 || cfg.NullGlob { + fields = append(fields, matches...) + continue } } fields = append(fields, cfg.fieldJoin(field)) @@ -476,9 +504,9 @@ const ( func (cfg *Config) wordField(wps []syntax.WordPart, ql quoteLevel) ([]fieldPart, error) { var field []fieldPart for i, wp := range wps { - switch x := wp.(type) { + switch wp := wp.(type) { case *syntax.Lit: - s := x.Value + s := wp.Value if i == 0 && ql == quoteNone { if prefix, rest := cfg.expandUser(s); prefix != "" { // TODO: return two separate fieldParts, @@ -505,13 +533,13 @@ func (cfg *Config) wordField(wps []syntax.WordPart, ql quoteLevel) ([]fieldPart, } field = append(field, fieldPart{val: s}) case *syntax.SglQuoted: - fp := fieldPart{quote: quoteSingle, val: x.Value} - if x.Dollar { + fp := fieldPart{quote: quoteSingle, val: wp.Value} + if wp.Dollar { fp.val, _, _ = Format(cfg, fp.val, nil) } field = append(field, fp) case *syntax.DblQuoted: - wfield, err := cfg.wordField(x.Parts, quoteDouble) + wfield, err := cfg.wordField(wp.Parts, quoteDouble) if err != nil { return nil, err } @@ -520,31 +548,31 @@ func (cfg *Config) wordField(wps []syntax.WordPart, ql quoteLevel) ([]fieldPart, field = append(field, part) } case *syntax.ParamExp: - val, err := cfg.paramExp(x) + val, err := cfg.paramExp(wp) if err != nil { return nil, err } field = append(field, fieldPart{val: val}) case *syntax.CmdSubst: - val, err := cfg.cmdSubst(x) + val, err := cfg.cmdSubst(wp) if err != nil { return nil, err } field = append(field, fieldPart{val: val}) case *syntax.ArithmExp: - n, err := Arithm(cfg, x.X) + n, err := Arithm(cfg, wp.X) if err != nil { return nil, err } field = append(field, fieldPart{val: strconv.Itoa(n)}) case *syntax.ProcSubst: - path, err := cfg.ProcSubst(x) + path, err := cfg.ProcSubst(wp) if err != nil { return nil, err } field = append(field, fieldPart{val: path}) default: - panic(fmt.Sprintf("unhandled word part: %T", x)) + panic(fmt.Sprintf("unhandled word part: %T", wp)) } } return field, nil @@ -596,9 +624,9 @@ func (cfg *Config) wordFields(wps []syntax.WordPart) ([][]fieldPart, error) { } } for i, wp := range wps { - switch x := wp.(type) { + switch wp := wp.(type) { case *syntax.Lit: - s := x.Value + s := wp.Value if i == 0 { prefix, rest := cfg.expandUser(s) curField = append(curField, fieldPart{ @@ -624,14 +652,14 @@ func (cfg *Config) wordFields(wps []syntax.WordPart) ([][]fieldPart, error) { curField = append(curField, fieldPart{val: s}) case *syntax.SglQuoted: allowEmpty = true - fp := fieldPart{quote: quoteSingle, val: x.Value} - if x.Dollar { + fp := fieldPart{quote: quoteSingle, val: wp.Value} + if wp.Dollar { fp.val, _, _ = Format(cfg, fp.val, nil) } curField = append(curField, fp) case *syntax.DblQuoted: - if len(x.Parts) == 1 { - pe, _ := x.Parts[0].(*syntax.ParamExp) + if len(wp.Parts) == 1 { + pe, _ := wp.Parts[0].(*syntax.ParamExp) if elems := cfg.quotedElemFields(pe); elems != nil { for i, elem := range elems { if i > 0 { @@ -646,7 +674,7 @@ func (cfg *Config) wordFields(wps []syntax.WordPart) ([][]fieldPart, error) { } } allowEmpty = true - wfield, err := cfg.wordField(x.Parts, quoteDouble) + wfield, err := cfg.wordField(wp.Parts, quoteDouble) if err != nil { return nil, err } @@ -655,25 +683,25 @@ func (cfg *Config) wordFields(wps []syntax.WordPart) ([][]fieldPart, error) { curField = append(curField, part) } case *syntax.ParamExp: - val, err := cfg.paramExp(x) + val, err := cfg.paramExp(wp) if err != nil { return nil, err } splitAdd(val) case *syntax.CmdSubst: - val, err := cfg.cmdSubst(x) + val, err := cfg.cmdSubst(wp) if err != nil { return nil, err } splitAdd(val) case *syntax.ArithmExp: - n, err := Arithm(cfg, x.X) + n, err := Arithm(cfg, wp.X) if err != nil { return nil, err } curField = append(curField, fieldPart{val: strconv.Itoa(n)}) case *syntax.ProcSubst: - path, err := cfg.ProcSubst(x) + path, err := cfg.ProcSubst(wp) if err != nil { return nil, err } @@ -681,7 +709,7 @@ func (cfg *Config) wordFields(wps []syntax.WordPart) ([][]fieldPart, error) { case *syntax.ExtGlob: return nil, fmt.Errorf("extended globbing is not supported") default: - panic(fmt.Sprintf("unhandled word part: %T", x)) + panic(fmt.Sprintf("unhandled word part: %T", wp)) } } flush() @@ -710,12 +738,14 @@ func (cfg *Config) quotedElemFields(pe *syntax.ParamExp) []string { switch vr := cfg.Env.Get(name); vr.Kind { case Indexed: keys := make([]string, 0, len(vr.Map)) + // TODO: maps.Keys if it makes it into Go 1.23 for key := range vr.List { keys = append(keys, strconv.Itoa(key)) } return keys case Associative: keys := make([]string, 0, len(vr.Map)) + // TODO: maps.Keys if it makes it into Go 1.23 for key := range vr.Map { keys = append(keys, key) } @@ -736,6 +766,7 @@ func (cfg *Config) quotedElemFields(pe *syntax.ParamExp) []string { case Indexed: return vr.List case Associative: + // TODO: maps.Values if it makes it into Go 1.23 elems := make([]string, 0, len(vr.Map)) for _, elem := range vr.Map { elems = append(elems, elem) @@ -839,11 +870,11 @@ func (cfg *Config) glob(base, pat string) ([]string, error) { // TODO: as an optimization, we could do chunks of the path all at once, // like doing a single stat for "/foo/bar" in "/foo/bar/*". - // TODO: Another optimization would be to reduce the number of ReadDir calls. + // TODO: Another optimization would be to reduce the number of ReadDir2 calls. // For example, /foo/* can end up doing one duplicate call: // - // ReadDir("/foo") to ensure that "/foo/" exists and only matches a directory - // ReadDir("/foo") glob "*" + // ReadDir2("/foo") to ensure that "/foo/" exists and only matches a directory + // ReadDir2("/foo") glob "*" for i, part := range parts { // Keep around for debugging. @@ -864,12 +895,12 @@ func (cfg *Config) glob(base, pat string) ([]string, error) { match = filepath.Join(base, match) } match = pathJoin2(match, part) - // We can't use ReadDir on the parent and match the directory + // We can't use ReadDir2 on the parent and match the directory // entry by name, because short paths on Windows break that. - // Our only option is to ReadDir on the directory entry itself, + // Our only option is to ReadDir2 on the directory entry itself, // which can be wasteful if we only want to see if it exists, // but at least it's correct in all scenarios. - if _, err := cfg.ReadDir(match); err != nil { + if _, err := cfg.ReadDir2(match); err != nil { const errPathNotFound = syscall.Errno(3) // from syscall/types_windows.go, to avoid a build tag var pathErr *os.PathError if runtime.GOOS == "windows" && errors.As(err, &pathErr) && pathErr.Err == errPathNotFound { @@ -922,7 +953,11 @@ func (cfg *Config) glob(base, pat string) ([]string, error) { } continue } - expr, err := pattern.Regexp(part, pattern.Filenames|pattern.EntireString) + mode := pattern.Filenames | pattern.EntireString + if cfg.NoCaseGlob { + mode |= pattern.NoGlobCase + } + expr, err := pattern.Regexp(part, mode) if err != nil { return nil, err } @@ -945,7 +980,7 @@ func (cfg *Config) globDir(base, dir string, rx *regexp.Regexp, matchHidden bool if !filepath.IsAbs(dir) { fullDir = filepath.Join(base, dir) } - infos, err := cfg.ReadDir(fullDir) + infos, err := cfg.ReadDir2(fullDir) if err != nil { // We still want to return matches, for the sake of reusing slices. return matches, err @@ -954,13 +989,13 @@ func (cfg *Config) globDir(base, dir string, rx *regexp.Regexp, matchHidden bool name := info.Name() if !wantDir { // No filtering. - } else if mode := info.Mode(); mode&os.ModeSymlink != 0 { + } else if mode := info.Type(); mode&os.ModeSymlink != 0 { // We need to know if the symlink points to a directory. // This requires an extra syscall, as ReadDir on the parent directory // does not follow symlinks for each of the directory entries. // ReadDir is somewhat wasteful here, as we only want its error result, // but we could try to reuse its result as per the TODO in Config.glob. - if _, err := cfg.ReadDir(filepath.Join(fullDir, info.Name())); err != nil { + if _, err := cfg.ReadDir2(filepath.Join(fullDir, info.Name())); err != nil { continue } } else if !mode.IsDir() { diff --git a/vendor/mvdan.cc/sh/v3/expand/param.go b/vendor/mvdan.cc/sh/v3/expand/param.go index 1f1a475f..07d3e13b 100644 --- a/vendor/mvdan.cc/sh/v3/expand/param.go +++ b/vendor/mvdan.cc/sh/v3/expand/param.go @@ -6,7 +6,7 @@ package expand import ( "fmt" "regexp" - "sort" + "slices" "strconv" "strings" "unicode" @@ -167,6 +167,7 @@ func (cfg *Config) paramExp(pe *syntax.ParamExp) (string, error) { } } case pe.Index != nil && vr.Kind == Associative: + // TODO: use maps.Keys for k := range vr.Map { strs = append(strs, k) } @@ -178,7 +179,7 @@ func (cfg *Config) paramExp(pe *syntax.ParamExp) (string, error) { vr = cfg.Env.Get(str) strs = append(strs, vr.String()) } - sort.Strings(strs) + slices.Sort(strs) str = strings.Join(strs, " ") case pe.Slice != nil: if callVarInd { @@ -205,6 +206,9 @@ func (cfg *Config) paramExp(pe *syntax.ParamExp) (string, error) { if err != nil { return "", err } + if orig == "" { + break // nothing to replace + } with, err := Literal(cfg, pe.Repl.With) if err != nil { return "", err @@ -397,10 +401,11 @@ func (cfg *Config) varInd(vr Variable, idx syntax.ArithmExpr) (string, error) { switch lit := nodeLit(idx); lit { case "@", "*": strs := make([]string, 0, len(vr.Map)) + // TODO: use maps.Values for _, val := range vr.Map { strs = append(strs, val) } - sort.Strings(strs) + slices.Sort(strs) if lit == "*" { return cfg.ifsJoin(strs), nil } diff --git a/vendor/mvdan.cc/sh/v3/fileutil/file.go b/vendor/mvdan.cc/sh/v3/fileutil/file.go index 249ae94c..d69a2b9b 100644 --- a/vendor/mvdan.cc/sh/v3/fileutil/file.go +++ b/vendor/mvdan.cc/sh/v3/fileutil/file.go @@ -50,7 +50,7 @@ const ( // ConfIfShebang describes files which might be shell scripts, depending // on the shebang line in the file's contents. Since CouldBeScript only - // works on os.FileInfo, the answer in this case can't be final. + // works on fs.FileInfo, the answer in this case can't be final. ConfIfShebang // ConfIsScript describes files which are definitely shell scripts, @@ -61,7 +61,7 @@ const ( // CouldBeScript is a shortcut for CouldBeScript2(fs.FileInfoToDirEntry(info)). // // Deprecated: prefer CouldBeScript2, which usually requires fewer syscalls. -func CouldBeScript(info os.FileInfo) ScriptConfidence { +func CouldBeScript(info fs.FileInfo) ScriptConfidence { return CouldBeScript2(fs.FileInfoToDirEntry(info)) } diff --git a/vendor/mvdan.cc/sh/v3/pattern/pattern.go b/vendor/mvdan.cc/sh/v3/pattern/pattern.go index 7cd98d14..e5b30a76 100644 --- a/vendor/mvdan.cc/sh/v3/pattern/pattern.go +++ b/vendor/mvdan.cc/sh/v3/pattern/pattern.go @@ -34,6 +34,7 @@ const ( Filenames // "*" and "?" don't match slashes; only "**" does Braces // support "{a,b}" and "{1..4}" EntireString // match the entire string using ^$ delimiters + NoGlobCase // Do case-insensitive match (that is, use (?i) in the regexp) ) var numRange = regexp.MustCompile(`^([+-]?\d+)\.\.([+-]?\d+)}`) @@ -68,6 +69,9 @@ noopLoop: // Enable matching `\n` with the `.` metacharacter as globs match `\n` buf.WriteString("(?s)") dotMeta := false + if mode&NoGlobCase != 0 { + buf.WriteString("(?i)") + } if mode&EntireString != 0 { buf.WriteString("^") } @@ -242,7 +246,7 @@ writeLoop: if mode&EntireString != 0 { buf.WriteString("$") } - // No `.` metacharacters were used, so don't return the flag. + // No `.` metacharacters were used, so don't return the (?s) flag. if !dotMeta { return string(buf.Bytes()[4:]), nil } diff --git a/vendor/mvdan.cc/sh/v3/syntax/braces.go b/vendor/mvdan.cc/sh/v3/syntax/braces.go index f3452819..94c64ea8 100644 --- a/vendor/mvdan.cc/sh/v3/syntax/braces.go +++ b/vendor/mvdan.cc/sh/v3/syntax/braces.go @@ -110,7 +110,8 @@ func SplitBraces(word *Word) bool { val := elem.Lit() if _, err := strconv.Atoi(val); err == nil { } else if len(val) == 1 && - 'a' <= val[0] && val[0] <= 'z' { + (('a' <= val[0] && val[0] <= 'z') || + ('A' <= val[0] && val[0] <= 'Z')) { chars[i] = true } else { broken = true diff --git a/vendor/mvdan.cc/sh/v3/syntax/lexer.go b/vendor/mvdan.cc/sh/v3/syntax/lexer.go index b5dddab7..28f96df6 100644 --- a/vendor/mvdan.cc/sh/v3/syntax/lexer.go +++ b/vendor/mvdan.cc/sh/v3/syntax/lexer.go @@ -61,18 +61,13 @@ func (p *Parser) rune() rune { if p.r == '\n' || p.r == escNewl { // p.r instead of b so that newline // character positions don't have col 0. - if p.line++; p.line > lineMax { - p.lineOverflow = true - } + p.line++ p.col = 0 - p.colOverflow = false - } - if p.col += p.w; p.col > colMax { - p.colOverflow = true } + p.col += int64(p.w) bquotes := 0 retry: - if p.bsp < len(p.bs) { + if p.bsp < uint(len(p.bs)) { if b := p.bs[p.bsp]; b < utf8.RuneSelf { p.bsp++ if b == '\x00' { @@ -91,7 +86,10 @@ retry: return escNewl } if p.openBquotes > 0 && bquotes < p.openBquotes && - p.bsp < len(p.bs) && bquoteEscaped(p.bs[p.bsp]) { + p.bsp < uint(len(p.bs)) && bquoteEscaped(p.bs[p.bsp]) { + // We turn backquote command substitutions into $(), + // so we remove the extra backslashes needed by the backquotes. + // For good position information, we still include them in p.w. bquotes++ goto retry } @@ -102,7 +100,7 @@ retry: if p.litBs != nil { p.litBs = append(p.litBs, b) } - p.w, p.r = 1, rune(b) + p.w, p.r = 1+bquotes, rune(b) return p.r } if !utf8.FullRune(p.bs[p.bsp:]) { @@ -112,9 +110,9 @@ retry: var w int p.r, w = utf8.DecodeRune(p.bs[p.bsp:]) if p.litBs != nil { - p.litBs = append(p.litBs, p.bs[p.bsp:p.bsp+w]...) + p.litBs = append(p.litBs, p.bs[p.bsp:p.bsp+uint(w)]...) } - p.bsp += w + p.bsp += uint(w) if p.r == utf8.RuneError && w == 1 { p.posErr(p.nextPos(), "invalid UTF-8 encoding") } @@ -136,8 +134,8 @@ retry: // had not yet been used at the end of the buffer are slid into the // beginning of the buffer. func (p *Parser) fill() { - p.offs += p.bsp - left := len(p.bs) - p.bsp + p.offs += int64(p.bsp) + left := len(p.bs) - int(p.bsp) copy(p.readBuf[:left], p.readBuf[p.bsp:]) readAgain: n, err := 0, p.readErr @@ -256,7 +254,7 @@ skipSpace: } if p.stopAt != nil && (p.spaced || p.tok == illegalTok || p.stopToken()) { w := utf8.RuneLen(r) - if bytes.HasPrefix(p.bs[p.bsp-w:], p.stopAt) { + if bytes.HasPrefix(p.bs[p.bsp-uint(w):], p.stopAt) { p.r = utf8.RuneSelf p.w = 1 p.tok = _EOF @@ -390,7 +388,7 @@ func (p *Parser) extendedGlob() bool { } func (p *Parser) peekBytes(s string) bool { - peekEnd := p.bsp + len(s) + peekEnd := int(p.bsp) + len(s) // TODO: This should loop for slow readers, e.g. those providing one byte at // a time. Use a loop and test it with testing/iotest.OneByteReader. if peekEnd > len(p.bs) { @@ -400,20 +398,15 @@ func (p *Parser) peekBytes(s string) bool { } func (p *Parser) peekByte(b byte) bool { - if p.bsp == len(p.bs) { + if p.bsp == uint(len(p.bs)) { p.fill() } - return p.bsp < len(p.bs) && p.bs[p.bsp] == b + return p.bsp < uint(len(p.bs)) && p.bs[p.bsp] == b } func (p *Parser) regToken(r rune) token { switch r { case '\'': - if p.openBquotes > 0 { - // bury openBquotes - p.buriedBquotes = p.openBquotes - p.openBquotes = 0 - } p.rune() return sglQuote case '"': @@ -816,7 +809,7 @@ func (p *Parser) newLit(r rune) { p.litBs[0] = byte(r) case r > escNewl: w := utf8.RuneLen(r) - p.litBs = append(p.litBuf[:0], p.bs[p.bsp-w:p.bsp]...) + p.litBs = append(p.litBuf[:0], p.bs[p.bsp-uint(w):p.bsp]...) default: // don't let r == utf8.RuneSelf go to the second case as RuneLen // would return -1 @@ -827,6 +820,9 @@ func (p *Parser) newLit(r rune) { func (p *Parser) endLit() (s string) { if p.r == utf8.RuneSelf || p.r == escNewl { s = string(p.litBs) + } else if p.r == '`' && p.w > 1 { + // If we ended at a nested and escaped backquote, litBs does not include the backslash. + s = string(p.litBs[:len(p.litBs)-1]) } else { s = string(p.litBs[:len(p.litBs)-p.w]) } diff --git a/vendor/mvdan.cc/sh/v3/syntax/nodes.go b/vendor/mvdan.cc/sh/v3/syntax/nodes.go index 88eb7fea..8424545c 100644 --- a/vendor/mvdan.cc/sh/v3/syntax/nodes.go +++ b/vendor/mvdan.cc/sh/v3/syntax/nodes.go @@ -4,6 +4,7 @@ package syntax import ( + "math" "strconv" "strings" ) @@ -72,6 +73,8 @@ type Pos struct { // We used to split line and column numbers evenly in 16 bits, but line numbers // are significantly more important in practice. Use more bits for them. const ( + offsetMax = math.MaxUint32 + lineBitSize = 18 lineMax = (1 << lineBitSize) - 1 @@ -90,6 +93,9 @@ const ( // Note that Pos uses a limited number of bits to store these numbers. // If line or column overflow their allocated space, they are replaced with 0. func NewPos(offset, line, column uint) Pos { + // Basic protection against offset overflow; + // note that an offset of 0 is valid, so we leave the maximum. + offset = min(offset, offsetMax) if line > lineMax { line = 0 // protect against overflows; rendered as "?" } @@ -105,8 +111,8 @@ func NewPos(offset, line, column uint) Pos { // Offset returns the byte offset of the position in the original source file. // Byte offsets start at 0. // -// Note that Offset is not protected against overflows; -// if an input is larger than 4GiB, the offset will wrap around to 0. +// Offset has basic protection against overflows; if an input is too large, +// offset numbers will stop increasing past a very large number. func (p Pos) Offset() uint { return uint(p.offs) } // Line returns the line number of the position, starting at 1. diff --git a/vendor/mvdan.cc/sh/v3/syntax/parser.go b/vendor/mvdan.cc/sh/v3/syntax/parser.go index 99ae17c2..580dab71 100644 --- a/vendor/mvdan.cc/sh/v3/syntax/parser.go +++ b/vendor/mvdan.cc/sh/v3/syntax/parser.go @@ -31,7 +31,7 @@ const ( // LangBash corresponds to the GNU Bash language, as described in its // manual at https://www.gnu.org/software/bash/manual/bash.html. // - // We currently follow Bash version 5.1. + // We currently follow Bash version 5.2. // // Its string representation is "bash". LangBash LangVariant = iota @@ -196,7 +196,7 @@ type wrappedReader struct { *Parser io.Reader - lastLine int + lastLine int64 accumulated []*Stmt fn func([]*Stmt) bool } @@ -338,7 +338,7 @@ func (p *Parser) Arithmetic(r io.Reader) (ArithmExpr, error) { type Parser struct { src io.Reader bs []byte // current chunk of read bytes - bsp int // pos within chunk for the rune after r + bsp uint // pos within chunk for the rune after r; uint helps eliminate bounds checks r rune // next rune w int // width of r @@ -353,15 +353,10 @@ type Parser struct { val string // current value (valid if tok is _Lit*) // position of r, to be converted to Parser.pos later - offs, line, col int + offs, line, col int64 pos Pos // position of tok - // TODO: Guard against offset overflow too. Less likely as it's 32-bit, - // whereas line and col are 16-bit. - lineOverflow bool - colOverflow bool - quote quoteState // current lexer state eqlOffs int // position of '=' in val (a literal) @@ -389,9 +384,6 @@ type Parser struct { // lastBquoteEsc is how many times the last backquote token was escaped lastBquoteEsc int - // buriedBquotes is like openBquotes, but saved for when the parser - // comes out of single quotes - buriedBquotes int rxOpenParens int rxFirstPart bool @@ -401,8 +393,6 @@ type Parser struct { litBatch []Lit wordBatch []wordAlloc - stmtBatch []Stmt - callBatch []callAlloc readBuf [bufSize]byte litBuf [bufSize]byte @@ -434,30 +424,33 @@ func (p *Parser) reset() { p.quote, p.forbidNested = noState, false p.openStmts = 0 p.heredocs, p.buriedHdocs = p.heredocs[:0], 0 + p.hdocStops = nil p.parsingDoc = false - p.openBquotes, p.buriedBquotes = 0, 0 + p.openBquotes = 0 + p.accComs = nil p.accComs, p.curComs = nil, &p.accComs p.litBatch = nil p.wordBatch = nil - p.stmtBatch = nil - p.callBatch = nil + p.litBs = nil } func (p *Parser) nextPos() Pos { - // TODO: detect offset overflow while lexing as well. + // Basic protection against offset overflow; + // note that an offset of 0 is valid, so we leave the maximum. + offset := min(p.offs+int64(p.bsp)-int64(p.w), offsetMax) var line, col uint - if !p.lineOverflow { + if p.line <= lineMax { line = uint(p.line) } - if !p.colOverflow { + if p.col <= colMax { col = uint(p.col) } - return NewPos(uint(p.offs+p.bsp-p.w), line, col) + return NewPos(uint(offset), line, col) } func (p *Parser) lit(pos Pos, val string) *Lit { if len(p.litBatch) == 0 { - p.litBatch = make([]Lit, 64) + p.litBatch = make([]Lit, 32) } l := &p.litBatch[0] p.litBatch = p.litBatch[1:] @@ -495,27 +488,11 @@ func (p *Parser) wordOne(part WordPart) *Word { return w } -func (p *Parser) stmt(pos Pos) *Stmt { - if len(p.stmtBatch) == 0 { - p.stmtBatch = make([]Stmt, 32) - } - s := &p.stmtBatch[0] - p.stmtBatch = p.stmtBatch[1:] - s.Position = pos - return s -} - -type callAlloc struct { - ce CallExpr - ws [4]*Word -} - func (p *Parser) call(w *Word) *CallExpr { - if len(p.callBatch) == 0 { - p.callBatch = make([]callAlloc, 32) + var alloc struct { + ce CallExpr + ws [4]*Word } - alloc := &p.callBatch[0] - p.callBatch = p.callBatch[1:] ce := &alloc.ce ce.Args = alloc.ws[:1] ce.Args[0] = w @@ -582,12 +559,12 @@ func (p *Parser) unquotedWordBytes(w *Word) ([]byte, bool) { } func (p *Parser) unquotedWordPart(buf []byte, wp WordPart, quotes bool) (_ []byte, quoted bool) { - switch x := wp.(type) { + switch wp := wp.(type) { case *Lit: - for i := 0; i < len(x.Value); i++ { - if b := x.Value[i]; b == '\\' && !quotes { - if i++; i < len(x.Value) { - buf = append(buf, x.Value[i]) + for i := 0; i < len(wp.Value); i++ { + if b := wp.Value[i]; b == '\\' && !quotes { + if i++; i < len(wp.Value) { + buf = append(buf, wp.Value[i]) } quoted = true } else { @@ -595,10 +572,10 @@ func (p *Parser) unquotedWordPart(buf []byte, wp WordPart, quotes bool) (_ []byt } } case *SglQuoted: - buf = append(buf, []byte(x.Value)...) + buf = append(buf, []byte(wp.Value)...) quoted = true case *DblQuoted: - for _, wp2 := range x.Parts { + for _, wp2 := range wp.Parts { buf, _ = p.unquotedWordPart(buf, wp2, true) } quoted = true @@ -636,7 +613,7 @@ func (p *Parser) doHeredocs() { r.Hdoc = p.getWord() } if r.Hdoc != nil { - lastLine = int(r.Hdoc.End().Line()) + lastLine = int64(r.Hdoc.End().Line()) } if lastLine < p.line { // TODO: It seems like this triggers more often than it @@ -753,7 +730,7 @@ func (p *Parser) matched(lpos Pos, left, right token) Pos { func (p *Parser) errPass(err error) { if p.err == nil { p.err = err - p.bsp = len(p.bs) + 1 + p.bsp = uint(len(p.bs)) + 1 p.r = utf8.RuneSelf p.w = 1 p.tok = _EOF @@ -1124,10 +1101,6 @@ func (p *Parser) wordPart() WordPart { sq.Right = p.nextPos() sq.Value = p.endLit() - // restore openBquotes - p.openBquotes = p.buriedBquotes - p.buriedBquotes = 0 - p.rune() p.next() return sq @@ -1383,8 +1356,7 @@ func (p *Parser) paramExp() *ParamExp { p.curErr("not a valid parameter expansion operator: %v", p.tok) } p.quote = old - pe.Rbrace = p.pos - p.matched(pe.Dollar, dollBrace, rightBrace) + pe.Rbrace = p.matched(pe.Dollar, dollBrace, rightBrace) return pe } @@ -1399,7 +1371,7 @@ func (p *Parser) paramExpExp() *Expansion { p.curErr("@ expansion operator requires a literal") } switch p.val { - case "a", "u", "A", "E", "K", "L", "P", "U": + case "a", "k", "u", "A", "E", "K", "L", "P", "U": if !p.lang.isBash() { p.langErr(p.pos, "this expansion operator", LangBash) } @@ -1409,7 +1381,7 @@ func (p *Parser) paramExpExp() *Expansion { } case "Q": default: - p.curErr("invalid @ expansion operator") + p.curErr("invalid @ expansion operator %q", p.val) } } return &Expansion{Op: op, Word: p.getWord()} @@ -1661,7 +1633,7 @@ func (p *Parser) doRedirect(s *Stmt) { func (p *Parser) getStmt(readEnd, binCmd, fnBody bool) *Stmt { pos, ok := p.gotRsrv("!") - s := p.stmt(pos) + s := &Stmt{Position: pos} if ok { s.Negated = true if p.stopToken() { @@ -1693,7 +1665,7 @@ func (p *Parser) getStmt(readEnd, binCmd, fnBody bool) *Stmt { p.followErr(b.OpPos, b.Op.String(), "a statement") return nil } - s = p.stmt(s.Position) + s = &Stmt{Position: s.Position} s.Cmd = b s.Comments, b.X.Comments = b.X.Comments, nil } @@ -1862,11 +1834,11 @@ func (p *Parser) gotStmtPipe(s *Stmt, binCmd bool) *Stmt { b := &BinaryCmd{OpPos: p.pos, Op: BinCmdOperator(p.tok), X: s} p.next() p.got(_Newl) - if b.Y = p.gotStmtPipe(p.stmt(p.pos), true); b.Y == nil || p.err != nil { + if b.Y = p.gotStmtPipe(&Stmt{Position: p.pos}, true); b.Y == nil || p.err != nil { p.followErr(b.OpPos, b.Op.String(), "a statement") break } - s = p.stmt(s.Position) + s = &Stmt{Position: s.Position} s.Cmd = b s.Comments, b.X.Comments = b.X.Comments, nil // in "! x | y", the bang applies to the entire pipeline @@ -2148,7 +2120,7 @@ func (p *Parser) testClause(s *Stmt) { if _, ok := p.gotRsrv("]]"); ok || p.tok == _EOF { p.posErr(tc.Left, "test clause requires at least one expression") } - tc.X = p.testExpr(dblLeftBrack, tc.Left, false) + tc.X = p.testExpr(false) if tc.X == nil { p.followErrExp(tc.Left, "[[") } @@ -2160,13 +2132,13 @@ func (p *Parser) testClause(s *Stmt) { s.Cmd = tc } -func (p *Parser) testExpr(ftok token, fpos Pos, pastAndOr bool) TestExpr { +func (p *Parser) testExpr(pastAndOr bool) TestExpr { p.got(_Newl) var left TestExpr if pastAndOr { left = p.testExprBase() } else { - left = p.testExpr(ftok, fpos, true) + left = p.testExpr(true) } if left == nil { return left @@ -2200,7 +2172,7 @@ func (p *Parser) testExpr(ftok token, fpos Pos, pastAndOr bool) TestExpr { switch b.Op { case AndTest, OrTest: p.next() - if b.Y = p.testExpr(token(b.Op), b.OpPos, false); b.Y == nil { + if b.Y = p.testExpr(false); b.Y == nil { p.followErrExp(b.OpPos, b.Op.String()) } case TsReMatch: @@ -2246,7 +2218,7 @@ func (p *Parser) testExprBase() TestExpr { case exclMark: u := &UnaryTest{OpPos: p.pos, Op: TsNot} p.next() - if u.X = p.testExpr(token(u.Op), u.OpPos, false); u.X == nil { + if u.X = p.testExpr(false); u.X == nil { p.followErrExp(u.OpPos, u.Op.String()) } return u @@ -2261,7 +2233,7 @@ func (p *Parser) testExprBase() TestExpr { case leftParen: pe := &ParenTest{Lparen: p.pos} p.next() - if pe.X = p.testExpr(leftParen, pe.Lparen, false); pe.X == nil { + if pe.X = p.testExpr(false); pe.X == nil { p.followErrExp(pe.Lparen, "(") } pe.Rparen = p.matched(pe.Lparen, leftParen, rightParen) @@ -2326,7 +2298,7 @@ func (p *Parser) timeClause(s *Stmt) { if _, ok := p.gotRsrv("-p"); ok { tc.PosixFormat = true } - tc.Stmt = p.gotStmtPipe(p.stmt(p.pos), false) + tc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false) s.Cmd = tc } @@ -2334,19 +2306,19 @@ func (p *Parser) coprocClause(s *Stmt) { cc := &CoprocClause{Coproc: p.pos} if p.next(); isBashCompoundCommand(p.tok, p.val) { // has no name - cc.Stmt = p.gotStmtPipe(p.stmt(p.pos), false) + cc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false) s.Cmd = cc return } cc.Name = p.getWord() - cc.Stmt = p.gotStmtPipe(p.stmt(p.pos), false) + cc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false) if cc.Stmt == nil { if cc.Name == nil { p.posErr(cc.Coproc, "coproc clause requires a command") return } // name was in fact the stmt - cc.Stmt = p.stmt(cc.Name.Pos()) + cc.Stmt = &Stmt{Position: cc.Name.Pos()} cc.Stmt.Cmd = p.call(cc.Name) cc.Name = nil } else if cc.Name != nil { @@ -2422,6 +2394,10 @@ loop: ce.Assigns = append(ce.Assigns, p.getAssign(true)) break } + // Avoid failing later with the confusing "} can only be used to close a block". + if p.lang == LangPOSIX && p.val == "{" && w != nil && w.Lit() == "function" { + p.curErr("the %q builtin is a bash feature; tried parsing as posix", "function") + } ce.Args = append(ce.Args, p.wordOne(p.lit(p.pos, p.val))) p.next() case _Lit: @@ -2453,7 +2429,7 @@ loop: // Note that we'll only keep the first error that happens. if len(ce.Args) > 0 { if cmd := ce.Args[0].Lit(); p.lang == LangPOSIX && isBashCompoundCommand(_LitWord, cmd) { - p.curErr("the %q builtin exists in bash; tried parsing as posix", cmd) + p.curErr("the %q builtin is a bash feature; tried parsing as posix", cmd) } } p.curErr("a command can only contain words and redirects; encountered %s", p.tok) diff --git a/vendor/mvdan.cc/sh/v3/syntax/parser_arithm.go b/vendor/mvdan.cc/sh/v3/syntax/parser_arithm.go index a6d6a951..c8567b52 100644 --- a/vendor/mvdan.cc/sh/v3/syntax/parser_arithm.go +++ b/vendor/mvdan.cc/sh/v3/syntax/parser_arithm.go @@ -295,11 +295,11 @@ func isArithName(left ArithmExpr) bool { if !ok || len(w.Parts) != 1 { return false } - switch x := w.Parts[0].(type) { + switch wp := w.Parts[0].(type) { case *Lit: - return ValidName(x.Value) + return ValidName(wp.Value) case *ParamExp: - return x.nakedIndex() + return wp.nakedIndex() default: return false } diff --git a/vendor/mvdan.cc/sh/v3/syntax/printer.go b/vendor/mvdan.cc/sh/v3/syntax/printer.go index 84ad6850..55cf69fc 100644 --- a/vendor/mvdan.cc/sh/v3/syntax/printer.go +++ b/vendor/mvdan.cc/sh/v3/syntax/printer.go @@ -134,25 +134,25 @@ func (p *Printer) Print(w io.Writer, node Node) error { w = p.tabWriter p.bufWriter.Reset(w) - switch x := node.(type) { + switch node := node.(type) { case *File: - p.stmtList(x.Stmts, x.Last) + p.stmtList(node.Stmts, node.Last) p.newline(Pos{}) case *Stmt: - p.stmtList([]*Stmt{x}, nil) + p.stmtList([]*Stmt{node}, nil) case Command: - p.command(x, nil) + p.command(node, nil) case *Word: - p.line = x.Pos().Line() - p.word(x) + p.line = node.Pos().Line() + p.word(node) case WordPart: - p.line = x.Pos().Line() - p.wordPart(x, nil) + p.line = node.Pos().Line() + p.wordPart(node, nil) case *Assign: - p.line = x.Pos().Line() - p.assigns([]*Assign{x}) + p.line = node.Pos().Line() + p.assigns([]*Assign{node}) default: - return fmt.Errorf("unsupported node type: %T", x) + return fmt.Errorf("unsupported node type: %T", node) } p.flushHeredocs() p.flushComments() @@ -635,88 +635,88 @@ func (p *Printer) wordParts(wps []WordPart, quoted bool) { } func (p *Printer) wordPart(wp, next WordPart) { - switch x := wp.(type) { + switch wp := wp.(type) { case *Lit: - p.writeLit(x.Value) + p.writeLit(wp.Value) case *SglQuoted: - if x.Dollar { + if wp.Dollar { p.WriteByte('$') } p.WriteByte('\'') - p.writeLit(x.Value) + p.writeLit(wp.Value) p.WriteByte('\'') - p.advanceLine(x.End().Line()) + p.advanceLine(wp.End().Line()) case *DblQuoted: - p.dblQuoted(x) + p.dblQuoted(wp) case *CmdSubst: - p.advanceLine(x.Pos().Line()) + p.advanceLine(wp.Pos().Line()) switch { - case x.TempFile: + case wp.TempFile: p.WriteString("${") p.wantSpace = spaceRequired - p.nestedStmts(x.Stmts, x.Last, x.Right) + p.nestedStmts(wp.Stmts, wp.Last, wp.Right) p.wantSpace = spaceNotRequired - p.semiRsrv("}", x.Right) - case x.ReplyVar: + p.semiRsrv("}", wp.Right) + case wp.ReplyVar: p.WriteString("${|") - p.nestedStmts(x.Stmts, x.Last, x.Right) + p.nestedStmts(wp.Stmts, wp.Last, wp.Right) p.wantSpace = spaceNotRequired - p.semiRsrv("}", x.Right) + p.semiRsrv("}", wp.Right) // Special case: `# inline comment` - case x.Backquotes && len(x.Stmts) == 0 && - len(x.Last) == 1 && x.Right.Line() == p.line: + case wp.Backquotes && len(wp.Stmts) == 0 && + len(wp.Last) == 1 && wp.Right.Line() == p.line: p.WriteString("`#") - p.WriteString(x.Last[0].Text) + p.WriteString(wp.Last[0].Text) p.WriteString("`") default: p.WriteString("$(") - if len(x.Stmts) > 0 && startsWithLparen(x.Stmts[0]) { + if len(wp.Stmts) > 0 && startsWithLparen(wp.Stmts[0]) { p.wantSpace = spaceRequired } else { p.wantSpace = spaceNotRequired } - p.nestedStmts(x.Stmts, x.Last, x.Right) - p.rightParen(x.Right) + p.nestedStmts(wp.Stmts, wp.Last, wp.Right) + p.rightParen(wp.Right) } case *ParamExp: litCont := ";" if nextLit, ok := next.(*Lit); ok && nextLit.Value != "" { litCont = nextLit.Value[:1] } - name := x.Param.Value + name := wp.Param.Value switch { case !p.minify: - case x.Excl, x.Length, x.Width: - case x.Index != nil, x.Slice != nil: - case x.Repl != nil, x.Exp != nil: + case wp.Excl, wp.Length, wp.Width: + case wp.Index != nil, wp.Slice != nil: + case wp.Repl != nil, wp.Exp != nil: case len(name) > 1 && !ValidName(name): // ${10} case ValidName(name + litCont): // ${var}cont default: - x2 := *x + x2 := *wp x2.Short = true p.paramExp(&x2) return } - p.paramExp(x) + p.paramExp(wp) case *ArithmExp: p.WriteString("$((") - if x.Unsigned { + if wp.Unsigned { p.WriteString("# ") } - p.arithmExpr(x.X, false, false) + p.arithmExpr(wp.X, false, false) p.WriteString("))") case *ExtGlob: - p.WriteString(x.Op.String()) - p.writeLit(x.Pattern.Value) + p.WriteString(wp.Op.String()) + p.writeLit(wp.Pattern.Value) p.WriteByte(')') case *ProcSubst: // avoid conflict with << and others if p.wantSpace == spaceRequired { p.space() } - p.WriteString(x.Op.String()) - p.nestedStmts(x.Stmts, x.Last, x.Rparen) - p.rightParen(x.Rparen) + p.WriteString(wp.Op.String()) + p.nestedStmts(wp.Stmts, wp.Last, wp.Rparen) + p.rightParen(wp.Rparen) } } @@ -801,23 +801,23 @@ func (p *Printer) paramExp(pe *ParamExp) { } func (p *Printer) loop(loop Loop) { - switch x := loop.(type) { + switch loop := loop.(type) { case *WordIter: - p.writeLit(x.Name.Value) - if x.InPos.IsValid() { + p.writeLit(loop.Name.Value) + if loop.InPos.IsValid() { p.spacedString(" in", Pos{}) - p.wordJoin(x.Items) + p.wordJoin(loop.Items) } case *CStyleLoop: p.WriteString("((") - if x.Init == nil { + if loop.Init == nil { p.space() } - p.arithmExpr(x.Init, false, false) + p.arithmExpr(loop.Init, false, false) p.WriteString("; ") - p.arithmExpr(x.Cond, false, false) + p.arithmExpr(loop.Cond, false, false) p.WriteString("; ") - p.arithmExpr(x.Post, false, false) + p.arithmExpr(loop.Post, false, false) p.WriteString("))") } } @@ -826,40 +826,40 @@ func (p *Printer) arithmExpr(expr ArithmExpr, compact, spacePlusMinus bool) { if p.minify { compact = true } - switch x := expr.(type) { + switch expr := expr.(type) { case *Word: - p.word(x) + p.word(expr) case *BinaryArithm: if compact { - p.arithmExpr(x.X, compact, spacePlusMinus) - p.WriteString(x.Op.String()) - p.arithmExpr(x.Y, compact, false) + p.arithmExpr(expr.X, compact, spacePlusMinus) + p.WriteString(expr.Op.String()) + p.arithmExpr(expr.Y, compact, false) } else { - p.arithmExpr(x.X, compact, spacePlusMinus) - if x.Op != Comma { + p.arithmExpr(expr.X, compact, spacePlusMinus) + if expr.Op != Comma { p.space() } - p.WriteString(x.Op.String()) + p.WriteString(expr.Op.String()) p.space() - p.arithmExpr(x.Y, compact, false) + p.arithmExpr(expr.Y, compact, false) } case *UnaryArithm: - if x.Post { - p.arithmExpr(x.X, compact, spacePlusMinus) - p.WriteString(x.Op.String()) + if expr.Post { + p.arithmExpr(expr.X, compact, spacePlusMinus) + p.WriteString(expr.Op.String()) } else { if spacePlusMinus { - switch x.Op { + switch expr.Op { case Plus, Minus: p.space() } } - p.WriteString(x.Op.String()) - p.arithmExpr(x.X, compact, false) + p.WriteString(expr.Op.String()) + p.arithmExpr(expr.X, compact, false) } case *ParenArithm: p.WriteByte('(') - p.arithmExpr(x.X, false, false) + p.arithmExpr(expr.X, false, false) p.WriteByte(')') } } @@ -877,33 +877,33 @@ func (p *Printer) testExpr(expr TestExpr) { func (p *Printer) testExprSameLine(expr TestExpr) { p.advanceLine(expr.Pos().Line()) - switch x := expr.(type) { + switch expr := expr.(type) { case *Word: - p.word(x) + p.word(expr) case *BinaryTest: - p.testExprSameLine(x.X) + p.testExprSameLine(expr.X) p.space() - p.WriteString(x.Op.String()) - switch x.Op { + p.WriteString(expr.Op.String()) + switch expr.Op { case AndTest, OrTest: p.wantSpace = spaceRequired - p.testExpr(x.Y) + p.testExpr(expr.Y) default: p.space() - p.testExprSameLine(x.Y) + p.testExprSameLine(expr.Y) } case *UnaryTest: - p.WriteString(x.Op.String()) + p.WriteString(expr.Op.String()) p.space() - p.testExprSameLine(x.X) + p.testExprSameLine(expr.X) case *ParenTest: p.WriteByte('(') - if startsWithLparen(x.X) { + if startsWithLparen(expr.X) { p.wantSpace = spaceRequired } else { p.wantSpace = spaceNotRequired } - p.testExpr(x.X) + p.testExpr(expr.X) p.WriteByte(')') } } @@ -915,16 +915,16 @@ func (p *Printer) word(w *Word) { func (p *Printer) unquotedWord(w *Word) { for _, wp := range w.Parts { - switch x := wp.(type) { + switch wp := wp.(type) { case *SglQuoted: - p.writeLit(x.Value) + p.writeLit(wp.Value) case *DblQuoted: - p.wordParts(x.Parts, true) + p.wordParts(wp.Parts, true) case *Lit: - for i := 0; i < len(x.Value); i++ { - if b := x.Value[i]; b == '\\' { - if i++; i < len(x.Value) { - p.WriteByte(x.Value[i]) + for i := 0; i < len(wp.Value); i++ { + if b := wp.Value[i]; b == '\\' { + if i++; i < len(wp.Value) { + p.WriteByte(wp.Value[i]) } } else { p.WriteByte(b) @@ -1058,54 +1058,62 @@ func (p *Printer) stmt(s *Stmt) { p.decLevel() } +func (p *Printer) printRedirsUntil(redirs []*Redirect, startRedirs int, pos Pos) int { + for _, r := range redirs[startRedirs:] { + if r.Pos().After(pos) || r.Op == Hdoc || r.Op == DashHdoc { + break + } + if p.wantSpace == spaceRequired { + p.spacePad(r.Pos()) + } + if r.N != nil { + p.writeLit(r.N.Value) + } + p.WriteString(r.Op.String()) + if p.spaceRedirects && (r.Op != DplIn && r.Op != DplOut) { + p.space() + } else { + p.wantSpace = spaceRequired + } + p.word(r.Word) + startRedirs++ + } + return startRedirs +} + func (p *Printer) command(cmd Command, redirs []*Redirect) (startRedirs int) { p.advanceLine(cmd.Pos().Line()) p.spacePad(cmd.Pos()) - switch x := cmd.(type) { + switch cmd := cmd.(type) { case *CallExpr: - p.assigns(x.Assigns) - if len(x.Args) <= 1 { - p.wordJoin(x.Args) - return 0 - } - p.wordJoin(x.Args[:1]) - for _, r := range redirs { - if r.Pos().After(x.Args[1].Pos()) || r.Op == Hdoc || r.Op == DashHdoc { - break - } - if p.wantSpace == spaceRequired { - p.spacePad(r.Pos()) - } - if r.N != nil { - p.writeLit(r.N.Value) - } - p.WriteString(r.Op.String()) - if p.spaceRedirects && (r.Op != DplIn && r.Op != DplOut) { - p.space() - } else { - p.wantSpace = spaceRequired - } - p.word(r.Word) - startRedirs++ + p.assigns(cmd.Assigns) + if len(cmd.Args) > 0 { + startRedirs = p.printRedirsUntil(redirs, startRedirs, cmd.Args[0].Pos()) + } + if len(cmd.Args) <= 1 { + p.wordJoin(cmd.Args) + return startRedirs } - p.wordJoin(x.Args[1:]) + p.wordJoin(cmd.Args[:1]) + startRedirs = p.printRedirsUntil(redirs, startRedirs, cmd.Args[1].Pos()) + p.wordJoin(cmd.Args[1:]) case *Block: p.WriteByte('{') p.wantSpace = spaceRequired // Forbid "foo()\n{ bar; }" p.wantNewline = p.wantNewline || p.funcNextLine - p.nestedStmts(x.Stmts, x.Last, x.Rbrace) - p.semiRsrv("}", x.Rbrace) + p.nestedStmts(cmd.Stmts, cmd.Last, cmd.Rbrace) + p.semiRsrv("}", cmd.Rbrace) case *IfClause: - p.ifClause(x, false) + p.ifClause(cmd, false) case *Subshell: p.WriteByte('(') - stmts := x.Stmts + stmts := cmd.Stmts if len(stmts) > 0 && startsWithLparen(stmts[0]) { p.wantSpace = spaceRequired // Add a space between nested parentheses if we're printing them in a single line, // to avoid the ambiguity between `((` and `( (`. - if (x.Lparen.Line() != stmts[0].Pos().Line() || len(stmts) > 1) && !p.singleLine { + if (cmd.Lparen.Line() != stmts[0].Pos().Line() || len(stmts) > 1) && !p.singleLine { p.wantSpace = spaceNotRequired if p.minify { @@ -1116,38 +1124,38 @@ func (p *Printer) command(cmd Command, redirs []*Redirect) (startRedirs int) { p.wantSpace = spaceNotRequired } - p.spacePad(stmtsPos(x.Stmts, x.Last)) - p.nestedStmts(x.Stmts, x.Last, x.Rparen) + p.spacePad(stmtsPos(cmd.Stmts, cmd.Last)) + p.nestedStmts(cmd.Stmts, cmd.Last, cmd.Rparen) p.wantSpace = spaceNotRequired - p.spacePad(x.Rparen) - p.rightParen(x.Rparen) + p.spacePad(cmd.Rparen) + p.rightParen(cmd.Rparen) case *WhileClause: - if x.Until { - p.spacedString("until", x.Pos()) + if cmd.Until { + p.spacedString("until", cmd.Pos()) } else { - p.spacedString("while", x.Pos()) + p.spacedString("while", cmd.Pos()) } - p.nestedStmts(x.Cond, x.CondLast, Pos{}) - p.semiOrNewl("do", x.DoPos) - p.nestedStmts(x.Do, x.DoLast, x.DonePos) - p.semiRsrv("done", x.DonePos) + p.nestedStmts(cmd.Cond, cmd.CondLast, Pos{}) + p.semiOrNewl("do", cmd.DoPos) + p.nestedStmts(cmd.Do, cmd.DoLast, cmd.DonePos) + p.semiRsrv("done", cmd.DonePos) case *ForClause: - if x.Select { + if cmd.Select { p.WriteString("select ") } else { p.WriteString("for ") } - p.loop(x.Loop) - p.semiOrNewl("do", x.DoPos) - p.nestedStmts(x.Do, x.DoLast, x.DonePos) - p.semiRsrv("done", x.DonePos) + p.loop(cmd.Loop) + p.semiOrNewl("do", cmd.DoPos) + p.nestedStmts(cmd.Do, cmd.DoLast, cmd.DonePos) + p.semiRsrv("done", cmd.DonePos) case *BinaryCmd: - p.stmt(x.X) - if p.minify || p.singleLine || x.Y.Pos().Line() <= p.line { + p.stmt(cmd.X) + if p.minify || p.singleLine || cmd.Y.Pos().Line() <= p.line { // leave p.nestedBinary untouched - p.spacedToken(x.Op.String(), x.OpPos) - p.advanceLine(x.Y.Pos().Line()) - p.stmt(x.Y) + p.spacedToken(cmd.Op.String(), cmd.OpPos) + p.advanceLine(cmd.Y.Pos().Line()) + p.stmt(cmd.Y) break } indent := !p.nestedBinary @@ -1158,60 +1166,60 @@ func (p *Printer) command(cmd Command, redirs []*Redirect) (startRedirs int) { if len(p.pendingHdocs) == 0 { p.bslashNewl() } - p.spacedToken(x.Op.String(), x.OpPos) - if len(x.Y.Comments) > 0 { + p.spacedToken(cmd.Op.String(), cmd.OpPos) + if len(cmd.Y.Comments) > 0 { p.wantSpace = spaceNotRequired - p.newline(x.Y.Pos()) + p.newline(cmd.Y.Pos()) p.indent() - p.comments(x.Y.Comments...) + p.comments(cmd.Y.Comments...) p.newline(Pos{}) p.indent() } } else { - p.spacedToken(x.Op.String(), x.OpPos) - p.advanceLine(x.OpPos.Line()) - p.comments(x.Y.Comments...) + p.spacedToken(cmd.Op.String(), cmd.OpPos) + p.advanceLine(cmd.OpPos.Line()) + p.comments(cmd.Y.Comments...) p.newline(Pos{}) p.indent() } - p.advanceLine(x.Y.Pos().Line()) - _, p.nestedBinary = x.Y.Cmd.(*BinaryCmd) - p.stmt(x.Y) + p.advanceLine(cmd.Y.Pos().Line()) + _, p.nestedBinary = cmd.Y.Cmd.(*BinaryCmd) + p.stmt(cmd.Y) if indent { p.decLevel() } p.nestedBinary = false case *FuncDecl: - if x.RsrvWord { + if cmd.RsrvWord { p.WriteString("function ") } - p.writeLit(x.Name.Value) - if !x.RsrvWord || x.Parens { + p.writeLit(cmd.Name.Value) + if !cmd.RsrvWord || cmd.Parens { p.WriteString("()") } if p.funcNextLine { p.newline(Pos{}) p.indent() - } else if !x.Parens || !p.minify { + } else if !cmd.Parens || !p.minify { p.space() } - p.advanceLine(x.Body.Pos().Line()) - p.comments(x.Body.Comments...) - p.stmt(x.Body) + p.advanceLine(cmd.Body.Pos().Line()) + p.comments(cmd.Body.Comments...) + p.stmt(cmd.Body) case *CaseClause: p.WriteString("case ") - p.word(x.Word) + p.word(cmd.Word) p.WriteString(" in") - p.advanceLine(x.In.Line()) + p.advanceLine(cmd.In.Line()) p.wantSpace = spaceRequired if p.swtCaseIndent { p.incLevel() } - if len(x.Items) == 0 { + if len(cmd.Items) == 0 { // Apparently "case x in; esac" is invalid shell. p.mustNewline = true } - for i, ci := range x.Items { + for i, ci := range cmd.Items { var last []Comment for i, c := range ci.Comments { if c.Pos().After(ci.Pos()) { @@ -1236,7 +1244,7 @@ func (p *Printer) command(cmd Command, redirs []*Redirect) (startRedirs int) { (bodyEnd.IsValid() && ci.OpPos.Line() > bodyEnd.Line()) p.nestedStmts(ci.Stmts, ci.Last, ci.OpPos) p.level++ - if !p.minify || i != len(x.Items)-1 { + if !p.minify || i != len(cmd.Items)-1 { if sep { p.newlines(ci.OpPos) p.wantNewline = true @@ -1250,58 +1258,58 @@ func (p *Printer) command(cmd Command, redirs []*Redirect) (startRedirs int) { p.flushComments() p.level-- } - p.comments(x.Last...) + p.comments(cmd.Last...) if p.swtCaseIndent { p.flushComments() p.decLevel() } - p.semiRsrv("esac", x.Esac) + p.semiRsrv("esac", cmd.Esac) case *ArithmCmd: p.WriteString("((") - if x.Unsigned { + if cmd.Unsigned { p.WriteString("# ") } - p.arithmExpr(x.X, false, false) + p.arithmExpr(cmd.X, false, false) p.WriteString("))") case *TestClause: p.WriteString("[[ ") p.incLevel() - p.testExpr(x.X) + p.testExpr(cmd.X) p.decLevel() - p.spacedString("]]", x.Right) + p.spacedString("]]", cmd.Right) case *DeclClause: - p.spacedString(x.Variant.Value, x.Pos()) - p.assigns(x.Args) + p.spacedString(cmd.Variant.Value, cmd.Pos()) + p.assigns(cmd.Args) case *TimeClause: - p.spacedString("time", x.Pos()) - if x.PosixFormat { - p.spacedString("-p", x.Pos()) + p.spacedString("time", cmd.Pos()) + if cmd.PosixFormat { + p.spacedString("-p", cmd.Pos()) } - if x.Stmt != nil { - p.stmt(x.Stmt) + if cmd.Stmt != nil { + p.stmt(cmd.Stmt) } case *CoprocClause: - p.spacedString("coproc", x.Pos()) - if x.Name != nil { + p.spacedString("coproc", cmd.Pos()) + if cmd.Name != nil { p.space() - p.word(x.Name) + p.word(cmd.Name) } p.space() - p.stmt(x.Stmt) + p.stmt(cmd.Stmt) case *LetClause: - p.spacedString("let", x.Pos()) - for _, n := range x.Exprs { + p.spacedString("let", cmd.Pos()) + for _, n := range cmd.Exprs { p.space() p.arithmExpr(n, true, false) } case *TestDecl: - p.spacedString("@test", x.Pos()) + p.spacedString("@test", cmd.Pos()) p.space() - p.word(x.Description) + p.word(cmd.Description) p.space() - p.stmt(x.Body) + p.stmt(cmd.Body) default: - panic(fmt.Sprintf("syntax.Printer: unexpected node type %T", x)) + panic(fmt.Sprintf("syntax.Printer: unexpected node type %T", cmd)) } return startRedirs } diff --git a/vendor/mvdan.cc/sh/v3/syntax/simplify.go b/vendor/mvdan.cc/sh/v3/syntax/simplify.go index e82fd55a..34059c62 100644 --- a/vendor/mvdan.cc/sh/v3/syntax/simplify.go +++ b/vendor/mvdan.cc/sh/v3/syntax/simplify.go @@ -27,63 +27,63 @@ type simplifier struct { } func (s *simplifier) visit(node Node) bool { - switch x := node.(type) { + switch node := node.(type) { case *Assign: - x.Index = s.removeParensArithm(x.Index) + node.Index = s.removeParensArithm(node.Index) // Don't inline params, as x[i] and x[$i] mean // different things when x is an associative // array; the first means "i", the second "$i". case *ParamExp: - x.Index = s.removeParensArithm(x.Index) + node.Index = s.removeParensArithm(node.Index) // don't inline params - same as above. - if x.Slice == nil { + if node.Slice == nil { break } - x.Slice.Offset = s.removeParensArithm(x.Slice.Offset) - x.Slice.Offset = s.inlineSimpleParams(x.Slice.Offset) - x.Slice.Length = s.removeParensArithm(x.Slice.Length) - x.Slice.Length = s.inlineSimpleParams(x.Slice.Length) + node.Slice.Offset = s.removeParensArithm(node.Slice.Offset) + node.Slice.Offset = s.inlineSimpleParams(node.Slice.Offset) + node.Slice.Length = s.removeParensArithm(node.Slice.Length) + node.Slice.Length = s.inlineSimpleParams(node.Slice.Length) case *ArithmExp: - x.X = s.removeParensArithm(x.X) - x.X = s.inlineSimpleParams(x.X) + node.X = s.removeParensArithm(node.X) + node.X = s.inlineSimpleParams(node.X) case *ArithmCmd: - x.X = s.removeParensArithm(x.X) - x.X = s.inlineSimpleParams(x.X) + node.X = s.removeParensArithm(node.X) + node.X = s.inlineSimpleParams(node.X) case *ParenArithm: - x.X = s.removeParensArithm(x.X) - x.X = s.inlineSimpleParams(x.X) + node.X = s.removeParensArithm(node.X) + node.X = s.inlineSimpleParams(node.X) case *BinaryArithm: - x.X = s.inlineSimpleParams(x.X) - x.Y = s.inlineSimpleParams(x.Y) + node.X = s.inlineSimpleParams(node.X) + node.Y = s.inlineSimpleParams(node.Y) case *CmdSubst: - x.Stmts = s.inlineSubshell(x.Stmts) + node.Stmts = s.inlineSubshell(node.Stmts) case *Subshell: - x.Stmts = s.inlineSubshell(x.Stmts) + node.Stmts = s.inlineSubshell(node.Stmts) case *Word: - x.Parts = s.simplifyWord(x.Parts) + node.Parts = s.simplifyWord(node.Parts) case *TestClause: - x.X = s.removeParensTest(x.X) - x.X = s.removeNegateTest(x.X) + node.X = s.removeParensTest(node.X) + node.X = s.removeNegateTest(node.X) case *ParenTest: - x.X = s.removeParensTest(x.X) - x.X = s.removeNegateTest(x.X) + node.X = s.removeParensTest(node.X) + node.X = s.removeNegateTest(node.X) case *BinaryTest: - x.X = s.unquoteParams(x.X) - x.X = s.removeNegateTest(x.X) - if x.Op == TsMatchShort { + node.X = s.unquoteParams(node.X) + node.X = s.removeNegateTest(node.X) + if node.Op == TsMatchShort { s.modified = true - x.Op = TsMatch + node.Op = TsMatch } - switch x.Op { + switch node.Op { case TsMatch, TsNoMatch: // unquoting enables globbing default: - x.Y = s.unquoteParams(x.Y) + node.Y = s.unquoteParams(node.Y) } - x.Y = s.removeNegateTest(x.Y) + node.Y = s.removeNegateTest(node.Y) case *UnaryTest: - x.X = s.unquoteParams(x.X) + node.X = s.unquoteParams(node.X) } return true } diff --git a/vendor/mvdan.cc/sh/v3/syntax/tokens.go b/vendor/mvdan.cc/sh/v3/syntax/tokens.go index 6a64b213..97dec543 100644 --- a/vendor/mvdan.cc/sh/v3/syntax/tokens.go +++ b/vendor/mvdan.cc/sh/v3/syntax/tokens.go @@ -312,6 +312,7 @@ const ( TsVarSet // -v TsRefVar // -R TsNot = UnTestOperator(exclMark) // ! + TsParen = UnTestOperator(leftParen) // ( ) type BinTestOperator token diff --git a/vendor/mvdan.cc/sh/v3/syntax/walk.go b/vendor/mvdan.cc/sh/v3/syntax/walk.go index 5be8f9c6..85d66924 100644 --- a/vendor/mvdan.cc/sh/v3/syntax/walk.go +++ b/vendor/mvdan.cc/sh/v3/syntax/walk.go @@ -33,198 +33,198 @@ func Walk(node Node, f func(Node) bool) { return } - switch x := node.(type) { + switch node := node.(type) { case *File: - walkStmts(x.Stmts, x.Last, f) + walkStmts(node.Stmts, node.Last, f) case *Comment: case *Stmt: - for _, c := range x.Comments { - if !x.End().After(c.Pos()) { + for _, c := range node.Comments { + if !node.End().After(c.Pos()) { defer Walk(&c, f) break } Walk(&c, f) } - if x.Cmd != nil { - Walk(x.Cmd, f) + if node.Cmd != nil { + Walk(node.Cmd, f) } - for _, r := range x.Redirs { + for _, r := range node.Redirs { Walk(r, f) } case *Assign: - if x.Name != nil { - Walk(x.Name, f) + if node.Name != nil { + Walk(node.Name, f) } - if x.Value != nil { - Walk(x.Value, f) + if node.Value != nil { + Walk(node.Value, f) } - if x.Index != nil { - Walk(x.Index, f) + if node.Index != nil { + Walk(node.Index, f) } - if x.Array != nil { - Walk(x.Array, f) + if node.Array != nil { + Walk(node.Array, f) } case *Redirect: - if x.N != nil { - Walk(x.N, f) + if node.N != nil { + Walk(node.N, f) } - Walk(x.Word, f) - if x.Hdoc != nil { - Walk(x.Hdoc, f) + Walk(node.Word, f) + if node.Hdoc != nil { + Walk(node.Hdoc, f) } case *CallExpr: - for _, a := range x.Assigns { + for _, a := range node.Assigns { Walk(a, f) } - walkWords(x.Args, f) + walkWords(node.Args, f) case *Subshell: - walkStmts(x.Stmts, x.Last, f) + walkStmts(node.Stmts, node.Last, f) case *Block: - walkStmts(x.Stmts, x.Last, f) + walkStmts(node.Stmts, node.Last, f) case *IfClause: - walkStmts(x.Cond, x.CondLast, f) - walkStmts(x.Then, x.ThenLast, f) - if x.Else != nil { - Walk(x.Else, f) + walkStmts(node.Cond, node.CondLast, f) + walkStmts(node.Then, node.ThenLast, f) + if node.Else != nil { + Walk(node.Else, f) } case *WhileClause: - walkStmts(x.Cond, x.CondLast, f) - walkStmts(x.Do, x.DoLast, f) + walkStmts(node.Cond, node.CondLast, f) + walkStmts(node.Do, node.DoLast, f) case *ForClause: - Walk(x.Loop, f) - walkStmts(x.Do, x.DoLast, f) + Walk(node.Loop, f) + walkStmts(node.Do, node.DoLast, f) case *WordIter: - Walk(x.Name, f) - walkWords(x.Items, f) + Walk(node.Name, f) + walkWords(node.Items, f) case *CStyleLoop: - if x.Init != nil { - Walk(x.Init, f) + if node.Init != nil { + Walk(node.Init, f) } - if x.Cond != nil { - Walk(x.Cond, f) + if node.Cond != nil { + Walk(node.Cond, f) } - if x.Post != nil { - Walk(x.Post, f) + if node.Post != nil { + Walk(node.Post, f) } case *BinaryCmd: - Walk(x.X, f) - Walk(x.Y, f) + Walk(node.X, f) + Walk(node.Y, f) case *FuncDecl: - Walk(x.Name, f) - Walk(x.Body, f) + Walk(node.Name, f) + Walk(node.Body, f) case *Word: - for _, wp := range x.Parts { + for _, wp := range node.Parts { Walk(wp, f) } case *Lit: case *SglQuoted: case *DblQuoted: - for _, wp := range x.Parts { + for _, wp := range node.Parts { Walk(wp, f) } case *CmdSubst: - walkStmts(x.Stmts, x.Last, f) + walkStmts(node.Stmts, node.Last, f) case *ParamExp: - Walk(x.Param, f) - if x.Index != nil { - Walk(x.Index, f) + Walk(node.Param, f) + if node.Index != nil { + Walk(node.Index, f) } - if x.Repl != nil { - if x.Repl.Orig != nil { - Walk(x.Repl.Orig, f) + if node.Repl != nil { + if node.Repl.Orig != nil { + Walk(node.Repl.Orig, f) } - if x.Repl.With != nil { - Walk(x.Repl.With, f) + if node.Repl.With != nil { + Walk(node.Repl.With, f) } } - if x.Exp != nil && x.Exp.Word != nil { - Walk(x.Exp.Word, f) + if node.Exp != nil && node.Exp.Word != nil { + Walk(node.Exp.Word, f) } case *ArithmExp: - Walk(x.X, f) + Walk(node.X, f) case *ArithmCmd: - Walk(x.X, f) + Walk(node.X, f) case *BinaryArithm: - Walk(x.X, f) - Walk(x.Y, f) + Walk(node.X, f) + Walk(node.Y, f) case *BinaryTest: - Walk(x.X, f) - Walk(x.Y, f) + Walk(node.X, f) + Walk(node.Y, f) case *UnaryArithm: - Walk(x.X, f) + Walk(node.X, f) case *UnaryTest: - Walk(x.X, f) + Walk(node.X, f) case *ParenArithm: - Walk(x.X, f) + Walk(node.X, f) case *ParenTest: - Walk(x.X, f) + Walk(node.X, f) case *CaseClause: - Walk(x.Word, f) - for _, ci := range x.Items { + Walk(node.Word, f) + for _, ci := range node.Items { Walk(ci, f) } - for _, c := range x.Last { + for _, c := range node.Last { Walk(&c, f) } case *CaseItem: - for _, c := range x.Comments { - if c.Pos().After(x.Pos()) { + for _, c := range node.Comments { + if c.Pos().After(node.Pos()) { defer Walk(&c, f) break } Walk(&c, f) } - walkWords(x.Patterns, f) - walkStmts(x.Stmts, x.Last, f) + walkWords(node.Patterns, f) + walkStmts(node.Stmts, node.Last, f) case *TestClause: - Walk(x.X, f) + Walk(node.X, f) case *DeclClause: - for _, a := range x.Args { + for _, a := range node.Args { Walk(a, f) } case *ArrayExpr: - for _, el := range x.Elems { + for _, el := range node.Elems { Walk(el, f) } - for _, c := range x.Last { + for _, c := range node.Last { Walk(&c, f) } case *ArrayElem: - for _, c := range x.Comments { - if c.Pos().After(x.Pos()) { + for _, c := range node.Comments { + if c.Pos().After(node.Pos()) { defer Walk(&c, f) break } Walk(&c, f) } - if x.Index != nil { - Walk(x.Index, f) + if node.Index != nil { + Walk(node.Index, f) } - if x.Value != nil { - Walk(x.Value, f) + if node.Value != nil { + Walk(node.Value, f) } case *ExtGlob: - Walk(x.Pattern, f) + Walk(node.Pattern, f) case *ProcSubst: - walkStmts(x.Stmts, x.Last, f) + walkStmts(node.Stmts, node.Last, f) case *TimeClause: - if x.Stmt != nil { - Walk(x.Stmt, f) + if node.Stmt != nil { + Walk(node.Stmt, f) } case *CoprocClause: - if x.Name != nil { - Walk(x.Name, f) + if node.Name != nil { + Walk(node.Name, f) } - Walk(x.Stmt, f) + Walk(node.Stmt, f) case *LetClause: - for _, expr := range x.Exprs { + for _, expr := range node.Exprs { Walk(expr, f) } case *TestDecl: - Walk(x.Description, f) - Walk(x.Body, f) + Walk(node.Description, f) + Walk(node.Body, f) default: - panic(fmt.Sprintf("syntax.Walk: unexpected node type %T", x)) + panic(fmt.Sprintf("syntax.Walk: unexpected node type %T", node)) } f(nil) @@ -235,6 +235,7 @@ func Walk(node Node, f func(Node) bool) { func DebugPrint(w io.Writer, node Node) error { p := debugPrinter{out: w} p.print(reflect.ValueOf(node)) + p.printf("\n") return p.err } @@ -308,6 +309,10 @@ func (p *debugPrinter) print(x reflect.Value) { } p.printf("}") default: - p.printf("%#v", x.Interface()) + if s, ok := x.Interface().(fmt.Stringer); ok && !x.IsZero() { + p.printf("%#v (%s)", x.Interface(), s) + } else { + p.printf("%#v", x.Interface()) + } } } diff --git a/vendor/nhooyr.io/websocket/.gitignore b/vendor/nhooyr.io/websocket/.gitignore deleted file mode 100644 index 6961e5c8..00000000 --- a/vendor/nhooyr.io/websocket/.gitignore +++ /dev/null @@ -1 +0,0 @@ -websocket.test diff --git a/vendor/nhooyr.io/websocket/LICENSE.txt b/vendor/nhooyr.io/websocket/LICENSE.txt index b5b5fef3..77b5bef6 100644 --- a/vendor/nhooyr.io/websocket/LICENSE.txt +++ b/vendor/nhooyr.io/websocket/LICENSE.txt @@ -1,21 +1,13 @@ -MIT License - -Copyright (c) 2018 Anmol Sethi - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +Copyright (c) 2023 Anmol Sethi + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/vendor/nhooyr.io/websocket/README.md b/vendor/nhooyr.io/websocket/README.md index df20c581..d663d5d0 100644 --- a/vendor/nhooyr.io/websocket/README.md +++ b/vendor/nhooyr.io/websocket/README.md @@ -1,132 +1 @@ -# websocket - -[![godoc](https://godoc.org/nhooyr.io/websocket?status.svg)](https://pkg.go.dev/nhooyr.io/websocket) -[![coverage](https://img.shields.io/badge/coverage-88%25-success)](https://nhooyrio-websocket-coverage.netlify.app) - -websocket is a minimal and idiomatic WebSocket library for Go. - -## Install - -```bash -go get nhooyr.io/websocket -``` - -## Highlights - -- Minimal and idiomatic API -- First class [context.Context](https://blog.golang.org/context) support -- Fully passes the WebSocket [autobahn-testsuite](https://github.com/crossbario/autobahn-testsuite) -- [Single dependency](https://pkg.go.dev/nhooyr.io/websocket?tab=imports) -- JSON and protobuf helpers in the [wsjson](https://pkg.go.dev/nhooyr.io/websocket/wsjson) and [wspb](https://pkg.go.dev/nhooyr.io/websocket/wspb) subpackages -- Zero alloc reads and writes -- Concurrent writes -- [Close handshake](https://pkg.go.dev/nhooyr.io/websocket#Conn.Close) -- [net.Conn](https://pkg.go.dev/nhooyr.io/websocket#NetConn) wrapper -- [Ping pong](https://pkg.go.dev/nhooyr.io/websocket#Conn.Ping) API -- [RFC 7692](https://tools.ietf.org/html/rfc7692) permessage-deflate compression -- Compile to [Wasm](https://pkg.go.dev/nhooyr.io/websocket#hdr-Wasm) - -## Roadmap - -- [ ] HTTP/2 [#4](https://github.com/nhooyr/websocket/issues/4) - -## Examples - -For a production quality example that demonstrates the complete API, see the -[echo example](./examples/echo). - -For a full stack example, see the [chat example](./examples/chat). - -### Server - -```go -http.HandlerFunc(func (w http.ResponseWriter, r *http.Request) { - c, err := websocket.Accept(w, r, nil) - if err != nil { - // ... - } - defer c.Close(websocket.StatusInternalError, "the sky is falling") - - ctx, cancel := context.WithTimeout(r.Context(), time.Second*10) - defer cancel() - - var v interface{} - err = wsjson.Read(ctx, c, &v) - if err != nil { - // ... - } - - log.Printf("received: %v", v) - - c.Close(websocket.StatusNormalClosure, "") -}) -``` - -### Client - -```go -ctx, cancel := context.WithTimeout(context.Background(), time.Minute) -defer cancel() - -c, _, err := websocket.Dial(ctx, "ws://localhost:8080", nil) -if err != nil { - // ... -} -defer c.Close(websocket.StatusInternalError, "the sky is falling") - -err = wsjson.Write(ctx, c, "hi") -if err != nil { - // ... -} - -c.Close(websocket.StatusNormalClosure, "") -``` - -## Comparison - -### gorilla/websocket - -Advantages of [gorilla/websocket](https://github.com/gorilla/websocket): - -- Mature and widely used -- [Prepared writes](https://pkg.go.dev/github.com/gorilla/websocket#PreparedMessage) -- Configurable [buffer sizes](https://pkg.go.dev/github.com/gorilla/websocket#hdr-Buffers) - -Advantages of nhooyr.io/websocket: - -- Minimal and idiomatic API - - Compare godoc of [nhooyr.io/websocket](https://pkg.go.dev/nhooyr.io/websocket) with [gorilla/websocket](https://pkg.go.dev/github.com/gorilla/websocket) side by side. -- [net.Conn](https://pkg.go.dev/nhooyr.io/websocket#NetConn) wrapper -- Zero alloc reads and writes ([gorilla/websocket#535](https://github.com/gorilla/websocket/issues/535)) -- Full [context.Context](https://blog.golang.org/context) support -- Dial uses [net/http.Client](https://golang.org/pkg/net/http/#Client) - - Will enable easy HTTP/2 support in the future - - Gorilla writes directly to a net.Conn and so duplicates features of net/http.Client. -- Concurrent writes -- Close handshake ([gorilla/websocket#448](https://github.com/gorilla/websocket/issues/448)) -- Idiomatic [ping pong](https://pkg.go.dev/nhooyr.io/websocket#Conn.Ping) API - - Gorilla requires registering a pong callback before sending a Ping -- Can target Wasm ([gorilla/websocket#432](https://github.com/gorilla/websocket/issues/432)) -- Transparent message buffer reuse with [wsjson](https://pkg.go.dev/nhooyr.io/websocket/wsjson) and [wspb](https://pkg.go.dev/nhooyr.io/websocket/wspb) subpackages -- [1.75x](https://github.com/nhooyr/websocket/releases/tag/v1.7.4) faster WebSocket masking implementation in pure Go - - Gorilla's implementation is slower and uses [unsafe](https://golang.org/pkg/unsafe/). -- Full [permessage-deflate](https://tools.ietf.org/html/rfc7692) compression extension support - - Gorilla only supports no context takeover mode - - We use [klauspost/compress](https://github.com/klauspost/compress) for much lower memory usage ([gorilla/websocket#203](https://github.com/gorilla/websocket/issues/203)) -- [CloseRead](https://pkg.go.dev/nhooyr.io/websocket#Conn.CloseRead) helper ([gorilla/websocket#492](https://github.com/gorilla/websocket/issues/492)) -- Actively maintained ([gorilla/websocket#370](https://github.com/gorilla/websocket/issues/370)) - -#### golang.org/x/net/websocket - -[golang.org/x/net/websocket](https://pkg.go.dev/golang.org/x/net/websocket) is deprecated. -See [golang/go/issues/18152](https://github.com/golang/go/issues/18152). - -The [net.Conn](https://pkg.go.dev/nhooyr.io/websocket#NetConn) can help in transitioning -to nhooyr.io/websocket. - -#### gobwas/ws - -[gobwas/ws](https://github.com/gobwas/ws) has an extremely flexible API that allows it to be used -in an event driven style for performance. See the author's [blog post](https://medium.freecodecamp.org/million-websockets-and-go-cc58418460bb). - -However when writing idiomatic Go, nhooyr.io/websocket will be faster and easier to use. +deprecated: Use https://github.com/coder/websocket instead diff --git a/vendor/nhooyr.io/websocket/accept.go b/vendor/nhooyr.io/websocket/accept.go index 18536bdb..e1fd1f4f 100644 --- a/vendor/nhooyr.io/websocket/accept.go +++ b/vendor/nhooyr.io/websocket/accept.go @@ -1,3 +1,4 @@ +//go:build !js // +build !js package websocket @@ -20,6 +21,8 @@ import ( ) // AcceptOptions represents Accept's options. +// +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. type AcceptOptions struct { // Subprotocols lists the WebSocket subprotocols that Accept will negotiate with the client. // The empty subprotocol will always be negotiated as per RFC 6455. If you would like to @@ -51,7 +54,7 @@ type AcceptOptions struct { OriginPatterns []string // CompressionMode controls the compression mode. - // Defaults to CompressionNoContextTakeover. + // Defaults to CompressionDisabled. // // See docs on CompressionMode for details. CompressionMode CompressionMode @@ -63,9 +66,19 @@ type AcceptOptions struct { CompressionThreshold int } +func (opts *AcceptOptions) cloneWithDefaults() *AcceptOptions { + var o AcceptOptions + if opts != nil { + o = *opts + } + return &o +} + // Accept accepts a WebSocket handshake from a client and upgrades the // the connection to a WebSocket. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // Accept will not allow cross origin requests by default. // See the InsecureSkipVerify and OriginPatterns options to allow cross origin requests. // @@ -77,17 +90,13 @@ func Accept(w http.ResponseWriter, r *http.Request, opts *AcceptOptions) (*Conn, func accept(w http.ResponseWriter, r *http.Request, opts *AcceptOptions) (_ *Conn, err error) { defer errd.Wrap(&err, "failed to accept WebSocket connection") - if opts == nil { - opts = &AcceptOptions{} - } - opts = &*opts - errCode, err := verifyClientRequest(w, r) if err != nil { http.Error(w, err.Error(), errCode) return nil, err } + opts = opts.cloneWithDefaults() if !opts.InsecureSkipVerify { err = authenticateOrigin(r, opts.OriginPatterns) if err != nil { @@ -118,9 +127,9 @@ func accept(w http.ResponseWriter, r *http.Request, opts *AcceptOptions) (_ *Con w.Header().Set("Sec-WebSocket-Protocol", subproto) } - copts, err := acceptCompression(r, w, opts.CompressionMode) - if err != nil { - return nil, err + copts, ok := selectDeflate(websocketExtensions(r.Header), opts.CompressionMode) + if ok { + w.Header().Set("Sec-WebSocket-Extensions", copts.String()) } w.WriteHeader(http.StatusSwitchingProtocols) @@ -180,10 +189,21 @@ func verifyClientRequest(w http.ResponseWriter, r *http.Request) (errCode int, _ return http.StatusBadRequest, fmt.Errorf("unsupported WebSocket protocol version (only 13 is supported): %q", r.Header.Get("Sec-WebSocket-Version")) } - if r.Header.Get("Sec-WebSocket-Key") == "" { + websocketSecKeys := r.Header.Values("Sec-WebSocket-Key") + if len(websocketSecKeys) == 0 { return http.StatusBadRequest, errors.New("WebSocket protocol violation: missing Sec-WebSocket-Key") } + if len(websocketSecKeys) > 1 { + return http.StatusBadRequest, errors.New("WebSocket protocol violation: multiple Sec-WebSocket-Key headers") + } + + // The RFC states to remove any leading or trailing whitespace. + websocketSecKey := strings.TrimSpace(websocketSecKeys[0]) + if v, err := base64.StdEncoding.DecodeString(websocketSecKey); err != nil || len(v) != 16 { + return http.StatusBadRequest, fmt.Errorf("WebSocket protocol violation: invalid Sec-WebSocket-Key %q, must be a 16 byte base64 encoded string", websocketSecKey) + } + return 0, nil } @@ -211,7 +231,10 @@ func authenticateOrigin(r *http.Request, originHosts []string) error { return nil } } - return fmt.Errorf("request Origin %q is not authorized for Host %q", origin, r.Host) + if u.Host == "" { + return fmt.Errorf("request Origin %q is not a valid URL with a host", origin) + } + return fmt.Errorf("request Origin %q is not authorized for Host %q", u.Host, r.Host) } func match(pattern, s string) (bool, error) { @@ -230,26 +253,26 @@ func selectSubprotocol(r *http.Request, subprotocols []string) string { return "" } -func acceptCompression(r *http.Request, w http.ResponseWriter, mode CompressionMode) (*compressionOptions, error) { +func selectDeflate(extensions []websocketExtension, mode CompressionMode) (*compressionOptions, bool) { if mode == CompressionDisabled { - return nil, nil + return nil, false } - - for _, ext := range websocketExtensions(r.Header) { + for _, ext := range extensions { switch ext.name { + // We used to implement x-webkit-deflate-frame too for Safari but Safari has bugs... + // See https://github.com/nhooyr/websocket/issues/218 case "permessage-deflate": - return acceptDeflate(w, ext, mode) - // Disabled for now, see https://github.com/nhooyr/websocket/issues/218 - // case "x-webkit-deflate-frame": - // return acceptWebkitDeflate(w, ext, mode) + copts, ok := acceptDeflate(ext, mode) + if ok { + return copts, true + } } } - return nil, nil + return nil, false } -func acceptDeflate(w http.ResponseWriter, ext websocketExtension, mode CompressionMode) (*compressionOptions, error) { +func acceptDeflate(ext websocketExtension, mode CompressionMode) (*compressionOptions, bool) { copts := mode.opts() - for _, p := range ext.params { switch p { case "client_no_context_takeover": @@ -258,55 +281,18 @@ func acceptDeflate(w http.ResponseWriter, ext websocketExtension, mode Compressi case "server_no_context_takeover": copts.serverNoContextTakeover = true continue - } - - if strings.HasPrefix(p, "client_max_window_bits") { - // We cannot adjust the read sliding window so cannot make use of this. + case "client_max_window_bits", + "server_max_window_bits=15": continue } - err := fmt.Errorf("unsupported permessage-deflate parameter: %q", p) - http.Error(w, err.Error(), http.StatusBadRequest) - return nil, err - } - - copts.setHeader(w.Header()) - - return copts, nil -} - -func acceptWebkitDeflate(w http.ResponseWriter, ext websocketExtension, mode CompressionMode) (*compressionOptions, error) { - copts := mode.opts() - // The peer must explicitly request it. - copts.serverNoContextTakeover = false - - for _, p := range ext.params { - if p == "no_context_takeover" { - copts.serverNoContextTakeover = true + if strings.HasPrefix(p, "client_max_window_bits=") { + // We can't adjust the deflate window, but decoding with a larger window is acceptable. continue } - - // We explicitly fail on x-webkit-deflate-frame's max_window_bits parameter instead - // of ignoring it as the draft spec is unclear. It says the server can ignore it - // but the server has no way of signalling to the client it was ignored as the parameters - // are set one way. - // Thus us ignoring it would make the client think we understood it which would cause issues. - // See https://tools.ietf.org/html/draft-tyoshino-hybi-websocket-perframe-deflate-06#section-4.1 - // - // Either way, we're only implementing this for webkit which never sends the max_window_bits - // parameter so we don't need to worry about it. - err := fmt.Errorf("unsupported x-webkit-deflate-frame parameter: %q", p) - http.Error(w, err.Error(), http.StatusBadRequest) - return nil, err - } - - s := "x-webkit-deflate-frame" - if copts.clientNoContextTakeover { - s += "; no_context_takeover" + return nil, false } - w.Header().Set("Sec-WebSocket-Extensions", s) - - return copts, nil + return copts, true } func headerContainsTokenIgnoreCase(h http.Header, key, token string) bool { diff --git a/vendor/nhooyr.io/websocket/accept_js.go b/vendor/nhooyr.io/websocket/accept_js.go deleted file mode 100644 index daad4b79..00000000 --- a/vendor/nhooyr.io/websocket/accept_js.go +++ /dev/null @@ -1,20 +0,0 @@ -package websocket - -import ( - "errors" - "net/http" -) - -// AcceptOptions represents Accept's options. -type AcceptOptions struct { - Subprotocols []string - InsecureSkipVerify bool - OriginPatterns []string - CompressionMode CompressionMode - CompressionThreshold int -} - -// Accept is stubbed out for Wasm. -func Accept(w http.ResponseWriter, r *http.Request, opts *AcceptOptions) (*Conn, error) { - return nil, errors.New("unimplemented") -} diff --git a/vendor/nhooyr.io/websocket/close.go b/vendor/nhooyr.io/websocket/close.go index 7cbc19e9..efbc2038 100644 --- a/vendor/nhooyr.io/websocket/close.go +++ b/vendor/nhooyr.io/websocket/close.go @@ -1,12 +1,23 @@ +//go:build !js +// +build !js + package websocket import ( + "context" + "encoding/binary" "errors" "fmt" + "net" + "time" + + "nhooyr.io/websocket/internal/errd" ) // StatusCode represents a WebSocket status code. // https://tools.ietf.org/html/rfc6455#section-7.4 +// +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. type StatusCode int // https://www.iana.org/assignments/websocket/websocket.xhtml#close-code-number @@ -52,6 +63,8 @@ const ( // CloseError is returned when the connection is closed with a status and reason. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // Use Go 1.13's errors.As to check for this error. // Also see the CloseStatus helper. type CloseError struct { @@ -59,6 +72,7 @@ type CloseError struct { Reason string } +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. func (ce CloseError) Error() string { return fmt.Sprintf("status = %v and reason = %q", ce.Code, ce.Reason) } @@ -66,6 +80,8 @@ func (ce CloseError) Error() string { // CloseStatus is a convenience wrapper around Go 1.13's errors.As to grab // the status code from a CloseError. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // -1 will be returned if the passed error is nil or not a CloseError. func CloseStatus(err error) StatusCode { var ce CloseError @@ -74,3 +90,270 @@ func CloseStatus(err error) StatusCode { } return -1 } + +// Close performs the WebSocket close handshake with the given status code and reason. +// +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// +// It will write a WebSocket close frame with a timeout of 5s and then wait 5s for +// the peer to send a close frame. +// All data messages received from the peer during the close handshake will be discarded. +// +// The connection can only be closed once. Additional calls to Close +// are no-ops. +// +// The maximum length of reason must be 125 bytes. Avoid sending a dynamic reason. +// +// Close will unblock all goroutines interacting with the connection once +// complete. +func (c *Conn) Close(code StatusCode, reason string) (err error) { + defer errd.Wrap(&err, "failed to close WebSocket") + + if !c.casClosing() { + err = c.waitGoroutines() + if err != nil { + return err + } + return net.ErrClosed + } + defer func() { + if errors.Is(err, net.ErrClosed) { + err = nil + } + }() + + err = c.closeHandshake(code, reason) + + err2 := c.close() + if err == nil && err2 != nil { + err = err2 + } + + err2 = c.waitGoroutines() + if err == nil && err2 != nil { + err = err2 + } + + return err +} + +// CloseNow closes the WebSocket connection without attempting a close handshake. +// Use when you do not want the overhead of the close handshake. +// +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +func (c *Conn) CloseNow() (err error) { + defer errd.Wrap(&err, "failed to immediately close WebSocket") + + if !c.casClosing() { + err = c.waitGoroutines() + if err != nil { + return err + } + return net.ErrClosed + } + defer func() { + if errors.Is(err, net.ErrClosed) { + err = nil + } + }() + + err = c.close() + + err2 := c.waitGoroutines() + if err == nil && err2 != nil { + err = err2 + } + return err +} + +func (c *Conn) closeHandshake(code StatusCode, reason string) error { + err := c.writeClose(code, reason) + if err != nil { + return err + } + + err = c.waitCloseHandshake() + if CloseStatus(err) != code { + return err + } + return nil +} + +func (c *Conn) writeClose(code StatusCode, reason string) error { + ce := CloseError{ + Code: code, + Reason: reason, + } + + var p []byte + var err error + if ce.Code != StatusNoStatusRcvd { + p, err = ce.bytes() + if err != nil { + return err + } + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + + err = c.writeControl(ctx, opClose, p) + // If the connection closed as we're writing we ignore the error as we might + // have written the close frame, the peer responded and then someone else read it + // and closed the connection. + if err != nil && !errors.Is(err, net.ErrClosed) { + return err + } + return nil +} + +func (c *Conn) waitCloseHandshake() error { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + + err := c.readMu.lock(ctx) + if err != nil { + return err + } + defer c.readMu.unlock() + + for i := int64(0); i < c.msgReader.payloadLength; i++ { + _, err := c.br.ReadByte() + if err != nil { + return err + } + } + + for { + h, err := c.readLoop(ctx) + if err != nil { + return err + } + + for i := int64(0); i < h.payloadLength; i++ { + _, err := c.br.ReadByte() + if err != nil { + return err + } + } + } +} + +func (c *Conn) waitGoroutines() error { + t := time.NewTimer(time.Second * 15) + defer t.Stop() + + select { + case <-c.timeoutLoopDone: + case <-t.C: + return errors.New("failed to wait for timeoutLoop goroutine to exit") + } + + c.closeReadMu.Lock() + closeRead := c.closeReadCtx != nil + c.closeReadMu.Unlock() + if closeRead { + select { + case <-c.closeReadDone: + case <-t.C: + return errors.New("failed to wait for close read goroutine to exit") + } + } + + select { + case <-c.closed: + case <-t.C: + return errors.New("failed to wait for connection to be closed") + } + + return nil +} + +func parseClosePayload(p []byte) (CloseError, error) { + if len(p) == 0 { + return CloseError{ + Code: StatusNoStatusRcvd, + }, nil + } + + if len(p) < 2 { + return CloseError{}, fmt.Errorf("close payload %q too small, cannot even contain the 2 byte status code", p) + } + + ce := CloseError{ + Code: StatusCode(binary.BigEndian.Uint16(p)), + Reason: string(p[2:]), + } + + if !validWireCloseCode(ce.Code) { + return CloseError{}, fmt.Errorf("invalid status code %v", ce.Code) + } + + return ce, nil +} + +// See http://www.iana.org/assignments/websocket/websocket.xhtml#close-code-number +// and https://tools.ietf.org/html/rfc6455#section-7.4.1 +func validWireCloseCode(code StatusCode) bool { + switch code { + case statusReserved, StatusNoStatusRcvd, StatusAbnormalClosure, StatusTLSHandshake: + return false + } + + if code >= StatusNormalClosure && code <= StatusBadGateway { + return true + } + if code >= 3000 && code <= 4999 { + return true + } + + return false +} + +func (ce CloseError) bytes() ([]byte, error) { + p, err := ce.bytesErr() + if err != nil { + err = fmt.Errorf("failed to marshal close frame: %w", err) + ce = CloseError{ + Code: StatusInternalError, + } + p, _ = ce.bytesErr() + } + return p, err +} + +const maxCloseReason = maxControlPayload - 2 + +func (ce CloseError) bytesErr() ([]byte, error) { + if len(ce.Reason) > maxCloseReason { + return nil, fmt.Errorf("reason string max is %v but got %q with length %v", maxCloseReason, ce.Reason, len(ce.Reason)) + } + + if !validWireCloseCode(ce.Code) { + return nil, fmt.Errorf("status code %v cannot be set", ce.Code) + } + + buf := make([]byte, 2+len(ce.Reason)) + binary.BigEndian.PutUint16(buf, uint16(ce.Code)) + copy(buf[2:], ce.Reason) + return buf, nil +} + +func (c *Conn) casClosing() bool { + c.closeMu.Lock() + defer c.closeMu.Unlock() + if !c.closing { + c.closing = true + return true + } + return false +} + +func (c *Conn) isClosed() bool { + select { + case <-c.closed: + return true + default: + return false + } +} diff --git a/vendor/nhooyr.io/websocket/close_notjs.go b/vendor/nhooyr.io/websocket/close_notjs.go deleted file mode 100644 index 4251311d..00000000 --- a/vendor/nhooyr.io/websocket/close_notjs.go +++ /dev/null @@ -1,211 +0,0 @@ -// +build !js - -package websocket - -import ( - "context" - "encoding/binary" - "errors" - "fmt" - "log" - "time" - - "nhooyr.io/websocket/internal/errd" -) - -// Close performs the WebSocket close handshake with the given status code and reason. -// -// It will write a WebSocket close frame with a timeout of 5s and then wait 5s for -// the peer to send a close frame. -// All data messages received from the peer during the close handshake will be discarded. -// -// The connection can only be closed once. Additional calls to Close -// are no-ops. -// -// The maximum length of reason must be 125 bytes. Avoid -// sending a dynamic reason. -// -// Close will unblock all goroutines interacting with the connection once -// complete. -func (c *Conn) Close(code StatusCode, reason string) error { - return c.closeHandshake(code, reason) -} - -func (c *Conn) closeHandshake(code StatusCode, reason string) (err error) { - defer errd.Wrap(&err, "failed to close WebSocket") - - writeErr := c.writeClose(code, reason) - closeHandshakeErr := c.waitCloseHandshake() - - if writeErr != nil { - return writeErr - } - - if CloseStatus(closeHandshakeErr) == -1 { - return closeHandshakeErr - } - - return nil -} - -var errAlreadyWroteClose = errors.New("already wrote close") - -func (c *Conn) writeClose(code StatusCode, reason string) error { - c.closeMu.Lock() - wroteClose := c.wroteClose - c.wroteClose = true - c.closeMu.Unlock() - if wroteClose { - return errAlreadyWroteClose - } - - ce := CloseError{ - Code: code, - Reason: reason, - } - - var p []byte - var marshalErr error - if ce.Code != StatusNoStatusRcvd { - p, marshalErr = ce.bytes() - if marshalErr != nil { - log.Printf("websocket: %v", marshalErr) - } - } - - writeErr := c.writeControl(context.Background(), opClose, p) - if CloseStatus(writeErr) != -1 { - // Not a real error if it's due to a close frame being received. - writeErr = nil - } - - // We do this after in case there was an error writing the close frame. - c.setCloseErr(fmt.Errorf("sent close frame: %w", ce)) - - if marshalErr != nil { - return marshalErr - } - return writeErr -} - -func (c *Conn) waitCloseHandshake() error { - defer c.close(nil) - - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - - err := c.readMu.lock(ctx) - if err != nil { - return err - } - defer c.readMu.unlock() - - if c.readCloseFrameErr != nil { - return c.readCloseFrameErr - } - - for { - h, err := c.readLoop(ctx) - if err != nil { - return err - } - - for i := int64(0); i < h.payloadLength; i++ { - _, err := c.br.ReadByte() - if err != nil { - return err - } - } - } -} - -func parseClosePayload(p []byte) (CloseError, error) { - if len(p) == 0 { - return CloseError{ - Code: StatusNoStatusRcvd, - }, nil - } - - if len(p) < 2 { - return CloseError{}, fmt.Errorf("close payload %q too small, cannot even contain the 2 byte status code", p) - } - - ce := CloseError{ - Code: StatusCode(binary.BigEndian.Uint16(p)), - Reason: string(p[2:]), - } - - if !validWireCloseCode(ce.Code) { - return CloseError{}, fmt.Errorf("invalid status code %v", ce.Code) - } - - return ce, nil -} - -// See http://www.iana.org/assignments/websocket/websocket.xhtml#close-code-number -// and https://tools.ietf.org/html/rfc6455#section-7.4.1 -func validWireCloseCode(code StatusCode) bool { - switch code { - case statusReserved, StatusNoStatusRcvd, StatusAbnormalClosure, StatusTLSHandshake: - return false - } - - if code >= StatusNormalClosure && code <= StatusBadGateway { - return true - } - if code >= 3000 && code <= 4999 { - return true - } - - return false -} - -func (ce CloseError) bytes() ([]byte, error) { - p, err := ce.bytesErr() - if err != nil { - err = fmt.Errorf("failed to marshal close frame: %w", err) - ce = CloseError{ - Code: StatusInternalError, - } - p, _ = ce.bytesErr() - } - return p, err -} - -const maxCloseReason = maxControlPayload - 2 - -func (ce CloseError) bytesErr() ([]byte, error) { - if len(ce.Reason) > maxCloseReason { - return nil, fmt.Errorf("reason string max is %v but got %q with length %v", maxCloseReason, ce.Reason, len(ce.Reason)) - } - - if !validWireCloseCode(ce.Code) { - return nil, fmt.Errorf("status code %v cannot be set", ce.Code) - } - - buf := make([]byte, 2+len(ce.Reason)) - binary.BigEndian.PutUint16(buf, uint16(ce.Code)) - copy(buf[2:], ce.Reason) - return buf, nil -} - -func (c *Conn) setCloseErr(err error) { - c.closeMu.Lock() - c.setCloseErrLocked(err) - c.closeMu.Unlock() -} - -func (c *Conn) setCloseErrLocked(err error) { - if c.closeErr == nil { - c.closeErr = fmt.Errorf("WebSocket closed: %w", err) - } -} - -func (c *Conn) isClosed() bool { - select { - case <-c.closed: - return true - default: - return false - } -} diff --git a/vendor/nhooyr.io/websocket/compress.go b/vendor/nhooyr.io/websocket/compress.go index 80b46d1c..e50ae0b3 100644 --- a/vendor/nhooyr.io/websocket/compress.go +++ b/vendor/nhooyr.io/websocket/compress.go @@ -1,39 +1,235 @@ +//go:build !js +// +build !js + package websocket -// CompressionMode represents the modes available to the deflate extension. +import ( + "compress/flate" + "io" + "sync" +) + +// CompressionMode represents the modes available to the permessage-deflate extension. // See https://tools.ietf.org/html/rfc7692 // -// A compatibility layer is implemented for the older deflate-frame extension used -// by safari. See https://tools.ietf.org/html/draft-tyoshino-hybi-websocket-perframe-deflate-06 -// It will work the same in every way except that we cannot signal to the peer we -// want to use no context takeover on our side, we can only signal that they should. -// It is however currently disabled due to Safari bugs. See https://github.com/nhooyr/websocket/issues/218 +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// +// Works in all modern browsers except Safari which does not implement the permessage-deflate extension. +// +// Compression is only used if the peer supports the mode selected. type CompressionMode int const ( - // CompressionNoContextTakeover grabs a new flate.Reader and flate.Writer as needed - // for every message. This applies to both server and client side. - // - // This means less efficient compression as the sliding window from previous messages - // will not be used but the memory overhead will be lower if the connections - // are long lived and seldom used. + // CompressionDisabled disables the negotiation of the permessage-deflate extension. // - // The message will only be compressed if greater than 512 bytes. - CompressionNoContextTakeover CompressionMode = iota + // This is the default. Do not enable compression without benchmarking for your particular use case first. + CompressionDisabled CompressionMode = iota - // CompressionContextTakeover uses a flate.Reader and flate.Writer per connection. - // This enables reusing the sliding window from previous messages. - // As most WebSocket protocols are repetitive, this can be very efficient. - // It carries an overhead of 8 kB for every connection compared to CompressionNoContextTakeover. + // CompressionContextTakeover compresses each message greater than 128 bytes reusing the 32 KB sliding window from + // previous messages. i.e compression context across messages is preserved. // - // If the peer negotiates NoContextTakeover on the client or server side, it will be - // used instead as this is required by the RFC. + // As most WebSocket protocols are text based and repetitive, this compression mode can be very efficient. + // + // The memory overhead is a fixed 32 KB sliding window, a fixed 1.2 MB flate.Writer and a sync.Pool of 40 KB flate.Reader's + // that are used when reading and then returned. + // + // Thus, it uses more memory than CompressionNoContextTakeover but compresses more efficiently. + // + // If the peer does not support CompressionContextTakeover then we will fall back to CompressionNoContextTakeover. CompressionContextTakeover - // CompressionDisabled disables the deflate extension. + // CompressionNoContextTakeover compresses each message greater than 512 bytes. Each message is compressed with + // a new 1.2 MB flate.Writer pulled from a sync.Pool. Each message is read with a 40 KB flate.Reader pulled from + // a sync.Pool. + // + // This means less efficient compression as the sliding window from previous messages will not be used but the + // memory overhead will be lower as there will be no fixed cost for the flate.Writer nor the 32 KB sliding window. + // Especially if the connections are long lived and seldom written to. + // + // Thus, it uses less memory than CompressionContextTakeover but compresses less efficiently. // - // Use this if you are using a predominantly binary protocol with very - // little duplication in between messages or CPU and memory are more - // important than bandwidth. - CompressionDisabled + // If the peer does not support CompressionNoContextTakeover then we will fall back to CompressionDisabled. + CompressionNoContextTakeover ) + +func (m CompressionMode) opts() *compressionOptions { + return &compressionOptions{ + clientNoContextTakeover: m == CompressionNoContextTakeover, + serverNoContextTakeover: m == CompressionNoContextTakeover, + } +} + +type compressionOptions struct { + clientNoContextTakeover bool + serverNoContextTakeover bool +} + +func (copts *compressionOptions) String() string { + s := "permessage-deflate" + if copts.clientNoContextTakeover { + s += "; client_no_context_takeover" + } + if copts.serverNoContextTakeover { + s += "; server_no_context_takeover" + } + return s +} + +// These bytes are required to get flate.Reader to return. +// They are removed when sending to avoid the overhead as +// WebSocket framing tell's when the message has ended but then +// we need to add them back otherwise flate.Reader keeps +// trying to read more bytes. +const deflateMessageTail = "\x00\x00\xff\xff" + +type trimLastFourBytesWriter struct { + w io.Writer + tail []byte +} + +func (tw *trimLastFourBytesWriter) reset() { + if tw != nil && tw.tail != nil { + tw.tail = tw.tail[:0] + } +} + +func (tw *trimLastFourBytesWriter) Write(p []byte) (int, error) { + if tw.tail == nil { + tw.tail = make([]byte, 0, 4) + } + + extra := len(tw.tail) + len(p) - 4 + + if extra <= 0 { + tw.tail = append(tw.tail, p...) + return len(p), nil + } + + // Now we need to write as many extra bytes as we can from the previous tail. + if extra > len(tw.tail) { + extra = len(tw.tail) + } + if extra > 0 { + _, err := tw.w.Write(tw.tail[:extra]) + if err != nil { + return 0, err + } + + // Shift remaining bytes in tail over. + n := copy(tw.tail, tw.tail[extra:]) + tw.tail = tw.tail[:n] + } + + // If p is less than or equal to 4 bytes, + // all of it is is part of the tail. + if len(p) <= 4 { + tw.tail = append(tw.tail, p...) + return len(p), nil + } + + // Otherwise, only the last 4 bytes are. + tw.tail = append(tw.tail, p[len(p)-4:]...) + + p = p[:len(p)-4] + n, err := tw.w.Write(p) + return n + 4, err +} + +var flateReaderPool sync.Pool + +func getFlateReader(r io.Reader, dict []byte) io.Reader { + fr, ok := flateReaderPool.Get().(io.Reader) + if !ok { + return flate.NewReaderDict(r, dict) + } + fr.(flate.Resetter).Reset(r, dict) + return fr +} + +func putFlateReader(fr io.Reader) { + flateReaderPool.Put(fr) +} + +var flateWriterPool sync.Pool + +func getFlateWriter(w io.Writer) *flate.Writer { + fw, ok := flateWriterPool.Get().(*flate.Writer) + if !ok { + fw, _ = flate.NewWriter(w, flate.BestSpeed) + return fw + } + fw.Reset(w) + return fw +} + +func putFlateWriter(w *flate.Writer) { + flateWriterPool.Put(w) +} + +type slidingWindow struct { + buf []byte +} + +var swPoolMu sync.RWMutex +var swPool = map[int]*sync.Pool{} + +func slidingWindowPool(n int) *sync.Pool { + swPoolMu.RLock() + p, ok := swPool[n] + swPoolMu.RUnlock() + if ok { + return p + } + + p = &sync.Pool{} + + swPoolMu.Lock() + swPool[n] = p + swPoolMu.Unlock() + + return p +} + +func (sw *slidingWindow) init(n int) { + if sw.buf != nil { + return + } + + if n == 0 { + n = 32768 + } + + p := slidingWindowPool(n) + sw2, ok := p.Get().(*slidingWindow) + if ok { + *sw = *sw2 + } else { + sw.buf = make([]byte, 0, n) + } +} + +func (sw *slidingWindow) close() { + sw.buf = sw.buf[:0] + swPoolMu.Lock() + swPool[cap(sw.buf)].Put(sw) + swPoolMu.Unlock() +} + +func (sw *slidingWindow) write(p []byte) { + if len(p) >= cap(sw.buf) { + sw.buf = sw.buf[:cap(sw.buf)] + p = p[len(p)-cap(sw.buf):] + copy(sw.buf, p) + return + } + + left := cap(sw.buf) - len(sw.buf) + if left < len(p) { + // We need to shift spaceNeeded bytes from the end to make room for p at the end. + spaceNeeded := len(p) - left + copy(sw.buf, sw.buf[spaceNeeded:]) + sw.buf = sw.buf[:len(sw.buf)-spaceNeeded] + } + + sw.buf = append(sw.buf, p...) +} diff --git a/vendor/nhooyr.io/websocket/compress_notjs.go b/vendor/nhooyr.io/websocket/compress_notjs.go deleted file mode 100644 index 809a272c..00000000 --- a/vendor/nhooyr.io/websocket/compress_notjs.go +++ /dev/null @@ -1,181 +0,0 @@ -// +build !js - -package websocket - -import ( - "io" - "net/http" - "sync" - - "github.com/klauspost/compress/flate" -) - -func (m CompressionMode) opts() *compressionOptions { - return &compressionOptions{ - clientNoContextTakeover: m == CompressionNoContextTakeover, - serverNoContextTakeover: m == CompressionNoContextTakeover, - } -} - -type compressionOptions struct { - clientNoContextTakeover bool - serverNoContextTakeover bool -} - -func (copts *compressionOptions) setHeader(h http.Header) { - s := "permessage-deflate" - if copts.clientNoContextTakeover { - s += "; client_no_context_takeover" - } - if copts.serverNoContextTakeover { - s += "; server_no_context_takeover" - } - h.Set("Sec-WebSocket-Extensions", s) -} - -// These bytes are required to get flate.Reader to return. -// They are removed when sending to avoid the overhead as -// WebSocket framing tell's when the message has ended but then -// we need to add them back otherwise flate.Reader keeps -// trying to return more bytes. -const deflateMessageTail = "\x00\x00\xff\xff" - -type trimLastFourBytesWriter struct { - w io.Writer - tail []byte -} - -func (tw *trimLastFourBytesWriter) reset() { - if tw != nil && tw.tail != nil { - tw.tail = tw.tail[:0] - } -} - -func (tw *trimLastFourBytesWriter) Write(p []byte) (int, error) { - if tw.tail == nil { - tw.tail = make([]byte, 0, 4) - } - - extra := len(tw.tail) + len(p) - 4 - - if extra <= 0 { - tw.tail = append(tw.tail, p...) - return len(p), nil - } - - // Now we need to write as many extra bytes as we can from the previous tail. - if extra > len(tw.tail) { - extra = len(tw.tail) - } - if extra > 0 { - _, err := tw.w.Write(tw.tail[:extra]) - if err != nil { - return 0, err - } - - // Shift remaining bytes in tail over. - n := copy(tw.tail, tw.tail[extra:]) - tw.tail = tw.tail[:n] - } - - // If p is less than or equal to 4 bytes, - // all of it is is part of the tail. - if len(p) <= 4 { - tw.tail = append(tw.tail, p...) - return len(p), nil - } - - // Otherwise, only the last 4 bytes are. - tw.tail = append(tw.tail, p[len(p)-4:]...) - - p = p[:len(p)-4] - n, err := tw.w.Write(p) - return n + 4, err -} - -var flateReaderPool sync.Pool - -func getFlateReader(r io.Reader, dict []byte) io.Reader { - fr, ok := flateReaderPool.Get().(io.Reader) - if !ok { - return flate.NewReaderDict(r, dict) - } - fr.(flate.Resetter).Reset(r, dict) - return fr -} - -func putFlateReader(fr io.Reader) { - flateReaderPool.Put(fr) -} - -type slidingWindow struct { - buf []byte -} - -var swPoolMu sync.RWMutex -var swPool = map[int]*sync.Pool{} - -func slidingWindowPool(n int) *sync.Pool { - swPoolMu.RLock() - p, ok := swPool[n] - swPoolMu.RUnlock() - if ok { - return p - } - - p = &sync.Pool{} - - swPoolMu.Lock() - swPool[n] = p - swPoolMu.Unlock() - - return p -} - -func (sw *slidingWindow) init(n int) { - if sw.buf != nil { - return - } - - if n == 0 { - n = 32768 - } - - p := slidingWindowPool(n) - buf, ok := p.Get().([]byte) - if ok { - sw.buf = buf[:0] - } else { - sw.buf = make([]byte, 0, n) - } -} - -func (sw *slidingWindow) close() { - if sw.buf == nil { - return - } - - swPoolMu.Lock() - swPool[cap(sw.buf)].Put(sw.buf) - swPoolMu.Unlock() - sw.buf = nil -} - -func (sw *slidingWindow) write(p []byte) { - if len(p) >= cap(sw.buf) { - sw.buf = sw.buf[:cap(sw.buf)] - p = p[len(p)-cap(sw.buf):] - copy(sw.buf, p) - return - } - - left := cap(sw.buf) - len(sw.buf) - if left < len(p) { - // We need to shift spaceNeeded bytes from the end to make room for p at the end. - spaceNeeded := len(p) - left - copy(sw.buf, sw.buf[spaceNeeded:]) - sw.buf = sw.buf[:len(sw.buf)-spaceNeeded] - } - - sw.buf = append(sw.buf, p...) -} diff --git a/vendor/nhooyr.io/websocket/conn.go b/vendor/nhooyr.io/websocket/conn.go index a41808be..2bf221ca 100644 --- a/vendor/nhooyr.io/websocket/conn.go +++ b/vendor/nhooyr.io/websocket/conn.go @@ -1,7 +1,24 @@ +//go:build !js +// +build !js + package websocket +import ( + "bufio" + "context" + "fmt" + "io" + "net" + "runtime" + "strconv" + "sync" + "sync/atomic" +) + // MessageType represents the type of a WebSocket message. // See https://tools.ietf.org/html/rfc6455#section-5.6 +// +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. type MessageType int // MessageType constants. @@ -11,3 +28,276 @@ const ( // MessageBinary is for binary messages like protobufs. MessageBinary ) + +// Conn represents a WebSocket connection. +// All methods may be called concurrently except for Reader and Read. +// +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// +// You must always read from the connection. Otherwise control +// frames will not be handled. See Reader and CloseRead. +// +// Be sure to call Close on the connection when you +// are finished with it to release associated resources. +// +// On any error from any method, the connection is closed +// with an appropriate reason. +// +// This applies to context expirations as well unfortunately. +// See https://github.com/nhooyr/websocket/issues/242#issuecomment-633182220 +type Conn struct { + noCopy noCopy + + subprotocol string + rwc io.ReadWriteCloser + client bool + copts *compressionOptions + flateThreshold int + br *bufio.Reader + bw *bufio.Writer + + readTimeout chan context.Context + writeTimeout chan context.Context + timeoutLoopDone chan struct{} + + // Read state. + readMu *mu + readHeaderBuf [8]byte + readControlBuf [maxControlPayload]byte + msgReader *msgReader + + // Write state. + msgWriter *msgWriter + writeFrameMu *mu + writeBuf []byte + writeHeaderBuf [8]byte + writeHeader header + + closeReadMu sync.Mutex + closeReadCtx context.Context + closeReadDone chan struct{} + + closed chan struct{} + closeMu sync.Mutex + closing bool + + pingCounter int32 + activePingsMu sync.Mutex + activePings map[string]chan<- struct{} +} + +type connConfig struct { + subprotocol string + rwc io.ReadWriteCloser + client bool + copts *compressionOptions + flateThreshold int + + br *bufio.Reader + bw *bufio.Writer +} + +func newConn(cfg connConfig) *Conn { + c := &Conn{ + subprotocol: cfg.subprotocol, + rwc: cfg.rwc, + client: cfg.client, + copts: cfg.copts, + flateThreshold: cfg.flateThreshold, + + br: cfg.br, + bw: cfg.bw, + + readTimeout: make(chan context.Context), + writeTimeout: make(chan context.Context), + timeoutLoopDone: make(chan struct{}), + + closed: make(chan struct{}), + activePings: make(map[string]chan<- struct{}), + } + + c.readMu = newMu(c) + c.writeFrameMu = newMu(c) + + c.msgReader = newMsgReader(c) + + c.msgWriter = newMsgWriter(c) + if c.client { + c.writeBuf = extractBufioWriterBuf(c.bw, c.rwc) + } + + if c.flate() && c.flateThreshold == 0 { + c.flateThreshold = 128 + if !c.msgWriter.flateContextTakeover() { + c.flateThreshold = 512 + } + } + + runtime.SetFinalizer(c, func(c *Conn) { + c.close() + }) + + go c.timeoutLoop() + + return c +} + +// Subprotocol returns the negotiated subprotocol. +// An empty string means the default protocol. +// +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +func (c *Conn) Subprotocol() string { + return c.subprotocol +} + +func (c *Conn) close() error { + c.closeMu.Lock() + defer c.closeMu.Unlock() + + if c.isClosed() { + return net.ErrClosed + } + runtime.SetFinalizer(c, nil) + close(c.closed) + + // Have to close after c.closed is closed to ensure any goroutine that wakes up + // from the connection being closed also sees that c.closed is closed and returns + // closeErr. + err := c.rwc.Close() + // With the close of rwc, these become safe to close. + c.msgWriter.close() + c.msgReader.close() + return err +} + +func (c *Conn) timeoutLoop() { + defer close(c.timeoutLoopDone) + + readCtx := context.Background() + writeCtx := context.Background() + + for { + select { + case <-c.closed: + return + + case writeCtx = <-c.writeTimeout: + case readCtx = <-c.readTimeout: + + case <-readCtx.Done(): + c.close() + return + case <-writeCtx.Done(): + c.close() + return + } + } +} + +func (c *Conn) flate() bool { + return c.copts != nil +} + +// Ping sends a ping to the peer and waits for a pong. +// Use this to measure latency or ensure the peer is responsive. +// Ping must be called concurrently with Reader as it does +// not read from the connection but instead waits for a Reader call +// to read the pong. +// +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// +// TCP Keepalives should suffice for most use cases. +func (c *Conn) Ping(ctx context.Context) error { + p := atomic.AddInt32(&c.pingCounter, 1) + + err := c.ping(ctx, strconv.Itoa(int(p))) + if err != nil { + return fmt.Errorf("failed to ping: %w", err) + } + return nil +} + +func (c *Conn) ping(ctx context.Context, p string) error { + pong := make(chan struct{}, 1) + + c.activePingsMu.Lock() + c.activePings[p] = pong + c.activePingsMu.Unlock() + + defer func() { + c.activePingsMu.Lock() + delete(c.activePings, p) + c.activePingsMu.Unlock() + }() + + err := c.writeControl(ctx, opPing, []byte(p)) + if err != nil { + return err + } + + select { + case <-c.closed: + return net.ErrClosed + case <-ctx.Done(): + return fmt.Errorf("failed to wait for pong: %w", ctx.Err()) + case <-pong: + return nil + } +} + +type mu struct { + c *Conn + ch chan struct{} +} + +func newMu(c *Conn) *mu { + return &mu{ + c: c, + ch: make(chan struct{}, 1), + } +} + +func (m *mu) forceLock() { + m.ch <- struct{}{} +} + +func (m *mu) tryLock() bool { + select { + case m.ch <- struct{}{}: + return true + default: + return false + } +} + +func (m *mu) lock(ctx context.Context) error { + select { + case <-m.c.closed: + return net.ErrClosed + case <-ctx.Done(): + return fmt.Errorf("failed to acquire lock: %w", ctx.Err()) + case m.ch <- struct{}{}: + // To make sure the connection is certainly alive. + // As it's possible the send on m.ch was selected + // over the receive on closed. + select { + case <-m.c.closed: + // Make sure to release. + m.unlock() + return net.ErrClosed + default: + } + return nil + } +} + +func (m *mu) unlock() { + select { + case <-m.ch: + default: + } +} + +type noCopy struct{} + +func (*noCopy) Lock() {} diff --git a/vendor/nhooyr.io/websocket/conn_notjs.go b/vendor/nhooyr.io/websocket/conn_notjs.go deleted file mode 100644 index 0c85ab77..00000000 --- a/vendor/nhooyr.io/websocket/conn_notjs.go +++ /dev/null @@ -1,265 +0,0 @@ -// +build !js - -package websocket - -import ( - "bufio" - "context" - "errors" - "fmt" - "io" - "runtime" - "strconv" - "sync" - "sync/atomic" -) - -// Conn represents a WebSocket connection. -// All methods may be called concurrently except for Reader and Read. -// -// You must always read from the connection. Otherwise control -// frames will not be handled. See Reader and CloseRead. -// -// Be sure to call Close on the connection when you -// are finished with it to release associated resources. -// -// On any error from any method, the connection is closed -// with an appropriate reason. -type Conn struct { - subprotocol string - rwc io.ReadWriteCloser - client bool - copts *compressionOptions - flateThreshold int - br *bufio.Reader - bw *bufio.Writer - - readTimeout chan context.Context - writeTimeout chan context.Context - - // Read state. - readMu *mu - readHeaderBuf [8]byte - readControlBuf [maxControlPayload]byte - msgReader *msgReader - readCloseFrameErr error - - // Write state. - msgWriterState *msgWriterState - writeFrameMu *mu - writeBuf []byte - writeHeaderBuf [8]byte - writeHeader header - - closed chan struct{} - closeMu sync.Mutex - closeErr error - wroteClose bool - - pingCounter int32 - activePingsMu sync.Mutex - activePings map[string]chan<- struct{} -} - -type connConfig struct { - subprotocol string - rwc io.ReadWriteCloser - client bool - copts *compressionOptions - flateThreshold int - - br *bufio.Reader - bw *bufio.Writer -} - -func newConn(cfg connConfig) *Conn { - c := &Conn{ - subprotocol: cfg.subprotocol, - rwc: cfg.rwc, - client: cfg.client, - copts: cfg.copts, - flateThreshold: cfg.flateThreshold, - - br: cfg.br, - bw: cfg.bw, - - readTimeout: make(chan context.Context), - writeTimeout: make(chan context.Context), - - closed: make(chan struct{}), - activePings: make(map[string]chan<- struct{}), - } - - c.readMu = newMu(c) - c.writeFrameMu = newMu(c) - - c.msgReader = newMsgReader(c) - - c.msgWriterState = newMsgWriterState(c) - if c.client { - c.writeBuf = extractBufioWriterBuf(c.bw, c.rwc) - } - - if c.flate() && c.flateThreshold == 0 { - c.flateThreshold = 128 - if !c.msgWriterState.flateContextTakeover() { - c.flateThreshold = 512 - } - } - - runtime.SetFinalizer(c, func(c *Conn) { - c.close(errors.New("connection garbage collected")) - }) - - go c.timeoutLoop() - - return c -} - -// Subprotocol returns the negotiated subprotocol. -// An empty string means the default protocol. -func (c *Conn) Subprotocol() string { - return c.subprotocol -} - -func (c *Conn) close(err error) { - c.closeMu.Lock() - defer c.closeMu.Unlock() - - if c.isClosed() { - return - } - c.setCloseErrLocked(err) - close(c.closed) - runtime.SetFinalizer(c, nil) - - // Have to close after c.closed is closed to ensure any goroutine that wakes up - // from the connection being closed also sees that c.closed is closed and returns - // closeErr. - c.rwc.Close() - - go func() { - c.msgWriterState.close() - - c.msgReader.close() - }() -} - -func (c *Conn) timeoutLoop() { - readCtx := context.Background() - writeCtx := context.Background() - - for { - select { - case <-c.closed: - return - - case writeCtx = <-c.writeTimeout: - case readCtx = <-c.readTimeout: - - case <-readCtx.Done(): - c.setCloseErr(fmt.Errorf("read timed out: %w", readCtx.Err())) - go c.writeError(StatusPolicyViolation, errors.New("timed out")) - case <-writeCtx.Done(): - c.close(fmt.Errorf("write timed out: %w", writeCtx.Err())) - return - } - } -} - -func (c *Conn) flate() bool { - return c.copts != nil -} - -// Ping sends a ping to the peer and waits for a pong. -// Use this to measure latency or ensure the peer is responsive. -// Ping must be called concurrently with Reader as it does -// not read from the connection but instead waits for a Reader call -// to read the pong. -// -// TCP Keepalives should suffice for most use cases. -func (c *Conn) Ping(ctx context.Context) error { - p := atomic.AddInt32(&c.pingCounter, 1) - - err := c.ping(ctx, strconv.Itoa(int(p))) - if err != nil { - return fmt.Errorf("failed to ping: %w", err) - } - return nil -} - -func (c *Conn) ping(ctx context.Context, p string) error { - pong := make(chan struct{}, 1) - - c.activePingsMu.Lock() - c.activePings[p] = pong - c.activePingsMu.Unlock() - - defer func() { - c.activePingsMu.Lock() - delete(c.activePings, p) - c.activePingsMu.Unlock() - }() - - err := c.writeControl(ctx, opPing, []byte(p)) - if err != nil { - return err - } - - select { - case <-c.closed: - return c.closeErr - case <-ctx.Done(): - err := fmt.Errorf("failed to wait for pong: %w", ctx.Err()) - c.close(err) - return err - case <-pong: - return nil - } -} - -type mu struct { - c *Conn - ch chan struct{} -} - -func newMu(c *Conn) *mu { - return &mu{ - c: c, - ch: make(chan struct{}, 1), - } -} - -func (m *mu) forceLock() { - m.ch <- struct{}{} -} - -func (m *mu) lock(ctx context.Context) error { - select { - case <-m.c.closed: - return m.c.closeErr - case <-ctx.Done(): - err := fmt.Errorf("failed to acquire lock: %w", ctx.Err()) - m.c.close(err) - return err - case m.ch <- struct{}{}: - // To make sure the connection is certainly alive. - // As it's possible the send on m.ch was selected - // over the receive on closed. - select { - case <-m.c.closed: - // Make sure to release. - m.unlock() - return m.c.closeErr - default: - } - return nil - } -} - -func (m *mu) unlock() { - select { - case <-m.ch: - default: - } -} diff --git a/vendor/nhooyr.io/websocket/dial.go b/vendor/nhooyr.io/websocket/dial.go index 7a7787ff..6dd80502 100644 --- a/vendor/nhooyr.io/websocket/dial.go +++ b/vendor/nhooyr.io/websocket/dial.go @@ -1,3 +1,4 @@ +//go:build !js // +build !js package websocket @@ -10,7 +11,6 @@ import ( "encoding/base64" "fmt" "io" - "io/ioutil" "net/http" "net/url" "strings" @@ -21,6 +21,8 @@ import ( ) // DialOptions represents Dial's options. +// +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. type DialOptions struct { // HTTPClient is used for the connection. // Its Transport must return writable bodies for WebSocket handshakes. @@ -30,11 +32,15 @@ type DialOptions struct { // HTTPHeader specifies the HTTP headers included in the handshake request. HTTPHeader http.Header + // Host optionally overrides the Host HTTP header to send. If empty, the value + // of URL.Host will be used. + Host string + // Subprotocols lists the WebSocket subprotocols to negotiate with the server. Subprotocols []string // CompressionMode controls the compression mode. - // Defaults to CompressionNoContextTakeover. + // Defaults to CompressionDisabled. // // See docs on CompressionMode for details. CompressionMode CompressionMode @@ -46,8 +52,49 @@ type DialOptions struct { CompressionThreshold int } +func (opts *DialOptions) cloneWithDefaults(ctx context.Context) (context.Context, context.CancelFunc, *DialOptions) { + var cancel context.CancelFunc + + var o DialOptions + if opts != nil { + o = *opts + } + if o.HTTPClient == nil { + o.HTTPClient = http.DefaultClient + } + if o.HTTPClient.Timeout > 0 { + ctx, cancel = context.WithTimeout(ctx, o.HTTPClient.Timeout) + + newClient := *o.HTTPClient + newClient.Timeout = 0 + o.HTTPClient = &newClient + } + if o.HTTPHeader == nil { + o.HTTPHeader = http.Header{} + } + newClient := *o.HTTPClient + oldCheckRedirect := o.HTTPClient.CheckRedirect + newClient.CheckRedirect = func(req *http.Request, via []*http.Request) error { + switch req.URL.Scheme { + case "ws": + req.URL.Scheme = "http" + case "wss": + req.URL.Scheme = "https" + } + if oldCheckRedirect != nil { + return oldCheckRedirect(req, via) + } + return nil + } + o.HTTPClient = &newClient + + return ctx, cancel, &o +} + // Dial performs a WebSocket handshake on url. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // The response is the WebSocket handshake response from the server. // You never need to close resp.Body yourself. // @@ -66,26 +113,10 @@ func Dial(ctx context.Context, u string, opts *DialOptions) (*Conn, *http.Respon func dial(ctx context.Context, urls string, opts *DialOptions, rand io.Reader) (_ *Conn, _ *http.Response, err error) { defer errd.Wrap(&err, "failed to WebSocket dial") - if opts == nil { - opts = &DialOptions{} - } - - opts = &*opts - if opts.HTTPClient == nil { - opts.HTTPClient = http.DefaultClient - } else if opts.HTTPClient.Timeout > 0 { - var cancel context.CancelFunc - - ctx, cancel = context.WithTimeout(ctx, opts.HTTPClient.Timeout) + var cancel context.CancelFunc + ctx, cancel, opts = opts.cloneWithDefaults(ctx) + if cancel != nil { defer cancel() - - newClient := *opts.HTTPClient - newClient.Timeout = 0 - opts.HTTPClient = &newClient - } - - if opts.HTTPHeader == nil { - opts.HTTPHeader = http.Header{} } secWebSocketKey, err := secWebSocketKey(rand) @@ -114,9 +145,9 @@ func dial(ctx context.Context, urls string, opts *DialOptions, rand io.Reader) ( }) defer timer.Stop() - b, _ := ioutil.ReadAll(r) + b, _ := io.ReadAll(r) respBody.Close() - resp.Body = ioutil.NopCloser(bytes.NewReader(b)) + resp.Body = io.NopCloser(bytes.NewReader(b)) } }() @@ -157,7 +188,13 @@ func handshakeRequest(ctx context.Context, urls string, opts *DialOptions, copts return nil, fmt.Errorf("unexpected url scheme: %q", u.Scheme) } - req, _ := http.NewRequestWithContext(ctx, "GET", u.String(), nil) + req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil) + if err != nil { + return nil, fmt.Errorf("failed to create new http request: %w", err) + } + if len(opts.Host) > 0 { + req.Host = opts.Host + } req.Header = opts.HTTPHeader.Clone() req.Header.Set("Connection", "Upgrade") req.Header.Set("Upgrade", "websocket") @@ -167,7 +204,7 @@ func handshakeRequest(ctx context.Context, urls string, opts *DialOptions, copts req.Header.Set("Sec-WebSocket-Protocol", strings.Join(opts.Subprotocols, ",")) } if copts != nil { - copts.setHeader(req.Header) + req.Header.Set("Sec-WebSocket-Extensions", copts.String()) } resp, err := opts.HTTPClient.Do(req) @@ -243,7 +280,8 @@ func verifyServerExtensions(copts *compressionOptions, h http.Header) (*compress return nil, fmt.Errorf("WebSocket protcol violation: unsupported extensions from server: %+v", exts[1:]) } - copts = &*copts + _copts := *copts + copts = &_copts for _, p := range ext.params { switch p { @@ -254,6 +292,10 @@ func verifyServerExtensions(copts *compressionOptions, h http.Header) (*compress copts.serverNoContextTakeover = true continue } + if strings.HasPrefix(p, "server_max_window_bits=") { + // We can't adjust the deflate window, but decoding with a larger window is acceptable. + continue + } return nil, fmt.Errorf("unsupported permessage-deflate parameter: %q", p) } diff --git a/vendor/nhooyr.io/websocket/doc.go b/vendor/nhooyr.io/websocket/doc.go index efa920e3..0d8e66ae 100644 --- a/vendor/nhooyr.io/websocket/doc.go +++ b/vendor/nhooyr.io/websocket/doc.go @@ -1,7 +1,10 @@ +//go:build !js // +build !js // Package websocket implements the RFC 6455 WebSocket protocol. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // https://tools.ietf.org/html/rfc6455 // // Use Dial to dial a WebSocket server. @@ -12,11 +15,11 @@ // // The examples are the best way to understand how to correctly use the library. // -// The wsjson and wspb subpackages contain helpers for JSON and protobuf messages. +// The wsjson subpackage contain helpers for JSON and protobuf messages. // // More documentation at https://nhooyr.io/websocket. // -// Wasm +// # Wasm // // The client side supports compiling to Wasm. // It wraps the WebSocket browser API. @@ -25,8 +28,9 @@ // // Some important caveats to be aware of: // -// - Accept always errors out -// - Conn.Ping is no-op -// - HTTPClient, HTTPHeader and CompressionMode in DialOptions are no-op -// - *http.Response from Dial is &http.Response{} with a 101 status code on success +// - Accept always errors out +// - Conn.Ping is no-op +// - Conn.CloseNow is Close(StatusGoingAway, "") +// - HTTPClient, HTTPHeader and CompressionMode in DialOptions are no-op +// - *http.Response from Dial is &http.Response{} with a 101 status code on success package websocket // import "nhooyr.io/websocket" diff --git a/vendor/nhooyr.io/websocket/frame.go b/vendor/nhooyr.io/websocket/frame.go index 2a036f94..d5631863 100644 --- a/vendor/nhooyr.io/websocket/frame.go +++ b/vendor/nhooyr.io/websocket/frame.go @@ -1,3 +1,5 @@ +//go:build !js + package websocket import ( @@ -6,7 +8,6 @@ import ( "fmt" "io" "math" - "math/bits" "nhooyr.io/websocket/internal/errd" ) @@ -170,125 +171,3 @@ func writeFrameHeader(h header, w *bufio.Writer, buf []byte) (err error) { return nil } - -// mask applies the WebSocket masking algorithm to p -// with the given key. -// See https://tools.ietf.org/html/rfc6455#section-5.3 -// -// The returned value is the correctly rotated key to -// to continue to mask/unmask the message. -// -// It is optimized for LittleEndian and expects the key -// to be in little endian. -// -// See https://github.com/golang/go/issues/31586 -func mask(key uint32, b []byte) uint32 { - if len(b) >= 8 { - key64 := uint64(key)<<32 | uint64(key) - - // At some point in the future we can clean these unrolled loops up. - // See https://github.com/golang/go/issues/31586#issuecomment-487436401 - - // Then we xor until b is less than 128 bytes. - for len(b) >= 128 { - v := binary.LittleEndian.Uint64(b) - binary.LittleEndian.PutUint64(b, v^key64) - v = binary.LittleEndian.Uint64(b[8:16]) - binary.LittleEndian.PutUint64(b[8:16], v^key64) - v = binary.LittleEndian.Uint64(b[16:24]) - binary.LittleEndian.PutUint64(b[16:24], v^key64) - v = binary.LittleEndian.Uint64(b[24:32]) - binary.LittleEndian.PutUint64(b[24:32], v^key64) - v = binary.LittleEndian.Uint64(b[32:40]) - binary.LittleEndian.PutUint64(b[32:40], v^key64) - v = binary.LittleEndian.Uint64(b[40:48]) - binary.LittleEndian.PutUint64(b[40:48], v^key64) - v = binary.LittleEndian.Uint64(b[48:56]) - binary.LittleEndian.PutUint64(b[48:56], v^key64) - v = binary.LittleEndian.Uint64(b[56:64]) - binary.LittleEndian.PutUint64(b[56:64], v^key64) - v = binary.LittleEndian.Uint64(b[64:72]) - binary.LittleEndian.PutUint64(b[64:72], v^key64) - v = binary.LittleEndian.Uint64(b[72:80]) - binary.LittleEndian.PutUint64(b[72:80], v^key64) - v = binary.LittleEndian.Uint64(b[80:88]) - binary.LittleEndian.PutUint64(b[80:88], v^key64) - v = binary.LittleEndian.Uint64(b[88:96]) - binary.LittleEndian.PutUint64(b[88:96], v^key64) - v = binary.LittleEndian.Uint64(b[96:104]) - binary.LittleEndian.PutUint64(b[96:104], v^key64) - v = binary.LittleEndian.Uint64(b[104:112]) - binary.LittleEndian.PutUint64(b[104:112], v^key64) - v = binary.LittleEndian.Uint64(b[112:120]) - binary.LittleEndian.PutUint64(b[112:120], v^key64) - v = binary.LittleEndian.Uint64(b[120:128]) - binary.LittleEndian.PutUint64(b[120:128], v^key64) - b = b[128:] - } - - // Then we xor until b is less than 64 bytes. - for len(b) >= 64 { - v := binary.LittleEndian.Uint64(b) - binary.LittleEndian.PutUint64(b, v^key64) - v = binary.LittleEndian.Uint64(b[8:16]) - binary.LittleEndian.PutUint64(b[8:16], v^key64) - v = binary.LittleEndian.Uint64(b[16:24]) - binary.LittleEndian.PutUint64(b[16:24], v^key64) - v = binary.LittleEndian.Uint64(b[24:32]) - binary.LittleEndian.PutUint64(b[24:32], v^key64) - v = binary.LittleEndian.Uint64(b[32:40]) - binary.LittleEndian.PutUint64(b[32:40], v^key64) - v = binary.LittleEndian.Uint64(b[40:48]) - binary.LittleEndian.PutUint64(b[40:48], v^key64) - v = binary.LittleEndian.Uint64(b[48:56]) - binary.LittleEndian.PutUint64(b[48:56], v^key64) - v = binary.LittleEndian.Uint64(b[56:64]) - binary.LittleEndian.PutUint64(b[56:64], v^key64) - b = b[64:] - } - - // Then we xor until b is less than 32 bytes. - for len(b) >= 32 { - v := binary.LittleEndian.Uint64(b) - binary.LittleEndian.PutUint64(b, v^key64) - v = binary.LittleEndian.Uint64(b[8:16]) - binary.LittleEndian.PutUint64(b[8:16], v^key64) - v = binary.LittleEndian.Uint64(b[16:24]) - binary.LittleEndian.PutUint64(b[16:24], v^key64) - v = binary.LittleEndian.Uint64(b[24:32]) - binary.LittleEndian.PutUint64(b[24:32], v^key64) - b = b[32:] - } - - // Then we xor until b is less than 16 bytes. - for len(b) >= 16 { - v := binary.LittleEndian.Uint64(b) - binary.LittleEndian.PutUint64(b, v^key64) - v = binary.LittleEndian.Uint64(b[8:16]) - binary.LittleEndian.PutUint64(b[8:16], v^key64) - b = b[16:] - } - - // Then we xor until b is less than 8 bytes. - for len(b) >= 8 { - v := binary.LittleEndian.Uint64(b) - binary.LittleEndian.PutUint64(b, v^key64) - b = b[8:] - } - } - - // Then we xor until b is less than 4 bytes. - for len(b) >= 4 { - v := binary.LittleEndian.Uint32(b) - binary.LittleEndian.PutUint32(b, v^key) - b = b[4:] - } - - // xor remaining bytes. - for i := range b { - b[i] ^= byte(key) - key = bits.RotateLeft32(key, -8) - } - - return key -} diff --git a/vendor/nhooyr.io/websocket/internal/util/util.go b/vendor/nhooyr.io/websocket/internal/util/util.go new file mode 100644 index 00000000..aa210703 --- /dev/null +++ b/vendor/nhooyr.io/websocket/internal/util/util.go @@ -0,0 +1,15 @@ +package util + +// WriterFunc is used to implement one off io.Writers. +type WriterFunc func(p []byte) (int, error) + +func (f WriterFunc) Write(p []byte) (int, error) { + return f(p) +} + +// ReaderFunc is used to implement one off io.Readers. +type ReaderFunc func(p []byte) (int, error) + +func (f ReaderFunc) Read(p []byte) (int, error) { + return f(p) +} diff --git a/vendor/nhooyr.io/websocket/internal/wsjs/wsjs_js.go b/vendor/nhooyr.io/websocket/internal/wsjs/wsjs_js.go index 26ffb456..11eb59cb 100644 --- a/vendor/nhooyr.io/websocket/internal/wsjs/wsjs_js.go +++ b/vendor/nhooyr.io/websocket/internal/wsjs/wsjs_js.go @@ -1,3 +1,4 @@ +//go:build js // +build js // Package wsjs implements typed access to the browser javascript WebSocket API. @@ -118,8 +119,6 @@ func (c WebSocket) OnMessage(fn func(m MessageEvent)) (remove func()) { Data: data, } fn(me) - - return }) } diff --git a/vendor/nhooyr.io/websocket/internal/xsync/go.go b/vendor/nhooyr.io/websocket/internal/xsync/go.go index 7a61f27f..5229b12a 100644 --- a/vendor/nhooyr.io/websocket/internal/xsync/go.go +++ b/vendor/nhooyr.io/websocket/internal/xsync/go.go @@ -2,6 +2,7 @@ package xsync import ( "fmt" + "runtime/debug" ) // Go allows running a function in another goroutine @@ -13,7 +14,7 @@ func Go(fn func() error) <-chan error { r := recover() if r != nil { select { - case errs <- fmt.Errorf("panic in go fn: %v", r): + case errs <- fmt.Errorf("panic in go fn: %v, %s", r, debug.Stack()): default: } } diff --git a/vendor/nhooyr.io/websocket/make.sh b/vendor/nhooyr.io/websocket/make.sh new file mode 100644 index 00000000..170d00a8 --- /dev/null +++ b/vendor/nhooyr.io/websocket/make.sh @@ -0,0 +1,12 @@ +#!/bin/sh +set -eu +cd -- "$(dirname "$0")" + +echo "=== fmt.sh" +./ci/fmt.sh +echo "=== lint.sh" +./ci/lint.sh +echo "=== test.sh" +./ci/test.sh "$@" +echo "=== bench.sh" +./ci/bench.sh diff --git a/vendor/nhooyr.io/websocket/mask.go b/vendor/nhooyr.io/websocket/mask.go new file mode 100644 index 00000000..7bc0c8d5 --- /dev/null +++ b/vendor/nhooyr.io/websocket/mask.go @@ -0,0 +1,128 @@ +package websocket + +import ( + "encoding/binary" + "math/bits" +) + +// maskGo applies the WebSocket masking algorithm to p +// with the given key. +// See https://tools.ietf.org/html/rfc6455#section-5.3 +// +// The returned value is the correctly rotated key to +// to continue to mask/unmask the message. +// +// It is optimized for LittleEndian and expects the key +// to be in little endian. +// +// See https://github.com/golang/go/issues/31586 +func maskGo(b []byte, key uint32) uint32 { + if len(b) >= 8 { + key64 := uint64(key)<<32 | uint64(key) + + // At some point in the future we can clean these unrolled loops up. + // See https://github.com/golang/go/issues/31586#issuecomment-487436401 + + // Then we xor until b is less than 128 bytes. + for len(b) >= 128 { + v := binary.LittleEndian.Uint64(b) + binary.LittleEndian.PutUint64(b, v^key64) + v = binary.LittleEndian.Uint64(b[8:16]) + binary.LittleEndian.PutUint64(b[8:16], v^key64) + v = binary.LittleEndian.Uint64(b[16:24]) + binary.LittleEndian.PutUint64(b[16:24], v^key64) + v = binary.LittleEndian.Uint64(b[24:32]) + binary.LittleEndian.PutUint64(b[24:32], v^key64) + v = binary.LittleEndian.Uint64(b[32:40]) + binary.LittleEndian.PutUint64(b[32:40], v^key64) + v = binary.LittleEndian.Uint64(b[40:48]) + binary.LittleEndian.PutUint64(b[40:48], v^key64) + v = binary.LittleEndian.Uint64(b[48:56]) + binary.LittleEndian.PutUint64(b[48:56], v^key64) + v = binary.LittleEndian.Uint64(b[56:64]) + binary.LittleEndian.PutUint64(b[56:64], v^key64) + v = binary.LittleEndian.Uint64(b[64:72]) + binary.LittleEndian.PutUint64(b[64:72], v^key64) + v = binary.LittleEndian.Uint64(b[72:80]) + binary.LittleEndian.PutUint64(b[72:80], v^key64) + v = binary.LittleEndian.Uint64(b[80:88]) + binary.LittleEndian.PutUint64(b[80:88], v^key64) + v = binary.LittleEndian.Uint64(b[88:96]) + binary.LittleEndian.PutUint64(b[88:96], v^key64) + v = binary.LittleEndian.Uint64(b[96:104]) + binary.LittleEndian.PutUint64(b[96:104], v^key64) + v = binary.LittleEndian.Uint64(b[104:112]) + binary.LittleEndian.PutUint64(b[104:112], v^key64) + v = binary.LittleEndian.Uint64(b[112:120]) + binary.LittleEndian.PutUint64(b[112:120], v^key64) + v = binary.LittleEndian.Uint64(b[120:128]) + binary.LittleEndian.PutUint64(b[120:128], v^key64) + b = b[128:] + } + + // Then we xor until b is less than 64 bytes. + for len(b) >= 64 { + v := binary.LittleEndian.Uint64(b) + binary.LittleEndian.PutUint64(b, v^key64) + v = binary.LittleEndian.Uint64(b[8:16]) + binary.LittleEndian.PutUint64(b[8:16], v^key64) + v = binary.LittleEndian.Uint64(b[16:24]) + binary.LittleEndian.PutUint64(b[16:24], v^key64) + v = binary.LittleEndian.Uint64(b[24:32]) + binary.LittleEndian.PutUint64(b[24:32], v^key64) + v = binary.LittleEndian.Uint64(b[32:40]) + binary.LittleEndian.PutUint64(b[32:40], v^key64) + v = binary.LittleEndian.Uint64(b[40:48]) + binary.LittleEndian.PutUint64(b[40:48], v^key64) + v = binary.LittleEndian.Uint64(b[48:56]) + binary.LittleEndian.PutUint64(b[48:56], v^key64) + v = binary.LittleEndian.Uint64(b[56:64]) + binary.LittleEndian.PutUint64(b[56:64], v^key64) + b = b[64:] + } + + // Then we xor until b is less than 32 bytes. + for len(b) >= 32 { + v := binary.LittleEndian.Uint64(b) + binary.LittleEndian.PutUint64(b, v^key64) + v = binary.LittleEndian.Uint64(b[8:16]) + binary.LittleEndian.PutUint64(b[8:16], v^key64) + v = binary.LittleEndian.Uint64(b[16:24]) + binary.LittleEndian.PutUint64(b[16:24], v^key64) + v = binary.LittleEndian.Uint64(b[24:32]) + binary.LittleEndian.PutUint64(b[24:32], v^key64) + b = b[32:] + } + + // Then we xor until b is less than 16 bytes. + for len(b) >= 16 { + v := binary.LittleEndian.Uint64(b) + binary.LittleEndian.PutUint64(b, v^key64) + v = binary.LittleEndian.Uint64(b[8:16]) + binary.LittleEndian.PutUint64(b[8:16], v^key64) + b = b[16:] + } + + // Then we xor until b is less than 8 bytes. + for len(b) >= 8 { + v := binary.LittleEndian.Uint64(b) + binary.LittleEndian.PutUint64(b, v^key64) + b = b[8:] + } + } + + // Then we xor until b is less than 4 bytes. + for len(b) >= 4 { + v := binary.LittleEndian.Uint32(b) + binary.LittleEndian.PutUint32(b, v^key) + b = b[4:] + } + + // xor remaining bytes. + for i := range b { + b[i] ^= byte(key) + key = bits.RotateLeft32(key, -8) + } + + return key +} diff --git a/vendor/nhooyr.io/websocket/mask_amd64.s b/vendor/nhooyr.io/websocket/mask_amd64.s new file mode 100644 index 00000000..bd42be31 --- /dev/null +++ b/vendor/nhooyr.io/websocket/mask_amd64.s @@ -0,0 +1,127 @@ +#include "textflag.h" + +// func maskAsm(b *byte, len int, key uint32) +TEXT ·maskAsm(SB), NOSPLIT, $0-28 + // AX = b + // CX = len (left length) + // SI = key (uint32) + // DI = uint64(SI) | uint64(SI)<<32 + MOVQ b+0(FP), AX + MOVQ len+8(FP), CX + MOVL key+16(FP), SI + + // calculate the DI + // DI = SI<<32 | SI + MOVL SI, DI + MOVQ DI, DX + SHLQ $32, DI + ORQ DX, DI + + CMPQ CX, $15 + JLE less_than_16 + CMPQ CX, $63 + JLE less_than_64 + CMPQ CX, $128 + JLE sse + TESTQ $31, AX + JNZ unaligned + +unaligned_loop_1byte: + XORB SI, (AX) + INCQ AX + DECQ CX + ROLL $24, SI + TESTQ $7, AX + JNZ unaligned_loop_1byte + + // calculate DI again since SI was modified + // DI = SI<<32 | SI + MOVL SI, DI + MOVQ DI, DX + SHLQ $32, DI + ORQ DX, DI + + TESTQ $31, AX + JZ sse + +unaligned: + TESTQ $7, AX // AND $7 & len, if not zero jump to loop_1b. + JNZ unaligned_loop_1byte + +unaligned_loop: + // we don't need to check the CX since we know it's above 128 + XORQ DI, (AX) + ADDQ $8, AX + SUBQ $8, CX + TESTQ $31, AX + JNZ unaligned_loop + JMP sse + +sse: + CMPQ CX, $0x40 + JL less_than_64 + MOVQ DI, X0 + PUNPCKLQDQ X0, X0 + +sse_loop: + MOVOU 0*16(AX), X1 + MOVOU 1*16(AX), X2 + MOVOU 2*16(AX), X3 + MOVOU 3*16(AX), X4 + PXOR X0, X1 + PXOR X0, X2 + PXOR X0, X3 + PXOR X0, X4 + MOVOU X1, 0*16(AX) + MOVOU X2, 1*16(AX) + MOVOU X3, 2*16(AX) + MOVOU X4, 3*16(AX) + ADDQ $0x40, AX + SUBQ $0x40, CX + CMPQ CX, $0x40 + JAE sse_loop + +less_than_64: + TESTQ $32, CX + JZ less_than_32 + XORQ DI, (AX) + XORQ DI, 8(AX) + XORQ DI, 16(AX) + XORQ DI, 24(AX) + ADDQ $32, AX + +less_than_32: + TESTQ $16, CX + JZ less_than_16 + XORQ DI, (AX) + XORQ DI, 8(AX) + ADDQ $16, AX + +less_than_16: + TESTQ $8, CX + JZ less_than_8 + XORQ DI, (AX) + ADDQ $8, AX + +less_than_8: + TESTQ $4, CX + JZ less_than_4 + XORL SI, (AX) + ADDQ $4, AX + +less_than_4: + TESTQ $2, CX + JZ less_than_2 + XORW SI, (AX) + ROLL $16, SI + ADDQ $2, AX + +less_than_2: + TESTQ $1, CX + JZ done + XORB SI, (AX) + ROLL $24, SI + +done: + MOVL SI, ret+24(FP) + RET diff --git a/vendor/nhooyr.io/websocket/mask_arm64.s b/vendor/nhooyr.io/websocket/mask_arm64.s new file mode 100644 index 00000000..e494b43a --- /dev/null +++ b/vendor/nhooyr.io/websocket/mask_arm64.s @@ -0,0 +1,72 @@ +#include "textflag.h" + +// func maskAsm(b *byte, len int, key uint32) +TEXT ·maskAsm(SB), NOSPLIT, $0-28 + // R0 = b + // R1 = len + // R3 = key (uint32) + // R2 = uint64(key)<<32 | uint64(key) + MOVD b_ptr+0(FP), R0 + MOVD b_len+8(FP), R1 + MOVWU key+16(FP), R3 + MOVD R3, R2 + ORR R2<<32, R2, R2 + VDUP R2, V0.D2 + CMP $64, R1 + BLT less_than_64 + +loop_64: + VLD1 (R0), [V1.B16, V2.B16, V3.B16, V4.B16] + VEOR V1.B16, V0.B16, V1.B16 + VEOR V2.B16, V0.B16, V2.B16 + VEOR V3.B16, V0.B16, V3.B16 + VEOR V4.B16, V0.B16, V4.B16 + VST1.P [V1.B16, V2.B16, V3.B16, V4.B16], 64(R0) + SUBS $64, R1 + CMP $64, R1 + BGE loop_64 + +less_than_64: + CBZ R1, end + TBZ $5, R1, less_than_32 + VLD1 (R0), [V1.B16, V2.B16] + VEOR V1.B16, V0.B16, V1.B16 + VEOR V2.B16, V0.B16, V2.B16 + VST1.P [V1.B16, V2.B16], 32(R0) + +less_than_32: + TBZ $4, R1, less_than_16 + LDP (R0), (R11, R12) + EOR R11, R2, R11 + EOR R12, R2, R12 + STP.P (R11, R12), 16(R0) + +less_than_16: + TBZ $3, R1, less_than_8 + MOVD (R0), R11 + EOR R2, R11, R11 + MOVD.P R11, 8(R0) + +less_than_8: + TBZ $2, R1, less_than_4 + MOVWU (R0), R11 + EORW R2, R11, R11 + MOVWU.P R11, 4(R0) + +less_than_4: + TBZ $1, R1, less_than_2 + MOVHU (R0), R11 + EORW R3, R11, R11 + MOVHU.P R11, 2(R0) + RORW $16, R3 + +less_than_2: + TBZ $0, R1, end + MOVBU (R0), R11 + EORW R3, R11, R11 + MOVBU.P R11, 1(R0) + RORW $8, R3 + +end: + MOVWU R3, ret+24(FP) + RET diff --git a/vendor/nhooyr.io/websocket/mask_asm.go b/vendor/nhooyr.io/websocket/mask_asm.go new file mode 100644 index 00000000..f9484b5b --- /dev/null +++ b/vendor/nhooyr.io/websocket/mask_asm.go @@ -0,0 +1,26 @@ +//go:build amd64 || arm64 + +package websocket + +func mask(b []byte, key uint32) uint32 { + // TODO: Will enable in v1.9.0. + return maskGo(b, key) + /* + if len(b) > 0 { + return maskAsm(&b[0], len(b), key) + } + return key + */ +} + +// @nhooyr: I am not confident that the amd64 or the arm64 implementations of this +// function are perfect. There are almost certainly missing optimizations or +// opportunities for simplification. I'm confident there are no bugs though. +// For example, the arm64 implementation doesn't align memory like the amd64. +// Or the amd64 implementation could use AVX512 instead of just AVX2. +// The AVX2 code I had to disable anyway as it wasn't performing as expected. +// See https://github.com/nhooyr/websocket/pull/326#issuecomment-1771138049 +// +//go:noescape +//lint:ignore U1000 disabled till v1.9.0 +func maskAsm(b *byte, len int, key uint32) uint32 diff --git a/vendor/nhooyr.io/websocket/mask_go.go b/vendor/nhooyr.io/websocket/mask_go.go new file mode 100644 index 00000000..b29435e9 --- /dev/null +++ b/vendor/nhooyr.io/websocket/mask_go.go @@ -0,0 +1,7 @@ +//go:build !amd64 && !arm64 && !js + +package websocket + +func mask(b []byte, key uint32) uint32 { + return maskGo(b, key) +} diff --git a/vendor/nhooyr.io/websocket/netconn.go b/vendor/nhooyr.io/websocket/netconn.go index 64aadf0b..9359bbce 100644 --- a/vendor/nhooyr.io/websocket/netconn.go +++ b/vendor/nhooyr.io/websocket/netconn.go @@ -6,12 +6,14 @@ import ( "io" "math" "net" - "sync" + "sync/atomic" "time" ) // NetConn converts a *websocket.Conn into a net.Conn. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // It's for tunneling arbitrary protocols over WebSockets. // Few users of the library will need this but it's tricky to implement // correctly and so provided in the library. @@ -28,30 +30,64 @@ import ( // // Close will close the *websocket.Conn with StatusNormalClosure. // -// When a deadline is hit, the connection will be closed. This is -// different from most net.Conn implementations where only the -// reading/writing goroutines are interrupted but the connection is kept alive. +// When a deadline is hit and there is an active read or write goroutine, the +// connection will be closed. This is different from most net.Conn implementations +// where only the reading/writing goroutines are interrupted but the connection +// is kept alive. +// +// The Addr methods will return the real addresses for connections obtained +// from websocket.Accept. But for connections obtained from websocket.Dial, a mock net.Addr +// will be returned that gives "websocket" for Network() and "websocket/unknown-addr" for +// String(). This is because websocket.Dial only exposes a io.ReadWriteCloser instead of the +// full net.Conn to us. // -// The Addr methods will return a mock net.Addr that returns "websocket" for Network -// and "websocket/unknown-addr" for String. +// When running as WASM, the Addr methods will always return the mock address described above. // // A received StatusNormalClosure or StatusGoingAway close frame will be translated to // io.EOF when reading. +// +// Furthermore, the ReadLimit is set to -1 to disable it. func NetConn(ctx context.Context, c *Conn, msgType MessageType) net.Conn { + c.SetReadLimit(-1) + nc := &netConn{ c: c, msgType: msgType, + readMu: newMu(c), + writeMu: newMu(c), } - var cancel context.CancelFunc - nc.writeContext, cancel = context.WithCancel(ctx) - nc.writeTimer = time.AfterFunc(math.MaxInt64, cancel) + nc.writeCtx, nc.writeCancel = context.WithCancel(ctx) + nc.readCtx, nc.readCancel = context.WithCancel(ctx) + + nc.writeTimer = time.AfterFunc(math.MaxInt64, func() { + if !nc.writeMu.tryLock() { + // If the lock cannot be acquired, then there is an + // active write goroutine and so we should cancel the context. + nc.writeCancel() + return + } + defer nc.writeMu.unlock() + + // Prevents future writes from writing until the deadline is reset. + atomic.StoreInt64(&nc.writeExpired, 1) + }) if !nc.writeTimer.Stop() { <-nc.writeTimer.C } - nc.readContext, cancel = context.WithCancel(ctx) - nc.readTimer = time.AfterFunc(math.MaxInt64, cancel) + nc.readTimer = time.AfterFunc(math.MaxInt64, func() { + if !nc.readMu.tryLock() { + // If the lock cannot be acquired, then there is an + // active read goroutine and so we should cancel the context. + nc.readCancel() + return + } + defer nc.readMu.unlock() + + // Prevents future reads from reading until the deadline is reset. + atomic.StoreInt64(&nc.readExpired, 1) + }) if !nc.readTimer.Stop() { <-nc.readTimer.C } @@ -60,63 +96,98 @@ func NetConn(ctx context.Context, c *Conn, msgType MessageType) net.Conn { } type netConn struct { + // These must be first to be aligned on 32 bit platforms. + // https://github.com/nhooyr/websocket/pull/438 + readExpired int64 + writeExpired int64 + c *Conn msgType MessageType - writeTimer *time.Timer - writeContext context.Context - - readTimer *time.Timer - readContext context.Context - - readMu sync.Mutex - eofed bool - reader io.Reader + writeTimer *time.Timer + writeMu *mu + writeCtx context.Context + writeCancel context.CancelFunc + + readTimer *time.Timer + readMu *mu + readCtx context.Context + readCancel context.CancelFunc + readEOFed bool + reader io.Reader } var _ net.Conn = &netConn{} -func (c *netConn) Close() error { - return c.c.Close(StatusNormalClosure, "") +func (nc *netConn) Close() error { + nc.writeTimer.Stop() + nc.writeCancel() + nc.readTimer.Stop() + nc.readCancel() + return nc.c.Close(StatusNormalClosure, "") } -func (c *netConn) Write(p []byte) (int, error) { - err := c.c.Write(c.writeContext, c.msgType, p) +func (nc *netConn) Write(p []byte) (int, error) { + nc.writeMu.forceLock() + defer nc.writeMu.unlock() + + if atomic.LoadInt64(&nc.writeExpired) == 1 { + return 0, fmt.Errorf("failed to write: %w", context.DeadlineExceeded) + } + + err := nc.c.Write(nc.writeCtx, nc.msgType, p) if err != nil { return 0, err } return len(p), nil } -func (c *netConn) Read(p []byte) (int, error) { - c.readMu.Lock() - defer c.readMu.Unlock() +func (nc *netConn) Read(p []byte) (int, error) { + nc.readMu.forceLock() + defer nc.readMu.unlock() - if c.eofed { + for { + n, err := nc.read(p) + if err != nil { + return n, err + } + if n == 0 { + continue + } + return n, nil + } +} + +func (nc *netConn) read(p []byte) (int, error) { + if atomic.LoadInt64(&nc.readExpired) == 1 { + return 0, fmt.Errorf("failed to read: %w", context.DeadlineExceeded) + } + + if nc.readEOFed { return 0, io.EOF } - if c.reader == nil { - typ, r, err := c.c.Reader(c.readContext) + if nc.reader == nil { + typ, r, err := nc.c.Reader(nc.readCtx) if err != nil { switch CloseStatus(err) { case StatusNormalClosure, StatusGoingAway: - c.eofed = true + nc.readEOFed = true return 0, io.EOF } return 0, err } - if typ != c.msgType { - err := fmt.Errorf("unexpected frame type read (expected %v): %v", c.msgType, typ) - c.c.Close(StatusUnsupportedData, err.Error()) + if typ != nc.msgType { + err := fmt.Errorf("unexpected frame type read (expected %v): %v", nc.msgType, typ) + nc.c.Close(StatusUnsupportedData, err.Error()) return 0, err } - c.reader = r + nc.reader = r } - n, err := c.reader.Read(p) + n, err := nc.reader.Read(p) if err == io.EOF { - c.reader = nil + nc.reader = nil err = nil } return n, err @@ -133,34 +204,36 @@ func (a websocketAddr) String() string { return "websocket/unknown-addr" } -func (c *netConn) RemoteAddr() net.Addr { - return websocketAddr{} -} - -func (c *netConn) LocalAddr() net.Addr { - return websocketAddr{} -} - -func (c *netConn) SetDeadline(t time.Time) error { - c.SetWriteDeadline(t) - c.SetReadDeadline(t) +func (nc *netConn) SetDeadline(t time.Time) error { + nc.SetWriteDeadline(t) + nc.SetReadDeadline(t) return nil } -func (c *netConn) SetWriteDeadline(t time.Time) error { +func (nc *netConn) SetWriteDeadline(t time.Time) error { + atomic.StoreInt64(&nc.writeExpired, 0) if t.IsZero() { - c.writeTimer.Stop() + nc.writeTimer.Stop() } else { - c.writeTimer.Reset(t.Sub(time.Now())) + dur := time.Until(t) + if dur <= 0 { + dur = 1 + } + nc.writeTimer.Reset(dur) } return nil } -func (c *netConn) SetReadDeadline(t time.Time) error { +func (nc *netConn) SetReadDeadline(t time.Time) error { + atomic.StoreInt64(&nc.readExpired, 0) if t.IsZero() { - c.readTimer.Stop() + nc.readTimer.Stop() } else { - c.readTimer.Reset(t.Sub(time.Now())) + dur := time.Until(t) + if dur <= 0 { + dur = 1 + } + nc.readTimer.Reset(dur) } return nil } diff --git a/vendor/nhooyr.io/websocket/netconn_js.go b/vendor/nhooyr.io/websocket/netconn_js.go new file mode 100644 index 00000000..ccc8c89f --- /dev/null +++ b/vendor/nhooyr.io/websocket/netconn_js.go @@ -0,0 +1,11 @@ +package websocket + +import "net" + +func (nc *netConn) RemoteAddr() net.Addr { + return websocketAddr{} +} + +func (nc *netConn) LocalAddr() net.Addr { + return websocketAddr{} +} diff --git a/vendor/nhooyr.io/websocket/netconn_notjs.go b/vendor/nhooyr.io/websocket/netconn_notjs.go new file mode 100644 index 00000000..f3eb0d66 --- /dev/null +++ b/vendor/nhooyr.io/websocket/netconn_notjs.go @@ -0,0 +1,20 @@ +//go:build !js +// +build !js + +package websocket + +import "net" + +func (nc *netConn) RemoteAddr() net.Addr { + if unc, ok := nc.c.rwc.(net.Conn); ok { + return unc.RemoteAddr() + } + return websocketAddr{} +} + +func (nc *netConn) LocalAddr() net.Addr { + if unc, ok := nc.c.rwc.(net.Conn); ok { + return unc.LocalAddr() + } + return websocketAddr{} +} diff --git a/vendor/nhooyr.io/websocket/read.go b/vendor/nhooyr.io/websocket/read.go index ae05cf93..6ea2d500 100644 --- a/vendor/nhooyr.io/websocket/read.go +++ b/vendor/nhooyr.io/websocket/read.go @@ -1,3 +1,4 @@ +//go:build !js // +build !js package websocket @@ -8,17 +9,20 @@ import ( "errors" "fmt" "io" - "io/ioutil" + "net" "strings" "time" "nhooyr.io/websocket/internal/errd" + "nhooyr.io/websocket/internal/util" "nhooyr.io/websocket/internal/xsync" ) -// Reader reads from the connection until until there is a WebSocket +// Reader reads from the connection until there is a WebSocket // data message to be read. It will handle ping, pong and close frames as appropriate. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // It returns the type of the message and an io.Reader to read it. // The passed context will also bound the reader. // Ensure you read to EOF otherwise the connection will hang. @@ -26,25 +30,34 @@ import ( // Call CloseRead if you do not expect any data messages from the peer. // // Only one Reader may be open at a time. +// +// If you need a separate timeout on the Reader call and the Read itself, +// use time.AfterFunc to cancel the context passed in. +// See https://github.com/nhooyr/websocket/issues/87#issue-451703332 +// Most users should not need this. func (c *Conn) Reader(ctx context.Context) (MessageType, io.Reader, error) { return c.reader(ctx) } // Read is a convenience method around Reader to read a single message // from the connection. +// +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. func (c *Conn) Read(ctx context.Context) (MessageType, []byte, error) { typ, r, err := c.Reader(ctx) if err != nil { return 0, nil, err } - b, err := ioutil.ReadAll(r) + b, err := io.ReadAll(r) return typ, b, err } // CloseRead starts a goroutine to read from the connection until it is closed // or a data message is received. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // Once CloseRead is called you cannot read any messages from the connection. // The returned context will be cancelled when the connection is closed. // @@ -53,12 +66,28 @@ func (c *Conn) Read(ctx context.Context) (MessageType, []byte, error) { // Call CloseRead when you do not expect to read any more messages. // Since it actively reads from the connection, it will ensure that ping, pong and close // frames are responded to. This means c.Ping and c.Close will still work as expected. +// +// This function is idempotent. func (c *Conn) CloseRead(ctx context.Context) context.Context { + c.closeReadMu.Lock() + ctx2 := c.closeReadCtx + if ctx2 != nil { + c.closeReadMu.Unlock() + return ctx2 + } ctx, cancel := context.WithCancel(ctx) + c.closeReadCtx = ctx + c.closeReadDone = make(chan struct{}) + c.closeReadMu.Unlock() + go func() { + defer close(c.closeReadDone) defer cancel() - c.Reader(ctx) - c.Close(StatusPolicyViolation, "unexpected data message") + defer c.close() + _, _, err := c.Reader(ctx) + if err == nil { + c.Close(StatusPolicyViolation, "unexpected data message") + } }() return ctx } @@ -66,13 +95,21 @@ func (c *Conn) CloseRead(ctx context.Context) context.Context { // SetReadLimit sets the max number of bytes to read for a single message. // It applies to the Reader and Read methods. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // By default, the connection has a message read limit of 32768 bytes. // // When the limit is hit, the connection will be closed with StatusMessageTooBig. +// +// Set to -1 to disable. func (c *Conn) SetReadLimit(n int64) { - // We add read one more byte than the limit in case - // there is a fin frame that needs to be read. - c.msgReader.limitReader.limit.Store(n + 1) + if n >= 0 { + // We read one more byte than the limit in case + // there is a fin frame that needs to be read. + n++ + } + + c.msgReader.limitReader.limit.Store(n) } const defaultReadLimit = 32768 @@ -90,13 +127,20 @@ func newMsgReader(c *Conn) *msgReader { func (mr *msgReader) resetFlate() { if mr.flateContextTakeover() { + if mr.dict == nil { + mr.dict = &slidingWindow{} + } mr.dict.init(32768) } if mr.flateBufio == nil { mr.flateBufio = getBufioReader(mr.readFunc) } - mr.flateReader = getFlateReader(mr.flateBufio, mr.dict.buf) + if mr.flateContextTakeover() { + mr.flateReader = getFlateReader(mr.flateBufio, mr.dict.buf) + } else { + mr.flateReader = getFlateReader(mr.flateBufio, nil) + } mr.limitReader.r = mr.flateReader mr.flateTail.Reset(deflateMessageTail) } @@ -111,7 +155,10 @@ func (mr *msgReader) putFlateReader() { func (mr *msgReader) close() { mr.c.readMu.forceLock() mr.putFlateReader() - mr.dict.close() + if mr.dict != nil { + mr.dict.close() + mr.dict = nil + } if mr.flateBufio != nil { putBufioReader(mr.flateBufio) } @@ -181,7 +228,7 @@ func (c *Conn) readLoop(ctx context.Context) (header, error) { func (c *Conn) readFrameHeader(ctx context.Context) (header, error) { select { case <-c.closed: - return header{}, c.closeErr + return header{}, net.ErrClosed case c.readTimeout <- ctx: } @@ -189,18 +236,17 @@ func (c *Conn) readFrameHeader(ctx context.Context) (header, error) { if err != nil { select { case <-c.closed: - return header{}, c.closeErr + return header{}, net.ErrClosed case <-ctx.Done(): return header{}, ctx.Err() default: - c.close(err) return header{}, err } } select { case <-c.closed: - return header{}, c.closeErr + return header{}, net.ErrClosed case c.readTimeout <- context.Background(): } @@ -210,7 +256,7 @@ func (c *Conn) readFrameHeader(ctx context.Context) (header, error) { func (c *Conn) readFramePayload(ctx context.Context, p []byte) (int, error) { select { case <-c.closed: - return 0, c.closeErr + return 0, net.ErrClosed case c.readTimeout <- ctx: } @@ -218,19 +264,17 @@ func (c *Conn) readFramePayload(ctx context.Context, p []byte) (int, error) { if err != nil { select { case <-c.closed: - return n, c.closeErr + return n, net.ErrClosed case <-ctx.Done(): return n, ctx.Err() default: - err = fmt.Errorf("failed to read frame payload: %w", err) - c.close(err) - return n, err + return n, fmt.Errorf("failed to read frame payload: %w", err) } } select { case <-c.closed: - return n, c.closeErr + return n, net.ErrClosed case c.readTimeout <- context.Background(): } @@ -260,7 +304,7 @@ func (c *Conn) handleControl(ctx context.Context, h header) (err error) { } if h.masked { - mask(h.maskKey, b) + mask(b, h.maskKey) } switch h.opcode { @@ -279,9 +323,7 @@ func (c *Conn) handleControl(ctx context.Context, h header) (err error) { return nil } - defer func() { - c.readCloseFrameErr = err - }() + // opClose ce, err := parseClosePayload(b) if err != nil { @@ -291,9 +333,9 @@ func (c *Conn) handleControl(ctx context.Context, h header) (err error) { } err = fmt.Errorf("received close frame: %w", ce) - c.setCloseErr(err) c.writeClose(ce.Code, ce.Reason) - c.close(err) + c.readMu.unlock() + c.close() return err } @@ -307,9 +349,7 @@ func (c *Conn) reader(ctx context.Context) (_ MessageType, _ io.Reader, err erro defer c.readMu.unlock() if !c.msgReader.fin { - err = errors.New("previous message not read to completion") - c.close(fmt.Errorf("failed to get reader: %w", err)) - return 0, nil, err + return 0, nil, errors.New("previous message not read to completion") } h, err := c.readLoop(ctx) @@ -337,14 +377,14 @@ type msgReader struct { flateBufio *bufio.Reader flateTail strings.Reader limitReader *limitReader - dict slidingWindow + dict *slidingWindow fin bool payloadLength int64 maskKey uint32 - // readerFunc(mr.Read) to avoid continuous allocations. - readFunc readerFunc + // util.ReaderFunc(mr.Read) to avoid continuous allocations. + readFunc util.ReaderFunc } func (mr *msgReader) reset(ctx context.Context, h header) { @@ -382,10 +422,9 @@ func (mr *msgReader) Read(p []byte) (n int, err error) { return n, io.EOF } if err != nil { - err = fmt.Errorf("failed to read: %w", err) - mr.c.close(err) + return n, fmt.Errorf("failed to read: %w", err) } - return n, err + return n, nil } func (mr *msgReader) read(p []byte) (int, error) { @@ -424,7 +463,7 @@ func (mr *msgReader) read(p []byte) (int, error) { mr.payloadLength -= int64(n) if !mr.c.client { - mr.maskKey = mask(mr.maskKey, p) + mr.maskKey = mask(p, mr.maskKey) } return n, nil @@ -453,7 +492,11 @@ func (lr *limitReader) reset(r io.Reader) { } func (lr *limitReader) Read(p []byte) (int, error) { - if lr.n <= 0 { + if lr.n < 0 { + return lr.r.Read(p) + } + + if lr.n == 0 { err := fmt.Errorf("read limited at %v bytes", lr.limit.Load()) lr.c.writeError(StatusMessageTooBig, err) return 0, err @@ -464,11 +507,8 @@ func (lr *limitReader) Read(p []byte) (int, error) { } n, err := lr.r.Read(p) lr.n -= int64(n) + if lr.n < 0 { + lr.n = 0 + } return n, err } - -type readerFunc func(p []byte) (int, error) - -func (f readerFunc) Read(p []byte) (int, error) { - return f(p) -} diff --git a/vendor/nhooyr.io/websocket/stringer.go b/vendor/nhooyr.io/websocket/stringer.go index 5a66ba29..f70b623d 100644 --- a/vendor/nhooyr.io/websocket/stringer.go +++ b/vendor/nhooyr.io/websocket/stringer.go @@ -49,6 +49,7 @@ const _MessageType_name = "MessageTextMessageBinary" var _MessageType_index = [...]uint8{0, 11, 24} +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. func (i MessageType) String() string { i -= 1 if i < 0 || i >= MessageType(len(_MessageType_index)-1) { @@ -82,6 +83,7 @@ const _StatusCode_name = "StatusNormalClosureStatusGoingAwayStatusProtocolErrorS var _StatusCode_index = [...]uint16{0, 19, 34, 53, 74, 88, 106, 127, 156, 177, 196, 220, 239, 259, 278, 294, 312} +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. func (i StatusCode) String() string { i -= 1000 if i < 0 || i >= StatusCode(len(_StatusCode_index)-1) { diff --git a/vendor/nhooyr.io/websocket/write.go b/vendor/nhooyr.io/websocket/write.go index 2210cf81..6eaecada 100644 --- a/vendor/nhooyr.io/websocket/write.go +++ b/vendor/nhooyr.io/websocket/write.go @@ -1,3 +1,4 @@ +//go:build !js // +build !js package websocket @@ -10,16 +11,20 @@ import ( "errors" "fmt" "io" + "net" "time" - "github.com/klauspost/compress/flate" + "compress/flate" "nhooyr.io/websocket/internal/errd" + "nhooyr.io/websocket/internal/util" ) // Writer returns a writer bounded by the context that will write // a WebSocket message of type dataType to the connection. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // You must close the writer once you have written the entire message. // // Only one writer can be open at a time, multiple calls will block until the previous writer @@ -34,9 +39,11 @@ func (c *Conn) Writer(ctx context.Context, typ MessageType) (io.WriteCloser, err // Write writes a message to the connection. // +// Deprecated: coder now maintains this library at https://github.com/coder/websocket. +// // See the Writer method if you want to stream a message. // -// If compression is disabled or the threshold is not met, then it +// If compression is disabled or the compression threshold is not met, then it // will write the message in a single frame. func (c *Conn) Write(ctx context.Context, typ MessageType, p []byte) error { _, err := c.write(ctx, typ, p) @@ -47,41 +54,22 @@ func (c *Conn) Write(ctx context.Context, typ MessageType, p []byte) error { } type msgWriter struct { - mw *msgWriterState - closed bool -} - -func (mw *msgWriter) Write(p []byte) (int, error) { - if mw.closed { - return 0, errors.New("cannot use closed writer") - } - return mw.mw.Write(p) -} - -func (mw *msgWriter) Close() error { - if mw.closed { - return errors.New("cannot use closed writer") - } - mw.closed = true - return mw.mw.Close() -} - -type msgWriterState struct { c *Conn mu *mu writeMu *mu + closed bool ctx context.Context opcode opcode flate bool - trimWriter *trimLastFourBytesWriter - dict slidingWindow + trimWriter *trimLastFourBytesWriter + flateWriter *flate.Writer } -func newMsgWriterState(c *Conn) *msgWriterState { - mw := &msgWriterState{ +func newMsgWriter(c *Conn) *msgWriter { + mw := &msgWriter{ c: c, mu: newMu(c), writeMu: newMu(c), @@ -89,18 +77,20 @@ func newMsgWriterState(c *Conn) *msgWriterState { return mw } -func (mw *msgWriterState) ensureFlate() { +func (mw *msgWriter) ensureFlate() { if mw.trimWriter == nil { mw.trimWriter = &trimLastFourBytesWriter{ - w: writerFunc(mw.write), + w: util.WriterFunc(mw.write), } } - mw.dict.init(8192) + if mw.flateWriter == nil { + mw.flateWriter = getFlateWriter(mw.trimWriter) + } mw.flate = true } -func (mw *msgWriterState) flateContextTakeover() bool { +func (mw *msgWriter) flateContextTakeover() bool { if mw.c.client { return !mw.c.copts.clientNoContextTakeover } @@ -108,14 +98,11 @@ func (mw *msgWriterState) flateContextTakeover() bool { } func (c *Conn) writer(ctx context.Context, typ MessageType) (io.WriteCloser, error) { - err := c.msgWriterState.reset(ctx, typ) + err := c.msgWriter.reset(ctx, typ) if err != nil { return nil, err } - return &msgWriter{ - mw: c.msgWriterState, - closed: false, - }, nil + return c.msgWriter, nil } func (c *Conn) write(ctx context.Context, typ MessageType, p []byte) (int, error) { @@ -125,8 +112,8 @@ func (c *Conn) write(ctx context.Context, typ MessageType, p []byte) (int, error } if !c.flate() { - defer c.msgWriterState.mu.unlock() - return c.writeFrame(ctx, true, false, c.msgWriterState.opcode, p) + defer c.msgWriter.mu.unlock() + return c.writeFrame(ctx, true, false, c.msgWriter.opcode, p) } n, err := mw.Write(p) @@ -138,7 +125,7 @@ func (c *Conn) write(ctx context.Context, typ MessageType, p []byte) (int, error return n, err } -func (mw *msgWriterState) reset(ctx context.Context, typ MessageType) error { +func (mw *msgWriter) reset(ctx context.Context, typ MessageType) error { err := mw.mu.lock(ctx) if err != nil { return err @@ -147,24 +134,35 @@ func (mw *msgWriterState) reset(ctx context.Context, typ MessageType) error { mw.ctx = ctx mw.opcode = opcode(typ) mw.flate = false + mw.closed = false mw.trimWriter.reset() return nil } +func (mw *msgWriter) putFlateWriter() { + if mw.flateWriter != nil { + putFlateWriter(mw.flateWriter) + mw.flateWriter = nil + } +} + // Write writes the given bytes to the WebSocket connection. -func (mw *msgWriterState) Write(p []byte) (_ int, err error) { +func (mw *msgWriter) Write(p []byte) (_ int, err error) { err = mw.writeMu.lock(mw.ctx) if err != nil { return 0, fmt.Errorf("failed to write: %w", err) } defer mw.writeMu.unlock() + if mw.closed { + return 0, errors.New("cannot use closed writer") + } + defer func() { if err != nil { err = fmt.Errorf("failed to write: %w", err) - mw.c.close(err) } }() @@ -177,18 +175,13 @@ func (mw *msgWriterState) Write(p []byte) (_ int, err error) { } if mw.flate { - err = flate.StatelessDeflate(mw.trimWriter, p, false, mw.dict.buf) - if err != nil { - return 0, err - } - mw.dict.write(p) - return len(p), nil + return mw.flateWriter.Write(p) } return mw.write(p) } -func (mw *msgWriterState) write(p []byte) (int, error) { +func (mw *msgWriter) write(p []byte) (int, error) { n, err := mw.c.writeFrame(mw.ctx, false, mw.flate, mw.opcode, p) if err != nil { return n, fmt.Errorf("failed to write data frame: %w", err) @@ -198,7 +191,7 @@ func (mw *msgWriterState) write(p []byte) (int, error) { } // Close flushes the frame to the connection. -func (mw *msgWriterState) Close() (err error) { +func (mw *msgWriter) Close() (err error) { defer errd.Wrap(&err, "failed to close writer") err = mw.writeMu.lock(mw.ctx) @@ -207,26 +200,38 @@ func (mw *msgWriterState) Close() (err error) { } defer mw.writeMu.unlock() + if mw.closed { + return errors.New("writer already closed") + } + mw.closed = true + + if mw.flate { + err = mw.flateWriter.Flush() + if err != nil { + return fmt.Errorf("failed to flush flate: %w", err) + } + } + _, err = mw.c.writeFrame(mw.ctx, true, mw.flate, mw.opcode, nil) if err != nil { return fmt.Errorf("failed to write fin frame: %w", err) } if mw.flate && !mw.flateContextTakeover() { - mw.dict.close() + mw.putFlateWriter() } mw.mu.unlock() return nil } -func (mw *msgWriterState) close() { +func (mw *msgWriter) close() { if mw.c.client { mw.c.writeFrameMu.forceLock() putBufioWriter(mw.c.bw) } mw.writeMu.forceLock() - mw.dict.close() + mw.putFlateWriter() } func (c *Conn) writeControl(ctx context.Context, opcode opcode, p []byte) error { @@ -240,7 +245,7 @@ func (c *Conn) writeControl(ctx context.Context, opcode opcode, p []byte) error return nil } -// frame handles all writes to the connection. +// writeFrame handles all writes to the connection. func (c *Conn) writeFrame(ctx context.Context, fin bool, flate bool, opcode opcode, p []byte) (_ int, err error) { err = c.writeFrameMu.lock(ctx) if err != nil { @@ -248,26 +253,9 @@ func (c *Conn) writeFrame(ctx context.Context, fin bool, flate bool, opcode opco } defer c.writeFrameMu.unlock() - // If the state says a close has already been written, we wait until - // the connection is closed and return that error. - // - // However, if the frame being written is a close, that means its the close from - // the state being set so we let it go through. - c.closeMu.Lock() - wroteClose := c.wroteClose - c.closeMu.Unlock() - if wroteClose && opcode != opClose { - select { - case <-ctx.Done(): - return 0, ctx.Err() - case <-c.closed: - return 0, c.closeErr - } - } - select { case <-c.closed: - return 0, c.closeErr + return 0, net.ErrClosed case c.writeTimeout <- ctx: } @@ -275,11 +263,11 @@ func (c *Conn) writeFrame(ctx context.Context, fin bool, flate bool, opcode opco if err != nil { select { case <-c.closed: - err = c.closeErr + err = net.ErrClosed case <-ctx.Done(): err = ctx.Err() + default: } - c.close(err) err = fmt.Errorf("failed to write frame: %w", err) } }() @@ -321,7 +309,10 @@ func (c *Conn) writeFrame(ctx context.Context, fin bool, flate bool, opcode opco select { case <-c.closed: - return n, c.closeErr + if opcode == opClose { + return n, nil + } + return n, net.ErrClosed case c.writeTimeout <- context.Background(): } @@ -358,7 +349,7 @@ func (c *Conn) writeFramePayload(p []byte) (n int, err error) { return n, err } - maskKey = mask(maskKey, c.writeBuf[i:c.bw.Buffered()]) + maskKey = mask(c.writeBuf[i:c.bw.Buffered()], maskKey) p = p[j:] n += j @@ -367,17 +358,11 @@ func (c *Conn) writeFramePayload(p []byte) (n int, err error) { return n, nil } -type writerFunc func(p []byte) (int, error) - -func (f writerFunc) Write(p []byte) (int, error) { - return f(p) -} - // extractBufioWriterBuf grabs the []byte backing a *bufio.Writer // and returns it. func extractBufioWriterBuf(bw *bufio.Writer, w io.Writer) []byte { var writeBuf []byte - bw.Reset(writerFunc(func(p2 []byte) (int, error) { + bw.Reset(util.WriterFunc(func(p2 []byte) (int, error) { writeBuf = p2[:cap(p2)] return len(p2), nil })) @@ -391,7 +376,5 @@ func extractBufioWriterBuf(bw *bufio.Writer, w io.Writer) []byte { } func (c *Conn) writeError(code StatusCode, err error) { - c.setCloseErr(err) c.writeClose(code, err.Error()) - c.close(nil) } diff --git a/vendor/nhooyr.io/websocket/ws_js.go b/vendor/nhooyr.io/websocket/ws_js.go index b87e32cd..02d61f28 100644 --- a/vendor/nhooyr.io/websocket/ws_js.go +++ b/vendor/nhooyr.io/websocket/ws_js.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "net" "net/http" "reflect" "runtime" @@ -18,15 +19,38 @@ import ( "nhooyr.io/websocket/internal/xsync" ) +// opcode represents a WebSocket opcode. +type opcode int + +// https://tools.ietf.org/html/rfc6455#section-11.8. +const ( + opContinuation opcode = iota + opText + opBinary + // 3 - 7 are reserved for further non-control frames. + _ + _ + _ + _ + _ + opClose + opPing + opPong + // 11-16 are reserved for further control frames. +) + // Conn provides a wrapper around the browser WebSocket API. type Conn struct { - ws wsjs.WebSocket + noCopy noCopy + ws wsjs.WebSocket // read limit for a message in bytes. msgReadLimit xsync.Int64 + closeReadMu sync.Mutex + closeReadCtx context.Context + closingMu sync.Mutex - isReadClosed xsync.Int64 closeOnce sync.Once closed chan struct{} closeErrOnce sync.Once @@ -34,6 +58,7 @@ type Conn struct { closeWasClean bool releaseOnClose func() + releaseOnError func() releaseOnMessage func() readSignal chan struct{} @@ -71,9 +96,15 @@ func (c *Conn) init() { c.close(err, e.WasClean) c.releaseOnClose() + c.releaseOnError() c.releaseOnMessage() }) + c.releaseOnError = c.ws.OnError(func(v js.Value) { + c.setCloseErr(errors.New(v.Get("message").String())) + c.closeWithInternal() + }) + c.releaseOnMessage = c.ws.OnMessage(func(e wsjs.MessageEvent) { c.readBufMu.Lock() defer c.readBufMu.Unlock() @@ -100,7 +131,10 @@ func (c *Conn) closeWithInternal() { // Read attempts to read a message from the connection. // The maximum time spent waiting is bounded by the context. func (c *Conn) Read(ctx context.Context) (MessageType, []byte, error) { - if c.isReadClosed.Load() == 1 { + c.closeReadMu.Lock() + closedRead := c.closeReadCtx != nil + c.closeReadMu.Unlock() + if closedRead { return 0, nil, errors.New("WebSocket connection read closed") } @@ -108,7 +142,8 @@ func (c *Conn) Read(ctx context.Context) (MessageType, []byte, error) { if err != nil { return 0, nil, fmt.Errorf("failed to read: %w", err) } - if int64(len(p)) > c.msgReadLimit.Load() { + readLimit := c.msgReadLimit.Load() + if readLimit >= 0 && int64(len(p)) > readLimit { err := fmt.Errorf("read limited at %v bytes", c.msgReadLimit.Load()) c.Close(StatusMessageTooBig, err.Error()) return 0, nil, err @@ -123,7 +158,7 @@ func (c *Conn) read(ctx context.Context) (MessageType, []byte, error) { return 0, nil, ctx.Err() case <-c.readSignal: case <-c.closed: - return 0, nil, c.closeErr + return 0, nil, net.ErrClosed } c.readBufMu.Lock() @@ -177,7 +212,7 @@ func (c *Conn) Write(ctx context.Context, typ MessageType, p []byte) error { func (c *Conn) write(ctx context.Context, typ MessageType, p []byte) error { if c.isClosed() { - return c.closeErr + return net.ErrClosed } switch typ { case MessageBinary: @@ -201,19 +236,28 @@ func (c *Conn) Close(code StatusCode, reason string) error { return nil } +// CloseNow closes the WebSocket connection without attempting a close handshake. +// Use when you do not want the overhead of the close handshake. +// +// note: No different from Close(StatusGoingAway, "") in WASM as there is no way to close +// a WebSocket without the close handshake. +func (c *Conn) CloseNow() error { + return c.Close(StatusGoingAway, "") +} + func (c *Conn) exportedClose(code StatusCode, reason string) error { c.closingMu.Lock() defer c.closingMu.Unlock() + if c.isClosed() { + return net.ErrClosed + } + ce := fmt.Errorf("sent close: %w", CloseError{ Code: code, Reason: reason, }) - if c.isClosed() { - return fmt.Errorf("tried to close with %q but connection already closed: %w", ce, c.closeErr) - } - c.setCloseErr(ce) err := c.ws.Close(int(code), reason) if err != nil { @@ -284,7 +328,7 @@ func dial(ctx context.Context, url string, opts *DialOptions) (*Conn, *http.Resp StatusCode: http.StatusSwitchingProtocols, }, nil case <-c.closed: - return nil, nil, c.closeErr + return nil, nil, net.ErrClosed } } @@ -302,7 +346,7 @@ func (c *Conn) Reader(ctx context.Context) (MessageType, io.Reader, error) { // It buffers the entire message in memory and then sends it when the writer // is closed. func (c *Conn) Writer(ctx context.Context, typ MessageType) (io.WriteCloser, error) { - return writer{ + return &writer{ c: c, ctx: ctx, typ: typ, @@ -320,7 +364,7 @@ type writer struct { b *bytes.Buffer } -func (w writer) Write(p []byte) (int, error) { +func (w *writer) Write(p []byte) (int, error) { if w.closed { return 0, errors.New("cannot write to closed writer") } @@ -331,7 +375,7 @@ func (w writer) Write(p []byte) (int, error) { return n, nil } -func (w writer) Close() error { +func (w *writer) Close() error { if w.closed { return errors.New("cannot close closed writer") } @@ -347,13 +391,23 @@ func (w writer) Close() error { // CloseRead implements *Conn.CloseRead for wasm. func (c *Conn) CloseRead(ctx context.Context) context.Context { - c.isReadClosed.Store(1) - + c.closeReadMu.Lock() + ctx2 := c.closeReadCtx + if ctx2 != nil { + c.closeReadMu.Unlock() + return ctx2 + } ctx, cancel := context.WithCancel(ctx) + c.closeReadCtx = ctx + c.closeReadMu.Unlock() + go func() { defer cancel() - c.read(ctx) - c.Close(StatusPolicyViolation, "unexpected data message") + defer c.CloseNow() + _, _, err := c.read(ctx) + if err != nil { + c.Close(StatusPolicyViolation, "unexpected data message") + } }() return ctx } @@ -377,3 +431,168 @@ func (c *Conn) isClosed() bool { return false } } + +// AcceptOptions represents Accept's options. +type AcceptOptions struct { + Subprotocols []string + InsecureSkipVerify bool + OriginPatterns []string + CompressionMode CompressionMode + CompressionThreshold int +} + +// Accept is stubbed out for Wasm. +func Accept(w http.ResponseWriter, r *http.Request, opts *AcceptOptions) (*Conn, error) { + return nil, errors.New("unimplemented") +} + +// StatusCode represents a WebSocket status code. +// https://tools.ietf.org/html/rfc6455#section-7.4 +type StatusCode int + +// https://www.iana.org/assignments/websocket/websocket.xhtml#close-code-number +// +// These are only the status codes defined by the protocol. +// +// You can define custom codes in the 3000-4999 range. +// The 3000-3999 range is reserved for use by libraries, frameworks and applications. +// The 4000-4999 range is reserved for private use. +const ( + StatusNormalClosure StatusCode = 1000 + StatusGoingAway StatusCode = 1001 + StatusProtocolError StatusCode = 1002 + StatusUnsupportedData StatusCode = 1003 + + // 1004 is reserved and so unexported. + statusReserved StatusCode = 1004 + + // StatusNoStatusRcvd cannot be sent in a close message. + // It is reserved for when a close message is received without + // a status code. + StatusNoStatusRcvd StatusCode = 1005 + + // StatusAbnormalClosure is exported for use only with Wasm. + // In non Wasm Go, the returned error will indicate whether the + // connection was closed abnormally. + StatusAbnormalClosure StatusCode = 1006 + + StatusInvalidFramePayloadData StatusCode = 1007 + StatusPolicyViolation StatusCode = 1008 + StatusMessageTooBig StatusCode = 1009 + StatusMandatoryExtension StatusCode = 1010 + StatusInternalError StatusCode = 1011 + StatusServiceRestart StatusCode = 1012 + StatusTryAgainLater StatusCode = 1013 + StatusBadGateway StatusCode = 1014 + + // StatusTLSHandshake is only exported for use with Wasm. + // In non Wasm Go, the returned error will indicate whether there was + // a TLS handshake failure. + StatusTLSHandshake StatusCode = 1015 +) + +// CloseError is returned when the connection is closed with a status and reason. +// +// Use Go 1.13's errors.As to check for this error. +// Also see the CloseStatus helper. +type CloseError struct { + Code StatusCode + Reason string +} + +func (ce CloseError) Error() string { + return fmt.Sprintf("status = %v and reason = %q", ce.Code, ce.Reason) +} + +// CloseStatus is a convenience wrapper around Go 1.13's errors.As to grab +// the status code from a CloseError. +// +// -1 will be returned if the passed error is nil or not a CloseError. +func CloseStatus(err error) StatusCode { + var ce CloseError + if errors.As(err, &ce) { + return ce.Code + } + return -1 +} + +// CompressionMode represents the modes available to the deflate extension. +// See https://tools.ietf.org/html/rfc7692 +// Works in all browsers except Safari which does not implement the deflate extension. +type CompressionMode int + +const ( + // CompressionNoContextTakeover grabs a new flate.Reader and flate.Writer as needed + // for every message. This applies to both server and client side. + // + // This means less efficient compression as the sliding window from previous messages + // will not be used but the memory overhead will be lower if the connections + // are long lived and seldom used. + // + // The message will only be compressed if greater than 512 bytes. + CompressionNoContextTakeover CompressionMode = iota + + // CompressionContextTakeover uses a flate.Reader and flate.Writer per connection. + // This enables reusing the sliding window from previous messages. + // As most WebSocket protocols are repetitive, this can be very efficient. + // It carries an overhead of 8 kB for every connection compared to CompressionNoContextTakeover. + // + // If the peer negotiates NoContextTakeover on the client or server side, it will be + // used instead as this is required by the RFC. + CompressionContextTakeover + + // CompressionDisabled disables the deflate extension. + // + // Use this if you are using a predominantly binary protocol with very + // little duplication in between messages or CPU and memory are more + // important than bandwidth. + CompressionDisabled +) + +// MessageType represents the type of a WebSocket message. +// See https://tools.ietf.org/html/rfc6455#section-5.6 +type MessageType int + +// MessageType constants. +const ( + // MessageText is for UTF-8 encoded text messages like JSON. + MessageText MessageType = iota + 1 + // MessageBinary is for binary messages like protobufs. + MessageBinary +) + +type mu struct { + c *Conn + ch chan struct{} +} + +func newMu(c *Conn) *mu { + return &mu{ + c: c, + ch: make(chan struct{}, 1), + } +} + +func (m *mu) forceLock() { + m.ch <- struct{}{} +} + +func (m *mu) tryLock() bool { + select { + case m.ch <- struct{}{}: + return true + default: + return false + } +} + +func (m *mu) unlock() { + select { + case <-m.ch: + default: + } +} + +type noCopy struct{} + +func (*noCopy) Lock() {}